/*
 * Copyright (c) 2015-2017 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#include "arch/gcn3/insts/instructions.hh"

#include <algorithm>
#include <cmath>

#include "arch/gcn3/insts/inst_util.hh"
#include "debug/GCN3.hh"
#include "debug/GPUSync.hh"
#include "gpu-compute/shader.hh"

namespace Gcn3ISA
{
    Inst_SOP2__S_ADD_U32::Inst_SOP2__S_ADD_U32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_add_u32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ADD_U32

    Inst_SOP2__S_ADD_U32::~Inst_SOP2__S_ADD_U32()
    {
    } // ~Inst_SOP2__S_ADD_U32

    // D.u = S0.u + S1.u;
    // SCC = (S0.u + S1.u >= 0x100000000ULL ? 1 : 0) is an unsigned
    // overflow/carry-out.
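    // For example, 0xffffffff + 0x1 wraps D.u to 0x0, while the widened
    // 64-bit sum, 0x100000000, reaches 2^32 and so sets SCC to 1.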
    void
    Inst_SOP2__S_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() + src1.rawData();
        scc = ((ScalarRegU64)src0.rawData() + (ScalarRegU64)src1.rawData())
            >= 0x100000000ULL ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_SUB_U32::Inst_SOP2__S_SUB_U32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_sub_u32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_SUB_U32

    Inst_SOP2__S_SUB_U32::~Inst_SOP2__S_SUB_U32()
    {
    } // ~Inst_SOP2__S_SUB_U32

    // D.u = S0.u - S1.u;
    // SCC = (S1.u > S0.u ? 1 : 0) is an unsigned overflow or carry-out.
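    // For example, 0x0 - 0x1 wraps D.u to 0xffffffff and sets SCC to 1,
    // since S1.u > S0.u signals the borrow.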
    void
    Inst_SOP2__S_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() - src1.rawData();
        scc = (src1.rawData() > src0.rawData()) ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_ADD_I32::Inst_SOP2__S_ADD_I32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_add_i32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ADD_I32

    Inst_SOP2__S_ADD_I32::~Inst_SOP2__S_ADD_I32()
    {
    } // ~Inst_SOP2__S_ADD_I32

    // D.i = S0.i + S1.i;
    // SCC = (S0.u[31] == S1.u[31] && S0.u[31] != D.u[31]) is a signed
    // overflow.
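    // For example, 0x7fffffff + 0x1 gives D.i = 0x80000000: both sources
    // have sign bit 0 but the result has sign bit 1, so SCC is set to 1.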
    void
    Inst_SOP2__S_ADD_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() + src1.rawData();
        scc = (bits(src0.rawData(), 31) == bits(src1.rawData(), 31)
            && bits(src0.rawData(), 31) != bits(sdst.rawData(), 31))
            ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_SUB_I32::Inst_SOP2__S_SUB_I32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_sub_i32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_SUB_I32

    Inst_SOP2__S_SUB_I32::~Inst_SOP2__S_SUB_I32()
    {
    } // ~Inst_SOP2__S_SUB_I32

    // D.i = S0.i - S1.i;
    // SCC = (S0.u[31] != S1.u[31] && S0.u[31] != D.u[31]) is a signed
    // overflow.
    void
    Inst_SOP2__S_SUB_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() - src1.rawData();
        scc = (bits(src0.rawData(), 31) != bits(src1.rawData(), 31)
            && bits(src0.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_ADDC_U32::Inst_SOP2__S_ADDC_U32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_addc_u32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ADDC_U32

    Inst_SOP2__S_ADDC_U32::~Inst_SOP2__S_ADDC_U32()
    {
    } // ~Inst_SOP2__S_ADDC_U32

    // D.u = S0.u + S1.u + SCC;
    // SCC = (S0.u + S1.u + SCC >= 0x100000000ULL ? 1 : 0) is an unsigned
    // carry-out.
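    // Paired with s_add_u32, this can implement 64-bit addition: s_add_u32
    // adds the low words and leaves the carry in SCC, and s_addc_u32 then
    // folds that carry into the sum of the high words.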
    void
    Inst_SOP2__S_ADDC_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();
        scc.read();

        sdst = src0.rawData() + src1.rawData() + scc.rawData();
        scc = ((ScalarRegU64)src0.rawData() + (ScalarRegU64)src1.rawData()
            + (ScalarRegU64)scc.rawData()) >= 0x100000000ULL ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_SUBB_U32::Inst_SOP2__S_SUBB_U32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_subb_u32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_SUBB_U32

    Inst_SOP2__S_SUBB_U32::~Inst_SOP2__S_SUBB_U32()
    {
    } // ~Inst_SOP2__S_SUBB_U32

    // D.u = S0.u - S1.u - SCC;
    // SCC = (S1.u + SCC > S0.u ? 1 : 0) is an unsigned overflow.
    void
    Inst_SOP2__S_SUBB_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();
        scc.read();

        sdst = src0.rawData() - src1.rawData() - scc.rawData();
        scc = (src1.rawData() + scc.rawData()) > src0.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_MIN_I32::Inst_SOP2__S_MIN_I32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_min_i32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_MIN_I32

    Inst_SOP2__S_MIN_I32::~Inst_SOP2__S_MIN_I32()
    {
    } // ~Inst_SOP2__S_MIN_I32

    // D.i = (S0.i < S1.i) ? S0.i : S1.i;
    // SCC = 1 if S0 is chosen as the minimum value.
    void
    Inst_SOP2__S_MIN_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = std::min(src0.rawData(), src1.rawData());
        scc = (src0.rawData() < src1.rawData()) ? 1 : 0;

        sdst.write();
        scc.write();
    }
    Inst_SOP2__S_MIN_U32::Inst_SOP2__S_MIN_U32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_min_u32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_MIN_U32

    Inst_SOP2__S_MIN_U32::~Inst_SOP2__S_MIN_U32()
    {
    } // ~Inst_SOP2__S_MIN_U32

    // D.u = (S0.u < S1.u) ? S0.u : S1.u;
    // SCC = 1 if S0 is chosen as the minimum value.
    void
    Inst_SOP2__S_MIN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = std::min(src0.rawData(), src1.rawData());
        scc = (src0.rawData() < src1.rawData()) ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_MAX_I32::Inst_SOP2__S_MAX_I32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_max_i32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_MAX_I32

    Inst_SOP2__S_MAX_I32::~Inst_SOP2__S_MAX_I32()
    {
    } // ~Inst_SOP2__S_MAX_I32

    // D.i = (S0.i > S1.i) ? S0.i : S1.i;
    // SCC = 1 if S0 is chosen as the maximum value.
    void
    Inst_SOP2__S_MAX_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = std::max(src0.rawData(), src1.rawData());
        scc = (src0.rawData() > src1.rawData()) ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_MAX_U32::Inst_SOP2__S_MAX_U32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_max_u32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_MAX_U32

    Inst_SOP2__S_MAX_U32::~Inst_SOP2__S_MAX_U32()
    {
    } // ~Inst_SOP2__S_MAX_U32

    // D.u = (S0.u > S1.u) ? S0.u : S1.u;
    // SCC = 1 if S0 is chosen as the maximum value.
    void
    Inst_SOP2__S_MAX_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = std::max(src0.rawData(), src1.rawData());
        scc = (src0.rawData() > src1.rawData()) ? 1 : 0;

        sdst.write();
        scc.write();
    }
    Inst_SOP2__S_CSELECT_B32::Inst_SOP2__S_CSELECT_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_cselect_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_CSELECT_B32

    Inst_SOP2__S_CSELECT_B32::~Inst_SOP2__S_CSELECT_B32()
    {
    } // ~Inst_SOP2__S_CSELECT_B32

    // D.u = SCC ? S0.u : S1.u (conditional select).
    void
    Inst_SOP2__S_CSELECT_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();
        scc.read();

        sdst = scc.rawData() ? src0.rawData() : src1.rawData();

        sdst.write();
    }

    Inst_SOP2__S_CSELECT_B64::Inst_SOP2__S_CSELECT_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_cselect_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_CSELECT_B64

    Inst_SOP2__S_CSELECT_B64::~Inst_SOP2__S_CSELECT_B64()
    {
    } // ~Inst_SOP2__S_CSELECT_B64

    // D.u64 = SCC ? S0.u64 : S1.u64 (conditional select).
    void
    Inst_SOP2__S_CSELECT_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();
        scc.read();

        sdst = scc.rawData() ? src0.rawData() : src1.rawData();

        sdst.write();
    }
    Inst_SOP2__S_AND_B32::Inst_SOP2__S_AND_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_and_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_AND_B32

    Inst_SOP2__S_AND_B32::~Inst_SOP2__S_AND_B32()
    {
    } // ~Inst_SOP2__S_AND_B32

    // D.u = S0.u & S1.u;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_AND_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() & src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_AND_B64::Inst_SOP2__S_AND_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_and_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_AND_B64

    Inst_SOP2__S_AND_B64::~Inst_SOP2__S_AND_B64()
    {
    } // ~Inst_SOP2__S_AND_B64

    // D.u64 = S0.u64 & S1.u64;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_AND_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() & src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_OR_B32::Inst_SOP2__S_OR_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_or_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_OR_B32

    Inst_SOP2__S_OR_B32::~Inst_SOP2__S_OR_B32()
    {
    } // ~Inst_SOP2__S_OR_B32

    // D.u = S0.u | S1.u;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_OR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() | src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_OR_B64::Inst_SOP2__S_OR_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_or_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_OR_B64

    Inst_SOP2__S_OR_B64::~Inst_SOP2__S_OR_B64()
    {
    } // ~Inst_SOP2__S_OR_B64

    // D.u64 = S0.u64 | S1.u64;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_OR_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() | src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_XOR_B32::Inst_SOP2__S_XOR_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_xor_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_XOR_B32

    Inst_SOP2__S_XOR_B32::~Inst_SOP2__S_XOR_B32()
    {
    } // ~Inst_SOP2__S_XOR_B32

    // D.u = S0.u ^ S1.u;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() ^ src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_XOR_B64::Inst_SOP2__S_XOR_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_xor_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_XOR_B64

    Inst_SOP2__S_XOR_B64::~Inst_SOP2__S_XOR_B64()
    {
    } // ~Inst_SOP2__S_XOR_B64

    // D.u64 = S0.u64 ^ S1.u64;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_XOR_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() ^ src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }
    Inst_SOP2__S_ANDN2_B32::Inst_SOP2__S_ANDN2_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_andn2_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ANDN2_B32

    Inst_SOP2__S_ANDN2_B32::~Inst_SOP2__S_ANDN2_B32()
    {
    } // ~Inst_SOP2__S_ANDN2_B32

    // D.u = S0.u & ~S1.u;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_ANDN2_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() &~ src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_ANDN2_B64::Inst_SOP2__S_ANDN2_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_andn2_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ANDN2_B64

    Inst_SOP2__S_ANDN2_B64::~Inst_SOP2__S_ANDN2_B64()
    {
    } // ~Inst_SOP2__S_ANDN2_B64

    // D.u64 = S0.u64 & ~S1.u64;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_ANDN2_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() &~ src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_ORN2_B32::Inst_SOP2__S_ORN2_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_orn2_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ORN2_B32

    Inst_SOP2__S_ORN2_B32::~Inst_SOP2__S_ORN2_B32()
    {
    } // ~Inst_SOP2__S_ORN2_B32

    // D.u = S0.u | ~S1.u;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_ORN2_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() |~ src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_ORN2_B64::Inst_SOP2__S_ORN2_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_orn2_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ORN2_B64

    Inst_SOP2__S_ORN2_B64::~Inst_SOP2__S_ORN2_B64()
    {
    } // ~Inst_SOP2__S_ORN2_B64

    // D.u64 = S0.u64 | ~S1.u64;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_ORN2_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() |~ src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }
    Inst_SOP2__S_NAND_B32::Inst_SOP2__S_NAND_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_nand_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_NAND_B32

    Inst_SOP2__S_NAND_B32::~Inst_SOP2__S_NAND_B32()
    {
    } // ~Inst_SOP2__S_NAND_B32

    // D.u = ~(S0.u & S1.u);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_NAND_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = ~(src0.rawData() & src1.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_NAND_B64::Inst_SOP2__S_NAND_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_nand_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_NAND_B64

    Inst_SOP2__S_NAND_B64::~Inst_SOP2__S_NAND_B64()
    {
    } // ~Inst_SOP2__S_NAND_B64

    // D.u64 = ~(S0.u64 & S1.u64);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_NAND_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = ~(src0.rawData() & src1.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_NOR_B32::Inst_SOP2__S_NOR_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_nor_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_NOR_B32

    Inst_SOP2__S_NOR_B32::~Inst_SOP2__S_NOR_B32()
    {
    } // ~Inst_SOP2__S_NOR_B32

    // D.u = ~(S0.u | S1.u);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_NOR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = ~(src0.rawData() | src1.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_NOR_B64::Inst_SOP2__S_NOR_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_nor_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_NOR_B64

    Inst_SOP2__S_NOR_B64::~Inst_SOP2__S_NOR_B64()
    {
    } // ~Inst_SOP2__S_NOR_B64

    // D.u64 = ~(S0.u64 | S1.u64);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_NOR_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = ~(src0.rawData() | src1.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_XNOR_B32::Inst_SOP2__S_XNOR_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_xnor_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_XNOR_B32

    Inst_SOP2__S_XNOR_B32::~Inst_SOP2__S_XNOR_B32()
    {
    } // ~Inst_SOP2__S_XNOR_B32

    // D.u = ~(S0.u ^ S1.u);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_XNOR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = ~(src0.rawData() ^ src1.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_XNOR_B64::Inst_SOP2__S_XNOR_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_xnor_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_XNOR_B64

    Inst_SOP2__S_XNOR_B64::~Inst_SOP2__S_XNOR_B64()
    {
    } // ~Inst_SOP2__S_XNOR_B64

    // D.u64 = ~(S0.u64 ^ S1.u64);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_XNOR_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = ~(src0.rawData() ^ src1.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }
    Inst_SOP2__S_LSHL_B32::Inst_SOP2__S_LSHL_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_lshl_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_LSHL_B32

    Inst_SOP2__S_LSHL_B32::~Inst_SOP2__S_LSHL_B32()
    {
    } // ~Inst_SOP2__S_LSHL_B32

    // D.u = S0.u << S1.u[4:0];
    // SCC = 1 if result is non-zero.
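    // Only the five LSBs of S1 supply the shift count, so a shift amount
    // of 33 (0b100001) behaves as a shift by 1.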
    void
    Inst_SOP2__S_LSHL_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = (src0.rawData() << bits(src1.rawData(), 4, 0));
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_LSHL_B64::Inst_SOP2__S_LSHL_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_lshl_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_LSHL_B64

    Inst_SOP2__S_LSHL_B64::~Inst_SOP2__S_LSHL_B64()
    {
    } // ~Inst_SOP2__S_LSHL_B64

    // D.u64 = S0.u64 << S1.u[5:0];
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_LSHL_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = (src0.rawData() << bits(src1.rawData(), 5, 0));
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_LSHR_B32::Inst_SOP2__S_LSHR_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_lshr_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_LSHR_B32

    Inst_SOP2__S_LSHR_B32::~Inst_SOP2__S_LSHR_B32()
    {
    } // ~Inst_SOP2__S_LSHR_B32

    // D.u = S0.u >> S1.u[4:0];
    // SCC = 1 if result is non-zero.
    // The vacated bits are set to zero.
    void
    Inst_SOP2__S_LSHR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0));
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_LSHR_B64::Inst_SOP2__S_LSHR_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_lshr_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_LSHR_B64

    Inst_SOP2__S_LSHR_B64::~Inst_SOP2__S_LSHR_B64()
    {
    } // ~Inst_SOP2__S_LSHR_B64

    // D.u64 = S0.u64 >> S1.u[5:0];
    // SCC = 1 if result is non-zero.
    // The vacated bits are set to zero.
    void
    Inst_SOP2__S_LSHR_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0));
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }
    Inst_SOP2__S_ASHR_I32::Inst_SOP2__S_ASHR_I32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_ashr_i32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ASHR_I32

    Inst_SOP2__S_ASHR_I32::~Inst_SOP2__S_ASHR_I32()
    {
    } // ~Inst_SOP2__S_ASHR_I32

    // D.i = signext(S0.i) >> S1.u[4:0];
    // SCC = 1 if result is non-zero.
    // The vacated bits are set to the sign bit of the input value.
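    // For example, S0.i = 0xfffffff0 (-16) shifted right by 2 yields
    // 0xfffffffc (-4). The implementation below relies on the host
    // compiler performing an arithmetic right shift on the signed
    // operand, as mainstream compilers do.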
    void
    Inst_SOP2__S_ASHR_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0));
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_ASHR_I64::Inst_SOP2__S_ASHR_I64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_ashr_i64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ASHR_I64

    Inst_SOP2__S_ASHR_I64::~Inst_SOP2__S_ASHR_I64()
    {
    } // ~Inst_SOP2__S_ASHR_I64

    // D.i64 = signext(S0.i64) >> S1.u[5:0];
    // SCC = 1 if result is non-zero.
    // The vacated bits are set to the sign bit of the input value.
    void
    Inst_SOP2__S_ASHR_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0));
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }
    Inst_SOP2__S_BFM_B32::Inst_SOP2__S_BFM_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_bfm_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_BFM_B32

    Inst_SOP2__S_BFM_B32::~Inst_SOP2__S_BFM_B32()
    {
    } // ~Inst_SOP2__S_BFM_B32

    // D.u = ((1 << S0.u[4:0]) - 1) << S1.u[4:0] (bitfield mask).
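    // For example, S0.u = 5 and S1.u = 8 produce ((1 << 5) - 1) << 8 =
    // 0x00001f00, a five-bit-wide mask at bit offset 8.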
    void
    Inst_SOP2__S_BFM_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);

        src0.read();
        src1.read();

        sdst = ((1 << bits(src0.rawData(), 4, 0)) - 1)
            << bits(src1.rawData(), 4, 0);

        sdst.write();
    }

    Inst_SOP2__S_BFM_B64::Inst_SOP2__S_BFM_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_bfm_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_BFM_B64

    Inst_SOP2__S_BFM_B64::~Inst_SOP2__S_BFM_B64()
    {
    } // ~Inst_SOP2__S_BFM_B64

    // D.u64 = ((1ULL << S0.u[5:0]) - 1) << S1.u[5:0] (bitfield mask).
    void
    Inst_SOP2__S_BFM_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

        src0.read();
        src1.read();

        sdst = ((1ULL << bits(src0.rawData(), 5, 0)) - 1)
            << bits(src1.rawData(), 5, 0);

        sdst.write();
    }

    Inst_SOP2__S_MUL_I32::Inst_SOP2__S_MUL_I32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_mul_i32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_MUL_I32

    Inst_SOP2__S_MUL_I32::~Inst_SOP2__S_MUL_I32()
    {
    } // ~Inst_SOP2__S_MUL_I32

    // D.i = S0.i * S1.i.
    void
    Inst_SOP2__S_MUL_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

        src0.read();
        src1.read();

        sdst = src0.rawData() * src1.rawData();

        sdst.write();
    }
    Inst_SOP2__S_BFE_U32::Inst_SOP2__S_BFE_U32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_bfe_u32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_BFE_U32

    Inst_SOP2__S_BFE_U32::~Inst_SOP2__S_BFE_U32()
    {
    } // ~Inst_SOP2__S_BFE_U32

    // Bit field extract. S0 is Data, S1[4:0] is field offset, S1[22:16] is
    // field width.
    // D.u = (S0.u >> S1.u[4:0]) & ((1 << S1.u[22:16]) - 1);
    // SCC = 1 if result is non-zero.
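    // For example, S1.u = 0x00050008 selects offset 8 and width 5, so
    // D.u = (S0.u >> 8) & 0x1f; with S0.u = 0x00aabb00 that extracts 0x1b.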
    void
    Inst_SOP2__S_BFE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0))
            & ((1 << bits(src1.rawData(), 22, 16)) - 1);
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_BFE_I32::Inst_SOP2__S_BFE_I32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_bfe_i32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_BFE_I32

    Inst_SOP2__S_BFE_I32::~Inst_SOP2__S_BFE_I32()
    {
    } // ~Inst_SOP2__S_BFE_I32

    // Bit field extract. S0 is Data, S1[4:0] is field offset, S1[22:16] is
    // field width.
    // D.i = (S0.i >> S1.u[4:0]) & ((1 << S1.u[22:16]) - 1);
    // Sign-extend the result;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_BFE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0))
            & ((1 << bits(src1.rawData(), 22, 16)) - 1);
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_BFE_U64::Inst_SOP2__S_BFE_U64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_bfe_u64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_BFE_U64

    Inst_SOP2__S_BFE_U64::~Inst_SOP2__S_BFE_U64()
    {
    } // ~Inst_SOP2__S_BFE_U64

    // Bit field extract. S0 is Data, S1[5:0] is field offset, S1[22:16] is
    // field width.
    // D.u64 = (S0.u64 >> S1.u[5:0]) & ((1 << S1.u[22:16]) - 1);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_BFE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0))
            & ((1 << bits(src1.rawData(), 22, 16)) - 1);
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_BFE_I64::Inst_SOP2__S_BFE_I64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_bfe_i64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_BFE_I64

    Inst_SOP2__S_BFE_I64::~Inst_SOP2__S_BFE_I64()
    {
    } // ~Inst_SOP2__S_BFE_I64

    // Bit field extract. S0 is Data, S1[5:0] is field offset, S1[22:16] is
    // field width.
    // D.i64 = (S0.i64 >> S1.u[5:0]) & ((1 << S1.u[22:16]) - 1);
    // Sign-extend result;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_BFE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0))
            & ((1 << bits(src1.rawData(), 22, 16)) - 1);
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }
    Inst_SOP2__S_CBRANCH_G_FORK::Inst_SOP2__S_CBRANCH_G_FORK(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_cbranch_g_fork")
    {
        setFlag(Branch);
    } // Inst_SOP2__S_CBRANCH_G_FORK

    Inst_SOP2__S_CBRANCH_G_FORK::~Inst_SOP2__S_CBRANCH_G_FORK()
    {
    } // ~Inst_SOP2__S_CBRANCH_G_FORK

    // Conditional branch using branch-stack.
    // S0 = compare mask(vcc or any sgpr) and
    // S1 = 64-bit byte address of target instruction.
    void
    Inst_SOP2__S_CBRANCH_G_FORK::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_SOP2__S_ABSDIFF_I32::Inst_SOP2__S_ABSDIFF_I32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_absdiff_i32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ABSDIFF_I32

    Inst_SOP2__S_ABSDIFF_I32::~Inst_SOP2__S_ABSDIFF_I32()
    {
    } // ~Inst_SOP2__S_ABSDIFF_I32

    // D.i = S0.i - S1.i;
    // if (D.i < 0) then D.i = -D.i;
    // SCC = 1 if result is non-zero.
    // Compute the absolute value of difference between two values.
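    // For example, S0.i = 3 and S1.i = 10 give D.i = 7. Note that the
    // difference below is formed in 32-bit signed arithmetic, so operand
    // pairs whose true difference exceeds the int32 range overflow before
    // std::abs is applied.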
    void
    Inst_SOP2__S_ABSDIFF_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = std::abs(src0.rawData() - src1.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }
    Inst_SOP2__S_RFE_RESTORE_B64::Inst_SOP2__S_RFE_RESTORE_B64(
          InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_rfe_restore_b64")
    {
    } // Inst_SOP2__S_RFE_RESTORE_B64

    Inst_SOP2__S_RFE_RESTORE_B64::~Inst_SOP2__S_RFE_RESTORE_B64()
    {
    } // ~Inst_SOP2__S_RFE_RESTORE_B64

    // Return from exception handler and continue.
    void
    Inst_SOP2__S_RFE_RESTORE_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_SOPK__S_MOVK_I32::Inst_SOPK__S_MOVK_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_movk_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_MOVK_I32

    Inst_SOPK__S_MOVK_I32::~Inst_SOPK__S_MOVK_I32()
    {
    } // ~Inst_SOPK__S_MOVK_I32

    // D.i = signext(SIMM16) (sign extension).
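    // For example, SIMM16 = 0x8000 yields D.i = 0xffff8000 (-32768).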
    void
    Inst_SOPK__S_MOVK_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

        sdst = simm16;

        sdst.write();
    }

    Inst_SOPK__S_CMOVK_I32::Inst_SOPK__S_CMOVK_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmovk_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMOVK_I32

    Inst_SOPK__S_CMOVK_I32::~Inst_SOPK__S_CMOVK_I32()
    {
    } // ~Inst_SOPK__S_CMOVK_I32

    // if (SCC) then D.i = signext(SIMM16);
    // else NOP.
    // Conditional move with sign extension.
    void
    Inst_SOPK__S_CMOVK_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);

        scc.read();

        if (scc.rawData()) {
            sdst = simm16;
            sdst.write();
        }
    }
    Inst_SOPK__S_CMPK_EQ_I32::Inst_SOPK__S_CMPK_EQ_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_eq_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_EQ_I32

    Inst_SOPK__S_CMPK_EQ_I32::~Inst_SOPK__S_CMPK_EQ_I32()
    {
    } // ~Inst_SOPK__S_CMPK_EQ_I32

    // SCC = (S0.i == signext(SIMM16)).
    void
    Inst_SOPK__S_CMPK_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
        ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() == simm16) ? 1 : 0;

        scc.write();
    }

    Inst_SOPK__S_CMPK_LG_I32::Inst_SOPK__S_CMPK_LG_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_lg_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_LG_I32

    Inst_SOPK__S_CMPK_LG_I32::~Inst_SOPK__S_CMPK_LG_I32()
    {
    } // ~Inst_SOPK__S_CMPK_LG_I32

    // SCC = (S0.i != signext(SIMM16)).
    void
    Inst_SOPK__S_CMPK_LG_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
        ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() != simm16) ? 1 : 0;

        scc.write();
    }

    Inst_SOPK__S_CMPK_GT_I32::Inst_SOPK__S_CMPK_GT_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_gt_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_GT_I32

    Inst_SOPK__S_CMPK_GT_I32::~Inst_SOPK__S_CMPK_GT_I32()
    {
    } // ~Inst_SOPK__S_CMPK_GT_I32

    // SCC = (S0.i > signext(SIMM16)).
    void
    Inst_SOPK__S_CMPK_GT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
        ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() > simm16) ? 1 : 0;

        scc.write();
    }

    Inst_SOPK__S_CMPK_GE_I32::Inst_SOPK__S_CMPK_GE_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_ge_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_GE_I32

    Inst_SOPK__S_CMPK_GE_I32::~Inst_SOPK__S_CMPK_GE_I32()
    {
    } // ~Inst_SOPK__S_CMPK_GE_I32

    // SCC = (S0.i >= signext(SIMM16)).
    void
    Inst_SOPK__S_CMPK_GE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
        ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() >= simm16) ? 1 : 0;

        scc.write();
    }

    Inst_SOPK__S_CMPK_LT_I32::Inst_SOPK__S_CMPK_LT_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_lt_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_LT_I32

    Inst_SOPK__S_CMPK_LT_I32::~Inst_SOPK__S_CMPK_LT_I32()
    {
    } // ~Inst_SOPK__S_CMPK_LT_I32

    // SCC = (S0.i < signext(SIMM16)).
    void
    Inst_SOPK__S_CMPK_LT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
        ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() < simm16) ? 1 : 0;

        scc.write();
    }

    Inst_SOPK__S_CMPK_LE_I32::Inst_SOPK__S_CMPK_LE_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_le_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_LE_I32

    Inst_SOPK__S_CMPK_LE_I32::~Inst_SOPK__S_CMPK_LE_I32()
    {
    } // ~Inst_SOPK__S_CMPK_LE_I32

    // SCC = (S0.i <= signext(SIMM16)).
    void
    Inst_SOPK__S_CMPK_LE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
        ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() <= simm16) ? 1 : 0;

        scc.write();
    }

    Inst_SOPK__S_CMPK_EQ_U32::Inst_SOPK__S_CMPK_EQ_U32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_eq_u32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_EQ_U32

    Inst_SOPK__S_CMPK_EQ_U32::~Inst_SOPK__S_CMPK_EQ_U32()
    {
    } // ~Inst_SOPK__S_CMPK_EQ_U32

    // SCC = (S0.u == SIMM16).
    void
    Inst_SOPK__S_CMPK_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
        ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() == simm16) ? 1 : 0;

        scc.write();
    }

    Inst_SOPK__S_CMPK_LG_U32::Inst_SOPK__S_CMPK_LG_U32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_lg_u32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_LG_U32

    Inst_SOPK__S_CMPK_LG_U32::~Inst_SOPK__S_CMPK_LG_U32()
    {
    } // ~Inst_SOPK__S_CMPK_LG_U32

    // SCC = (S0.u != SIMM16).
    void
    Inst_SOPK__S_CMPK_LG_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
        ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() != simm16) ? 1 : 0;

        scc.write();
    }

    Inst_SOPK__S_CMPK_GT_U32::Inst_SOPK__S_CMPK_GT_U32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_gt_u32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_GT_U32

    Inst_SOPK__S_CMPK_GT_U32::~Inst_SOPK__S_CMPK_GT_U32()
    {
    } // ~Inst_SOPK__S_CMPK_GT_U32

    // SCC = (S0.u > SIMM16).
    void
    Inst_SOPK__S_CMPK_GT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
        ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() > simm16) ? 1 : 0;

        scc.write();
    }

    Inst_SOPK__S_CMPK_GE_U32::Inst_SOPK__S_CMPK_GE_U32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_ge_u32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_GE_U32

    Inst_SOPK__S_CMPK_GE_U32::~Inst_SOPK__S_CMPK_GE_U32()
    {
    } // ~Inst_SOPK__S_CMPK_GE_U32

    // SCC = (S0.u >= SIMM16).
    void
    Inst_SOPK__S_CMPK_GE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
        ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() >= simm16) ? 1 : 0;

        scc.write();
    }

    Inst_SOPK__S_CMPK_LT_U32::Inst_SOPK__S_CMPK_LT_U32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_lt_u32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_LT_U32

    Inst_SOPK__S_CMPK_LT_U32::~Inst_SOPK__S_CMPK_LT_U32()
    {
    } // ~Inst_SOPK__S_CMPK_LT_U32

    // SCC = (S0.u < SIMM16).
    void
    Inst_SOPK__S_CMPK_LT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
        ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() < simm16) ? 1 : 0;

        scc.write();
    }

    Inst_SOPK__S_CMPK_LE_U32::Inst_SOPK__S_CMPK_LE_U32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_le_u32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_LE_U32

    Inst_SOPK__S_CMPK_LE_U32::~Inst_SOPK__S_CMPK_LE_U32()
    {
    } // ~Inst_SOPK__S_CMPK_LE_U32

    // SCC = (S0.u <= SIMM16).
    void
    Inst_SOPK__S_CMPK_LE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
        ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() <= simm16) ? 1 : 0;

        scc.write();
    }
    Inst_SOPK__S_ADDK_I32::Inst_SOPK__S_ADDK_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_addk_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_ADDK_I32

    Inst_SOPK__S_ADDK_I32::~Inst_SOPK__S_ADDK_I32()
    {
    } // ~Inst_SOPK__S_ADDK_I32

    // D.i = D.i + signext(SIMM16);
    // SCC = overflow.
    void
    Inst_SOPK__S_ADDK_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI16 simm16 = instData.SIMM16;
        ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = src.rawData() + (ScalarRegI32)sext<16>(simm16);
        scc = (bits(src.rawData(), 31) == bits(simm16, 15)
            && bits(src.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOPK__S_MULK_I32::Inst_SOPK__S_MULK_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_mulk_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_MULK_I32

    Inst_SOPK__S_MULK_I32::~Inst_SOPK__S_MULK_I32()
    {
    } // ~Inst_SOPK__S_MULK_I32

    // D.i = D.i * signext(SIMM16).
    void
    Inst_SOPK__S_MULK_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI16 simm16 = instData.SIMM16;
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

        sdst.read();

        sdst = sdst.rawData() * (ScalarRegI32)sext<16>(simm16);

        sdst.write();
    }

    Inst_SOPK__S_CBRANCH_I_FORK::Inst_SOPK__S_CBRANCH_I_FORK(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cbranch_i_fork")
    {
        setFlag(Branch);
    } // Inst_SOPK__S_CBRANCH_I_FORK

    Inst_SOPK__S_CBRANCH_I_FORK::~Inst_SOPK__S_CBRANCH_I_FORK()
    {
    } // ~Inst_SOPK__S_CBRANCH_I_FORK

    // Conditional branch using branch-stack.
    // S0 = compare mask(vcc or any sgpr), and
    // SIMM16 = signed DWORD branch offset relative to next instruction.
    void
    Inst_SOPK__S_CBRANCH_I_FORK::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_SOPK__S_GETREG_B32::Inst_SOPK__S_GETREG_B32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_getreg_b32")
    {
    } // Inst_SOPK__S_GETREG_B32

    Inst_SOPK__S_GETREG_B32::~Inst_SOPK__S_GETREG_B32()
    {
    } // ~Inst_SOPK__S_GETREG_B32

    // D.u = hardware-reg. Read some or all of a hardware register into the
    // LSBs of D.
    // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size
    // is 1..32.
    void
    Inst_SOPK__S_GETREG_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_SOPK__S_SETREG_B32::Inst_SOPK__S_SETREG_B32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_setreg_b32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_SETREG_B32

    Inst_SOPK__S_SETREG_B32::~Inst_SOPK__S_SETREG_B32()
    {
    } // ~Inst_SOPK__S_SETREG_B32

    // hardware-reg = S0.u. Write some or all of the LSBs of D into a hardware
    // register.
    // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size
    // is 1..32.
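    // For example, SIMM16 = 0x0801 decodes to hwRegId = 1, offset = 0 and
    // size = 2, so the write mask computed below is 0x3 and only the two
    // LSBs of the hardware register are updated.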
    void
    Inst_SOPK__S_SETREG_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI16 simm16 = instData.SIMM16;
        ScalarRegU32 hwregId = simm16 & 0x3f;
        ScalarRegU32 offset = (simm16 >> 6) & 31;
        ScalarRegU32 size = ((simm16 >> 11) & 31) + 1;

        ScalarOperandU32 hwreg(gpuDynInst, hwregId);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        hwreg.read();
        sdst.read();

        // Store value from SDST to part of the hardware register.
        ScalarRegU32 mask = (((1U << size) - 1U) << offset);
        hwreg = ((hwreg.rawData() & ~mask)
                        | ((sdst.rawData() << offset) & mask));
        hwreg.write();

        // set MODE register to control the behavior of single precision
        // floating-point numbers: denormal mode or round mode
        if (hwregId==1 && size==2
            && (offset==4 || offset==0)) {
            warn_once("Be cautious that s_setreg_b32 has no real effect "
                        "on FP modes: %s\n", gpuDynInst->disassemble());
            return;
        }

        // panic if not changing MODE of floating-point numbers
        panicUnimplemented();
    }

    Inst_SOPK__S_SETREG_IMM32_B32::Inst_SOPK__S_SETREG_IMM32_B32(
          InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_setreg_imm32_b32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_SETREG_IMM32_B32

    Inst_SOPK__S_SETREG_IMM32_B32::~Inst_SOPK__S_SETREG_IMM32_B32()
    {
    } // ~Inst_SOPK__S_SETREG_IMM32_B32

    // Write some or all of the LSBs of IMM32 into a hardware register; this
    // instruction requires a 32-bit literal constant.
    // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size
    // is 1..32.
    void
    Inst_SOPK__S_SETREG_IMM32_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI16 simm16 = instData.SIMM16;
        ScalarRegU32 hwregId = simm16 & 0x3f;
        ScalarRegU32 offset = (simm16 >> 6) & 31;
        ScalarRegU32 size = ((simm16 >> 11) & 31) + 1;

        ScalarOperandU32 hwreg(gpuDynInst, hwregId);
        ScalarRegU32 simm32 = extData.imm_u32;
        hwreg.read();

        ScalarRegU32 mask = (((1U << size) - 1U) << offset);
        hwreg = ((hwreg.rawData() & ~mask)
                        | ((simm32 << offset) & mask));
        hwreg.write();

        if (hwregId==1 && size==2
            && (offset==4 || offset==0)) {
            warn_once("Be cautious that s_setreg_imm32_b32 has no real effect "
                        "on FP modes: %s\n", gpuDynInst->disassemble());
            return;
        }

        // panic if not changing MODE of floating-point numbers
        panicUnimplemented();
    }
    Inst_SOP1__S_MOV_B32::Inst_SOP1__S_MOV_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_mov_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_MOV_B32

    Inst_SOP1__S_MOV_B32::~Inst_SOP1__S_MOV_B32()
    {
    } // ~Inst_SOP1__S_MOV_B32

    // D.u = S0.u.
    void
    Inst_SOP1__S_MOV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = src.rawData();

        sdst.write();
    }

    Inst_SOP1__S_MOV_B64::Inst_SOP1__S_MOV_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_mov_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_MOV_B64

    Inst_SOP1__S_MOV_B64::~Inst_SOP1__S_MOV_B64()
    {
    } // ~Inst_SOP1__S_MOV_B64

    // D.u64 = S0.u64.
    void
    Inst_SOP1__S_MOV_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = src.rawData();

        sdst.write();
    }

    Inst_SOP1__S_CMOV_B32::Inst_SOP1__S_CMOV_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_cmov_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_CMOV_B32

    Inst_SOP1__S_CMOV_B32::~Inst_SOP1__S_CMOV_B32()
    {
    } // ~Inst_SOP1__S_CMOV_B32

    // if (SCC) then D.u = S0.u;
    // else NOP.
    // Conditional move.
    void
    Inst_SOP1__S_CMOV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();
        scc.read();

        if (scc.rawData()) {
            sdst = src.rawData();
            sdst.write();
        }
    }

    Inst_SOP1__S_CMOV_B64::Inst_SOP1__S_CMOV_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_cmov_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_CMOV_B64

    Inst_SOP1__S_CMOV_B64::~Inst_SOP1__S_CMOV_B64()
    {
    } // ~Inst_SOP1__S_CMOV_B64

    // if (SCC) then D.u64 = S0.u64;
    // else NOP.
    // Conditional move.
    void
    Inst_SOP1__S_CMOV_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();
        scc.read();

        if (scc.rawData()) {
            sdst = src.rawData();
            sdst.write();
        }
    }

    Inst_SOP1__S_NOT_B32::Inst_SOP1__S_NOT_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_not_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_NOT_B32

    Inst_SOP1__S_NOT_B32::~Inst_SOP1__S_NOT_B32()
    {
    } // ~Inst_SOP1__S_NOT_B32

    // D.u = ~S0.u;
    // SCC = 1 if result is non-zero.
    // Bitwise negation.
    void
    Inst_SOP1__S_NOT_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = ~src.rawData();

        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP1__S_NOT_B64::Inst_SOP1__S_NOT_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_not_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_NOT_B64

    Inst_SOP1__S_NOT_B64::~Inst_SOP1__S_NOT_B64()
    {
    } // ~Inst_SOP1__S_NOT_B64

    // D.u64 = ~S0.u64;
    // SCC = 1 if result is non-zero.
    // Bitwise negation.
    void
    Inst_SOP1__S_NOT_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = ~src.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }
    Inst_SOP1__S_WQM_B32::Inst_SOP1__S_WQM_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_wqm_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_WQM_B32

    Inst_SOP1__S_WQM_B32::~Inst_SOP1__S_WQM_B32()
    {
    } // ~Inst_SOP1__S_WQM_B32

    // Computes whole quad mode for an active/valid mask.
    // SCC = 1 if result is non-zero.
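    // Whole quad mode turns each group of four mask bits into 0xf if any
    // bit in the group is set and 0x0 otherwise; e.g. an input mask of
    // 0x00010100 yields 0x000f0f00.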
    void
    Inst_SOP1__S_WQM_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wholeQuadMode(src.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP1__S_WQM_B64::Inst_SOP1__S_WQM_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_wqm_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_WQM_B64

    Inst_SOP1__S_WQM_B64::~Inst_SOP1__S_WQM_B64()
    {
    } // ~Inst_SOP1__S_WQM_B64

    // Computes whole quad mode for an active/valid mask.
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP1__S_WQM_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wholeQuadMode(src.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }
    Inst_SOP1__S_BREV_B32::Inst_SOP1__S_BREV_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_brev_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_BREV_B32

    Inst_SOP1__S_BREV_B32::~Inst_SOP1__S_BREV_B32()
    {
    } // ~Inst_SOP1__S_BREV_B32

    // D.u[31:0] = S0.u[0:31] (reverse bits).
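    // For example, 0x00000001 reverses to 0x80000000, while a palindrome
    // such as 0x80000001 maps to itself.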
    void
    Inst_SOP1__S_BREV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = reverseBits(src.rawData());

        sdst.write();
    }

    Inst_SOP1__S_BREV_B64::Inst_SOP1__S_BREV_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_brev_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_BREV_B64

    Inst_SOP1__S_BREV_B64::~Inst_SOP1__S_BREV_B64()
    {
    } // ~Inst_SOP1__S_BREV_B64

    // D.u64[63:0] = S0.u64[0:63] (reverse bits).
    void
    Inst_SOP1__S_BREV_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = reverseBits(src.rawData());

        sdst.write();
    }
2156 Inst_SOP1__S_BCNT0_I32_B32::Inst_SOP1__S_BCNT0_I32_B32(InFmt_SOP1
*iFmt
)
2157 : Inst_SOP1(iFmt
, "s_bcnt0_i32_b32")
2160 } // Inst_SOP1__S_BCNT0_I32_B32
2162 Inst_SOP1__S_BCNT0_I32_B32::~Inst_SOP1__S_BCNT0_I32_B32()
2164 } // ~Inst_SOP1__S_BCNT0_I32_B32
2166 // D.i = CountZeroBits(S0.u);
2167 // SCC = 1 if result is non-zero.
2169 Inst_SOP1__S_BCNT0_I32_B32::execute(GPUDynInstPtr gpuDynInst
)
2171 ConstScalarOperandU32
src(gpuDynInst
, instData
.SSRC0
);
2172 ScalarOperandI32
sdst(gpuDynInst
, instData
.SDST
);
2173 ScalarOperandU32
scc(gpuDynInst
, REG_SCC
);
2177 sdst
= countZeroBits(src
.rawData());
2178 scc
= sdst
.rawData() ? 1 : 0;
2184 Inst_SOP1__S_BCNT0_I32_B64::Inst_SOP1__S_BCNT0_I32_B64(InFmt_SOP1
*iFmt
)
2185 : Inst_SOP1(iFmt
, "s_bcnt0_i32_b64")
2188 } // Inst_SOP1__S_BCNT0_I32_B64
2190 Inst_SOP1__S_BCNT0_I32_B64::~Inst_SOP1__S_BCNT0_I32_B64()
2192 } // ~Inst_SOP1__S_BCNT0_I32_B64
2194 // D.i = CountZeroBits(S0.u64);
2195 // SCC = 1 if result is non-zero.
2197 Inst_SOP1__S_BCNT0_I32_B64::execute(GPUDynInstPtr gpuDynInst
)
2199 ConstScalarOperandU64
src(gpuDynInst
, instData
.SSRC0
);
2200 ScalarOperandI32
sdst(gpuDynInst
, instData
.SDST
);
2201 ScalarOperandU32
scc(gpuDynInst
, REG_SCC
);
2205 sdst
= countZeroBits(src
.rawData());
2206 scc
= sdst
.rawData() ? 1 : 0;
2212 Inst_SOP1__S_BCNT1_I32_B32::Inst_SOP1__S_BCNT1_I32_B32(InFmt_SOP1
*iFmt
)
2213 : Inst_SOP1(iFmt
, "s_bcnt1_i32_b32")
2216 } // Inst_SOP1__S_BCNT1_I32_B32
2218 Inst_SOP1__S_BCNT1_I32_B32::~Inst_SOP1__S_BCNT1_I32_B32()
2220 } // ~Inst_SOP1__S_BCNT1_I32_B32
2222 // D.i = CountOneBits(S0.u);
2223 // SCC = 1 if result is non-zero.
2225 Inst_SOP1__S_BCNT1_I32_B32::execute(GPUDynInstPtr gpuDynInst
)
2227 ConstScalarOperandU32
src(gpuDynInst
, instData
.SSRC0
);
2228 ScalarOperandI32
sdst(gpuDynInst
, instData
.SDST
);
2229 ScalarOperandU32
scc(gpuDynInst
, REG_SCC
);
2233 sdst
= popCount(src
.rawData());
2234 scc
= sdst
.rawData() ? 1 : 0;
2240 Inst_SOP1__S_BCNT1_I32_B64::Inst_SOP1__S_BCNT1_I32_B64(InFmt_SOP1
*iFmt
)
2241 : Inst_SOP1(iFmt
, "s_bcnt1_i32_b64")
2244 } // Inst_SOP1__S_BCNT1_I32_B64
2246 Inst_SOP1__S_BCNT1_I32_B64::~Inst_SOP1__S_BCNT1_I32_B64()
2248 } // ~Inst_SOP1__S_BCNT1_I32_B64
2250 // D.i = CountOneBits(S0.u64);
2251 // SCC = 1 if result is non-zero.
2253 Inst_SOP1__S_BCNT1_I32_B64::execute(GPUDynInstPtr gpuDynInst
)
2255 ConstScalarOperandU64
src(gpuDynInst
, instData
.SSRC0
);
2256 ScalarOperandI32
sdst(gpuDynInst
, instData
.SDST
);
2257 ScalarOperandU32
scc(gpuDynInst
, REG_SCC
);
2261 sdst
= popCount(src
.rawData());
2262 scc
= sdst
.rawData() ? 1 : 0;
2268 Inst_SOP1__S_FF0_I32_B32::Inst_SOP1__S_FF0_I32_B32(InFmt_SOP1
*iFmt
)
2269 : Inst_SOP1(iFmt
, "s_ff0_i32_b32")
2272 } // Inst_SOP1__S_FF0_I32_B32
2274 Inst_SOP1__S_FF0_I32_B32::~Inst_SOP1__S_FF0_I32_B32()
2276 } // ~Inst_SOP1__S_FF0_I32_B32
2278 // D.i = FindFirstZero(S0.u);
2279 // If no zeros are found, return -1.
2280 // Returns the bit position of the first zero from the LSB.
2282 Inst_SOP1__S_FF0_I32_B32::execute(GPUDynInstPtr gpuDynInst
)
2284 ConstScalarOperandU32
src(gpuDynInst
, instData
.SSRC0
);
2285 ScalarOperandI32
sdst(gpuDynInst
, instData
.SDST
);
2289 sdst
= findFirstZero(src
.rawData());
2294 Inst_SOP1__S_FF0_I32_B64::Inst_SOP1__S_FF0_I32_B64(InFmt_SOP1
*iFmt
)
2295 : Inst_SOP1(iFmt
, "s_ff0_i32_b64")
2298 } // Inst_SOP1__S_FF0_I32_B64
2300 Inst_SOP1__S_FF0_I32_B64::~Inst_SOP1__S_FF0_I32_B64()
2302 } // ~Inst_SOP1__S_FF0_I32_B64
2304 // D.i = FindFirstZero(S0.u64);
2305 // If no zeros are found, return -1.
2306 // Returns the bit position of the first zero from the LSB.
2308 Inst_SOP1__S_FF0_I32_B64::execute(GPUDynInstPtr gpuDynInst
)
2310 ConstScalarOperandU64
src(gpuDynInst
, instData
.SSRC0
);
2311 ScalarOperandI32
sdst(gpuDynInst
, instData
.SDST
);
2315 sdst
= findFirstZero(src
.rawData());
2320 Inst_SOP1__S_FF1_I32_B32::Inst_SOP1__S_FF1_I32_B32(InFmt_SOP1
*iFmt
)
2321 : Inst_SOP1(iFmt
, "s_ff1_i32_b32")
2324 } // Inst_SOP1__S_FF1_I32_B32
2326 Inst_SOP1__S_FF1_I32_B32::~Inst_SOP1__S_FF1_I32_B32()
2328 } // ~Inst_SOP1__S_FF1_I32_B32
2330 // D.i = FindFirstOne(S0.u);
2331 // If no ones are found, return -1.
2332 // Returns the bit position of the first one from the LSB.
2334 Inst_SOP1__S_FF1_I32_B32::execute(GPUDynInstPtr gpuDynInst
)
2336 ConstScalarOperandU32
src(gpuDynInst
, instData
.SSRC0
);
2337 ScalarOperandI32
sdst(gpuDynInst
, instData
.SDST
);
2341 sdst
= findFirstOne(src
.rawData());
2346 Inst_SOP1__S_FF1_I32_B64::Inst_SOP1__S_FF1_I32_B64(InFmt_SOP1
*iFmt
)
2347 : Inst_SOP1(iFmt
, "s_ff1_i32_b64")
2350 } // Inst_SOP1__S_FF1_I32_B64
2352 Inst_SOP1__S_FF1_I32_B64::~Inst_SOP1__S_FF1_I32_B64()
2354 } // ~Inst_SOP1__S_FF1_I32_B64
2356 // D.i = FindFirstOne(S0.u64);
2357 // If no ones are found, return -1.
2358 // Returns the bit position of the first one from the LSB.
2360 Inst_SOP1__S_FF1_I32_B64::execute(GPUDynInstPtr gpuDynInst
)
2362 ConstScalarOperandU64
src(gpuDynInst
, instData
.SSRC0
);
2363 ScalarOperandI32
sdst(gpuDynInst
, instData
.SDST
);
2367 sdst
= findFirstOne(src
.rawData());
    Inst_SOP1__S_FLBIT_I32_B32::Inst_SOP1__S_FLBIT_I32_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_flbit_i32_b32")
    {
    } // Inst_SOP1__S_FLBIT_I32_B32

    Inst_SOP1__S_FLBIT_I32_B32::~Inst_SOP1__S_FLBIT_I32_B32()
    {
    } // ~Inst_SOP1__S_FLBIT_I32_B32

    // D.i = FindFirstOne(S0.u);
    // If no ones are found, return -1.
    // Counts how many zeros before the first one starting from the MSB.
    void
    Inst_SOP1__S_FLBIT_I32_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = countZeroBitsMsb(src.rawData());

        sdst.write();
    }

    Inst_SOP1__S_FLBIT_I32_B64::Inst_SOP1__S_FLBIT_I32_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_flbit_i32_b64")
    {
    } // Inst_SOP1__S_FLBIT_I32_B64

    Inst_SOP1__S_FLBIT_I32_B64::~Inst_SOP1__S_FLBIT_I32_B64()
    {
    } // ~Inst_SOP1__S_FLBIT_I32_B64

    // D.i = FindFirstOne(S0.u64);
    // If no ones are found, return -1.
    // Counts how many zeros before the first one starting from the MSB.
    void
    Inst_SOP1__S_FLBIT_I32_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = countZeroBitsMsb(src.rawData());

        sdst.write();
    }
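    // For example, the 32-bit form gives 16 for S0.u = 0x0000FFFF (sixteen
    // zeros precede the first one from the MSB) and -1 for S0.u = 0.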
    Inst_SOP1__S_FLBIT_I32::Inst_SOP1__S_FLBIT_I32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_flbit_i32")
    {
    } // Inst_SOP1__S_FLBIT_I32

    Inst_SOP1__S_FLBIT_I32::~Inst_SOP1__S_FLBIT_I32()
    {
    } // ~Inst_SOP1__S_FLBIT_I32

    // D.i = FirstOppositeSignBit(S0.i);
    // If S0.i == 0 or S0.i == -1 (all bits are the same), return -1.
    // Counts how many bits in a row (from MSB to LSB) are the same as the
    // sign bit.
    void
    Inst_SOP1__S_FLBIT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = firstOppositeSignBit(src.rawData());

        sdst.write();
    }

    Inst_SOP1__S_FLBIT_I32_I64::Inst_SOP1__S_FLBIT_I32_I64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_flbit_i32_i64")
    {
    } // Inst_SOP1__S_FLBIT_I32_I64

    Inst_SOP1__S_FLBIT_I32_I64::~Inst_SOP1__S_FLBIT_I32_I64()
    {
    } // ~Inst_SOP1__S_FLBIT_I32_I64

    // D.i = FirstOppositeSignBit(S0.i64);
    // If S0.i64 == 0 or S0.i64 == -1 (all bits are the same), return -1.
    // Counts how many bits in a row (from MSB to LSB) are the same as the
    // sign bit.
    void
    Inst_SOP1__S_FLBIT_I32_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = firstOppositeSignBit(src.rawData());

        sdst.write();
    }
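    // For example, S0.i = 0xFFFFFFF0 (-16) has 28 copies of the sign bit
    // before the first opposite bit, so the result is 28; 0 and -1 contain
    // no opposite-sign bit and return -1.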
    Inst_SOP1__S_SEXT_I32_I8::Inst_SOP1__S_SEXT_I32_I8(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_sext_i32_i8")
    {
    } // Inst_SOP1__S_SEXT_I32_I8

    Inst_SOP1__S_SEXT_I32_I8::~Inst_SOP1__S_SEXT_I32_I8()
    {
    } // ~Inst_SOP1__S_SEXT_I32_I8

    // D.i = signext(S0.i[7:0]) (sign extension).
    void
    Inst_SOP1__S_SEXT_I32_I8::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = sext<std::numeric_limits<ScalarRegI8>::digits>(
            bits(src.rawData(), 7, 0));

        sdst.write();
    }

    Inst_SOP1__S_SEXT_I32_I16::Inst_SOP1__S_SEXT_I32_I16(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_sext_i32_i16")
    {
    } // Inst_SOP1__S_SEXT_I32_I16

    Inst_SOP1__S_SEXT_I32_I16::~Inst_SOP1__S_SEXT_I32_I16()
    {
    } // ~Inst_SOP1__S_SEXT_I32_I16

    // D.i = signext(S0.i[15:0]) (sign extension).
    void
    Inst_SOP1__S_SEXT_I32_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = sext<std::numeric_limits<ScalarRegI16>::digits>(
            bits(src.rawData(), 15, 0));

        sdst.write();
    }
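    // For example, the I8 form maps S0 = 0x80 to 0xFFFFFF80 (-128) and
    // leaves S0 = 0x7F as 0x0000007F (127); the I16 form behaves the same
    // way on bit 15.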
    Inst_SOP1__S_BITSET0_B32::Inst_SOP1__S_BITSET0_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_bitset0_b32")
    {
    } // Inst_SOP1__S_BITSET0_B32

    Inst_SOP1__S_BITSET0_B32::~Inst_SOP1__S_BITSET0_B32()
    {
    } // ~Inst_SOP1__S_BITSET0_B32

    // D.u[S0.u[4:0]] = 0.
    void
    Inst_SOP1__S_BITSET0_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);

        src.read();
        sdst.read();

        sdst.setBit(bits(src.rawData(), 4, 0), 0);

        sdst.write();
    }

    Inst_SOP1__S_BITSET0_B64::Inst_SOP1__S_BITSET0_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_bitset0_b64")
    {
    } // Inst_SOP1__S_BITSET0_B64

    Inst_SOP1__S_BITSET0_B64::~Inst_SOP1__S_BITSET0_B64()
    {
    } // ~Inst_SOP1__S_BITSET0_B64

    // D.u64[S0.u[5:0]] = 0.
    void
    Inst_SOP1__S_BITSET0_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

        src.read();
        sdst.read();

        sdst.setBit(bits(src.rawData(), 5, 0), 0);

        sdst.write();
    }

    Inst_SOP1__S_BITSET1_B32::Inst_SOP1__S_BITSET1_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_bitset1_b32")
    {
    } // Inst_SOP1__S_BITSET1_B32

    Inst_SOP1__S_BITSET1_B32::~Inst_SOP1__S_BITSET1_B32()
    {
    } // ~Inst_SOP1__S_BITSET1_B32

    // D.u[S0.u[4:0]] = 1.
    void
    Inst_SOP1__S_BITSET1_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);

        src.read();
        sdst.read();

        sdst.setBit(bits(src.rawData(), 4, 0), 1);

        sdst.write();
    }

    Inst_SOP1__S_BITSET1_B64::Inst_SOP1__S_BITSET1_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_bitset1_b64")
    {
    } // Inst_SOP1__S_BITSET1_B64

    Inst_SOP1__S_BITSET1_B64::~Inst_SOP1__S_BITSET1_B64()
    {
    } // ~Inst_SOP1__S_BITSET1_B64

    // D.u64[S0.u[5:0]] = 1.
    void
    Inst_SOP1__S_BITSET1_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

        src.read();
        sdst.read();

        sdst.setBit(bits(src.rawData(), 5, 0), 1);

        sdst.write();
    }
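    // Note that only the low 5 (32-bit forms) or 6 (64-bit forms) bits of S0
    // select the bit: for example, S0.u = 33 addresses bit 1 of a 32-bit
    // destination, since 33 & 31 == 1.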
    Inst_SOP1__S_GETPC_B64::Inst_SOP1__S_GETPC_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_getpc_b64")
    {
    } // Inst_SOP1__S_GETPC_B64

    Inst_SOP1__S_GETPC_B64::~Inst_SOP1__S_GETPC_B64()
    {
    } // ~Inst_SOP1__S_GETPC_B64

    // D.u64 = PC + 4.
    // Destination receives the byte address of the next instruction.
    void
    Inst_SOP1__S_GETPC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        Addr pc = wf->pc();
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

        sdst = pc + 4;

        sdst.write();
    }

    Inst_SOP1__S_SETPC_B64::Inst_SOP1__S_SETPC_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_setpc_b64")
    {
    } // Inst_SOP1__S_SETPC_B64

    Inst_SOP1__S_SETPC_B64::~Inst_SOP1__S_SETPC_B64()
    {
    } // ~Inst_SOP1__S_SETPC_B64

    // PC = S0.u64.
    // S0.u64 is a byte address of the instruction to jump to.
    void
    Inst_SOP1__S_SETPC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);

        src.read();

        wf->pc(src.rawData());
    }
    Inst_SOP1__S_SWAPPC_B64::Inst_SOP1__S_SWAPPC_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_swappc_b64")
    {
    } // Inst_SOP1__S_SWAPPC_B64

    Inst_SOP1__S_SWAPPC_B64::~Inst_SOP1__S_SWAPPC_B64()
    {
    } // ~Inst_SOP1__S_SWAPPC_B64

    // D.u64 = PC + 4; PC = S0.u64.
    // S0.u64 is a byte address of the instruction to jump to.
    void
    Inst_SOP1__S_SWAPPC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        Addr pc = wf->pc();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = pc + 4;

        sdst.write();
        wf->pc(src.rawData());
    }
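    // s_swappc_b64 is the scalar call primitive: the return address (PC + 4)
    // is captured in D while control transfers to S0, and a later
    // s_setpc_b64 on the saved pair returns to the caller.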
    Inst_SOP1__S_RFE_B64::Inst_SOP1__S_RFE_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_rfe_b64")
    {
    } // Inst_SOP1__S_RFE_B64

    Inst_SOP1__S_RFE_B64::~Inst_SOP1__S_RFE_B64()
    {
    } // ~Inst_SOP1__S_RFE_B64

    // Return from exception handler and continue.
    void
    Inst_SOP1__S_RFE_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_SOP1__S_AND_SAVEEXEC_B64::Inst_SOP1__S_AND_SAVEEXEC_B64(
          InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_and_saveexec_b64")
    {
    } // Inst_SOP1__S_AND_SAVEEXEC_B64

    Inst_SOP1__S_AND_SAVEEXEC_B64::~Inst_SOP1__S_AND_SAVEEXEC_B64()
    {
    } // ~Inst_SOP1__S_AND_SAVEEXEC_B64

    // D.u64 = EXEC;
    // EXEC = S0.u64 & EXEC;
    // SCC = 1 if the new value of EXEC is non-zero.
    void
    Inst_SOP1__S_AND_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wf->execMask().to_ullong();
        wf->execMask() = src.rawData() & wf->execMask().to_ullong();
        scc = wf->execMask().any() ? 1 : 0;

        sdst.write();
        scc.write();
    }
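    // The saveexec group underpins structured control flow. As an
    // illustrative sketch (register choices are arbitrary), a compiler
    // might lower "if (vcc) { ... }" to:
    //     s_and_saveexec_b64 s[2:3], vcc  ; save EXEC, mask to taken lanes
    //     ...                             ; then-block
    //     s_mov_b64 exec, s[2:3]          ; restore EXEC
    // with the SCC result used to branch over the block when no lanes
    // remain active.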
    Inst_SOP1__S_OR_SAVEEXEC_B64::Inst_SOP1__S_OR_SAVEEXEC_B64(
          InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_or_saveexec_b64")
    {
    } // Inst_SOP1__S_OR_SAVEEXEC_B64

    Inst_SOP1__S_OR_SAVEEXEC_B64::~Inst_SOP1__S_OR_SAVEEXEC_B64()
    {
    } // ~Inst_SOP1__S_OR_SAVEEXEC_B64

    // D.u64 = EXEC;
    // EXEC = S0.u64 | EXEC;
    // SCC = 1 if the new value of EXEC is non-zero.
    void
    Inst_SOP1__S_OR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wf->execMask().to_ullong();
        wf->execMask() = src.rawData() | wf->execMask().to_ullong();
        scc = wf->execMask().any() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP1__S_XOR_SAVEEXEC_B64::Inst_SOP1__S_XOR_SAVEEXEC_B64(
          InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_xor_saveexec_b64")
    {
    } // Inst_SOP1__S_XOR_SAVEEXEC_B64

    Inst_SOP1__S_XOR_SAVEEXEC_B64::~Inst_SOP1__S_XOR_SAVEEXEC_B64()
    {
    } // ~Inst_SOP1__S_XOR_SAVEEXEC_B64

    // D.u64 = EXEC;
    // EXEC = S0.u64 ^ EXEC;
    // SCC = 1 if the new value of EXEC is non-zero.
    void
    Inst_SOP1__S_XOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wf->execMask().to_ullong();
        wf->execMask() = src.rawData() ^ wf->execMask().to_ullong();
        scc = wf->execMask().any() ? 1 : 0;

        sdst.write();
        scc.write();
    }
    Inst_SOP1__S_ANDN2_SAVEEXEC_B64::Inst_SOP1__S_ANDN2_SAVEEXEC_B64(
          InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_andn2_saveexec_b64")
    {
    } // Inst_SOP1__S_ANDN2_SAVEEXEC_B64

    Inst_SOP1__S_ANDN2_SAVEEXEC_B64::~Inst_SOP1__S_ANDN2_SAVEEXEC_B64()
    {
    } // ~Inst_SOP1__S_ANDN2_SAVEEXEC_B64

    // D.u64 = EXEC;
    // EXEC = S0.u64 & ~EXEC;
    // SCC = 1 if the new value of EXEC is non-zero.
    void
    Inst_SOP1__S_ANDN2_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wf->execMask().to_ullong();
        wf->execMask() = src.rawData() & ~wf->execMask().to_ullong();
        scc = wf->execMask().any() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP1__S_ORN2_SAVEEXEC_B64::Inst_SOP1__S_ORN2_SAVEEXEC_B64(
          InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_orn2_saveexec_b64")
    {
    } // Inst_SOP1__S_ORN2_SAVEEXEC_B64

    Inst_SOP1__S_ORN2_SAVEEXEC_B64::~Inst_SOP1__S_ORN2_SAVEEXEC_B64()
    {
    } // ~Inst_SOP1__S_ORN2_SAVEEXEC_B64

    // D.u64 = EXEC;
    // EXEC = S0.u64 | ~EXEC;
    // SCC = 1 if the new value of EXEC is non-zero.
    void
    Inst_SOP1__S_ORN2_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wf->execMask().to_ullong();
        wf->execMask() = src.rawData() | ~wf->execMask().to_ullong();
        scc = wf->execMask().any() ? 1 : 0;

        sdst.write();
        scc.write();
    }
    Inst_SOP1__S_NAND_SAVEEXEC_B64::Inst_SOP1__S_NAND_SAVEEXEC_B64(
          InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_nand_saveexec_b64")
    {
    } // Inst_SOP1__S_NAND_SAVEEXEC_B64

    Inst_SOP1__S_NAND_SAVEEXEC_B64::~Inst_SOP1__S_NAND_SAVEEXEC_B64()
    {
    } // ~Inst_SOP1__S_NAND_SAVEEXEC_B64

    // D.u64 = EXEC;
    // EXEC = ~(S0.u64 & EXEC);
    // SCC = 1 if the new value of EXEC is non-zero.
    void
    Inst_SOP1__S_NAND_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wf->execMask().to_ullong();
        wf->execMask() = ~(src.rawData() & wf->execMask().to_ullong());
        scc = wf->execMask().any() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP1__S_NOR_SAVEEXEC_B64::Inst_SOP1__S_NOR_SAVEEXEC_B64(
          InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_nor_saveexec_b64")
    {
    } // Inst_SOP1__S_NOR_SAVEEXEC_B64

    Inst_SOP1__S_NOR_SAVEEXEC_B64::~Inst_SOP1__S_NOR_SAVEEXEC_B64()
    {
    } // ~Inst_SOP1__S_NOR_SAVEEXEC_B64

    // D.u64 = EXEC;
    // EXEC = ~(S0.u64 | EXEC);
    // SCC = 1 if the new value of EXEC is non-zero.
    void
    Inst_SOP1__S_NOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wf->execMask().to_ullong();
        wf->execMask() = ~(src.rawData() | wf->execMask().to_ullong());
        scc = wf->execMask().any() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP1__S_XNOR_SAVEEXEC_B64::Inst_SOP1__S_XNOR_SAVEEXEC_B64(
          InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_xnor_saveexec_b64")
    {
    } // Inst_SOP1__S_XNOR_SAVEEXEC_B64

    Inst_SOP1__S_XNOR_SAVEEXEC_B64::~Inst_SOP1__S_XNOR_SAVEEXEC_B64()
    {
    } // ~Inst_SOP1__S_XNOR_SAVEEXEC_B64

    // D.u64 = EXEC;
    // EXEC = ~(S0.u64 ^ EXEC);
    // SCC = 1 if the new value of EXEC is non-zero.
    void
    Inst_SOP1__S_XNOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wf->execMask().to_ullong();
        wf->execMask() = ~(src.rawData() ^ wf->execMask().to_ullong());
        scc = wf->execMask().any() ? 1 : 0;

        sdst.write();
        scc.write();
    }
    Inst_SOP1__S_QUADMASK_B32::Inst_SOP1__S_QUADMASK_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_quadmask_b32")
    {
    } // Inst_SOP1__S_QUADMASK_B32

    Inst_SOP1__S_QUADMASK_B32::~Inst_SOP1__S_QUADMASK_B32()
    {
    } // ~Inst_SOP1__S_QUADMASK_B32

    // D.u = QuadMask(S0.u):
    // D[0] = OR(S0[3:0]), D[1] = OR(S0[7:4]) ... D[31:8] = 0;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP1__S_QUADMASK_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = quadMask(src.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP1__S_QUADMASK_B64::Inst_SOP1__S_QUADMASK_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_quadmask_b64")
    {
    } // Inst_SOP1__S_QUADMASK_B64

    Inst_SOP1__S_QUADMASK_B64::~Inst_SOP1__S_QUADMASK_B64()
    {
    } // ~Inst_SOP1__S_QUADMASK_B64

    // D.u64 = QuadMask(S0.u64):
    // D[0] = OR(S0[3:0]), D[1] = OR(S0[7:4]) ... D[63:16] = 0;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP1__S_QUADMASK_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = quadMask(src.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }
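    // For example, S0.u = 0x000000F1 yields D.u = 0x3: quads [3:0] (0x1) and
    // [7:4] (0xF) each contain a set bit, and all higher quads are zero.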
    Inst_SOP1__S_MOVRELS_B32::Inst_SOP1__S_MOVRELS_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_movrels_b32")
    {
    } // Inst_SOP1__S_MOVRELS_B32

    Inst_SOP1__S_MOVRELS_B32::~Inst_SOP1__S_MOVRELS_B32()
    {
    } // ~Inst_SOP1__S_MOVRELS_B32

    // D.u = SGPR[S0.u + M0.u].u (move from relative source).
    void
    Inst_SOP1__S_MOVRELS_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 m0(gpuDynInst, REG_M0);
        m0.read();
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0 + m0.rawData());
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = src.rawData();

        sdst.write();
    }

    Inst_SOP1__S_MOVRELS_B64::Inst_SOP1__S_MOVRELS_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_movrels_b64")
    {
    } // Inst_SOP1__S_MOVRELS_B64

    Inst_SOP1__S_MOVRELS_B64::~Inst_SOP1__S_MOVRELS_B64()
    {
    } // ~Inst_SOP1__S_MOVRELS_B64

    // D.u64 = SGPR[S0.u + M0.u].u64 (move from relative source).
    // The index in M0.u must be even for this operation.
    void
    Inst_SOP1__S_MOVRELS_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 m0(gpuDynInst, REG_M0);
        m0.read();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0 + m0.rawData());
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = src.rawData();

        sdst.write();
    }

    Inst_SOP1__S_MOVRELD_B32::Inst_SOP1__S_MOVRELD_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_movreld_b32")
    {
    } // Inst_SOP1__S_MOVRELD_B32

    Inst_SOP1__S_MOVRELD_B32::~Inst_SOP1__S_MOVRELD_B32()
    {
    } // ~Inst_SOP1__S_MOVRELD_B32

    // SGPR[D.u + M0.u].u = S0.u (move to relative destination).
    void
    Inst_SOP1__S_MOVRELD_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 m0(gpuDynInst, REG_M0);
        m0.read();
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST + m0.rawData());

        src.read();

        sdst = src.rawData();

        sdst.write();
    }

    Inst_SOP1__S_MOVRELD_B64::Inst_SOP1__S_MOVRELD_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_movreld_b64")
    {
    } // Inst_SOP1__S_MOVRELD_B64

    Inst_SOP1__S_MOVRELD_B64::~Inst_SOP1__S_MOVRELD_B64()
    {
    } // ~Inst_SOP1__S_MOVRELD_B64

    // SGPR[D.u + M0.u].u64 = S0.u64 (move to relative destination).
    // The index in M0.u must be even for this operation.
    void
    Inst_SOP1__S_MOVRELD_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 m0(gpuDynInst, REG_M0);
        m0.read();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST + m0.rawData());

        src.read();

        sdst = src.rawData();

        sdst.write();
    }
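    // For example, with M0 = 2 and SSRC0 naming s4, s_movrels_b32 reads s6;
    // the M0 offset is applied when the instruction executes, which is what
    // makes dynamically indexed SGPR access possible.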
    Inst_SOP1__S_CBRANCH_JOIN::Inst_SOP1__S_CBRANCH_JOIN(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_cbranch_join")
    {
    } // Inst_SOP1__S_CBRANCH_JOIN

    Inst_SOP1__S_CBRANCH_JOIN::~Inst_SOP1__S_CBRANCH_JOIN()
    {
    } // ~Inst_SOP1__S_CBRANCH_JOIN

    // Conditional branch join point (end of conditional branch block).
    void
    Inst_SOP1__S_CBRANCH_JOIN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_SOP1__S_ABS_I32::Inst_SOP1__S_ABS_I32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_abs_i32")
    {
    } // Inst_SOP1__S_ABS_I32

    Inst_SOP1__S_ABS_I32::~Inst_SOP1__S_ABS_I32()
    {
    } // ~Inst_SOP1__S_ABS_I32

    // if (S.i < 0) then D.i = -S.i;
    // else D.i = S.i;
    // SCC = 1 if result is non-zero.
    // Integer absolute value.
    void
    Inst_SOP1__S_ABS_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = std::abs(src.rawData());

        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }
    Inst_SOP1__S_MOV_FED_B32::Inst_SOP1__S_MOV_FED_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_mov_fed_b32")
    {
    } // Inst_SOP1__S_MOV_FED_B32

    Inst_SOP1__S_MOV_FED_B32::~Inst_SOP1__S_MOV_FED_B32()
    {
    } // ~Inst_SOP1__S_MOV_FED_B32

    void
    Inst_SOP1__S_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_SOP1__S_SET_GPR_IDX_IDX::Inst_SOP1__S_SET_GPR_IDX_IDX(
          InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_set_gpr_idx_idx")
    {
    } // Inst_SOP1__S_SET_GPR_IDX_IDX

    Inst_SOP1__S_SET_GPR_IDX_IDX::~Inst_SOP1__S_SET_GPR_IDX_IDX()
    {
    } // ~Inst_SOP1__S_SET_GPR_IDX_IDX

    // M0[7:0] = S0.u[7:0].
    // Modify the index used in vector GPR indexing.
    void
    Inst_SOP1__S_SET_GPR_IDX_IDX::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_SOPC__S_CMP_EQ_I32::Inst_SOPC__S_CMP_EQ_I32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_eq_i32")
    {
    } // Inst_SOPC__S_CMP_EQ_I32

    Inst_SOPC__S_CMP_EQ_I32::~Inst_SOPC__S_CMP_EQ_I32()
    {
    } // ~Inst_SOPC__S_CMP_EQ_I32

    // SCC = (S0.i == S1.i).
    void
    Inst_SOPC__S_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() == src1.rawData()) ? 1 : 0;

        scc.write();
    }

    Inst_SOPC__S_CMP_LG_I32::Inst_SOPC__S_CMP_LG_I32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_lg_i32")
    {
    } // Inst_SOPC__S_CMP_LG_I32

    Inst_SOPC__S_CMP_LG_I32::~Inst_SOPC__S_CMP_LG_I32()
    {
    } // ~Inst_SOPC__S_CMP_LG_I32

    // SCC = (S0.i != S1.i).
    void
    Inst_SOPC__S_CMP_LG_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() != src1.rawData()) ? 1 : 0;

        scc.write();
    }

    Inst_SOPC__S_CMP_GT_I32::Inst_SOPC__S_CMP_GT_I32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_gt_i32")
    {
    } // Inst_SOPC__S_CMP_GT_I32

    Inst_SOPC__S_CMP_GT_I32::~Inst_SOPC__S_CMP_GT_I32()
    {
    } // ~Inst_SOPC__S_CMP_GT_I32

    // SCC = (S0.i > S1.i).
    void
    Inst_SOPC__S_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() > src1.rawData()) ? 1 : 0;

        scc.write();
    }

    Inst_SOPC__S_CMP_GE_I32::Inst_SOPC__S_CMP_GE_I32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_ge_i32")
    {
    } // Inst_SOPC__S_CMP_GE_I32

    Inst_SOPC__S_CMP_GE_I32::~Inst_SOPC__S_CMP_GE_I32()
    {
    } // ~Inst_SOPC__S_CMP_GE_I32

    // SCC = (S0.i >= S1.i).
    void
    Inst_SOPC__S_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() >= src1.rawData()) ? 1 : 0;

        scc.write();
    }

    Inst_SOPC__S_CMP_LT_I32::Inst_SOPC__S_CMP_LT_I32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_lt_i32")
    {
    } // Inst_SOPC__S_CMP_LT_I32

    Inst_SOPC__S_CMP_LT_I32::~Inst_SOPC__S_CMP_LT_I32()
    {
    } // ~Inst_SOPC__S_CMP_LT_I32

    // SCC = (S0.i < S1.i).
    void
    Inst_SOPC__S_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() < src1.rawData()) ? 1 : 0;

        scc.write();
    }

    Inst_SOPC__S_CMP_LE_I32::Inst_SOPC__S_CMP_LE_I32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_le_i32")
    {
    } // Inst_SOPC__S_CMP_LE_I32

    Inst_SOPC__S_CMP_LE_I32::~Inst_SOPC__S_CMP_LE_I32()
    {
    } // ~Inst_SOPC__S_CMP_LE_I32

    // SCC = (S0.i <= S1.i).
    void
    Inst_SOPC__S_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() <= src1.rawData()) ? 1 : 0;

        scc.write();
    }
    Inst_SOPC__S_CMP_EQ_U32::Inst_SOPC__S_CMP_EQ_U32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_eq_u32")
    {
    } // Inst_SOPC__S_CMP_EQ_U32

    Inst_SOPC__S_CMP_EQ_U32::~Inst_SOPC__S_CMP_EQ_U32()
    {
    } // ~Inst_SOPC__S_CMP_EQ_U32

    // SCC = (S0.u == S1.u).
    void
    Inst_SOPC__S_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() == src1.rawData()) ? 1 : 0;

        scc.write();
    }

    Inst_SOPC__S_CMP_LG_U32::Inst_SOPC__S_CMP_LG_U32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_lg_u32")
    {
    } // Inst_SOPC__S_CMP_LG_U32

    Inst_SOPC__S_CMP_LG_U32::~Inst_SOPC__S_CMP_LG_U32()
    {
    } // ~Inst_SOPC__S_CMP_LG_U32

    // SCC = (S0.u != S1.u).
    void
    Inst_SOPC__S_CMP_LG_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() != src1.rawData()) ? 1 : 0;

        scc.write();
    }

    Inst_SOPC__S_CMP_GT_U32::Inst_SOPC__S_CMP_GT_U32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_gt_u32")
    {
    } // Inst_SOPC__S_CMP_GT_U32

    Inst_SOPC__S_CMP_GT_U32::~Inst_SOPC__S_CMP_GT_U32()
    {
    } // ~Inst_SOPC__S_CMP_GT_U32

    // SCC = (S0.u > S1.u).
    void
    Inst_SOPC__S_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() > src1.rawData()) ? 1 : 0;

        scc.write();
    }

    Inst_SOPC__S_CMP_GE_U32::Inst_SOPC__S_CMP_GE_U32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_ge_u32")
    {
    } // Inst_SOPC__S_CMP_GE_U32

    Inst_SOPC__S_CMP_GE_U32::~Inst_SOPC__S_CMP_GE_U32()
    {
    } // ~Inst_SOPC__S_CMP_GE_U32

    // SCC = (S0.u >= S1.u).
    void
    Inst_SOPC__S_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() >= src1.rawData()) ? 1 : 0;

        scc.write();
    }
    Inst_SOPC__S_CMP_LT_U32::Inst_SOPC__S_CMP_LT_U32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_lt_u32")
    {
    } // Inst_SOPC__S_CMP_LT_U32

    Inst_SOPC__S_CMP_LT_U32::~Inst_SOPC__S_CMP_LT_U32()
    {
    } // ~Inst_SOPC__S_CMP_LT_U32

    // SCC = (S0.u < S1.u).
    void
    Inst_SOPC__S_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() < src1.rawData()) ? 1 : 0;

        scc.write();
    }

    Inst_SOPC__S_CMP_LE_U32::Inst_SOPC__S_CMP_LE_U32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_le_u32")
    {
    } // Inst_SOPC__S_CMP_LE_U32

    Inst_SOPC__S_CMP_LE_U32::~Inst_SOPC__S_CMP_LE_U32()
    {
    } // ~Inst_SOPC__S_CMP_LE_U32

    // SCC = (S0.u <= S1.u).
    void
    Inst_SOPC__S_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() <= src1.rawData()) ? 1 : 0;

        scc.write();
    }
    Inst_SOPC__S_BITCMP0_B32::Inst_SOPC__S_BITCMP0_B32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_bitcmp0_b32")
    {
    } // Inst_SOPC__S_BITCMP0_B32

    Inst_SOPC__S_BITCMP0_B32::~Inst_SOPC__S_BITCMP0_B32()
    {
    } // ~Inst_SOPC__S_BITCMP0_B32

    // SCC = (S0.u[S1.u[4:0]] == 0).
    void
    Inst_SOPC__S_BITCMP0_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = !bits(src0.rawData(), bits(src1.rawData(), 4, 0)) ? 1 : 0;

        scc.write();
    }

    Inst_SOPC__S_BITCMP1_B32::Inst_SOPC__S_BITCMP1_B32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_bitcmp1_b32")
    {
    } // Inst_SOPC__S_BITCMP1_B32

    Inst_SOPC__S_BITCMP1_B32::~Inst_SOPC__S_BITCMP1_B32()
    {
    } // ~Inst_SOPC__S_BITCMP1_B32

    // SCC = (S0.u[S1.u[4:0]] == 1).
    void
    Inst_SOPC__S_BITCMP1_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = bits(src0.rawData(), bits(src1.rawData(), 4, 0)) ? 1 : 0;

        scc.write();
    }

    Inst_SOPC__S_BITCMP0_B64::Inst_SOPC__S_BITCMP0_B64(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_bitcmp0_b64")
    {
    } // Inst_SOPC__S_BITCMP0_B64

    Inst_SOPC__S_BITCMP0_B64::~Inst_SOPC__S_BITCMP0_B64()
    {
    } // ~Inst_SOPC__S_BITCMP0_B64

    // SCC = (S0.u64[S1.u[5:0]] == 0).
    void
    Inst_SOPC__S_BITCMP0_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = !bits(src0.rawData(), bits(src1.rawData(), 5, 0)) ? 1 : 0;

        scc.write();
    }

    Inst_SOPC__S_BITCMP1_B64::Inst_SOPC__S_BITCMP1_B64(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_bitcmp1_b64")
    {
    } // Inst_SOPC__S_BITCMP1_B64

    Inst_SOPC__S_BITCMP1_B64::~Inst_SOPC__S_BITCMP1_B64()
    {
    } // ~Inst_SOPC__S_BITCMP1_B64

    // SCC = (S0.u64[S1.u[5:0]] == 1).
    void
    Inst_SOPC__S_BITCMP1_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = bits(src0.rawData(), bits(src1.rawData(), 5, 0)) ? 1 : 0;

        scc.write();
    }
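    // For example, with S0.u = 0x00000010 and S1.u = 4, s_bitcmp1_b32 sets
    // SCC = 1 (bit 4 of S0 is set) and s_bitcmp0_b32 sets SCC = 0.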
    Inst_SOPC__S_SETVSKIP::Inst_SOPC__S_SETVSKIP(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_setvskip")
    {
        setFlag(UnconditionalJump);
    } // Inst_SOPC__S_SETVSKIP

    Inst_SOPC__S_SETVSKIP::~Inst_SOPC__S_SETVSKIP()
    {
    } // ~Inst_SOPC__S_SETVSKIP

    // VSKIP = S0.u[S1.u[4:0]].
    // Enables and disables VSKIP mode.
    // When VSKIP is enabled, no VOP*/M*BUF/MIMG/DS/FLAT/EXP instructions
    // are issued.
    void
    Inst_SOPC__S_SETVSKIP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_SOPC__S_SET_GPR_IDX_ON::Inst_SOPC__S_SET_GPR_IDX_ON(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_set_gpr_idx_on")
    {
    } // Inst_SOPC__S_SET_GPR_IDX_ON

    Inst_SOPC__S_SET_GPR_IDX_ON::~Inst_SOPC__S_SET_GPR_IDX_ON()
    {
    } // ~Inst_SOPC__S_SET_GPR_IDX_ON

    // MODE.gpr_idx_en = 1;
    // M0[7:0] = S0.u[7:0];
    // M0[15:12] = SIMM4 (direct contents of S1 field);
    // Remaining bits of M0 are unmodified.
    // Enable GPR indexing mode. Vector operations after this will perform
    // relative GPR addressing based on the contents of M0.
    // The raw contents of the S1 field are read and used to set the enable
    // bits. S1[0] = VSRC0_REL, S1[1] = VSRC1_REL, S1[2] = VSRC2_REL and
    // S1[3] = VDST_REL.
    void
    Inst_SOPC__S_SET_GPR_IDX_ON::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_SOPC__S_CMP_EQ_U64::Inst_SOPC__S_CMP_EQ_U64(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_eq_u64")
    {
    } // Inst_SOPC__S_CMP_EQ_U64

    Inst_SOPC__S_CMP_EQ_U64::~Inst_SOPC__S_CMP_EQ_U64()
    {
    } // ~Inst_SOPC__S_CMP_EQ_U64

    // SCC = (S0.i64 == S1.i64).
    void
    Inst_SOPC__S_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() == src1.rawData()) ? 1 : 0;

        scc.write();
    }

    Inst_SOPC__S_CMP_LG_U64::Inst_SOPC__S_CMP_LG_U64(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_lg_u64")
    {
    } // Inst_SOPC__S_CMP_LG_U64

    Inst_SOPC__S_CMP_LG_U64::~Inst_SOPC__S_CMP_LG_U64()
    {
    } // ~Inst_SOPC__S_CMP_LG_U64

    // SCC = (S0.i64 != S1.i64).
    void
    Inst_SOPC__S_CMP_LG_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() != src1.rawData()) ? 1 : 0;

        scc.write();
    }
    Inst_SOPP__S_NOP::Inst_SOPP__S_NOP(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_nop")
    {
    } // Inst_SOPP__S_NOP

    Inst_SOPP__S_NOP::~Inst_SOPP__S_NOP()
    {
    } // ~Inst_SOPP__S_NOP

    void
    Inst_SOPP__S_NOP::execute(GPUDynInstPtr gpuDynInst)
    {
    }
    Inst_SOPP__S_ENDPGM::Inst_SOPP__S_ENDPGM(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_endpgm")
    {
        setFlag(EndOfKernel);
    } // Inst_SOPP__S_ENDPGM

    Inst_SOPP__S_ENDPGM::~Inst_SOPP__S_ENDPGM()
    {
    } // ~Inst_SOPP__S_ENDPGM

    // End of program; terminate wavefront.
    void
    Inst_SOPP__S_ENDPGM::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ComputeUnit *cu = gpuDynInst->computeUnit();

        // delete extra instructions fetched for completed work-items
        wf->instructionBuffer.erase(wf->instructionBuffer.begin() + 1,
            wf->instructionBuffer.end());

        if (wf->pendingFetch) {
            wf->dropFetch = true;
        }

        wf->computeUnit->fetchStage.fetchUnit(wf->simdId)
            .flushBuf(wf->wfSlotId);
        wf->setStatus(Wavefront::S_STOPPED);

        int refCount = wf->computeUnit->getLds()
            .decreaseRefCounter(wf->dispatchId, wf->wgId);

        /**
         * The parent WF of this instruction is exiting, therefore
         * it should not participate in this barrier any longer. This
         * prevents possible deadlock issues if WFs exit early.
         */
        int bar_id = WFBarrier::InvalidID;
        if (wf->hasBarrier()) {
            assert(wf->getStatus() != Wavefront::S_BARRIER);
            bar_id = wf->barrierId();
            assert(bar_id != WFBarrier::InvalidID);
            wf->releaseBarrier();
            cu->decMaxBarrierCnt(bar_id);
            DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - Exiting the "
                    "program and decrementing max barrier count for "
                    "barrier Id%d. New max count: %d.\n", cu->cu_id,
                    wf->simdId, wf->wfSlotId, wf->wfDynId, bar_id,
                    cu->maxBarrierCnt(bar_id));
        }

        DPRINTF(GPUExec, "CU%d: decrease ref ctr WG[%d] to [%d]\n",
                wf->computeUnit->cu_id, wf->wgId, refCount);

        wf->computeUnit->registerManager->freeRegisters(wf);
        wf->computeUnit->stats.completedWfs++;
        wf->computeUnit->activeWaves--;

        panic_if(wf->computeUnit->activeWaves < 0, "CU[%d] Active waves less "
                 "than zero\n", wf->computeUnit->cu_id);

        DPRINTF(GPUExec, "Doing return for CU%d: WF[%d][%d][%d]\n",
                wf->computeUnit->cu_id, wf->simdId, wf->wfSlotId,
                wf->wfDynId);

        for (int i = 0; i < wf->vecReads.size(); i++) {
            if (wf->rawDist.find(i) != wf->rawDist.end()) {
                wf->stats.readsPerWrite.sample(wf->vecReads.at(i));
            }
        }
        wf->vecReads.clear();
        wf->rawDist.clear();
        wf->lastInstExec = 0;

        if (!refCount) {
            /**
             * If all WFs have finished, and hence the WG has finished,
             * then we can free up the barrier belonging to the parent
             * WG, but only if we actually used a barrier (i.e., more
             * than one WF in the WG).
             */
            if (bar_id != WFBarrier::InvalidID) {
                DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - All waves are "
                        "now complete. Releasing barrier Id%d.\n", cu->cu_id,
                        wf->simdId, wf->wfSlotId, wf->wfDynId, bar_id);
                cu->releaseBarrier(bar_id);
            }

            /**
             * Last wavefront of the workgroup has executed return. If the
             * workgroup is not the final one in the kernel, then simply
             * retire it; however, if it is the final one (i.e., indicating
             * the kernel end) then release operation is needed.
             */

            // check whether the workgroup is indicating the kernel end (i.e.,
            // the last workgroup in the kernel).
            bool kernelEnd =
                wf->computeUnit->shader->dispatcher().isReachingKernelEnd(wf);
            // further check whether 'release @ kernel end' is needed
            bool relNeeded =
                wf->computeUnit->shader->impl_kern_end_rel;

            // if not a kernel end or no release needed, retire the workgroup
            if (!kernelEnd || !relNeeded) {
                wf->computeUnit->shader->dispatcher().notifyWgCompl(wf);
                wf->setStatus(Wavefront::S_STOPPED);
                wf->computeUnit->stats.completedWGs++;

                return;
            }

            /**
             * If a kernel end and release needed, inject a memory sync and
             * retire the workgroup after receiving all acks.
             */
            setFlag(MemSync);
            setFlag(GlobalSegment);
            // Notify Memory System of Kernel Completion
            wf->setStatus(Wavefront::S_RETURNING);
            gpuDynInst->simdId = wf->simdId;
            gpuDynInst->wfSlotId = wf->wfSlotId;
            gpuDynInst->wfDynId = wf->wfDynId;

            DPRINTF(GPUExec, "inject global memory fence for CU%d: "
                    "WF[%d][%d][%d]\n", wf->computeUnit->cu_id,
                    wf->simdId, wf->wfSlotId, wf->wfDynId);

            // call shader to prepare the flush operations
            wf->computeUnit->shader->prepareFlush(gpuDynInst);

            wf->computeUnit->stats.completedWGs++;
        } else {
            wf->computeUnit->shader->dispatcher().scheduleDispatch();
        }
    } // execute
    Inst_SOPP__S_BRANCH::Inst_SOPP__S_BRANCH(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_branch")
    {
    } // Inst_SOPP__S_BRANCH

    Inst_SOPP__S_BRANCH::~Inst_SOPP__S_BRANCH()
    {
    } // ~Inst_SOPP__S_BRANCH

    // PC = PC + signext(SIMM16 * 4) + 4 (short jump).
    void
    Inst_SOPP__S_BRANCH::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        Addr pc = wf->pc();
        ScalarRegI16 simm16 = instData.SIMM16;

        pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL;

        wf->pc(pc);
    }
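    // For example, SIMM16 = 0xFFFF (-1) gives PC + (-4) + 4 = PC, a branch
    // to the s_branch itself, while SIMM16 = 0 continues at the next
    // instruction (PC + 4).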
    Inst_SOPP__S_WAKEUP::Inst_SOPP__S_WAKEUP(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_wakeup")
    {
    } // Inst_SOPP__S_WAKEUP

    Inst_SOPP__S_WAKEUP::~Inst_SOPP__S_WAKEUP()
    {
    } // ~Inst_SOPP__S_WAKEUP

    // Allow a wave to wakeup all the other waves in its workgroup to force
    // them to wake up immediately from an S_SLEEP instruction. The wakeup is
    // ignored if the waves are not sleeping.
    void
    Inst_SOPP__S_WAKEUP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_SOPP__S_CBRANCH_SCC0::Inst_SOPP__S_CBRANCH_SCC0(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_scc0")
    {
    } // Inst_SOPP__S_CBRANCH_SCC0

    Inst_SOPP__S_CBRANCH_SCC0::~Inst_SOPP__S_CBRANCH_SCC0()
    {
    } // ~Inst_SOPP__S_CBRANCH_SCC0

    // if (SCC == 0) then PC = PC + signext(SIMM16 * 4) + 4;
    void
    Inst_SOPP__S_CBRANCH_SCC0::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        Addr pc = wf->pc();
        ScalarRegI16 simm16 = instData.SIMM16;
        ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);

        scc.read();

        if (!scc.rawData()) {
            pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL;
        }

        wf->pc(pc);
    }

    Inst_SOPP__S_CBRANCH_SCC1::Inst_SOPP__S_CBRANCH_SCC1(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_scc1")
    {
    } // Inst_SOPP__S_CBRANCH_SCC1

    Inst_SOPP__S_CBRANCH_SCC1::~Inst_SOPP__S_CBRANCH_SCC1()
    {
    } // ~Inst_SOPP__S_CBRANCH_SCC1

    // if (SCC == 1) then PC = PC + signext(SIMM16 * 4) + 4;
    void
    Inst_SOPP__S_CBRANCH_SCC1::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        Addr pc = wf->pc();
        ScalarRegI16 simm16 = instData.SIMM16;
        ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);

        scc.read();

        if (scc.rawData()) {
            pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL;
        }

        wf->pc(pc);
    }

    Inst_SOPP__S_CBRANCH_VCCZ::Inst_SOPP__S_CBRANCH_VCCZ(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_vccz")
    {
    } // Inst_SOPP__S_CBRANCH_VCCZ

    Inst_SOPP__S_CBRANCH_VCCZ::~Inst_SOPP__S_CBRANCH_VCCZ()
    {
    } // ~Inst_SOPP__S_CBRANCH_VCCZ

    // if (VCC == 0) then PC = PC + signext(SIMM16 * 4) + 4;
    void
    Inst_SOPP__S_CBRANCH_VCCZ::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        Addr pc = wf->pc();
        ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
        ScalarRegI16 simm16 = instData.SIMM16;

        vcc.read();

        if (!vcc.rawData()) {
            pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL;
        }

        wf->pc(pc);
    }

    Inst_SOPP__S_CBRANCH_VCCNZ::Inst_SOPP__S_CBRANCH_VCCNZ(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_vccnz")
    {
    } // Inst_SOPP__S_CBRANCH_VCCNZ

    Inst_SOPP__S_CBRANCH_VCCNZ::~Inst_SOPP__S_CBRANCH_VCCNZ()
    {
    } // ~Inst_SOPP__S_CBRANCH_VCCNZ

    // if (VCC != 0) then PC = PC + signext(SIMM16 * 4) + 4;
    void
    Inst_SOPP__S_CBRANCH_VCCNZ::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        Addr pc = wf->pc();
        ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        vcc.read();

        if (vcc.rawData()) {
            ScalarRegI16 simm16 = instData.SIMM16;
            pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL;
        }

        wf->pc(pc);
    }

    Inst_SOPP__S_CBRANCH_EXECZ::Inst_SOPP__S_CBRANCH_EXECZ(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_execz")
    {
    } // Inst_SOPP__S_CBRANCH_EXECZ

    Inst_SOPP__S_CBRANCH_EXECZ::~Inst_SOPP__S_CBRANCH_EXECZ()
    {
    } // ~Inst_SOPP__S_CBRANCH_EXECZ

    // if (EXEC == 0) then PC = PC + signext(SIMM16 * 4) + 4;
    void
    Inst_SOPP__S_CBRANCH_EXECZ::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        Addr pc = wf->pc();

        if (wf->execMask().none()) {
            ScalarRegI16 simm16 = instData.SIMM16;
            pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL;
        }

        wf->pc(pc);
    }

    Inst_SOPP__S_CBRANCH_EXECNZ::Inst_SOPP__S_CBRANCH_EXECNZ(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_execnz")
    {
    } // Inst_SOPP__S_CBRANCH_EXECNZ

    Inst_SOPP__S_CBRANCH_EXECNZ::~Inst_SOPP__S_CBRANCH_EXECNZ()
    {
    } // ~Inst_SOPP__S_CBRANCH_EXECNZ

    // if (EXEC != 0) then PC = PC + signext(SIMM16 * 4) + 4;
    void
    Inst_SOPP__S_CBRANCH_EXECNZ::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        Addr pc = wf->pc();

        if (wf->execMask().any()) {
            ScalarRegI16 simm16 = instData.SIMM16;
            pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL;
        }

        wf->pc(pc);
    }
    Inst_SOPP__S_BARRIER::Inst_SOPP__S_BARRIER(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_barrier")
    {
        setFlag(MemBarrier);
    } // Inst_SOPP__S_BARRIER

    Inst_SOPP__S_BARRIER::~Inst_SOPP__S_BARRIER()
    {
    } // ~Inst_SOPP__S_BARRIER

    /**
     * Synchronize waves within a workgroup. If not all waves of the
     * workgroup have been created yet, wait for entire group before
     * proceeding. If some waves in the workgroup have already terminated,
     * this waits on only the surviving waves.
     */
    void
    Inst_SOPP__S_BARRIER::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ComputeUnit *cu = gpuDynInst->computeUnit();

        if (wf->hasBarrier()) {
            int bar_id = wf->barrierId();
            assert(wf->getStatus() != Wavefront::S_BARRIER);
            wf->setStatus(Wavefront::S_BARRIER);
            cu->incNumAtBarrier(bar_id);
            DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - Stalling at "
                    "barrier Id%d. %d waves now at barrier, %d waves "
                    "remain.\n", cu->cu_id, wf->simdId, wf->wfSlotId,
                    wf->wfDynId, bar_id, cu->numAtBarrier(bar_id),
                    cu->numYetToReachBarrier(bar_id));
        }
    } // execute
    // --- Inst_SOPP__S_SETKILL class methods ---

    Inst_SOPP__S_SETKILL::Inst_SOPP__S_SETKILL(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_setkill")
    {
    } // Inst_SOPP__S_SETKILL

    Inst_SOPP__S_SETKILL::~Inst_SOPP__S_SETKILL()
    {
    } // ~Inst_SOPP__S_SETKILL

    void
    Inst_SOPP__S_SETKILL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_SOPP__S_WAITCNT::Inst_SOPP__S_WAITCNT(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_waitcnt")
    {
    } // Inst_SOPP__S_WAITCNT

    Inst_SOPP__S_WAITCNT::~Inst_SOPP__S_WAITCNT()
    {
    } // ~Inst_SOPP__S_WAITCNT

    // Wait for the counts of outstanding lds, vector-memory and
    // export/vmem-write-data to be at or below the specified levels.
    // SIMM16[3:0] = vmcount (vector memory operations),
    // SIMM16[6:4] = export/mem-write-data count,
    // SIMM16[12:8] = LGKM_cnt (scalar-mem/GDS/LDS count).
    void
    Inst_SOPP__S_WAITCNT::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI32 vm_cnt = 0;
        ScalarRegI32 exp_cnt = 0;
        ScalarRegI32 lgkm_cnt = 0;
        vm_cnt = bits<ScalarRegI16>(instData.SIMM16, 3, 0);
        exp_cnt = bits<ScalarRegI16>(instData.SIMM16, 6, 4);
        lgkm_cnt = bits<ScalarRegI16>(instData.SIMM16, 12, 8);
        gpuDynInst->wavefront()->setWaitCnts(vm_cnt, exp_cnt, lgkm_cnt);
    }
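    // For example, SIMM16 = 0x0070 decodes to vm_cnt = 0, exp_cnt = 7 and
    // lgkm_cnt = 0: the wave waits for all vector-memory and
    // scalar/LDS/GDS operations, while the export count is left at its
    // maximum encoding and is effectively not waited on.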
    Inst_SOPP__S_SETHALT::Inst_SOPP__S_SETHALT(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_sethalt")
    {
    } // Inst_SOPP__S_SETHALT

    Inst_SOPP__S_SETHALT::~Inst_SOPP__S_SETHALT()
    {
    } // ~Inst_SOPP__S_SETHALT

    void
    Inst_SOPP__S_SETHALT::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_SOPP__S_SLEEP::Inst_SOPP__S_SLEEP(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_sleep")
    {
    } // Inst_SOPP__S_SLEEP

    Inst_SOPP__S_SLEEP::~Inst_SOPP__S_SLEEP()
    {
    } // ~Inst_SOPP__S_SLEEP

    // Cause a wave to sleep for (64 * SIMM16[2:0] + 1..64) clocks.
    void
    Inst_SOPP__S_SLEEP::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
        gpuDynInst->wavefront()->setStatus(Wavefront::S_STALLED_SLEEP);
        // sleep duration is specified in multiples of 64 cycles
        gpuDynInst->wavefront()->setSleepTime(64 * simm16);
    }
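    // For example, SIMM16 = 2 requests roughly 128 cycles of sleep; note
    // that this implementation scales the full immediate, whereas the ISA
    // text above specifies only SIMM16[2:0].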
    // --- Inst_SOPP__S_SETPRIO class methods ---

    Inst_SOPP__S_SETPRIO::Inst_SOPP__S_SETPRIO(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_setprio")
    {
    } // Inst_SOPP__S_SETPRIO

    Inst_SOPP__S_SETPRIO::~Inst_SOPP__S_SETPRIO()
    {
    } // ~Inst_SOPP__S_SETPRIO

    // User settable wave priority is set to SIMM16[1:0]. 0 = lowest,
    // 3 = highest.
    void
    Inst_SOPP__S_SETPRIO::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_SOPP__S_SENDMSG::Inst_SOPP__S_SENDMSG(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_sendmsg")
    {
    } // Inst_SOPP__S_SENDMSG

    Inst_SOPP__S_SENDMSG::~Inst_SOPP__S_SENDMSG()
    {
    } // ~Inst_SOPP__S_SENDMSG

    void
    Inst_SOPP__S_SENDMSG::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_SOPP__S_SENDMSGHALT::Inst_SOPP__S_SENDMSGHALT(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_sendmsghalt")
    {
    } // Inst_SOPP__S_SENDMSGHALT

    Inst_SOPP__S_SENDMSGHALT::~Inst_SOPP__S_SENDMSGHALT()
    {
    } // ~Inst_SOPP__S_SENDMSGHALT

    void
    Inst_SOPP__S_SENDMSGHALT::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_SOPP__S_TRAP::Inst_SOPP__S_TRAP(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_trap")
    {
    } // Inst_SOPP__S_TRAP

    Inst_SOPP__S_TRAP::~Inst_SOPP__S_TRAP()
    {
    } // ~Inst_SOPP__S_TRAP

    // Enter the trap handler.
    void
    Inst_SOPP__S_TRAP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_SOPP__S_ICACHE_INV::Inst_SOPP__S_ICACHE_INV(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_icache_inv")
    {
    } // Inst_SOPP__S_ICACHE_INV

    Inst_SOPP__S_ICACHE_INV::~Inst_SOPP__S_ICACHE_INV()
    {
    } // ~Inst_SOPP__S_ICACHE_INV

    // Invalidate entire L1 instruction cache.
    void
    Inst_SOPP__S_ICACHE_INV::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_SOPP__S_INCPERFLEVEL::Inst_SOPP__S_INCPERFLEVEL(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_incperflevel")
    {
    } // Inst_SOPP__S_INCPERFLEVEL

    Inst_SOPP__S_INCPERFLEVEL::~Inst_SOPP__S_INCPERFLEVEL()
    {
    } // ~Inst_SOPP__S_INCPERFLEVEL

    void
    Inst_SOPP__S_INCPERFLEVEL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_SOPP__S_DECPERFLEVEL::Inst_SOPP__S_DECPERFLEVEL(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_decperflevel")
    {
    } // Inst_SOPP__S_DECPERFLEVEL

    Inst_SOPP__S_DECPERFLEVEL::~Inst_SOPP__S_DECPERFLEVEL()
    {
    } // ~Inst_SOPP__S_DECPERFLEVEL

    void
    Inst_SOPP__S_DECPERFLEVEL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_SOPP__S_TTRACEDATA::Inst_SOPP__S_TTRACEDATA(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_ttracedata")
    {
    } // Inst_SOPP__S_TTRACEDATA

    Inst_SOPP__S_TTRACEDATA::~Inst_SOPP__S_TTRACEDATA()
    {
    } // ~Inst_SOPP__S_TTRACEDATA

    void
    Inst_SOPP__S_TTRACEDATA::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_SOPP__S_CBRANCH_CDBGSYS::Inst_SOPP__S_CBRANCH_CDBGSYS(
          InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_cdbgsys")
    {
    } // Inst_SOPP__S_CBRANCH_CDBGSYS

    Inst_SOPP__S_CBRANCH_CDBGSYS::~Inst_SOPP__S_CBRANCH_CDBGSYS()
    {
    } // ~Inst_SOPP__S_CBRANCH_CDBGSYS

    void
    Inst_SOPP__S_CBRANCH_CDBGSYS::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_SOPP__S_CBRANCH_CDBGUSER::Inst_SOPP__S_CBRANCH_CDBGUSER(
          InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_cdbguser")
    {
    } // Inst_SOPP__S_CBRANCH_CDBGUSER

    Inst_SOPP__S_CBRANCH_CDBGUSER::~Inst_SOPP__S_CBRANCH_CDBGUSER()
    {
    } // ~Inst_SOPP__S_CBRANCH_CDBGUSER

    void
    Inst_SOPP__S_CBRANCH_CDBGUSER::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER::
        Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_cdbgsys_or_user")
    {
    } // Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER

    Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER::
        ~Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER()
    {
    } // ~Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER

    void
    Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER::
        Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_cdbgsys_and_user")
    {
    } // Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER

    Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER::
        ~Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER()
    {
    } // ~Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER

    void
    Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_SOPP__S_ENDPGM_SAVED::Inst_SOPP__S_ENDPGM_SAVED(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_endpgm_saved")
    {
    } // Inst_SOPP__S_ENDPGM_SAVED

    Inst_SOPP__S_ENDPGM_SAVED::~Inst_SOPP__S_ENDPGM_SAVED()
    {
    } // ~Inst_SOPP__S_ENDPGM_SAVED

    void
    Inst_SOPP__S_ENDPGM_SAVED::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_SOPP__S_SET_GPR_IDX_OFF::Inst_SOPP__S_SET_GPR_IDX_OFF(
          InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_set_gpr_idx_off")
    {
    } // Inst_SOPP__S_SET_GPR_IDX_OFF

    Inst_SOPP__S_SET_GPR_IDX_OFF::~Inst_SOPP__S_SET_GPR_IDX_OFF()
    {
    } // ~Inst_SOPP__S_SET_GPR_IDX_OFF

    // MODE.gpr_idx_en = 0.
    // Clear GPR indexing mode. Vector operations after this will not perform
    // relative GPR addressing regardless of the contents of M0.
    void
    Inst_SOPP__S_SET_GPR_IDX_OFF::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_SOPP__S_SET_GPR_IDX_MODE::Inst_SOPP__S_SET_GPR_IDX_MODE(
          InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_set_gpr_idx_mode")
    {
    } // Inst_SOPP__S_SET_GPR_IDX_MODE

    Inst_SOPP__S_SET_GPR_IDX_MODE::~Inst_SOPP__S_SET_GPR_IDX_MODE()
    {
    } // ~Inst_SOPP__S_SET_GPR_IDX_MODE

    // M0[15:12] = SIMM4.
    // Modify the mode used for vector GPR indexing.
    // The raw contents of the source field are read and used to set the
    // enable bits. SIMM4[0] = VSRC0_REL, SIMM4[1] = VSRC1_REL,
    // SIMM4[2] = VSRC2_REL and SIMM4[3] = VDST_REL.
    void
    Inst_SOPP__S_SET_GPR_IDX_MODE::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_SMEM__S_LOAD_DWORD::Inst_SMEM__S_LOAD_DWORD(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_load_dword")
    {
    } // Inst_SMEM__S_LOAD_DWORD

    Inst_SMEM__S_LOAD_DWORD::~Inst_SMEM__S_LOAD_DWORD()
    {
    } // ~Inst_SMEM__S_LOAD_DWORD

    /**
     * Read 1 dword from scalar data cache. If the offset is specified as an
     * sgpr, the sgpr contains an unsigned byte offset (the 2 LSBs are
     * ignored). If the offset is specified as an immediate 20-bit constant,
     * the constant is an unsigned byte offset.
     */
    void
    Inst_SMEM__S_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .getGMReqFIFO().push(gpuDynInst);

        wf->scalarRdGmReqsInPipe--;
        wf->scalarOutstandingReqsRdGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_SMEM__S_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<1>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        ScalarOperandU32 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
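    // Note that SBASE encodes an aligned SGPR-pair number, so
    // instData.SBASE << 1 converts it to the first SGPR of the pair: for
    // example, SBASE = 2 takes the 64-bit base address from s[4:5].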
    Inst_SMEM__S_LOAD_DWORDX2::Inst_SMEM__S_LOAD_DWORDX2(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_load_dwordx2")
    {
    } // Inst_SMEM__S_LOAD_DWORDX2

    Inst_SMEM__S_LOAD_DWORDX2::~Inst_SMEM__S_LOAD_DWORDX2()
    {
    } // ~Inst_SMEM__S_LOAD_DWORDX2

    /**
     * Read 2 dwords from scalar data cache. See s_load_dword for details on
     * the offset input.
     */
    void
    Inst_SMEM__S_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .getGMReqFIFO().push(gpuDynInst);

        wf->scalarRdGmReqsInPipe--;
        wf->scalarOutstandingReqsRdGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_SMEM__S_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<2>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        ScalarOperandU64 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    Inst_SMEM__S_LOAD_DWORDX4::Inst_SMEM__S_LOAD_DWORDX4(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_load_dwordx4")
    {
    } // Inst_SMEM__S_LOAD_DWORDX4

    Inst_SMEM__S_LOAD_DWORDX4::~Inst_SMEM__S_LOAD_DWORDX4()
    {
    } // ~Inst_SMEM__S_LOAD_DWORDX4

    // Read 4 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .getGMReqFIFO().push(gpuDynInst);

        wf->scalarRdGmReqsInPipe--;
        wf->scalarOutstandingReqsRdGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_SMEM__S_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<4>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        ScalarOperandU128 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    Inst_SMEM__S_LOAD_DWORDX8::Inst_SMEM__S_LOAD_DWORDX8(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_load_dwordx8")
    {
    } // Inst_SMEM__S_LOAD_DWORDX8

    Inst_SMEM__S_LOAD_DWORDX8::~Inst_SMEM__S_LOAD_DWORDX8()
    {
    } // ~Inst_SMEM__S_LOAD_DWORDX8

    // Read 8 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_LOAD_DWORDX8::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .getGMReqFIFO().push(gpuDynInst);

        wf->scalarRdGmReqsInPipe--;
        wf->scalarOutstandingReqsRdGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_SMEM__S_LOAD_DWORDX8::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<8>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_LOAD_DWORDX8::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        ScalarOperandU256 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    Inst_SMEM__S_LOAD_DWORDX16::Inst_SMEM__S_LOAD_DWORDX16(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_load_dwordx16")
    {
    } // Inst_SMEM__S_LOAD_DWORDX16

    Inst_SMEM__S_LOAD_DWORDX16::~Inst_SMEM__S_LOAD_DWORDX16()
    {
    } // ~Inst_SMEM__S_LOAD_DWORDX16

    // Read 16 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_LOAD_DWORDX16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .getGMReqFIFO().push(gpuDynInst);

        wf->scalarRdGmReqsInPipe--;
        wf->scalarOutstandingReqsRdGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_SMEM__S_LOAD_DWORDX16::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<16>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_LOAD_DWORDX16::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        ScalarOperandU512 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    Inst_SMEM__S_BUFFER_LOAD_DWORD::Inst_SMEM__S_BUFFER_LOAD_DWORD(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_load_dword")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_BUFFER_LOAD_DWORD

    Inst_SMEM__S_BUFFER_LOAD_DWORD::~Inst_SMEM__S_BUFFER_LOAD_DWORD()
    {
    } // ~Inst_SMEM__S_BUFFER_LOAD_DWORD

    // Read 1 dword from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);

        rsrcDesc.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, rsrcDesc, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .getGMReqFIFO().push(gpuDynInst);

        wf->scalarRdGmReqsInPipe--;
        wf->scalarOutstandingReqsRdGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<1>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // 1 request, size 32
        ScalarOperandU32 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
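
    // The buffer variants here and below differ from the plain scalar loads
    // only in addressing: SBASE names four SGPRs holding a 128-bit buffer
    // resource descriptor rather than a raw 64-bit pointer, so calcAddr()
    // is handed rsrcDesc and derives the address from the descriptor's base
    // plus the byte offset.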
    Inst_SMEM__S_BUFFER_LOAD_DWORDX2::Inst_SMEM__S_BUFFER_LOAD_DWORDX2(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_load_dwordx2")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_BUFFER_LOAD_DWORDX2

    Inst_SMEM__S_BUFFER_LOAD_DWORDX2::~Inst_SMEM__S_BUFFER_LOAD_DWORDX2()
    {
    } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX2

    // Read 2 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);

        rsrcDesc.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, rsrcDesc, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .getGMReqFIFO().push(gpuDynInst);

        wf->scalarRdGmReqsInPipe--;
        wf->scalarOutstandingReqsRdGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<2>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // use U64 because 2 requests, each size 32
        ScalarOperandU64 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    Inst_SMEM__S_BUFFER_LOAD_DWORDX4::Inst_SMEM__S_BUFFER_LOAD_DWORDX4(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_load_dwordx4")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_BUFFER_LOAD_DWORDX4

    Inst_SMEM__S_BUFFER_LOAD_DWORDX4::~Inst_SMEM__S_BUFFER_LOAD_DWORDX4()
    {
    } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX4

    // Read 4 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);

        rsrcDesc.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, rsrcDesc, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .getGMReqFIFO().push(gpuDynInst);

        wf->scalarRdGmReqsInPipe--;
        wf->scalarOutstandingReqsRdGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<4>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // 4 requests, each size 32
        ScalarOperandU128 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    Inst_SMEM__S_BUFFER_LOAD_DWORDX8::Inst_SMEM__S_BUFFER_LOAD_DWORDX8(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_load_dwordx8")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_BUFFER_LOAD_DWORDX8

    Inst_SMEM__S_BUFFER_LOAD_DWORDX8::~Inst_SMEM__S_BUFFER_LOAD_DWORDX8()
    {
    } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX8

    // Read 8 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX8::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);

        rsrcDesc.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, rsrcDesc, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .getGMReqFIFO().push(gpuDynInst);

        wf->scalarRdGmReqsInPipe--;
        wf->scalarOutstandingReqsRdGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX8::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<8>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX8::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // 8 requests, each size 32
        ScalarOperandU256 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    Inst_SMEM__S_BUFFER_LOAD_DWORDX16::Inst_SMEM__S_BUFFER_LOAD_DWORDX16(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_load_dwordx16")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_BUFFER_LOAD_DWORDX16

    Inst_SMEM__S_BUFFER_LOAD_DWORDX16::~Inst_SMEM__S_BUFFER_LOAD_DWORDX16()
    {
    } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX16

    // Read 16 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);

        rsrcDesc.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, rsrcDesc, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .getGMReqFIFO().push(gpuDynInst);

        wf->scalarRdGmReqsInPipe--;
        wf->scalarOutstandingReqsRdGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX16::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<16>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX16::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // 16 requests, each size 32
        ScalarOperandU512 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    Inst_SMEM__S_STORE_DWORD::Inst_SMEM__S_STORE_DWORD(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_store_dword")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_SMEM__S_STORE_DWORD

    Inst_SMEM__S_STORE_DWORD::~Inst_SMEM__S_STORE_DWORD()
    {
    } // ~Inst_SMEM__S_STORE_DWORD

    // Write 1 dword to scalar data cache.
    // If the offset is specified as an SGPR, the SGPR contains an unsigned
    // BYTE offset (the 2 LSBs are ignored).
    // If the offset is specified as an immediate 20-bit constant, the
    // constant is an unsigned BYTE offset.
    void
    Inst_SMEM__S_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe.
            getGMReqFIFO().push(gpuDynInst);

        wf->scalarWrGmReqsInPipe--;
        wf->scalarOutstandingReqsWrGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_SMEM__S_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 sdata(gpuDynInst, instData.SDATA);
        sdata.read();
        std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(),
                    sizeof(ScalarRegU32));
        initMemWrite<1>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
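
    // Illustrative arithmetic: one dword is sizeof(ScalarRegU32) == 4 bytes,
    // so s_store_dword stages 4 bytes into gpuDynInst->scalar_data before
    // initMemWrite<1>; the dwordx2 and dwordx4 variants below stage 8 and
    // 4 * 4 == 16 bytes and call initMemWrite<2> and initMemWrite<4>.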
    Inst_SMEM__S_STORE_DWORDX2::Inst_SMEM__S_STORE_DWORDX2(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_store_dwordx2")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_SMEM__S_STORE_DWORDX2

    Inst_SMEM__S_STORE_DWORDX2::~Inst_SMEM__S_STORE_DWORDX2()
    {
    } // ~Inst_SMEM__S_STORE_DWORDX2

    // Write 2 dwords to scalar data cache. See S_STORE_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe.
            getGMReqFIFO().push(gpuDynInst);

        wf->scalarWrGmReqsInPipe--;
        wf->scalarOutstandingReqsWrGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_SMEM__S_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 sdata(gpuDynInst, instData.SDATA);
        sdata.read();
        std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(),
                    sizeof(ScalarRegU64));
        initMemWrite<2>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    Inst_SMEM__S_STORE_DWORDX4::Inst_SMEM__S_STORE_DWORDX4(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_store_dwordx4")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_SMEM__S_STORE_DWORDX4

    Inst_SMEM__S_STORE_DWORDX4::~Inst_SMEM__S_STORE_DWORDX4()
    {
    } // ~Inst_SMEM__S_STORE_DWORDX4

    // Write 4 dwords to scalar data cache. See S_STORE_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe.
            getGMReqFIFO().push(gpuDynInst);

        wf->scalarWrGmReqsInPipe--;
        wf->scalarOutstandingReqsWrGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_SMEM__S_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU128 sdata(gpuDynInst, instData.SDATA);
        sdata.read();
        std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(),
                    4 * sizeof(ScalarRegU32));
        initMemWrite<4>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    Inst_SMEM__S_BUFFER_STORE_DWORD::Inst_SMEM__S_BUFFER_STORE_DWORD(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_store_dword")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_SMEM__S_BUFFER_STORE_DWORD

    Inst_SMEM__S_BUFFER_STORE_DWORD::~Inst_SMEM__S_BUFFER_STORE_DWORD()
    {
    } // ~Inst_SMEM__S_BUFFER_STORE_DWORD

    // Write 1 dword to scalar data cache. See S_STORE_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_SMEM__S_BUFFER_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_SMEM__S_BUFFER_STORE_DWORDX2::Inst_SMEM__S_BUFFER_STORE_DWORDX2(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_store_dwordx2")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_SMEM__S_BUFFER_STORE_DWORDX2

    Inst_SMEM__S_BUFFER_STORE_DWORDX2::~Inst_SMEM__S_BUFFER_STORE_DWORDX2()
    {
    } // ~Inst_SMEM__S_BUFFER_STORE_DWORDX2

    // Write 2 dwords to scalar data cache. See S_STORE_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_SMEM__S_BUFFER_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_SMEM__S_BUFFER_STORE_DWORDX4::Inst_SMEM__S_BUFFER_STORE_DWORDX4(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_store_dwordx4")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_SMEM__S_BUFFER_STORE_DWORDX4

    Inst_SMEM__S_BUFFER_STORE_DWORDX4::~Inst_SMEM__S_BUFFER_STORE_DWORDX4()
    {
    } // ~Inst_SMEM__S_BUFFER_STORE_DWORDX4

    // Write 4 dwords to scalar data cache. See S_STORE_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_SMEM__S_BUFFER_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    Inst_SMEM__S_DCACHE_INV::Inst_SMEM__S_DCACHE_INV(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_dcache_inv")
    {
    } // Inst_SMEM__S_DCACHE_INV

    Inst_SMEM__S_DCACHE_INV::~Inst_SMEM__S_DCACHE_INV()
    {
    } // ~Inst_SMEM__S_DCACHE_INV

    // Invalidate the scalar data cache.
    void
    Inst_SMEM__S_DCACHE_INV::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SMEM__S_DCACHE_WB::Inst_SMEM__S_DCACHE_WB(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_dcache_wb")
    {
    } // Inst_SMEM__S_DCACHE_WB

    Inst_SMEM__S_DCACHE_WB::~Inst_SMEM__S_DCACHE_WB()
    {
    } // ~Inst_SMEM__S_DCACHE_WB

    // Write back dirty data in the scalar data cache.
    void
    Inst_SMEM__S_DCACHE_WB::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SMEM__S_DCACHE_INV_VOL::Inst_SMEM__S_DCACHE_INV_VOL(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_dcache_inv_vol")
    {
    } // Inst_SMEM__S_DCACHE_INV_VOL

    Inst_SMEM__S_DCACHE_INV_VOL::~Inst_SMEM__S_DCACHE_INV_VOL()
    {
    } // ~Inst_SMEM__S_DCACHE_INV_VOL

    // Invalidate the scalar data cache volatile lines.
    void
    Inst_SMEM__S_DCACHE_INV_VOL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SMEM__S_DCACHE_WB_VOL::Inst_SMEM__S_DCACHE_WB_VOL(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_dcache_wb_vol")
    {
    } // Inst_SMEM__S_DCACHE_WB_VOL

    Inst_SMEM__S_DCACHE_WB_VOL::~Inst_SMEM__S_DCACHE_WB_VOL()
    {
    } // ~Inst_SMEM__S_DCACHE_WB_VOL

    // Write back dirty data in the scalar data cache volatile lines.
    void
    Inst_SMEM__S_DCACHE_WB_VOL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SMEM__S_MEMTIME::Inst_SMEM__S_MEMTIME(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_memtime")
    {
    } // Inst_SMEM__S_MEMTIME

    Inst_SMEM__S_MEMTIME::~Inst_SMEM__S_MEMTIME()
    {
    } // ~Inst_SMEM__S_MEMTIME

    // Return current 64-bit timestamp.
    void
    Inst_SMEM__S_MEMTIME::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SMEM__S_MEMREALTIME::Inst_SMEM__S_MEMREALTIME(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_memrealtime")
    {
    } // Inst_SMEM__S_MEMREALTIME

    Inst_SMEM__S_MEMREALTIME::~Inst_SMEM__S_MEMREALTIME()
    {
    } // ~Inst_SMEM__S_MEMREALTIME

    // Return current 64-bit RTC.
    void
    Inst_SMEM__S_MEMREALTIME::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SMEM__S_ATC_PROBE::Inst_SMEM__S_ATC_PROBE(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_atc_probe")
    {
    } // Inst_SMEM__S_ATC_PROBE

    Inst_SMEM__S_ATC_PROBE::~Inst_SMEM__S_ATC_PROBE()
    {
    } // ~Inst_SMEM__S_ATC_PROBE

    void
    Inst_SMEM__S_ATC_PROBE::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SMEM__S_ATC_PROBE_BUFFER::Inst_SMEM__S_ATC_PROBE_BUFFER(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_atc_probe_buffer")
    {
    } // Inst_SMEM__S_ATC_PROBE_BUFFER

    Inst_SMEM__S_ATC_PROBE_BUFFER::~Inst_SMEM__S_ATC_PROBE_BUFFER()
    {
    } // ~Inst_SMEM__S_ATC_PROBE_BUFFER

    void
    Inst_SMEM__S_ATC_PROBE_BUFFER::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_VOP2__V_CNDMASK_B32::Inst_VOP2__V_CNDMASK_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_cndmask_b32")
    {
        setFlag(ALU);
        setFlag(ReadsVCC);
    } // Inst_VOP2__V_CNDMASK_B32

    Inst_VOP2__V_CNDMASK_B32::~Inst_VOP2__V_CNDMASK_B32()
    {
    } // ~Inst_VOP2__V_CNDMASK_B32

    // D.u = (VCC[i] ? S1.u : S0.u) (i = threadID in wave); VOP3: specify VCC
    // as a scalar GPR in S2.
    void
    Inst_VOP2__V_CNDMASK_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();
        vcc.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane]
                    = bits(vcc.rawData(), lane) ? src1[lane] : src0[lane];
            }
        }

        vdst.write();
    } // execute
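
    // Worked example (illustrative only): with vcc.rawData() == 0x5, bits 0
    // and 2 are set, so active lanes 0 and 2 take src1 while every other
    // active lane takes src0.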
    Inst_VOP2__V_ADD_F32::Inst_VOP2__V_ADD_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_add_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_ADD_F32

    Inst_VOP2__V_ADD_F32::~Inst_VOP2__V_ADD_F32()
    {
    } // ~Inst_VOP2__V_ADD_F32

    // D.f = S0.f + S1.f.
    void
    Inst_VOP2__V_ADD_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        VecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        if (isDPPInst()) {
            VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
            src0_dpp.read();

            DPRINTF(GCN3, "Handling V_ADD_F32 SRC DPP. SRC0: register v[%d], "
                    "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
                    "SRC1_ABS: %d, SRC1_NEG: %d, BOUND_CTRL: %d, "
                    "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
                    extData.iFmt_VOP_DPP.DPP_CTRL,
                    extData.iFmt_VOP_DPP.SRC0_ABS,
                    extData.iFmt_VOP_DPP.SRC0_NEG,
                    extData.iFmt_VOP_DPP.SRC1_ABS,
                    extData.iFmt_VOP_DPP.SRC1_NEG,
                    extData.iFmt_VOP_DPP.BOUND_CTRL,
                    extData.iFmt_VOP_DPP.BANK_MASK,
                    extData.iFmt_VOP_DPP.ROW_MASK);

            processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0_dpp[lane] + src1[lane];
                }
            }
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0[lane] + src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
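
    // Rough sketch of the DPP path above: processDPP() permutes src0 across
    // lanes according to DPP_CTRL (e.g., a row shift or lane rotate) before
    // the ordinary per-lane add runs, so DPP changes only operand routing,
    // never the arithmetic itself.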
    Inst_VOP2__V_SUB_F32::Inst_VOP2__V_SUB_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_sub_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_SUB_F32

    Inst_VOP2__V_SUB_F32::~Inst_VOP2__V_SUB_F32()
    {
    } // ~Inst_VOP2__V_SUB_F32

    // D.f = S0.f - S1.f.
    void
    Inst_VOP2__V_SUB_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] - src1[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_SUBREV_F32::Inst_VOP2__V_SUBREV_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_subrev_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_SUBREV_F32

    Inst_VOP2__V_SUBREV_F32::~Inst_VOP2__V_SUBREV_F32()
    {
    } // ~Inst_VOP2__V_SUBREV_F32

    // D.f = S1.f - S0.f.
    void
    Inst_VOP2__V_SUBREV_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] - src0[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MUL_LEGACY_F32::Inst_VOP2__V_MUL_LEGACY_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_legacy_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_MUL_LEGACY_F32

    Inst_VOP2__V_MUL_LEGACY_F32::~Inst_VOP2__V_MUL_LEGACY_F32()
    {
    } // ~Inst_VOP2__V_MUL_LEGACY_F32

    // D.f = S0.f * S1.f.
    void
    Inst_VOP2__V_MUL_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] * src1[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MUL_F32::Inst_VOP2__V_MUL_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_MUL_F32

    Inst_VOP2__V_MUL_F32::~Inst_VOP2__V_MUL_F32()
    {
    } // ~Inst_VOP2__V_MUL_F32

    // D.f = S0.f * S1.f.
    void
    Inst_VOP2__V_MUL_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isnan(src0[lane]) ||
                    std::isnan(src1[lane])) {
                    vdst[lane] = NAN;
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           !std::signbit(src0[lane])) {
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           std::signbit(src0[lane])) {
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if (std::isinf(src0[lane]) &&
                           !std::signbit(src0[lane])) {
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else if (std::isinf(src0[lane]) &&
                           std::signbit(src0[lane])) {
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else {
                    vdst[lane] = src0[lane] * src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
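
    // The cascade above pins the IEEE-754 corner cases down explicitly:
    // e.g., (+/-)0 * (+/-)inf produces NaN, a zero or subnormal times a
    // finite value keeps the product's sign on +/-0.0, and inf times a
    // nonzero finite value yields a signed infinity; only the final else
    // performs an ordinary multiply.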
    Inst_VOP2__V_MUL_I32_I24::Inst_VOP2__V_MUL_I32_I24(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_i32_i24")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MUL_I32_I24

    Inst_VOP2__V_MUL_I32_I24::~Inst_VOP2__V_MUL_I32_I24()
    {
    } // ~Inst_VOP2__V_MUL_I32_I24

    // D.i = S0.i[23:0] * S1.i[23:0].
    void
    Inst_VOP2__V_MUL_I32_I24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = sext<24>(bits(src0[lane], 23, 0))
                    * sext<24>(bits(src1[lane], 23, 0));
            }
        }

        vdst.write();
    } // execute
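
    // Worked example (illustrative only): sext<24>(bits(0x00FFFFFF, 23, 0))
    // sign-extends the 24-bit value to -1, so 0x00FFFFFF * 2 yields -2 here
    // rather than the 0x01FFFFFE an unsigned 24-bit multiply would give.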
    Inst_VOP2__V_MUL_HI_I32_I24::Inst_VOP2__V_MUL_HI_I32_I24(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_hi_i32_i24")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MUL_HI_I32_I24

    Inst_VOP2__V_MUL_HI_I32_I24::~Inst_VOP2__V_MUL_HI_I32_I24()
    {
    } // ~Inst_VOP2__V_MUL_HI_I32_I24

    // D.i = (S0.i[23:0] * S1.i[23:0]) >> 32.
    void
    Inst_VOP2__V_MUL_HI_I32_I24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemI64 tmp_src0
                    = (VecElemI64)sext<24>(bits(src0[lane], 23, 0));
                VecElemI64 tmp_src1
                    = (VecElemI64)sext<24>(bits(src1[lane], 23, 0));

                vdst[lane] = (VecElemI32)((tmp_src0 * tmp_src1) >> 32);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MUL_U32_U24::Inst_VOP2__V_MUL_U32_U24(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_u32_u24")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MUL_U32_U24

    Inst_VOP2__V_MUL_U32_U24::~Inst_VOP2__V_MUL_U32_U24()
    {
    } // ~Inst_VOP2__V_MUL_U32_U24

    // D.u = S0.u[23:0] * S1.u[23:0].
    void
    Inst_VOP2__V_MUL_U32_U24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        VecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        if (isSDWAInst()) {
            VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
            // use copies of original src0, src1, and dest during selecting
            VecOperandU32 origSrc0_sdwa(gpuDynInst,
                                        extData.iFmt_VOP_SDWA.SRC0);
            VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
            VecOperandU32 origVdst(gpuDynInst, instData.VDST);

            src0_sdwa.read();
            origSrc0_sdwa.read();
            origSrc1.read();

            DPRINTF(GCN3, "Handling V_MUL_U32_U24 SRC SDWA. SRC0: register "
                    "v[%d], DST_SEL: %d, DST_UNUSED: %d, CLAMP: %d, SRC0_SEL: "
                    "%d, SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: "
                    "%d, SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
                    extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
                    extData.iFmt_VOP_SDWA.DST_UNUSED,
                    extData.iFmt_VOP_SDWA.CLAMP,
                    extData.iFmt_VOP_SDWA.SRC0_SEL,
                    extData.iFmt_VOP_SDWA.SRC0_SEXT,
                    extData.iFmt_VOP_SDWA.SRC0_NEG,
                    extData.iFmt_VOP_SDWA.SRC0_ABS,
                    extData.iFmt_VOP_SDWA.SRC1_SEL,
                    extData.iFmt_VOP_SDWA.SRC1_SEXT,
                    extData.iFmt_VOP_SDWA.SRC1_NEG,
                    extData.iFmt_VOP_SDWA.SRC1_ABS);

            processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
                            src1, origSrc1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = bits(src0_sdwa[lane], 23, 0) *
                                 bits(src1[lane], 23, 0);
                    origVdst[lane] = vdst[lane]; // keep copy consistent
                }
            }

            processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = bits(src0[lane], 23, 0) *
                                 bits(src1[lane], 23, 0);
                }
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MUL_HI_U32_U24::Inst_VOP2__V_MUL_HI_U32_U24(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_hi_u32_u24")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MUL_HI_U32_U24

    Inst_VOP2__V_MUL_HI_U32_U24::~Inst_VOP2__V_MUL_HI_U32_U24()
    {
    } // ~Inst_VOP2__V_MUL_HI_U32_U24

    // D.i = (S0.u[23:0] * S1.u[23:0]) >> 32.
    void
    Inst_VOP2__V_MUL_HI_U32_U24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0);
                VecElemU64 tmp_src1 = (VecElemU64)bits(src1[lane], 23, 0);

                vdst[lane] = (VecElemU32)((tmp_src0 * tmp_src1) >> 32);
            }
        }

        vdst.write();
    } // execute
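
    // Worked example (illustrative only): with both sources equal to
    // 0x00FFFFFF, the widened 64-bit product is 0xFFFFFE000001, and the
    // returned high half is 0xFFFFFE000001 >> 32 == 0xFFFF.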
    Inst_VOP2__V_MIN_F32::Inst_VOP2__V_MIN_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_min_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_MIN_F32

    Inst_VOP2__V_MIN_F32::~Inst_VOP2__V_MIN_F32()
    {
    } // ~Inst_VOP2__V_MIN_F32

    // D.f = (S0.f < S1.f ? S0.f : S1.f).
    void
    Inst_VOP2__V_MIN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fmin(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MAX_F32::Inst_VOP2__V_MAX_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_max_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_MAX_F32

    Inst_VOP2__V_MAX_F32::~Inst_VOP2__V_MAX_F32()
    {
    } // ~Inst_VOP2__V_MAX_F32

    // D.f = (S0.f >= S1.f ? S0.f : S1.f).
    void
    Inst_VOP2__V_MAX_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fmax(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MIN_I32::Inst_VOP2__V_MIN_I32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_min_i32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MIN_I32

    Inst_VOP2__V_MIN_I32::~Inst_VOP2__V_MIN_I32()
    {
    } // ~Inst_VOP2__V_MIN_I32

    // D.i = min(S0.i, S1.i).
    void
    Inst_VOP2__V_MIN_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::min(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MAX_I32::Inst_VOP2__V_MAX_I32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_max_i32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MAX_I32

    Inst_VOP2__V_MAX_I32::~Inst_VOP2__V_MAX_I32()
    {
    } // ~Inst_VOP2__V_MAX_I32

    // D.i = max(S0.i, S1.i).
    void
    Inst_VOP2__V_MAX_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::max(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MIN_U32::Inst_VOP2__V_MIN_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_min_u32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MIN_U32

    Inst_VOP2__V_MIN_U32::~Inst_VOP2__V_MIN_U32()
    {
    } // ~Inst_VOP2__V_MIN_U32

    // D.u = min(S0.u, S1.u).
    void
    Inst_VOP2__V_MIN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::min(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MAX_U32::Inst_VOP2__V_MAX_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_max_u32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MAX_U32

    Inst_VOP2__V_MAX_U32::~Inst_VOP2__V_MAX_U32()
    {
    } // ~Inst_VOP2__V_MAX_U32

    // D.u = max(S0.u, S1.u).
    void
    Inst_VOP2__V_MAX_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::max(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_LSHRREV_B32::Inst_VOP2__V_LSHRREV_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_lshrrev_b32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_LSHRREV_B32

    Inst_VOP2__V_LSHRREV_B32::~Inst_VOP2__V_LSHRREV_B32()
    {
    } // ~Inst_VOP2__V_LSHRREV_B32

    // D.u = S1.u >> S0.u[4:0].
    // The vacated bits are set to zero.
    void
    Inst_VOP2__V_LSHRREV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
            }
        }

        vdst.write();
    } // execute
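
    // The "rev" in the mnemonic means the operand roles are swapped relative
    // to v_lshr_b32: SRC0 supplies the shift amount and SRC1 the value.
    // Worked example: src0 == 33 is masked to bits [4:0] == 1, so
    // 0x80000000 >> 1 == 0x40000000.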
    Inst_VOP2__V_ASHRREV_I32::Inst_VOP2__V_ASHRREV_I32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_ashrrev_i32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_ASHRREV_I32

    Inst_VOP2__V_ASHRREV_I32::~Inst_VOP2__V_ASHRREV_I32()
    {
    } // ~Inst_VOP2__V_ASHRREV_I32

    // D.i = signext(S1.i) >> S0.i[4:0].
    // The vacated bits are set to the sign bit of the input value.
    void
    Inst_VOP2__V_ASHRREV_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_LSHLREV_B32::Inst_VOP2__V_LSHLREV_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_lshlrev_b32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_LSHLREV_B32

    Inst_VOP2__V_LSHLREV_B32::~Inst_VOP2__V_LSHLREV_B32()
    {
    } // ~Inst_VOP2__V_LSHLREV_B32

    // D.u = S1.u << S0.u[4:0].
    void
    Inst_VOP2__V_LSHLREV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        VecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        if (isSDWAInst()) {
            VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
            // use copies of original src0, src1, and vdst during selecting
            VecOperandU32 origSrc0_sdwa(gpuDynInst,
                                        extData.iFmt_VOP_SDWA.SRC0);
            VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
            VecOperandU32 origVdst(gpuDynInst, instData.VDST);

            src0_sdwa.read();
            origSrc0_sdwa.read();
            origSrc1.read();

            DPRINTF(GCN3, "Handling V_LSHLREV_B32 SRC SDWA. SRC0: register "
                    "v[%d], DST_SEL: %d, DST_UNUSED: %d, CLAMP: %d, SRC0_SEL: "
                    "%d, SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: "
                    "%d, SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
                    extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
                    extData.iFmt_VOP_SDWA.DST_UNUSED,
                    extData.iFmt_VOP_SDWA.CLAMP,
                    extData.iFmt_VOP_SDWA.SRC0_SEL,
                    extData.iFmt_VOP_SDWA.SRC0_SEXT,
                    extData.iFmt_VOP_SDWA.SRC0_NEG,
                    extData.iFmt_VOP_SDWA.SRC0_ABS,
                    extData.iFmt_VOP_SDWA.SRC1_SEL,
                    extData.iFmt_VOP_SDWA.SRC1_SEXT,
                    extData.iFmt_VOP_SDWA.SRC1_NEG,
                    extData.iFmt_VOP_SDWA.SRC1_ABS);

            processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
                            src1, origSrc1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src1[lane] << bits(src0_sdwa[lane], 4, 0);
                    origVdst[lane] = vdst[lane]; // keep copy consistent
                }
            }

            processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src1[lane] << bits(src0[lane], 4, 0);
                }
            }
        }

        vdst.write();
    } // execute
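
    // Rough sketch of the SDWA path above: processSDWA_src() applies the
    // sub-dword source selects (byte/word lane, sign-extension, abs/neg) to
    // src0_sdwa and src1 before the shift, and processSDWA_dst() then merges
    // the result into vdst according to DST_SEL/DST_UNUSED; the orig* copies
    // preserve the unmodified register values the selects operate on.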
    Inst_VOP2__V_AND_B32::Inst_VOP2__V_AND_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_and_b32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_AND_B32

    Inst_VOP2__V_AND_B32::~Inst_VOP2__V_AND_B32()
    {
    } // ~Inst_VOP2__V_AND_B32

    // D.u = S0.u & S1.u.
    // Input and output modifiers not supported.
    void
    Inst_VOP2__V_AND_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] & src1[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_OR_B32::Inst_VOP2__V_OR_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_or_b32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_OR_B32

    Inst_VOP2__V_OR_B32::~Inst_VOP2__V_OR_B32()
    {
    } // ~Inst_VOP2__V_OR_B32

    // D.u = S0.u | S1.u.
    // Input and output modifiers not supported.
    void
    Inst_VOP2__V_OR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        VecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        if (isSDWAInst()) {
            VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
            // use copies of original src0, src1, and dest during selecting
            VecOperandU32 origSrc0_sdwa(gpuDynInst,
                                        extData.iFmt_VOP_SDWA.SRC0);
            VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
            VecOperandU32 origVdst(gpuDynInst, instData.VDST);

            src0_sdwa.read();
            origSrc0_sdwa.read();
            origSrc1.read();

            DPRINTF(GCN3, "Handling V_OR_B32 SRC SDWA. SRC0: register v[%d], "
                    "DST_SEL: %d, DST_UNUSED: %d, CLAMP: %d, SRC0_SEL: %d, "
                    "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
                    "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
                    extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
                    extData.iFmt_VOP_SDWA.DST_UNUSED,
                    extData.iFmt_VOP_SDWA.CLAMP,
                    extData.iFmt_VOP_SDWA.SRC0_SEL,
                    extData.iFmt_VOP_SDWA.SRC0_SEXT,
                    extData.iFmt_VOP_SDWA.SRC0_NEG,
                    extData.iFmt_VOP_SDWA.SRC0_ABS,
                    extData.iFmt_VOP_SDWA.SRC1_SEL,
                    extData.iFmt_VOP_SDWA.SRC1_SEXT,
                    extData.iFmt_VOP_SDWA.SRC1_NEG,
                    extData.iFmt_VOP_SDWA.SRC1_ABS);

            processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
                            src1, origSrc1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0_sdwa[lane] | src1[lane];
                    origVdst[lane] = vdst[lane]; // keep copy consistent
                }
            }

            processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0[lane] | src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_XOR_B32::Inst_VOP2__V_XOR_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_xor_b32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_XOR_B32

    Inst_VOP2__V_XOR_B32::~Inst_VOP2__V_XOR_B32()
    {
    } // ~Inst_VOP2__V_XOR_B32

    // D.u = S0.u ^ S1.u.
    // Input and output modifiers not supported.
    void
    Inst_VOP2__V_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] ^ src1[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MAC_F32::Inst_VOP2__V_MAC_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mac_f32")
    {
        setFlag(ALU);
        setFlag(F32);
        setFlag(MAC);
    } // Inst_VOP2__V_MAC_F32

    Inst_VOP2__V_MAC_F32::~Inst_VOP2__V_MAC_F32()
    {
    } // ~Inst_VOP2__V_MAC_F32

    // D.f = S0.f * S1.f + D.f.
    void
    Inst_VOP2__V_MAC_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        VecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();
        vdst.read();

        if (isDPPInst()) {
            VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
            src0_dpp.read();

            DPRINTF(GCN3, "Handling V_MAC_F32 SRC DPP. SRC0: register v[%d], "
                    "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
                    "SRC1_ABS: %d, SRC1_NEG: %d, BOUND_CTRL: %d, "
                    "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
                    extData.iFmt_VOP_DPP.DPP_CTRL,
                    extData.iFmt_VOP_DPP.SRC0_ABS,
                    extData.iFmt_VOP_DPP.SRC0_NEG,
                    extData.iFmt_VOP_DPP.SRC1_ABS,
                    extData.iFmt_VOP_DPP.SRC1_NEG,
                    extData.iFmt_VOP_DPP.BOUND_CTRL,
                    extData.iFmt_VOP_DPP.BANK_MASK,
                    extData.iFmt_VOP_DPP.ROW_MASK);

            processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = std::fma(src0_dpp[lane], src1[lane],
                                          vdst[lane]);
                }
            }
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
                }
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MADMK_F32::Inst_VOP2__V_MADMK_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_madmk_f32")
    {
        setFlag(ALU);
        setFlag(F32);
        setFlag(MAD);
    } // Inst_VOP2__V_MADMK_F32

    Inst_VOP2__V_MADMK_F32::~Inst_VOP2__V_MADMK_F32()
    {
    } // ~Inst_VOP2__V_MADMK_F32

    // D.f = S0.f * K + S1.f; K is a 32-bit inline constant.
    // This opcode cannot use the input/output modifiers.
    void
    Inst_VOP2__V_MADMK_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);
        VecElemF32 k = extData.imm_f32;

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fma(src0[lane], k, src1[lane]);
            }
        }

        vdst.write();
    } // execute
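
    // Worked example (illustrative only): with K == 2.5f taken from the
    // trailing literal dword, src0 == 2.0f and src1 == 1.0f give
    // std::fma(2.0f, 2.5f, 1.0f) == 6.0f, computed with a single rounding
    // as a fused multiply-add.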
    Inst_VOP2__V_MADAK_F32::Inst_VOP2__V_MADAK_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_madak_f32")
    {
        setFlag(ALU);
        setFlag(F32);
        setFlag(MAD);
    } // Inst_VOP2__V_MADAK_F32

    Inst_VOP2__V_MADAK_F32::~Inst_VOP2__V_MADAK_F32()
    {
    } // ~Inst_VOP2__V_MADAK_F32

    // D.f = S0.f * S1.f + K; K is a 32-bit inline constant.
    // This opcode cannot use input/output modifiers.
    void
    Inst_VOP2__V_MADAK_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);
        VecElemF32 k = extData.imm_f32;

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fma(src0[lane], src1[lane], k);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_ADD_U32::Inst_VOP2__V_ADD_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_add_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
    } // Inst_VOP2__V_ADD_U32

    Inst_VOP2__V_ADD_U32::~Inst_VOP2__V_ADD_U32()
    {
    } // ~Inst_VOP2__V_ADD_U32

    // D.u = S0.u + S1.u;
    // VCC[threadId] = (S0.u + S1.u >= 0x100000000ULL ? 1 : 0) is an UNSIGNED
    // overflow or carry-out.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
    void
    Inst_VOP2__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        VecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        if (isSDWAInst()) {
            VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
            // use copies of original src0, src1, and dest during selecting
            VecOperandU32 origSrc0_sdwa(gpuDynInst,
                                        extData.iFmt_VOP_SDWA.SRC0);
            VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
            VecOperandU32 origVdst(gpuDynInst, instData.VDST);

            src0_sdwa.read();
            origSrc0_sdwa.read();
            origSrc1.read();

            DPRINTF(GCN3, "Handling V_ADD_U32 SRC SDWA. SRC0: register v[%d], "
                    "DST_SEL: %d, DST_UNUSED: %d, CLAMP: %d, SRC0_SEL: %d, "
                    "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
                    "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
                    extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
                    extData.iFmt_VOP_SDWA.DST_UNUSED,
                    extData.iFmt_VOP_SDWA.CLAMP,
                    extData.iFmt_VOP_SDWA.SRC0_SEL,
                    extData.iFmt_VOP_SDWA.SRC0_SEXT,
                    extData.iFmt_VOP_SDWA.SRC0_NEG,
                    extData.iFmt_VOP_SDWA.SRC0_ABS,
                    extData.iFmt_VOP_SDWA.SRC1_SEL,
                    extData.iFmt_VOP_SDWA.SRC1_SEXT,
                    extData.iFmt_VOP_SDWA.SRC1_NEG,
                    extData.iFmt_VOP_SDWA.SRC1_ABS);

            processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
                            src1, origSrc1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0_sdwa[lane] + src1[lane];
                    origVdst[lane] = vdst[lane]; // keep copy consistent
                    vcc.setBit(lane, ((VecElemU64)src0_sdwa[lane]
                        + (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0);
                }
            }

            processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0[lane] + src1[lane];
                    vcc.setBit(lane, ((VecElemU64)src0[lane]
                        + (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0);
                }
            }
        }

        vdst.write();
        vcc.write();
    } // execute
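
    // Worked example (illustrative only): src0 == 0xFFFFFFFF and src1 == 1
    // wrap vdst to 0, while the widened 64-bit sum 0x100000000ULL trips the
    // >= test, so that lane's VCC bit records the carry-out.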
    Inst_VOP2__V_SUB_U32::Inst_VOP2__V_SUB_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_sub_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
    } // Inst_VOP2__V_SUB_U32

    Inst_VOP2__V_SUB_U32::~Inst_VOP2__V_SUB_U32()
    {
    } // ~Inst_VOP2__V_SUB_U32

    // D.u = S0.u - S1.u;
    // VCC[threadId] = (S1.u > S0.u ? 1 : 0) is an UNSIGNED overflow or
    // carry-out.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
    void
    Inst_VOP2__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] - src1[lane];
                vcc.setBit(lane, src1[lane] > src0[lane] ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute
    Inst_VOP2__V_SUBREV_U32::Inst_VOP2__V_SUBREV_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_subrev_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
    } // Inst_VOP2__V_SUBREV_U32

    Inst_VOP2__V_SUBREV_U32::~Inst_VOP2__V_SUBREV_U32()
    {
    } // ~Inst_VOP2__V_SUBREV_U32

    // D.u = S1.u - S0.u;
    // VCC[threadId] = (S0.u > S1.u ? 1 : 0) is an UNSIGNED overflow or
    // carry-out.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
    void
    Inst_VOP2__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] - src0[lane];
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute
    Inst_VOP2__V_ADDC_U32::Inst_VOP2__V_ADDC_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_addc_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(ReadsVCC);
    } // Inst_VOP2__V_ADDC_U32

    Inst_VOP2__V_ADDC_U32::~Inst_VOP2__V_ADDC_U32()
    {
    } // ~Inst_VOP2__V_ADDC_U32

    // D.u = S0.u + S1.u + VCC[threadId];
    // VCC[threadId] = (S0.u + S1.u + VCC[threadId] >= 0x100000000ULL ? 1 : 0)
    // is an UNSIGNED overflow.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
    // source comes from the SGPR-pair at S2.u.
    void
    Inst_VOP2__V_ADDC_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();
        vcc.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] + src1[lane]
                    + bits(vcc.rawData(), lane);
                vcc.setBit(lane, ((VecElemU64)src0[lane]
                    + (VecElemU64)src1[lane]
                    + (VecElemU64)bits(vcc.rawData(), lane, lane))
                    >= 0x100000000ULL ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute
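
    // Usage sketch (illustrative only): a 64-bit add is built from this pair
    // of instructions, e.g.
    //     v_add_u32  v0, vcc, v0, v2       // low dword, carry-out in VCC
    //     v_addc_u32 v1, vcc, v1, v3, vcc  // high dword consumes the carry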
    Inst_VOP2__V_SUBB_U32::Inst_VOP2__V_SUBB_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_subb_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(ReadsVCC);
    } // Inst_VOP2__V_SUBB_U32

    Inst_VOP2__V_SUBB_U32::~Inst_VOP2__V_SUBB_U32()
    {
    } // ~Inst_VOP2__V_SUBB_U32

    // D.u = S0.u - S1.u - VCC[threadId];
    // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED
    // overflow or carry-out.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
    // source comes from the SGPR-pair at S2.u.
    void
    Inst_VOP2__V_SUBB_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();
        vcc.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane]
                    = src0[lane] - src1[lane] - bits(vcc.rawData(), lane);
                vcc.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane))
                    > src0[lane] ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute
    Inst_VOP2__V_SUBBREV_U32::Inst_VOP2__V_SUBBREV_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_subbrev_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(ReadsVCC);
    } // Inst_VOP2__V_SUBBREV_U32

    Inst_VOP2__V_SUBBREV_U32::~Inst_VOP2__V_SUBBREV_U32()
    {
    } // ~Inst_VOP2__V_SUBBREV_U32

    // D.u = S1.u - S0.u - VCC[threadId];
    // VCC[threadId] = (S0.u + VCC[threadId] > S1.u ? 1 : 0) is an UNSIGNED
    // overflow or carry-out.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
    // source comes from the SGPR-pair at S2.u.
    void
    Inst_VOP2__V_SUBBREV_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();
        vcc.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane]
                    = src1[lane] - src0[lane] - bits(vcc.rawData(), lane);
                vcc.setBit(lane, (src0[lane] + bits(vcc.rawData(), lane))
                    > src1[lane] ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute
    Inst_VOP2__V_ADD_F16::Inst_VOP2__V_ADD_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_add_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP2__V_ADD_F16

    Inst_VOP2__V_ADD_F16::~Inst_VOP2__V_ADD_F16()
    {
    } // ~Inst_VOP2__V_ADD_F16

    // D.f16 = S0.f16 + S1.f16.
    void
    Inst_VOP2__V_ADD_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP2__V_SUB_F16::Inst_VOP2__V_SUB_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_sub_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP2__V_SUB_F16

    Inst_VOP2__V_SUB_F16::~Inst_VOP2__V_SUB_F16()
    {
    } // ~Inst_VOP2__V_SUB_F16

    // D.f16 = S0.f16 - S1.f16.
    void
    Inst_VOP2__V_SUB_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP2__V_SUBREV_F16::Inst_VOP2__V_SUBREV_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_subrev_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP2__V_SUBREV_F16

    Inst_VOP2__V_SUBREV_F16::~Inst_VOP2__V_SUBREV_F16()
    {
    } // ~Inst_VOP2__V_SUBREV_F16

    // D.f16 = S1.f16 - S0.f16.
    void
    Inst_VOP2__V_SUBREV_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP2__V_MUL_F16::Inst_VOP2__V_MUL_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP2__V_MUL_F16

    Inst_VOP2__V_MUL_F16::~Inst_VOP2__V_MUL_F16()
    {
    } // ~Inst_VOP2__V_MUL_F16

    // D.f16 = S0.f16 * S1.f16.
    void
    Inst_VOP2__V_MUL_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP2__V_MAC_F16::Inst_VOP2__V_MAC_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mac_f16")
    {
        setFlag(ALU);
        setFlag(F16);
        setFlag(MAC);
    } // Inst_VOP2__V_MAC_F16

    Inst_VOP2__V_MAC_F16::~Inst_VOP2__V_MAC_F16()
    {
    } // ~Inst_VOP2__V_MAC_F16

    // D.f16 = S0.f16 * S1.f16 + D.f16.
    void
    Inst_VOP2__V_MAC_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP2__V_MADMK_F16::Inst_VOP2__V_MADMK_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_madmk_f16")
    {
        setFlag(ALU);
        setFlag(F16);
        setFlag(MAD);
    } // Inst_VOP2__V_MADMK_F16

    Inst_VOP2__V_MADMK_F16::~Inst_VOP2__V_MADMK_F16()
    {
    } // ~Inst_VOP2__V_MADMK_F16

    // D.f16 = S0.f16 * K.f16 + S1.f16; K is a 16-bit inline constant stored
    // in the following literal DWORD.
    // This opcode cannot use the VOP3 encoding and cannot use input/output
    // modifiers.
    void
    Inst_VOP2__V_MADMK_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP2__V_MADAK_F16::Inst_VOP2__V_MADAK_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_madak_f16")
    {
        setFlag(ALU);
        setFlag(F16);
        setFlag(MAD);
    } // Inst_VOP2__V_MADAK_F16

    Inst_VOP2__V_MADAK_F16::~Inst_VOP2__V_MADAK_F16()
    {
    } // ~Inst_VOP2__V_MADAK_F16

    // D.f16 = S0.f16 * S1.f16 + K.f16; K is a 16-bit inline constant stored
    // in the following literal DWORD.
    // This opcode cannot use the VOP3 encoding and cannot use input/output
    // modifiers.
    void
    Inst_VOP2__V_MADAK_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_VOP2__V_ADD_U16::Inst_VOP2__V_ADD_U16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_add_u16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_ADD_U16

    Inst_VOP2__V_ADD_U16::~Inst_VOP2__V_ADD_U16()
    {
    } // ~Inst_VOP2__V_ADD_U16

    // D.u16 = S0.u16 + S1.u16.
    void
    Inst_VOP2__V_ADD_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] + src1[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_SUB_U16::Inst_VOP2__V_SUB_U16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_sub_u16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_SUB_U16

    Inst_VOP2__V_SUB_U16::~Inst_VOP2__V_SUB_U16()
    {
    } // ~Inst_VOP2__V_SUB_U16

    // D.u16 = S0.u16 - S1.u16.
    void
    Inst_VOP2__V_SUB_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] - src1[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_SUBREV_U16::Inst_VOP2__V_SUBREV_U16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_subrev_u16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_SUBREV_U16

    Inst_VOP2__V_SUBREV_U16::~Inst_VOP2__V_SUBREV_U16()
    {
    } // ~Inst_VOP2__V_SUBREV_U16

    // D.u16 = S1.u16 - S0.u16.
    void
    Inst_VOP2__V_SUBREV_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] - src0[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MUL_LO_U16::Inst_VOP2__V_MUL_LO_U16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_lo_u16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MUL_LO_U16

    Inst_VOP2__V_MUL_LO_U16::~Inst_VOP2__V_MUL_LO_U16()
    {
    } // ~Inst_VOP2__V_MUL_LO_U16

    // D.u16 = S0.u16 * S1.u16.
    void
    Inst_VOP2__V_MUL_LO_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] * src1[lane];
            }
        }

        vdst.write();
    } // execute
7025 Inst_VOP2__V_LSHLREV_B16::Inst_VOP2__V_LSHLREV_B16(InFmt_VOP2
*iFmt
)
7026 : Inst_VOP2(iFmt
, "v_lshlrev_b16")
7029 } // Inst_VOP2__V_LSHLREV_B16
7031 Inst_VOP2__V_LSHLREV_B16::~Inst_VOP2__V_LSHLREV_B16()
7033 } // ~Inst_VOP2__V_LSHLREV_B16
7035 // D.u[15:0] = S1.u[15:0] << S0.u[3:0].
7037 Inst_VOP2__V_LSHLREV_B16::execute(GPUDynInstPtr gpuDynInst
)
7039 Wavefront
*wf
= gpuDynInst
->wavefront();
7040 ConstVecOperandU16
src0(gpuDynInst
, instData
.SRC0
);
7041 ConstVecOperandU16
src1(gpuDynInst
, instData
.VSRC1
);
7042 VecOperandU16
vdst(gpuDynInst
, instData
.VDST
);
7047 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
7048 if (wf
->execMask(lane
)) {
7049 vdst
[lane
] = src1
[lane
] << bits(src0
[lane
], 3, 0);
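    // NOTE: the "rev" suffix means the operand roles are reversed relative
    // to the non-rev shift ops: the shift count comes from SRC0 and the
    // shifted value from VSRC1. For example (hypothetical operands),
    //     v_lshlrev_b16 v0, 4, v1
    // computes v0.u16 = v1.u16 << 4, so the inline constant supplies the
    // shift amount. This pairing exists because only SRC0 can encode an
    // inline constant in the VOP2 format.
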
    Inst_VOP2__V_LSHRREV_B16::Inst_VOP2__V_LSHRREV_B16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_lshrrev_b16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_LSHRREV_B16

    Inst_VOP2__V_LSHRREV_B16::~Inst_VOP2__V_LSHRREV_B16()
    {
    } // ~Inst_VOP2__V_LSHRREV_B16

    // D.u[15:0] = S1.u[15:0] >> S0.u[3:0].
    // The vacated bits are set to zero.
    void
    Inst_VOP2__V_LSHRREV_B16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> src0[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP2__V_ASHRREV_I16::Inst_VOP2__V_ASHRREV_I16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_ashrrev_i16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_ASHRREV_I16

    Inst_VOP2__V_ASHRREV_I16::~Inst_VOP2__V_ASHRREV_I16()
    {
    } // ~Inst_VOP2__V_ASHRREV_I16

    // D.i[15:0] = signext(S1.i[15:0]) >> S0.i[3:0].
    // The vacated bits are set to the sign bit of the input value.
    void
    Inst_VOP2__V_ASHRREV_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        VecOperandI16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> src0[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP2__V_MAX_F16::Inst_VOP2__V_MAX_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_max_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP2__V_MAX_F16

    Inst_VOP2__V_MAX_F16::~Inst_VOP2__V_MAX_F16()
    {
    } // ~Inst_VOP2__V_MAX_F16

    // D.f16 = max(S0.f16, S1.f16).
    void
    Inst_VOP2__V_MAX_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP2__V_MIN_F16::Inst_VOP2__V_MIN_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_min_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP2__V_MIN_F16

    Inst_VOP2__V_MIN_F16::~Inst_VOP2__V_MIN_F16()
    {
    } // ~Inst_VOP2__V_MIN_F16

    // D.f16 = min(S0.f16, S1.f16).
    void
    Inst_VOP2__V_MIN_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP2__V_MAX_U16::Inst_VOP2__V_MAX_U16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_max_u16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MAX_U16

    Inst_VOP2__V_MAX_U16::~Inst_VOP2__V_MAX_U16()
    {
    } // ~Inst_VOP2__V_MAX_U16

    // D.u[15:0] = max(S0.u[15:0], S1.u[15:0]).
    void
    Inst_VOP2__V_MAX_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::max(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP2__V_MAX_I16::Inst_VOP2__V_MAX_I16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_max_i16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MAX_I16

    Inst_VOP2__V_MAX_I16::~Inst_VOP2__V_MAX_I16()
    {
    } // ~Inst_VOP2__V_MAX_I16

    // D.i[15:0] = max(S0.i[15:0], S1.i[15:0]).
    void
    Inst_VOP2__V_MAX_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        VecOperandI16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::max(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP2__V_MIN_U16::Inst_VOP2__V_MIN_U16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_min_u16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MIN_U16

    Inst_VOP2__V_MIN_U16::~Inst_VOP2__V_MIN_U16()
    {
    } // ~Inst_VOP2__V_MIN_U16

    // D.u[15:0] = min(S0.u[15:0], S1.u[15:0]).
    void
    Inst_VOP2__V_MIN_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::min(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP2__V_MIN_I16::Inst_VOP2__V_MIN_I16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_min_i16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MIN_I16

    Inst_VOP2__V_MIN_I16::~Inst_VOP2__V_MIN_I16()
    {
    } // ~Inst_VOP2__V_MIN_I16

    // D.i[15:0] = min(S0.i[15:0], S1.i[15:0]).
    void
    Inst_VOP2__V_MIN_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        VecOperandI16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::min(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP2__V_LDEXP_F16::Inst_VOP2__V_LDEXP_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_ldexp_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP2__V_LDEXP_F16

    Inst_VOP2__V_LDEXP_F16::~Inst_VOP2__V_LDEXP_F16()
    {
    } // ~Inst_VOP2__V_LDEXP_F16

    // D.f16 = S0.f16 * (2 ** S1.i16).
    void
    Inst_VOP2__V_LDEXP_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_NOP::Inst_VOP1__V_NOP(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_nop")
    {
        setFlag(Nop);
        setFlag(ALU);
    } // Inst_VOP1__V_NOP

    Inst_VOP1__V_NOP::~Inst_VOP1__V_NOP()
    {
    } // ~Inst_VOP1__V_NOP

    // Do nothing.
    void
    Inst_VOP1__V_NOP::execute(GPUDynInstPtr gpuDynInst)
    {
    } // execute

    Inst_VOP1__V_MOV_B32::Inst_VOP1__V_MOV_B32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_mov_b32")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_MOV_B32

    Inst_VOP1__V_MOV_B32::~Inst_VOP1__V_MOV_B32()
    {
    } // ~Inst_VOP1__V_MOV_B32

    // D.u = S0.u.
    // Input and output modifiers not supported; this is an untyped operation.
    void
    Inst_VOP1__V_MOV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (isDPPInst()) {
            VecOperandU32 src_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
            src_dpp.read();

            DPRINTF(GCN3, "Handling V_MOV_B32 SRC DPP. SRC0: register v[%d], "
                    "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
                    "SRC1_ABS: %d, SRC1_NEG: %d, BOUND_CTRL: %d, "
                    "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
                    extData.iFmt_VOP_DPP.DPP_CTRL,
                    extData.iFmt_VOP_DPP.SRC0_ABS,
                    extData.iFmt_VOP_DPP.SRC0_NEG,
                    extData.iFmt_VOP_DPP.SRC1_ABS,
                    extData.iFmt_VOP_DPP.SRC1_NEG,
                    extData.iFmt_VOP_DPP.BOUND_CTRL,
                    extData.iFmt_VOP_DPP.BANK_MASK,
                    extData.iFmt_VOP_DPP.ROW_MASK);

            // NOTE: For VOP1, there is no SRC1, so make sure we're not trying
            // to negate it or take the absolute value of it
            assert(!extData.iFmt_VOP_DPP.SRC1_ABS);
            assert(!extData.iFmt_VOP_DPP.SRC1_NEG);

            processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src_dpp);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src_dpp[lane];
                }
            }
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src[lane];
                }
            }
        }

        vdst.write();
    } // execute

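    // NOTE: DPP (Data Parallel Primitives) makes a VOP instruction read its
    // vector source through a cross-lane pattern: DPP_CTRL selects row
    // shifts/rotations, mirrors, or broadcasts, while BOUND_CTRL chooses
    // what lanes with out-of-range neighbors read and ROW_MASK/BANK_MASK
    // gate groups of lanes. processDPP() (from inst_util.hh) permutes
    // src_dpp in place, which is why the copy loop above can stay a plain
    // per-lane move. As a rough sketch, a row-shift-left-by-1 control makes
    // lane i see lane i+1's value within each 16-lane row.
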
    Inst_VOP1__V_READFIRSTLANE_B32::Inst_VOP1__V_READFIRSTLANE_B32(
          InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_readfirstlane_b32")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_READFIRSTLANE_B32

    Inst_VOP1__V_READFIRSTLANE_B32::~Inst_VOP1__V_READFIRSTLANE_B32()
    {
    } // ~Inst_VOP1__V_READFIRSTLANE_B32

    // Copy one VGPR value to one SGPR. D = SGPR destination, S0 = source data
    // (VGPR# or M0 for lds direct access), Lane# = FindFirst1fromLSB(exec)
    // (Lane# = 0 if exec is zero). Ignores exec mask for the access.
    // Input and output modifiers not supported; this is an untyped operation.
    void
    Inst_VOP1__V_READFIRSTLANE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarRegI32 src_lane(0);
        ScalarRegU64 exec_mask = wf->execMask().to_ullong();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (exec_mask) {
            src_lane = findLsbSet(exec_mask);
        }

        sdst = src[src_lane];

        sdst.write();
    } // execute

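    // NOTE: findLsbSet() returns the index of the least-significant set
    // bit, so src_lane is the lowest-numbered active lane. For example,
    // exec_mask == 0xf0 selects lane 4, and exec_mask == 0 leaves src_lane
    // at 0, which matches the "Lane# = 0 if exec is zero" rule above.
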
    Inst_VOP1__V_CVT_I32_F64::Inst_VOP1__V_CVT_I32_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_i32_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CVT_I32_F64

    Inst_VOP1__V_CVT_I32_F64::~Inst_VOP1__V_CVT_I32_F64()
    {
    } // ~Inst_VOP1__V_CVT_I32_F64

    // D.i = (int)S0.d.
    // Out-of-range floating point values (including infinity) saturate. NaN
    // is converted to 0.
    void
    Inst_VOP1__V_CVT_I32_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp;
                std::frexp(src[lane], &exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane]) || exp > 30) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = INT_MIN;
                    } else {
                        vdst[lane] = INT_MAX;
                    }
                } else {
                    vdst[lane] = (VecElemI32)src[lane];
                }
            }
        }

        vdst.write();
    } // execute

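    // NOTE: the guard above relies on frexp() returning an exponent exp
    // with 2^(exp-1) <= |src| < 2^exp, so exp > 30 flags any magnitude of
    // 2^30 or larger for saturation (a slightly conservative cut-off,
    // since INT_MAX is 2^31 - 1). Worked examples: 3.9 -> 3 (the cast
    // truncates toward zero), -1.0e300 -> INT_MIN, NaN -> 0.
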
    Inst_VOP1__V_CVT_F64_I32::Inst_VOP1__V_CVT_F64_I32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f64_i32")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CVT_F64_I32

    Inst_VOP1__V_CVT_F64_I32::~Inst_VOP1__V_CVT_F64_I32()
    {
    } // ~Inst_VOP1__V_CVT_F64_I32

    // D.d = (double)S0.i.
    void
    Inst_VOP1__V_CVT_F64_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF64)src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_F32_I32::Inst_VOP1__V_CVT_F32_I32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_i32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_F32_I32

    Inst_VOP1__V_CVT_F32_I32::~Inst_VOP1__V_CVT_F32_I32()
    {
    } // ~Inst_VOP1__V_CVT_F32_I32

    // D.f = (float)S0.i.
    void
    Inst_VOP1__V_CVT_F32_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_F32_U32::Inst_VOP1__V_CVT_F32_U32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_u32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_F32_U32

    Inst_VOP1__V_CVT_F32_U32::~Inst_VOP1__V_CVT_F32_U32()
    {
    } // ~Inst_VOP1__V_CVT_F32_U32

    // D.f = (float)S0.u.
    void
    Inst_VOP1__V_CVT_F32_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_U32_F32::Inst_VOP1__V_CVT_U32_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_u32_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_U32_F32

    Inst_VOP1__V_CVT_U32_F32::~Inst_VOP1__V_CVT_U32_F32()
    {
    } // ~Inst_VOP1__V_CVT_U32_F32

    // D.u = (unsigned)S0.f.
    // Out-of-range floating point values (including infinity) saturate. NaN
    // is converted to 0.
    void
    Inst_VOP1__V_CVT_U32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp;
                std::frexp(src[lane], &exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane])) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = 0;
                    } else {
                        vdst[lane] = UINT_MAX;
                    }
                } else if (exp > 31) {
                    vdst[lane] = UINT_MAX;
                } else {
                    vdst[lane] = (VecElemU32)src[lane];
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_I32_F32::Inst_VOP1__V_CVT_I32_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_i32_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_I32_F32

    Inst_VOP1__V_CVT_I32_F32::~Inst_VOP1__V_CVT_I32_F32()
    {
    } // ~Inst_VOP1__V_CVT_I32_F32

    // D.i = (int)S0.f.
    // Out-of-range floating point values (including infinity) saturate. NaN
    // is converted to 0.
    void
    Inst_VOP1__V_CVT_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp;
                std::frexp(src[lane], &exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane]) || exp > 30) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = INT_MIN;
                    } else {
                        vdst[lane] = INT_MAX;
                    }
                } else {
                    vdst[lane] = (VecElemI32)src[lane];
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_MOV_FED_B32::Inst_VOP1__V_MOV_FED_B32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_mov_fed_b32")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_MOV_FED_B32

    Inst_VOP1__V_MOV_FED_B32::~Inst_VOP1__V_MOV_FED_B32()
    {
    } // ~Inst_VOP1__V_MOV_FED_B32

    // D.u = S0.u.
    // Input and output modifiers not supported; this is an untyped operation.
    void
    Inst_VOP1__V_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_CVT_F16_F32::Inst_VOP1__V_CVT_F16_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f16_f32")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_CVT_F16_F32

    Inst_VOP1__V_CVT_F16_F32::~Inst_VOP1__V_CVT_F16_F32()
    {
    } // ~Inst_VOP1__V_CVT_F16_F32

    // D.f16 = flt32_to_flt16(S0.f).
    void
    Inst_VOP1__V_CVT_F16_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_CVT_F32_F16::Inst_VOP1__V_CVT_F32_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_f16")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_F32_F16

    Inst_VOP1__V_CVT_F32_F16::~Inst_VOP1__V_CVT_F32_F16()
    {
    } // ~Inst_VOP1__V_CVT_F32_F16

    // D.f = flt16_to_flt32(S0.f16).
    void
    Inst_VOP1__V_CVT_F32_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_CVT_RPI_I32_F32::Inst_VOP1__V_CVT_RPI_I32_F32(
          InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_rpi_i32_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_RPI_I32_F32

    Inst_VOP1__V_CVT_RPI_I32_F32::~Inst_VOP1__V_CVT_RPI_I32_F32()
    {
    } // ~Inst_VOP1__V_CVT_RPI_I32_F32

    // D.i = (int)floor(S0.f + 0.5).
    void
    Inst_VOP1__V_CVT_RPI_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_FLR_I32_F32::Inst_VOP1__V_CVT_FLR_I32_F32(
          InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_flr_i32_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_FLR_I32_F32

    Inst_VOP1__V_CVT_FLR_I32_F32::~Inst_VOP1__V_CVT_FLR_I32_F32()
    {
    } // ~Inst_VOP1__V_CVT_FLR_I32_F32

    // D.i = (int)floor(S0.f).
    void
    Inst_VOP1__V_CVT_FLR_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemI32)std::floor(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_OFF_F32_I4::Inst_VOP1__V_CVT_OFF_F32_I4(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_off_f32_i4")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_OFF_F32_I4

    Inst_VOP1__V_CVT_OFF_F32_I4::~Inst_VOP1__V_CVT_OFF_F32_I4()
    {
    } // ~Inst_VOP1__V_CVT_OFF_F32_I4

    // 4-bit signed int to 32-bit float.
    void
    Inst_VOP1__V_CVT_OFF_F32_I4::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_CVT_F32_F64::Inst_VOP1__V_CVT_F32_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CVT_F32_F64

    Inst_VOP1__V_CVT_F32_F64::~Inst_VOP1__V_CVT_F32_F64()
    {
    } // ~Inst_VOP1__V_CVT_F32_F64

    // D.f = (float)S0.d.
    void
    Inst_VOP1__V_CVT_F32_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_F64_F32::Inst_VOP1__V_CVT_F64_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f64_f32")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CVT_F64_F32

    Inst_VOP1__V_CVT_F64_F32::~Inst_VOP1__V_CVT_F64_F32()
    {
    } // ~Inst_VOP1__V_CVT_F64_F32

    // D.d = (double)S0.f.
    void
    Inst_VOP1__V_CVT_F64_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF64)src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_F32_UBYTE0::Inst_VOP1__V_CVT_F32_UBYTE0(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_ubyte0")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_F32_UBYTE0

    Inst_VOP1__V_CVT_F32_UBYTE0::~Inst_VOP1__V_CVT_F32_UBYTE0()
    {
    } // ~Inst_VOP1__V_CVT_F32_UBYTE0

    // D.f = (float)(S0.u[7:0]).
    void
    Inst_VOP1__V_CVT_F32_UBYTE0::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)(bits(src[lane], 7, 0));
            }
        }

        vdst.write();
    } // execute

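    // NOTE: UBYTE0 above and the three variants below differ only in which
    // byte bits() extracts from the packed word. For src = 0x40302010:
    // ubyte0 gives 16.0f, ubyte1 gives 32.0f, ubyte2 gives 48.0f, and
    // ubyte3 gives 64.0f. This is the usual way packed 8-bit channels are
    // unpacked to float.
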
    Inst_VOP1__V_CVT_F32_UBYTE1::Inst_VOP1__V_CVT_F32_UBYTE1(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_ubyte1")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_F32_UBYTE1

    Inst_VOP1__V_CVT_F32_UBYTE1::~Inst_VOP1__V_CVT_F32_UBYTE1()
    {
    } // ~Inst_VOP1__V_CVT_F32_UBYTE1

    // D.f = (float)(S0.u[15:8]).
    void
    Inst_VOP1__V_CVT_F32_UBYTE1::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)(bits(src[lane], 15, 8));
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_F32_UBYTE2::Inst_VOP1__V_CVT_F32_UBYTE2(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_ubyte2")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_F32_UBYTE2

    Inst_VOP1__V_CVT_F32_UBYTE2::~Inst_VOP1__V_CVT_F32_UBYTE2()
    {
    } // ~Inst_VOP1__V_CVT_F32_UBYTE2

    // D.f = (float)(S0.u[23:16]).
    void
    Inst_VOP1__V_CVT_F32_UBYTE2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)(bits(src[lane], 23, 16));
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_F32_UBYTE3::Inst_VOP1__V_CVT_F32_UBYTE3(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_ubyte3")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_F32_UBYTE3

    Inst_VOP1__V_CVT_F32_UBYTE3::~Inst_VOP1__V_CVT_F32_UBYTE3()
    {
    } // ~Inst_VOP1__V_CVT_F32_UBYTE3

    // D.f = (float)(S0.u[31:24]).
    void
    Inst_VOP1__V_CVT_F32_UBYTE3::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)(bits(src[lane], 31, 24));
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_U32_F64::Inst_VOP1__V_CVT_U32_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_u32_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CVT_U32_F64

    Inst_VOP1__V_CVT_U32_F64::~Inst_VOP1__V_CVT_U32_F64()
    {
    } // ~Inst_VOP1__V_CVT_U32_F64

    // D.u = (unsigned)S0.d.
    // Out-of-range floating point values (including infinity) saturate. NaN
    // is converted to 0.
    void
    Inst_VOP1__V_CVT_U32_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp;
                std::frexp(src[lane], &exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane])) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = 0;
                    } else {
                        vdst[lane] = UINT_MAX;
                    }
                } else if (exp > 31) {
                    vdst[lane] = UINT_MAX;
                } else {
                    vdst[lane] = (VecElemU32)src[lane];
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_F64_U32::Inst_VOP1__V_CVT_F64_U32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f64_u32")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CVT_F64_U32

    Inst_VOP1__V_CVT_F64_U32::~Inst_VOP1__V_CVT_F64_U32()
    {
    } // ~Inst_VOP1__V_CVT_F64_U32

    // D.d = (double)S0.u.
    void
    Inst_VOP1__V_CVT_F64_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF64)src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_TRUNC_F64::Inst_VOP1__V_TRUNC_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_trunc_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_TRUNC_F64

    Inst_VOP1__V_TRUNC_F64::~Inst_VOP1__V_TRUNC_F64()
    {
    } // ~Inst_VOP1__V_TRUNC_F64

    // D.d = trunc(S0.d), return integer part of S0.d.
    void
    Inst_VOP1__V_TRUNC_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::trunc(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CEIL_F64::Inst_VOP1__V_CEIL_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_ceil_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CEIL_F64

    Inst_VOP1__V_CEIL_F64::~Inst_VOP1__V_CEIL_F64()
    {
    } // ~Inst_VOP1__V_CEIL_F64

    // D.d = ceil(S0.d);
    void
    Inst_VOP1__V_CEIL_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::ceil(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_RNDNE_F64::Inst_VOP1__V_RNDNE_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rndne_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_RNDNE_F64

    Inst_VOP1__V_RNDNE_F64::~Inst_VOP1__V_RNDNE_F64()
    {
    } // ~Inst_VOP1__V_RNDNE_F64

    // D.d = round_nearest_even(S0.d).
    void
    Inst_VOP1__V_RNDNE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = roundNearestEven(src[lane]);
            }
        }

        vdst.write();
    } // execute

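    // NOTE: assuming roundNearestEven() (inst_util.hh) implements the usual
    // IEEE round-half-to-even, ties go to the even neighbor:
    // 0.5 -> 0.0, 1.5 -> 2.0, 2.5 -> 2.0; non-ties round normally, e.g.
    // 2.4 -> 2.0 and 2.6 -> 3.0.
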
    Inst_VOP1__V_FLOOR_F64::Inst_VOP1__V_FLOOR_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_floor_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_FLOOR_F64

    Inst_VOP1__V_FLOOR_F64::~Inst_VOP1__V_FLOOR_F64()
    {
    } // ~Inst_VOP1__V_FLOOR_F64

    // D.d = floor(S0.d);
    void
    Inst_VOP1__V_FLOOR_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::floor(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_FRACT_F32::Inst_VOP1__V_FRACT_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_fract_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_FRACT_F32

    Inst_VOP1__V_FRACT_F32::~Inst_VOP1__V_FRACT_F32()
    {
    } // ~Inst_VOP1__V_FRACT_F32

    // D.f = modf(S0.f).
    void
    Inst_VOP1__V_FRACT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemF32 int_part(0.0);
                vdst[lane] = std::modf(src[lane], &int_part);
            }
        }

        vdst.write();
    } // execute

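    // NOTE: std::modf() returns a fractional part that carries the sign of
    // the input, so this fract gives 0.75f for 1.75f but -0.25f for -1.25f.
    // A literal "S0.f - floor(S0.f)" fract would give 0.75f for -1.25f
    // instead; the implementation follows the modf definition in the
    // comment above.
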
    Inst_VOP1__V_TRUNC_F32::Inst_VOP1__V_TRUNC_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_trunc_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_TRUNC_F32

    Inst_VOP1__V_TRUNC_F32::~Inst_VOP1__V_TRUNC_F32()
    {
    } // ~Inst_VOP1__V_TRUNC_F32

    // D.f = trunc(S0.f), return integer part of S0.f.
    void
    Inst_VOP1__V_TRUNC_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::trunc(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CEIL_F32::Inst_VOP1__V_CEIL_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_ceil_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CEIL_F32

    Inst_VOP1__V_CEIL_F32::~Inst_VOP1__V_CEIL_F32()
    {
    } // ~Inst_VOP1__V_CEIL_F32

    // D.f = ceil(S0.f);
    void
    Inst_VOP1__V_CEIL_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::ceil(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_RNDNE_F32::Inst_VOP1__V_RNDNE_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rndne_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_RNDNE_F32

    Inst_VOP1__V_RNDNE_F32::~Inst_VOP1__V_RNDNE_F32()
    {
    } // ~Inst_VOP1__V_RNDNE_F32

    // D.f = round_nearest_even(S0.f).
    void
    Inst_VOP1__V_RNDNE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = roundNearestEven(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_FLOOR_F32::Inst_VOP1__V_FLOOR_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_floor_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_FLOOR_F32

    Inst_VOP1__V_FLOOR_F32::~Inst_VOP1__V_FLOOR_F32()
    {
    } // ~Inst_VOP1__V_FLOOR_F32

    // D.f = floor(S0.f);
    void
    Inst_VOP1__V_FLOOR_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::floor(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_EXP_F32::Inst_VOP1__V_EXP_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_exp_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_EXP_F32

    Inst_VOP1__V_EXP_F32::~Inst_VOP1__V_EXP_F32()
    {
    } // ~Inst_VOP1__V_EXP_F32

    // D.f = pow(2.0, S0.f).
    void
    Inst_VOP1__V_EXP_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::pow(2.0, src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_LOG_F32::Inst_VOP1__V_LOG_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_log_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_LOG_F32

    Inst_VOP1__V_LOG_F32::~Inst_VOP1__V_LOG_F32()
    {
    } // ~Inst_VOP1__V_LOG_F32

    // D.f = log2(S0.f).
    void
    Inst_VOP1__V_LOG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::log2(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_RCP_F32::Inst_VOP1__V_RCP_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rcp_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_RCP_F32

    Inst_VOP1__V_RCP_F32::~Inst_VOP1__V_RCP_F32()
    {
    } // ~Inst_VOP1__V_RCP_F32

    // D.f = 1.0 / S0.f.
    void
    Inst_VOP1__V_RCP_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = 1.0 / src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_RCP_IFLAG_F32::Inst_VOP1__V_RCP_IFLAG_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rcp_iflag_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_RCP_IFLAG_F32

    Inst_VOP1__V_RCP_IFLAG_F32::~Inst_VOP1__V_RCP_IFLAG_F32()
    {
    } // ~Inst_VOP1__V_RCP_IFLAG_F32

    // D.f = 1.0 / S0.f.
    void
    Inst_VOP1__V_RCP_IFLAG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = 1.0 / src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_RSQ_F32::Inst_VOP1__V_RSQ_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rsq_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_RSQ_F32

    Inst_VOP1__V_RSQ_F32::~Inst_VOP1__V_RSQ_F32()
    {
    } // ~Inst_VOP1__V_RSQ_F32

    // D.f = 1.0 / sqrt(S0.f).
    void
    Inst_VOP1__V_RSQ_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = 1.0 / std::sqrt(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_RCP_F64::Inst_VOP1__V_RCP_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rcp_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_RCP_F64

    Inst_VOP1__V_RCP_F64::~Inst_VOP1__V_RCP_F64()
    {
    } // ~Inst_VOP1__V_RCP_F64

    // D.d = 1.0 / S0.d.
    void
    Inst_VOP1__V_RCP_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::fpclassify(src[lane]) == FP_ZERO) {
                    vdst[lane] = +INFINITY;
                } else if (std::isnan(src[lane])) {
                    vdst[lane] = NAN;
                } else if (std::isinf(src[lane])) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = -0.0;
                    } else {
                        vdst[lane] = 0.0;
                    }
                } else {
                    vdst[lane] = 1.0 / src[lane];
                }
            }
        }

        vdst.write();
    } // execute

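    // NOTE: the special cases above mirror IEEE division for 1.0 / x:
    //   +-0.0 -> +INFINITY (the sign of the zero is not propagated here),
    //   NaN   -> NaN,
    //   +-INF -> +-0.0 (signed zero preserved via signbit()),
    // and every other input takes the ordinary 1.0 / src path.
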
    Inst_VOP1__V_RSQ_F64::Inst_VOP1__V_RSQ_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rsq_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_RSQ_F64

    Inst_VOP1__V_RSQ_F64::~Inst_VOP1__V_RSQ_F64()
    {
    } // ~Inst_VOP1__V_RSQ_F64

    // D.d = 1.0 / sqrt(S0.d).
    void
    Inst_VOP1__V_RSQ_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::fpclassify(src[lane]) == FP_ZERO) {
                    vdst[lane] = +INFINITY;
                } else if (std::isnan(src[lane])) {
                    vdst[lane] = NAN;
                } else if (std::isinf(src[lane])
                           && !std::signbit(src[lane])) {
                    vdst[lane] = 0.0;
                } else if (std::signbit(src[lane])) {
                    vdst[lane] = NAN;
                } else {
                    vdst[lane] = 1.0 / std::sqrt(src[lane]);
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_SQRT_F32::Inst_VOP1__V_SQRT_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_sqrt_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_SQRT_F32

    Inst_VOP1__V_SQRT_F32::~Inst_VOP1__V_SQRT_F32()
    {
    } // ~Inst_VOP1__V_SQRT_F32

    // D.f = sqrt(S0.f).
    void
    Inst_VOP1__V_SQRT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::sqrt(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_SQRT_F64::Inst_VOP1__V_SQRT_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_sqrt_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_SQRT_F64

    Inst_VOP1__V_SQRT_F64::~Inst_VOP1__V_SQRT_F64()
    {
    } // ~Inst_VOP1__V_SQRT_F64

    // D.d = sqrt(S0.d).
    void
    Inst_VOP1__V_SQRT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::sqrt(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_SIN_F32::Inst_VOP1__V_SIN_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_sin_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_SIN_F32

    Inst_VOP1__V_SIN_F32::~Inst_VOP1__V_SIN_F32()
    {
    } // ~Inst_VOP1__V_SIN_F32

    // D.f = sin(S0.f * 2 * PI).
    void
    Inst_VOP1__V_SIN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();
        pi.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (src[lane] < -256.0 || src[lane] > 256.0) {
                    vdst[lane] = 0.0;
                } else {
                    vdst[lane] = std::sin(src[lane] * 2.0 * pi.rawData());
                }
            }
        }

        vdst.write();
    } // execute

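    // NOTE: the operand is in revolutions rather than radians: the result
    // is sin(S0.f * 2 * PI), so src = 0.25f yields sin(pi/2) = 1.0f.
    // Inputs outside [-256.0, 256.0] bypass the computation (those lanes
    // produce 0.0 here), reflecting the limited argument range of the
    // hardware instruction.
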
    Inst_VOP1__V_COS_F32::Inst_VOP1__V_COS_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cos_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_COS_F32

    Inst_VOP1__V_COS_F32::~Inst_VOP1__V_COS_F32()
    {
    } // ~Inst_VOP1__V_COS_F32

    // D.f = cos(S0.f * 2 * PI).
    void
    Inst_VOP1__V_COS_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();
        pi.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (src[lane] < -256.0 || src[lane] > 256.0) {
                    vdst[lane] = 0.0;
                } else {
                    vdst[lane] = std::cos(src[lane] * 2.0 * pi.rawData());
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_NOT_B32::Inst_VOP1__V_NOT_B32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_not_b32")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_NOT_B32

    Inst_VOP1__V_NOT_B32::~Inst_VOP1__V_NOT_B32()
    {
    } // ~Inst_VOP1__V_NOT_B32

    // D.u = ~S0.u.
    // Input and output modifiers not supported.
    void
    Inst_VOP1__V_NOT_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = ~src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_BFREV_B32::Inst_VOP1__V_BFREV_B32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_bfrev_b32")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_BFREV_B32

    Inst_VOP1__V_BFREV_B32::~Inst_VOP1__V_BFREV_B32()
    {
    } // ~Inst_VOP1__V_BFREV_B32

    // D.u[31:0] = S0.u[0:31], bitfield reverse.
    // Input and output modifiers not supported.
    void
    Inst_VOP1__V_BFREV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = reverseBits(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_FFBH_U32::Inst_VOP1__V_FFBH_U32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_ffbh_u32")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_FFBH_U32

    Inst_VOP1__V_FFBH_U32::~Inst_VOP1__V_FFBH_U32()
    {
    } // ~Inst_VOP1__V_FFBH_U32

    // D.u = position of first 1 in S0.u from MSB;
    // D.u = 0xffffffff if S0.u == 0.
    void
    Inst_VOP1__V_FFBH_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = findFirstOneMsb(src[lane]);
            }
        }

        vdst.write();
    } // execute

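    // NOTE: findFirstOneMsb() counts from the most-significant end, i.e.
    // the result is the number of leading zeros before the first 1 bit:
    // 0x80000000 -> 0, 0x00010000 -> 15, 0x1 -> 31, and 0x0 -> 0xffffffff
    // per the comment above.
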
    Inst_VOP1__V_FFBL_B32::Inst_VOP1__V_FFBL_B32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_ffbl_b32")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_FFBL_B32

    Inst_VOP1__V_FFBL_B32::~Inst_VOP1__V_FFBL_B32()
    {
    } // ~Inst_VOP1__V_FFBL_B32

    // D.u = position of first 1 in S0.u from LSB;
    // D.u = 0xffffffff if S0.u == 0.
    void
    Inst_VOP1__V_FFBL_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = findFirstOne(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_FFBH_I32::Inst_VOP1__V_FFBH_I32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_ffbh_i32")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_FFBH_I32

    Inst_VOP1__V_FFBH_I32::~Inst_VOP1__V_FFBH_I32()
    {
    } // ~Inst_VOP1__V_FFBH_I32

    // D.u = position of first bit different from sign bit in S0.i from MSB;
    // D.u = 0xffffffff if S0.i == 0 or S0.i == 0xffffffff.
    void
    Inst_VOP1__V_FFBH_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src(gpuDynInst, instData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = firstOppositeSignBit(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_FREXP_EXP_I32_F64::Inst_VOP1__V_FREXP_EXP_I32_F64(
          InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_frexp_exp_i32_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_FREXP_EXP_I32_F64

    Inst_VOP1__V_FREXP_EXP_I32_F64::~Inst_VOP1__V_FREXP_EXP_I32_F64()
    {
    } // ~Inst_VOP1__V_FREXP_EXP_I32_F64

    void
    Inst_VOP1__V_FREXP_EXP_I32_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isinf(src[lane]) || std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else {
                    VecElemI32 exp(0);
                    std::frexp(src[lane], &exp);
                    vdst[lane] = exp;
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_FREXP_MANT_F64::Inst_VOP1__V_FREXP_MANT_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_frexp_mant_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_FREXP_MANT_F64

    Inst_VOP1__V_FREXP_MANT_F64::~Inst_VOP1__V_FREXP_MANT_F64()
    {
    } // ~Inst_VOP1__V_FREXP_MANT_F64

    void
    Inst_VOP1__V_FREXP_MANT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isinf(src[lane]) || std::isnan(src[lane])) {
                    vdst[lane] = src[lane];
                } else {
                    VecElemI32 exp(0);
                    vdst[lane] = std::frexp(src[lane], &exp);
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_FRACT_F64::Inst_VOP1__V_FRACT_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_fract_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_FRACT_F64

    Inst_VOP1__V_FRACT_F64::~Inst_VOP1__V_FRACT_F64()
    {
    } // ~Inst_VOP1__V_FRACT_F64

    void
    Inst_VOP1__V_FRACT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemF64 int_part(0.0);
                vdst[lane] = std::modf(src[lane], &int_part);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_FREXP_EXP_I32_F32::Inst_VOP1__V_FREXP_EXP_I32_F32(
          InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_frexp_exp_i32_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_FREXP_EXP_I32_F32

    Inst_VOP1__V_FREXP_EXP_I32_F32::~Inst_VOP1__V_FREXP_EXP_I32_F32()
    {
    } // ~Inst_VOP1__V_FREXP_EXP_I32_F32

    // frexp(S0.f, Exponent(S0.f))
    // if (S0.f == INF || S0.f == NAN) then D.i = 0;
    // else D.i = Exponent(S0.f);
    void
    Inst_VOP1__V_FREXP_EXP_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isinf(src[lane]) || std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else {
                    VecElemI32 exp(0);
                    std::frexp(src[lane], &exp);
                    vdst[lane] = exp;
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_FREXP_MANT_F32::Inst_VOP1__V_FREXP_MANT_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_frexp_mant_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_FREXP_MANT_F32

    Inst_VOP1__V_FREXP_MANT_F32::~Inst_VOP1__V_FREXP_MANT_F32()
    {
    } // ~Inst_VOP1__V_FREXP_MANT_F32

    // if (S0.f == INF || S0.f == NAN) then D.f = S0.f;
    // else D.f = frexp(S0.f, Exponent(S0.f)).
    void
    Inst_VOP1__V_FREXP_MANT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isinf(src[lane]) || std::isnan(src[lane])) {
                    vdst[lane] = src[lane];
                } else {
                    VecElemI32 exp(0);
                    vdst[lane] = std::frexp(src[lane], &exp);
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CLREXCP::Inst_VOP1__V_CLREXCP(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_clrexcp")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_CLREXCP

    Inst_VOP1__V_CLREXCP::~Inst_VOP1__V_CLREXCP()
    {
    } // ~Inst_VOP1__V_CLREXCP

    void
    Inst_VOP1__V_CLREXCP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_CVT_F16_U16::Inst_VOP1__V_CVT_F16_U16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f16_u16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_CVT_F16_U16

    Inst_VOP1__V_CVT_F16_U16::~Inst_VOP1__V_CVT_F16_U16()
    {
    } // ~Inst_VOP1__V_CVT_F16_U16

    // D.f16 = uint16_to_flt16(S.u16).
    void
    Inst_VOP1__V_CVT_F16_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_CVT_F16_I16::Inst_VOP1__V_CVT_F16_I16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f16_i16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_CVT_F16_I16

    Inst_VOP1__V_CVT_F16_I16::~Inst_VOP1__V_CVT_F16_I16()
    {
    } // ~Inst_VOP1__V_CVT_F16_I16

    // D.f16 = int16_to_flt16(S.i16).
    void
    Inst_VOP1__V_CVT_F16_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_CVT_U16_F16::Inst_VOP1__V_CVT_U16_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_u16_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_CVT_U16_F16

    Inst_VOP1__V_CVT_U16_F16::~Inst_VOP1__V_CVT_U16_F16()
    {
    } // ~Inst_VOP1__V_CVT_U16_F16

    // D.u16 = flt16_to_uint16(S.f16).
    void
    Inst_VOP1__V_CVT_U16_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_CVT_I16_F16::Inst_VOP1__V_CVT_I16_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_i16_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_CVT_I16_F16

    Inst_VOP1__V_CVT_I16_F16::~Inst_VOP1__V_CVT_I16_F16()
    {
    } // ~Inst_VOP1__V_CVT_I16_F16

    // D.i16 = flt16_to_int16(S.f16).
    void
    Inst_VOP1__V_CVT_I16_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_RCP_F16::Inst_VOP1__V_RCP_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rcp_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_RCP_F16

    Inst_VOP1__V_RCP_F16::~Inst_VOP1__V_RCP_F16()
    {
    } // ~Inst_VOP1__V_RCP_F16

    // if (S0.f16 == 1.0f)
    //     D.f16 = 1.0f;
    // else
    //     D.f16 = 1 / S0.f16;
    void
    Inst_VOP1__V_RCP_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_SQRT_F16::Inst_VOP1__V_SQRT_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_sqrt_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_SQRT_F16

    Inst_VOP1__V_SQRT_F16::~Inst_VOP1__V_SQRT_F16()
    {
    } // ~Inst_VOP1__V_SQRT_F16

    // if (S0.f16 == 1.0f)
    //     D.f16 = 1.0f;
    // else
    //     D.f16 = sqrt(S0.f16);
    void
    Inst_VOP1__V_SQRT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_RSQ_F16::Inst_VOP1__V_RSQ_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rsq_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_RSQ_F16

    Inst_VOP1__V_RSQ_F16::~Inst_VOP1__V_RSQ_F16()
    {
    } // ~Inst_VOP1__V_RSQ_F16

    // if (S0.f16 == 1.0f)
    //     D.f16 = 1.0f;
    // else
    //     D.f16 = 1 / sqrt(S0.f16);
    void
    Inst_VOP1__V_RSQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_LOG_F16::Inst_VOP1__V_LOG_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_log_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_LOG_F16

    Inst_VOP1__V_LOG_F16::~Inst_VOP1__V_LOG_F16()
    {
    } // ~Inst_VOP1__V_LOG_F16

    // if (S0.f16 == 1.0f)
    //     D.f16 = 0.0f;
    // else
    //     D.f16 = log2(S0.f16);
    void
    Inst_VOP1__V_LOG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_EXP_F16::Inst_VOP1__V_EXP_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_exp_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_EXP_F16

    Inst_VOP1__V_EXP_F16::~Inst_VOP1__V_EXP_F16()
    {
    } // ~Inst_VOP1__V_EXP_F16

    // if (S0.f16 == 0.0f)
    //     D.f16 = 1.0f;
    // else
    //     D.f16 = pow(2.0, S0.f16).
    void
    Inst_VOP1__V_EXP_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_FREXP_MANT_F16::Inst_VOP1__V_FREXP_MANT_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_frexp_mant_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_FREXP_MANT_F16

    Inst_VOP1__V_FREXP_MANT_F16::~Inst_VOP1__V_FREXP_MANT_F16()
    {
    } // ~Inst_VOP1__V_FREXP_MANT_F16

    // if (S0.f16 == +-INF || S0.f16 == NAN)
    //     D.f16 = S0.f16;
    // else
    //     D.f16 = mantissa(S0.f16).
    void
    Inst_VOP1__V_FREXP_MANT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_FREXP_EXP_I16_F16::Inst_VOP1__V_FREXP_EXP_I16_F16(
          InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_frexp_exp_i16_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_FREXP_EXP_I16_F16

    Inst_VOP1__V_FREXP_EXP_I16_F16::~Inst_VOP1__V_FREXP_EXP_I16_F16()
    {
    } // ~Inst_VOP1__V_FREXP_EXP_I16_F16

    // frexp(S0.f16, Exponent(S0.f16))
    // if (S0.f16 == +-INF || S0.f16 == NAN)
    //     D.i16 = 0;
    // else
    //     D.i16 = Exponent(S0.f16);
    void
    Inst_VOP1__V_FREXP_EXP_I16_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_FLOOR_F16::Inst_VOP1__V_FLOOR_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_floor_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_FLOOR_F16

    Inst_VOP1__V_FLOOR_F16::~Inst_VOP1__V_FLOOR_F16()
    {
    } // ~Inst_VOP1__V_FLOOR_F16

    // D.f16 = floor(S0.f16);
    void
    Inst_VOP1__V_FLOOR_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_CEIL_F16::Inst_VOP1__V_CEIL_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_ceil_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_CEIL_F16

    Inst_VOP1__V_CEIL_F16::~Inst_VOP1__V_CEIL_F16()
    {
    } // ~Inst_VOP1__V_CEIL_F16

    // D.f16 = ceil(S0.f16);
    void
    Inst_VOP1__V_CEIL_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_TRUNC_F16::Inst_VOP1__V_TRUNC_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_trunc_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_TRUNC_F16

    Inst_VOP1__V_TRUNC_F16::~Inst_VOP1__V_TRUNC_F16()
    {
    } // ~Inst_VOP1__V_TRUNC_F16

    // D.f16 = trunc(S0.f16).
    void
    Inst_VOP1__V_TRUNC_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_RNDNE_F16::Inst_VOP1__V_RNDNE_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rndne_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_RNDNE_F16

    Inst_VOP1__V_RNDNE_F16::~Inst_VOP1__V_RNDNE_F16()
    {
    } // ~Inst_VOP1__V_RNDNE_F16

    // D.f16 = roundNearestEven(S0.f16);
    void
    Inst_VOP1__V_RNDNE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_FRACT_F16::Inst_VOP1__V_FRACT_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_fract_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_FRACT_F16

    Inst_VOP1__V_FRACT_F16::~Inst_VOP1__V_FRACT_F16()
    {
    } // ~Inst_VOP1__V_FRACT_F16

    // D.f16 = S0.f16 + -floor(S0.f16).
    void
    Inst_VOP1__V_FRACT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_SIN_F16::Inst_VOP1__V_SIN_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_sin_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_SIN_F16

    Inst_VOP1__V_SIN_F16::~Inst_VOP1__V_SIN_F16()
    {
    } // ~Inst_VOP1__V_SIN_F16

    // D.f16 = sin(S0.f16 * 2 * PI).
    void
    Inst_VOP1__V_SIN_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_COS_F16::Inst_VOP1__V_COS_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cos_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_COS_F16

    Inst_VOP1__V_COS_F16::~Inst_VOP1__V_COS_F16()
    {
    } // ~Inst_VOP1__V_COS_F16

    // D.f16 = cos(S0.f16 * 2 * PI).
    void
    Inst_VOP1__V_COS_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_EXP_LEGACY_F32::Inst_VOP1__V_EXP_LEGACY_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_exp_legacy_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_EXP_LEGACY_F32

    Inst_VOP1__V_EXP_LEGACY_F32::~Inst_VOP1__V_EXP_LEGACY_F32()
    {
    } // ~Inst_VOP1__V_EXP_LEGACY_F32

    // D.f = pow(2.0, S0.f)
    void
    Inst_VOP1__V_EXP_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::pow(2.0, src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_LOG_LEGACY_F32::Inst_VOP1__V_LOG_LEGACY_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_log_legacy_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_LOG_LEGACY_F32

    Inst_VOP1__V_LOG_LEGACY_F32::~Inst_VOP1__V_LOG_LEGACY_F32()
    {
    } // ~Inst_VOP1__V_LOG_LEGACY_F32

    // D.f = log2(S0.f).
    void
    Inst_VOP1__V_LOG_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::log2(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOPC__V_CMP_CLASS_F32::Inst_VOPC__V_CMP_CLASS_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_class_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_CLASS_F32

    Inst_VOPC__V_CMP_CLASS_F32::~Inst_VOPC__V_CMP_CLASS_F32()
    {
    } // ~Inst_VOPC__V_CMP_CLASS_F32

    // VCC = IEEE numeric class function specified in S1.u, performed on S0.f
    // The function reports true if the floating point value is any of the
    // numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOPC__V_CMP_CLASS_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        // The condition-code result is written, so the operand must not be
        // a Const operand (setBit()/write() are used below).
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
                    // is NaN
                    if (std::isnan(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 2)) {
                    // is -infinity
                    if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 3)) {
                    // is -normal
                    if (std::isnormal(src0[lane])
                        && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 4)) {
                    // is -denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 5)) {
                    // is -zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 6)) {
                    // is +zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 7)) {
                    // is +denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 8)) {
                    // is +normal
                    if (std::isnormal(src0[lane])
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 9)) {
                    // is +infinity
                    if (std::isinf(src0[lane]) && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
    } // execute

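    // NOTE: S1 is a ten-bit class mask, and VCC bit <lane> becomes 1 iff
    // src0 falls in any selected class. For example, mask 0x003 (bits 0-1)
    // is a plain NaN test, 0x204 (bits 2 and 9) tests for infinity of
    // either sign, and 0x060 (bits 5-6) tests for a zero of either sign.
    // Signaling and quiet NaNs are not distinguished here: both mask bits
    // feed the same std::isnan() check.
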
    Inst_VOPC__V_CMPX_CLASS_F32::Inst_VOPC__V_CMPX_CLASS_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_class_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_CLASS_F32

    Inst_VOPC__V_CMPX_CLASS_F32::~Inst_VOPC__V_CMPX_CLASS_F32()
    {
    } // ~Inst_VOPC__V_CMPX_CLASS_F32

    // EXEC, VCC = IEEE numeric class function specified in S1.u, performed
    // on S0.f. The function reports true if the floating point value is any
    // of the numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOPC__V_CMPX_CLASS_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        // Written below via setBit()/write(), so not a Const operand.
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
                    // is NaN
                    if (std::isnan(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 2)) {
                    // is -infinity
                    if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 3)) {
                    // is -normal
                    if (std::isnormal(src0[lane])
                        && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 4)) {
                    // is -denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 5)) {
                    // is -zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 6)) {
                    // is +zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 7)) {
                    // is +denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 8)) {
                    // is +normal
                    if (std::isnormal(src0[lane])
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 9)) {
                    // is +infinity
                    if (std::isinf(src0[lane]) && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                vcc.setBit(lane, 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_CLASS_F64::Inst_VOPC__V_CMP_CLASS_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_class_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMP_CLASS_F64

    Inst_VOPC__V_CMP_CLASS_F64::~Inst_VOPC__V_CMP_CLASS_F64()
    {
    } // ~Inst_VOPC__V_CMP_CLASS_F64

    // VCC = IEEE numeric class function specified in S1.u, performed on S0.d
    // The function reports true if the floating point value is any of the
    // numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOPC__V_CMP_CLASS_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        // Written below via setBit()/write(), so not a Const operand.
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
                    // is NaN
                    if (std::isnan(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 2)) {
                    // is -infinity
                    if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 3)) {
                    // is -normal
                    if (std::isnormal(src0[lane])
                        && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 4)) {
                    // is -denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 5)) {
                    // is -zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 6)) {
                    // is +zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 7)) {
                    // is +denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 8)) {
                    // is +normal
                    if (std::isnormal(src0[lane])
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 9)) {
                    // is +infinity
                    if (std::isinf(src0[lane])
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
    } // execute

9746 Inst_VOPC__V_CMPX_CLASS_F64::Inst_VOPC__V_CMPX_CLASS_F64(InFmt_VOPC
*iFmt
)
9747 : Inst_VOPC(iFmt
, "v_cmpx_class_f64")
9751 } // Inst_VOPC__V_CMPX_CLASS_F64
9753 Inst_VOPC__V_CMPX_CLASS_F64::~Inst_VOPC__V_CMPX_CLASS_F64()
9755 } // ~Inst_VOPC__V_CMPX_CLASS_F64
9757 // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
9758 // S0.d The function reports true if the floating point value is any of
9759 // the numeric types selected in S1.u according to the following list:
9760 // S1.u[0] -- value is a signaling NaN.
9761 // S1.u[1] -- value is a quiet NaN.
9762 // S1.u[2] -- value is negative infinity.
9763 // S1.u[3] -- value is a negative normal value.
9764 // S1.u[4] -- value is a negative denormal value.
9765 // S1.u[5] -- value is negative zero.
9766 // S1.u[6] -- value is positive zero.
9767 // S1.u[7] -- value is a positive denormal value.
9768 // S1.u[8] -- value is a positive normal value.
9769 // S1.u[9] -- value is positive infinity.
9771 Inst_VOPC__V_CMPX_CLASS_F64::execute(GPUDynInstPtr gpuDynInst
)
9773 Wavefront
*wf
= gpuDynInst
->wavefront();
9774 ConstVecOperandF64
src0(gpuDynInst
, instData
.SRC0
);
9775 ConstVecOperandU32
src1(gpuDynInst
, instData
.VSRC1
);
9776 ConstScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
9781 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
9782 if (wf
->execMask(lane
)) {
9783 if (bits(src1
[lane
], 0) || bits(src1
[lane
], 1)) {
9785 if (std::isnan(src0
[lane
])) {
9786 vcc
.setBit(lane
, 1);
9790 if (bits(src1
[lane
], 2)) {
9792 if (std::isinf(src0
[lane
]) && std::signbit(src0
[lane
])) {
9793 vcc
.setBit(lane
, 1);
9797 if (bits(src1
[lane
], 3)) {
9799 if (std::isnormal(src0
[lane
])
9800 && std::signbit(src0
[lane
])) {
9801 vcc
.setBit(lane
, 1);
9805 if (bits(src1
[lane
], 4)) {
9807 if (std::fpclassify(src0
[lane
]) == FP_SUBNORMAL
9808 && std::signbit(src0
[lane
])) {
9809 vcc
.setBit(lane
, 1);
9813 if (bits(src1
[lane
], 5)) {
9815 if (std::fpclassify(src0
[lane
]) == FP_ZERO
9816 && std::signbit(src0
[lane
])) {
9817 vcc
.setBit(lane
, 1);
9821 if (bits(src1
[lane
], 6)) {
9823 if (std::fpclassify(src0
[lane
]) == FP_ZERO
9824 && !std::signbit(src0
[lane
])) {
9825 vcc
.setBit(lane
, 1);
9829 if (bits(src1
[lane
], 7)) {
9831 if (std::fpclassify(src0
[lane
]) == FP_SUBNORMAL
9832 && !std::signbit(src0
[lane
])) {
9833 vcc
.setBit(lane
, 1);
9837 if (bits(src1
[lane
], 8)) {
9839 if (std::isnormal(src0
[lane
])
9840 && !std::signbit(src0
[lane
])) {
9841 vcc
.setBit(lane
, 1);
9845 if (bits(src1
[lane
], 9)) {
9847 if (std::isinf(src0
[lane
])
9848 && !std::signbit(src0
[lane
])) {
9849 vcc
.setBit(lane
, 1);
9857 wf
->execMask() = vcc
.rawData();
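    // The F64 class compares reuse the same ten class bits; only the operand
    // type changes. std::fpclassify, std::isnormal, std::isinf, and
    // std::signbit are overloaded on double, so the per-lane checks read
    // identically to the F32 versions above.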
    Inst_VOPC__V_CMP_CLASS_F16::Inst_VOPC__V_CMP_CLASS_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_class_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_CLASS_F16

    Inst_VOPC__V_CMP_CLASS_F16::~Inst_VOPC__V_CMP_CLASS_F16()
    {
    } // ~Inst_VOPC__V_CMP_CLASS_F16

    // VCC = IEEE numeric class function specified in S1.u, performed on
    // S0.f16. The function reports true if the floating point value is any
    // of the numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOPC__V_CMP_CLASS_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_CLASS_F16::Inst_VOPC__V_CMPX_CLASS_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_class_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_CLASS_F16

    Inst_VOPC__V_CMPX_CLASS_F16::~Inst_VOPC__V_CMPX_CLASS_F16()
    {
    } // ~Inst_VOPC__V_CMPX_CLASS_F16

    // EXEC, VCC = IEEE numeric class function specified in S1.u, performed
    // on S0.f16. The function reports true if the floating point value is
    // any of the numeric types selected in S1.u according to the following
    // list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOPC__V_CMPX_CLASS_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_F_F16::Inst_VOPC__V_CMP_F_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_f_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_F_F16

    Inst_VOPC__V_CMP_F_F16::~Inst_VOPC__V_CMP_F_F16()
    {
    } // ~Inst_VOPC__V_CMP_F_F16

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_F_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_LT_F16::Inst_VOPC__V_CMP_LT_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lt_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_LT_F16

    Inst_VOPC__V_CMP_LT_F16::~Inst_VOPC__V_CMP_LT_F16()
    {
    } // ~Inst_VOPC__V_CMP_LT_F16

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_EQ_F16::Inst_VOPC__V_CMP_EQ_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_eq_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_EQ_F16

    Inst_VOPC__V_CMP_EQ_F16::~Inst_VOPC__V_CMP_EQ_F16()
    {
    } // ~Inst_VOPC__V_CMP_EQ_F16

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_EQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_LE_F16::Inst_VOPC__V_CMP_LE_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_le_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_LE_F16

    Inst_VOPC__V_CMP_LE_F16::~Inst_VOPC__V_CMP_LE_F16()
    {
    } // ~Inst_VOPC__V_CMP_LE_F16

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_GT_F16::Inst_VOPC__V_CMP_GT_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_gt_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_GT_F16

    Inst_VOPC__V_CMP_GT_F16::~Inst_VOPC__V_CMP_GT_F16()
    {
    } // ~Inst_VOPC__V_CMP_GT_F16

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_LG_F16::Inst_VOPC__V_CMP_LG_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lg_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_LG_F16

    Inst_VOPC__V_CMP_LG_F16::~Inst_VOPC__V_CMP_LG_F16()
    {
    } // ~Inst_VOPC__V_CMP_LG_F16

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_GE_F16::Inst_VOPC__V_CMP_GE_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ge_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_GE_F16

    Inst_VOPC__V_CMP_GE_F16::~Inst_VOPC__V_CMP_GE_F16()
    {
    } // ~Inst_VOPC__V_CMP_GE_F16

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_O_F16::Inst_VOPC__V_CMP_O_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_o_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_O_F16

    Inst_VOPC__V_CMP_O_F16::~Inst_VOPC__V_CMP_O_F16()
    {
    } // ~Inst_VOPC__V_CMP_O_F16

    // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_O_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_U_F16::Inst_VOPC__V_CMP_U_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_u_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_U_F16

    Inst_VOPC__V_CMP_U_F16::~Inst_VOPC__V_CMP_U_F16()
    {
    } // ~Inst_VOPC__V_CMP_U_F16

    // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_U_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_NGE_F16::Inst_VOPC__V_CMP_NGE_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_nge_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_NGE_F16

    Inst_VOPC__V_CMP_NGE_F16::~Inst_VOPC__V_CMP_NGE_F16()
    {
    } // ~Inst_VOPC__V_CMP_NGE_F16

    // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NGE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_NLG_F16::Inst_VOPC__V_CMP_NLG_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_nlg_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_NLG_F16

    Inst_VOPC__V_CMP_NLG_F16::~Inst_VOPC__V_CMP_NLG_F16()
    {
    } // ~Inst_VOPC__V_CMP_NLG_F16

    // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NLG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_NGT_F16::Inst_VOPC__V_CMP_NGT_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ngt_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_NGT_F16

    Inst_VOPC__V_CMP_NGT_F16::~Inst_VOPC__V_CMP_NGT_F16()
    {
    } // ~Inst_VOPC__V_CMP_NGT_F16

    // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NGT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_NLE_F16::Inst_VOPC__V_CMP_NLE_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_nle_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_NLE_F16

    Inst_VOPC__V_CMP_NLE_F16::~Inst_VOPC__V_CMP_NLE_F16()
    {
    } // ~Inst_VOPC__V_CMP_NLE_F16

    // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NLE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_NEQ_F16::Inst_VOPC__V_CMP_NEQ_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_neq_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_NEQ_F16

    Inst_VOPC__V_CMP_NEQ_F16::~Inst_VOPC__V_CMP_NEQ_F16()
    {
    } // ~Inst_VOPC__V_CMP_NEQ_F16

    // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NEQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_NLT_F16::Inst_VOPC__V_CMP_NLT_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_nlt_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_NLT_F16

    Inst_VOPC__V_CMP_NLT_F16::~Inst_VOPC__V_CMP_NLT_F16()
    {
    } // ~Inst_VOPC__V_CMP_NLT_F16

    // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NLT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_TRU_F16::Inst_VOPC__V_CMP_TRU_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_tru_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_TRU_F16

    Inst_VOPC__V_CMP_TRU_F16::~Inst_VOPC__V_CMP_TRU_F16()
    {
    } // ~Inst_VOPC__V_CMP_TRU_F16

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_TRU_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_F_F16::Inst_VOPC__V_CMPX_F_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_f_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_F_F16

    Inst_VOPC__V_CMPX_F_F16::~Inst_VOPC__V_CMPX_F_F16()
    {
    } // ~Inst_VOPC__V_CMPX_F_F16

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_F_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_LT_F16::Inst_VOPC__V_CMPX_LT_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lt_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_LT_F16

    Inst_VOPC__V_CMPX_LT_F16::~Inst_VOPC__V_CMPX_LT_F16()
    {
    } // ~Inst_VOPC__V_CMPX_LT_F16

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_EQ_F16::Inst_VOPC__V_CMPX_EQ_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_eq_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_EQ_F16

    Inst_VOPC__V_CMPX_EQ_F16::~Inst_VOPC__V_CMPX_EQ_F16()
    {
    } // ~Inst_VOPC__V_CMPX_EQ_F16

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_EQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_LE_F16::Inst_VOPC__V_CMPX_LE_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_le_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_LE_F16

    Inst_VOPC__V_CMPX_LE_F16::~Inst_VOPC__V_CMPX_LE_F16()
    {
    } // ~Inst_VOPC__V_CMPX_LE_F16

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_GT_F16::Inst_VOPC__V_CMPX_GT_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_gt_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_GT_F16

    Inst_VOPC__V_CMPX_GT_F16::~Inst_VOPC__V_CMPX_GT_F16()
    {
    } // ~Inst_VOPC__V_CMPX_GT_F16

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_LG_F16::Inst_VOPC__V_CMPX_LG_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lg_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_LG_F16

    Inst_VOPC__V_CMPX_LG_F16::~Inst_VOPC__V_CMPX_LG_F16()
    {
    } // ~Inst_VOPC__V_CMPX_LG_F16

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_GE_F16::Inst_VOPC__V_CMPX_GE_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ge_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_GE_F16

    Inst_VOPC__V_CMPX_GE_F16::~Inst_VOPC__V_CMPX_GE_F16()
    {
    } // ~Inst_VOPC__V_CMPX_GE_F16

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_O_F16::Inst_VOPC__V_CMPX_O_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_o_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_O_F16

    Inst_VOPC__V_CMPX_O_F16::~Inst_VOPC__V_CMPX_O_F16()
    {
    } // ~Inst_VOPC__V_CMPX_O_F16

    // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOPC__V_CMPX_O_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_U_F16::Inst_VOPC__V_CMPX_U_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_u_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_U_F16

    Inst_VOPC__V_CMPX_U_F16::~Inst_VOPC__V_CMPX_U_F16()
    {
    } // ~Inst_VOPC__V_CMPX_U_F16

    // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOPC__V_CMPX_U_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_NGE_F16::Inst_VOPC__V_CMPX_NGE_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nge_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_NGE_F16

    Inst_VOPC__V_CMPX_NGE_F16::~Inst_VOPC__V_CMPX_NGE_F16()
    {
    } // ~Inst_VOPC__V_CMPX_NGE_F16

    // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NGE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_NLG_F16::Inst_VOPC__V_CMPX_NLG_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nlg_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_NLG_F16

    Inst_VOPC__V_CMPX_NLG_F16::~Inst_VOPC__V_CMPX_NLG_F16()
    {
    } // ~Inst_VOPC__V_CMPX_NLG_F16

    // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NLG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_NGT_F16::Inst_VOPC__V_CMPX_NGT_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ngt_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_NGT_F16

    Inst_VOPC__V_CMPX_NGT_F16::~Inst_VOPC__V_CMPX_NGT_F16()
    {
    } // ~Inst_VOPC__V_CMPX_NGT_F16

    // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NGT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_NLE_F16::Inst_VOPC__V_CMPX_NLE_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nle_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_NLE_F16

    Inst_VOPC__V_CMPX_NLE_F16::~Inst_VOPC__V_CMPX_NLE_F16()
    {
    } // ~Inst_VOPC__V_CMPX_NLE_F16

    // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NLE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_NEQ_F16::Inst_VOPC__V_CMPX_NEQ_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_neq_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_NEQ_F16

    Inst_VOPC__V_CMPX_NEQ_F16::~Inst_VOPC__V_CMPX_NEQ_F16()
    {
    } // ~Inst_VOPC__V_CMPX_NEQ_F16

    // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NEQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_NLT_F16::Inst_VOPC__V_CMPX_NLT_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nlt_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_NLT_F16

    Inst_VOPC__V_CMPX_NLT_F16::~Inst_VOPC__V_CMPX_NLT_F16()
    {
    } // ~Inst_VOPC__V_CMPX_NLT_F16

    // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NLT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_TRU_F16::Inst_VOPC__V_CMPX_TRU_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_tru_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_TRU_F16

    Inst_VOPC__V_CMPX_TRU_F16::~Inst_VOPC__V_CMPX_TRU_F16()
    {
    } // ~Inst_VOPC__V_CMPX_TRU_F16

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_TRU_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
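    // All of the F16 compares above raise panicUnimplemented(): this model
    // does not implement half-precision VOPC compares. The F32 and F64
    // families below are implemented.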
    Inst_VOPC__V_CMP_F_F32::Inst_VOPC__V_CMP_F_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_f_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_F_F32

    Inst_VOPC__V_CMP_F_F32::~Inst_VOPC__V_CMP_F_F32()
    {
    } // ~Inst_VOPC__V_CMP_F_F32

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_F_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
    } // execute
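    // v_cmp_f_* always writes 0, so it can serve as a one-instruction way of
    // clearing the VCC bits of the active lanes; v_cmp_tru_* further below
    // is its all-ones counterpart.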
    Inst_VOPC__V_CMP_LT_F32::Inst_VOPC__V_CMP_LT_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lt_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_LT_F32

    Inst_VOPC__V_CMP_LT_F32::~Inst_VOPC__V_CMP_LT_F32()
    {
    } // ~Inst_VOPC__V_CMP_LT_F32

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_EQ_F32::Inst_VOPC__V_CMP_EQ_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_eq_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_EQ_F32

    Inst_VOPC__V_CMP_EQ_F32::~Inst_VOPC__V_CMP_EQ_F32()
    {
    } // ~Inst_VOPC__V_CMP_EQ_F32

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_EQ_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LE_F32::Inst_VOPC__V_CMP_LE_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_le_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_LE_F32

    Inst_VOPC__V_CMP_LE_F32::~Inst_VOPC__V_CMP_LE_F32()
    {
    } // ~Inst_VOPC__V_CMP_LE_F32

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GT_F32::Inst_VOPC__V_CMP_GT_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_gt_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_GT_F32

    Inst_VOPC__V_CMP_GT_F32::~Inst_VOPC__V_CMP_GT_F32()
    {
    } // ~Inst_VOPC__V_CMP_GT_F32

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LG_F32::Inst_VOPC__V_CMP_LG_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lg_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_LG_F32

    Inst_VOPC__V_CMP_LG_F32::~Inst_VOPC__V_CMP_LG_F32()
    {
    } // ~Inst_VOPC__V_CMP_LG_F32

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, (src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
    } // execute
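    // Note that LG ("less than or greater than") is built from the ordered
    // operators < and >, so a NaN in either source yields 0 here, whereas
    // the NEQ compare further below uses != and therefore yields 1 on NaN.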
    Inst_VOPC__V_CMP_GE_F32::Inst_VOPC__V_CMP_GE_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ge_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_GE_F32

    Inst_VOPC__V_CMP_GE_F32::~Inst_VOPC__V_CMP_GE_F32()
    {
    } // ~Inst_VOPC__V_CMP_GE_F32

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_O_F32::Inst_VOPC__V_CMP_O_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_o_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_O_F32

    Inst_VOPC__V_CMP_O_F32::~Inst_VOPC__V_CMP_O_F32()
    {
    } // ~Inst_VOPC__V_CMP_O_F32

    // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_O_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, (!std::isnan(src0[lane])
                    && !std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_U_F32::Inst_VOPC__V_CMP_U_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_u_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_U_F32

    Inst_VOPC__V_CMP_U_F32::~Inst_VOPC__V_CMP_U_F32()
    {
    } // ~Inst_VOPC__V_CMP_U_F32

    // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_U_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, (std::isnan(src0[lane])
                    || std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        vcc.write();
    } // execute
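    // O ("ordered") and U ("unordered") are pure NaN tests: O is 1 only when
    // neither source is NaN, and U is its exact complement.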
    Inst_VOPC__V_CMP_NGE_F32::Inst_VOPC__V_CMP_NGE_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_nge_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_NGE_F32

    Inst_VOPC__V_CMP_NGE_F32::~Inst_VOPC__V_CMP_NGE_F32()
    {
    } // ~Inst_VOPC__V_CMP_NGE_F32

    // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NGE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
    } // execute
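    // The N* compares are negations of the corresponding ordered compares,
    // which makes them unordered predicates: !(S0 >= S1) is 1 not only when
    // S0 < S1 but also when either source is NaN.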
    Inst_VOPC__V_CMP_NLG_F32::Inst_VOPC__V_CMP_NLG_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_nlg_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_NLG_F32

    Inst_VOPC__V_CMP_NLG_F32::~Inst_VOPC__V_CMP_NLG_F32()
    {
    } // ~Inst_VOPC__V_CMP_NLG_F32

    // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NLG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NGT_F32::Inst_VOPC__V_CMP_NGT_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ngt_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_NGT_F32

    Inst_VOPC__V_CMP_NGT_F32::~Inst_VOPC__V_CMP_NGT_F32()
    {
    } // ~Inst_VOPC__V_CMP_NGT_F32

    // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NGT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NLE_F32::Inst_VOPC__V_CMP_NLE_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_nle_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_NLE_F32

    Inst_VOPC__V_CMP_NLE_F32::~Inst_VOPC__V_CMP_NLE_F32()
    {
    } // ~Inst_VOPC__V_CMP_NLE_F32

    // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NLE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NEQ_F32::Inst_VOPC__V_CMP_NEQ_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_neq_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_NEQ_F32

    Inst_VOPC__V_CMP_NEQ_F32::~Inst_VOPC__V_CMP_NEQ_F32()
    {
    } // ~Inst_VOPC__V_CMP_NEQ_F32

    // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NEQ_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute
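    // NEQ is implemented directly with operator!=, which is already an
    // unordered predicate in C++: it returns true when either operand is
    // NaN, matching the !(S0 == S1) description above.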
    Inst_VOPC__V_CMP_NLT_F32::Inst_VOPC__V_CMP_NLT_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_nlt_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_NLT_F32

    Inst_VOPC__V_CMP_NLT_F32::~Inst_VOPC__V_CMP_NLT_F32()
    {
    } // ~Inst_VOPC__V_CMP_NLT_F32

    // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NLT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_TRU_F32::Inst_VOPC__V_CMP_TRU_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_tru_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_TRU_F32

    Inst_VOPC__V_CMP_TRU_F32::~Inst_VOPC__V_CMP_TRU_F32()
    {
    } // ~Inst_VOPC__V_CMP_TRU_F32

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_TRU_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_F_F32::Inst_VOPC__V_CMPX_F_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_f_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_F_F32

    Inst_VOPC__V_CMPX_F_F32::~Inst_VOPC__V_CMPX_F_F32()
    {
    } // ~Inst_VOPC__V_CMPX_F_F32

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_F_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute
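    // Every CMPX variant below follows the same pattern: compute the result
    // into VCC, then overwrite EXEC with it. After v_cmpx_f_f32 no lane
    // remains active until something rewrites the EXEC mask.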
    Inst_VOPC__V_CMPX_LT_F32::Inst_VOPC__V_CMPX_LT_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lt_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_LT_F32

    Inst_VOPC__V_CMPX_LT_F32::~Inst_VOPC__V_CMPX_LT_F32()
    {
    } // ~Inst_VOPC__V_CMPX_LT_F32

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_EQ_F32::Inst_VOPC__V_CMPX_EQ_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_eq_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_EQ_F32

    Inst_VOPC__V_CMPX_EQ_F32::~Inst_VOPC__V_CMPX_EQ_F32()
    {
    } // ~Inst_VOPC__V_CMPX_EQ_F32

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_EQ_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_LE_F32::Inst_VOPC__V_CMPX_LE_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_le_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_LE_F32

    Inst_VOPC__V_CMPX_LE_F32::~Inst_VOPC__V_CMPX_LE_F32()
    {
    } // ~Inst_VOPC__V_CMPX_LE_F32

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_GT_F32::Inst_VOPC__V_CMPX_GT_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_gt_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_GT_F32

    Inst_VOPC__V_CMPX_GT_F32::~Inst_VOPC__V_CMPX_GT_F32()
    {
    } // ~Inst_VOPC__V_CMPX_GT_F32

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_LG_F32::Inst_VOPC__V_CMPX_LG_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lg_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_LG_F32

    Inst_VOPC__V_CMPX_LG_F32::~Inst_VOPC__V_CMPX_LG_F32()
    {
    } // ~Inst_VOPC__V_CMPX_LG_F32

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, (src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_GE_F32::Inst_VOPC__V_CMPX_GE_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ge_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_GE_F32

    Inst_VOPC__V_CMPX_GE_F32::~Inst_VOPC__V_CMPX_GE_F32()
    {
    } // ~Inst_VOPC__V_CMPX_GE_F32

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_O_F32::Inst_VOPC__V_CMPX_O_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_o_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_O_F32

    Inst_VOPC__V_CMPX_O_F32::~Inst_VOPC__V_CMPX_O_F32()
    {
    } // ~Inst_VOPC__V_CMPX_O_F32

    // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOPC__V_CMPX_O_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, (!std::isnan(src0[lane])
                    && !std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_U_F32::Inst_VOPC__V_CMPX_U_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_u_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_U_F32

    Inst_VOPC__V_CMPX_U_F32::~Inst_VOPC__V_CMPX_U_F32()
    {
    } // ~Inst_VOPC__V_CMPX_U_F32

    // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOPC__V_CMPX_U_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, (std::isnan(src0[lane])
                    || std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_NGE_F32::Inst_VOPC__V_CMPX_NGE_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nge_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_NGE_F32

    Inst_VOPC__V_CMPX_NGE_F32::~Inst_VOPC__V_CMPX_NGE_F32()
    {
    } // ~Inst_VOPC__V_CMPX_NGE_F32

    // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NGE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_NLG_F32::Inst_VOPC__V_CMPX_NLG_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nlg_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_NLG_F32

    Inst_VOPC__V_CMPX_NLG_F32::~Inst_VOPC__V_CMPX_NLG_F32()
    {
    } // ~Inst_VOPC__V_CMPX_NLG_F32

    // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NLG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_NGT_F32::Inst_VOPC__V_CMPX_NGT_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ngt_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_NGT_F32

    Inst_VOPC__V_CMPX_NGT_F32::~Inst_VOPC__V_CMPX_NGT_F32()
    {
    } // ~Inst_VOPC__V_CMPX_NGT_F32

    // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NGT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_NLE_F32::Inst_VOPC__V_CMPX_NLE_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nle_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_NLE_F32

    Inst_VOPC__V_CMPX_NLE_F32::~Inst_VOPC__V_CMPX_NLE_F32()
    {
    } // ~Inst_VOPC__V_CMPX_NLE_F32

    // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NLE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_NEQ_F32::Inst_VOPC__V_CMPX_NEQ_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_neq_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_NEQ_F32

    Inst_VOPC__V_CMPX_NEQ_F32::~Inst_VOPC__V_CMPX_NEQ_F32()
    {
    } // ~Inst_VOPC__V_CMPX_NEQ_F32

    // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NEQ_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] == src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_NLT_F32::Inst_VOPC__V_CMPX_NLT_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nlt_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_NLT_F32

    Inst_VOPC__V_CMPX_NLT_F32::~Inst_VOPC__V_CMPX_NLT_F32()
    {
    } // ~Inst_VOPC__V_CMPX_NLT_F32

    // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NLT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_TRU_F32::Inst_VOPC__V_CMPX_TRU_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_tru_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_TRU_F32

    Inst_VOPC__V_CMPX_TRU_F32::~Inst_VOPC__V_CMPX_TRU_F32()
    {
    } // ~Inst_VOPC__V_CMPX_TRU_F32

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_TRU_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute
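    // v_cmpx_tru_f32 is the complement idiom: it sets the VCC bit of every
    // active lane to 1 and writes the result back to EXEC.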
11528 Inst_VOPC__V_CMP_F_F64::Inst_VOPC__V_CMP_F_F64(InFmt_VOPC
*iFmt
)
11529 : Inst_VOPC(iFmt
, "v_cmp_f_f64")
11533 } // Inst_VOPC__V_CMP_F_F64
11535 Inst_VOPC__V_CMP_F_F64::~Inst_VOPC__V_CMP_F_F64()
11537 } // ~Inst_VOPC__V_CMP_F_F64
11539 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
11541 Inst_VOPC__V_CMP_F_F64::execute(GPUDynInstPtr gpuDynInst
)
11543 Wavefront
*wf
= gpuDynInst
->wavefront();
11544 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
11546 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
11547 if (wf
->execMask(lane
)) {
11548 vcc
.setBit(lane
, 0);
11555 Inst_VOPC__V_CMP_LT_F64::Inst_VOPC__V_CMP_LT_F64(InFmt_VOPC
*iFmt
)
11556 : Inst_VOPC(iFmt
, "v_cmp_lt_f64")
11560 } // Inst_VOPC__V_CMP_LT_F64
11562 Inst_VOPC__V_CMP_LT_F64::~Inst_VOPC__V_CMP_LT_F64()
11564 } // ~Inst_VOPC__V_CMP_LT_F64
11566 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
11568 Inst_VOPC__V_CMP_LT_F64::execute(GPUDynInstPtr gpuDynInst
)
11570 Wavefront
*wf
= gpuDynInst
->wavefront();
11571 ConstVecOperandF64
src0(gpuDynInst
, instData
.SRC0
);
11572 ConstVecOperandF64
src1(gpuDynInst
, instData
.VSRC1
);
11573 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
11578 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
11579 if (wf
->execMask(lane
)) {
11580 vcc
.setBit(lane
, src0
[lane
] < src1
[lane
] ? 1 : 0);
11587 Inst_VOPC__V_CMP_EQ_F64::Inst_VOPC__V_CMP_EQ_F64(InFmt_VOPC
*iFmt
)
11588 : Inst_VOPC(iFmt
, "v_cmp_eq_f64")
11592 } // Inst_VOPC__V_CMP_EQ_F64
11594 Inst_VOPC__V_CMP_EQ_F64::~Inst_VOPC__V_CMP_EQ_F64()
11596 } // ~Inst_VOPC__V_CMP_EQ_F64
11598 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
11600 Inst_VOPC__V_CMP_EQ_F64::execute(GPUDynInstPtr gpuDynInst
)
11602 Wavefront
*wf
= gpuDynInst
->wavefront();
11603 ConstVecOperandF64
src0(gpuDynInst
, instData
.SRC0
);
11604 ConstVecOperandF64
src1(gpuDynInst
, instData
.VSRC1
);
11605 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
11610 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
11611 if (wf
->execMask(lane
)) {
11612 vcc
.setBit(lane
, src0
[lane
] == src1
[lane
] ? 1 : 0);
11619 Inst_VOPC__V_CMP_LE_F64::Inst_VOPC__V_CMP_LE_F64(InFmt_VOPC
*iFmt
)
11620 : Inst_VOPC(iFmt
, "v_cmp_le_f64")
11624 } // Inst_VOPC__V_CMP_LE_F64
11626 Inst_VOPC__V_CMP_LE_F64::~Inst_VOPC__V_CMP_LE_F64()
11628 } // ~Inst_VOPC__V_CMP_LE_F64
11630 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
11632 Inst_VOPC__V_CMP_LE_F64::execute(GPUDynInstPtr gpuDynInst
)
11634 Wavefront
*wf
= gpuDynInst
->wavefront();
11635 ConstVecOperandF64
src0(gpuDynInst
, instData
.SRC0
);
11636 ConstVecOperandF64
src1(gpuDynInst
, instData
.VSRC1
);
11637 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
11642 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
11643 if (wf
->execMask(lane
)) {
11644 vcc
.setBit(lane
, src0
[lane
] <= src1
[lane
] ? 1 : 0);
11651 Inst_VOPC__V_CMP_GT_F64::Inst_VOPC__V_CMP_GT_F64(InFmt_VOPC
*iFmt
)
11652 : Inst_VOPC(iFmt
, "v_cmp_gt_f64")
11656 } // Inst_VOPC__V_CMP_GT_F64
11658 Inst_VOPC__V_CMP_GT_F64::~Inst_VOPC__V_CMP_GT_F64()
11660 } // ~Inst_VOPC__V_CMP_GT_F64
11662 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
11664 Inst_VOPC__V_CMP_GT_F64::execute(GPUDynInstPtr gpuDynInst
)
11666 Wavefront
*wf
= gpuDynInst
->wavefront();
11667 ConstVecOperandF64
src0(gpuDynInst
, instData
.SRC0
);
11668 ConstVecOperandF64
src1(gpuDynInst
, instData
.VSRC1
);
11669 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
11674 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
11675 if (wf
->execMask(lane
)) {
11676 vcc
.setBit(lane
, src0
[lane
] > src1
[lane
] ? 1 : 0);
11683 Inst_VOPC__V_CMP_LG_F64::Inst_VOPC__V_CMP_LG_F64(InFmt_VOPC
*iFmt
)
11684 : Inst_VOPC(iFmt
, "v_cmp_lg_f64")
11688 } // Inst_VOPC__V_CMP_LG_F64
11690 Inst_VOPC__V_CMP_LG_F64::~Inst_VOPC__V_CMP_LG_F64()
11692 } // ~Inst_VOPC__V_CMP_LG_F64
11694 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
11696 Inst_VOPC__V_CMP_LG_F64::execute(GPUDynInstPtr gpuDynInst
)
11698 Wavefront
*wf
= gpuDynInst
->wavefront();
11699 ConstVecOperandF64
src0(gpuDynInst
, instData
.SRC0
);
11700 ConstVecOperandF64
src1(gpuDynInst
, instData
.VSRC1
);
11701 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
11706 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
11707 if (wf
->execMask(lane
)) {
11708 vcc
.setBit(lane
, (src0
[lane
] < src1
[lane
]
11709 || src0
[lane
] > src1
[lane
]) ? 1 : 0);
11716 Inst_VOPC__V_CMP_GE_F64::Inst_VOPC__V_CMP_GE_F64(InFmt_VOPC
*iFmt
)
11717 : Inst_VOPC(iFmt
, "v_cmp_ge_f64")
11721 } // Inst_VOPC__V_CMP_GE_F64
11723 Inst_VOPC__V_CMP_GE_F64::~Inst_VOPC__V_CMP_GE_F64()
11725 } // ~Inst_VOPC__V_CMP_GE_F64
11727 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
11729 Inst_VOPC__V_CMP_GE_F64::execute(GPUDynInstPtr gpuDynInst
)
11731 Wavefront
*wf
= gpuDynInst
->wavefront();
11732 ConstVecOperandF64
src0(gpuDynInst
, instData
.SRC0
);
11733 ConstVecOperandF64
src1(gpuDynInst
, instData
.VSRC1
);
11734 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
11739 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
11740 if (wf
->execMask(lane
)) {
11741 vcc
.setBit(lane
, src0
[lane
] >= src1
[lane
] ? 1 : 0);
11748 Inst_VOPC__V_CMP_O_F64::Inst_VOPC__V_CMP_O_F64(InFmt_VOPC
*iFmt
)
11749 : Inst_VOPC(iFmt
, "v_cmp_o_f64")
11753 } // Inst_VOPC__V_CMP_O_F64
11755 Inst_VOPC__V_CMP_O_F64::~Inst_VOPC__V_CMP_O_F64()
11757 } // ~Inst_VOPC__V_CMP_O_F64
11759 // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
11761 Inst_VOPC__V_CMP_O_F64::execute(GPUDynInstPtr gpuDynInst
)
11763 Wavefront
*wf
= gpuDynInst
->wavefront();
11764 ConstVecOperandF64
src0(gpuDynInst
, instData
.SRC0
);
11765 ConstVecOperandF64
src1(gpuDynInst
, instData
.VSRC1
);
11766 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
11771 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
11772 if (wf
->execMask(lane
)) {
11773 vcc
.setBit(lane
, (!std::isnan(src0
[lane
])
11774 && !std::isnan(src1
[lane
])) ? 1 : 0);
11781 Inst_VOPC__V_CMP_U_F64::Inst_VOPC__V_CMP_U_F64(InFmt_VOPC
*iFmt
)
11782 : Inst_VOPC(iFmt
, "v_cmp_u_f64")
11786 } // Inst_VOPC__V_CMP_U_F64
11788 Inst_VOPC__V_CMP_U_F64::~Inst_VOPC__V_CMP_U_F64()
11790 } // ~Inst_VOPC__V_CMP_U_F64
11792 // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
11794 Inst_VOPC__V_CMP_U_F64::execute(GPUDynInstPtr gpuDynInst
)
11796 Wavefront
*wf
= gpuDynInst
->wavefront();
11797 ConstVecOperandF64
src0(gpuDynInst
, instData
.SRC0
);
11798 ConstVecOperandF64
src1(gpuDynInst
, instData
.VSRC1
);
11799 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
11804 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
11805 if (wf
->execMask(lane
)) {
11806 vcc
.setBit(lane
, (std::isnan(src0
[lane
])
11807 || std::isnan(src1
[lane
])) ? 1 : 0);
11814 Inst_VOPC__V_CMP_NGE_F64::Inst_VOPC__V_CMP_NGE_F64(InFmt_VOPC
*iFmt
)
11815 : Inst_VOPC(iFmt
, "v_cmp_nge_f64")
11819 } // Inst_VOPC__V_CMP_NGE_F64
11821 Inst_VOPC__V_CMP_NGE_F64::~Inst_VOPC__V_CMP_NGE_F64()
11823 } // ~Inst_VOPC__V_CMP_NGE_F64
11825 // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
11827 Inst_VOPC__V_CMP_NGE_F64::execute(GPUDynInstPtr gpuDynInst
)
11829 Wavefront
*wf
= gpuDynInst
->wavefront();
11830 ConstVecOperandF64
src0(gpuDynInst
, instData
.SRC0
);
11831 ConstVecOperandF64
src1(gpuDynInst
, instData
.VSRC1
);
11832 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
11837 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
11838 if (wf
->execMask(lane
)) {
11839 vcc
.setBit(lane
, !(src0
[lane
] >= src1
[lane
]) ? 1 : 0);
11846 Inst_VOPC__V_CMP_NLG_F64::Inst_VOPC__V_CMP_NLG_F64(InFmt_VOPC
*iFmt
)
11847 : Inst_VOPC(iFmt
, "v_cmp_nlg_f64")
11851 } // Inst_VOPC__V_CMP_NLG_F64
11853 Inst_VOPC__V_CMP_NLG_F64::~Inst_VOPC__V_CMP_NLG_F64()
11855 } // ~Inst_VOPC__V_CMP_NLG_F64
11857 // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
11859 Inst_VOPC__V_CMP_NLG_F64::execute(GPUDynInstPtr gpuDynInst
)
11861 Wavefront
*wf
= gpuDynInst
->wavefront();
11862 ConstVecOperandF64
src0(gpuDynInst
, instData
.SRC0
);
11863 ConstVecOperandF64
src1(gpuDynInst
, instData
.VSRC1
);
11864 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
11869 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
11870 if (wf
->execMask(lane
)) {
11871 vcc
.setBit(lane
, !(src0
[lane
] < src1
[lane
]
11872 || src0
[lane
] > src1
[lane
]) ? 1 : 0);
11879 Inst_VOPC__V_CMP_NGT_F64::Inst_VOPC__V_CMP_NGT_F64(InFmt_VOPC
*iFmt
)
11880 : Inst_VOPC(iFmt
, "v_cmp_ngt_f64")
11884 } // Inst_VOPC__V_CMP_NGT_F64
11886 Inst_VOPC__V_CMP_NGT_F64::~Inst_VOPC__V_CMP_NGT_F64()
11888 } // ~Inst_VOPC__V_CMP_NGT_F64
11890 // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
11892 Inst_VOPC__V_CMP_NGT_F64::execute(GPUDynInstPtr gpuDynInst
)
11894 Wavefront
*wf
= gpuDynInst
->wavefront();
11895 ConstVecOperandF64
src0(gpuDynInst
, instData
.SRC0
);
11896 ConstVecOperandF64
src1(gpuDynInst
, instData
.VSRC1
);
11897 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
11902 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
11903 if (wf
->execMask(lane
)) {
11904 vcc
.setBit(lane
, !(src0
[lane
] > src1
[lane
]) ? 1 : 0);
11911 Inst_VOPC__V_CMP_NLE_F64::Inst_VOPC__V_CMP_NLE_F64(InFmt_VOPC
*iFmt
)
11912 : Inst_VOPC(iFmt
, "v_cmp_nle_f64")
11916 } // Inst_VOPC__V_CMP_NLE_F64
11918 Inst_VOPC__V_CMP_NLE_F64::~Inst_VOPC__V_CMP_NLE_F64()
11920 } // ~Inst_VOPC__V_CMP_NLE_F64
11922 // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
11924 Inst_VOPC__V_CMP_NLE_F64::execute(GPUDynInstPtr gpuDynInst
)
11926 Wavefront
*wf
= gpuDynInst
->wavefront();
11927 ConstVecOperandF64
src0(gpuDynInst
, instData
.SRC0
);
11928 ConstVecOperandF64
src1(gpuDynInst
, instData
.VSRC1
);
11929 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
11934 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
11935 if (wf
->execMask(lane
)) {
11936 vcc
.setBit(lane
, !(src0
[lane
] <= src1
[lane
]) ? 1 : 0);
11943 Inst_VOPC__V_CMP_NEQ_F64::Inst_VOPC__V_CMP_NEQ_F64(InFmt_VOPC
*iFmt
)
11944 : Inst_VOPC(iFmt
, "v_cmp_neq_f64")
11948 } // Inst_VOPC__V_CMP_NEQ_F64
11950 Inst_VOPC__V_CMP_NEQ_F64::~Inst_VOPC__V_CMP_NEQ_F64()
11952 } // ~Inst_VOPC__V_CMP_NEQ_F64
11954 // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
11956 Inst_VOPC__V_CMP_NEQ_F64::execute(GPUDynInstPtr gpuDynInst
)
11958 Wavefront
*wf
= gpuDynInst
->wavefront();
11959 ConstVecOperandF64
src0(gpuDynInst
, instData
.SRC0
);
11960 ConstVecOperandF64
src1(gpuDynInst
, instData
.VSRC1
);
11961 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
11966 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
11967 if (wf
->execMask(lane
)) {
11968 vcc
.setBit(lane
, src0
[lane
] != src1
[lane
] ? 1 : 0);
11975 Inst_VOPC__V_CMP_NLT_F64::Inst_VOPC__V_CMP_NLT_F64(InFmt_VOPC
*iFmt
)
11976 : Inst_VOPC(iFmt
, "v_cmp_nlt_f64")
11980 } // Inst_VOPC__V_CMP_NLT_F64
11982 Inst_VOPC__V_CMP_NLT_F64::~Inst_VOPC__V_CMP_NLT_F64()
11984 } // ~Inst_VOPC__V_CMP_NLT_F64
11986 // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
11988 Inst_VOPC__V_CMP_NLT_F64::execute(GPUDynInstPtr gpuDynInst
)
11990 Wavefront
*wf
= gpuDynInst
->wavefront();
11991 ConstVecOperandF64
src0(gpuDynInst
, instData
.SRC0
);
11992 ConstVecOperandF64
src1(gpuDynInst
, instData
.VSRC1
);
11993 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
11998 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
11999 if (wf
->execMask(lane
)) {
12000 vcc
.setBit(lane
, !(src0
[lane
] < src1
[lane
]) ? 1 : 0);
12007 Inst_VOPC__V_CMP_TRU_F64::Inst_VOPC__V_CMP_TRU_F64(InFmt_VOPC
*iFmt
)
12008 : Inst_VOPC(iFmt
, "v_cmp_tru_f64")
12012 } // Inst_VOPC__V_CMP_TRU_F64
12014 Inst_VOPC__V_CMP_TRU_F64::~Inst_VOPC__V_CMP_TRU_F64()
12016 } // ~Inst_VOPC__V_CMP_TRU_F64
12018 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
12020 Inst_VOPC__V_CMP_TRU_F64::execute(GPUDynInstPtr gpuDynInst
)
12022 Wavefront
*wf
= gpuDynInst
->wavefront();
12023 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
12025 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
12026 if (wf
->execMask(lane
)) {
12027 vcc
.setBit(lane
, 1);

    Inst_VOPC__V_CMPX_F_F64::Inst_VOPC__V_CMPX_F_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_f_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_F_F64

    Inst_VOPC__V_CMPX_F_F64::~Inst_VOPC__V_CMPX_F_F64()
    {
    } // ~Inst_VOPC__V_CMPX_F_F64

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_F_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute
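
    // The v_cmpx_* variants repeat the v_cmp_* pattern but additionally
    // copy the resulting lane mask into the wavefront's EXEC mask
    // (wf->execMask() = vcc.rawData()), so that subsequent vector
    // instructions execute only on the lanes where the comparison held.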

    Inst_VOPC__V_CMPX_LT_F64::Inst_VOPC__V_CMPX_LT_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lt_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_LT_F64

    Inst_VOPC__V_CMPX_LT_F64::~Inst_VOPC__V_CMPX_LT_F64()
    {
    } // ~Inst_VOPC__V_CMPX_LT_F64

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_EQ_F64::Inst_VOPC__V_CMPX_EQ_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_eq_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_EQ_F64

    Inst_VOPC__V_CMPX_EQ_F64::~Inst_VOPC__V_CMPX_EQ_F64()
    {
    } // ~Inst_VOPC__V_CMPX_EQ_F64

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_EQ_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LE_F64::Inst_VOPC__V_CMPX_LE_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_le_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_LE_F64

    Inst_VOPC__V_CMPX_LE_F64::~Inst_VOPC__V_CMPX_LE_F64()
    {
    } // ~Inst_VOPC__V_CMPX_LE_F64

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GT_F64::Inst_VOPC__V_CMPX_GT_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_gt_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_GT_F64

    Inst_VOPC__V_CMPX_GT_F64::~Inst_VOPC__V_CMPX_GT_F64()
    {
    } // ~Inst_VOPC__V_CMPX_GT_F64

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LG_F64::Inst_VOPC__V_CMPX_LG_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lg_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_LG_F64

    Inst_VOPC__V_CMPX_LG_F64::~Inst_VOPC__V_CMPX_LG_F64()
    {
    } // ~Inst_VOPC__V_CMPX_LG_F64

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LG_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, (src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GE_F64::Inst_VOPC__V_CMPX_GE_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ge_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_GE_F64

    Inst_VOPC__V_CMPX_GE_F64::~Inst_VOPC__V_CMPX_GE_F64()
    {
    } // ~Inst_VOPC__V_CMPX_GE_F64

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_O_F64::Inst_VOPC__V_CMPX_O_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_o_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_O_F64

    Inst_VOPC__V_CMPX_O_F64::~Inst_VOPC__V_CMPX_O_F64()
    {
    } // ~Inst_VOPC__V_CMPX_O_F64

    // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOPC__V_CMPX_O_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, (!std::isnan(src0[lane])
                    && !std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_U_F64::Inst_VOPC__V_CMPX_U_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_u_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_U_F64

    Inst_VOPC__V_CMPX_U_F64::~Inst_VOPC__V_CMPX_U_F64()
    {
    } // ~Inst_VOPC__V_CMPX_U_F64

    // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOPC__V_CMPX_U_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, (std::isnan(src0[lane])
                    || std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute
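
    // v_cmpx_o_f64 ("ordered") sets a lane's bit only when neither operand
    // is NaN; v_cmpx_u_f64 ("unordered") is its complement, setting the bit
    // when either operand is NaN.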

    Inst_VOPC__V_CMPX_NGE_F64::Inst_VOPC__V_CMPX_NGE_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nge_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_NGE_F64

    Inst_VOPC__V_CMPX_NGE_F64::~Inst_VOPC__V_CMPX_NGE_F64()
    {
    } // ~Inst_VOPC__V_CMPX_NGE_F64

    // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NGE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_NLG_F64::Inst_VOPC__V_CMPX_NLG_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nlg_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_NLG_F64

    Inst_VOPC__V_CMPX_NLG_F64::~Inst_VOPC__V_CMPX_NLG_F64()
    {
    } // ~Inst_VOPC__V_CMPX_NLG_F64

    // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NLG_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_NGT_F64::Inst_VOPC__V_CMPX_NGT_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ngt_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_NGT_F64

    Inst_VOPC__V_CMPX_NGT_F64::~Inst_VOPC__V_CMPX_NGT_F64()
    {
    } // ~Inst_VOPC__V_CMPX_NGT_F64

    // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NGT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_NLE_F64::Inst_VOPC__V_CMPX_NLE_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nle_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_NLE_F64

    Inst_VOPC__V_CMPX_NLE_F64::~Inst_VOPC__V_CMPX_NLE_F64()
    {
    } // ~Inst_VOPC__V_CMPX_NLE_F64

    // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NLE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_NEQ_F64::Inst_VOPC__V_CMPX_NEQ_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_neq_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_NEQ_F64

    Inst_VOPC__V_CMPX_NEQ_F64::~Inst_VOPC__V_CMPX_NEQ_F64()
    {
    } // ~Inst_VOPC__V_CMPX_NEQ_F64

    // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NEQ_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_NLT_F64::Inst_VOPC__V_CMPX_NLT_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nlt_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_NLT_F64

    Inst_VOPC__V_CMPX_NLT_F64::~Inst_VOPC__V_CMPX_NLT_F64()
    {
    } // ~Inst_VOPC__V_CMPX_NLT_F64

    // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NLT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_TRU_F64::Inst_VOPC__V_CMPX_TRU_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_tru_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_TRU_F64

    Inst_VOPC__V_CMPX_TRU_F64::~Inst_VOPC__V_CMPX_TRU_F64()
    {
    } // ~Inst_VOPC__V_CMPX_TRU_F64

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_TRU_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute
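
    // The remaining compares repeat the same template for 16- and 32-bit
    // integer data. Signed (i16/i32) and unsigned (u16/u32) variants differ
    // only in the vector operand type, which determines how the register
    // bits are interpreted by the C++ comparison operators; "<>" in the ISA
    // pseudocode means "not equal" and maps to operator!=.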

    Inst_VOPC__V_CMP_F_I16::Inst_VOPC__V_CMP_F_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_f_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_F_I16

    Inst_VOPC__V_CMP_F_I16::~Inst_VOPC__V_CMP_F_I16()
    {
    } // ~Inst_VOPC__V_CMP_F_I16

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_F_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LT_I16::Inst_VOPC__V_CMP_LT_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lt_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LT_I16

    Inst_VOPC__V_CMP_LT_I16::~Inst_VOPC__V_CMP_LT_I16()
    {
    } // ~Inst_VOPC__V_CMP_LT_I16

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LT_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_EQ_I16::Inst_VOPC__V_CMP_EQ_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_eq_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_EQ_I16

    Inst_VOPC__V_CMP_EQ_I16::~Inst_VOPC__V_CMP_EQ_I16()
    {
    } // ~Inst_VOPC__V_CMP_EQ_I16

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_EQ_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LE_I16::Inst_VOPC__V_CMP_LE_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_le_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LE_I16

    Inst_VOPC__V_CMP_LE_I16::~Inst_VOPC__V_CMP_LE_I16()
    {
    } // ~Inst_VOPC__V_CMP_LE_I16

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GT_I16::Inst_VOPC__V_CMP_GT_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_gt_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GT_I16

    Inst_VOPC__V_CMP_GT_I16::~Inst_VOPC__V_CMP_GT_I16()
    {
    } // ~Inst_VOPC__V_CMP_GT_I16

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GT_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NE_I16::Inst_VOPC__V_CMP_NE_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ne_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_NE_I16

    Inst_VOPC__V_CMP_NE_I16::~Inst_VOPC__V_CMP_NE_I16()
    {
    } // ~Inst_VOPC__V_CMP_NE_I16

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GE_I16::Inst_VOPC__V_CMP_GE_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ge_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GE_I16

    Inst_VOPC__V_CMP_GE_I16::~Inst_VOPC__V_CMP_GE_I16()
    {
    } // ~Inst_VOPC__V_CMP_GE_I16

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_T_I16::Inst_VOPC__V_CMP_T_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_t_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_T_I16

    Inst_VOPC__V_CMP_T_I16::~Inst_VOPC__V_CMP_T_I16()
    {
    } // ~Inst_VOPC__V_CMP_T_I16

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_T_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_F_U16::Inst_VOPC__V_CMP_F_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_f_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_F_U16

    Inst_VOPC__V_CMP_F_U16::~Inst_VOPC__V_CMP_F_U16()
    {
    } // ~Inst_VOPC__V_CMP_F_U16

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_F_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LT_U16::Inst_VOPC__V_CMP_LT_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lt_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LT_U16

    Inst_VOPC__V_CMP_LT_U16::~Inst_VOPC__V_CMP_LT_U16()
    {
    } // ~Inst_VOPC__V_CMP_LT_U16

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LT_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_EQ_U16::Inst_VOPC__V_CMP_EQ_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_eq_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_EQ_U16

    Inst_VOPC__V_CMP_EQ_U16::~Inst_VOPC__V_CMP_EQ_U16()
    {
    } // ~Inst_VOPC__V_CMP_EQ_U16

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_EQ_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LE_U16::Inst_VOPC__V_CMP_LE_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_le_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LE_U16

    Inst_VOPC__V_CMP_LE_U16::~Inst_VOPC__V_CMP_LE_U16()
    {
    } // ~Inst_VOPC__V_CMP_LE_U16

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GT_U16::Inst_VOPC__V_CMP_GT_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_gt_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GT_U16

    Inst_VOPC__V_CMP_GT_U16::~Inst_VOPC__V_CMP_GT_U16()
    {
    } // ~Inst_VOPC__V_CMP_GT_U16

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GT_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NE_U16::Inst_VOPC__V_CMP_NE_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ne_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_NE_U16

    Inst_VOPC__V_CMP_NE_U16::~Inst_VOPC__V_CMP_NE_U16()
    {
    } // ~Inst_VOPC__V_CMP_NE_U16

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GE_U16::Inst_VOPC__V_CMP_GE_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ge_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GE_U16

    Inst_VOPC__V_CMP_GE_U16::~Inst_VOPC__V_CMP_GE_U16()
    {
    } // ~Inst_VOPC__V_CMP_GE_U16

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_T_U16::Inst_VOPC__V_CMP_T_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_t_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_T_U16

    Inst_VOPC__V_CMP_T_U16::~Inst_VOPC__V_CMP_T_U16()
    {
    } // ~Inst_VOPC__V_CMP_T_U16

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_T_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        vcc.write();
    } // execute
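
    // Hypothetical usage sketch: a compiler can lower divergent control
    // flow such as "if (v0 < v1) { ... }" onto the cmpx forms, letting the
    // comparison itself deactivate the failing lanes:
    //     v_cmpx_lt_i16 vcc, v0, v1  ; vcc = per-lane (v0 < v1), EXEC = vcc
    //     ...                        ; "then" block runs on passing lanes
    // The instruction operands above are illustrative, not taken from any
    // particular compiler's output.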

    Inst_VOPC__V_CMPX_F_I16::Inst_VOPC__V_CMPX_F_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_f_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_F_I16

    Inst_VOPC__V_CMPX_F_I16::~Inst_VOPC__V_CMPX_F_I16()
    {
    } // ~Inst_VOPC__V_CMPX_F_I16

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_F_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LT_I16::Inst_VOPC__V_CMPX_LT_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lt_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LT_I16

    Inst_VOPC__V_CMPX_LT_I16::~Inst_VOPC__V_CMPX_LT_I16()
    {
    } // ~Inst_VOPC__V_CMPX_LT_I16

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LT_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_EQ_I16::Inst_VOPC__V_CMPX_EQ_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_eq_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_EQ_I16

    Inst_VOPC__V_CMPX_EQ_I16::~Inst_VOPC__V_CMPX_EQ_I16()
    {
    } // ~Inst_VOPC__V_CMPX_EQ_I16

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_EQ_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LE_I16::Inst_VOPC__V_CMPX_LE_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_le_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LE_I16

    Inst_VOPC__V_CMPX_LE_I16::~Inst_VOPC__V_CMPX_LE_I16()
    {
    } // ~Inst_VOPC__V_CMPX_LE_I16

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GT_I16::Inst_VOPC__V_CMPX_GT_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_gt_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GT_I16

    Inst_VOPC__V_CMPX_GT_I16::~Inst_VOPC__V_CMPX_GT_I16()
    {
    } // ~Inst_VOPC__V_CMPX_GT_I16

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GT_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_NE_I16::Inst_VOPC__V_CMPX_NE_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ne_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_NE_I16

    Inst_VOPC__V_CMPX_NE_I16::~Inst_VOPC__V_CMPX_NE_I16()
    {
    } // ~Inst_VOPC__V_CMPX_NE_I16

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GE_I16::Inst_VOPC__V_CMPX_GE_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ge_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GE_I16

    Inst_VOPC__V_CMPX_GE_I16::~Inst_VOPC__V_CMPX_GE_I16()
    {
    } // ~Inst_VOPC__V_CMPX_GE_I16

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_T_I16::Inst_VOPC__V_CMPX_T_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_t_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_T_I16

    Inst_VOPC__V_CMPX_T_I16::~Inst_VOPC__V_CMPX_T_I16()
    {
    } // ~Inst_VOPC__V_CMPX_T_I16

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_T_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_F_U16::Inst_VOPC__V_CMPX_F_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_f_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_F_U16

    Inst_VOPC__V_CMPX_F_U16::~Inst_VOPC__V_CMPX_F_U16()
    {
    } // ~Inst_VOPC__V_CMPX_F_U16

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_F_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LT_U16::Inst_VOPC__V_CMPX_LT_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lt_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LT_U16

    Inst_VOPC__V_CMPX_LT_U16::~Inst_VOPC__V_CMPX_LT_U16()
    {
    } // ~Inst_VOPC__V_CMPX_LT_U16

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LT_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_EQ_U16::Inst_VOPC__V_CMPX_EQ_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_eq_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_EQ_U16

    Inst_VOPC__V_CMPX_EQ_U16::~Inst_VOPC__V_CMPX_EQ_U16()
    {
    } // ~Inst_VOPC__V_CMPX_EQ_U16

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_EQ_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LE_U16::Inst_VOPC__V_CMPX_LE_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_le_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LE_U16

    Inst_VOPC__V_CMPX_LE_U16::~Inst_VOPC__V_CMPX_LE_U16()
    {
    } // ~Inst_VOPC__V_CMPX_LE_U16

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GT_U16::Inst_VOPC__V_CMPX_GT_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_gt_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GT_U16

    Inst_VOPC__V_CMPX_GT_U16::~Inst_VOPC__V_CMPX_GT_U16()
    {
    } // ~Inst_VOPC__V_CMPX_GT_U16

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GT_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_NE_U16::Inst_VOPC__V_CMPX_NE_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ne_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_NE_U16

    Inst_VOPC__V_CMPX_NE_U16::~Inst_VOPC__V_CMPX_NE_U16()
    {
    } // ~Inst_VOPC__V_CMPX_NE_U16

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GE_U16::Inst_VOPC__V_CMPX_GE_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ge_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GE_U16

    Inst_VOPC__V_CMPX_GE_U16::~Inst_VOPC__V_CMPX_GE_U16()
    {
    } // ~Inst_VOPC__V_CMPX_GE_U16

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_T_U16::Inst_VOPC__V_CMPX_T_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_t_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_T_U16

    Inst_VOPC__V_CMPX_T_U16::~Inst_VOPC__V_CMPX_T_U16()
    {
    } // ~Inst_VOPC__V_CMPX_T_U16

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_T_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute
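
    // The 32-bit forms below are identical to the 16-bit ones apart from
    // operand width; VOPC always packs one result bit per lane into the
    // 64-bit VCC (and, for cmpx, EXEC) mask regardless of the compared type.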

    Inst_VOPC__V_CMP_F_I32::Inst_VOPC__V_CMP_F_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_f_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_F_I32

    Inst_VOPC__V_CMP_F_I32::~Inst_VOPC__V_CMP_F_I32()
    {
    } // ~Inst_VOPC__V_CMP_F_I32

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_F_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LT_I32::Inst_VOPC__V_CMP_LT_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lt_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LT_I32

    Inst_VOPC__V_CMP_LT_I32::~Inst_VOPC__V_CMP_LT_I32()
    {
    } // ~Inst_VOPC__V_CMP_LT_I32

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_EQ_I32::Inst_VOPC__V_CMP_EQ_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_eq_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_EQ_I32

    Inst_VOPC__V_CMP_EQ_I32::~Inst_VOPC__V_CMP_EQ_I32()
    {
    } // ~Inst_VOPC__V_CMP_EQ_I32

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LE_I32::Inst_VOPC__V_CMP_LE_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_le_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LE_I32

    Inst_VOPC__V_CMP_LE_I32::~Inst_VOPC__V_CMP_LE_I32()
    {
    } // ~Inst_VOPC__V_CMP_LE_I32

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GT_I32::Inst_VOPC__V_CMP_GT_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_gt_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GT_I32

    Inst_VOPC__V_CMP_GT_I32::~Inst_VOPC__V_CMP_GT_I32()
    {
    } // ~Inst_VOPC__V_CMP_GT_I32

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NE_I32::Inst_VOPC__V_CMP_NE_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ne_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_NE_I32

    Inst_VOPC__V_CMP_NE_I32::~Inst_VOPC__V_CMP_NE_I32()
    {
    } // ~Inst_VOPC__V_CMP_NE_I32

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GE_I32::Inst_VOPC__V_CMP_GE_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ge_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GE_I32

    Inst_VOPC__V_CMP_GE_I32::~Inst_VOPC__V_CMP_GE_I32()
    {
    } // ~Inst_VOPC__V_CMP_GE_I32

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_T_I32::Inst_VOPC__V_CMP_T_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_t_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_T_I32

    Inst_VOPC__V_CMP_T_I32::~Inst_VOPC__V_CMP_T_I32()
    {
    } // ~Inst_VOPC__V_CMP_T_I32

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_T_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_F_U32::Inst_VOPC__V_CMP_F_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_f_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_F_U32

    Inst_VOPC__V_CMP_F_U32::~Inst_VOPC__V_CMP_F_U32()
    {
    } // ~Inst_VOPC__V_CMP_F_U32

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_F_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LT_U32::Inst_VOPC__V_CMP_LT_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lt_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LT_U32

    Inst_VOPC__V_CMP_LT_U32::~Inst_VOPC__V_CMP_LT_U32()
    {
    } // ~Inst_VOPC__V_CMP_LT_U32

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_EQ_U32::Inst_VOPC__V_CMP_EQ_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_eq_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_EQ_U32

    Inst_VOPC__V_CMP_EQ_U32::~Inst_VOPC__V_CMP_EQ_U32()
    {
    } // ~Inst_VOPC__V_CMP_EQ_U32

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LE_U32::Inst_VOPC__V_CMP_LE_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_le_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LE_U32

    Inst_VOPC__V_CMP_LE_U32::~Inst_VOPC__V_CMP_LE_U32()
    {
    } // ~Inst_VOPC__V_CMP_LE_U32

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GT_U32::Inst_VOPC__V_CMP_GT_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_gt_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GT_U32

    Inst_VOPC__V_CMP_GT_U32::~Inst_VOPC__V_CMP_GT_U32()
    {
    } // ~Inst_VOPC__V_CMP_GT_U32

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NE_U32::Inst_VOPC__V_CMP_NE_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ne_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_NE_U32

    Inst_VOPC__V_CMP_NE_U32::~Inst_VOPC__V_CMP_NE_U32()
    {
    } // ~Inst_VOPC__V_CMP_NE_U32

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GE_U32::Inst_VOPC__V_CMP_GE_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ge_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GE_U32

    Inst_VOPC__V_CMP_GE_U32::~Inst_VOPC__V_CMP_GE_U32()
    {
    } // ~Inst_VOPC__V_CMP_GE_U32

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_T_U32::Inst_VOPC__V_CMP_T_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_t_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_T_U32

    Inst_VOPC__V_CMP_T_U32::~Inst_VOPC__V_CMP_T_U32()
    {
    } // ~Inst_VOPC__V_CMP_T_U32

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_T_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_F_I32::Inst_VOPC__V_CMPX_F_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_f_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_F_I32

    Inst_VOPC__V_CMPX_F_I32::~Inst_VOPC__V_CMPX_F_I32()
    {
    } // ~Inst_VOPC__V_CMPX_F_I32

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_F_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LT_I32::Inst_VOPC__V_CMPX_LT_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lt_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LT_I32

    Inst_VOPC__V_CMPX_LT_I32::~Inst_VOPC__V_CMPX_LT_I32()
    {
    } // ~Inst_VOPC__V_CMPX_LT_I32

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_EQ_I32::Inst_VOPC__V_CMPX_EQ_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_eq_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_EQ_I32

    Inst_VOPC__V_CMPX_EQ_I32::~Inst_VOPC__V_CMPX_EQ_I32()
    {
    } // ~Inst_VOPC__V_CMPX_EQ_I32

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LE_I32::Inst_VOPC__V_CMPX_LE_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_le_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LE_I32

    Inst_VOPC__V_CMPX_LE_I32::~Inst_VOPC__V_CMPX_LE_I32()
    {
    } // ~Inst_VOPC__V_CMPX_LE_I32

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GT_I32::Inst_VOPC__V_CMPX_GT_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_gt_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GT_I32

    Inst_VOPC__V_CMPX_GT_I32::~Inst_VOPC__V_CMPX_GT_I32()
    {
    } // ~Inst_VOPC__V_CMPX_GT_I32

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_NE_I32::Inst_VOPC__V_CMPX_NE_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ne_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_NE_I32

    Inst_VOPC__V_CMPX_NE_I32::~Inst_VOPC__V_CMPX_NE_I32()
    {
    } // ~Inst_VOPC__V_CMPX_NE_I32

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GE_I32::Inst_VOPC__V_CMPX_GE_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ge_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GE_I32

    Inst_VOPC__V_CMPX_GE_I32::~Inst_VOPC__V_CMPX_GE_I32()
    {
    } // ~Inst_VOPC__V_CMPX_GE_I32

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_T_I32::Inst_VOPC__V_CMPX_T_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_t_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_T_I32

    Inst_VOPC__V_CMPX_T_I32::~Inst_VOPC__V_CMPX_T_I32()
    {
    } // ~Inst_VOPC__V_CMPX_T_I32

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_T_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute
14248 Inst_VOPC__V_CMPX_F_U32::Inst_VOPC__V_CMPX_F_U32(InFmt_VOPC
*iFmt
)
14249 : Inst_VOPC(iFmt
, "v_cmpx_f_u32")
14252 } // Inst_VOPC__V_CMPX_F_U32
14254 Inst_VOPC__V_CMPX_F_U32::~Inst_VOPC__V_CMPX_F_U32()
14256 } // ~Inst_VOPC__V_CMPX_F_U32
14258 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
14260 Inst_VOPC__V_CMPX_F_U32::execute(GPUDynInstPtr gpuDynInst
)
14262 Wavefront
*wf
= gpuDynInst
->wavefront();
14263 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
14265 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
14266 if (wf
->execMask(lane
)) {
14267 vcc
.setBit(lane
, 0);
14271 wf
->execMask() = vcc
.rawData();
14275 Inst_VOPC__V_CMPX_LT_U32::Inst_VOPC__V_CMPX_LT_U32(InFmt_VOPC
*iFmt
)
14276 : Inst_VOPC(iFmt
, "v_cmpx_lt_u32")
14279 } // Inst_VOPC__V_CMPX_LT_U32
14281 Inst_VOPC__V_CMPX_LT_U32::~Inst_VOPC__V_CMPX_LT_U32()
14283 } // ~Inst_VOPC__V_CMPX_LT_U32
14285 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
14287 Inst_VOPC__V_CMPX_LT_U32::execute(GPUDynInstPtr gpuDynInst
)
14289 Wavefront
*wf
= gpuDynInst
->wavefront();
14290 ConstVecOperandU32
src0(gpuDynInst
, instData
.SRC0
);
14291 ConstVecOperandU32
src1(gpuDynInst
, instData
.VSRC1
);
14292 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
14297 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
14298 if (wf
->execMask(lane
)) {
14299 vcc
.setBit(lane
, src0
[lane
] < src1
[lane
] ? 1 : 0);
14303 wf
->execMask() = vcc
.rawData();
14307 Inst_VOPC__V_CMPX_EQ_U32::Inst_VOPC__V_CMPX_EQ_U32(InFmt_VOPC
*iFmt
)
14308 : Inst_VOPC(iFmt
, "v_cmpx_eq_u32")
14311 } // Inst_VOPC__V_CMPX_EQ_U32
14313 Inst_VOPC__V_CMPX_EQ_U32::~Inst_VOPC__V_CMPX_EQ_U32()
14315 } // ~Inst_VOPC__V_CMPX_EQ_U32
14317 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
14319 Inst_VOPC__V_CMPX_EQ_U32::execute(GPUDynInstPtr gpuDynInst
)
14321 Wavefront
*wf
= gpuDynInst
->wavefront();
14322 ConstVecOperandU32
src0(gpuDynInst
, instData
.SRC0
);
14323 ConstVecOperandU32
src1(gpuDynInst
, instData
.VSRC1
);
14324 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
14329 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
14330 if (wf
->execMask(lane
)) {
14331 vcc
.setBit(lane
, src0
[lane
] == src1
[lane
] ? 1 : 0);
14335 wf
->execMask() = vcc
.rawData();
14339 Inst_VOPC__V_CMPX_LE_U32::Inst_VOPC__V_CMPX_LE_U32(InFmt_VOPC
*iFmt
)
14340 : Inst_VOPC(iFmt
, "v_cmpx_le_u32")
14343 } // Inst_VOPC__V_CMPX_LE_U32
14345 Inst_VOPC__V_CMPX_LE_U32::~Inst_VOPC__V_CMPX_LE_U32()
14347 } // ~Inst_VOPC__V_CMPX_LE_U32
14349 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
14351 Inst_VOPC__V_CMPX_LE_U32::execute(GPUDynInstPtr gpuDynInst
)
14353 Wavefront
*wf
= gpuDynInst
->wavefront();
14354 ConstVecOperandU32
src0(gpuDynInst
, instData
.SRC0
);
14355 ConstVecOperandU32
src1(gpuDynInst
, instData
.VSRC1
);
14356 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
14361 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
14362 if (wf
->execMask(lane
)) {
14363 vcc
.setBit(lane
, src0
[lane
] <= src1
[lane
] ? 1 : 0);
14367 wf
->execMask() = vcc
.rawData();
14371 Inst_VOPC__V_CMPX_GT_U32::Inst_VOPC__V_CMPX_GT_U32(InFmt_VOPC
*iFmt
)
14372 : Inst_VOPC(iFmt
, "v_cmpx_gt_u32")
14375 } // Inst_VOPC__V_CMPX_GT_U32
14377 Inst_VOPC__V_CMPX_GT_U32::~Inst_VOPC__V_CMPX_GT_U32()
14379 } // ~Inst_VOPC__V_CMPX_GT_U32
14381 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
14383 Inst_VOPC__V_CMPX_GT_U32::execute(GPUDynInstPtr gpuDynInst
)
14385 Wavefront
*wf
= gpuDynInst
->wavefront();
14386 ConstVecOperandU32
src0(gpuDynInst
, instData
.SRC0
);
14387 ConstVecOperandU32
src1(gpuDynInst
, instData
.VSRC1
);
14388 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
14393 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
14394 if (wf
->execMask(lane
)) {
14395 vcc
.setBit(lane
, src0
[lane
] > src1
[lane
] ? 1 : 0);
14399 wf
->execMask() = vcc
.rawData();
14403 Inst_VOPC__V_CMPX_NE_U32::Inst_VOPC__V_CMPX_NE_U32(InFmt_VOPC
*iFmt
)
14404 : Inst_VOPC(iFmt
, "v_cmpx_ne_u32")
14407 } // Inst_VOPC__V_CMPX_NE_U32
14409 Inst_VOPC__V_CMPX_NE_U32::~Inst_VOPC__V_CMPX_NE_U32()
14411 } // ~Inst_VOPC__V_CMPX_NE_U32
14413 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
14415 Inst_VOPC__V_CMPX_NE_U32::execute(GPUDynInstPtr gpuDynInst
)
14417 Wavefront
*wf
= gpuDynInst
->wavefront();
14418 ConstVecOperandU32
src0(gpuDynInst
, instData
.SRC0
);
14419 ConstVecOperandU32
src1(gpuDynInst
, instData
.VSRC1
);
14420 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
14425 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
14426 if (wf
->execMask(lane
)) {
14427 vcc
.setBit(lane
, src0
[lane
] != src1
[lane
] ? 1 : 0);
14431 wf
->execMask() = vcc
.rawData();
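
    // In the ISA comments here, "S0 <> S1" is the manual's notation for
    // "not equal"; the implementation simply uses C++ operator!= on the
    // lane values.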

    Inst_VOPC__V_CMPX_GE_U32::Inst_VOPC__V_CMPX_GE_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ge_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GE_U32

    Inst_VOPC__V_CMPX_GE_U32::~Inst_VOPC__V_CMPX_GE_U32()
    {
    } // ~Inst_VOPC__V_CMPX_GE_U32

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_T_U32::Inst_VOPC__V_CMPX_T_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_t_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_T_U32

    Inst_VOPC__V_CMPX_T_U32::~Inst_VOPC__V_CMPX_T_U32()
    {
    } // ~Inst_VOPC__V_CMPX_T_U32

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_T_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_F_I64::Inst_VOPC__V_CMP_F_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_f_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_F_I64

    Inst_VOPC__V_CMP_F_I64::~Inst_VOPC__V_CMP_F_I64()
    {
    } // ~Inst_VOPC__V_CMP_F_I64

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_F_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LT_I64::Inst_VOPC__V_CMP_LT_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lt_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LT_I64

    Inst_VOPC__V_CMP_LT_I64::~Inst_VOPC__V_CMP_LT_I64()
    {
    } // ~Inst_VOPC__V_CMP_LT_I64

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LT_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_EQ_I64::Inst_VOPC__V_CMP_EQ_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_eq_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_EQ_I64

    Inst_VOPC__V_CMP_EQ_I64::~Inst_VOPC__V_CMP_EQ_I64()
    {
    } // ~Inst_VOPC__V_CMP_EQ_I64

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_EQ_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LE_I64::Inst_VOPC__V_CMP_LE_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_le_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LE_I64

    Inst_VOPC__V_CMP_LE_I64::~Inst_VOPC__V_CMP_LE_I64()
    {
    } // ~Inst_VOPC__V_CMP_LE_I64

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GT_I64::Inst_VOPC__V_CMP_GT_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_gt_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GT_I64

    Inst_VOPC__V_CMP_GT_I64::~Inst_VOPC__V_CMP_GT_I64()
    {
    } // ~Inst_VOPC__V_CMP_GT_I64

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GT_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NE_I64::Inst_VOPC__V_CMP_NE_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ne_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_NE_I64

    Inst_VOPC__V_CMP_NE_I64::~Inst_VOPC__V_CMP_NE_I64()
    {
    } // ~Inst_VOPC__V_CMP_NE_I64

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GE_I64::Inst_VOPC__V_CMP_GE_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ge_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GE_I64

    Inst_VOPC__V_CMP_GE_I64::~Inst_VOPC__V_CMP_GE_I64()
    {
    } // ~Inst_VOPC__V_CMP_GE_I64

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_T_I64::Inst_VOPC__V_CMP_T_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_t_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_T_I64

    Inst_VOPC__V_CMP_T_I64::~Inst_VOPC__V_CMP_T_I64()
    {
    } // ~Inst_VOPC__V_CMP_T_I64

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_T_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_F_U64::Inst_VOPC__V_CMP_F_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_f_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_F_U64

    Inst_VOPC__V_CMP_F_U64::~Inst_VOPC__V_CMP_F_U64()
    {
    } // ~Inst_VOPC__V_CMP_F_U64

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_F_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LT_U64::Inst_VOPC__V_CMP_LT_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lt_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LT_U64

    Inst_VOPC__V_CMP_LT_U64::~Inst_VOPC__V_CMP_LT_U64()
    {
    } // ~Inst_VOPC__V_CMP_LT_U64

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LT_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_EQ_U64::Inst_VOPC__V_CMP_EQ_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_eq_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_EQ_U64

    Inst_VOPC__V_CMP_EQ_U64::~Inst_VOPC__V_CMP_EQ_U64()
    {
    } // ~Inst_VOPC__V_CMP_EQ_U64

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LE_U64::Inst_VOPC__V_CMP_LE_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_le_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LE_U64

    Inst_VOPC__V_CMP_LE_U64::~Inst_VOPC__V_CMP_LE_U64()
    {
    } // ~Inst_VOPC__V_CMP_LE_U64

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GT_U64::Inst_VOPC__V_CMP_GT_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_gt_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GT_U64

    Inst_VOPC__V_CMP_GT_U64::~Inst_VOPC__V_CMP_GT_U64()
    {
    } // ~Inst_VOPC__V_CMP_GT_U64

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GT_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NE_U64::Inst_VOPC__V_CMP_NE_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ne_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_NE_U64

    Inst_VOPC__V_CMP_NE_U64::~Inst_VOPC__V_CMP_NE_U64()
    {
    } // ~Inst_VOPC__V_CMP_NE_U64

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GE_U64::Inst_VOPC__V_CMP_GE_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ge_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GE_U64

    Inst_VOPC__V_CMP_GE_U64::~Inst_VOPC__V_CMP_GE_U64()
    {
    } // ~Inst_VOPC__V_CMP_GE_U64

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_T_U64::Inst_VOPC__V_CMP_T_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_t_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_T_U64

    Inst_VOPC__V_CMP_T_U64::~Inst_VOPC__V_CMP_T_U64()
    {
    } // ~Inst_VOPC__V_CMP_T_U64

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_T_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        vcc.write();
    } // execute
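
    // The F ("always false") and T ("always true") variants ignore their
    // sources entirely and just clear or set the result bit for each active
    // lane; they presumably exist to fill out the power-of-two compare
    // opcode space rather than for any computational use.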

    Inst_VOPC__V_CMPX_F_I64::Inst_VOPC__V_CMPX_F_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_f_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_F_I64

    Inst_VOPC__V_CMPX_F_I64::~Inst_VOPC__V_CMPX_F_I64()
    {
    } // ~Inst_VOPC__V_CMPX_F_I64

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_F_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LT_I64::Inst_VOPC__V_CMPX_LT_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lt_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LT_I64

    Inst_VOPC__V_CMPX_LT_I64::~Inst_VOPC__V_CMPX_LT_I64()
    {
    } // ~Inst_VOPC__V_CMPX_LT_I64

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LT_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_EQ_I64::Inst_VOPC__V_CMPX_EQ_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_eq_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_EQ_I64

    Inst_VOPC__V_CMPX_EQ_I64::~Inst_VOPC__V_CMPX_EQ_I64()
    {
    } // ~Inst_VOPC__V_CMPX_EQ_I64

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_EQ_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LE_I64::Inst_VOPC__V_CMPX_LE_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_le_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LE_I64

    Inst_VOPC__V_CMPX_LE_I64::~Inst_VOPC__V_CMPX_LE_I64()
    {
    } // ~Inst_VOPC__V_CMPX_LE_I64

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GT_I64::Inst_VOPC__V_CMPX_GT_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_gt_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GT_I64

    Inst_VOPC__V_CMPX_GT_I64::~Inst_VOPC__V_CMPX_GT_I64()
    {
    } // ~Inst_VOPC__V_CMPX_GT_I64

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GT_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_NE_I64::Inst_VOPC__V_CMPX_NE_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ne_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_NE_I64

    Inst_VOPC__V_CMPX_NE_I64::~Inst_VOPC__V_CMPX_NE_I64()
    {
    } // ~Inst_VOPC__V_CMPX_NE_I64

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GE_I64::Inst_VOPC__V_CMPX_GE_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ge_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GE_I64

    Inst_VOPC__V_CMPX_GE_I64::~Inst_VOPC__V_CMPX_GE_I64()
    {
    } // ~Inst_VOPC__V_CMPX_GE_I64

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_T_I64::Inst_VOPC__V_CMPX_T_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_t_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_T_I64

    Inst_VOPC__V_CMPX_T_I64::~Inst_VOPC__V_CMPX_T_I64()
    {
    } // ~Inst_VOPC__V_CMPX_T_I64

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_T_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_F_U64::Inst_VOPC__V_CMPX_F_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_f_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_F_U64

    Inst_VOPC__V_CMPX_F_U64::~Inst_VOPC__V_CMPX_F_U64()
    {
    } // ~Inst_VOPC__V_CMPX_F_U64

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_F_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LT_U64::Inst_VOPC__V_CMPX_LT_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lt_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LT_U64

    Inst_VOPC__V_CMPX_LT_U64::~Inst_VOPC__V_CMPX_LT_U64()
    {
    } // ~Inst_VOPC__V_CMPX_LT_U64

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LT_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_EQ_U64::Inst_VOPC__V_CMPX_EQ_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_eq_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_EQ_U64

    Inst_VOPC__V_CMPX_EQ_U64::~Inst_VOPC__V_CMPX_EQ_U64()
    {
    } // ~Inst_VOPC__V_CMPX_EQ_U64

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_EQ_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LE_U64::Inst_VOPC__V_CMPX_LE_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_le_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LE_U64

    Inst_VOPC__V_CMPX_LE_U64::~Inst_VOPC__V_CMPX_LE_U64()
    {
    } // ~Inst_VOPC__V_CMPX_LE_U64

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GT_U64::Inst_VOPC__V_CMPX_GT_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_gt_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GT_U64

    Inst_VOPC__V_CMPX_GT_U64::~Inst_VOPC__V_CMPX_GT_U64()
    {
    } // ~Inst_VOPC__V_CMPX_GT_U64

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GT_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_NE_U64::Inst_VOPC__V_CMPX_NE_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ne_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_NE_U64

    Inst_VOPC__V_CMPX_NE_U64::~Inst_VOPC__V_CMPX_NE_U64()
    {
    } // ~Inst_VOPC__V_CMPX_NE_U64

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GE_U64::Inst_VOPC__V_CMPX_GE_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ge_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GE_U64

    Inst_VOPC__V_CMPX_GE_U64::~Inst_VOPC__V_CMPX_GE_U64()
    {
    } // ~Inst_VOPC__V_CMPX_GE_U64

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_T_U64::Inst_VOPC__V_CMPX_T_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_t_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_T_U64

    Inst_VOPC__V_CMPX_T_U64::~Inst_VOPC__V_CMPX_T_U64()
    {
    } // ~Inst_VOPC__V_CMPX_T_U64

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_T_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VINTRP__V_INTERP_P1_F32::Inst_VINTRP__V_INTERP_P1_F32(
        InFmt_VINTRP *iFmt)
        : Inst_VINTRP(iFmt, "v_interp_p1_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VINTRP__V_INTERP_P1_F32

    Inst_VINTRP__V_INTERP_P1_F32::~Inst_VINTRP__V_INTERP_P1_F32()
    {
    } // ~Inst_VINTRP__V_INTERP_P1_F32

    // D.f = P10 * S.f + P0; parameter interpolation
    void
    Inst_VINTRP__V_INTERP_P1_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VINTRP__V_INTERP_P2_F32::Inst_VINTRP__V_INTERP_P2_F32(
        InFmt_VINTRP *iFmt)
        : Inst_VINTRP(iFmt, "v_interp_p2_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VINTRP__V_INTERP_P2_F32

    Inst_VINTRP__V_INTERP_P2_F32::~Inst_VINTRP__V_INTERP_P2_F32()
    {
    } // ~Inst_VINTRP__V_INTERP_P2_F32

    // D.f = P20 * S.f + D.f; parameter interpolation
    void
    Inst_VINTRP__V_INTERP_P2_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VINTRP__V_INTERP_MOV_F32::Inst_VINTRP__V_INTERP_MOV_F32(
        InFmt_VINTRP *iFmt)
        : Inst_VINTRP(iFmt, "v_interp_mov_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VINTRP__V_INTERP_MOV_F32

    Inst_VINTRP__V_INTERP_MOV_F32::~Inst_VINTRP__V_INTERP_MOV_F32()
    {
    } // ~Inst_VINTRP__V_INTERP_MOV_F32

    void
    Inst_VINTRP__V_INTERP_MOV_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
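
    // The VINTRP opcodes above implement fixed-function parameter
    // interpolation for graphics shaders. This model is aimed at compute
    // kernels, so rather than risk silently wrong results they halt the
    // simulation with panicUnimplemented() if one is ever decoded.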

    Inst_VOP3__V_CMP_CLASS_F32::Inst_VOP3__V_CMP_CLASS_F32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_class_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_CLASS_F32

    Inst_VOP3__V_CMP_CLASS_F32::~Inst_VOP3__V_CMP_CLASS_F32()
    {
    } // ~Inst_VOP3__V_CMP_CLASS_F32

    // VCC = IEEE numeric class function specified in S1.u, performed on S0.f
    // The function reports true if the floating point value is any of the
    // numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOP3__V_CMP_CLASS_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
                    // is NaN
                    if (std::isnan(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 2)) {
                    // is -infinity
                    if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 3)) {
                    // is -normal
                    if (std::isnormal(src0[lane])
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 4)) {
                    // is -denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 5)) {
                    // is -zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 6)) {
                    // is +zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 7)) {
                    // is +denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 8)) {
                    // is +normal
                    if (std::isnormal(src0[lane])
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 9)) {
                    // is +infinity
                    if (std::isinf(src0[lane])
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
            }
        }

        sdst.write();
    } // execute
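
    // Because this is the VOP3 encoding of the compare, the result mask
    // lands in the scalar register pair named by instData.VDST rather than
    // in the implicit VCC the VOPC encoding uses; that is why sdst above is
    // built from instData.VDST instead of REG_VCC_LO.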

    Inst_VOP3__V_CMPX_CLASS_F32::Inst_VOP3__V_CMPX_CLASS_F32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_class_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_CLASS_F32

    Inst_VOP3__V_CMPX_CLASS_F32::~Inst_VOP3__V_CMPX_CLASS_F32()
    {
    } // ~Inst_VOP3__V_CMPX_CLASS_F32

    // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
    // S0.f
    // The function reports true if the floating point value is any of the
    // numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOP3__V_CMPX_CLASS_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
                    // is NaN
                    if (std::isnan(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 2)) {
                    // is -infinity
                    if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 3)) {
                    // is -normal
                    if (std::isnormal(src0[lane])
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 4)) {
                    // is -denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 5)) {
                    // is -zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 6)) {
                    // is +zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 7)) {
                    // is +denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 8)) {
                    // is +normal
                    if (std::isnormal(src0[lane])
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 9)) {
                    // is +infinity
                    if (std::isinf(src0[lane])
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
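
    // All four CLASS_F* implementations repeat the same ten-way test. A
    // hypothetical refactoring (not part of this file) could hoist one
    // lane's test into a helper, assuming only the <cmath> classification
    // functions and the bits() utility already used above:
    //
    //     template<typename FP>
    //     bool
    //     classMatch(FP val, uint32_t mask)
    //     {
    //         // bits 0/1: signaling or quiet NaN (not distinguished here)
    //         if ((bits(mask, 0) || bits(mask, 1)) && std::isnan(val))
    //             return true;
    //         bool neg = std::signbit(val);
    //         switch (std::fpclassify(val)) {
    //           case FP_INFINITE:  return bits(mask, neg ? 2 : 9);
    //           case FP_NORMAL:    return bits(mask, neg ? 3 : 8);
    //           case FP_SUBNORMAL: return bits(mask, neg ? 4 : 7);
    //           case FP_ZERO:      return bits(mask, neg ? 5 : 6);
    //           default:           return false;
    //         }
    //     }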

    Inst_VOP3__V_CMP_CLASS_F64::Inst_VOP3__V_CMP_CLASS_F64(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_class_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_CLASS_F64

    Inst_VOP3__V_CMP_CLASS_F64::~Inst_VOP3__V_CMP_CLASS_F64()
    {
    } // ~Inst_VOP3__V_CMP_CLASS_F64

    // VCC = IEEE numeric class function specified in S1.u, performed on S0.d
    // The function reports true if the floating point value is any of the
    // numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOP3__V_CMP_CLASS_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
                    // is NaN
                    if (std::isnan(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 2)) {
                    // is -infinity
                    if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 3)) {
                    // is -normal
                    if (std::isnormal(src0[lane])
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 4)) {
                    // is -denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 5)) {
                    // is -zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 6)) {
                    // is +zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 7)) {
                    // is +denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 8)) {
                    // is +normal
                    if (std::isnormal(src0[lane])
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 9)) {
                    // is +infinity
                    if (std::isinf(src0[lane])
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_CLASS_F64::Inst_VOP3__V_CMPX_CLASS_F64(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_class_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMPX_CLASS_F64

    Inst_VOP3__V_CMPX_CLASS_F64::~Inst_VOP3__V_CMPX_CLASS_F64()
    {
    } // ~Inst_VOP3__V_CMPX_CLASS_F64

    // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
    // S0.d
    // The function reports true if the floating point value is any of the
    // numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOP3__V_CMPX_CLASS_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
                    // is NaN
                    if (std::isnan(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 2)) {
                    // is -infinity
                    if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 3)) {
                    // is -normal
                    if (std::isnormal(src0[lane])
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 4)) {
                    // is -denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 5)) {
                    // is -zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 6)) {
                    // is +zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 7)) {
                    // is +denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 8)) {
                    // is +normal
                    if (std::isnormal(src0[lane])
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 9)) {
                    // is +infinity
                    if (std::isinf(src0[lane])
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_CLASS_F16::Inst_VOP3__V_CMP_CLASS_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_class_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_CLASS_F16

    Inst_VOP3__V_CMP_CLASS_F16::~Inst_VOP3__V_CMP_CLASS_F16()
    {
    } // ~Inst_VOP3__V_CMP_CLASS_F16

    // VCC = IEEE numeric class function specified in S1.u, performed on S0.f16
    // The function reports true if the floating point value is any of the
    // numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOP3__V_CMP_CLASS_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_CLASS_F16::Inst_VOP3__V_CMPX_CLASS_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_class_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_CLASS_F16

    Inst_VOP3__V_CMPX_CLASS_F16::~Inst_VOP3__V_CMPX_CLASS_F16()
    {
    } // ~Inst_VOP3__V_CMPX_CLASS_F16

    // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
    // S0.f16
    // The function reports true if the floating point value is any of the
    // numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOP3__V_CMPX_CLASS_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
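
    // Note that the half-precision (F16) compare family below is almost
    // entirely stubbed out with panicUnimplemented(); only the trivial
    // v_cmp_tru_f16, v_cmpx_f_f16, and v_cmpx_tru_f16 variants, which do
    // not need an F16 datapath at all, are implemented.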

    Inst_VOP3__V_CMP_F_F16::Inst_VOP3__V_CMP_F_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_f_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_F_F16

    Inst_VOP3__V_CMP_F_F16::~Inst_VOP3__V_CMP_F_F16()
    {
    } // ~Inst_VOP3__V_CMP_F_F16

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_F_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_LT_F16::Inst_VOP3__V_CMP_LT_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lt_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_LT_F16

    Inst_VOP3__V_CMP_LT_F16::~Inst_VOP3__V_CMP_LT_F16()
    {
    } // ~Inst_VOP3__V_CMP_LT_F16

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_EQ_F16::Inst_VOP3__V_CMP_EQ_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_eq_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_EQ_F16

    Inst_VOP3__V_CMP_EQ_F16::~Inst_VOP3__V_CMP_EQ_F16()
    {
    } // ~Inst_VOP3__V_CMP_EQ_F16

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_EQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_LE_F16::Inst_VOP3__V_CMP_LE_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_le_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_LE_F16

    Inst_VOP3__V_CMP_LE_F16::~Inst_VOP3__V_CMP_LE_F16()
    {
    } // ~Inst_VOP3__V_CMP_LE_F16

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_GT_F16::Inst_VOP3__V_CMP_GT_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_gt_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_GT_F16

    Inst_VOP3__V_CMP_GT_F16::~Inst_VOP3__V_CMP_GT_F16()
    {
    } // ~Inst_VOP3__V_CMP_GT_F16

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_LG_F16::Inst_VOP3__V_CMP_LG_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lg_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_LG_F16

    Inst_VOP3__V_CMP_LG_F16::~Inst_VOP3__V_CMP_LG_F16()
    {
    } // ~Inst_VOP3__V_CMP_LG_F16

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_GE_F16::Inst_VOP3__V_CMP_GE_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ge_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_GE_F16

    Inst_VOP3__V_CMP_GE_F16::~Inst_VOP3__V_CMP_GE_F16()
    {
    } // ~Inst_VOP3__V_CMP_GE_F16

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_O_F16::Inst_VOP3__V_CMP_O_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_o_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_O_F16

    Inst_VOP3__V_CMP_O_F16::~Inst_VOP3__V_CMP_O_F16()
    {
    } // ~Inst_VOP3__V_CMP_O_F16

    // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_O_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_U_F16::Inst_VOP3__V_CMP_U_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_u_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_U_F16

    Inst_VOP3__V_CMP_U_F16::~Inst_VOP3__V_CMP_U_F16()
    {
    } // ~Inst_VOP3__V_CMP_U_F16

    // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_U_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_NGE_F16::Inst_VOP3__V_CMP_NGE_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nge_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_NGE_F16

    Inst_VOP3__V_CMP_NGE_F16::~Inst_VOP3__V_CMP_NGE_F16()
    {
    } // ~Inst_VOP3__V_CMP_NGE_F16

    // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NGE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_NLG_F16::Inst_VOP3__V_CMP_NLG_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nlg_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_NLG_F16

    Inst_VOP3__V_CMP_NLG_F16::~Inst_VOP3__V_CMP_NLG_F16()
    {
    } // ~Inst_VOP3__V_CMP_NLG_F16

    // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NLG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_NGT_F16::Inst_VOP3__V_CMP_NGT_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ngt_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_NGT_F16

    Inst_VOP3__V_CMP_NGT_F16::~Inst_VOP3__V_CMP_NGT_F16()
    {
    } // ~Inst_VOP3__V_CMP_NGT_F16

    // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NGT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_NLE_F16::Inst_VOP3__V_CMP_NLE_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nle_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_NLE_F16

    Inst_VOP3__V_CMP_NLE_F16::~Inst_VOP3__V_CMP_NLE_F16()
    {
    } // ~Inst_VOP3__V_CMP_NLE_F16

    // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NLE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_NEQ_F16::Inst_VOP3__V_CMP_NEQ_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_neq_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_NEQ_F16

    Inst_VOP3__V_CMP_NEQ_F16::~Inst_VOP3__V_CMP_NEQ_F16()
    {
    } // ~Inst_VOP3__V_CMP_NEQ_F16

    // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NEQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_NLT_F16::Inst_VOP3__V_CMP_NLT_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nlt_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_NLT_F16

    Inst_VOP3__V_CMP_NLT_F16::~Inst_VOP3__V_CMP_NLT_F16()
    {
    } // ~Inst_VOP3__V_CMP_NLT_F16

    // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NLT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_TRU_F16::Inst_VOP3__V_CMP_TRU_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_tru_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_TRU_F16

    Inst_VOP3__V_CMP_TRU_F16::~Inst_VOP3__V_CMP_TRU_F16()
    {
    } // ~Inst_VOP3__V_CMP_TRU_F16

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_TRU_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_F_F16::Inst_VOP3__V_CMPX_F_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_f_f16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_F_F16

    Inst_VOP3__V_CMPX_F_F16::~Inst_VOP3__V_CMPX_F_F16()
    {
    } // ~Inst_VOP3__V_CMPX_F_F16

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_F_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LT_F16::Inst_VOP3__V_CMPX_LT_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lt_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_LT_F16

    Inst_VOP3__V_CMPX_LT_F16::~Inst_VOP3__V_CMPX_LT_F16()
    {
    } // ~Inst_VOP3__V_CMPX_LT_F16

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_EQ_F16::Inst_VOP3__V_CMPX_EQ_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_eq_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_EQ_F16

    Inst_VOP3__V_CMPX_EQ_F16::~Inst_VOP3__V_CMPX_EQ_F16()
    {
    } // ~Inst_VOP3__V_CMPX_EQ_F16

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_EQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_LE_F16::Inst_VOP3__V_CMPX_LE_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_le_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_LE_F16

    Inst_VOP3__V_CMPX_LE_F16::~Inst_VOP3__V_CMPX_LE_F16()
    {
    } // ~Inst_VOP3__V_CMPX_LE_F16

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_GT_F16::Inst_VOP3__V_CMPX_GT_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_gt_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_GT_F16

    Inst_VOP3__V_CMPX_GT_F16::~Inst_VOP3__V_CMPX_GT_F16()
    {
    } // ~Inst_VOP3__V_CMPX_GT_F16

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_LG_F16::Inst_VOP3__V_CMPX_LG_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lg_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_LG_F16

    Inst_VOP3__V_CMPX_LG_F16::~Inst_VOP3__V_CMPX_LG_F16()
    {
    } // ~Inst_VOP3__V_CMPX_LG_F16

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_GE_F16::Inst_VOP3__V_CMPX_GE_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ge_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_GE_F16

    Inst_VOP3__V_CMPX_GE_F16::~Inst_VOP3__V_CMPX_GE_F16()
    {
    } // ~Inst_VOP3__V_CMPX_GE_F16

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_O_F16::Inst_VOP3__V_CMPX_O_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_o_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_O_F16

    Inst_VOP3__V_CMPX_O_F16::~Inst_VOP3__V_CMPX_O_F16()
    {
    } // ~Inst_VOP3__V_CMPX_O_F16

    // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOP3__V_CMPX_O_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_U_F16::Inst_VOP3__V_CMPX_U_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_u_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_U_F16

    Inst_VOP3__V_CMPX_U_F16::~Inst_VOP3__V_CMPX_U_F16()
    {
    } // ~Inst_VOP3__V_CMPX_U_F16

    // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOP3__V_CMPX_U_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_NGE_F16::Inst_VOP3__V_CMPX_NGE_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nge_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_NGE_F16

    Inst_VOP3__V_CMPX_NGE_F16::~Inst_VOP3__V_CMPX_NGE_F16()
    {
    } // ~Inst_VOP3__V_CMPX_NGE_F16

    // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NGE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_NLG_F16::Inst_VOP3__V_CMPX_NLG_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nlg_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_NLG_F16

    Inst_VOP3__V_CMPX_NLG_F16::~Inst_VOP3__V_CMPX_NLG_F16()
    {
    } // ~Inst_VOP3__V_CMPX_NLG_F16

    // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NLG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_NGT_F16::Inst_VOP3__V_CMPX_NGT_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ngt_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_NGT_F16

    Inst_VOP3__V_CMPX_NGT_F16::~Inst_VOP3__V_CMPX_NGT_F16()
    {
    } // ~Inst_VOP3__V_CMPX_NGT_F16

    // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NGT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_NLE_F16::Inst_VOP3__V_CMPX_NLE_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nle_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_NLE_F16

    Inst_VOP3__V_CMPX_NLE_F16::~Inst_VOP3__V_CMPX_NLE_F16()
    {
    } // ~Inst_VOP3__V_CMPX_NLE_F16

    // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NLE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_NEQ_F16::Inst_VOP3__V_CMPX_NEQ_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_neq_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_NEQ_F16

    Inst_VOP3__V_CMPX_NEQ_F16::~Inst_VOP3__V_CMPX_NEQ_F16()
    {
    } // ~Inst_VOP3__V_CMPX_NEQ_F16

    // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NEQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_NLT_F16::Inst_VOP3__V_CMPX_NLT_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nlt_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_NLT_F16

    Inst_VOP3__V_CMPX_NLT_F16::~Inst_VOP3__V_CMPX_NLT_F16()
    {
    } // ~Inst_VOP3__V_CMPX_NLT_F16

    // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NLT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_TRU_F16::Inst_VOP3__V_CMPX_TRU_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_tru_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_TRU_F16

    Inst_VOP3__V_CMPX_TRU_F16::~Inst_VOP3__V_CMPX_TRU_F16()
    {
    } // ~Inst_VOP3__V_CMPX_TRU_F16

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_TRU_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_F_F32::Inst_VOP3__V_CMP_F_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_f_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_F_F32

    Inst_VOP3__V_CMP_F_F32::~Inst_VOP3__V_CMP_F_F32()
    {
    } // ~Inst_VOP3__V_CMP_F_F32

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_F_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LT_F32::Inst_VOP3__V_CMP_LT_F32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lt_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_LT_F32

    Inst_VOP3__V_CMP_LT_F32::~Inst_VOP3__V_CMP_LT_F32()
    {
    } // ~Inst_VOP3__V_CMP_LT_F32

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute
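
    // These F32 compares rely on the native C++ relational operators, so a
    // NaN in either source makes the ordered predicates (lt, eq, le, gt,
    // ge) evaluate false for that lane, consistent with IEEE-754
    // ordered-compare semantics.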

    Inst_VOP3__V_CMP_EQ_F32::Inst_VOP3__V_CMP_EQ_F32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_eq_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_EQ_F32

    Inst_VOP3__V_CMP_EQ_F32::~Inst_VOP3__V_CMP_EQ_F32()
    {
    } // ~Inst_VOP3__V_CMP_EQ_F32

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_EQ_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LE_F32::Inst_VOP3__V_CMP_LE_F32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_le_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_LE_F32

    Inst_VOP3__V_CMP_LE_F32::~Inst_VOP3__V_CMP_LE_F32()
    {
    } // ~Inst_VOP3__V_CMP_LE_F32

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GT_F32::Inst_VOP3__V_CMP_GT_F32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_gt_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_GT_F32

    Inst_VOP3__V_CMP_GT_F32::~Inst_VOP3__V_CMP_GT_F32()
    {
    } // ~Inst_VOP3__V_CMP_GT_F32

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LG_F32::Inst_VOP3__V_CMP_LG_F32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lg_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_LG_F32

    Inst_VOP3__V_CMP_LG_F32::~Inst_VOP3__V_CMP_LG_F32()
    {
    } // ~Inst_VOP3__V_CMP_LG_F32

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GE_F32::Inst_VOP3__V_CMP_GE_F32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ge_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_GE_F32

    Inst_VOP3__V_CMP_GE_F32::~Inst_VOP3__V_CMP_GE_F32()
    {
    } // ~Inst_VOP3__V_CMP_GE_F32

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_O_F32::Inst_VOP3__V_CMP_O_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt
, "v_cmp_o_f32", true)
16906 } // Inst_VOP3__V_CMP_O_F32
16908 Inst_VOP3__V_CMP_O_F32::~Inst_VOP3__V_CMP_O_F32()
16910 } // ~Inst_VOP3__V_CMP_O_F32
16912 // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
16914 Inst_VOP3__V_CMP_O_F32::execute(GPUDynInstPtr gpuDynInst
)
16916 Wavefront
*wf
= gpuDynInst
->wavefront();
16917 ConstVecOperandF32
src0(gpuDynInst
, extData
.SRC0
);
16918 ConstVecOperandF32
src1(gpuDynInst
, extData
.SRC1
);
16919 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
16924 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
16925 if (wf
->execMask(lane
)) {
16926 sdst
.setBit(lane
, (!std::isnan(src0
[lane
])
16927 && !std::isnan(src1
[lane
])) ? 1 : 0);
16934 Inst_VOP3__V_CMP_U_F32::Inst_VOP3__V_CMP_U_F32(InFmt_VOP3
*iFmt
)
16935 : Inst_VOP3(iFmt
, "v_cmp_u_f32", true)
16939 } // Inst_VOP3__V_CMP_U_F32
16941 Inst_VOP3__V_CMP_U_F32::~Inst_VOP3__V_CMP_U_F32()
16943 } // ~Inst_VOP3__V_CMP_U_F32
16945 // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
16947 Inst_VOP3__V_CMP_U_F32::execute(GPUDynInstPtr gpuDynInst
)
16949 Wavefront
*wf
= gpuDynInst
->wavefront();
16950 ConstVecOperandF32
src0(gpuDynInst
, extData
.SRC0
);
16951 ConstVecOperandF32
src1(gpuDynInst
, extData
.SRC1
);
16952 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
16957 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
16958 if (wf
->execMask(lane
)) {
16959 sdst
.setBit(lane
, (std::isnan(src0
[lane
])
16960 || std::isnan(src1
[lane
])) ? 1 : 0);
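
    /**
     * Ordered/unordered semantics: v_cmp_o_* is true iff neither source is
     * a NaN, and v_cmp_u_* is true iff either source is a NaN. The negated
     * compares that follow (nge, nlg, ngt, nle, neq, nlt) are the logical
     * inverses of the corresponding ordered compares, so they evaluate to
     * true whenever either input is a NaN; this matches IEEE-754 behavior
     * for the C++ relational operators used here, which return false on
     * unordered operands.
     */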

    Inst_VOP3__V_CMP_NGE_F32::Inst_VOP3__V_CMP_NGE_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nge_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_NGE_F32

    Inst_VOP3__V_CMP_NGE_F32::~Inst_VOP3__V_CMP_NGE_F32()
    {
    } // ~Inst_VOP3__V_CMP_NGE_F32

    // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NGE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NLG_F32::Inst_VOP3__V_CMP_NLG_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nlg_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_NLG_F32

    Inst_VOP3__V_CMP_NLG_F32::~Inst_VOP3__V_CMP_NLG_F32()
    {
    } // ~Inst_VOP3__V_CMP_NLG_F32

    // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NLG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NGT_F32::Inst_VOP3__V_CMP_NGT_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ngt_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_NGT_F32

    Inst_VOP3__V_CMP_NGT_F32::~Inst_VOP3__V_CMP_NGT_F32()
    {
    } // ~Inst_VOP3__V_CMP_NGT_F32

    // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NGT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NLE_F32::Inst_VOP3__V_CMP_NLE_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nle_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_NLE_F32

    Inst_VOP3__V_CMP_NLE_F32::~Inst_VOP3__V_CMP_NLE_F32()
    {
    } // ~Inst_VOP3__V_CMP_NLE_F32

    // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NLE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NEQ_F32::Inst_VOP3__V_CMP_NEQ_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_neq_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_NEQ_F32

    Inst_VOP3__V_CMP_NEQ_F32::~Inst_VOP3__V_CMP_NEQ_F32()
    {
    } // ~Inst_VOP3__V_CMP_NEQ_F32

    // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NEQ_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NLT_F32::Inst_VOP3__V_CMP_NLT_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nlt_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_NLT_F32

    Inst_VOP3__V_CMP_NLT_F32::~Inst_VOP3__V_CMP_NLT_F32()
    {
    } // ~Inst_VOP3__V_CMP_NLT_F32

    // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NLT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_TRU_F32::Inst_VOP3__V_CMP_TRU_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_tru_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_TRU_F32

    Inst_VOP3__V_CMP_TRU_F32::~Inst_VOP3__V_CMP_TRU_F32()
    {
    } // ~Inst_VOP3__V_CMP_TRU_F32

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_TRU_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        sdst.write();
    } // execute
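
    /**
     * The v_cmpx_* variants below compute the same per-lane results as
     * their v_cmp_* counterparts and additionally copy the result vector
     * into the EXEC mask, so subsequent vector instructions execute only
     * in lanes where the compare was true. The extra step is simply
     *
     *     wf->execMask() = sdst.rawData();
     *
     * performed after the per-lane loop and before the destination write.
     */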

    Inst_VOP3__V_CMPX_F_F32::Inst_VOP3__V_CMPX_F_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_f_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_F_F32

    Inst_VOP3__V_CMPX_F_F32::~Inst_VOP3__V_CMPX_F_F32()
    {
    } // ~Inst_VOP3__V_CMPX_F_F32

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_F_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LT_F32::Inst_VOP3__V_CMPX_LT_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lt_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_LT_F32

    Inst_VOP3__V_CMPX_LT_F32::~Inst_VOP3__V_CMPX_LT_F32()
    {
    } // ~Inst_VOP3__V_CMPX_LT_F32

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_EQ_F32::Inst_VOP3__V_CMPX_EQ_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_eq_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_EQ_F32

    Inst_VOP3__V_CMPX_EQ_F32::~Inst_VOP3__V_CMPX_EQ_F32()
    {
    } // ~Inst_VOP3__V_CMPX_EQ_F32

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_EQ_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LE_F32::Inst_VOP3__V_CMPX_LE_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_le_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_LE_F32

    Inst_VOP3__V_CMPX_LE_F32::~Inst_VOP3__V_CMPX_LE_F32()
    {
    } // ~Inst_VOP3__V_CMPX_LE_F32

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GT_F32::Inst_VOP3__V_CMPX_GT_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_gt_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_GT_F32

    Inst_VOP3__V_CMPX_GT_F32::~Inst_VOP3__V_CMPX_GT_F32()
    {
    } // ~Inst_VOP3__V_CMPX_GT_F32

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LG_F32::Inst_VOP3__V_CMPX_LG_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lg_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_LG_F32

    Inst_VOP3__V_CMPX_LG_F32::~Inst_VOP3__V_CMPX_LG_F32()
    {
    } // ~Inst_VOP3__V_CMPX_LG_F32

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, (src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GE_F32::Inst_VOP3__V_CMPX_GE_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ge_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_GE_F32

    Inst_VOP3__V_CMPX_GE_F32::~Inst_VOP3__V_CMPX_GE_F32()
    {
    } // ~Inst_VOP3__V_CMPX_GE_F32

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_O_F32::Inst_VOP3__V_CMPX_O_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_o_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_O_F32

    Inst_VOP3__V_CMPX_O_F32::~Inst_VOP3__V_CMPX_O_F32()
    {
    } // ~Inst_VOP3__V_CMPX_O_F32

    // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOP3__V_CMPX_O_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, (!std::isnan(src0[lane])
                    && !std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_U_F32::Inst_VOP3__V_CMPX_U_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_u_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_U_F32

    Inst_VOP3__V_CMPX_U_F32::~Inst_VOP3__V_CMPX_U_F32()
    {
    } // ~Inst_VOP3__V_CMPX_U_F32

    // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOP3__V_CMPX_U_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, (std::isnan(src0[lane])
                    || std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NGE_F32::Inst_VOP3__V_CMPX_NGE_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nge_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_NGE_F32

    Inst_VOP3__V_CMPX_NGE_F32::~Inst_VOP3__V_CMPX_NGE_F32()
    {
    } // ~Inst_VOP3__V_CMPX_NGE_F32

    // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NGE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NLG_F32::Inst_VOP3__V_CMPX_NLG_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nlg_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_NLG_F32

    Inst_VOP3__V_CMPX_NLG_F32::~Inst_VOP3__V_CMPX_NLG_F32()
    {
    } // ~Inst_VOP3__V_CMPX_NLG_F32

    // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NLG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NGT_F32::Inst_VOP3__V_CMPX_NGT_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ngt_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_NGT_F32

    Inst_VOP3__V_CMPX_NGT_F32::~Inst_VOP3__V_CMPX_NGT_F32()
    {
    } // ~Inst_VOP3__V_CMPX_NGT_F32

    // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NGT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NLE_F32::Inst_VOP3__V_CMPX_NLE_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nle_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_NLE_F32

    Inst_VOP3__V_CMPX_NLE_F32::~Inst_VOP3__V_CMPX_NLE_F32()
    {
    } // ~Inst_VOP3__V_CMPX_NLE_F32

    // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NLE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NEQ_F32::Inst_VOP3__V_CMPX_NEQ_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_neq_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_NEQ_F32

    Inst_VOP3__V_CMPX_NEQ_F32::~Inst_VOP3__V_CMPX_NEQ_F32()
    {
    } // ~Inst_VOP3__V_CMPX_NEQ_F32

    // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NEQ_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NLT_F32::Inst_VOP3__V_CMPX_NLT_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nlt_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_NLT_F32

    Inst_VOP3__V_CMPX_NLT_F32::~Inst_VOP3__V_CMPX_NLT_F32()
    {
    } // ~Inst_VOP3__V_CMPX_NLT_F32

    // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NLT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_TRU_F32::Inst_VOP3__V_CMPX_TRU_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_tru_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_TRU_F32

    Inst_VOP3__V_CMPX_TRU_F32::~Inst_VOP3__V_CMPX_TRU_F32()
    {
    } // ~Inst_VOP3__V_CMPX_TRU_F32

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_TRU_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
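
    /**
     * The F64 compares below additionally honor the VOP3 input modifiers:
     * ABS bit 0 / NEG bit 0 apply to SRC0, and ABS bit 1 / NEG bit 1 apply
     * to SRC1. Bit 2 would select a third source operand, which compares
     * do not have, so it is asserted to be clear. A sketch of the decode,
     * using the src0/src1 operands declared in the bodies below:
     *
     *     if (instData.ABS & 0x1) { src0.absModifier(); }
     *     if (instData.ABS & 0x2) { src1.absModifier(); }
     *     if (extData.NEG & 0x1) { src0.negModifier(); }
     *     if (extData.NEG & 0x2) { src1.negModifier(); }
     */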

    Inst_VOP3__V_CMP_F_F64::Inst_VOP3__V_CMP_F_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_f_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_F_F64

    Inst_VOP3__V_CMP_F_F64::~Inst_VOP3__V_CMP_F_F64()
    {
    } // ~Inst_VOP3__V_CMP_F_F64

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_F_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LT_F64::Inst_VOP3__V_CMP_LT_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lt_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_LT_F64

    Inst_VOP3__V_CMP_LT_F64::~Inst_VOP3__V_CMP_LT_F64()
    {
    } // ~Inst_VOP3__V_CMP_LT_F64

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_EQ_F64::Inst_VOP3__V_CMP_EQ_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_eq_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_EQ_F64

    Inst_VOP3__V_CMP_EQ_F64::~Inst_VOP3__V_CMP_EQ_F64()
    {
    } // ~Inst_VOP3__V_CMP_EQ_F64

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_EQ_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LE_F64::Inst_VOP3__V_CMP_LE_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_le_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_LE_F64

    Inst_VOP3__V_CMP_LE_F64::~Inst_VOP3__V_CMP_LE_F64()
    {
    } // ~Inst_VOP3__V_CMP_LE_F64

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GT_F64::Inst_VOP3__V_CMP_GT_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_gt_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_GT_F64

    Inst_VOP3__V_CMP_GT_F64::~Inst_VOP3__V_CMP_GT_F64()
    {
    } // ~Inst_VOP3__V_CMP_GT_F64

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LG_F64::Inst_VOP3__V_CMP_LG_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lg_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_LG_F64

    Inst_VOP3__V_CMP_LG_F64::~Inst_VOP3__V_CMP_LG_F64()
    {
    } // ~Inst_VOP3__V_CMP_LG_F64

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LG_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, (src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GE_F64::Inst_VOP3__V_CMP_GE_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ge_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_GE_F64

    Inst_VOP3__V_CMP_GE_F64::~Inst_VOP3__V_CMP_GE_F64()
    {
    } // ~Inst_VOP3__V_CMP_GE_F64

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_O_F64::Inst_VOP3__V_CMP_O_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_o_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_O_F64

    Inst_VOP3__V_CMP_O_F64::~Inst_VOP3__V_CMP_O_F64()
    {
    } // ~Inst_VOP3__V_CMP_O_F64

    // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_O_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, (!std::isnan(src0[lane])
                    && !std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_U_F64::Inst_VOP3__V_CMP_U_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_u_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_U_F64

    Inst_VOP3__V_CMP_U_F64::~Inst_VOP3__V_CMP_U_F64()
    {
    } // ~Inst_VOP3__V_CMP_U_F64

    // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_U_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, (std::isnan(src0[lane])
                    || std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NGE_F64::Inst_VOP3__V_CMP_NGE_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nge_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_NGE_F64

    Inst_VOP3__V_CMP_NGE_F64::~Inst_VOP3__V_CMP_NGE_F64()
    {
    } // ~Inst_VOP3__V_CMP_NGE_F64

    // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NGE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NLG_F64::Inst_VOP3__V_CMP_NLG_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nlg_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_NLG_F64

    Inst_VOP3__V_CMP_NLG_F64::~Inst_VOP3__V_CMP_NLG_F64()
    {
    } // ~Inst_VOP3__V_CMP_NLG_F64

    // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NLG_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NGT_F64::Inst_VOP3__V_CMP_NGT_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ngt_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_NGT_F64

    Inst_VOP3__V_CMP_NGT_F64::~Inst_VOP3__V_CMP_NGT_F64()
    {
    } // ~Inst_VOP3__V_CMP_NGT_F64

    // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NGT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NLE_F64::Inst_VOP3__V_CMP_NLE_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nle_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_NLE_F64

    Inst_VOP3__V_CMP_NLE_F64::~Inst_VOP3__V_CMP_NLE_F64()
    {
    } // ~Inst_VOP3__V_CMP_NLE_F64

    // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NLE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NEQ_F64::Inst_VOP3__V_CMP_NEQ_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_neq_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_NEQ_F64

    Inst_VOP3__V_CMP_NEQ_F64::~Inst_VOP3__V_CMP_NEQ_F64()
    {
    } // ~Inst_VOP3__V_CMP_NEQ_F64

    // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NEQ_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NLT_F64::Inst_VOP3__V_CMP_NLT_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nlt_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_NLT_F64

    Inst_VOP3__V_CMP_NLT_F64::~Inst_VOP3__V_CMP_NLT_F64()
    {
    } // ~Inst_VOP3__V_CMP_NLT_F64

    // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NLT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_TRU_F64::Inst_VOP3__V_CMP_TRU_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_tru_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_TRU_F64

    Inst_VOP3__V_CMP_TRU_F64::~Inst_VOP3__V_CMP_TRU_F64()
    {
    } // ~Inst_VOP3__V_CMP_TRU_F64

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_TRU_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        sdst.write();
    } // execute
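
    /**
     * The 64b cmpx variants below combine the two behaviors already seen:
     * the ABS/NEG input modifiers are applied to the F64 sources first,
     * then the per-lane results are written both to the scalar destination
     * and to the EXEC mask.
     */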
18561 Inst_VOP3__V_CMPX_F_F64::Inst_VOP3__V_CMPX_F_F64(
18563 : Inst_VOP3(iFmt
, "v_cmpx_f_f64", true)
18567 } // Inst_VOP3__V_CMPX_F_F64
18569 Inst_VOP3__V_CMPX_F_F64::~Inst_VOP3__V_CMPX_F_F64()
18571 } // ~Inst_VOP3__V_CMPX_F_F64
18573 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
18575 Inst_VOP3__V_CMPX_F_F64::execute(GPUDynInstPtr gpuDynInst
)
18577 Wavefront
*wf
= gpuDynInst
->wavefront();
18578 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
18580 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
18581 if (wf
->execMask(lane
)) {
18582 sdst
.setBit(lane
, 0);
18586 wf
->execMask() = sdst
.rawData();
18590 Inst_VOP3__V_CMPX_LT_F64::Inst_VOP3__V_CMPX_LT_F64(
18592 : Inst_VOP3(iFmt
, "v_cmpx_lt_f64", true)
18596 } // Inst_VOP3__V_CMPX_LT_F64
18598 Inst_VOP3__V_CMPX_LT_F64::~Inst_VOP3__V_CMPX_LT_F64()
18600 } // ~Inst_VOP3__V_CMPX_LT_F64
18602 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
18604 Inst_VOP3__V_CMPX_LT_F64::execute(GPUDynInstPtr gpuDynInst
)
18606 Wavefront
*wf
= gpuDynInst
->wavefront();
18607 ConstVecOperandF64
src0(gpuDynInst
, extData
.SRC0
);
18608 ConstVecOperandF64
src1(gpuDynInst
, extData
.SRC1
);
18609 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
18614 if (instData
.ABS
& 0x1) {
18615 src0
.absModifier();
18618 if (instData
.ABS
& 0x2) {
18619 src1
.absModifier();
18622 if (extData
.NEG
& 0x1) {
18623 src0
.negModifier();
18626 if (extData
.NEG
& 0x2) {
18627 src1
.negModifier();
18631 * input modifiers are supported by FP operations only
18633 assert(!(instData
.ABS
& 0x4));
18634 assert(!(extData
.NEG
& 0x4));
18636 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
18637 if (wf
->execMask(lane
)) {
18638 sdst
.setBit(lane
, src0
[lane
] < src1
[lane
] ? 1 : 0);
18642 wf
->execMask() = sdst
.rawData();
18646 Inst_VOP3__V_CMPX_EQ_F64::Inst_VOP3__V_CMPX_EQ_F64(
18648 : Inst_VOP3(iFmt
, "v_cmpx_eq_f64", true)
18652 } // Inst_VOP3__V_CMPX_EQ_F64
18654 Inst_VOP3__V_CMPX_EQ_F64::~Inst_VOP3__V_CMPX_EQ_F64()
18656 } // ~Inst_VOP3__V_CMPX_EQ_F64
18658 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
18660 Inst_VOP3__V_CMPX_EQ_F64::execute(GPUDynInstPtr gpuDynInst
)
18662 Wavefront
*wf
= gpuDynInst
->wavefront();
18663 ConstVecOperandF64
src0(gpuDynInst
, extData
.SRC0
);
18664 ConstVecOperandF64
src1(gpuDynInst
, extData
.SRC1
);
18665 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
18670 if (instData
.ABS
& 0x1) {
18671 src0
.absModifier();
18674 if (instData
.ABS
& 0x2) {
18675 src1
.absModifier();
18678 if (extData
.NEG
& 0x1) {
18679 src0
.negModifier();
18682 if (extData
.NEG
& 0x2) {
18683 src1
.negModifier();
18687 * input modifiers are supported by FP operations only
18689 assert(!(instData
.ABS
& 0x4));
18690 assert(!(extData
.NEG
& 0x4));
18692 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
18693 if (wf
->execMask(lane
)) {
18694 sdst
.setBit(lane
, src0
[lane
] == src1
[lane
] ? 1 : 0);
18698 wf
->execMask() = sdst
.rawData();
18702 Inst_VOP3__V_CMPX_LE_F64::Inst_VOP3__V_CMPX_LE_F64(
18704 : Inst_VOP3(iFmt
, "v_cmpx_le_f64", true)
18708 } // Inst_VOP3__V_CMPX_LE_F64
18710 Inst_VOP3__V_CMPX_LE_F64::~Inst_VOP3__V_CMPX_LE_F64()
18712 } // ~Inst_VOP3__V_CMPX_LE_F64
18714 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
18716 Inst_VOP3__V_CMPX_LE_F64::execute(GPUDynInstPtr gpuDynInst
)
18718 Wavefront
*wf
= gpuDynInst
->wavefront();
18719 ConstVecOperandF64
src0(gpuDynInst
, extData
.SRC0
);
18720 ConstVecOperandF64
src1(gpuDynInst
, extData
.SRC1
);
18721 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
18726 if (instData
.ABS
& 0x1) {
18727 src0
.absModifier();
18730 if (instData
.ABS
& 0x2) {
18731 src1
.absModifier();
18734 if (extData
.NEG
& 0x1) {
18735 src0
.negModifier();
18738 if (extData
.NEG
& 0x2) {
18739 src1
.negModifier();
18743 * input modifiers are supported by FP operations only
18745 assert(!(instData
.ABS
& 0x4));
18746 assert(!(extData
.NEG
& 0x4));
18748 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
18749 if (wf
->execMask(lane
)) {
18750 sdst
.setBit(lane
, src0
[lane
] <= src1
[lane
] ? 1 : 0);
18754 wf
->execMask() = sdst
.rawData();
18758 Inst_VOP3__V_CMPX_GT_F64::Inst_VOP3__V_CMPX_GT_F64(
18760 : Inst_VOP3(iFmt
, "v_cmpx_gt_f64", true)
18764 } // Inst_VOP3__V_CMPX_GT_F64
18766 Inst_VOP3__V_CMPX_GT_F64::~Inst_VOP3__V_CMPX_GT_F64()
18768 } // ~Inst_VOP3__V_CMPX_GT_F64
18770 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
18772 Inst_VOP3__V_CMPX_GT_F64::execute(GPUDynInstPtr gpuDynInst
)
18774 Wavefront
*wf
= gpuDynInst
->wavefront();
18775 ConstVecOperandF64
src0(gpuDynInst
, extData
.SRC0
);
18776 ConstVecOperandF64
src1(gpuDynInst
, extData
.SRC1
);
18777 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
18782 if (instData
.ABS
& 0x1) {
18783 src0
.absModifier();
18786 if (instData
.ABS
& 0x2) {
18787 src1
.absModifier();
18790 if (extData
.NEG
& 0x1) {
18791 src0
.negModifier();
18794 if (extData
.NEG
& 0x2) {
18795 src1
.negModifier();
18799 * input modifiers are supported by FP operations only
18801 assert(!(instData
.ABS
& 0x4));
18802 assert(!(extData
.NEG
& 0x4));
18804 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
18805 if (wf
->execMask(lane
)) {
18806 sdst
.setBit(lane
, src0
[lane
] > src1
[lane
] ? 1 : 0);
18810 wf
->execMask() = sdst
.rawData();
18814 Inst_VOP3__V_CMPX_LG_F64::Inst_VOP3__V_CMPX_LG_F64(
18816 : Inst_VOP3(iFmt
, "v_cmpx_lg_f64", true)
18820 } // Inst_VOP3__V_CMPX_LG_F64
18822 Inst_VOP3__V_CMPX_LG_F64::~Inst_VOP3__V_CMPX_LG_F64()
18824 } // ~Inst_VOP3__V_CMPX_LG_F64
18826 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
18828 Inst_VOP3__V_CMPX_LG_F64::execute(GPUDynInstPtr gpuDynInst
)
18830 Wavefront
*wf
= gpuDynInst
->wavefront();
18831 ConstVecOperandF64
src0(gpuDynInst
, extData
.SRC0
);
18832 ConstVecOperandF64
src1(gpuDynInst
, extData
.SRC1
);
18833 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
18838 if (instData
.ABS
& 0x1) {
18839 src0
.absModifier();
18842 if (instData
.ABS
& 0x2) {
18843 src1
.absModifier();
18846 if (extData
.NEG
& 0x1) {
18847 src0
.negModifier();
18850 if (extData
.NEG
& 0x2) {
18851 src1
.negModifier();
18855 * input modifiers are supported by FP operations only
18857 assert(!(instData
.ABS
& 0x4));
18858 assert(!(extData
.NEG
& 0x4));
18860 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
18861 if (wf
->execMask(lane
)) {
18862 sdst
.setBit(lane
, (src0
[lane
] < src1
[lane
]
18863 || src0
[lane
] > src1
[lane
]) ? 1 : 0);
18867 wf
->execMask() = sdst
.rawData();
18871 Inst_VOP3__V_CMPX_GE_F64::Inst_VOP3__V_CMPX_GE_F64(
18873 : Inst_VOP3(iFmt
, "v_cmpx_ge_f64", true)
18877 } // Inst_VOP3__V_CMPX_GE_F64
18879 Inst_VOP3__V_CMPX_GE_F64::~Inst_VOP3__V_CMPX_GE_F64()
18881 } // ~Inst_VOP3__V_CMPX_GE_F64
18883 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
18885 Inst_VOP3__V_CMPX_GE_F64::execute(GPUDynInstPtr gpuDynInst
)
18887 Wavefront
*wf
= gpuDynInst
->wavefront();
18888 ConstVecOperandF64
src0(gpuDynInst
, extData
.SRC0
);
18889 ConstVecOperandF64
src1(gpuDynInst
, extData
.SRC1
);
18890 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
18895 if (instData
.ABS
& 0x1) {
18896 src0
.absModifier();
18899 if (instData
.ABS
& 0x2) {
18900 src1
.absModifier();
18903 if (extData
.NEG
& 0x1) {
18904 src0
.negModifier();
18907 if (extData
.NEG
& 0x2) {
18908 src1
.negModifier();
18912 * input modifiers are supported by FP operations only
18914 assert(!(instData
.ABS
& 0x4));
18915 assert(!(extData
.NEG
& 0x4));
18917 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
18918 if (wf
->execMask(lane
)) {
18919 sdst
.setBit(lane
, src0
[lane
] >= src1
[lane
] ? 1 : 0);
18923 wf
->execMask() = sdst
.rawData();
18927 Inst_VOP3__V_CMPX_O_F64::Inst_VOP3__V_CMPX_O_F64(
18929 : Inst_VOP3(iFmt
, "v_cmpx_o_f64", true)
18933 } // Inst_VOP3__V_CMPX_O_F64
18935 Inst_VOP3__V_CMPX_O_F64::~Inst_VOP3__V_CMPX_O_F64()
18937 } // ~Inst_VOP3__V_CMPX_O_F64
18939 // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
18942 Inst_VOP3__V_CMPX_O_F64::execute(GPUDynInstPtr gpuDynInst
)
18944 Wavefront
*wf
= gpuDynInst
->wavefront();
18945 ConstVecOperandF64
src0(gpuDynInst
, extData
.SRC0
);
18946 ConstVecOperandF64
src1(gpuDynInst
, extData
.SRC1
);
18947 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
18952 if (instData
.ABS
& 0x1) {
18953 src0
.absModifier();
18956 if (instData
.ABS
& 0x2) {
18957 src1
.absModifier();
18960 if (extData
.NEG
& 0x1) {
18961 src0
.negModifier();
18964 if (extData
.NEG
& 0x2) {
18965 src1
.negModifier();
18969 * input modifiers are supported by FP operations only
18971 assert(!(instData
.ABS
& 0x4));
18972 assert(!(extData
.NEG
& 0x4));
18974 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
18975 if (wf
->execMask(lane
)) {
18976 sdst
.setBit(lane
, (!std::isnan(src0
[lane
])
18977 && !std::isnan(src1
[lane
])) ? 1 : 0);
18981 wf
->execMask() = sdst
.rawData();
18985 Inst_VOP3__V_CMPX_U_F64::Inst_VOP3__V_CMPX_U_F64(
18987 : Inst_VOP3(iFmt
, "v_cmpx_u_f64", true)
18991 } // Inst_VOP3__V_CMPX_U_F64
18993 Inst_VOP3__V_CMPX_U_F64::~Inst_VOP3__V_CMPX_U_F64()
18995 } // ~Inst_VOP3__V_CMPX_U_F64
18997 // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
19000 Inst_VOP3__V_CMPX_U_F64::execute(GPUDynInstPtr gpuDynInst
)
19002 Wavefront
*wf
= gpuDynInst
->wavefront();
19003 ConstVecOperandF64
src0(gpuDynInst
, extData
.SRC0
);
19004 ConstVecOperandF64
src1(gpuDynInst
, extData
.SRC1
);
19005 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
19010 if (instData
.ABS
& 0x1) {
19011 src0
.absModifier();
19014 if (instData
.ABS
& 0x2) {
19015 src1
.absModifier();
19018 if (extData
.NEG
& 0x1) {
19019 src0
.negModifier();
19022 if (extData
.NEG
& 0x2) {
19023 src1
.negModifier();
19027 * input modifiers are supported by FP operations only
19029 assert(!(instData
.ABS
& 0x4));
19030 assert(!(extData
.NEG
& 0x4));
19032 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
19033 if (wf
->execMask(lane
)) {
19034 sdst
.setBit(lane
, (std::isnan(src0
[lane
])
19035 || std::isnan(src1
[lane
])) ? 1 : 0);
19039 wf
->execMask() = sdst
.rawData();
19043 Inst_VOP3__V_CMPX_NGE_F64::Inst_VOP3__V_CMPX_NGE_F64(
19045 : Inst_VOP3(iFmt
, "v_cmpx_nge_f64", true)
19049 } // Inst_VOP3__V_CMPX_NGE_F64
19051 Inst_VOP3__V_CMPX_NGE_F64::~Inst_VOP3__V_CMPX_NGE_F64()
19053 } // ~Inst_VOP3__V_CMPX_NGE_F64
19055 // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
19057 Inst_VOP3__V_CMPX_NGE_F64::execute(GPUDynInstPtr gpuDynInst
)
19059 Wavefront
*wf
= gpuDynInst
->wavefront();
19060 ConstVecOperandF64
src0(gpuDynInst
, extData
.SRC0
);
19061 ConstVecOperandF64
src1(gpuDynInst
, extData
.SRC1
);
19062 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
19067 if (instData
.ABS
& 0x1) {
19068 src0
.absModifier();
19071 if (instData
.ABS
& 0x2) {
19072 src1
.absModifier();
19075 if (extData
.NEG
& 0x1) {
19076 src0
.negModifier();
19079 if (extData
.NEG
& 0x2) {
19080 src1
.negModifier();
19084 * input modifiers are supported by FP operations only
19086 assert(!(instData
.ABS
& 0x4));
19087 assert(!(extData
.NEG
& 0x4));
19089 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
19090 if (wf
->execMask(lane
)) {
19091 sdst
.setBit(lane
, !(src0
[lane
] >= src1
[lane
]) ? 1 : 0);
19095 wf
->execMask() = sdst
.rawData();
19099 Inst_VOP3__V_CMPX_NLG_F64::Inst_VOP3__V_CMPX_NLG_F64(
19101 : Inst_VOP3(iFmt
, "v_cmpx_nlg_f64", true)
19105 } // Inst_VOP3__V_CMPX_NLG_F64
19107 Inst_VOP3__V_CMPX_NLG_F64::~Inst_VOP3__V_CMPX_NLG_F64()
19109 } // ~Inst_VOP3__V_CMPX_NLG_F64
19111 // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
19113 Inst_VOP3__V_CMPX_NLG_F64::execute(GPUDynInstPtr gpuDynInst
)
19115 Wavefront
*wf
= gpuDynInst
->wavefront();
19116 ConstVecOperandF64
src0(gpuDynInst
, extData
.SRC0
);
19117 ConstVecOperandF64
src1(gpuDynInst
, extData
.SRC1
);
19118 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
19123 if (instData
.ABS
& 0x1) {
19124 src0
.absModifier();
19127 if (instData
.ABS
& 0x2) {
19128 src1
.absModifier();
19131 if (extData
.NEG
& 0x1) {
19132 src0
.negModifier();
19135 if (extData
.NEG
& 0x2) {
19136 src1
.negModifier();
19140 * input modifiers are supported by FP operations only
19142 assert(!(instData
.ABS
& 0x4));
19143 assert(!(extData
.NEG
& 0x4));
19145 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
19146 if (wf
->execMask(lane
)) {
19147 sdst
.setBit(lane
, !(src0
[lane
] < src1
[lane
]
19148 || src0
[lane
] > src1
[lane
]) ? 1 : 0);
19152 wf
->execMask() = sdst
.rawData();
19156 Inst_VOP3__V_CMPX_NGT_F64::Inst_VOP3__V_CMPX_NGT_F64(
19158 : Inst_VOP3(iFmt
, "v_cmpx_ngt_f64", true)
19162 } // Inst_VOP3__V_CMPX_NGT_F64
19164 Inst_VOP3__V_CMPX_NGT_F64::~Inst_VOP3__V_CMPX_NGT_F64()
19166 } // ~Inst_VOP3__V_CMPX_NGT_F64
19168 // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
19170 Inst_VOP3__V_CMPX_NGT_F64::execute(GPUDynInstPtr gpuDynInst
)
19172 Wavefront
*wf
= gpuDynInst
->wavefront();
19173 ConstVecOperandF64
src0(gpuDynInst
, extData
.SRC0
);
19174 ConstVecOperandF64
src1(gpuDynInst
, extData
.SRC1
);
19175 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
19180 if (instData
.ABS
& 0x1) {
19181 src0
.absModifier();
19184 if (instData
.ABS
& 0x2) {
19185 src1
.absModifier();
19188 if (extData
.NEG
& 0x1) {
19189 src0
.negModifier();
19192 if (extData
.NEG
& 0x2) {
19193 src1
.negModifier();
19197 * input modifiers are supported by FP operations only
19199 assert(!(instData
.ABS
& 0x4));
19200 assert(!(extData
.NEG
& 0x4));
19202 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
19203 if (wf
->execMask(lane
)) {
19204 sdst
.setBit(lane
, !(src0
[lane
] > src1
[lane
]) ? 1 : 0);
19208 wf
->execMask() = sdst
.rawData();
19212 Inst_VOP3__V_CMPX_NLE_F64::Inst_VOP3__V_CMPX_NLE_F64(
19214 : Inst_VOP3(iFmt
, "v_cmpx_nle_f64", true)
19218 } // Inst_VOP3__V_CMPX_NLE_F64
19220 Inst_VOP3__V_CMPX_NLE_F64::~Inst_VOP3__V_CMPX_NLE_F64()
19222 } // ~Inst_VOP3__V_CMPX_NLE_F64
19224 // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
19226 Inst_VOP3__V_CMPX_NLE_F64::execute(GPUDynInstPtr gpuDynInst
)
19228 Wavefront
*wf
= gpuDynInst
->wavefront();
19229 ConstVecOperandF64
src0(gpuDynInst
, extData
.SRC0
);
19230 ConstVecOperandF64
src1(gpuDynInst
, extData
.SRC1
);
19231 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
19236 if (instData
.ABS
& 0x1) {
19237 src0
.absModifier();
19240 if (instData
.ABS
& 0x2) {
19241 src1
.absModifier();
19244 if (extData
.NEG
& 0x1) {
19245 src0
.negModifier();
19248 if (extData
.NEG
& 0x2) {
19249 src1
.negModifier();
19253 * input modifiers are supported by FP operations only
19255 assert(!(instData
.ABS
& 0x4));
19256 assert(!(extData
.NEG
& 0x4));
19258 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
19259 if (wf
->execMask(lane
)) {
19260 sdst
.setBit(lane
, !(src0
[lane
] <= src1
[lane
]) ? 1 : 0);
19264 wf
->execMask() = sdst
.rawData();
19268 Inst_VOP3__V_CMPX_NEQ_F64::Inst_VOP3__V_CMPX_NEQ_F64(
19270 : Inst_VOP3(iFmt
, "v_cmpx_neq_f64", true)
19274 } // Inst_VOP3__V_CMPX_NEQ_F64
19276 Inst_VOP3__V_CMPX_NEQ_F64::~Inst_VOP3__V_CMPX_NEQ_F64()
19278 } // ~Inst_VOP3__V_CMPX_NEQ_F64
19280 // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
19282 Inst_VOP3__V_CMPX_NEQ_F64::execute(GPUDynInstPtr gpuDynInst
)
19284 Wavefront
*wf
= gpuDynInst
->wavefront();
19285 ConstVecOperandF64
src0(gpuDynInst
, extData
.SRC0
);
19286 ConstVecOperandF64
src1(gpuDynInst
, extData
.SRC1
);
19287 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
19292 if (instData
.ABS
& 0x1) {
19293 src0
.absModifier();
19296 if (instData
.ABS
& 0x2) {
19297 src1
.absModifier();
19300 if (extData
.NEG
& 0x1) {
19301 src0
.negModifier();
19304 if (extData
.NEG
& 0x2) {
19305 src1
.negModifier();
19309 * input modifiers are supported by FP operations only
19311 assert(!(instData
.ABS
& 0x4));
19312 assert(!(extData
.NEG
& 0x4));
19314 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
19315 if (wf
->execMask(lane
)) {
19316 sdst
.setBit(lane
, src0
[lane
] != src1
[lane
] ? 1 : 0);
19320 wf
->execMask() = sdst
.rawData();
    Inst_VOP3__V_CMPX_NLT_F64::Inst_VOP3__V_CMPX_NLT_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nlt_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMPX_NLT_F64

    Inst_VOP3__V_CMPX_NLT_F64::~Inst_VOP3__V_CMPX_NLT_F64()
    {
    } // ~Inst_VOP3__V_CMPX_NLT_F64

    // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NLT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_TRU_F64::Inst_VOP3__V_CMPX_TRU_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_tru_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMPX_TRU_F64

    Inst_VOP3__V_CMPX_TRU_F64::~Inst_VOP3__V_CMPX_TRU_F64()
    {
    } // ~Inst_VOP3__V_CMPX_TRU_F64

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_TRU_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
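    /**
     * The integer compares that follow all share one per-lane pattern:
     * each active lane's predicate result is packed into one bit of the
     * 64-bit scalar destination (the condition mask). A minimal sketch of
     * that loop, with cmp standing in for the instruction-specific
     * predicate:
     *
     *     for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
     *         if (wf->execMask(lane)) {
     *             sdst.setBit(lane, cmp(src0[lane], src1[lane]) ? 1 : 0);
     *         }
     *     }
     *
     * The CMPX variants additionally copy the finished mask into EXEC via
     * wf->execMask() = sdst.rawData().
     */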
    Inst_VOP3__V_CMP_F_I16::Inst_VOP3__V_CMP_F_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_f_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_F_I16

    Inst_VOP3__V_CMP_F_I16::~Inst_VOP3__V_CMP_F_I16()
    {
    } // ~Inst_VOP3__V_CMP_F_I16

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_F_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LT_I16::Inst_VOP3__V_CMP_LT_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lt_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LT_I16

    Inst_VOP3__V_CMP_LT_I16::~Inst_VOP3__V_CMP_LT_I16()
    {
    } // ~Inst_VOP3__V_CMP_LT_I16

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LT_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_EQ_I16::Inst_VOP3__V_CMP_EQ_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_eq_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_EQ_I16

    Inst_VOP3__V_CMP_EQ_I16::~Inst_VOP3__V_CMP_EQ_I16()
    {
    } // ~Inst_VOP3__V_CMP_EQ_I16

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_EQ_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LE_I16::Inst_VOP3__V_CMP_LE_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_le_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LE_I16

    Inst_VOP3__V_CMP_LE_I16::~Inst_VOP3__V_CMP_LE_I16()
    {
    } // ~Inst_VOP3__V_CMP_LE_I16

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GT_I16::Inst_VOP3__V_CMP_GT_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_gt_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GT_I16

    Inst_VOP3__V_CMP_GT_I16::~Inst_VOP3__V_CMP_GT_I16()
    {
    } // ~Inst_VOP3__V_CMP_GT_I16

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GT_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_NE_I16::Inst_VOP3__V_CMP_NE_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ne_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_NE_I16

    Inst_VOP3__V_CMP_NE_I16::~Inst_VOP3__V_CMP_NE_I16()
    {
    } // ~Inst_VOP3__V_CMP_NE_I16

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GE_I16::Inst_VOP3__V_CMP_GE_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ge_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GE_I16

    Inst_VOP3__V_CMP_GE_I16::~Inst_VOP3__V_CMP_GE_I16()
    {
    } // ~Inst_VOP3__V_CMP_GE_I16

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_T_I16::Inst_VOP3__V_CMP_T_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_t_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_T_I16

    Inst_VOP3__V_CMP_T_I16::~Inst_VOP3__V_CMP_T_I16()
    {
    } // ~Inst_VOP3__V_CMP_T_I16

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_T_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_F_U16::Inst_VOP3__V_CMP_F_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_f_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_F_U16

    Inst_VOP3__V_CMP_F_U16::~Inst_VOP3__V_CMP_F_U16()
    {
    } // ~Inst_VOP3__V_CMP_F_U16

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_F_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LT_U16::Inst_VOP3__V_CMP_LT_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lt_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LT_U16

    Inst_VOP3__V_CMP_LT_U16::~Inst_VOP3__V_CMP_LT_U16()
    {
    } // ~Inst_VOP3__V_CMP_LT_U16

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LT_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_EQ_U16::Inst_VOP3__V_CMP_EQ_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_eq_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_EQ_U16

    Inst_VOP3__V_CMP_EQ_U16::~Inst_VOP3__V_CMP_EQ_U16()
    {
    } // ~Inst_VOP3__V_CMP_EQ_U16

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_EQ_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_LE_U16::Inst_VOP3__V_CMP_LE_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_le_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LE_U16

    Inst_VOP3__V_CMP_LE_U16::~Inst_VOP3__V_CMP_LE_U16()
    {
    } // ~Inst_VOP3__V_CMP_LE_U16

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GT_U16::Inst_VOP3__V_CMP_GT_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_gt_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GT_U16

    Inst_VOP3__V_CMP_GT_U16::~Inst_VOP3__V_CMP_GT_U16()
    {
    } // ~Inst_VOP3__V_CMP_GT_U16

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GT_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NE_U16::Inst_VOP3__V_CMP_NE_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ne_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_NE_U16

    Inst_VOP3__V_CMP_NE_U16::~Inst_VOP3__V_CMP_NE_U16()
    {
    } // ~Inst_VOP3__V_CMP_NE_U16

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_GE_U16::Inst_VOP3__V_CMP_GE_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ge_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GE_U16

    Inst_VOP3__V_CMP_GE_U16::~Inst_VOP3__V_CMP_GE_U16()
    {
    } // ~Inst_VOP3__V_CMP_GE_U16

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_T_U16::Inst_VOP3__V_CMP_T_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_t_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_T_U16

    Inst_VOP3__V_CMP_T_U16::~Inst_VOP3__V_CMP_T_U16()
    {
    } // ~Inst_VOP3__V_CMP_T_U16

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_T_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        sdst.write();
    } // execute
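    /**
     * V_CMP(X)_F_* and V_CMP(X)_T_* are the degenerate members of the
     * compare family: they ignore their sources and write a constant 0
     * ("always false") or 1 ("always true") for every active lane.
     */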
    Inst_VOP3__V_CMPX_F_I16::Inst_VOP3__V_CMPX_F_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_f_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_F_I16

    Inst_VOP3__V_CMPX_F_I16::~Inst_VOP3__V_CMPX_F_I16()
    {
    } // ~Inst_VOP3__V_CMPX_F_I16

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_F_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LT_I16::Inst_VOP3__V_CMPX_LT_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lt_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LT_I16

    Inst_VOP3__V_CMPX_LT_I16::~Inst_VOP3__V_CMPX_LT_I16()
    {
    } // ~Inst_VOP3__V_CMPX_LT_I16

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LT_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_EQ_I16::Inst_VOP3__V_CMPX_EQ_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_eq_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_EQ_I16

    Inst_VOP3__V_CMPX_EQ_I16::~Inst_VOP3__V_CMPX_EQ_I16()
    {
    } // ~Inst_VOP3__V_CMPX_EQ_I16

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_EQ_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LE_I16::Inst_VOP3__V_CMPX_LE_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_le_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LE_I16

    Inst_VOP3__V_CMPX_LE_I16::~Inst_VOP3__V_CMPX_LE_I16()
    {
    } // ~Inst_VOP3__V_CMPX_LE_I16

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GT_I16::Inst_VOP3__V_CMPX_GT_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_gt_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_GT_I16

    Inst_VOP3__V_CMPX_GT_I16::~Inst_VOP3__V_CMPX_GT_I16()
    {
    } // ~Inst_VOP3__V_CMPX_GT_I16

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GT_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_NE_I16::Inst_VOP3__V_CMPX_NE_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ne_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_NE_I16

    Inst_VOP3__V_CMPX_NE_I16::~Inst_VOP3__V_CMPX_NE_I16()
    {
    } // ~Inst_VOP3__V_CMPX_NE_I16

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GE_I16::Inst_VOP3__V_CMPX_GE_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ge_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_GE_I16

    Inst_VOP3__V_CMPX_GE_I16::~Inst_VOP3__V_CMPX_GE_I16()
    {
    } // ~Inst_VOP3__V_CMPX_GE_I16

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_T_I16::Inst_VOP3__V_CMPX_T_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_t_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_T_I16

    Inst_VOP3__V_CMPX_T_I16::~Inst_VOP3__V_CMPX_T_I16()
    {
    } // ~Inst_VOP3__V_CMPX_T_I16

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_T_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_F_U16::Inst_VOP3__V_CMPX_F_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_f_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_F_U16

    Inst_VOP3__V_CMPX_F_U16::~Inst_VOP3__V_CMPX_F_U16()
    {
    } // ~Inst_VOP3__V_CMPX_F_U16

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_F_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LT_U16::Inst_VOP3__V_CMPX_LT_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lt_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LT_U16

    Inst_VOP3__V_CMPX_LT_U16::~Inst_VOP3__V_CMPX_LT_U16()
    {
    } // ~Inst_VOP3__V_CMPX_LT_U16

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LT_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_EQ_U16::Inst_VOP3__V_CMPX_EQ_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_eq_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_EQ_U16

    Inst_VOP3__V_CMPX_EQ_U16::~Inst_VOP3__V_CMPX_EQ_U16()
    {
    } // ~Inst_VOP3__V_CMPX_EQ_U16

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_EQ_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LE_U16::Inst_VOP3__V_CMPX_LE_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_le_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LE_U16

    Inst_VOP3__V_CMPX_LE_U16::~Inst_VOP3__V_CMPX_LE_U16()
    {
    } // ~Inst_VOP3__V_CMPX_LE_U16

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GT_U16::Inst_VOP3__V_CMPX_GT_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_gt_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_GT_U16

    Inst_VOP3__V_CMPX_GT_U16::~Inst_VOP3__V_CMPX_GT_U16()
    {
    } // ~Inst_VOP3__V_CMPX_GT_U16

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GT_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NE_U16::Inst_VOP3__V_CMPX_NE_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ne_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_NE_U16

    Inst_VOP3__V_CMPX_NE_U16::~Inst_VOP3__V_CMPX_NE_U16()
    {
    } // ~Inst_VOP3__V_CMPX_NE_U16

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GE_U16::Inst_VOP3__V_CMPX_GE_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ge_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_GE_U16

    Inst_VOP3__V_CMPX_GE_U16::~Inst_VOP3__V_CMPX_GE_U16()
    {
    } // ~Inst_VOP3__V_CMPX_GE_U16

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_T_U16::Inst_VOP3__V_CMPX_T_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_t_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_T_U16

    Inst_VOP3__V_CMPX_T_U16::~Inst_VOP3__V_CMPX_T_U16()
    {
    } // ~Inst_VOP3__V_CMPX_T_U16

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_T_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
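    /**
     * The 32-bit integer compares below mirror the 16-bit ones above;
     * only the operand type changes. As before, the asserts on
     * instData.ABS and extData.NEG document that VOP3 input modifiers
     * are defined for FP sources only.
     */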
    Inst_VOP3__V_CMP_F_I32::Inst_VOP3__V_CMP_F_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_f_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_F_I32

    Inst_VOP3__V_CMP_F_I32::~Inst_VOP3__V_CMP_F_I32()
    {
    } // ~Inst_VOP3__V_CMP_F_I32

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_F_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_LT_I32::Inst_VOP3__V_CMP_LT_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lt_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LT_I32

    Inst_VOP3__V_CMP_LT_I32::~Inst_VOP3__V_CMP_LT_I32()
    {
    } // ~Inst_VOP3__V_CMP_LT_I32

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_EQ_I32::Inst_VOP3__V_CMP_EQ_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_eq_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_EQ_I32

    Inst_VOP3__V_CMP_EQ_I32::~Inst_VOP3__V_CMP_EQ_I32()
    {
    } // ~Inst_VOP3__V_CMP_EQ_I32

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LE_I32::Inst_VOP3__V_CMP_LE_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_le_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LE_I32

    Inst_VOP3__V_CMP_LE_I32::~Inst_VOP3__V_CMP_LE_I32()
    {
    } // ~Inst_VOP3__V_CMP_LE_I32

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GT_I32::Inst_VOP3__V_CMP_GT_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_gt_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GT_I32

    Inst_VOP3__V_CMP_GT_I32::~Inst_VOP3__V_CMP_GT_I32()
    {
    } // ~Inst_VOP3__V_CMP_GT_I32

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_NE_I32::Inst_VOP3__V_CMP_NE_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ne_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_NE_I32

    Inst_VOP3__V_CMP_NE_I32::~Inst_VOP3__V_CMP_NE_I32()
    {
    } // ~Inst_VOP3__V_CMP_NE_I32

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GE_I32::Inst_VOP3__V_CMP_GE_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ge_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GE_I32

    Inst_VOP3__V_CMP_GE_I32::~Inst_VOP3__V_CMP_GE_I32()
    {
    } // ~Inst_VOP3__V_CMP_GE_I32

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_T_I32::Inst_VOP3__V_CMP_T_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_t_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_T_I32

    Inst_VOP3__V_CMP_T_I32::~Inst_VOP3__V_CMP_T_I32()
    {
    } // ~Inst_VOP3__V_CMP_T_I32

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_T_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_F_U32::Inst_VOP3__V_CMP_F_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_f_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_F_U32

    Inst_VOP3__V_CMP_F_U32::~Inst_VOP3__V_CMP_F_U32()
    {
    } // ~Inst_VOP3__V_CMP_F_U32

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_F_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LT_U32::Inst_VOP3__V_CMP_LT_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lt_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LT_U32

    Inst_VOP3__V_CMP_LT_U32::~Inst_VOP3__V_CMP_LT_U32()
    {
    } // ~Inst_VOP3__V_CMP_LT_U32

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_EQ_U32::Inst_VOP3__V_CMP_EQ_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_eq_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_EQ_U32

    Inst_VOP3__V_CMP_EQ_U32::~Inst_VOP3__V_CMP_EQ_U32()
    {
    } // ~Inst_VOP3__V_CMP_EQ_U32

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_LE_U32::Inst_VOP3__V_CMP_LE_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_le_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LE_U32

    Inst_VOP3__V_CMP_LE_U32::~Inst_VOP3__V_CMP_LE_U32()
    {
    } // ~Inst_VOP3__V_CMP_LE_U32

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GT_U32::Inst_VOP3__V_CMP_GT_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_gt_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GT_U32

    Inst_VOP3__V_CMP_GT_U32::~Inst_VOP3__V_CMP_GT_U32()
    {
    } // ~Inst_VOP3__V_CMP_GT_U32

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NE_U32::Inst_VOP3__V_CMP_NE_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ne_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_NE_U32

    Inst_VOP3__V_CMP_NE_U32::~Inst_VOP3__V_CMP_NE_U32()
    {
    } // ~Inst_VOP3__V_CMP_NE_U32

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GE_U32::Inst_VOP3__V_CMP_GE_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ge_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GE_U32

    Inst_VOP3__V_CMP_GE_U32::~Inst_VOP3__V_CMP_GE_U32()
    {
    } // ~Inst_VOP3__V_CMP_GE_U32

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_T_U32::Inst_VOP3__V_CMP_T_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_t_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_T_U32

    Inst_VOP3__V_CMP_T_U32::~Inst_VOP3__V_CMP_T_U32()
    {
    } // ~Inst_VOP3__V_CMP_T_U32

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_T_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_F_I32::Inst_VOP3__V_CMPX_F_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_f_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_F_I32

    Inst_VOP3__V_CMPX_F_I32::~Inst_VOP3__V_CMPX_F_I32()
    {
    } // ~Inst_VOP3__V_CMPX_F_I32

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_F_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_LT_I32::Inst_VOP3__V_CMPX_LT_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lt_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LT_I32

    Inst_VOP3__V_CMPX_LT_I32::~Inst_VOP3__V_CMPX_LT_I32()
    {
    } // ~Inst_VOP3__V_CMPX_LT_I32

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_EQ_I32::Inst_VOP3__V_CMPX_EQ_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_eq_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_EQ_I32

    Inst_VOP3__V_CMPX_EQ_I32::~Inst_VOP3__V_CMPX_EQ_I32()
    {
    } // ~Inst_VOP3__V_CMPX_EQ_I32

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LE_I32::Inst_VOP3__V_CMPX_LE_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_le_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LE_I32

    Inst_VOP3__V_CMPX_LE_I32::~Inst_VOP3__V_CMPX_LE_I32()
    {
    } // ~Inst_VOP3__V_CMPX_LE_I32

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_GT_I32::Inst_VOP3__V_CMPX_GT_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_gt_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_GT_I32

    Inst_VOP3__V_CMPX_GT_I32::~Inst_VOP3__V_CMPX_GT_I32()
    {
    } // ~Inst_VOP3__V_CMPX_GT_I32

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NE_I32::Inst_VOP3__V_CMPX_NE_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ne_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_NE_I32

    Inst_VOP3__V_CMPX_NE_I32::~Inst_VOP3__V_CMPX_NE_I32()
    {
    } // ~Inst_VOP3__V_CMPX_NE_I32

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GE_I32::Inst_VOP3__V_CMPX_GE_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ge_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_GE_I32

    Inst_VOP3__V_CMPX_GE_I32::~Inst_VOP3__V_CMPX_GE_I32()
    {
    } // ~Inst_VOP3__V_CMPX_GE_I32

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_T_I32::Inst_VOP3__V_CMPX_T_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_t_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_T_I32

    Inst_VOP3__V_CMPX_T_I32::~Inst_VOP3__V_CMPX_T_I32()
    {
    } // ~Inst_VOP3__V_CMPX_T_I32

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_T_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_F_U32::Inst_VOP3__V_CMPX_F_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_f_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_F_U32

    Inst_VOP3__V_CMPX_F_U32::~Inst_VOP3__V_CMPX_F_U32()
    {
    } // ~Inst_VOP3__V_CMPX_F_U32

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_F_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_LT_U32::Inst_VOP3__V_CMPX_LT_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lt_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LT_U32

    Inst_VOP3__V_CMPX_LT_U32::~Inst_VOP3__V_CMPX_LT_U32()
    {
    } // ~Inst_VOP3__V_CMPX_LT_U32

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_EQ_U32::Inst_VOP3__V_CMPX_EQ_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_eq_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_EQ_U32

    Inst_VOP3__V_CMPX_EQ_U32::~Inst_VOP3__V_CMPX_EQ_U32()
    {
    } // ~Inst_VOP3__V_CMPX_EQ_U32

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LE_U32::Inst_VOP3__V_CMPX_LE_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_le_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LE_U32

    Inst_VOP3__V_CMPX_LE_U32::~Inst_VOP3__V_CMPX_LE_U32()
    {
    } // ~Inst_VOP3__V_CMPX_LE_U32

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
21725 Inst_VOP3__V_CMPX_GT_U32::Inst_VOP3__V_CMPX_GT_U32(
21727 : Inst_VOP3(iFmt
, "v_cmpx_gt_u32", true)
21730 } // Inst_VOP3__V_CMPX_GT_U32
21732 Inst_VOP3__V_CMPX_GT_U32::~Inst_VOP3__V_CMPX_GT_U32()
21734 } // ~Inst_VOP3__V_CMPX_GT_U32
21736 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
21738 Inst_VOP3__V_CMPX_GT_U32::execute(GPUDynInstPtr gpuDynInst
)
21740 Wavefront
*wf
= gpuDynInst
->wavefront();
21741 ConstVecOperandU32
src0(gpuDynInst
, extData
.SRC0
);
21742 ConstVecOperandU32
src1(gpuDynInst
, extData
.SRC1
);
21743 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
21749 * input modifiers are supported by FP operations only
21751 assert(!(instData
.ABS
& 0x1));
21752 assert(!(instData
.ABS
& 0x2));
21753 assert(!(instData
.ABS
& 0x4));
21754 assert(!(extData
.NEG
& 0x1));
21755 assert(!(extData
.NEG
& 0x2));
21756 assert(!(extData
.NEG
& 0x4));
21758 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
21759 if (wf
->execMask(lane
)) {
21760 sdst
.setBit(lane
, src0
[lane
] > src1
[lane
] ? 1 : 0);
21764 wf
->execMask() = sdst
.rawData();
21768 Inst_VOP3__V_CMPX_NE_U32::Inst_VOP3__V_CMPX_NE_U32(
21770 : Inst_VOP3(iFmt
, "v_cmpx_ne_u32", true)
21773 } // Inst_VOP3__V_CMPX_NE_U32
21775 Inst_VOP3__V_CMPX_NE_U32::~Inst_VOP3__V_CMPX_NE_U32()
21777 } // ~Inst_VOP3__V_CMPX_NE_U32
21779 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
21781 Inst_VOP3__V_CMPX_NE_U32::execute(GPUDynInstPtr gpuDynInst
)
21783 Wavefront
*wf
= gpuDynInst
->wavefront();
21784 ConstVecOperandU32
src0(gpuDynInst
, extData
.SRC0
);
21785 ConstVecOperandU32
src1(gpuDynInst
, extData
.SRC1
);
21786 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
21792 * input modifiers are supported by FP operations only
21794 assert(!(instData
.ABS
& 0x1));
21795 assert(!(instData
.ABS
& 0x2));
21796 assert(!(instData
.ABS
& 0x4));
21797 assert(!(extData
.NEG
& 0x1));
21798 assert(!(extData
.NEG
& 0x2));
21799 assert(!(extData
.NEG
& 0x4));
21801 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
21802 if (wf
->execMask(lane
)) {
21803 sdst
.setBit(lane
, src0
[lane
] != src1
[lane
] ? 1 : 0);
21807 wf
->execMask() = sdst
.rawData();
21811 Inst_VOP3__V_CMPX_GE_U32::Inst_VOP3__V_CMPX_GE_U32(
21813 : Inst_VOP3(iFmt
, "v_cmpx_ge_u32", true)
21816 } // Inst_VOP3__V_CMPX_GE_U32
21818 Inst_VOP3__V_CMPX_GE_U32::~Inst_VOP3__V_CMPX_GE_U32()
21820 } // ~Inst_VOP3__V_CMPX_GE_U32
21822 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
21824 Inst_VOP3__V_CMPX_GE_U32::execute(GPUDynInstPtr gpuDynInst
)
21826 Wavefront
*wf
= gpuDynInst
->wavefront();
21827 ConstVecOperandU32
src0(gpuDynInst
, extData
.SRC0
);
21828 ConstVecOperandU32
src1(gpuDynInst
, extData
.SRC1
);
21829 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
21835 * input modifiers are supported by FP operations only
21837 assert(!(instData
.ABS
& 0x1));
21838 assert(!(instData
.ABS
& 0x2));
21839 assert(!(instData
.ABS
& 0x4));
21840 assert(!(extData
.NEG
& 0x1));
21841 assert(!(extData
.NEG
& 0x2));
21842 assert(!(extData
.NEG
& 0x4));
21844 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
21845 if (wf
->execMask(lane
)) {
21846 sdst
.setBit(lane
, src0
[lane
] >= src1
[lane
] ? 1 : 0);
21850 wf
->execMask() = sdst
.rawData();
    Inst_VOP3__V_CMPX_T_U32::Inst_VOP3__V_CMPX_T_U32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_t_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_T_U32

    Inst_VOP3__V_CMPX_T_U32::~Inst_VOP3__V_CMPX_T_U32()
    {
    } // ~Inst_VOP3__V_CMPX_T_U32

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_T_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_F_I64::Inst_VOP3__V_CMP_F_I64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_f_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_F_I64

    Inst_VOP3__V_CMP_F_I64::~Inst_VOP3__V_CMP_F_I64()
    {
    } // ~Inst_VOP3__V_CMP_F_I64

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_F_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        sdst.write();
    } // execute
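    /**
     * Commentary added during cleanup: unlike the CMPX forms above, the
     * plain V_CMP instructions only write the per-lane result bits to
     * the scalar destination named by VDST; the EXEC mask is left
     * untouched, so their execute() bodies end with sdst.write() alone.
     */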
    Inst_VOP3__V_CMP_LT_I64::Inst_VOP3__V_CMP_LT_I64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lt_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LT_I64

    Inst_VOP3__V_CMP_LT_I64::~Inst_VOP3__V_CMP_LT_I64()
    {
    } // ~Inst_VOP3__V_CMP_LT_I64

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LT_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_EQ_I64::Inst_VOP3__V_CMP_EQ_I64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_eq_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_EQ_I64

    Inst_VOP3__V_CMP_EQ_I64::~Inst_VOP3__V_CMP_EQ_I64()
    {
    } // ~Inst_VOP3__V_CMP_EQ_I64

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_EQ_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LE_I64::Inst_VOP3__V_CMP_LE_I64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_le_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LE_I64

    Inst_VOP3__V_CMP_LE_I64::~Inst_VOP3__V_CMP_LE_I64()
    {
    } // ~Inst_VOP3__V_CMP_LE_I64

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GT_I64::Inst_VOP3__V_CMP_GT_I64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_gt_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GT_I64

    Inst_VOP3__V_CMP_GT_I64::~Inst_VOP3__V_CMP_GT_I64()
    {
    } // ~Inst_VOP3__V_CMP_GT_I64

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GT_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NE_I64::Inst_VOP3__V_CMP_NE_I64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ne_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_NE_I64

    Inst_VOP3__V_CMP_NE_I64::~Inst_VOP3__V_CMP_NE_I64()
    {
    } // ~Inst_VOP3__V_CMP_NE_I64

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GE_I64::Inst_VOP3__V_CMP_GE_I64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ge_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GE_I64

    Inst_VOP3__V_CMP_GE_I64::~Inst_VOP3__V_CMP_GE_I64()
    {
    } // ~Inst_VOP3__V_CMP_GE_I64

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_T_I64::Inst_VOP3__V_CMP_T_I64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_t_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_T_I64

    Inst_VOP3__V_CMP_T_I64::~Inst_VOP3__V_CMP_T_I64()
    {
    } // ~Inst_VOP3__V_CMP_T_I64

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_T_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_F_U64::Inst_VOP3__V_CMP_F_U64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_f_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_F_U64

    Inst_VOP3__V_CMP_F_U64::~Inst_VOP3__V_CMP_F_U64()
    {
    } // ~Inst_VOP3__V_CMP_F_U64

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_F_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_LT_U64::Inst_VOP3__V_CMP_LT_U64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lt_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LT_U64

    Inst_VOP3__V_CMP_LT_U64::~Inst_VOP3__V_CMP_LT_U64()
    {
    } // ~Inst_VOP3__V_CMP_LT_U64

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LT_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_EQ_U64::Inst_VOP3__V_CMP_EQ_U64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_eq_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_EQ_U64

    Inst_VOP3__V_CMP_EQ_U64::~Inst_VOP3__V_CMP_EQ_U64()
    {
    } // ~Inst_VOP3__V_CMP_EQ_U64

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LE_U64::Inst_VOP3__V_CMP_LE_U64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_le_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LE_U64

    Inst_VOP3__V_CMP_LE_U64::~Inst_VOP3__V_CMP_LE_U64()
    {
    } // ~Inst_VOP3__V_CMP_LE_U64

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GT_U64::Inst_VOP3__V_CMP_GT_U64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_gt_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GT_U64

    Inst_VOP3__V_CMP_GT_U64::~Inst_VOP3__V_CMP_GT_U64()
    {
    } // ~Inst_VOP3__V_CMP_GT_U64

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GT_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NE_U64::Inst_VOP3__V_CMP_NE_U64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ne_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_NE_U64

    Inst_VOP3__V_CMP_NE_U64::~Inst_VOP3__V_CMP_NE_U64()
    {
    } // ~Inst_VOP3__V_CMP_NE_U64

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GE_U64::Inst_VOP3__V_CMP_GE_U64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ge_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GE_U64

    Inst_VOP3__V_CMP_GE_U64::~Inst_VOP3__V_CMP_GE_U64()
    {
    } // ~Inst_VOP3__V_CMP_GE_U64

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_T_U64::Inst_VOP3__V_CMP_T_U64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_t_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_T_U64

    Inst_VOP3__V_CMP_T_U64::~Inst_VOP3__V_CMP_T_U64()
    {
    } // ~Inst_VOP3__V_CMP_T_U64

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_T_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_F_I64::Inst_VOP3__V_CMPX_F_I64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_f_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_F_I64

    Inst_VOP3__V_CMPX_F_I64::~Inst_VOP3__V_CMPX_F_I64()
    {
    } // ~Inst_VOP3__V_CMPX_F_I64

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_F_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_LT_I64::Inst_VOP3__V_CMPX_LT_I64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lt_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LT_I64

    Inst_VOP3__V_CMPX_LT_I64::~Inst_VOP3__V_CMPX_LT_I64()
    {
    } // ~Inst_VOP3__V_CMPX_LT_I64

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LT_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_EQ_I64::Inst_VOP3__V_CMPX_EQ_I64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_eq_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_EQ_I64

    Inst_VOP3__V_CMPX_EQ_I64::~Inst_VOP3__V_CMPX_EQ_I64()
    {
    } // ~Inst_VOP3__V_CMPX_EQ_I64

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_EQ_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LE_I64::Inst_VOP3__V_CMPX_LE_I64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_le_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LE_I64

    Inst_VOP3__V_CMPX_LE_I64::~Inst_VOP3__V_CMPX_LE_I64()
    {
    } // ~Inst_VOP3__V_CMPX_LE_I64

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GT_I64::Inst_VOP3__V_CMPX_GT_I64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_gt_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_GT_I64

    Inst_VOP3__V_CMPX_GT_I64::~Inst_VOP3__V_CMPX_GT_I64()
    {
    } // ~Inst_VOP3__V_CMPX_GT_I64

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GT_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NE_I64::Inst_VOP3__V_CMPX_NE_I64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ne_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_NE_I64

    Inst_VOP3__V_CMPX_NE_I64::~Inst_VOP3__V_CMPX_NE_I64()
    {
    } // ~Inst_VOP3__V_CMPX_NE_I64

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GE_I64::Inst_VOP3__V_CMPX_GE_I64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ge_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_GE_I64

    Inst_VOP3__V_CMPX_GE_I64::~Inst_VOP3__V_CMPX_GE_I64()
    {
    } // ~Inst_VOP3__V_CMPX_GE_I64

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_T_I64::Inst_VOP3__V_CMPX_T_I64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_t_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_T_I64

    Inst_VOP3__V_CMPX_T_I64::~Inst_VOP3__V_CMPX_T_I64()
    {
    } // ~Inst_VOP3__V_CMPX_T_I64

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_T_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_F_U64::Inst_VOP3__V_CMPX_F_U64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_f_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_F_U64

    Inst_VOP3__V_CMPX_F_U64::~Inst_VOP3__V_CMPX_F_U64()
    {
    } // ~Inst_VOP3__V_CMPX_F_U64

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_F_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_LT_U64::Inst_VOP3__V_CMPX_LT_U64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lt_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LT_U64

    Inst_VOP3__V_CMPX_LT_U64::~Inst_VOP3__V_CMPX_LT_U64()
    {
    } // ~Inst_VOP3__V_CMPX_LT_U64

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LT_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_EQ_U64::Inst_VOP3__V_CMPX_EQ_U64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_eq_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_EQ_U64

    Inst_VOP3__V_CMPX_EQ_U64::~Inst_VOP3__V_CMPX_EQ_U64()
    {
    } // ~Inst_VOP3__V_CMPX_EQ_U64

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_EQ_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LE_U64::Inst_VOP3__V_CMPX_LE_U64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_le_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LE_U64

    Inst_VOP3__V_CMPX_LE_U64::~Inst_VOP3__V_CMPX_LE_U64()
    {
    } // ~Inst_VOP3__V_CMPX_LE_U64

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GT_U64::Inst_VOP3__V_CMPX_GT_U64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_gt_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_GT_U64

    Inst_VOP3__V_CMPX_GT_U64::~Inst_VOP3__V_CMPX_GT_U64()
    {
    } // ~Inst_VOP3__V_CMPX_GT_U64

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GT_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NE_U64::Inst_VOP3__V_CMPX_NE_U64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ne_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_NE_U64

    Inst_VOP3__V_CMPX_NE_U64::~Inst_VOP3__V_CMPX_NE_U64()
    {
    } // ~Inst_VOP3__V_CMPX_NE_U64

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GE_U64::Inst_VOP3__V_CMPX_GE_U64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ge_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_GE_U64

    Inst_VOP3__V_CMPX_GE_U64::~Inst_VOP3__V_CMPX_GE_U64()
    {
    } // ~Inst_VOP3__V_CMPX_GE_U64

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_T_U64::Inst_VOP3__V_CMPX_T_U64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_t_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_T_U64

    Inst_VOP3__V_CMPX_T_U64::~Inst_VOP3__V_CMPX_T_U64()
    {
    } // ~Inst_VOP3__V_CMPX_T_U64

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_T_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CNDMASK_B32::Inst_VOP3__V_CNDMASK_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cndmask_b32", false)
    {
        setFlag(ALU);
        setFlag(ReadsVCC);
    } // Inst_VOP3__V_CNDMASK_B32

    Inst_VOP3__V_CNDMASK_B32::~Inst_VOP3__V_CNDMASK_B32()
    {
    } // ~Inst_VOP3__V_CNDMASK_B32

    // D.u = (VCC[i] ? S1.u : S0.u) (i = threadID in wave); VOP3: specify VCC
    // as a scalar GPR in S2.
    void
    Inst_VOP3__V_CNDMASK_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        vcc.read();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = bits(vcc.rawData(), lane)
                    ? src1[lane] : src0[lane];
            }
        }

        vdst.write();
    } // execute
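    /**
     * Commentary added during cleanup: in the VOP3 encoding the select
     * mask is not implicitly VCC; SRC2 names an arbitrary SGPR pair that
     * is read as a 64-bit mask. Each lane picks src1 when its mask bit
     * is set, e.g. with mask 0b...10: vdst[0] = src0[0] (bit 0 clear)
     * and vdst[1] = src1[1] (bit 1 set).
     */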
    Inst_VOP3__V_ADD_F32::Inst_VOP3__V_ADD_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_add_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_ADD_F32

    Inst_VOP3__V_ADD_F32::~Inst_VOP3__V_ADD_F32()
    {
    } // ~Inst_VOP3__V_ADD_F32

    // D.f = S0.f + S1.f.
    void
    Inst_VOP3__V_ADD_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] + src1[lane];
            }
        }

        vdst.write();
    } // execute
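    /**
     * Commentary added during cleanup: the ABS/NEG handling above is the
     * common VOP3 input-modifier pattern for FP operations. Bit n of
     * instData.ABS applies |x| to source n, then bit n of extData.NEG
     * negates it, so ABS = 0x1 and NEG = 0x1 makes this instruction
     * compute -|src0| + src1 per lane.
     */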
    Inst_VOP3__V_SUB_F32::Inst_VOP3__V_SUB_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sub_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_SUB_F32

    Inst_VOP3__V_SUB_F32::~Inst_VOP3__V_SUB_F32()
    {
    } // ~Inst_VOP3__V_SUB_F32

    // D.f = S0.f - S1.f.
    void
    Inst_VOP3__V_SUB_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] - src1[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_SUBREV_F32::Inst_VOP3__V_SUBREV_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_subrev_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_SUBREV_F32

    Inst_VOP3__V_SUBREV_F32::~Inst_VOP3__V_SUBREV_F32()
    {
    } // ~Inst_VOP3__V_SUBREV_F32

    // D.f = S1.f - S0.f.
    void
    Inst_VOP3__V_SUBREV_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] - src0[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_MUL_LEGACY_F32::Inst_VOP3__V_MUL_LEGACY_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_legacy_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_MUL_LEGACY_F32

    Inst_VOP3__V_MUL_LEGACY_F32::~Inst_VOP3__V_MUL_LEGACY_F32()
    {
    } // ~Inst_VOP3__V_MUL_LEGACY_F32

    // D.f = S0.f * S1.f
    void
    Inst_VOP3__V_MUL_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isnan(src0[lane]) ||
                    std::isnan(src1[lane])) {
                    vdst[lane] = NAN;
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           !std::signbit(src0[lane])) {
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           std::signbit(src0[lane])) {
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if (std::isinf(src0[lane]) &&
                           !std::signbit(src0[lane])) {
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else if (std::isinf(src0[lane]) &&
                           std::signbit(src0[lane])) {
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else {
                    vdst[lane] = src0[lane] * src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
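    /**
     * Commentary added during cleanup: the branch ladder above resolves
     * the zero-times-infinity cases explicitly (to NAN) and derives the
     * sign of zero/infinity results from the operand signs before
     * falling through to an ordinary multiply, e.g.:
     *
     *     +0.0f * -5.0f     -> -0.0f
     *     -INFINITY * -2.0f -> +INFINITY
     *     +0.0f * INFINITY  -> NAN
     */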
    Inst_VOP3__V_MUL_F32::Inst_VOP3__V_MUL_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_MUL_F32

    Inst_VOP3__V_MUL_F32::~Inst_VOP3__V_MUL_F32()
    {
    } // ~Inst_VOP3__V_MUL_F32

    // D.f = S0.f * S1.f.
    void
    Inst_VOP3__V_MUL_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isnan(src0[lane]) ||
                    std::isnan(src1[lane])) {
                    vdst[lane] = NAN;
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           !std::signbit(src0[lane])) {
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           std::signbit(src0[lane])) {
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if (std::isinf(src0[lane]) &&
                           !std::signbit(src0[lane])) {
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else if (std::isinf(src0[lane]) &&
                           std::signbit(src0[lane])) {
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else {
                    vdst[lane] = src0[lane] * src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_MUL_I32_I24::Inst_VOP3__V_MUL_I32_I24(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_i32_i24", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MUL_I32_I24

    Inst_VOP3__V_MUL_I32_I24::~Inst_VOP3__V_MUL_I32_I24()
    {
    } // ~Inst_VOP3__V_MUL_I32_I24

    // D.i = S0.i[23:0] * S1.i[23:0].
    void
    Inst_VOP3__V_MUL_I32_I24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = sext<24>(bits(src0[lane], 23, 0))
                    * sext<24>(bits(src1[lane], 23, 0));
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_MUL_HI_I32_I24::Inst_VOP3__V_MUL_HI_I32_I24(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_hi_i32_i24", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MUL_HI_I32_I24

    Inst_VOP3__V_MUL_HI_I32_I24::~Inst_VOP3__V_MUL_HI_I32_I24()
    {
    } // ~Inst_VOP3__V_MUL_HI_I32_I24

    // D.i = (S0.i[23:0] * S1.i[23:0]) >> 32.
    void
    Inst_VOP3__V_MUL_HI_I32_I24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemI64 tmp_src0
                    = (VecElemI64)sext<24>(bits(src0[lane], 23, 0));
                VecElemI64 tmp_src1
                    = (VecElemI64)sext<24>(bits(src1[lane], 23, 0));

                vdst[lane] = (VecElemI32)((tmp_src0 * tmp_src1) >> 32);
            }
        }

        vdst.write();
    } // execute
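    /**
     * Commentary added during cleanup: both I24 multiplies truncate each
     * operand to bits [23:0] and sign-extend with sext<24>. For example,
     * src = 0x00800001 truncates to 0x800001, which sign-extends to
     * -8388607; the products are formed from these 24-bit signed values,
     * and the HI variant keeps bits [63:32] of the 64-bit product.
     */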
    Inst_VOP3__V_MUL_U32_U24::Inst_VOP3__V_MUL_U32_U24(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_u32_u24", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MUL_U32_U24

    Inst_VOP3__V_MUL_U32_U24::~Inst_VOP3__V_MUL_U32_U24()
    {
    } // ~Inst_VOP3__V_MUL_U32_U24

    // D.u = S0.u[23:0] * S1.u[23:0].
    void
    Inst_VOP3__V_MUL_U32_U24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = bits(src0[lane], 23, 0) * bits(src1[lane], 23, 0);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MUL_HI_U32_U24::Inst_VOP3__V_MUL_HI_U32_U24(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_hi_u32_u24", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MUL_HI_U32_U24

    Inst_VOP3__V_MUL_HI_U32_U24::~Inst_VOP3__V_MUL_HI_U32_U24()
    {
    } // ~Inst_VOP3__V_MUL_HI_U32_U24

    // D.i = (S0.u[23:0] * S1.u[23:0]) >> 32.
    void
    Inst_VOP3__V_MUL_HI_U32_U24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0);
                VecElemU64 tmp_src1 = (VecElemU64)bits(src1[lane], 23, 0);
                vdst[lane] = (VecElemU32)((tmp_src0 * tmp_src1) >> 32);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_MIN_F32::Inst_VOP3__V_MIN_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_MIN_F32

    Inst_VOP3__V_MIN_F32::~Inst_VOP3__V_MIN_F32()
    {
    } // ~Inst_VOP3__V_MIN_F32

    // D.f = (S0.f < S1.f ? S0.f : S1.f).
    void
    Inst_VOP3__V_MIN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fmin(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAX_F32::Inst_VOP3__V_MAX_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_max_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_MAX_F32

    Inst_VOP3__V_MAX_F32::~Inst_VOP3__V_MAX_F32()
    {
    } // ~Inst_VOP3__V_MAX_F32

    // D.f = (S0.f >= S1.f ? S0.f : S1.f).
    void
    Inst_VOP3__V_MAX_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fmax(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
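    /**
     * Commentary added during cleanup: std::fmin/std::fmax return the
     * non-NaN operand when exactly one input is NaN (IEEE 754
     * minNum/maxNum semantics), e.g. std::fmax(NAN, 1.0f) == 1.0f, so a
     * single quiet NaN does not propagate through these min/max ops.
     */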
    Inst_VOP3__V_MIN_I32::Inst_VOP3__V_MIN_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MIN_I32

    Inst_VOP3__V_MIN_I32::~Inst_VOP3__V_MIN_I32()
    {
    } // ~Inst_VOP3__V_MIN_I32

    // D.i = min(S0.i, S1.i).
    void
    Inst_VOP3__V_MIN_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::min(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAX_I32::Inst_VOP3__V_MAX_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_max_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MAX_I32

    Inst_VOP3__V_MAX_I32::~Inst_VOP3__V_MAX_I32()
    {
    } // ~Inst_VOP3__V_MAX_I32

    // D.i = max(S0.i, S1.i).
    void
    Inst_VOP3__V_MAX_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::max(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MIN_U32::Inst_VOP3__V_MIN_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MIN_U32

    Inst_VOP3__V_MIN_U32::~Inst_VOP3__V_MIN_U32()
    {
    } // ~Inst_VOP3__V_MIN_U32

    // D.u = min(S0.u, S1.u).
    void
    Inst_VOP3__V_MIN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::min(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAX_U32::Inst_VOP3__V_MAX_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_max_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MAX_U32

    Inst_VOP3__V_MAX_U32::~Inst_VOP3__V_MAX_U32()
    {
    } // ~Inst_VOP3__V_MAX_U32

    // D.u = max(S0.u, S1.u).
    void
    Inst_VOP3__V_MAX_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::max(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
23978 Inst_VOP3__V_LSHRREV_B32::Inst_VOP3__V_LSHRREV_B32(InFmt_VOP3
*iFmt
)
23979 : Inst_VOP3(iFmt
, "v_lshrrev_b32", false)
23982 } // Inst_VOP3__V_LSHRREV_B32
23984 Inst_VOP3__V_LSHRREV_B32::~Inst_VOP3__V_LSHRREV_B32()
23986 } // ~Inst_VOP3__V_LSHRREV_B32
23988 // D.u = S1.u >> S0.u[4:0].
23989 // The vacated bits are set to zero.
23991 Inst_VOP3__V_LSHRREV_B32::execute(GPUDynInstPtr gpuDynInst
)
23993 Wavefront
*wf
= gpuDynInst
->wavefront();
23994 ConstVecOperandU32
src1(gpuDynInst
, extData
.SRC1
);
23995 ConstVecOperandU32
src0(gpuDynInst
, extData
.SRC0
);
23996 VecOperandU32
vdst(gpuDynInst
, instData
.VDST
);
24002 * input modifiers are supported by FP operations only
24004 assert(!(instData
.ABS
& 0x1));
24005 assert(!(instData
.ABS
& 0x2));
24006 assert(!(instData
.ABS
& 0x4));
24007 assert(!(extData
.NEG
& 0x1));
24008 assert(!(extData
.NEG
& 0x2));
24009 assert(!(extData
.NEG
& 0x4));
24011 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
24012 if (wf
->execMask(lane
)) {
24013 vdst
[lane
] = src1
[lane
] >> bits(src0
[lane
], 4, 0);
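
    // Note on the "rev" encoding above: the shift amount comes from the
    // first source operand and the value being shifted from the second,
    // e.g. src0 = 4 and src1 = 0xF0 makes each active lane compute
    // 0xF0 >> 4 == 0xF.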
    Inst_VOP3__V_ASHRREV_I32::Inst_VOP3__V_ASHRREV_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ashrrev_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_ASHRREV_I32

    Inst_VOP3__V_ASHRREV_I32::~Inst_VOP3__V_ASHRREV_I32()
    {
    } // ~Inst_VOP3__V_ASHRREV_I32

    // D.i = signext(S1.i) >> S0.i[4:0].
    // The vacated bits are set to the sign bit of the input value.
    void
    Inst_VOP3__V_ASHRREV_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
            }
        }

        vdst.write();
    } // execute
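
    // Unlike v_lshrrev_b32, the arithmetic shift replicates the sign bit:
    // src0 = 4 with src1 = -32 (0xFFFFFFE0) yields -2 (0xFFFFFFFE), where
    // a logical shift would have produced 0x0FFFFFFE.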
    Inst_VOP3__V_LSHLREV_B32::Inst_VOP3__V_LSHLREV_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_lshlrev_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_LSHLREV_B32

    Inst_VOP3__V_LSHLREV_B32::~Inst_VOP3__V_LSHLREV_B32()
    {
    } // ~Inst_VOP3__V_LSHLREV_B32

    // D.u = S1.u << S0.u[4:0].
    void
    Inst_VOP3__V_LSHLREV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] << bits(src0[lane], 4, 0);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_AND_B32::Inst_VOP3__V_AND_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_and_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_AND_B32

    Inst_VOP3__V_AND_B32::~Inst_VOP3__V_AND_B32()
    {
    } // ~Inst_VOP3__V_AND_B32

    // D.u = S0.u & S1.u.
    // Input and output modifiers not supported.
    void
    Inst_VOP3__V_AND_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] & src1[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_OR_B32::Inst_VOP3__V_OR_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_or_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_OR_B32

    Inst_VOP3__V_OR_B32::~Inst_VOP3__V_OR_B32()
    {
    } // ~Inst_VOP3__V_OR_B32

    // D.u = S0.u | S1.u.
    // Input and output modifiers not supported.
    void
    Inst_VOP3__V_OR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] | src1[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_XOR_B32::Inst_VOP3__V_XOR_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_xor_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_XOR_B32

    Inst_VOP3__V_XOR_B32::~Inst_VOP3__V_XOR_B32()
    {
    } // ~Inst_VOP3__V_XOR_B32

    // D.u = S0.u ^ S1.u.
    // Input and output modifiers not supported.
    void
    Inst_VOP3__V_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] ^ src1[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_MAC_F32::Inst_VOP3__V_MAC_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mac_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
        setFlag(MAC);
    } // Inst_VOP3__V_MAC_F32

    Inst_VOP3__V_MAC_F32::~Inst_VOP3__V_MAC_F32()
    {
    } // ~Inst_VOP3__V_MAC_F32

    // D.f = S0.f * S1.f + D.f.
    void
    Inst_VOP3__V_MAC_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        vdst.read();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
            }
        }

        vdst.write();
    } // execute
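
    // std::fma() evaluates src0 * src1 + vdst with a single rounding step;
    // note that vdst is read as a third source above before being
    // overwritten, which is what makes the accumulate form work.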
    Inst_VOP3__V_ADD_U32::Inst_VOP3__V_ADD_U32(InFmt_VOP3_SDST_ENC *iFmt)
        : Inst_VOP3_SDST_ENC(iFmt, "v_add_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
    } // Inst_VOP3__V_ADD_U32

    Inst_VOP3__V_ADD_U32::~Inst_VOP3__V_ADD_U32()
    {
    } // ~Inst_VOP3__V_ADD_U32

    // D.u = S0.u + S1.u;
    // VCC[threadId] = (S0.u + S1.u >= 0x100000000ULL ? 1 : 0) is an UNSIGNED
    // overflow or carry-out.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
    void
    Inst_VOP3__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, instData.SDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] + src1[lane];
                vcc.setBit(lane, ((ScalarRegU64)src0[lane]
                    + (ScalarRegU64)src1[lane]) >= 0x100000000ULL ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute
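
    // Worked example of the carry-out test above: with src0 = 0xFFFFFFFF
    // and src1 = 0x1 the widened sum is 0x100000000, so vdst wraps to 0
    // and the lane's VCC bit is set; the 64-bit casts keep the
    // intermediate sum from wrapping before the comparison.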
    Inst_VOP3__V_SUB_U32::Inst_VOP3__V_SUB_U32(InFmt_VOP3_SDST_ENC *iFmt)
        : Inst_VOP3_SDST_ENC(iFmt, "v_sub_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
    } // Inst_VOP3__V_SUB_U32

    Inst_VOP3__V_SUB_U32::~Inst_VOP3__V_SUB_U32()
    {
    } // ~Inst_VOP3__V_SUB_U32

    // D.u = S0.u - S1.u;
    // VCC[threadId] = (S1.u > S0.u ? 1 : 0) is an UNSIGNED overflow or
    // carry-out.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
    void
    Inst_VOP3__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, instData.SDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] - src1[lane];
                vcc.setBit(lane, src1[lane] > src0[lane] ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute

    Inst_VOP3__V_SUBREV_U32::Inst_VOP3__V_SUBREV_U32(
          InFmt_VOP3_SDST_ENC *iFmt)
        : Inst_VOP3_SDST_ENC(iFmt, "v_subrev_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
    } // Inst_VOP3__V_SUBREV_U32

    Inst_VOP3__V_SUBREV_U32::~Inst_VOP3__V_SUBREV_U32()
    {
    } // ~Inst_VOP3__V_SUBREV_U32

    // D.u = S1.u - S0.u;
    // VCC[threadId] = (S0.u > S1.u ? 1 : 0) is an UNSIGNED overflow or
    // carry-out.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
    void
    Inst_VOP3__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, instData.SDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] - src0[lane];
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute
    Inst_VOP3__V_ADDC_U32::Inst_VOP3__V_ADDC_U32(InFmt_VOP3_SDST_ENC *iFmt)
        : Inst_VOP3_SDST_ENC(iFmt, "v_addc_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(ReadsVCC);
    } // Inst_VOP3__V_ADDC_U32

    Inst_VOP3__V_ADDC_U32::~Inst_VOP3__V_ADDC_U32()
    {
    } // ~Inst_VOP3__V_ADDC_U32

    // D.u = S0.u + S1.u + VCC[threadId];
    // VCC[threadId] = (S0.u + S1.u + VCC[threadId] >= 0x100000000ULL ? 1 : 0)
    // is an UNSIGNED overflow.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
    // source comes from the SGPR-pair at S2.u.
    void
    Inst_VOP3__V_ADDC_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

        src0.readSrc();
        src1.readSrc();
        vcc.read();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] + src1[lane]
                    + bits(vcc.rawData(), lane);
                sdst.setBit(lane, ((VecElemU64)src0[lane]
                    + (VecElemU64)src1[lane]
                        + (VecElemU64)bits(vcc.rawData(), lane))
                            >= 0x100000000ULL ? 1 : 0);
            }
        }

        vdst.write();
        sdst.write();
    } // execute
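
    // Usage note: v_add_u32 followed by v_addc_u32 forms a 64-bit add.
    // The low-word add leaves a per-lane carry in VCC, which this
    // instruction folds into the high-word sum via
    // bits(vcc.rawData(), lane).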
    Inst_VOP3__V_SUBB_U32::Inst_VOP3__V_SUBB_U32(InFmt_VOP3_SDST_ENC *iFmt)
        : Inst_VOP3_SDST_ENC(iFmt, "v_subb_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(ReadsVCC);
    } // Inst_VOP3__V_SUBB_U32

    Inst_VOP3__V_SUBB_U32::~Inst_VOP3__V_SUBB_U32()
    {
    } // ~Inst_VOP3__V_SUBB_U32

    // D.u = S0.u - S1.u - VCC[threadId];
    // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED
    // overflow or carry-out.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
    // source comes from the SGPR-pair at S2.u.
    void
    Inst_VOP3__V_SUBB_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        vcc.read();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] - src1[lane]
                    - bits(vcc.rawData(), lane);
                sdst.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane))
                    > src0[lane] ? 1 : 0);
            }
        }

        vdst.write();
        sdst.write();
    } // execute
    Inst_VOP3__V_SUBBREV_U32::Inst_VOP3__V_SUBBREV_U32(
          InFmt_VOP3_SDST_ENC *iFmt)
        : Inst_VOP3_SDST_ENC(iFmt, "v_subbrev_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(ReadsVCC);
    } // Inst_VOP3__V_SUBBREV_U32

    Inst_VOP3__V_SUBBREV_U32::~Inst_VOP3__V_SUBBREV_U32()
    {
    } // ~Inst_VOP3__V_SUBBREV_U32

    // D.u = S1.u - S0.u - VCC[threadId];
    // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED
    // overflow or carry-out.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
    // source comes from the SGPR-pair at S2.u.
    void
    Inst_VOP3__V_SUBBREV_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        vcc.read();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] - src0[lane]
                    - bits(vcc.rawData(), lane);
                sdst.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane))
                    > src0[lane] ? 1 : 0);
            }
        }

        vdst.write();
        sdst.write();
    } // execute
    Inst_VOP3__V_ADD_F16::Inst_VOP3__V_ADD_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_add_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_ADD_F16

    Inst_VOP3__V_ADD_F16::~Inst_VOP3__V_ADD_F16()
    {
    } // ~Inst_VOP3__V_ADD_F16

    // D.f16 = S0.f16 + S1.f16.
    void
    Inst_VOP3__V_ADD_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_SUB_F16::Inst_VOP3__V_SUB_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sub_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_SUB_F16

    Inst_VOP3__V_SUB_F16::~Inst_VOP3__V_SUB_F16()
    {
    } // ~Inst_VOP3__V_SUB_F16

    // D.f16 = S0.f16 - S1.f16.
    void
    Inst_VOP3__V_SUB_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_SUBREV_F16::Inst_VOP3__V_SUBREV_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_subrev_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_SUBREV_F16

    Inst_VOP3__V_SUBREV_F16::~Inst_VOP3__V_SUBREV_F16()
    {
    } // ~Inst_VOP3__V_SUBREV_F16

    // D.f16 = S1.f16 - S0.f16.
    void
    Inst_VOP3__V_SUBREV_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_MUL_F16::Inst_VOP3__V_MUL_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_MUL_F16

    Inst_VOP3__V_MUL_F16::~Inst_VOP3__V_MUL_F16()
    {
    } // ~Inst_VOP3__V_MUL_F16

    // D.f16 = S0.f16 * S1.f16.
    void
    Inst_VOP3__V_MUL_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_MAC_F16::Inst_VOP3__V_MAC_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mac_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
        setFlag(MAC);
    } // Inst_VOP3__V_MAC_F16

    Inst_VOP3__V_MAC_F16::~Inst_VOP3__V_MAC_F16()
    {
    } // ~Inst_VOP3__V_MAC_F16

    // D.f16 = S0.f16 * S1.f16 + D.f16.
    void
    Inst_VOP3__V_MAC_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_VOP3__V_ADD_U16::Inst_VOP3__V_ADD_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_add_u16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_ADD_U16

    Inst_VOP3__V_ADD_U16::~Inst_VOP3__V_ADD_U16()
    {
    } // ~Inst_VOP3__V_ADD_U16

    // D.u16 = S0.u16 + S1.u16.
    void
    Inst_VOP3__V_ADD_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] + src1[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_SUB_U16::Inst_VOP3__V_SUB_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sub_u16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_SUB_U16

    Inst_VOP3__V_SUB_U16::~Inst_VOP3__V_SUB_U16()
    {
    } // ~Inst_VOP3__V_SUB_U16

    // D.u16 = S0.u16 - S1.u16.
    void
    Inst_VOP3__V_SUB_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] - src1[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_SUBREV_U16::Inst_VOP3__V_SUBREV_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_subrev_u16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_SUBREV_U16

    Inst_VOP3__V_SUBREV_U16::~Inst_VOP3__V_SUBREV_U16()
    {
    } // ~Inst_VOP3__V_SUBREV_U16

    // D.u16 = S1.u16 - S0.u16.
    void
    Inst_VOP3__V_SUBREV_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] - src0[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MUL_LO_U16::Inst_VOP3__V_MUL_LO_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_lo_u16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MUL_LO_U16

    Inst_VOP3__V_MUL_LO_U16::~Inst_VOP3__V_MUL_LO_U16()
    {
    } // ~Inst_VOP3__V_MUL_LO_U16

    // D.u16 = S0.u16 * S1.u16.
    void
    Inst_VOP3__V_MUL_LO_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] * src1[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_LSHLREV_B16::Inst_VOP3__V_LSHLREV_B16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_lshlrev_b16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_LSHLREV_B16

    Inst_VOP3__V_LSHLREV_B16::~Inst_VOP3__V_LSHLREV_B16()
    {
    } // ~Inst_VOP3__V_LSHLREV_B16

    // D.u[15:0] = S1.u[15:0] << S0.u[3:0].
    void
    Inst_VOP3__V_LSHLREV_B16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] << bits(src0[lane], 3, 0);
            }
        }

        vdst.write();
    } // execute
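
    // The 16-bit shifts take the distance from src0[3:0], so only shift
    // amounts 0-15 are expressible; e.g. src0 = 0x12 shifts by 2, not 18.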
    Inst_VOP3__V_LSHRREV_B16::Inst_VOP3__V_LSHRREV_B16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_lshrrev_b16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_LSHRREV_B16

    Inst_VOP3__V_LSHRREV_B16::~Inst_VOP3__V_LSHRREV_B16()
    {
    } // ~Inst_VOP3__V_LSHRREV_B16

    // D.u[15:0] = S1.u[15:0] >> S0.u[3:0].
    // The vacated bits are set to zero.
    void
    Inst_VOP3__V_LSHRREV_B16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_ASHRREV_I16::Inst_VOP3__V_ASHRREV_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ashrrev_i16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_ASHRREV_I16

    Inst_VOP3__V_ASHRREV_I16::~Inst_VOP3__V_ASHRREV_I16()
    {
    } // ~Inst_VOP3__V_ASHRREV_I16

    // D.i[15:0] = signext(S1.i[15:0]) >> S0.i[3:0].
    // The vacated bits are set to the sign bit of the input value.
    void
    Inst_VOP3__V_ASHRREV_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        VecOperandI16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_MAX_F16::Inst_VOP3__V_MAX_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_max_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_MAX_F16

    Inst_VOP3__V_MAX_F16::~Inst_VOP3__V_MAX_F16()
    {
    } // ~Inst_VOP3__V_MAX_F16

    // D.f16 = max(S0.f16, S1.f16).
    void
    Inst_VOP3__V_MAX_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_MIN_F16::Inst_VOP3__V_MIN_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_MIN_F16

    Inst_VOP3__V_MIN_F16::~Inst_VOP3__V_MIN_F16()
    {
    } // ~Inst_VOP3__V_MIN_F16

    // D.f16 = min(S0.f16, S1.f16).
    void
    Inst_VOP3__V_MIN_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_VOP3__V_MAX_U16::Inst_VOP3__V_MAX_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_max_u16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MAX_U16

    Inst_VOP3__V_MAX_U16::~Inst_VOP3__V_MAX_U16()
    {
    } // ~Inst_VOP3__V_MAX_U16

    // D.u[15:0] = max(S0.u[15:0], S1.u[15:0]).
    void
    Inst_VOP3__V_MAX_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::max(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAX_I16::Inst_VOP3__V_MAX_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_max_i16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MAX_I16

    Inst_VOP3__V_MAX_I16::~Inst_VOP3__V_MAX_I16()
    {
    } // ~Inst_VOP3__V_MAX_I16

    // D.i[15:0] = max(S0.i[15:0], S1.i[15:0]).
    void
    Inst_VOP3__V_MAX_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        VecOperandI16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::max(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MIN_U16::Inst_VOP3__V_MIN_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min_u16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MIN_U16

    Inst_VOP3__V_MIN_U16::~Inst_VOP3__V_MIN_U16()
    {
    } // ~Inst_VOP3__V_MIN_U16

    // D.u[15:0] = min(S0.u[15:0], S1.u[15:0]).
    void
    Inst_VOP3__V_MIN_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::min(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MIN_I16::Inst_VOP3__V_MIN_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min_i16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MIN_I16

    Inst_VOP3__V_MIN_I16::~Inst_VOP3__V_MIN_I16()
    {
    } // ~Inst_VOP3__V_MIN_I16

    // D.i[15:0] = min(S0.i[15:0], S1.i[15:0]).
    void
    Inst_VOP3__V_MIN_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        VecOperandI16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::min(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_LDEXP_F16::Inst_VOP3__V_LDEXP_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ldexp_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_LDEXP_F16

    Inst_VOP3__V_LDEXP_F16::~Inst_VOP3__V_LDEXP_F16()
    {
    } // ~Inst_VOP3__V_LDEXP_F16

    // D.f16 = S0.f16 * (2 ** S1.i16).
    void
    Inst_VOP3__V_LDEXP_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_NOP::Inst_VOP3__V_NOP(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_nop", false)
    {
        setFlag(Nop);
        setFlag(ALU);
    } // Inst_VOP3__V_NOP

    Inst_VOP3__V_NOP::~Inst_VOP3__V_NOP()
    {
    } // ~Inst_VOP3__V_NOP

    // Do nothing.
    void
    Inst_VOP3__V_NOP::execute(GPUDynInstPtr gpuDynInst)
    {
    } // execute
    Inst_VOP3__V_MOV_B32::Inst_VOP3__V_MOV_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mov_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MOV_B32

    Inst_VOP3__V_MOV_B32::~Inst_VOP3__V_MOV_B32()
    {
    } // ~Inst_VOP3__V_MOV_B32

    // D.u = S0.u.
    // Input and output modifiers not supported; this is an untyped operation.
    void
    Inst_VOP3__V_MOV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CVT_I32_F64::Inst_VOP3__V_CVT_I32_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_i32_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CVT_I32_F64

    Inst_VOP3__V_CVT_I32_F64::~Inst_VOP3__V_CVT_I32_F64()
    {
    } // ~Inst_VOP3__V_CVT_I32_F64

    // D.i = (int)S0.d.
    // Out-of-range floating point values (including infinity) saturate. NaN
    // is converted to 0.
    void
    Inst_VOP3__V_CVT_I32_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp;
                std::frexp(src[lane], &exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane]) || exp > 30) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = INT_MIN;
                    } else {
                        vdst[lane] = INT_MAX;
                    }
                } else {
                    vdst[lane] = (VecElemI32)src[lane];
                }
            }
        }

        vdst.write();
    } // execute
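
    // std::frexp() decomposes src[lane] as m * 2^exp with 0.5 <= |m| < 1,
    // so the exp > 30 test flags any magnitude of at least 2^30 and
    // saturates it to INT_MIN/INT_MAX along with the infinities.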
    Inst_VOP3__V_CVT_F64_I32::Inst_VOP3__V_CVT_F64_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f64_i32", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CVT_F64_I32

    Inst_VOP3__V_CVT_F64_I32::~Inst_VOP3__V_CVT_F64_I32()
    {
    } // ~Inst_VOP3__V_CVT_F64_I32

    // D.d = (double)S0.i.
    void
    Inst_VOP3__V_CVT_F64_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF64)src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_CVT_F32_I32::Inst_VOP3__V_CVT_F32_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f32_i32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_F32_I32

    Inst_VOP3__V_CVT_F32_I32::~Inst_VOP3__V_CVT_F32_I32()
    {
    } // ~Inst_VOP3__V_CVT_F32_I32

    // D.f = (float)S0.i.
    void
    Inst_VOP3__V_CVT_F32_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        VecOperandI32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)src[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CVT_F32_U32::Inst_VOP3__V_CVT_F32_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f32_u32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_F32_U32

    Inst_VOP3__V_CVT_F32_U32::~Inst_VOP3__V_CVT_F32_U32()
    {
    } // ~Inst_VOP3__V_CVT_F32_U32

    // D.f = (float)S0.u.
    void
    Inst_VOP3__V_CVT_F32_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)src[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CVT_U32_F32::Inst_VOP3__V_CVT_U32_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_u32_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_U32_F32

    Inst_VOP3__V_CVT_U32_F32::~Inst_VOP3__V_CVT_U32_F32()
    {
    } // ~Inst_VOP3__V_CVT_U32_F32

    // D.u = (unsigned)S0.f.
    // Out-of-range floating point values (including infinity) saturate. NaN
    // is converted to 0.
    void
    Inst_VOP3__V_CVT_U32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp;
                std::frexp(src[lane], &exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane])) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = 0;
                    } else {
                        vdst[lane] = UINT_MAX;
                    }
                } else if (exp > 31) {
                    vdst[lane] = UINT_MAX;
                } else {
                    vdst[lane] = (VecElemU32)src[lane];
                }
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CVT_I32_F32::Inst_VOP3__V_CVT_I32_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_i32_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_I32_F32

    Inst_VOP3__V_CVT_I32_F32::~Inst_VOP3__V_CVT_I32_F32()
    {
    } // ~Inst_VOP3__V_CVT_I32_F32

    // D.i = (int)S0.f.
    // Out-of-range floating point values (including infinity) saturate. NaN
    // is converted to 0.
    void
    Inst_VOP3__V_CVT_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp;
                std::frexp(src[lane], &exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane]) || exp > 30) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = INT_MIN;
                    } else {
                        vdst[lane] = INT_MAX;
                    }
                } else {
                    vdst[lane] = (VecElemI32)src[lane];
                }
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_MOV_FED_B32::Inst_VOP3__V_MOV_FED_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mov_fed_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MOV_FED_B32

    Inst_VOP3__V_MOV_FED_B32::~Inst_VOP3__V_MOV_FED_B32()
    {
    } // ~Inst_VOP3__V_MOV_FED_B32

    // D.u = S0.u.
    // Input and output modifiers not supported; this is an untyped operation.
    void
    Inst_VOP3__V_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_F16_F32::Inst_VOP3__V_CVT_F16_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f16_f32", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CVT_F16_F32

    Inst_VOP3__V_CVT_F16_F32::~Inst_VOP3__V_CVT_F16_F32()
    {
    } // ~Inst_VOP3__V_CVT_F16_F32

    // D.f16 = flt32_to_flt16(S0.f).
    void
    Inst_VOP3__V_CVT_F16_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_F32_F16::Inst_VOP3__V_CVT_F32_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f32_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CVT_F32_F16

    Inst_VOP3__V_CVT_F32_F16::~Inst_VOP3__V_CVT_F32_F16()
    {
    } // ~Inst_VOP3__V_CVT_F32_F16

    // D.f = flt16_to_flt32(S0.f16).
    void
    Inst_VOP3__V_CVT_F32_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_VOP3__V_CVT_RPI_I32_F32::Inst_VOP3__V_CVT_RPI_I32_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_rpi_i32_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_RPI_I32_F32

    Inst_VOP3__V_CVT_RPI_I32_F32::~Inst_VOP3__V_CVT_RPI_I32_F32()
    {
    } // ~Inst_VOP3__V_CVT_RPI_I32_F32

    // D.i = (int)floor(S0.f + 0.5).
    void
    Inst_VOP3__V_CVT_RPI_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_CVT_FLR_I32_F32::Inst_VOP3__V_CVT_FLR_I32_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_flr_i32_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_FLR_I32_F32

    Inst_VOP3__V_CVT_FLR_I32_F32::~Inst_VOP3__V_CVT_FLR_I32_F32()
    {
    } // ~Inst_VOP3__V_CVT_FLR_I32_F32

    // D.i = (int)floor(S0.f).
    void
    Inst_VOP3__V_CVT_FLR_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemI32)std::floor(src[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CVT_OFF_F32_I4::Inst_VOP3__V_CVT_OFF_F32_I4(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_off_f32_i4", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_OFF_F32_I4

    Inst_VOP3__V_CVT_OFF_F32_I4::~Inst_VOP3__V_CVT_OFF_F32_I4()
    {
    } // ~Inst_VOP3__V_CVT_OFF_F32_I4

    // 4-bit signed int to 32-bit float.
    void
    Inst_VOP3__V_CVT_OFF_F32_I4::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_VOP3__V_CVT_F32_F64::Inst_VOP3__V_CVT_F32_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f32_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CVT_F32_F64

    Inst_VOP3__V_CVT_F32_F64::~Inst_VOP3__V_CVT_F32_F64()
    {
    } // ~Inst_VOP3__V_CVT_F32_F64

    // D.f = (float)S0.d.
    void
    Inst_VOP3__V_CVT_F32_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_CVT_F64_F32::Inst_VOP3__V_CVT_F64_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f64_f32", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CVT_F64_F32

    Inst_VOP3__V_CVT_F64_F32::~Inst_VOP3__V_CVT_F64_F32()
    {
    } // ~Inst_VOP3__V_CVT_F64_F32

    // D.d = (double)S0.f.
    void
    Inst_VOP3__V_CVT_F64_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF64)src[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CVT_F32_UBYTE0::Inst_VOP3__V_CVT_F32_UBYTE0(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f32_ubyte0", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_F32_UBYTE0

    Inst_VOP3__V_CVT_F32_UBYTE0::~Inst_VOP3__V_CVT_F32_UBYTE0()
    {
    } // ~Inst_VOP3__V_CVT_F32_UBYTE0

    // D.f = (float)(S0.u[7:0]).
    void
    Inst_VOP3__V_CVT_F32_UBYTE0::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)bits(src[lane], 7, 0);
            }
        }

        vdst.write();
    } // execute
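
    // Example: src = 0xAABBCCDD converts byte 0 (0xDD) to 221.0f; the
    // ubyte1/2/3 variants below select the progressively higher bytes.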
    Inst_VOP3__V_CVT_F32_UBYTE1::Inst_VOP3__V_CVT_F32_UBYTE1(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f32_ubyte1", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_F32_UBYTE1

    Inst_VOP3__V_CVT_F32_UBYTE1::~Inst_VOP3__V_CVT_F32_UBYTE1()
    {
    } // ~Inst_VOP3__V_CVT_F32_UBYTE1

    // D.f = (float)(S0.u[15:8]).
    void
    Inst_VOP3__V_CVT_F32_UBYTE1::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)bits(src[lane], 15, 8);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_CVT_F32_UBYTE2::Inst_VOP3__V_CVT_F32_UBYTE2(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f32_ubyte2", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_F32_UBYTE2

    Inst_VOP3__V_CVT_F32_UBYTE2::~Inst_VOP3__V_CVT_F32_UBYTE2()
    {
    } // ~Inst_VOP3__V_CVT_F32_UBYTE2

    // D.f = (float)(S0.u[23:16]).
    void
    Inst_VOP3__V_CVT_F32_UBYTE2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)bits(src[lane], 23, 16);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_CVT_F32_UBYTE3::Inst_VOP3__V_CVT_F32_UBYTE3(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f32_ubyte3", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_F32_UBYTE3

    Inst_VOP3__V_CVT_F32_UBYTE3::~Inst_VOP3__V_CVT_F32_UBYTE3()
    {
    } // ~Inst_VOP3__V_CVT_F32_UBYTE3

    // D.f = (float)(S0.u[31:24]).
    void
    Inst_VOP3__V_CVT_F32_UBYTE3::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)bits(src[lane], 31, 24);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CVT_U32_F64::Inst_VOP3__V_CVT_U32_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_u32_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CVT_U32_F64

    Inst_VOP3__V_CVT_U32_F64::~Inst_VOP3__V_CVT_U32_F64()
    {
    } // ~Inst_VOP3__V_CVT_U32_F64

    // D.u = (unsigned)S0.d.
    // Out-of-range floating point values (including infinity) saturate. NaN
    // is converted to 0.
    void
    Inst_VOP3__V_CVT_U32_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp;
                std::frexp(src[lane], &exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane])) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = 0;
                    } else {
                        vdst[lane] = UINT_MAX;
                    }
                } else if (exp > 31) {
                    vdst[lane] = UINT_MAX;
                } else {
                    vdst[lane] = (VecElemU32)src[lane];
                }
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CVT_F64_U32::Inst_VOP3__V_CVT_F64_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f64_u32", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CVT_F64_U32

    Inst_VOP3__V_CVT_F64_U32::~Inst_VOP3__V_CVT_F64_U32()
    {
    } // ~Inst_VOP3__V_CVT_F64_U32

    // D.d = (double)S0.u.
    void
    Inst_VOP3__V_CVT_F64_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF64)src[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_TRUNC_F64::Inst_VOP3__V_TRUNC_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_trunc_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_TRUNC_F64

    Inst_VOP3__V_TRUNC_F64::~Inst_VOP3__V_TRUNC_F64()
    {
    } // ~Inst_VOP3__V_TRUNC_F64

    // D.d = trunc(S0.d), return integer part of S0.d.
    void
    Inst_VOP3__V_TRUNC_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::trunc(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_CEIL_F64::Inst_VOP3__V_CEIL_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ceil_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CEIL_F64

    Inst_VOP3__V_CEIL_F64::~Inst_VOP3__V_CEIL_F64()
    {
    } // ~Inst_VOP3__V_CEIL_F64

    // D.d = ceil(S0.d);
    void
    Inst_VOP3__V_CEIL_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::ceil(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_RNDNE_F64::Inst_VOP3__V_RNDNE_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rndne_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_RNDNE_F64

    Inst_VOP3__V_RNDNE_F64::~Inst_VOP3__V_RNDNE_F64()
    {
    } // ~Inst_VOP3__V_RNDNE_F64

    // D.d = round_nearest_even(S0.d).
    void
    Inst_VOP3__V_RNDNE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = roundNearestEven(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_FLOOR_F64::Inst_VOP3__V_FLOOR_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_floor_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_FLOOR_F64

    Inst_VOP3__V_FLOOR_F64::~Inst_VOP3__V_FLOOR_F64()
    {
    } // ~Inst_VOP3__V_FLOOR_F64

    // D.d = floor(S0.d);
    void
    Inst_VOP3__V_FLOOR_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::floor(src[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_FRACT_F32::Inst_VOP3__V_FRACT_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_fract_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_FRACT_F32

    Inst_VOP3__V_FRACT_F32::~Inst_VOP3__V_FRACT_F32()
    {
    } // ~Inst_VOP3__V_FRACT_F32

    // D.f = modf(S0.f).
    void
    Inst_VOP3__V_FRACT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemF32 int_part(0.0);
                vdst[lane] = std::modf(src[lane], &int_part);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_TRUNC_F32::Inst_VOP3__V_TRUNC_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_trunc_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_TRUNC_F32

    Inst_VOP3__V_TRUNC_F32::~Inst_VOP3__V_TRUNC_F32()
    {
    } // ~Inst_VOP3__V_TRUNC_F32

    // D.f = trunc(S0.f), return integer part of S0.f.
    void
    Inst_VOP3__V_TRUNC_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::trunc(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_CEIL_F32::Inst_VOP3__V_CEIL_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ceil_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CEIL_F32

    Inst_VOP3__V_CEIL_F32::~Inst_VOP3__V_CEIL_F32()
    {
    } // ~Inst_VOP3__V_CEIL_F32

    // D.f = ceil(S0.f);
    void
    Inst_VOP3__V_CEIL_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::ceil(src[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_RNDNE_F32::Inst_VOP3__V_RNDNE_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rndne_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_RNDNE_F32

    Inst_VOP3__V_RNDNE_F32::~Inst_VOP3__V_RNDNE_F32()
    {
    } // ~Inst_VOP3__V_RNDNE_F32

    // D.f = round_nearest_even(S0.f).
    void
    Inst_VOP3__V_RNDNE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = roundNearestEven(src[lane]);
            }
        }

        vdst.write();
    } // execute
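
    // Round-to-nearest-even breaks ties toward the even integer, e.g.
    // roundNearestEven(2.5f) gives 2.0f while roundNearestEven(3.5f)
    // gives 4.0f.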
    Inst_VOP3__V_FLOOR_F32::Inst_VOP3__V_FLOOR_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_floor_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_FLOOR_F32

    Inst_VOP3__V_FLOOR_F32::~Inst_VOP3__V_FLOOR_F32()
    {
    } // ~Inst_VOP3__V_FLOOR_F32

    // D.f = floor(S0.f);
    void
    Inst_VOP3__V_FLOOR_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::floor(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_EXP_F32::Inst_VOP3__V_EXP_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_exp_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_EXP_F32

    Inst_VOP3__V_EXP_F32::~Inst_VOP3__V_EXP_F32()
    {
    } // ~Inst_VOP3__V_EXP_F32

    // D.f = pow(2.0, S0.f).
    void
    Inst_VOP3__V_EXP_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::pow(2.0, src[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_LOG_F32::Inst_VOP3__V_LOG_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_log_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_LOG_F32

    Inst_VOP3__V_LOG_F32::~Inst_VOP3__V_LOG_F32()
    {
    } // ~Inst_VOP3__V_LOG_F32

    // D.f = log2(S0.f).
    void
    Inst_VOP3__V_LOG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::log2(src[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_RCP_F32::Inst_VOP3__V_RCP_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rcp_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_RCP_F32

    Inst_VOP3__V_RCP_F32::~Inst_VOP3__V_RCP_F32()
    {
    } // ~Inst_VOP3__V_RCP_F32

    // D.f = 1.0 / S0.f.
    void
    Inst_VOP3__V_RCP_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = 1.0 / src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_RCP_IFLAG_F32::Inst_VOP3__V_RCP_IFLAG_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rcp_iflag_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_RCP_IFLAG_F32

    Inst_VOP3__V_RCP_IFLAG_F32::~Inst_VOP3__V_RCP_IFLAG_F32()
    {
    } // ~Inst_VOP3__V_RCP_IFLAG_F32

    // D.f = 1.0 / S0.f.
    void
    Inst_VOP3__V_RCP_IFLAG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = 1.0 / src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_RSQ_F32::Inst_VOP3__V_RSQ_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rsq_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_RSQ_F32

    Inst_VOP3__V_RSQ_F32::~Inst_VOP3__V_RSQ_F32()
    {
    } // ~Inst_VOP3__V_RSQ_F32

    // D.f = 1.0 / sqrt(S0.f).
    void
    Inst_VOP3__V_RSQ_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = 1.0 / std::sqrt(src[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_RCP_F64::Inst_VOP3__V_RCP_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rcp_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_RCP_F64

    Inst_VOP3__V_RCP_F64::~Inst_VOP3__V_RCP_F64()
    {
    } // ~Inst_VOP3__V_RCP_F64

    // D.d = 1.0 / S0.d.
    void
    Inst_VOP3__V_RCP_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::fpclassify(src[lane]) == FP_ZERO) {
                    vdst[lane] = +INFINITY;
                } else if (std::isnan(src[lane])) {
                    vdst[lane] = NAN;
                } else if (std::isinf(src[lane])) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = -0.0;
                    } else {
                        vdst[lane] = 0.0;
                    }
                } else {
                    vdst[lane] = 1.0 / src[lane];
                }
            }
        }

        vdst.write();
    } // execute
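
    // The special cases above short-circuit the division: a zero source
    // produces +infinity, NaN propagates, and each infinity maps to a
    // correspondingly signed zero (-0.0 for -infinity, 0.0 for +infinity).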
    Inst_VOP3__V_RSQ_F64::Inst_VOP3__V_RSQ_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rsq_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_RSQ_F64

    Inst_VOP3__V_RSQ_F64::~Inst_VOP3__V_RSQ_F64()
    {
    } // ~Inst_VOP3__V_RSQ_F64

    // D.d = 1.0 / sqrt(S0.d).
    void
    Inst_VOP3__V_RSQ_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::fpclassify(src[lane]) == FP_ZERO) {
                    vdst[lane] = +INFINITY;
                } else if (std::isnan(src[lane])) {
                    vdst[lane] = NAN;
                } else if (std::isinf(src[lane])
                           && !std::signbit(src[lane])) {
                    vdst[lane] = 0.0;
                } else if (std::signbit(src[lane])) {
                    vdst[lane] = NAN;
                } else {
                    vdst[lane] = 1.0 / std::sqrt(src[lane]);
                }
            }
        }

        vdst.write();
    } // execute
26660 Inst_VOP3__V_SQRT_F32::Inst_VOP3__V_SQRT_F32(InFmt_VOP3
*iFmt
)
26661 : Inst_VOP3(iFmt
, "v_sqrt_f32", false)
26665 } // Inst_VOP3__V_SQRT_F32
26667 Inst_VOP3__V_SQRT_F32::~Inst_VOP3__V_SQRT_F32()
26669 } // ~Inst_VOP3__V_SQRT_F32
26671 // D.f = sqrt(S0.f).
26673 Inst_VOP3__V_SQRT_F32::execute(GPUDynInstPtr gpuDynInst
)
26675 Wavefront
*wf
= gpuDynInst
->wavefront();
26676 ConstVecOperandF32
src(gpuDynInst
, extData
.SRC0
);
26677 VecOperandF32
vdst(gpuDynInst
, instData
.VDST
);
26681 if (instData
.ABS
& 0x1) {
26685 if (extData
.NEG
& 0x1) {
26689 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
26690 if (wf
->execMask(lane
)) {
26691 vdst
[lane
] = std::sqrt(src
[lane
]);
26698 Inst_VOP3__V_SQRT_F64::Inst_VOP3__V_SQRT_F64(InFmt_VOP3
*iFmt
)
26699 : Inst_VOP3(iFmt
, "v_sqrt_f64", false)
26703 } // Inst_VOP3__V_SQRT_F64
26705 Inst_VOP3__V_SQRT_F64::~Inst_VOP3__V_SQRT_F64()
26707 } // ~Inst_VOP3__V_SQRT_F64
26709 // D.d = sqrt(S0.d).
26711 Inst_VOP3__V_SQRT_F64::execute(GPUDynInstPtr gpuDynInst
)
26713 Wavefront
*wf
= gpuDynInst
->wavefront();
26714 ConstVecOperandF64
src(gpuDynInst
, extData
.SRC0
);
26715 VecOperandF64
vdst(gpuDynInst
, instData
.VDST
);
26719 if (instData
.ABS
& 0x1) {
26723 if (extData
.NEG
& 0x1) {
26727 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
26728 if (wf
->execMask(lane
)) {
26729 vdst
[lane
] = std::sqrt(src
[lane
]);
26736 Inst_VOP3__V_SIN_F32::Inst_VOP3__V_SIN_F32(InFmt_VOP3
*iFmt
)
26737 : Inst_VOP3(iFmt
, "v_sin_f32", false)
26741 } // Inst_VOP3__V_SIN_F32
26743 Inst_VOP3__V_SIN_F32::~Inst_VOP3__V_SIN_F32()
26745 } // ~Inst_VOP3__V_SIN_F32
26747 // D.f = sin(S0.f * 2 * PI).
26749 Inst_VOP3__V_SIN_F32::execute(GPUDynInstPtr gpuDynInst
)
26751 Wavefront
*wf
= gpuDynInst
->wavefront();
26752 ConstVecOperandF32
src(gpuDynInst
, extData
.SRC0
);
26753 ConstScalarOperandF32
pi(gpuDynInst
, REG_PI
);
26754 VecOperandF32
vdst(gpuDynInst
, instData
.VDST
);
26759 if (instData
.ABS
& 0x1) {
26763 if (extData
.NEG
& 0x1) {
26767 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
26768 if (wf
->execMask(lane
)) {
26769 vdst
[lane
] = std::sin(src
[lane
] * 2 * pi
.rawData());
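
    // The 2 * PI factor means S0 is interpreted in rotations rather than
    // radians (1.0 is one full turn). A standalone illustration (function
    // name is ours, not simulator code):
    //
    //     #include <cmath>
    //     float sinTurns(float turns)
    //     {
    //         return std::sin(turns * 2.0f * static_cast<float>(M_PI));
    //     }
    //
    //     // sinTurns(0.25f) is approximately 1.0f (a quarter turn)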

    Inst_VOP3__V_COS_F32::Inst_VOP3__V_COS_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cos_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_COS_F32

    Inst_VOP3__V_COS_F32::~Inst_VOP3__V_COS_F32()
    {
    } // ~Inst_VOP3__V_COS_F32

    // D.f = cos(S0.f * 2 * PI).
    void
    Inst_VOP3__V_COS_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();
        pi.read();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::cos(src[lane] * 2 * pi.rawData());
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_NOT_B32::Inst_VOP3__V_NOT_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_not_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_NOT_B32

    Inst_VOP3__V_NOT_B32::~Inst_VOP3__V_NOT_B32()
    {
    } // ~Inst_VOP3__V_NOT_B32

    // D.u = ~S0.u.
    // Input and output modifiers not supported.
    void
    Inst_VOP3__V_NOT_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = ~src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_BFREV_B32::Inst_VOP3__V_BFREV_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_bfrev_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_BFREV_B32

    Inst_VOP3__V_BFREV_B32::~Inst_VOP3__V_BFREV_B32()
    {
    } // ~Inst_VOP3__V_BFREV_B32

    // D.u[31:0] = S0.u[0:31], bitfield reverse.
    // Input and output modifiers not supported.
    void
    Inst_VOP3__V_BFREV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = reverseBits(src[lane]);
            }
        }

        vdst.write();
    } // execute
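
    // reverseBits() is a gem5 helper. For illustration only, a
    // self-contained 32-bit bit reversal can be written with the classic
    // swap-in-halves trick (name is ours):
    //
    //     #include <cstdint>
    //     uint32_t reverse32(uint32_t v)
    //     {
    //         v = ((v >> 1) & 0x55555555u) | ((v & 0x55555555u) << 1);
    //         v = ((v >> 2) & 0x33333333u) | ((v & 0x33333333u) << 2);
    //         v = ((v >> 4) & 0x0f0f0f0fu) | ((v & 0x0f0f0f0fu) << 4);
    //         v = ((v >> 8) & 0x00ff00ffu) | ((v & 0x00ff00ffu) << 8);
    //         return (v >> 16) | (v << 16);
    //     }
    //
    //     // reverse32(0x00000001u) == 0x80000000u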

    Inst_VOP3__V_FFBH_U32::Inst_VOP3__V_FFBH_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ffbh_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_FFBH_U32

    Inst_VOP3__V_FFBH_U32::~Inst_VOP3__V_FFBH_U32()
    {
    } // ~Inst_VOP3__V_FFBH_U32

    // D.u = position of first 1 in S0.u from MSB;
    // D.u = 0xffffffff if S0.u == 0.
    void
    Inst_VOP3__V_FFBH_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = findFirstOneMsb(src[lane]);
            }
        }

        vdst.write();
    } // execute
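
    // findFirstOneMsb() counts bit positions from the MSB, so the result is
    // the number of leading zeros, with the all-zeros input mapped to
    // 0xffffffff. An illustrative standalone version (name is ours):
    //
    //     #include <cstdint>
    //     uint32_t ffbhU32(uint32_t v)
    //     {
    //         if (!v) return 0xffffffffu;
    //         uint32_t pos = 0;
    //         while (!(v & 0x80000000u)) { // shift until the MSB is set
    //             v <<= 1;
    //             ++pos;
    //         }
    //         return pos; // e.g. ffbhU32(1) == 31, ffbhU32(1u << 31) == 0
    //     }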

    Inst_VOP3__V_FFBL_B32::Inst_VOP3__V_FFBL_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ffbl_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_FFBL_B32

    Inst_VOP3__V_FFBL_B32::~Inst_VOP3__V_FFBL_B32()
    {
    } // ~Inst_VOP3__V_FFBL_B32

    // D.u = position of first 1 in S0.u from LSB;
    // D.u = 0xffffffff if S0.u == 0.
    void
    Inst_VOP3__V_FFBL_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = findFirstOne(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_FFBH_I32::Inst_VOP3__V_FFBH_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ffbh_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_FFBH_I32

    Inst_VOP3__V_FFBH_I32::~Inst_VOP3__V_FFBH_I32()
    {
    } // ~Inst_VOP3__V_FFBH_I32

    // D.u = position of first bit different from sign bit in S0.i from MSB;
    // D.u = 0xffffffff if S0.i == 0 or S0.i == 0xffffffff.
    void
    Inst_VOP3__V_FFBH_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = firstOppositeSignBit(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_FREXP_EXP_I32_F64::Inst_VOP3__V_FREXP_EXP_I32_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_frexp_exp_i32_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_FREXP_EXP_I32_F64

    Inst_VOP3__V_FREXP_EXP_I32_F64::~Inst_VOP3__V_FREXP_EXP_I32_F64()
    {
    } // ~Inst_VOP3__V_FREXP_EXP_I32_F64

    // See V_FREXP_EXP_I32_F32.
    void
    Inst_VOP3__V_FREXP_EXP_I32_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isinf(src[lane]) || std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else {
                    VecElemI32 exp(0);
                    std::frexp(src[lane], &exp);
                    vdst[lane] = exp;
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_FREXP_MANT_F64::Inst_VOP3__V_FREXP_MANT_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_frexp_mant_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_FREXP_MANT_F64

    Inst_VOP3__V_FREXP_MANT_F64::~Inst_VOP3__V_FREXP_MANT_F64()
    {
    } // ~Inst_VOP3__V_FREXP_MANT_F64

    void
    Inst_VOP3__V_FREXP_MANT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemI32 exp(0);
                vdst[lane] = std::frexp(src[lane], &exp);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_FRACT_F64::Inst_VOP3__V_FRACT_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_fract_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_FRACT_F64

    Inst_VOP3__V_FRACT_F64::~Inst_VOP3__V_FRACT_F64()
    {
    } // ~Inst_VOP3__V_FRACT_F64

    void
    Inst_VOP3__V_FRACT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemF64 int_part(0.0);
                vdst[lane] = std::modf(src[lane], &int_part);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_FREXP_EXP_I32_F32::Inst_VOP3__V_FREXP_EXP_I32_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_frexp_exp_i32_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_FREXP_EXP_I32_F32

    Inst_VOP3__V_FREXP_EXP_I32_F32::~Inst_VOP3__V_FREXP_EXP_I32_F32()
    {
    } // ~Inst_VOP3__V_FREXP_EXP_I32_F32

    // frexp(S0.f, Exponent(S0.f))
    // if (S0.f == INF || S0.f == NAN) then D.i = 0;
    // else D.i = Exponent(S0.f)
    void
    Inst_VOP3__V_FREXP_EXP_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isinf(src[lane]) || std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else {
                    VecElemI32 exp(0);
                    std::frexp(src[lane], &exp);
                    vdst[lane] = exp;
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_FREXP_MANT_F32::Inst_VOP3__V_FREXP_MANT_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_frexp_mant_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_FREXP_MANT_F32

    Inst_VOP3__V_FREXP_MANT_F32::~Inst_VOP3__V_FREXP_MANT_F32()
    {
    } // ~Inst_VOP3__V_FREXP_MANT_F32

    // if (S0.f == INF || S0.f == NAN) then D.f = S0.f;
    // else D.f = Mantissa(S0.f).
    void
    Inst_VOP3__V_FREXP_MANT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isinf(src[lane]) || std::isnan(src[lane])) {
                    vdst[lane] = src[lane];
                } else {
                    VecElemI32 exp(0);
                    vdst[lane] = std::frexp(src[lane], &exp);
                }
            }
        }

        vdst.write();
    } // execute
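
    // std::frexp decomposes x into mantissa * 2^exp with the mantissa in
    // [0.5, 1.0), which is exactly the (mantissa, exponent) pair the two
    // FREXP opcodes expose. A short plain-C++ illustration:
    //
    //     #include <cassert>
    //     #include <cmath>
    //     void frexpDemo()
    //     {
    //         int exp = 0;
    //         double mant = std::frexp(8.0, &exp);
    //         assert(mant == 0.5 && exp == 4); // 8.0 == 0.5 * 2^4
    //     }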

    Inst_VOP3__V_CLREXCP::Inst_VOP3__V_CLREXCP(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_clrexcp", false)
    {
    } // Inst_VOP3__V_CLREXCP

    Inst_VOP3__V_CLREXCP::~Inst_VOP3__V_CLREXCP()
    {
    } // ~Inst_VOP3__V_CLREXCP

    void
    Inst_VOP3__V_CLREXCP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_F16_U16::Inst_VOP3__V_CVT_F16_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f16_u16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CVT_F16_U16

    Inst_VOP3__V_CVT_F16_U16::~Inst_VOP3__V_CVT_F16_U16()
    {
    } // ~Inst_VOP3__V_CVT_F16_U16

    // D.f16 = uint16_to_flt16(S.u16).
    void
    Inst_VOP3__V_CVT_F16_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_F16_I16::Inst_VOP3__V_CVT_F16_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f16_i16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CVT_F16_I16

    Inst_VOP3__V_CVT_F16_I16::~Inst_VOP3__V_CVT_F16_I16()
    {
    } // ~Inst_VOP3__V_CVT_F16_I16

    // D.f16 = int16_to_flt16(S.i16).
    void
    Inst_VOP3__V_CVT_F16_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_U16_F16::Inst_VOP3__V_CVT_U16_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_u16_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CVT_U16_F16

    Inst_VOP3__V_CVT_U16_F16::~Inst_VOP3__V_CVT_U16_F16()
    {
    } // ~Inst_VOP3__V_CVT_U16_F16

    // D.u16 = flt16_to_uint16(S.f16).
    void
    Inst_VOP3__V_CVT_U16_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_I16_F16::Inst_VOP3__V_CVT_I16_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_i16_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CVT_I16_F16

    Inst_VOP3__V_CVT_I16_F16::~Inst_VOP3__V_CVT_I16_F16()
    {
    } // ~Inst_VOP3__V_CVT_I16_F16

    // D.i16 = flt16_to_int16(S.f16).
    void
    Inst_VOP3__V_CVT_I16_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_RCP_F16::Inst_VOP3__V_RCP_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rcp_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_RCP_F16

    Inst_VOP3__V_RCP_F16::~Inst_VOP3__V_RCP_F16()
    {
    } // ~Inst_VOP3__V_RCP_F16

    // if (S0.f16 == 1.0f)
    //     D.f16 = 1.0f;
    // else
    //     D.f16 = 1 / S0.f16.
    void
    Inst_VOP3__V_RCP_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_SQRT_F16::Inst_VOP3__V_SQRT_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sqrt_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_SQRT_F16

    Inst_VOP3__V_SQRT_F16::~Inst_VOP3__V_SQRT_F16()
    {
    } // ~Inst_VOP3__V_SQRT_F16

    // if (S0.f16 == 1.0f)
    //     D.f16 = 1.0f;
    // else
    //     D.f16 = sqrt(S0.f16).
    void
    Inst_VOP3__V_SQRT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_RSQ_F16::Inst_VOP3__V_RSQ_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rsq_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_RSQ_F16

    Inst_VOP3__V_RSQ_F16::~Inst_VOP3__V_RSQ_F16()
    {
    } // ~Inst_VOP3__V_RSQ_F16

    // if (S0.f16 == 1.0f)
    //     D.f16 = 1.0f;
    // else
    //     D.f16 = 1 / sqrt(S0.f16).
    void
    Inst_VOP3__V_RSQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_LOG_F16::Inst_VOP3__V_LOG_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_log_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_LOG_F16

    Inst_VOP3__V_LOG_F16::~Inst_VOP3__V_LOG_F16()
    {
    } // ~Inst_VOP3__V_LOG_F16

    // if (S0.f16 == 1.0f)
    //     D.f16 = 0.0f;
    // else
    //     D.f16 = log2(S0.f16).
    void
    Inst_VOP3__V_LOG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_EXP_F16::Inst_VOP3__V_EXP_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_exp_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_EXP_F16

    Inst_VOP3__V_EXP_F16::~Inst_VOP3__V_EXP_F16()
    {
    } // ~Inst_VOP3__V_EXP_F16

    // if (S0.f16 == 0.0f)
    //     D.f16 = 1.0f;
    // else
    //     D.f16 = pow(2.0, S0.f16).
    void
    Inst_VOP3__V_EXP_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_FREXP_MANT_F16::Inst_VOP3__V_FREXP_MANT_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_frexp_mant_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_FREXP_MANT_F16

    Inst_VOP3__V_FREXP_MANT_F16::~Inst_VOP3__V_FREXP_MANT_F16()
    {
    } // ~Inst_VOP3__V_FREXP_MANT_F16

    // if (S0.f16 == +-INF || S0.f16 == NAN)
    //     D.f16 = S0.f16;
    // else
    //     D.f16 = mantissa(S0.f16).
    void
    Inst_VOP3__V_FREXP_MANT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_FREXP_EXP_I16_F16::Inst_VOP3__V_FREXP_EXP_I16_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_frexp_exp_i16_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_FREXP_EXP_I16_F16

    Inst_VOP3__V_FREXP_EXP_I16_F16::~Inst_VOP3__V_FREXP_EXP_I16_F16()
    {
    } // ~Inst_VOP3__V_FREXP_EXP_I16_F16

    void
    Inst_VOP3__V_FREXP_EXP_I16_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_FLOOR_F16::Inst_VOP3__V_FLOOR_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_floor_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_FLOOR_F16

    Inst_VOP3__V_FLOOR_F16::~Inst_VOP3__V_FLOOR_F16()
    {
    } // ~Inst_VOP3__V_FLOOR_F16

    // D.f16 = floor(S0.f16);
    void
    Inst_VOP3__V_FLOOR_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CEIL_F16::Inst_VOP3__V_CEIL_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ceil_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CEIL_F16

    Inst_VOP3__V_CEIL_F16::~Inst_VOP3__V_CEIL_F16()
    {
    } // ~Inst_VOP3__V_CEIL_F16

    // D.f16 = ceil(S0.f16);
    void
    Inst_VOP3__V_CEIL_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_TRUNC_F16::Inst_VOP3__V_TRUNC_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_trunc_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_TRUNC_F16

    Inst_VOP3__V_TRUNC_F16::~Inst_VOP3__V_TRUNC_F16()
    {
    } // ~Inst_VOP3__V_TRUNC_F16

    // D.f16 = trunc(S0.f16).
    void
    Inst_VOP3__V_TRUNC_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_RNDNE_F16::Inst_VOP3__V_RNDNE_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rndne_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_RNDNE_F16

    Inst_VOP3__V_RNDNE_F16::~Inst_VOP3__V_RNDNE_F16()
    {
    } // ~Inst_VOP3__V_RNDNE_F16

    // D.f16 = roundNearestEven(S0.f16);
    void
    Inst_VOP3__V_RNDNE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_FRACT_F16::Inst_VOP3__V_FRACT_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_fract_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_FRACT_F16

    Inst_VOP3__V_FRACT_F16::~Inst_VOP3__V_FRACT_F16()
    {
    } // ~Inst_VOP3__V_FRACT_F16

    // D.f16 = S0.f16 + -floor(S0.f16).
    void
    Inst_VOP3__V_FRACT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_SIN_F16::Inst_VOP3__V_SIN_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sin_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_SIN_F16

    Inst_VOP3__V_SIN_F16::~Inst_VOP3__V_SIN_F16()
    {
    } // ~Inst_VOP3__V_SIN_F16

    // D.f16 = sin(S0.f16 * 2 * PI).
    void
    Inst_VOP3__V_SIN_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_COS_F16::Inst_VOP3__V_COS_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cos_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_COS_F16

    Inst_VOP3__V_COS_F16::~Inst_VOP3__V_COS_F16()
    {
    } // ~Inst_VOP3__V_COS_F16

    // D.f16 = cos(S0.f16 * 2 * PI).
    void
    Inst_VOP3__V_COS_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_EXP_LEGACY_F32::Inst_VOP3__V_EXP_LEGACY_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_exp_legacy_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_EXP_LEGACY_F32

    Inst_VOP3__V_EXP_LEGACY_F32::~Inst_VOP3__V_EXP_LEGACY_F32()
    {
    } // ~Inst_VOP3__V_EXP_LEGACY_F32

    // D.f = pow(2.0, S0.f)
    void
    Inst_VOP3__V_EXP_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::pow(2.0, src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_LOG_LEGACY_F32::Inst_VOP3__V_LOG_LEGACY_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_log_legacy_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_LOG_LEGACY_F32

    Inst_VOP3__V_LOG_LEGACY_F32::~Inst_VOP3__V_LOG_LEGACY_F32()
    {
    } // ~Inst_VOP3__V_LOG_LEGACY_F32

    // D.f = log2(S0.f).
    void
    Inst_VOP3__V_LOG_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::log2(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAD_LEGACY_F32::Inst_VOP3__V_MAD_LEGACY_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mad_legacy_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_LEGACY_F32

    Inst_VOP3__V_MAD_LEGACY_F32::~Inst_VOP3__V_MAD_LEGACY_F32()
    {
    } // ~Inst_VOP3__V_MAD_LEGACY_F32

    // D.f = S0.f * S1.f + S2.f
    void
    Inst_VOP3__V_MAD_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAD_F32::Inst_VOP3__V_MAD_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mad_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_F32

    Inst_VOP3__V_MAD_F32::~Inst_VOP3__V_MAD_F32()
    {
    } // ~Inst_VOP3__V_MAD_F32

    // D.f = S0.f * S1.f + S2.f.
    void
    Inst_VOP3__V_MAD_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAD_I32_I24::Inst_VOP3__V_MAD_I32_I24(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mad_i32_i24", false)
    {
        setFlag(ALU);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_I32_I24

    Inst_VOP3__V_MAD_I32_I24::~Inst_VOP3__V_MAD_I32_I24()
    {
    } // ~Inst_VOP3__V_MAD_I32_I24

    // D.i = S0.i[23:0] * S1.i[23:0] + S2.i.
    void
    Inst_VOP3__V_MAD_I32_I24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandI32 src2(gpuDynInst, extData.SRC2);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = sext<24>(bits(src0[lane], 23, 0))
                    * sext<24>(bits(src1[lane], 23, 0)) + src2[lane];
            }
        }

        vdst.write();
    } // execute
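
    // sext<24>() replicates bit 23 into the upper bits so the 24-bit fields
    // multiply as signed values. An illustrative standalone equivalent
    // (name is ours, not the gem5 helper):
    //
    //     #include <cstdint>
    //     int32_t sext24(uint32_t v)
    //     {
    //         v &= 0x00ffffffu; // keep S.i[23:0]
    //         return (v & 0x00800000u) ? int32_t(v | 0xff000000u)
    //                                  : int32_t(v);
    //     }
    //
    //     // sext24(0x00ffffffu) == -1, so (-1) * (-1) + s2 == 1 + s2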

    Inst_VOP3__V_MAD_U32_U24::Inst_VOP3__V_MAD_U32_U24(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mad_u32_u24", false)
    {
        setFlag(ALU);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_U32_U24

    Inst_VOP3__V_MAD_U32_U24::~Inst_VOP3__V_MAD_U32_U24()
    {
    } // ~Inst_VOP3__V_MAD_U32_U24

    // D.u = S0.u[23:0] * S1.u[23:0] + S2.u.
    void
    Inst_VOP3__V_MAD_U32_U24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = bits(src0[lane], 23, 0) * bits(src1[lane], 23, 0)
                    + src2[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_CUBEID_F32::Inst_VOP3__V_CUBEID_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cubeid_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CUBEID_F32

    Inst_VOP3__V_CUBEID_F32::~Inst_VOP3__V_CUBEID_F32()
    {
    } // ~Inst_VOP3__V_CUBEID_F32

    void
    Inst_VOP3__V_CUBEID_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CUBESC_F32::Inst_VOP3__V_CUBESC_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cubesc_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CUBESC_F32

    Inst_VOP3__V_CUBESC_F32::~Inst_VOP3__V_CUBESC_F32()
    {
    } // ~Inst_VOP3__V_CUBESC_F32

    void
    Inst_VOP3__V_CUBESC_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CUBETC_F32::Inst_VOP3__V_CUBETC_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cubetc_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CUBETC_F32

    Inst_VOP3__V_CUBETC_F32::~Inst_VOP3__V_CUBETC_F32()
    {
    } // ~Inst_VOP3__V_CUBETC_F32

    void
    Inst_VOP3__V_CUBETC_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CUBEMA_F32::Inst_VOP3__V_CUBEMA_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cubema_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CUBEMA_F32

    Inst_VOP3__V_CUBEMA_F32::~Inst_VOP3__V_CUBEMA_F32()
    {
    } // ~Inst_VOP3__V_CUBEMA_F32

    void
    Inst_VOP3__V_CUBEMA_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_BFE_U32::Inst_VOP3__V_BFE_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_bfe_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_BFE_U32

    Inst_VOP3__V_BFE_U32::~Inst_VOP3__V_BFE_U32()
    {
    } // ~Inst_VOP3__V_BFE_U32

    // D.u = (S0.u >> S1.u[4:0]) & ((1 << S2.u[4:0]) - 1).
    // Bitfield extract with S0 = data, S1 = field_offset, S2 = field_width.
    void
    Inst_VOP3__V_BFE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (src0[lane] >> bits(src1[lane], 4, 0))
                    & ((1 << bits(src2[lane], 4, 0)) - 1);
            }
        }

        vdst.write();
    } // execute
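
    // Worked example of the extract above: with S0 = 0xdeadbeef, offset
    // S1 = 8 and width S2 = 4, the result is (0xdeadbeef >> 8) & 0xf == 0xe.
    // Illustrative standalone form (name is ours):
    //
    //     #include <cstdint>
    //     uint32_t bfeU32(uint32_t s0, uint32_t s1, uint32_t s2)
    //     {
    //         uint32_t offset = s1 & 0x1f; // S1.u[4:0]
    //         uint32_t width = s2 & 0x1f;  // S2.u[4:0]
    //         return (s0 >> offset) & ((1u << width) - 1u);
    //     }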

    Inst_VOP3__V_BFE_I32::Inst_VOP3__V_BFE_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_bfe_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_BFE_I32

    Inst_VOP3__V_BFE_I32::~Inst_VOP3__V_BFE_I32()
    {
    } // ~Inst_VOP3__V_BFE_I32

    // D.i = (S0.i >> S1.u[4:0]) & ((1 << S2.u[4:0]) - 1).
    // Bitfield extract with S0 = data, S1 = field_offset, S2 = field_width.
    void
    Inst_VOP3__V_BFE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (src0[lane] >> bits(src1[lane], 4, 0))
                    & ((1 << bits(src2[lane], 4, 0)) - 1);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_BFI_B32::Inst_VOP3__V_BFI_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_bfi_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_BFI_B32

    Inst_VOP3__V_BFI_B32::~Inst_VOP3__V_BFI_B32()
    {
    } // ~Inst_VOP3__V_BFI_B32

    // D.u = (S0.u & S1.u) | (~S0.u & S2.u); bitfield insert.
    void
    Inst_VOP3__V_BFI_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (src0[lane] & src1[lane]) | (~src0[lane]
                    & src2[lane]);
            }
        }

        vdst.write();
    } // execute
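
    // The insert selects S1's bits wherever the mask S0 has a 1 and keeps
    // S2's bits elsewhere. Illustrative sketch (name is ours):
    //
    //     #include <cstdint>
    //     uint32_t bfiB32(uint32_t mask, uint32_t ins, uint32_t base)
    //     {
    //         return (mask & ins) | (~mask & base);
    //     }
    //
    //     // bfiB32(0x0000ff00u, 0xaaaaaaaau, 0x11111111u) == 0x1111aa11u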

    Inst_VOP3__V_FMA_F32::Inst_VOP3__V_FMA_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_fma_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
        setFlag(FMA);
    } // Inst_VOP3__V_FMA_F32

    Inst_VOP3__V_FMA_F32::~Inst_VOP3__V_FMA_F32()
    {
    } // ~Inst_VOP3__V_FMA_F32

    // D.f = S0.f * S1.f + S2.f.
    void
    Inst_VOP3__V_FMA_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_FMA_F64::Inst_VOP3__V_FMA_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_fma_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
        setFlag(FMA);
    } // Inst_VOP3__V_FMA_F64

    Inst_VOP3__V_FMA_F64::~Inst_VOP3__V_FMA_F64()
    {
    } // ~Inst_VOP3__V_FMA_F64

    // D.d = S0.d * S1.d + S2.d.
    void
    Inst_VOP3__V_FMA_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF64 src2(gpuDynInst, extData.SRC2);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_LERP_U8::Inst_VOP3__V_LERP_U8(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_lerp_u8", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_LERP_U8

    Inst_VOP3__V_LERP_U8::~Inst_VOP3__V_LERP_U8()
    {
    } // ~Inst_VOP3__V_LERP_U8

    // D.u = ((S0.u[31:24] + S1.u[31:24] + S2.u[24]) >> 1) << 24
    // D.u += ((S0.u[23:16] + S1.u[23:16] + S2.u[16]) >> 1) << 16;
    // D.u += ((S0.u[15:8] + S1.u[15:8] + S2.u[8]) >> 1) << 8;
    // D.u += ((S0.u[7:0] + S1.u[7:0] + S2.u[0]) >> 1).
    void
    Inst_VOP3__V_LERP_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = ((bits(src0[lane], 31, 24)
                    + bits(src1[lane], 31, 24) + bits(src2[lane], 24)) >> 1)
                    << 24;
                vdst[lane] += ((bits(src0[lane], 23, 16)
                    + bits(src1[lane], 23, 16) + bits(src2[lane], 16)) >> 1)
                    << 16;
                vdst[lane] += ((bits(src0[lane], 15, 8)
                    + bits(src1[lane], 15, 8) + bits(src2[lane], 8)) >> 1)
                    << 8;
                vdst[lane] += ((bits(src0[lane], 7, 0) + bits(src1[lane], 7, 0)
                    + bits(src2[lane], 0)) >> 1);
            }
        }

        vdst.write();
    } // execute
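
    // Each result byte is the average of the corresponding S0 and S1 bytes,
    // rounded by one bit taken from S2. Single-byte sketch (name is ours):
    //
    //     #include <cstdint>
    //     uint8_t lerpByte(uint8_t a, uint8_t b, uint8_t round)
    //     {
    //         return uint8_t((unsigned(a) + unsigned(b) + (round & 1u)) >> 1);
    //     }
    //
    //     // lerpByte(10, 15, 1) == 13; lerpByte(10, 15, 0) == 12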

    Inst_VOP3__V_ALIGNBIT_B32::Inst_VOP3__V_ALIGNBIT_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_alignbit_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_ALIGNBIT_B32

    Inst_VOP3__V_ALIGNBIT_B32::~Inst_VOP3__V_ALIGNBIT_B32()
    {
    } // ~Inst_VOP3__V_ALIGNBIT_B32

    // D.u = ({S0, S1} >> S2.u[4:0]) & 0xffffffff.
    void
    Inst_VOP3__V_ALIGNBIT_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemU64 src_0_1 = (((VecElemU64)src0[lane] << 32)
                    | (VecElemU64)src1[lane]);
                vdst[lane] = (VecElemU32)((src_0_1
                    >> (VecElemU64)bits(src2[lane], 4, 0)) & 0xffffffff);
            }
        }

        vdst.write();
    } // execute
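
    // {S0, S1} is treated as a 64-bit value with S0 in the upper dword;
    // shifting right by S2[4:0] and truncating to 32 bits gives a funnel
    // shift. Illustrative sketch (name is ours):
    //
    //     #include <cstdint>
    //     uint32_t alignbit(uint32_t hi, uint32_t lo, uint32_t shift)
    //     {
    //         uint64_t cat = (uint64_t(hi) << 32) | lo;
    //         return uint32_t(cat >> (shift & 0x1f));
    //     }
    //
    //     // alignbit(0x00000001u, 0x00000000u, 4) == 0x10000000u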

    Inst_VOP3__V_ALIGNBYTE_B32::Inst_VOP3__V_ALIGNBYTE_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_alignbyte_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_ALIGNBYTE_B32

    Inst_VOP3__V_ALIGNBYTE_B32::~Inst_VOP3__V_ALIGNBYTE_B32()
    {
    } // ~Inst_VOP3__V_ALIGNBYTE_B32

    // D.u = ({S0, S1} >> (8 * S2.u[4:0])) & 0xffffffff.
    void
    Inst_VOP3__V_ALIGNBYTE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemU64 src_0_1 = (((VecElemU64)src0[lane] << 32)
                    | (VecElemU64)src1[lane]);
                vdst[lane] = (VecElemU32)((src_0_1
                    >> (8ULL * (VecElemU64)bits(src2[lane], 4, 0)))
                    & 0xffffffff);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MIN3_F32::Inst_VOP3__V_MIN3_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min3_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_MIN3_F32

    Inst_VOP3__V_MIN3_F32::~Inst_VOP3__V_MIN3_F32()
    {
    } // ~Inst_VOP3__V_MIN3_F32

    // D.f = min(S0.f, S1.f, S2.f).
    void
    Inst_VOP3__V_MIN3_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemF32 min_0_1 = std::fmin(src0[lane], src1[lane]);
                vdst[lane] = std::fmin(min_0_1, src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MIN3_I32::Inst_VOP3__V_MIN3_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min3_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MIN3_I32

    Inst_VOP3__V_MIN3_I32::~Inst_VOP3__V_MIN3_I32()
    {
    } // ~Inst_VOP3__V_MIN3_I32

    // D.i = min(S0.i, S1.i, S2.i).
    void
    Inst_VOP3__V_MIN3_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandI32 src2(gpuDynInst, extData.SRC2);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemI32 min_0_1 = std::min(src0[lane], src1[lane]);
                vdst[lane] = std::min(min_0_1, src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MIN3_U32::Inst_VOP3__V_MIN3_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min3_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MIN3_U32

    Inst_VOP3__V_MIN3_U32::~Inst_VOP3__V_MIN3_U32()
    {
    } // ~Inst_VOP3__V_MIN3_U32

    // D.u = min(S0.u, S1.u, S2.u).
    void
    Inst_VOP3__V_MIN3_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemU32 min_0_1 = std::min(src0[lane], src1[lane]);
                vdst[lane] = std::min(min_0_1, src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAX3_F32::Inst_VOP3__V_MAX3_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_max3_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_MAX3_F32

    Inst_VOP3__V_MAX3_F32::~Inst_VOP3__V_MAX3_F32()
    {
    } // ~Inst_VOP3__V_MAX3_F32

    // D.f = max(S0.f, S1.f, S2.f).
    void
    Inst_VOP3__V_MAX3_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemF32 max_0_1 = std::fmax(src0[lane], src1[lane]);
                vdst[lane] = std::fmax(max_0_1, src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAX3_I32::Inst_VOP3__V_MAX3_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_max3_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MAX3_I32

    Inst_VOP3__V_MAX3_I32::~Inst_VOP3__V_MAX3_I32()
    {
    } // ~Inst_VOP3__V_MAX3_I32

    // D.i = max(S0.i, S1.i, S2.i).
    void
    Inst_VOP3__V_MAX3_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandI32 src2(gpuDynInst, extData.SRC2);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemI32 max_0_1 = std::max(src0[lane], src1[lane]);
                vdst[lane] = std::max(max_0_1, src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAX3_U32::Inst_VOP3__V_MAX3_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_max3_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MAX3_U32

    Inst_VOP3__V_MAX3_U32::~Inst_VOP3__V_MAX3_U32()
    {
    } // ~Inst_VOP3__V_MAX3_U32

    // D.u = max(S0.u, S1.u, S2.u).
    void
    Inst_VOP3__V_MAX3_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemU32 max_0_1 = std::max(src0[lane], src1[lane]);
                vdst[lane] = std::max(max_0_1, src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MED3_F32::Inst_VOP3__V_MED3_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_med3_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_MED3_F32

    Inst_VOP3__V_MED3_F32::~Inst_VOP3__V_MED3_F32()
    {
    } // ~Inst_VOP3__V_MED3_F32

    // D.f = median(S0.f, S1.f, S2.f).
    void
    Inst_VOP3__V_MED3_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = median(src0[lane], src1[lane], src2[lane]);
            }
        }

        vdst.write();
    } // execute
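
    // median() is the helper used by all three MED3 variants; the median of
    // three values can also be written with two mins and two maxes.
    // Illustrative sketch (name is ours):
    //
    //     #include <algorithm>
    //     float med3(float a, float b, float c)
    //     {
    //         return std::max(std::min(a, b),
    //                         std::min(std::max(a, b), c));
    //     }
    //
    //     // med3(3.0f, 1.0f, 2.0f) == 2.0f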

    Inst_VOP3__V_MED3_I32::Inst_VOP3__V_MED3_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_med3_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MED3_I32

    Inst_VOP3__V_MED3_I32::~Inst_VOP3__V_MED3_I32()
    {
    } // ~Inst_VOP3__V_MED3_I32

    // D.i = median(S0.i, S1.i, S2.i).
    void
    Inst_VOP3__V_MED3_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandI32 src2(gpuDynInst, extData.SRC2);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = median(src0[lane], src1[lane], src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MED3_U32::Inst_VOP3__V_MED3_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_med3_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MED3_U32

    Inst_VOP3__V_MED3_U32::~Inst_VOP3__V_MED3_U32()
    {
    } // ~Inst_VOP3__V_MED3_U32

    // D.u = median(S0.u, S1.u, S2.u).
    void
    Inst_VOP3__V_MED3_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = median(src0[lane], src1[lane], src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_SAD_U8::Inst_VOP3__V_SAD_U8(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sad_u8", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_SAD_U8

    Inst_VOP3__V_SAD_U8::~Inst_VOP3__V_SAD_U8()
    {
    } // ~Inst_VOP3__V_SAD_U8

    // D.u = abs(S0.i[31:24] - S1.i[31:24]) + abs(S0.i[23:16] - S1.i[23:16]) +
    // abs(S0.i[15:8] - S1.i[15:8]) + abs(S0.i[7:0] - S1.i[7:0]) + S2.u.
    // Sum of absolute differences with accumulation, overflow into upper bits
    // is allowed.
    void
    Inst_VOP3__V_SAD_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::abs(bits(src0[lane], 31, 24)
                    - bits(src1[lane], 31, 24))
                    + std::abs(bits(src0[lane], 23, 16)
                    - bits(src1[lane], 23, 16))
                    + std::abs(bits(src0[lane], 15, 8)
                    - bits(src1[lane], 15, 8))
                    + std::abs(bits(src0[lane], 7, 0)
                    - bits(src1[lane], 7, 0)) + src2[lane];
            }
        }

        vdst.write();
    } // execute
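
    // The SAD accumulates four byte-wise absolute differences into S2.
    // Illustrative standalone form (name is ours):
    //
    //     #include <cstdint>
    //     #include <cstdlib>
    //     uint32_t sadU8(uint32_t s0, uint32_t s1, uint32_t acc)
    //     {
    //         for (int byte = 0; byte < 4; ++byte) {
    //             int a = (s0 >> (8 * byte)) & 0xff;
    //             int b = (s1 >> (8 * byte)) & 0xff;
    //             acc += std::abs(a - b);
    //         }
    //         return acc;
    //     }
    //
    //     // sadU8(0x01020304u, 0x04030201u, 0) == 3 + 1 + 1 + 3 == 8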

    Inst_VOP3__V_SAD_HI_U8::Inst_VOP3__V_SAD_HI_U8(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sad_hi_u8", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_SAD_HI_U8

    Inst_VOP3__V_SAD_HI_U8::~Inst_VOP3__V_SAD_HI_U8()
    {
    } // ~Inst_VOP3__V_SAD_HI_U8

    // D.u = (SAD_U8(S0, S1, 0) << 16) + S2.u.
    // Sum of absolute differences with accumulation, overflow is lost.
    void
    Inst_VOP3__V_SAD_HI_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (((bits(src0[lane], 31, 24)
                    - bits(src1[lane], 31, 24)) + (bits(src0[lane], 23, 16)
                    - bits(src1[lane], 23, 16)) + (bits(src0[lane], 15, 8)
                    - bits(src1[lane], 15, 8)) + (bits(src0[lane], 7, 0)
                    - bits(src1[lane], 7, 0))) << 16) + src2[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_SAD_U16::Inst_VOP3__V_SAD_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sad_u16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_SAD_U16

    Inst_VOP3__V_SAD_U16::~Inst_VOP3__V_SAD_U16()
    {
    } // ~Inst_VOP3__V_SAD_U16

    // D.u = abs(S0.i[31:16] - S1.i[31:16]) + abs(S0.i[15:0] - S1.i[15:0])
    //     + S2.u.
    // Word SAD with accumulation.
    void
    Inst_VOP3__V_SAD_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::abs(bits(src0[lane], 31, 16)
                    - bits(src1[lane], 31, 16))
                    + std::abs(bits(src0[lane], 15, 0)
                    - bits(src1[lane], 15, 0)) + src2[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_SAD_U32::Inst_VOP3__V_SAD_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sad_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_SAD_U32

    Inst_VOP3__V_SAD_U32::~Inst_VOP3__V_SAD_U32()
    {
    } // ~Inst_VOP3__V_SAD_U32

    // D.u = abs(S0.i - S1.i) + S2.u.
    // Dword SAD with accumulation.
    void
    Inst_VOP3__V_SAD_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::abs(src0[lane] - src1[lane]) + src2[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_CVT_PK_U8_F32::Inst_VOP3__V_CVT_PK_U8_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_pk_u8_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_PK_U8_F32

    Inst_VOP3__V_CVT_PK_U8_F32::~Inst_VOP3__V_CVT_PK_U8_F32()
    {
    } // ~Inst_VOP3__V_CVT_PK_U8_F32

    // D.u = ((flt32_to_uint8(S0.f) & 0xff) << (8 * S1.u[1:0]))
    // | (S2.u & ~(0xff << (8 * S1.u[1:0]))).
    // Convert floating point value S0 to 8-bit unsigned integer and pack the
    // result into byte S1 of dword S2.
    void
    Inst_VOP3__V_CVT_PK_U8_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (((VecElemU8)src0[lane] & 0xff)
                    << (8 * bits(src1[lane], 1, 0)))
                    | (src2[lane] & ~(0xff << (8 * bits(src1[lane], 1, 0))));
            }
        }

        vdst.write();
    } // execute
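
    // The float is converted to a byte and dropped into byte lane S1[1:0] of
    // S2. A sketch of the packing step (name is ours; the raw cast mirrors
    // the conversion used above, without the hardware's clamping rules):
    //
    //     #include <cstdint>
    //     uint32_t packByte(float f, uint32_t sel, uint32_t base)
    //     {
    //         uint32_t shift = 8u * (sel & 3u);
    //         uint32_t byte = (uint32_t(uint8_t(f)) & 0xffu) << shift;
    //         return byte | (base & ~(0xffu << shift));
    //     }
    //
    //     // packByte(200.0f, 1, 0) == 0x0000c800u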
28999 Inst_VOP3__V_DIV_FIXUP_F32::Inst_VOP3__V_DIV_FIXUP_F32(InFmt_VOP3
*iFmt
)
29000 : Inst_VOP3(iFmt
, "v_div_fixup_f32", false)
29004 } // Inst_VOP3__V_DIV_FIXUP_F32
29006 Inst_VOP3__V_DIV_FIXUP_F32::~Inst_VOP3__V_DIV_FIXUP_F32()
29008 } // ~Inst_VOP3__V_DIV_FIXUP_F32
29010 // D.f = Divide fixup and flags -- s0.f = Quotient, s1.f = Denominator,
29011 // s2.f = Numerator.
29013 Inst_VOP3__V_DIV_FIXUP_F32::execute(GPUDynInstPtr gpuDynInst
)
29015 Wavefront
*wf
= gpuDynInst
->wavefront();
29016 ConstVecOperandF32
src0(gpuDynInst
, extData
.SRC0
);
29017 ConstVecOperandF32
src1(gpuDynInst
, extData
.SRC1
);
29018 ConstVecOperandF32
src2(gpuDynInst
, extData
.SRC2
);
29019 VecOperandF32
vdst(gpuDynInst
, instData
.VDST
);
29025 if (instData
.ABS
& 0x1) {
29026 src0
.absModifier();
29029 if (instData
.ABS
& 0x2) {
29030 src1
.absModifier();
29033 if (instData
.ABS
& 0x4) {
29034 src2
.absModifier();
29037 if (extData
.NEG
& 0x1) {
29038 src0
.negModifier();
29041 if (extData
.NEG
& 0x2) {
29042 src1
.negModifier();
29045 if (extData
.NEG
& 0x4) {
29046 src2
.negModifier();
29049 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
29050 if (wf
->execMask(lane
)) {
29051 if (std::fpclassify(src1
[lane
]) == FP_ZERO
) {
29052 if (std::signbit(src1
[lane
])) {
29053 vdst
[lane
] = -INFINITY
;
29055 vdst
[lane
] = +INFINITY
;
29057 } else if (std::isnan(src2
[lane
]) || std::isnan(src1
[lane
])) {
29059 } else if (std::isinf(src1
[lane
])) {
29060 if (std::signbit(src1
[lane
])) {
29061 vdst
[lane
] = -INFINITY
;
29063 vdst
[lane
] = +INFINITY
;
29066 vdst
[lane
] = src2
[lane
] / src1
[lane
];
    // --- Inst_VOP3__V_DIV_FIXUP_F64 class methods ---

    Inst_VOP3__V_DIV_FIXUP_F64::Inst_VOP3__V_DIV_FIXUP_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_div_fixup_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_DIV_FIXUP_F64

    Inst_VOP3__V_DIV_FIXUP_F64::~Inst_VOP3__V_DIV_FIXUP_F64()
    {
    } // ~Inst_VOP3__V_DIV_FIXUP_F64

    // D.d = Divide fixup and flags -- s0.d = Quotient, s1.d = Denominator,
    // s2.d = Numerator.
    void
    Inst_VOP3__V_DIV_FIXUP_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF64 src2(gpuDynInst, extData.SRC2);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int sign_out = std::signbit(src1[lane])
                    ^ std::signbit(src2[lane]);
                int exp1;
                int exp2;
                std::frexp(src1[lane], &exp1);
                std::frexp(src2[lane], &exp2);

                if (std::isnan(src1[lane]) || std::isnan(src2[lane])) {
                    vdst[lane] = std::numeric_limits<VecElemF64>::quiet_NaN();
                } else if (std::fpclassify(src1[lane]) == FP_ZERO
                           && std::fpclassify(src2[lane]) == FP_ZERO) {
                    vdst[lane]
                        = std::numeric_limits<VecElemF64>::signaling_NaN();
                } else if (std::isinf(src1[lane]) && std::isinf(src2[lane])) {
                    vdst[lane]
                        = std::numeric_limits<VecElemF64>::signaling_NaN();
                } else if (std::fpclassify(src1[lane]) == FP_ZERO
                           || std::isinf(src2[lane])) {
                    vdst[lane] = sign_out ? -INFINITY : +INFINITY;
                } else if (std::isinf(src1[lane])
                           || std::fpclassify(src2[lane]) == FP_ZERO) {
                    vdst[lane] = sign_out ? -0.0 : +0.0;
                } else if (exp2 - exp1 < -1075) {
                    vdst[lane] = src0[lane];
                } else if (exp1 == 2047) {
                    vdst[lane] = src0[lane];
                } else {
                    vdst[lane] = sign_out ? -std::fabs(src0[lane])
                        : std::fabs(src0[lane]);
                }
            }
        }

        vdst.write();
    } // execute
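
    // The two magic thresholds above are likely derived from the IEEE-754
    // double encoding: an exponent difference below -1075 (bias 1023 plus 52
    // mantissa bits) means the true quotient would underflow even the
    // subnormal range, and exp1 == 2047 is the all-ones biased exponent of
    // the denominator (an inf/NaN encoding); in both cases the unscaled
    // quotient is passed through untouched.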

    Inst_VOP3__V_DIV_SCALE_F32::Inst_VOP3__V_DIV_SCALE_F32(
        InFmt_VOP3_SDST_ENC *iFmt)
        : Inst_VOP3_SDST_ENC(iFmt, "v_div_scale_f32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(F32);
    } // Inst_VOP3__V_DIV_SCALE_F32

    Inst_VOP3__V_DIV_SCALE_F32::~Inst_VOP3__V_DIV_SCALE_F32()
    {
    } // ~Inst_VOP3__V_DIV_SCALE_F32

    // {vcc,D.f} = Divide preop and flags -- s0.f = Quotient, s1.f =
    // Denominator, s2.f = Numerator -- s0 must equal s1 or s2. Given a
    // numerator and denominator, this opcode will appropriately scale inputs
    // for division to avoid subnormal terms during Newton-Raphson correction
    // algorithm. This opcode produces a VCC flag for post-scale of quotient.
    void
    Inst_VOP3__V_DIV_SCALE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
        ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane];
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
        vdst.write();
    } // execute
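
    // Note that this F32 model keeps the quotient unscaled and always clears
    // the VCC bit, so the post-scale in v_div_fmas degenerates to a plain
    // FMA; this looks like a deliberate simplification of the hardware
    // pre-scaling described in the comment above.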

    // --- Inst_VOP3__V_DIV_SCALE_F64 class methods ---

    Inst_VOP3__V_DIV_SCALE_F64::Inst_VOP3__V_DIV_SCALE_F64(
        InFmt_VOP3_SDST_ENC *iFmt)
        : Inst_VOP3_SDST_ENC(iFmt, "v_div_scale_f64")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(F64);
    } // Inst_VOP3__V_DIV_SCALE_F64

    Inst_VOP3__V_DIV_SCALE_F64::~Inst_VOP3__V_DIV_SCALE_F64()
    {
    } // ~Inst_VOP3__V_DIV_SCALE_F64

    // {vcc,D.d} = Divide preop and flags -- s0.d = Quotient, s1.d =
    // Denominator, s2.d = Numerator -- s0 must equal s1 or s2. Given a
    // numerator and denominator, this opcode will appropriately scale inputs
    // for division to avoid subnormal terms during Newton-Raphson correction
    // algorithm. This opcode produces a VCC flag for post-scale of quotient.
    void
    Inst_VOP3__V_DIV_SCALE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF64 src2(gpuDynInst, extData.SRC2);
        ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp1;
                int exp2;
                std::frexp(src1[lane], &exp1);
                std::frexp(src2[lane], &exp2);
                vcc.setBit(lane, 0);

                if (std::fpclassify(src1[lane]) == FP_ZERO
                    || std::fpclassify(src2[lane]) == FP_ZERO) {
                    vdst[lane] = NAN;
                } else if (exp2 - exp1 >= 768) {
                    vcc.setBit(lane, 1);
                    if (src0[lane] == src1[lane]) {
                        vdst[lane] = std::ldexp(src0[lane], 128);
                    }
                } else if (std::fpclassify(src1[lane]) == FP_SUBNORMAL) {
                    vdst[lane] = std::ldexp(src0[lane], 128);
                } else if (std::fpclassify(1.0 / src1[lane]) == FP_SUBNORMAL
                           && std::fpclassify(src2[lane] / src1[lane])
                           == FP_SUBNORMAL) {
                    vcc.setBit(lane, 1);
                    if (src0[lane] == src1[lane]) {
                        vdst[lane] = std::ldexp(src0[lane], 128);
                    }
                } else if (std::fpclassify(1.0 / src1[lane])
                           == FP_SUBNORMAL) {
                    vdst[lane] = std::ldexp(src0[lane], -128);
                } else if (std::fpclassify(src2[lane] / src1[lane])
                           == FP_SUBNORMAL) {
                    vcc.setBit(lane, 1);
                    if (src0[lane] == src2[lane]) {
                        vdst[lane] = std::ldexp(src0[lane], 128);
                    }
                } else if (exp2 <= 53) {
                    vdst[lane] = std::ldexp(src0[lane], 128);
                }
            }
        }

        vcc.write();
        vdst.write();
    } // execute
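
    // The 2^128 scale (or 2^-128 for an over-large reciprocal) presumably
    // re-centers the operands so the Newton-Raphson refinement in the
    // macro-expanded divide never sees subnormal intermediates; e.g. a
    // denominator near DBL_MIN is scaled up into the normal range, and the
    // matching VCC bit tells v_div_fmas to apply the compensating
    // post-scale.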

    Inst_VOP3__V_DIV_FMAS_F32::Inst_VOP3__V_DIV_FMAS_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_div_fmas_f32", false)
    {
        setFlag(ALU);
        setFlag(ReadsVCC);
        setFlag(F32);
        setFlag(FMA);
    } // Inst_VOP3__V_DIV_FMAS_F32

    Inst_VOP3__V_DIV_FMAS_F32::~Inst_VOP3__V_DIV_FMAS_F32()
    {
    } // ~Inst_VOP3__V_DIV_FMAS_F32

    // D.f = Special case divide FMA with scale and flags(s0.f = Quotient,
    // s1.f = Denominator, s2.f = Numerator)
    void
    Inst_VOP3__V_DIV_FMAS_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
            }
        }

        vdst.write();
    } // execute

    // --- Inst_VOP3__V_DIV_FMAS_F64 class methods ---

    Inst_VOP3__V_DIV_FMAS_F64::Inst_VOP3__V_DIV_FMAS_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_div_fmas_f64", false)
    {
        setFlag(ALU);
        setFlag(ReadsVCC);
        setFlag(F64);
        setFlag(FMA);
    } // Inst_VOP3__V_DIV_FMAS_F64

    Inst_VOP3__V_DIV_FMAS_F64::~Inst_VOP3__V_DIV_FMAS_F64()
    {
    } // ~Inst_VOP3__V_DIV_FMAS_F64

    // D.d = Special case divide FMA with scale and flags(s0.d = Quotient,
    // s1.d = Denominator, s2.d = Numerator)
    void
    Inst_VOP3__V_DIV_FMAS_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF64 src2(gpuDynInst, extData.SRC2);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);
        ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();
        vcc.read();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (bits(vcc.rawData(), lane)) {
                    vdst[lane] = std::pow(2, 64)
                        * std::fma(src0[lane], src1[lane], src2[lane]);
                } else {
                    vdst[lane] = std::fma(src0[lane], src1[lane],
                                          src2[lane]);
                }
            }
        }

        vdst.write();
    } // execute
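
    // When the VCC bit set by v_div_scale marks a scaled lane, the FMA
    // result is multiplied back up by 2^64 here; std::ldexp(fma, 64) would
    // avoid the std::pow(2, 64) round trip, but the result is identical for
    // this exactly representable power of two.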

    Inst_VOP3__V_MSAD_U8::Inst_VOP3__V_MSAD_U8(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_msad_u8", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MSAD_U8

    Inst_VOP3__V_MSAD_U8::~Inst_VOP3__V_MSAD_U8()
    {
    } // ~Inst_VOP3__V_MSAD_U8

    // D.u = Masked Byte SAD with accum_lo(S0.u, S1.u, S2.u).
    void
    Inst_VOP3__V_MSAD_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_QSAD_PK_U16_U8::Inst_VOP3__V_QSAD_PK_U16_U8(InFmt_VOP3
        *iFmt)
        : Inst_VOP3(iFmt, "v_qsad_pk_u16_u8", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_QSAD_PK_U16_U8

    Inst_VOP3__V_QSAD_PK_U16_U8::~Inst_VOP3__V_QSAD_PK_U16_U8()
    {
    } // ~Inst_VOP3__V_QSAD_PK_U16_U8

    // D.u = Quad-Byte SAD with 16-bit packed accum_lo/hi(S0.u[63:0],
    // S1.u[31:0], S2.u[63:0])
    void
    Inst_VOP3__V_QSAD_PK_U16_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_MQSAD_PK_U16_U8::Inst_VOP3__V_MQSAD_PK_U16_U8(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mqsad_pk_u16_u8", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MQSAD_PK_U16_U8

    Inst_VOP3__V_MQSAD_PK_U16_U8::~Inst_VOP3__V_MQSAD_PK_U16_U8()
    {
    } // ~Inst_VOP3__V_MQSAD_PK_U16_U8

    // D.u = Masked Quad-Byte SAD with 16-bit packed accum_lo/hi(S0.u[63:0],
    // S1.u[31:0], S2.u[63:0])
    void
    Inst_VOP3__V_MQSAD_PK_U16_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_MQSAD_U32_U8::Inst_VOP3__V_MQSAD_U32_U8(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mqsad_u32_u8", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MQSAD_U32_U8

    Inst_VOP3__V_MQSAD_U32_U8::~Inst_VOP3__V_MQSAD_U32_U8()
    {
    } // ~Inst_VOP3__V_MQSAD_U32_U8

    // D.u128 = Masked Quad-Byte SAD with 32-bit accum_lo/hi(S0.u[63:0],
    // S1.u[31:0], S2.u[127:0])
    void
    Inst_VOP3__V_MQSAD_U32_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_MAD_U64_U32::Inst_VOP3__V_MAD_U64_U32(
        InFmt_VOP3_SDST_ENC *iFmt)
        : Inst_VOP3_SDST_ENC(iFmt, "v_mad_u64_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_U64_U32

    Inst_VOP3__V_MAD_U64_U32::~Inst_VOP3__V_MAD_U64_U32()
    {
    } // ~Inst_VOP3__V_MAD_U64_U32

    // {vcc_out, D.u64} = S0.u32 * S1.u32 + S2.u64.
    void
    Inst_VOP3__V_MAD_U64_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU64 src2(gpuDynInst, extData.SRC2);
        ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
        VecOperandU64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, muladd(vdst[lane], src0[lane], src1[lane],
                    src2[lane]));
            }
        }

        vcc.write();
        vdst.write();
    } // execute
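
    // muladd() (from inst_util.hh) is assumed here to compute
    // vdst[lane] = src0[lane] * src1[lane] + src2[lane] in 64 bits and to
    // return the unsigned carry-out, which becomes the per-lane vcc_out bit.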

    Inst_VOP3__V_MAD_I64_I32::Inst_VOP3__V_MAD_I64_I32(
        InFmt_VOP3_SDST_ENC *iFmt)
        : Inst_VOP3_SDST_ENC(iFmt, "v_mad_i64_i32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_I64_I32

    Inst_VOP3__V_MAD_I64_I32::~Inst_VOP3__V_MAD_I64_I32()
    {
    } // ~Inst_VOP3__V_MAD_I64_I32

    // {vcc_out,D.i64} = S0.i32 * S1.i32 + S2.i64.
    void
    Inst_VOP3__V_MAD_I64_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandI64 src2(gpuDynInst, extData.SRC2);
        ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
        VecOperandI64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, muladd(vdst[lane], src0[lane], src1[lane],
                    src2[lane]));
            }
        }

        vcc.write();
        vdst.write();
    } // execute

    Inst_VOP3__V_MAD_F16::Inst_VOP3__V_MAD_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mad_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_F16

    Inst_VOP3__V_MAD_F16::~Inst_VOP3__V_MAD_F16()
    {
    } // ~Inst_VOP3__V_MAD_F16

    // D.f16 = S0.f16 * S1.f16 + S2.f16.
    // Supports round mode, exception flags, saturation.
    void
    Inst_VOP3__V_MAD_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_MAD_U16::Inst_VOP3__V_MAD_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mad_u16", false)
    {
        setFlag(ALU);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_U16

    Inst_VOP3__V_MAD_U16::~Inst_VOP3__V_MAD_U16()
    {
    } // ~Inst_VOP3__V_MAD_U16

    // D.u16 = S0.u16 * S1.u16 + S2.u16.
    // Supports saturation (unsigned 16-bit integer domain).
    void
    Inst_VOP3__V_MAD_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU16 src2(gpuDynInst, extData.SRC2);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] * src1[lane] + src2[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAD_I16::Inst_VOP3__V_MAD_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mad_i16", false)
    {
        setFlag(ALU);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_I16

    Inst_VOP3__V_MAD_I16::~Inst_VOP3__V_MAD_I16()
    {
    } // ~Inst_VOP3__V_MAD_I16

    // D.i16 = S0.i16 * S1.i16 + S2.i16.
    // Supports saturation (signed 16-bit integer domain).
    void
    Inst_VOP3__V_MAD_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandI16 src2(gpuDynInst, extData.SRC2);
        VecOperandI16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] * src1[lane] + src2[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_PERM_B32::Inst_VOP3__V_PERM_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_perm_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_PERM_B32

    Inst_VOP3__V_PERM_B32::~Inst_VOP3__V_PERM_B32()
    {
    } // ~Inst_VOP3__V_PERM_B32

    // D.u[31:24] = permute({S0.u, S1.u}, S2.u[31:24]);
    // D.u[23:16] = permute({S0.u, S1.u}, S2.u[23:16]);
    // D.u[15:8] = permute({S0.u, S1.u}, S2.u[15:8]);
    // D.u[7:0] = permute({S0.u, S1.u}, S2.u[7:0]);
    // byte permute(byte in[8], byte sel) {
    //     if (sel >= 13) then return 0xff;
    //     elsif (sel == 12) then return 0x00;
    //     elsif (sel == 11) then return in[7][7] * 0xff;
    //     elsif (sel == 10) then return in[5][7] * 0xff;
    //     elsif (sel == 9) then return in[3][7] * 0xff;
    //     elsif (sel == 8) then return in[1][7] * 0xff;
    //     else return in[sel];
    // }
    void
    Inst_VOP3__V_PERM_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemU64 selector = (VecElemU64)src0[lane];
                selector = (selector << 32) | (VecElemU64)src1[lane];
                vdst[lane] = 0;

                DPRINTF(GCN3, "Executing v_perm_b32 src_0 0x%08x, src_1 "
                    "0x%08x, src_2 0x%08x, vdst 0x%08x\n", src0[lane],
                    src1[lane], src2[lane], vdst[lane]);
                DPRINTF(GCN3, "Selector: 0x%08x \n", selector);

                for (int i = 0; i < 4; ++i) {
                    VecElemU32 permuted_val = permute(selector, 0xFF
                        & ((VecElemU32)src2[lane] >> (8 * i)));
                    vdst[lane] |= (permuted_val << (8 * i));
                }

                DPRINTF(GCN3, "v_perm result: 0x%08x\n", vdst[lane]);
            }
        }

        vdst.write();
    } // execute
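
    // Worked example (illustrative values, not from the original source):
    // with the 64-bit selector {S0,S1}, byte 0 is the LSB of S1 and byte 7
    // the MSB of S0, so S2 = 0x03020100 reproduces S1, S2 = 0x07060504
    // reproduces S0, and S2 = 0x0c0c0c0c yields 0x00000000.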

    Inst_VOP3__V_FMA_F16::Inst_VOP3__V_FMA_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_fma_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
        setFlag(FMA);
    } // Inst_VOP3__V_FMA_F16

    Inst_VOP3__V_FMA_F16::~Inst_VOP3__V_FMA_F16()
    {
    } // ~Inst_VOP3__V_FMA_F16

    // D.f16 = S0.f16 * S1.f16 + S2.f16.
    // Fused half precision multiply add.
    void
    Inst_VOP3__V_FMA_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_DIV_FIXUP_F16::Inst_VOP3__V_DIV_FIXUP_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_div_fixup_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_DIV_FIXUP_F16

    Inst_VOP3__V_DIV_FIXUP_F16::~Inst_VOP3__V_DIV_FIXUP_F16()
    {
    } // ~Inst_VOP3__V_DIV_FIXUP_F16

    // sign_out = sign(S1.f16)^sign(S2.f16);
    // if (S2.f16 == NAN)
    //     D.f16 = Quiet(S2.f16);
    // else if (S1.f16 == NAN)
    //     D.f16 = Quiet(S1.f16);
    // else if (S1.f16 == S2.f16 == 0)
    //     # 0/0
    //     D.f16 = pele_nan(0xfe00);
    // else if (abs(S1.f16) == abs(S2.f16) == +-INF)
    //     # inf/inf
    //     D.f16 = pele_nan(0xfe00);
    // else if (S1.f16 == 0 || abs(S2.f16) == +-INF)
    //     # x/0, or inf/y
    //     D.f16 = sign_out ? -INF : INF;
    // else if (abs(S1.f16) == +-INF || S2.f16 == 0)
    //     # x/inf, 0/y
    //     D.f16 = sign_out ? -0 : 0;
    // else if ((exp(S2.f16) - exp(S1.f16)) < -150)
    //     D.f16 = sign_out ? -underflow : underflow;
    // else if (exp(S1.f16) == 255)
    //     D.f16 = sign_out ? -overflow : overflow;
    // else
    //     D.f16 = sign_out ? -abs(S0.f16) : abs(S0.f16).
    // Half precision division fixup.
    // S0 = Quotient, S1 = Denominator, S2 = Numerator.
    // Given a numerator, denominator, and quotient from a divide, this opcode
    // will detect and apply special case numerics, touching up the quotient
    // if necessary. This opcode also generates invalid, denorm and divide by
    // zero exceptions caused by the division.
    void
    Inst_VOP3__V_DIV_FIXUP_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_PKACCUM_U8_F32::Inst_VOP3__V_CVT_PKACCUM_U8_F32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_pkaccum_u8_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_PKACCUM_U8_F32

    Inst_VOP3__V_CVT_PKACCUM_U8_F32::~Inst_VOP3__V_CVT_PKACCUM_U8_F32()
    {
    } // ~Inst_VOP3__V_CVT_PKACCUM_U8_F32

    // byte = S1.u[1:0]; bit = byte * 8;
    // D.u[bit + 7:bit] = flt32_to_uint8(S0.f);
    // Pack converted value of S0.f into byte S1 of the destination.
    // SQ translates to V_CVT_PK_U8_F32.
    // Note: this opcode uses src_c to pass destination in as a source.
    void
    Inst_VOP3__V_CVT_PKACCUM_U8_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_INTERP_P1_F32::Inst_VOP3__V_INTERP_P1_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_interp_p1_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_INTERP_P1_F32

    Inst_VOP3__V_INTERP_P1_F32::~Inst_VOP3__V_INTERP_P1_F32()
    {
    } // ~Inst_VOP3__V_INTERP_P1_F32

    // D.f = P10 * S.f + P0;
    void
    Inst_VOP3__V_INTERP_P1_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_INTERP_P2_F32::Inst_VOP3__V_INTERP_P2_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_interp_p2_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_INTERP_P2_F32

    Inst_VOP3__V_INTERP_P2_F32::~Inst_VOP3__V_INTERP_P2_F32()
    {
    } // ~Inst_VOP3__V_INTERP_P2_F32

    // D.f = P20 * S.f + D.f;
    void
    Inst_VOP3__V_INTERP_P2_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_INTERP_MOV_F32::Inst_VOP3__V_INTERP_MOV_F32(InFmt_VOP3
        *iFmt)
        : Inst_VOP3(iFmt, "v_interp_mov_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_INTERP_MOV_F32

    Inst_VOP3__V_INTERP_MOV_F32::~Inst_VOP3__V_INTERP_MOV_F32()
    {
    } // ~Inst_VOP3__V_INTERP_MOV_F32

    // D.f = {P10,P20,P0}[S.u]; parameter load.
    void
    Inst_VOP3__V_INTERP_MOV_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_INTERP_P1LL_F16::Inst_VOP3__V_INTERP_P1LL_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_interp_p1ll_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_INTERP_P1LL_F16

    Inst_VOP3__V_INTERP_P1LL_F16::~Inst_VOP3__V_INTERP_P1LL_F16()
    {
    } // ~Inst_VOP3__V_INTERP_P1LL_F16

    // D.f32 = P10.f16 * S0.f32 + P0.f16.
    void
    Inst_VOP3__V_INTERP_P1LL_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_INTERP_P1LV_F16::Inst_VOP3__V_INTERP_P1LV_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_interp_p1lv_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_INTERP_P1LV_F16

    Inst_VOP3__V_INTERP_P1LV_F16::~Inst_VOP3__V_INTERP_P1LV_F16()
    {
    } // ~Inst_VOP3__V_INTERP_P1LV_F16

    void
    Inst_VOP3__V_INTERP_P1LV_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_INTERP_P2_F16::Inst_VOP3__V_INTERP_P2_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_interp_p2_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_INTERP_P2_F16

    Inst_VOP3__V_INTERP_P2_F16::~Inst_VOP3__V_INTERP_P2_F16()
    {
    } // ~Inst_VOP3__V_INTERP_P2_F16

    // D.f16 = P20.f16 * S0.f32 + S2.f32.
    void
    Inst_VOP3__V_INTERP_P2_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_ADD_F64::Inst_VOP3__V_ADD_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_add_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_ADD_F64

    Inst_VOP3__V_ADD_F64::~Inst_VOP3__V_ADD_F64()
    {
    } // ~Inst_VOP3__V_ADD_F64

    // D.d = S0.d + S1.d.
    void
    Inst_VOP3__V_ADD_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isnan(src0[lane]) ||
                    std::isnan(src1[lane]) ) {
                    vdst[lane] = NAN;
                } else if (std::isinf(src0[lane]) &&
                           std::isinf(src1[lane])) {
                    if (std::signbit(src0[lane]) !=
                        std::signbit(src1[lane])) {
                        vdst[lane] = NAN;
                    } else {
                        vdst[lane] = src0[lane];
                    }
                } else if (std::isinf(src0[lane])) {
                    vdst[lane] = src0[lane];
                } else if (std::isinf(src1[lane])) {
                    vdst[lane] = src1[lane];
                } else if (std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) {
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        if (std::signbit(src0[lane]) &&
                            std::signbit(src1[lane])) {
                            vdst[lane] = -0.0;
                        } else {
                            vdst[lane] = 0.0;
                        }
                    } else {
                        vdst[lane] = src1[lane];
                    }
                } else if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src1[lane]) == FP_ZERO) {
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src0[lane]) == FP_ZERO) {
                        if (std::signbit(src0[lane]) &&
                            std::signbit(src1[lane])) {
                            vdst[lane] = -0.0;
                        } else {
                            vdst[lane] = 0.0;
                        }
                    } else {
                        vdst[lane] = src0[lane];
                    }
                } else {
                    vdst[lane] = src0[lane] + src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
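
    // Most of the branches above exist to flush subnormal inputs to zero
    // (with the sign-of-zero rule for -0.0 + -0.0), not to handle NaN/inf,
    // which the host's native addition would already propagate correctly;
    // GCN3 F64 adds appear to be modeled here as denormal-flushing.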

    Inst_VOP3__V_MUL_F64::Inst_VOP3__V_MUL_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_MUL_F64

    Inst_VOP3__V_MUL_F64::~Inst_VOP3__V_MUL_F64()
    {
    } // ~Inst_VOP3__V_MUL_F64

    // D.d = S0.d * S1.d.
    void
    Inst_VOP3__V_MUL_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isnan(src0[lane]) ||
                    std::isnan(src1[lane])) {
                    vdst[lane] = NAN;
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           !std::signbit(src0[lane])) {
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           std::signbit(src0[lane])) {
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if (std::isinf(src0[lane]) &&
                           !std::signbit(src0[lane])) {
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else if (std::isinf(src0[lane]) &&
                           std::signbit(src0[lane])) {
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else {
                    vdst[lane] = src0[lane] * src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
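
    // Same flush-to-zero idea as v_add_f64: a zero or subnormal factor times
    // a finite value yields a signed zero, and times an infinity yields NaN,
    // matching the IEEE 0 * inf = NaN rule applied after the flush.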

    Inst_VOP3__V_MIN_F64::Inst_VOP3__V_MIN_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_MIN_F64

    Inst_VOP3__V_MIN_F64::~Inst_VOP3__V_MIN_F64()
    {
    } // ~Inst_VOP3__V_MIN_F64

    // D.d = min(S0.d, S1.d).
    void
    Inst_VOP3__V_MIN_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fmin(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAX_F64::Inst_VOP3__V_MAX_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_max_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_MAX_F64

    Inst_VOP3__V_MAX_F64::~Inst_VOP3__V_MAX_F64()
    {
    } // ~Inst_VOP3__V_MAX_F64

    // D.d = max(S0.d, S1.d).
    void
    Inst_VOP3__V_MAX_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fmax(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_LDEXP_F64::Inst_VOP3__V_LDEXP_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ldexp_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_LDEXP_F64

    Inst_VOP3__V_LDEXP_F64::~Inst_VOP3__V_LDEXP_F64()
    {
    } // ~Inst_VOP3__V_LDEXP_F64

    // D.d = S0.d * 2^S1.i[31:0] (ldexp).
    void
    Inst_VOP3__V_LDEXP_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isnan(src0[lane]) || std::isinf(src0[lane])) {
                    vdst[lane] = src0[lane];
                } else if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                           || std::fpclassify(src0[lane]) == FP_ZERO) {
                    if (std::signbit(src0[lane])) {
                        vdst[lane] = -0.0;
                    } else {
                        vdst[lane] = +0.0;
                    }
                } else {
                    vdst[lane] = std::ldexp(src0[lane], src1[lane]);
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MUL_LO_U32::Inst_VOP3__V_MUL_LO_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_lo_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MUL_LO_U32

    Inst_VOP3__V_MUL_LO_U32::~Inst_VOP3__V_MUL_LO_U32()
    {
    } // ~Inst_VOP3__V_MUL_LO_U32

    // D.u = S0.u * S1.u.
    void
    Inst_VOP3__V_MUL_LO_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemI64 s0 = (VecElemI64)src0[lane];
                VecElemI64 s1 = (VecElemI64)src1[lane];
                vdst[lane] = (VecElemU32)((s0 * s1) & 0xffffffffLL);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MUL_HI_U32::Inst_VOP3__V_MUL_HI_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_hi_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MUL_HI_U32

    Inst_VOP3__V_MUL_HI_U32::~Inst_VOP3__V_MUL_HI_U32()
    {
    } // ~Inst_VOP3__V_MUL_HI_U32

    // D.u = (S0.u * S1.u) >> 32.
    void
    Inst_VOP3__V_MUL_HI_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemI64 s0 = (VecElemI64)src0[lane];
                VecElemI64 s1 = (VecElemI64)src1[lane];
                vdst[lane]
                    = (VecElemU32)(((s0 * s1) >> 32) & 0xffffffffLL);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MUL_HI_I32::Inst_VOP3__V_MUL_HI_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_hi_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MUL_HI_I32

    Inst_VOP3__V_MUL_HI_I32::~Inst_VOP3__V_MUL_HI_I32()
    {
    } // ~Inst_VOP3__V_MUL_HI_I32

    // D.i = (S0.i * S1.i) >> 32.
    void
    Inst_VOP3__V_MUL_HI_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemI64 s0 = (VecElemI64)src0[lane];
                VecElemI64 s1 = (VecElemI64)src1[lane];
                vdst[lane]
                    = (VecElemI32)(((s0 * s1) >> 32LL) & 0xffffffffLL);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_LDEXP_F32::Inst_VOP3__V_LDEXP_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ldexp_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_LDEXP_F32

    Inst_VOP3__V_LDEXP_F32::~Inst_VOP3__V_LDEXP_F32()
    {
    } // ~Inst_VOP3__V_LDEXP_F32

    // D.f = S0.f * 2^S1.i (ldexp)
    void
    Inst_VOP3__V_LDEXP_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::ldexp(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_READLANE_B32::Inst_VOP3__V_READLANE_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_readlane_b32", true)
    {
        setFlag(ALU);
        setFlag(IgnoreExec);
    } // Inst_VOP3__V_READLANE_B32

    Inst_VOP3__V_READLANE_B32::~Inst_VOP3__V_READLANE_B32()
    {
    } // ~Inst_VOP3__V_READLANE_B32

    // Copy one VGPR value to one SGPR. D = SGPR-dest, S0 = Source Data (VGPR#
    // or M0(lds-direct)), S1 = Lane Select (SGPR or M0). Ignores exec mask.
    // Input and output modifiers not supported; this is an untyped operation.
    void
    Inst_VOP3__V_READLANE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstScalarOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        sdst = src0[src1.rawData() & 0x3f];

        sdst.write();
    } // execute
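
    // The & 0x3f presumably clamps the lane select to the 64 lanes of a
    // GCN3 wavefront, e.g. a lane-select value of 69 reads lane 5.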

    Inst_VOP3__V_WRITELANE_B32::Inst_VOP3__V_WRITELANE_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_writelane_b32", false)
    {
        setFlag(ALU);
        setFlag(IgnoreExec);
    } // Inst_VOP3__V_WRITELANE_B32

    Inst_VOP3__V_WRITELANE_B32::~Inst_VOP3__V_WRITELANE_B32()
    {
    } // ~Inst_VOP3__V_WRITELANE_B32

    // Write value into one VGPR in one lane. D = VGPR-dest, S0 = Source Data
    // (sgpr, m0, exec or constants), S1 = Lane Select (SGPR or M0). Ignores
    // exec mask. Input and output modifiers not supported; this is an untyped
    // operation.
    void
    Inst_VOP3__V_WRITELANE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstScalarOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.read();
        src1.read();
        vdst.read();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        vdst[src1.rawData() & 0x3f] = src0.rawData();

        vdst.write();
    } // execute

    Inst_VOP3__V_BCNT_U32_B32::Inst_VOP3__V_BCNT_U32_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_bcnt_u32_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_BCNT_U32_B32

    Inst_VOP3__V_BCNT_U32_B32::~Inst_VOP3__V_BCNT_U32_B32()
    {
    } // ~Inst_VOP3__V_BCNT_U32_B32

    // D.u = CountOneBits(S0.u) + S1.u. Bit count.
    void
    Inst_VOP3__V_BCNT_U32_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = popCount(src0[lane]) + src1[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MBCNT_LO_U32_B32::Inst_VOP3__V_MBCNT_LO_U32_B32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mbcnt_lo_u32_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MBCNT_LO_U32_B32

    Inst_VOP3__V_MBCNT_LO_U32_B32::~Inst_VOP3__V_MBCNT_LO_U32_B32()
    {
    } // ~Inst_VOP3__V_MBCNT_LO_U32_B32

    // ThreadMask = (1 << ThreadPosition) - 1;
    // D.u = CountOneBits(S0.u & ThreadMask[31:0]) + S1.u.
    // Masked bit count, ThreadPosition is the position of this thread in the
    // wavefront (in 0..63).
    void
    Inst_VOP3__V_MBCNT_LO_U32_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        uint64_t threadMask = 0;

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                threadMask = ((1LL << lane) - 1LL);
                vdst[lane] = popCount(src0[lane] & bits(threadMask, 31, 0)) +
                             src1[lane];
            }
        }

        vdst.write();
    } // execute

    // --- Inst_VOP3__V_MBCNT_HI_U32_B32 class methods ---

    Inst_VOP3__V_MBCNT_HI_U32_B32::Inst_VOP3__V_MBCNT_HI_U32_B32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mbcnt_hi_u32_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MBCNT_HI_U32_B32

    Inst_VOP3__V_MBCNT_HI_U32_B32::~Inst_VOP3__V_MBCNT_HI_U32_B32()
    {
    } // ~Inst_VOP3__V_MBCNT_HI_U32_B32

    // ThreadMask = (1 << ThreadPosition) - 1;
    // D.u = CountOneBits(S0.u & ThreadMask[63:32]) + S1.u.
    // Masked bit count, ThreadPosition is the position of this thread in the
    // wavefront (in 0..63).
    void
    Inst_VOP3__V_MBCNT_HI_U32_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        uint64_t threadMask = 0;

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                threadMask = ((1LL << lane) - 1LL);
                vdst[lane] = popCount(src0[lane] & bits(threadMask, 63, 32)) +
                             src1[lane];
            }
        }

        vdst.write();
    } // execute
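
    // A minimal sketch of the usual pairing (register choice illustrative):
    // issue the LO op with S1 = 0 and feed its result into the HI op, so
    //     lo = popCount(mask[31:0] & threadMask[31:0])
    //     hi = popCount(mask[63:32] & threadMask[63:32]) + lo
    // gives each lane the count of lower-numbered set bits; with mask = EXEC
    // this is how a wave-relative lane index is typically computed.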

    // --- Inst_VOP3__V_LSHLREV_B64 class methods ---

    Inst_VOP3__V_LSHLREV_B64::Inst_VOP3__V_LSHLREV_B64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_lshlrev_b64", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_LSHLREV_B64

    Inst_VOP3__V_LSHLREV_B64::~Inst_VOP3__V_LSHLREV_B64()
    {
    } // ~Inst_VOP3__V_LSHLREV_B64

    // D.u64 = S1.u64 << S0.u[5:0].
    void
    Inst_VOP3__V_LSHLREV_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        VecOperandU64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] << bits(src0[lane], 5, 0);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_LSHRREV_B64::Inst_VOP3__V_LSHRREV_B64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_lshrrev_b64", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_LSHRREV_B64

    Inst_VOP3__V_LSHRREV_B64::~Inst_VOP3__V_LSHRREV_B64()
    {
    } // ~Inst_VOP3__V_LSHRREV_B64

    // D.u64 = S1.u64 >> S0.u[5:0].
    // The vacated bits are set to zero.
    void
    Inst_VOP3__V_LSHRREV_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        VecOperandU64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> bits(src0[lane], 5, 0);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_ASHRREV_I64::Inst_VOP3__V_ASHRREV_I64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ashrrev_i64", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_ASHRREV_I64

    Inst_VOP3__V_ASHRREV_I64::~Inst_VOP3__V_ASHRREV_I64()
    {
    } // ~Inst_VOP3__V_ASHRREV_I64

    // D.u64 = signext(S1.u64) >> S0.u[5:0].
    // The vacated bits are set to the sign bit of the input value.
    void
    Inst_VOP3__V_ASHRREV_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        VecOperandU64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane]
                    = src1[lane] >> bits(src0[lane], 5, 0);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_TRIG_PREOP_F64::Inst_VOP3__V_TRIG_PREOP_F64(InFmt_VOP3
        *iFmt)
        : Inst_VOP3(iFmt, "v_trig_preop_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_TRIG_PREOP_F64

    Inst_VOP3__V_TRIG_PREOP_F64::~Inst_VOP3__V_TRIG_PREOP_F64()
    {
    } // ~Inst_VOP3__V_TRIG_PREOP_F64

    void
    Inst_VOP3__V_TRIG_PREOP_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_BFM_B32::Inst_VOP3__V_BFM_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_bfm_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_BFM_B32

    Inst_VOP3__V_BFM_B32::~Inst_VOP3__V_BFM_B32()
    {
    } // ~Inst_VOP3__V_BFM_B32

    // D.u = ((1 << S0.u[4:0]) - 1) << S1.u[4:0];
    void
    Inst_VOP3__V_BFM_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = ((1 << bits(src0[lane], 4, 0)) - 1)
                    << bits(src1[lane], 4, 0);
            }
        }

        vdst.write();
    } // execute
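
    // Example: S0 = 8, S1 = 4 gives ((1 << 8) - 1) << 4 = 0x00000ff0, i.e.
    // an 8-bit field mask positioned at bit 4.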

    Inst_VOP3__V_CVT_PKNORM_I16_F32::Inst_VOP3__V_CVT_PKNORM_I16_F32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_pknorm_i16_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_PKNORM_I16_F32

    Inst_VOP3__V_CVT_PKNORM_I16_F32::~Inst_VOP3__V_CVT_PKNORM_I16_F32()
    {
    } // ~Inst_VOP3__V_CVT_PKNORM_I16_F32

    // D = {(snorm)S1.f, (snorm)S0.f}.
    void
    Inst_VOP3__V_CVT_PKNORM_I16_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_PKNORM_U16_F32::Inst_VOP3__V_CVT_PKNORM_U16_F32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_pknorm_u16_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_PKNORM_U16_F32

    Inst_VOP3__V_CVT_PKNORM_U16_F32::~Inst_VOP3__V_CVT_PKNORM_U16_F32()
    {
    } // ~Inst_VOP3__V_CVT_PKNORM_U16_F32

    // D = {(unorm)S1.f, (unorm)S0.f}.
    void
    Inst_VOP3__V_CVT_PKNORM_U16_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_PKRTZ_F16_F32::Inst_VOP3__V_CVT_PKRTZ_F16_F32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_pkrtz_f16_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_PKRTZ_F16_F32

    Inst_VOP3__V_CVT_PKRTZ_F16_F32::~Inst_VOP3__V_CVT_PKRTZ_F16_F32()
    {
    } // ~Inst_VOP3__V_CVT_PKRTZ_F16_F32

    void
    Inst_VOP3__V_CVT_PKRTZ_F16_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_PK_U16_U32::Inst_VOP3__V_CVT_PK_U16_U32(InFmt_VOP3
        *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_pk_u16_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CVT_PK_U16_U32

    Inst_VOP3__V_CVT_PK_U16_U32::~Inst_VOP3__V_CVT_PK_U16_U32()
    {
    } // ~Inst_VOP3__V_CVT_PK_U16_U32

    // D = {uint32_to_uint16(S1.u), uint32_to_uint16(S0.u)}.
    void
    Inst_VOP3__V_CVT_PK_U16_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_PK_I16_I32::Inst_VOP3__V_CVT_PK_I16_I32(InFmt_VOP3
        *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_pk_i16_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CVT_PK_I16_I32

    Inst_VOP3__V_CVT_PK_I16_I32::~Inst_VOP3__V_CVT_PK_I16_I32()
    {
    } // ~Inst_VOP3__V_CVT_PK_I16_I32

    // D = {int32_to_int16(S1.i), int32_to_int16(S0.i)}.
    void
    Inst_VOP3__V_CVT_PK_I16_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_ADD_U32::Inst_DS__DS_ADD_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_u32")
    {
    } // Inst_DS__DS_ADD_U32

    Inst_DS__DS_ADD_U32::~Inst_DS__DS_ADD_U32()
    {
    } // ~Inst_DS__DS_ADD_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_SUB_U32::Inst_DS__DS_SUB_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_sub_u32")
    {
    } // Inst_DS__DS_SUB_U32

    Inst_DS__DS_SUB_U32::~Inst_DS__DS_SUB_U32()
    {
    } // ~Inst_DS__DS_SUB_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] -= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_RSUB_U32::Inst_DS__DS_RSUB_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_rsub_u32")
    {
    } // Inst_DS__DS_RSUB_U32

    Inst_DS__DS_RSUB_U32::~Inst_DS__DS_RSUB_U32()
    {
    } // ~Inst_DS__DS_RSUB_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA - MEM[ADDR];
    // RETURN_DATA = tmp.
    // Subtraction with reversed operands.
    void
    Inst_DS__DS_RSUB_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_INC_U32::Inst_DS__DS_INC_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_inc_u32")
    {
    } // Inst_DS__DS_INC_U32

    Inst_DS__DS_INC_U32::~Inst_DS__DS_INC_U32()
    {
    } // ~Inst_DS__DS_INC_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_INC_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_DEC_U32::Inst_DS__DS_DEC_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_dec_u32")
    {
    } // Inst_DS__DS_DEC_U32

    Inst_DS__DS_DEC_U32::~Inst_DS__DS_DEC_U32()
    {
    } // ~Inst_DS__DS_DEC_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
    // (unsigned compare); RETURN_DATA = tmp.
    void
    Inst_DS__DS_DEC_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MIN_I32::Inst_DS__DS_MIN_I32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_i32")
    {
    } // Inst_DS__DS_MIN_I32

    Inst_DS__DS_MIN_I32::~Inst_DS__DS_MIN_I32()
    {
    } // ~Inst_DS__DS_MIN_I32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MIN_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MAX_I32::Inst_DS__DS_MAX_I32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_i32")
    {
    } // Inst_DS__DS_MAX_I32

    Inst_DS__DS_MAX_I32::~Inst_DS__DS_MAX_I32()
    {
    } // ~Inst_DS__DS_MAX_I32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MAX_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MIN_U32::Inst_DS__DS_MIN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_u32")
    {
    } // Inst_DS__DS_MIN_U32

    Inst_DS__DS_MIN_U32::~Inst_DS__DS_MIN_U32()
    {
    } // ~Inst_DS__DS_MIN_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MIN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MAX_U32::Inst_DS__DS_MAX_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_u32")
    {
    } // Inst_DS__DS_MAX_U32

    Inst_DS__DS_MAX_U32::~Inst_DS__DS_MAX_U32()
    {
    } // ~Inst_DS__DS_MAX_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MAX_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_AND_B32::Inst_DS__DS_AND_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_and_b32")
    {
    } // Inst_DS__DS_AND_B32

    Inst_DS__DS_AND_B32::~Inst_DS__DS_AND_B32()
    {
    } // ~Inst_DS__DS_AND_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_AND_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_OR_B32::Inst_DS__DS_OR_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_or_b32")
    {
    } // Inst_DS__DS_OR_B32

    Inst_DS__DS_OR_B32::~Inst_DS__DS_OR_B32()
    {
    } // ~Inst_DS__DS_OR_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_OR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_XOR_B32::Inst_DS__DS_XOR_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_xor_b32")
    {
    } // Inst_DS__DS_XOR_B32

    Inst_DS__DS_XOR_B32::~Inst_DS__DS_XOR_B32()
    {
    } // ~Inst_DS__DS_XOR_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MSKOR_B32::Inst_DS__DS_MSKOR_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_mskor_b32")
    {
    } // Inst_DS__DS_MSKOR_B32

    Inst_DS__DS_MSKOR_B32::~Inst_DS__DS_MSKOR_B32()
    {
    } // ~Inst_DS__DS_MSKOR_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (MEM[ADDR] & ~DATA) | DATA2;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MSKOR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_WRITE_B32::Inst_DS__DS_WRITE_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write_b32")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE_B32

    Inst_DS__DS_WRITE_B32::~Inst_DS__DS_WRITE_B32()
    {
    } // ~Inst_DS__DS_WRITE_B32

    // MEM[ADDR] = DATA.
    // Write dword.
    void
    Inst_DS__DS_WRITE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data(gpuDynInst, extData.DATA0);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->wrLmReqsInPipe--;
        wf->outstandingReqsWrLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_WRITE_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0;
        Addr offset1 = instData.OFFSET1;
        Addr offset = (offset1 << 8) | offset0;

        initMemWrite<VecElemU32>(gpuDynInst, offset);
    } // initiateAcc

    void
    Inst_DS__DS_WRITE_B32::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
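
    // OFFSET0/OFFSET1 concatenate into a 16-bit unsigned byte offset,
    // offset = (OFFSET1 << 8) | OFFSET0, which initMemWrite() applies on top
    // of the per-lane LDS addresses computed by calcAddr() in execute().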

    Inst_DS__DS_WRITE2_B32::Inst_DS__DS_WRITE2_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write2_b32")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE2_B32

    Inst_DS__DS_WRITE2_B32::~Inst_DS__DS_WRITE2_B32()
    {
    } // ~Inst_DS__DS_WRITE2_B32

    // MEM[ADDR_BASE + OFFSET0 * 4] = DATA;
    // MEM[ADDR_BASE + OFFSET1 * 4] = DATA2.
    // Write 2 dwords.
    void
    Inst_DS__DS_WRITE2_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data0(gpuDynInst, extData.DATA0);
        ConstVecOperandU32 data1(gpuDynInst, extData.DATA1);

        addr.read();
        data0.read();
        data1.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 2]
                    = data0[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 2 + 1] = data1[lane];
            }
        }

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->wrLmReqsInPipe--;
        wf->outstandingReqsWrLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_WRITE2_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0 * 4;
        Addr offset1 = instData.OFFSET1 * 4;

        initDualMemWrite<VecElemU32>(gpuDynInst, offset0, offset1);
    } // initiateAcc

    void
    Inst_DS__DS_WRITE2_B32::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
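
    // For the write2 forms the two offsets are dword-scaled (OFFSET * 4
    // bytes), and d_data interleaves the pair per lane -- data0 at index
    // lane * 2 and data1 at lane * 2 + 1 -- which initDualMemWrite() is
    // assumed to unpack into the two LDS locations.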

    Inst_DS__DS_WRITE2ST64_B32::Inst_DS__DS_WRITE2ST64_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write2st64_b32")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE2ST64_B32

    Inst_DS__DS_WRITE2ST64_B32::~Inst_DS__DS_WRITE2ST64_B32()
    {
    } // ~Inst_DS__DS_WRITE2ST64_B32

    // MEM[ADDR_BASE + OFFSET0 * 4 * 64] = DATA;
    // MEM[ADDR_BASE + OFFSET1 * 4 * 64] = DATA2;
    // Write 2 dwords.
    void
    Inst_DS__DS_WRITE2ST64_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data0(gpuDynInst, extData.DATA0);
        ConstVecOperandU32 data1(gpuDynInst, extData.DATA1);

        addr.read();
        data0.read();
        data1.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 2]
                    = data0[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 2 + 1] = data1[lane];
            }
        }

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->wrLmReqsInPipe--;
        wf->outstandingReqsWrLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_WRITE2ST64_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0 * 4 * 64;
        Addr offset1 = instData.OFFSET1 * 4 * 64;

        initDualMemWrite<VecElemU32>(gpuDynInst, offset0, offset1);
    } // initiateAcc

    void
    Inst_DS__DS_WRITE2ST64_B32::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
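
    // The st64 variant scales the same offsets by a further 64 dwords,
    // presumably to support strided layouts where the two elements sit
    // 64 dwords apart.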

    // --- Inst_DS__DS_CMPST_B32 class methods ---

    Inst_DS__DS_CMPST_B32::Inst_DS__DS_CMPST_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_cmpst_b32")
    {
    } // Inst_DS__DS_CMPST_B32

    Inst_DS__DS_CMPST_B32::~Inst_DS__DS_CMPST_B32()
    {
    } // ~Inst_DS__DS_CMPST_B32

    // tmp = MEM[ADDR];
    // src = DATA2;
    // cmp = DATA;
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    // Compare and store.
    void
    Inst_DS__DS_CMPST_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_CMPST_F32::Inst_DS__DS_CMPST_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_cmpst_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_CMPST_F32

    Inst_DS__DS_CMPST_F32::~Inst_DS__DS_CMPST_F32()
    {
    } // ~Inst_DS__DS_CMPST_F32

    // tmp = MEM[ADDR];
    // src = DATA2;
    // cmp = DATA;
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    void
    Inst_DS__DS_CMPST_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MIN_F32::Inst_DS__DS_MIN_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_MIN_F32

    Inst_DS__DS_MIN_F32::~Inst_DS__DS_MIN_F32()
    {
    } // ~Inst_DS__DS_MIN_F32

    // tmp = MEM[ADDR];
    // src = DATA;
    // cmp = DATA2;
    // MEM[ADDR] = (cmp < tmp) ? src : tmp.
    void
    Inst_DS__DS_MIN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MAX_F32::Inst_DS__DS_MAX_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_MAX_F32

    Inst_DS__DS_MAX_F32::~Inst_DS__DS_MAX_F32()
    {
    } // ~Inst_DS__DS_MAX_F32

    // tmp = MEM[ADDR];
    // src = DATA;
    // cmp = DATA2;
    // MEM[ADDR] = (tmp > cmp) ? src : tmp.
    void
    Inst_DS__DS_MAX_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_NOP::Inst_DS__DS_NOP(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_nop")
    {
        setFlag(Nop);
    } // Inst_DS__DS_NOP

    Inst_DS__DS_NOP::~Inst_DS__DS_NOP()
    {
    } // ~Inst_DS__DS_NOP

    // Do nothing.
    void
    Inst_DS__DS_NOP::execute(GPUDynInstPtr gpuDynInst)
    {
    } // execute

    Inst_DS__DS_ADD_F32::Inst_DS__DS_ADD_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_ADD_F32

    Inst_DS__DS_ADD_F32::~Inst_DS__DS_ADD_F32()
    {
    } // ~Inst_DS__DS_ADD_F32

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_ADD_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_DS__DS_WRITE_B8::Inst_DS__DS_WRITE_B8(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write_b8")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE_B8

    Inst_DS__DS_WRITE_B8::~Inst_DS__DS_WRITE_B8()
    {
    } // ~Inst_DS__DS_WRITE_B8

    // MEM[ADDR] = DATA[7:0].
    void
    Inst_DS__DS_WRITE_B8::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU8 data(gpuDynInst, extData.DATA0);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                (reinterpret_cast<VecElemU8*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->wrLmReqsInPipe--;
        wf->outstandingReqsWrLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_WRITE_B8::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0;
        Addr offset1 = instData.OFFSET1;
        Addr offset = (offset1 << 8) | offset0;

        initMemWrite<VecElemU8>(gpuDynInst, offset);
    } // initiateAcc

    void
    Inst_DS__DS_WRITE_B8::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
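    /**
     * Worked example of the offset composition used by single-address DS
     * ops such as ds_write_b8: the two 8-bit fields form one 16-bit byte
     * offset, (OFFSET1 << 8) | OFFSET0. For instance, OFFSET1 = 0x1 and
     * OFFSET0 = 0x20 yield byte offset 0x120, i.e. 288 bytes past each
     * lane's base address.
     */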
    // --- Inst_DS__DS_WRITE_B16 class methods ---

    Inst_DS__DS_WRITE_B16::Inst_DS__DS_WRITE_B16(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write_b16")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE_B16

    Inst_DS__DS_WRITE_B16::~Inst_DS__DS_WRITE_B16()
    {
    } // ~Inst_DS__DS_WRITE_B16

    // MEM[ADDR] = DATA[15:0].
    void
    Inst_DS__DS_WRITE_B16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU16 data(gpuDynInst, extData.DATA0);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                (reinterpret_cast<VecElemU16*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->wrLmReqsInPipe--;
        wf->outstandingReqsWrLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_WRITE_B16::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0;
        Addr offset1 = instData.OFFSET1;
        Addr offset = (offset1 << 8) | offset0;

        initMemWrite<VecElemU16>(gpuDynInst, offset);
    } // initiateAcc

    void
    Inst_DS__DS_WRITE_B16::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_DS__DS_ADD_RTN_U32 class methods ---

    Inst_DS__DS_ADD_RTN_U32::Inst_DS__DS_ADD_RTN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_rtn_u32")
    {
    } // Inst_DS__DS_ADD_RTN_U32

    Inst_DS__DS_ADD_RTN_U32::~Inst_DS__DS_ADD_RTN_U32()
    {
    } // ~Inst_DS__DS_ADD_RTN_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_ADD_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_SUB_RTN_U32::Inst_DS__DS_SUB_RTN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_sub_rtn_u32")
    {
    } // Inst_DS__DS_SUB_RTN_U32

    Inst_DS__DS_SUB_RTN_U32::~Inst_DS__DS_SUB_RTN_U32()
    {
    } // ~Inst_DS__DS_SUB_RTN_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] -= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_SUB_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_RSUB_RTN_U32::Inst_DS__DS_RSUB_RTN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_rsub_rtn_u32")
    {
    } // Inst_DS__DS_RSUB_RTN_U32

    Inst_DS__DS_RSUB_RTN_U32::~Inst_DS__DS_RSUB_RTN_U32()
    {
    } // ~Inst_DS__DS_RSUB_RTN_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA - MEM[ADDR];
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_RSUB_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_INC_RTN_U32::Inst_DS__DS_INC_RTN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_inc_rtn_u32")
    {
    } // Inst_DS__DS_INC_RTN_U32

    Inst_DS__DS_INC_RTN_U32::~Inst_DS__DS_INC_RTN_U32()
    {
    } // ~Inst_DS__DS_INC_RTN_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_INC_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_DEC_RTN_U32::Inst_DS__DS_DEC_RTN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_dec_rtn_u32")
    {
    } // Inst_DS__DS_DEC_RTN_U32

    Inst_DS__DS_DEC_RTN_U32::~Inst_DS__DS_DEC_RTN_U32()
    {
    } // ~Inst_DS__DS_DEC_RTN_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
    // (unsigned compare); RETURN_DATA = tmp.
    void
    Inst_DS__DS_DEC_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MIN_RTN_I32::Inst_DS__DS_MIN_RTN_I32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_rtn_i32")
    {
    } // Inst_DS__DS_MIN_RTN_I32

    Inst_DS__DS_MIN_RTN_I32::~Inst_DS__DS_MIN_RTN_I32()
    {
    } // ~Inst_DS__DS_MIN_RTN_I32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MIN_RTN_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MAX_RTN_I32::Inst_DS__DS_MAX_RTN_I32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_rtn_i32")
    {
    } // Inst_DS__DS_MAX_RTN_I32

    Inst_DS__DS_MAX_RTN_I32::~Inst_DS__DS_MAX_RTN_I32()
    {
    } // ~Inst_DS__DS_MAX_RTN_I32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MAX_RTN_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MIN_RTN_U32::Inst_DS__DS_MIN_RTN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_rtn_u32")
    {
    } // Inst_DS__DS_MIN_RTN_U32

    Inst_DS__DS_MIN_RTN_U32::~Inst_DS__DS_MIN_RTN_U32()
    {
    } // ~Inst_DS__DS_MIN_RTN_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MIN_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MAX_RTN_U32::Inst_DS__DS_MAX_RTN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_rtn_u32")
    {
    } // Inst_DS__DS_MAX_RTN_U32

    Inst_DS__DS_MAX_RTN_U32::~Inst_DS__DS_MAX_RTN_U32()
    {
    } // ~Inst_DS__DS_MAX_RTN_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MAX_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_AND_RTN_B32::Inst_DS__DS_AND_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_and_rtn_b32")
    {
    } // Inst_DS__DS_AND_RTN_B32

    Inst_DS__DS_AND_RTN_B32::~Inst_DS__DS_AND_RTN_B32()
    {
    } // ~Inst_DS__DS_AND_RTN_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_AND_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_OR_RTN_B32::Inst_DS__DS_OR_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_or_rtn_b32")
    {
    } // Inst_DS__DS_OR_RTN_B32

    Inst_DS__DS_OR_RTN_B32::~Inst_DS__DS_OR_RTN_B32()
    {
    } // ~Inst_DS__DS_OR_RTN_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_OR_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_XOR_RTN_B32::Inst_DS__DS_XOR_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_xor_rtn_b32")
    {
    } // Inst_DS__DS_XOR_RTN_B32

    Inst_DS__DS_XOR_RTN_B32::~Inst_DS__DS_XOR_RTN_B32()
    {
    } // ~Inst_DS__DS_XOR_RTN_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_XOR_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MSKOR_RTN_B32::Inst_DS__DS_MSKOR_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_mskor_rtn_b32")
    {
    } // Inst_DS__DS_MSKOR_RTN_B32

    Inst_DS__DS_MSKOR_RTN_B32::~Inst_DS__DS_MSKOR_RTN_B32()
    {
    } // ~Inst_DS__DS_MSKOR_RTN_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (MEM[ADDR] & ~DATA) | DATA2;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MSKOR_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_WRXCHG_RTN_B32::Inst_DS__DS_WRXCHG_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_wrxchg_rtn_b32")
    {
    } // Inst_DS__DS_WRXCHG_RTN_B32

    Inst_DS__DS_WRXCHG_RTN_B32::~Inst_DS__DS_WRXCHG_RTN_B32()
    {
    } // ~Inst_DS__DS_WRXCHG_RTN_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA;
    // RETURN_DATA = tmp.
    // Write-exchange operation.
    void
    Inst_DS__DS_WRXCHG_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_WRXCHG2_RTN_B32::Inst_DS__DS_WRXCHG2_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_wrxchg2_rtn_b32")
    {
    } // Inst_DS__DS_WRXCHG2_RTN_B32

    Inst_DS__DS_WRXCHG2_RTN_B32::~Inst_DS__DS_WRXCHG2_RTN_B32()
    {
    } // ~Inst_DS__DS_WRXCHG2_RTN_B32

    // Write-exchange 2 separate dwords.
    void
    Inst_DS__DS_WRXCHG2_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_WRXCHG2ST64_RTN_B32::Inst_DS__DS_WRXCHG2ST64_RTN_B32(
          InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_wrxchg2st64_rtn_b32")
    {
    } // Inst_DS__DS_WRXCHG2ST64_RTN_B32

    Inst_DS__DS_WRXCHG2ST64_RTN_B32::~Inst_DS__DS_WRXCHG2ST64_RTN_B32()
    {
    } // ~Inst_DS__DS_WRXCHG2ST64_RTN_B32

    // Write-exchange 2 separate dwords with a stride of 64 dwords.
    void
    Inst_DS__DS_WRXCHG2ST64_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_CMPST_RTN_B32::Inst_DS__DS_CMPST_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_cmpst_rtn_b32")
    {
    } // Inst_DS__DS_CMPST_RTN_B32

    Inst_DS__DS_CMPST_RTN_B32::~Inst_DS__DS_CMPST_RTN_B32()
    {
    } // ~Inst_DS__DS_CMPST_RTN_B32

    // tmp = MEM[ADDR];
    // src = DATA2;
    // cmp = DATA;
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    // Compare and store.
    void
    Inst_DS__DS_CMPST_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_CMPST_RTN_F32::Inst_DS__DS_CMPST_RTN_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_cmpst_rtn_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_CMPST_RTN_F32

    Inst_DS__DS_CMPST_RTN_F32::~Inst_DS__DS_CMPST_RTN_F32()
    {
    } // ~Inst_DS__DS_CMPST_RTN_F32

    // tmp = MEM[ADDR];
    // src = DATA2;
    // cmp = DATA;
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    void
    Inst_DS__DS_CMPST_RTN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MIN_RTN_F32::Inst_DS__DS_MIN_RTN_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_rtn_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_MIN_RTN_F32

    Inst_DS__DS_MIN_RTN_F32::~Inst_DS__DS_MIN_RTN_F32()
    {
    } // ~Inst_DS__DS_MIN_RTN_F32

    // tmp = MEM[ADDR];
    // src = DATA;
    // cmp = DATA2;
    // MEM[ADDR] = (cmp < tmp) ? src : tmp.
    void
    Inst_DS__DS_MIN_RTN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MAX_RTN_F32::Inst_DS__DS_MAX_RTN_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_rtn_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_MAX_RTN_F32

    Inst_DS__DS_MAX_RTN_F32::~Inst_DS__DS_MAX_RTN_F32()
    {
    } // ~Inst_DS__DS_MAX_RTN_F32

    // tmp = MEM[ADDR];
    // src = DATA;
    // cmp = DATA2;
    // MEM[ADDR] = (tmp > cmp) ? src : tmp.
    void
    Inst_DS__DS_MAX_RTN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_WRAP_RTN_B32::Inst_DS__DS_WRAP_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_wrap_rtn_b32")
    {
    } // Inst_DS__DS_WRAP_RTN_B32

    Inst_DS__DS_WRAP_RTN_B32::~Inst_DS__DS_WRAP_RTN_B32()
    {
    } // ~Inst_DS__DS_WRAP_RTN_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA) ? tmp - DATA : tmp + DATA2;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_WRAP_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_ADD_RTN_F32::Inst_DS__DS_ADD_RTN_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_rtn_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_ADD_RTN_F32

    Inst_DS__DS_ADD_RTN_F32::~Inst_DS__DS_ADD_RTN_F32()
    {
    } // ~Inst_DS__DS_ADD_RTN_F32

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_ADD_RTN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_DS__DS_READ_B32::Inst_DS__DS_READ_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read_b32")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ_B32

    Inst_DS__DS_READ_B32::~Inst_DS__DS_READ_B32()
    {
    } // ~Inst_DS__DS_READ_B32

    // RETURN_DATA = MEM[ADDR].
    void
    Inst_DS__DS_READ_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->rdLmReqsInPipe--;
        wf->outstandingReqsRdLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_READ_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0;
        Addr offset1 = instData.OFFSET1;
        Addr offset = (offset1 << 8) | offset0;

        initMemRead<VecElemU32>(gpuDynInst, offset);
    } // initiateAcc

    void
    Inst_DS__DS_READ_B32::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst(gpuDynInst, extData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane];
            }
        }

        vdst.write();
    } // completeAcc
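    /**
     * The DS loads and stores above all follow the same three-phase flow:
     * execute() computes per-lane addresses (calcAddr) and queues the
     * instruction on the local memory pipeline; initiateAcc() starts the
     * actual LDS access, with initMemWrite consuming and initMemRead
     * filling the d_data staging buffer; and completeAcc() copies any
     * returned data from d_data into the destination VGPRs. The
     * wrLm/rdLm and outstandingReqs counters track a request across
     * those phases.
     */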
    Inst_DS__DS_READ2_B32::Inst_DS__DS_READ2_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read2_b32")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ2_B32

    Inst_DS__DS_READ2_B32::~Inst_DS__DS_READ2_B32()
    {
    } // ~Inst_DS__DS_READ2_B32

    // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 4];
    // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 4].
    void
    Inst_DS__DS_READ2_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->rdLmReqsInPipe--;
        wf->outstandingReqsRdLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_READ2_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0 * 4;
        Addr offset1 = instData.OFFSET1 * 4;

        initDualMemRead<VecElemU32>(gpuDynInst, offset0, offset1);
    } // initiateAcc

    void
    Inst_DS__DS_READ2_B32::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst0(gpuDynInst, extData.VDST);
        VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst0[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 2];
                vdst1[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 2 + 1];
            }
        }

        vdst0.write();
        vdst1.write();
    } // completeAcc
    Inst_DS__DS_READ2ST64_B32::Inst_DS__DS_READ2ST64_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read2st64_b32")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ2ST64_B32

    Inst_DS__DS_READ2ST64_B32::~Inst_DS__DS_READ2ST64_B32()
    {
    } // ~Inst_DS__DS_READ2ST64_B32

    // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 4 * 64];
    // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 4 * 64].
    void
    Inst_DS__DS_READ2ST64_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->rdLmReqsInPipe--;
        wf->outstandingReqsRdLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_READ2ST64_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = (instData.OFFSET0 * 4 * 64);
        Addr offset1 = (instData.OFFSET1 * 4 * 64);

        initDualMemRead<VecElemU32>(gpuDynInst, offset0, offset1);
    } // initiateAcc

    void
    Inst_DS__DS_READ2ST64_B32::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst0(gpuDynInst, extData.VDST);
        VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                // This is a dword (b32) read, so d_data must be indexed
                // with dword-sized elements; a 64-bit element cast here
                // would stride past the data returned for each lane.
                vdst0[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 2];
                vdst1[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 2 + 1];
            }
        }

        vdst0.write();
        vdst1.write();
    } // completeAcc
    // --- Inst_DS__DS_READ_I8 class methods ---

    Inst_DS__DS_READ_I8::Inst_DS__DS_READ_I8(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read_i8")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ_I8

    Inst_DS__DS_READ_I8::~Inst_DS__DS_READ_I8()
    {
    } // ~Inst_DS__DS_READ_I8

    // RETURN_DATA = signext(MEM[ADDR][7:0]).
    // Signed byte read.
    void
    Inst_DS__DS_READ_I8::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_READ_U8::Inst_DS__DS_READ_U8(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read_u8")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ_U8

    Inst_DS__DS_READ_U8::~Inst_DS__DS_READ_U8()
    {
    } // ~Inst_DS__DS_READ_U8

    // RETURN_DATA = {24'h0,MEM[ADDR][7:0]}.
    // Unsigned byte read.
    void
    Inst_DS__DS_READ_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->rdLmReqsInPipe--;
        wf->outstandingReqsRdLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_READ_U8::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0;
        Addr offset1 = instData.OFFSET1;
        Addr offset = (offset1 << 8) | offset0;

        initMemRead<VecElemU8>(gpuDynInst, offset);
    } // initiateAcc

    void
    Inst_DS__DS_READ_U8::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst(gpuDynInst, extData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst[lane] = (VecElemU32)(reinterpret_cast<VecElemU8*>(
                    gpuDynInst->d_data))[lane];
            }
        }

        vdst.write();
    } // completeAcc
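    /**
     * Example of the widening above: ds_read_u8 zero-extends each byte
     * into a 32-bit VGPR element, so a stored byte of 0xFF returns as
     * 0x000000FF. The signed variant (ds_read_i8) would instead
     * sign-extend, returning 0xFFFFFFFF for the same byte.
     */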
    // --- Inst_DS__DS_READ_I16 class methods ---

    Inst_DS__DS_READ_I16::Inst_DS__DS_READ_I16(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read_i16")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ_I16

    Inst_DS__DS_READ_I16::~Inst_DS__DS_READ_I16()
    {
    } // ~Inst_DS__DS_READ_I16

    // RETURN_DATA = signext(MEM[ADDR][15:0]).
    // Signed short read.
    void
    Inst_DS__DS_READ_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_READ_U16::Inst_DS__DS_READ_U16(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read_u16")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ_U16

    Inst_DS__DS_READ_U16::~Inst_DS__DS_READ_U16()
    {
    } // ~Inst_DS__DS_READ_U16

    // RETURN_DATA = {16'h0,MEM[ADDR][15:0]}.
    // Unsigned short read.
    void
    Inst_DS__DS_READ_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->rdLmReqsInPipe--;
        wf->outstandingReqsRdLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_READ_U16::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0;
        Addr offset1 = instData.OFFSET1;
        Addr offset = (offset1 << 8) | offset0;

        initMemRead<VecElemU16>(gpuDynInst, offset);
    } // initiateAcc

    void
    Inst_DS__DS_READ_U16::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst(gpuDynInst, extData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst[lane] = (VecElemU32)(reinterpret_cast<VecElemU16*>(
                    gpuDynInst->d_data))[lane];
            }
        }

        vdst.write();
    } // completeAcc
    // --- Inst_DS__DS_SWIZZLE_B32 class methods ---

    Inst_DS__DS_SWIZZLE_B32::Inst_DS__DS_SWIZZLE_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_swizzle_b32")
    {
        setFlag(Load);
    } // Inst_DS__DS_SWIZZLE_B32

    Inst_DS__DS_SWIZZLE_B32::~Inst_DS__DS_SWIZZLE_B32()
    {
    } // ~Inst_DS__DS_SWIZZLE_B32

    // RETURN_DATA = swizzle(vgpr_data, offset1:offset0).
    // Dword swizzle, no data is written to LDS memory.
    void
    Inst_DS__DS_SWIZZLE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        wf->rdLmReqsInPipe--;
        wf->validateRequestCounters();

        if (gpuDynInst->exec_mask.none()) {
            wf->decLGKMInstsIssued();
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()
                                ->cyclesToTicks(Cycles(24)));

        ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
        VecOperandU32 vdst(gpuDynInst, extData.VDST);
        /**
         * The "DS pattern" is comprised of both offset fields. That is, the
         * swizzle pattern between lanes. Bit 15 of the DS pattern dictates
         * which swizzle mode to use. There are two different swizzle
         * patterns: 1) QDMode and 2) Bit-masks mode. If bit 15 is set use
         * QDMode else use Bit-masks mode. The remaining bits dictate how to
         * swizzle the lanes.
         *
         * QDMode:    Chunks the lanes into 4s and swizzles among them.
         *            Bits 7:6 dictate where lane 3 (of the current chunk)
         *            gets its data, 5:4 lane 2, etc.
         *
         * Bit-masks: This mode breaks bits 14:0 into 3 equal-sized chunks.
         *            14:10 is the xor_mask, 9:5 is the or_mask, and 4:0
         *            is the and_mask. Each lane is swizzled by performing
         *            the appropriate operation using these masks.
         */
        VecElemU16 ds_pattern = ((instData.OFFSET1 << 8) | instData.OFFSET0);

        data.read();

        if (bits(ds_pattern, 15)) {
            // QDMode
            for (int lane = 0; lane < NumVecElemPerVecReg; lane += 4) {
                /**
                 * This operation allows data sharing between groups
                 * of four consecutive threads. Note the increment by
                 * 4 in the for loop.
                 */
                if (gpuDynInst->exec_mask[lane]) {
                    int index0 = lane + bits(ds_pattern, 1, 0);
                    panic_if(index0 >= NumVecElemPerVecReg, "%s: index0 (%d) "
                             "is out of bounds.\n", gpuDynInst->disassemble(),
                             index0);
                    vdst[lane]
                        = gpuDynInst->exec_mask[index0] ? data[index0] : 0;
                }
                if (gpuDynInst->exec_mask[lane + 1]) {
                    int index1 = lane + bits(ds_pattern, 3, 2);
                    panic_if(index1 >= NumVecElemPerVecReg, "%s: index1 (%d) "
                             "is out of bounds.\n", gpuDynInst->disassemble(),
                             index1);
                    vdst[lane + 1]
                        = gpuDynInst->exec_mask[index1] ? data[index1] : 0;
                }
                if (gpuDynInst->exec_mask[lane + 2]) {
                    int index2 = lane + bits(ds_pattern, 5, 4);
                    panic_if(index2 >= NumVecElemPerVecReg, "%s: index2 (%d) "
                             "is out of bounds.\n", gpuDynInst->disassemble(),
                             index2);
                    vdst[lane + 2]
                        = gpuDynInst->exec_mask[index2] ? data[index2] : 0;
                }
                if (gpuDynInst->exec_mask[lane + 3]) {
                    int index3 = lane + bits(ds_pattern, 7, 6);
                    panic_if(index3 >= NumVecElemPerVecReg, "%s: index3 (%d) "
                             "is out of bounds.\n", gpuDynInst->disassemble(),
                             index3);
                    vdst[lane + 3]
                        = gpuDynInst->exec_mask[index3] ? data[index3] : 0;
                }
            }
        } else {
            // Bit-masks mode
            int and_mask = bits(ds_pattern, 4, 0);
            int or_mask = bits(ds_pattern, 9, 5);
            int xor_mask = bits(ds_pattern, 14, 10);
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    int index = (((lane & and_mask) | or_mask) ^ xor_mask);
                    // Adjust for the next 32 lanes.
                    if (lane > 31) {
                        index += 32;
                    }
                    panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is "
                             "out of bounds.\n", gpuDynInst->disassemble(),
                             index);
                    vdst[lane]
                        = gpuDynInst->exec_mask[index] ? data[index] : 0;
                }
            }
        }

        vdst.write();
    } // execute
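    /**
     * Worked examples for the two swizzle modes above. QDMode: a DS
     * pattern of 0x801B has bit 15 set and selectors 0b00011011, so in
     * every group of four lanes, lane 0 reads lane 3's data, lane 1 reads
     * lane 2's, lane 2 reads lane 1's, and lane 3 reads lane 0's (a quad
     * reversal). Bit-masks mode: and_mask = 0x1F, or_mask = 0x00, and
     * xor_mask = 0x01 give index = ((lane & 0x1F) | 0x00) ^ 0x01, which
     * swaps each even lane with its odd neighbor.
     */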
    // --- Inst_DS__DS_PERMUTE_B32 class methods ---

    Inst_DS__DS_PERMUTE_B32::Inst_DS__DS_PERMUTE_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_permute_b32")
    {
        setFlag(MemoryRef);
        /**
         * While this operation doesn't actually use DS storage we classify
         * it as a load here because it does a writeback to a VGPR, which
         * fits in better with the LDS pipeline logic.
         */
        setFlag(Load);
    } // Inst_DS__DS_PERMUTE_B32

    Inst_DS__DS_PERMUTE_B32::~Inst_DS__DS_PERMUTE_B32()
    {
    } // ~Inst_DS__DS_PERMUTE_B32

    // Forward permute.
    void
    Inst_DS__DS_PERMUTE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()
                                ->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
        VecOperandU32 vdst(gpuDynInst, extData.VDST);

        addr.read();
        data.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                /**
                 * One of the offset fields can be used for the index.
                 * It is assumed OFFSET0 would be used, as OFFSET1 is
                 * typically only used for DS ops that operate on two
                 * disparate pieces of data.
                 */
                assert(!instData.OFFSET1);
                /**
                 * The address provided is a byte address, but VGPRs are
                 * 4 bytes, so we must divide by 4 to get the actual VGPR
                 * index. Additionally, the index is calculated modulo the
                 * WF size, 64 in this case, so we simply extract bits 7-2.
                 */
                int index = bits(addr[lane] + instData.OFFSET0, 7, 2);
                panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is out "
                         "of bounds.\n", gpuDynInst->disassemble(), index);
                /**
                 * If the shuffled index corresponds to a lane that is
                 * inactive then this instruction writes a 0 to the active
                 * lane in VDST.
                 */
                if (wf->execMask(index)) {
                    vdst[index] = data[lane];
                } else {
                    vdst[index] = 0;
                }
            }
        }

        vdst.write();

        wf->decLGKMInstsIssued();
        wf->rdLmReqsInPipe--;
        wf->validateRequestCounters();
    } // execute
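    /**
     * Worked example of the index calculation above: with OFFSET0 = 0 and
     * addr[lane] = 8, bits(8, 7, 2) = 2, so this lane's data is routed to
     * lane 2. Each lane effectively names a destination lane by supplying
     * the byte address 4 * destination_lane.
     */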
    // --- Inst_DS__DS_BPERMUTE_B32 class methods ---

    Inst_DS__DS_BPERMUTE_B32::Inst_DS__DS_BPERMUTE_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_bpermute_b32")
    {
        setFlag(MemoryRef);
        /**
         * While this operation doesn't actually use DS storage we classify
         * it as a load here because it does a writeback to a VGPR, which
         * fits in better with the LDS pipeline logic.
         */
        setFlag(Load);
    } // Inst_DS__DS_BPERMUTE_B32

    Inst_DS__DS_BPERMUTE_B32::~Inst_DS__DS_BPERMUTE_B32()
    {
    } // ~Inst_DS__DS_BPERMUTE_B32

    // Backward permute.
    void
    Inst_DS__DS_BPERMUTE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()
                                ->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
        VecOperandU32 vdst(gpuDynInst, extData.VDST);

        addr.read();
        data.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                /**
                 * One of the offset fields can be used for the index.
                 * It is assumed OFFSET0 would be used, as OFFSET1 is
                 * typically only used for DS ops that operate on two
                 * disparate pieces of data.
                 */
                assert(!instData.OFFSET1);
                /**
                 * The address provided is a byte address, but VGPRs are
                 * 4 bytes, so we must divide by 4 to get the actual VGPR
                 * index. Additionally, the index is calculated modulo the
                 * WF size, 64 in this case, so we simply extract bits 7-2.
                 */
                int index = bits(addr[lane] + instData.OFFSET0, 7, 2);
                panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is out "
                         "of bounds.\n", gpuDynInst->disassemble(), index);
                /**
                 * If the shuffled index corresponds to a lane that is
                 * inactive then this instruction writes a 0 to the active
                 * lane in VDST.
                 */
                if (wf->execMask(index)) {
                    vdst[lane] = data[index];
                } else {
                    vdst[lane] = 0;
                }
            }
        }

        vdst.write();

        wf->decLGKMInstsIssued();
        wf->rdLmReqsInPipe--;
        wf->validateRequestCounters();
    } // execute
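    /**
     * The two permutes above differ only in the direction of the move:
     * ds_permute_b32 scatters (vdst[index] = data[lane], "push my value
     * to lane index"), while ds_bpermute_b32 gathers (vdst[lane] =
     * data[index], "pull my value from lane index"). Both derive the lane
     * index from a byte address divided by 4 and write 0 when the
     * selected lane is inactive.
     */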
    // --- Inst_DS__DS_ADD_U64 class methods ---

    Inst_DS__DS_ADD_U64::Inst_DS__DS_ADD_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_u64")
    {
    } // Inst_DS__DS_ADD_U64

    Inst_DS__DS_ADD_U64::~Inst_DS__DS_ADD_U64()
    {
    } // ~Inst_DS__DS_ADD_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_ADD_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_SUB_U64::Inst_DS__DS_SUB_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_sub_u64")
    {
    } // Inst_DS__DS_SUB_U64

    Inst_DS__DS_SUB_U64::~Inst_DS__DS_SUB_U64()
    {
    } // ~Inst_DS__DS_SUB_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] -= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_SUB_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_RSUB_U64::Inst_DS__DS_RSUB_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_rsub_u64")
    {
    } // Inst_DS__DS_RSUB_U64

    Inst_DS__DS_RSUB_U64::~Inst_DS__DS_RSUB_U64()
    {
    } // ~Inst_DS__DS_RSUB_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA - MEM[ADDR];
    // RETURN_DATA = tmp.
    // Subtraction with reversed operands.
    void
    Inst_DS__DS_RSUB_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_INC_U64::Inst_DS__DS_INC_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_inc_u64")
    {
    } // Inst_DS__DS_INC_U64

    Inst_DS__DS_INC_U64::~Inst_DS__DS_INC_U64()
    {
    } // ~Inst_DS__DS_INC_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_INC_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_DEC_U64::Inst_DS__DS_DEC_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_dec_u64")
    {
    } // Inst_DS__DS_DEC_U64

    Inst_DS__DS_DEC_U64::~Inst_DS__DS_DEC_U64()
    {
    } // ~Inst_DS__DS_DEC_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
    // (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_DEC_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MIN_I64::Inst_DS__DS_MIN_I64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_i64")
    {
    } // Inst_DS__DS_MIN_I64

    Inst_DS__DS_MIN_I64::~Inst_DS__DS_MIN_I64()
    {
    } // ~Inst_DS__DS_MIN_I64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_MIN_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MAX_I64::Inst_DS__DS_MAX_I64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_i64")
    {
    } // Inst_DS__DS_MAX_I64

    Inst_DS__DS_MAX_I64::~Inst_DS__DS_MAX_I64()
    {
    } // ~Inst_DS__DS_MAX_I64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_MAX_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MIN_U64::Inst_DS__DS_MIN_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_u64")
    {
    } // Inst_DS__DS_MIN_U64

    Inst_DS__DS_MIN_U64::~Inst_DS__DS_MIN_U64()
    {
    } // ~Inst_DS__DS_MIN_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_MIN_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MAX_U64::Inst_DS__DS_MAX_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_u64")
    {
    } // Inst_DS__DS_MAX_U64

    Inst_DS__DS_MAX_U64::~Inst_DS__DS_MAX_U64()
    {
    } // ~Inst_DS__DS_MAX_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_MAX_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_AND_B64::Inst_DS__DS_AND_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_and_b64")
    {
    } // Inst_DS__DS_AND_B64

    Inst_DS__DS_AND_B64::~Inst_DS__DS_AND_B64()
    {
    } // ~Inst_DS__DS_AND_B64

    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_AND_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_OR_B64::Inst_DS__DS_OR_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_or_b64")
    {
    } // Inst_DS__DS_OR_B64

    Inst_DS__DS_OR_B64::~Inst_DS__DS_OR_B64()
    {
    } // ~Inst_DS__DS_OR_B64

    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_OR_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_XOR_B64::Inst_DS__DS_XOR_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_xor_b64")
    {
    } // Inst_DS__DS_XOR_B64

    Inst_DS__DS_XOR_B64::~Inst_DS__DS_XOR_B64()
    {
    } // ~Inst_DS__DS_XOR_B64

    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_XOR_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MSKOR_B64::Inst_DS__DS_MSKOR_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_mskor_b64")
    {
    } // Inst_DS__DS_MSKOR_B64

    Inst_DS__DS_MSKOR_B64::~Inst_DS__DS_MSKOR_B64()
    {
    } // ~Inst_DS__DS_MSKOR_B64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (MEM[ADDR] & ~DATA) | DATA2;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MSKOR_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_DS__DS_WRITE_B64::Inst_DS__DS_WRITE_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write_b64")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE_B64

    Inst_DS__DS_WRITE_B64::~Inst_DS__DS_WRITE_B64()
    {
    } // ~Inst_DS__DS_WRITE_B64

    // MEM[ADDR] = DATA.
    void
    Inst_DS__DS_WRITE_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU64 data(gpuDynInst, extData.DATA0);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                (reinterpret_cast<VecElemU64*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->wrLmReqsInPipe--;
        wf->outstandingReqsWrLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_WRITE_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0;
        Addr offset1 = instData.OFFSET1;
        Addr offset = (offset1 << 8) | offset0;

        initMemWrite<VecElemU64>(gpuDynInst, offset);
    } // initiateAcc

    void
    Inst_DS__DS_WRITE_B64::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    Inst_DS__DS_WRITE2_B64::Inst_DS__DS_WRITE2_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write2_b64")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE2_B64

    Inst_DS__DS_WRITE2_B64::~Inst_DS__DS_WRITE2_B64()
    {
    } // ~Inst_DS__DS_WRITE2_B64

    // MEM[ADDR_BASE + OFFSET0 * 8] = DATA;
    // MEM[ADDR_BASE + OFFSET1 * 8] = DATA2.
    void
    Inst_DS__DS_WRITE2_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU64 data0(gpuDynInst, extData.DATA0);
        ConstVecOperandU64 data1(gpuDynInst, extData.DATA1);

        addr.read();
        data0.read();
        data1.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                (reinterpret_cast<VecElemU64*>(
                    gpuDynInst->d_data))[lane * 2] = data0[lane];
                (reinterpret_cast<VecElemU64*>(
                    gpuDynInst->d_data))[lane * 2 + 1] = data1[lane];
            }
        }

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->wrLmReqsInPipe--;
        wf->outstandingReqsWrLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_WRITE2_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0 * 8;
        Addr offset1 = instData.OFFSET1 * 8;

        initDualMemWrite<VecElemU64>(gpuDynInst, offset0, offset1);
    } // initiateAcc

    void
    Inst_DS__DS_WRITE2_B64::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    Inst_DS__DS_WRITE2ST64_B64::Inst_DS__DS_WRITE2ST64_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write2st64_b64")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE2ST64_B64

    Inst_DS__DS_WRITE2ST64_B64::~Inst_DS__DS_WRITE2ST64_B64()
    {
    } // ~Inst_DS__DS_WRITE2ST64_B64

    // MEM[ADDR_BASE + OFFSET0 * 8 * 64] = DATA;
    // MEM[ADDR_BASE + OFFSET1 * 8 * 64] = DATA2;
    void
    Inst_DS__DS_WRITE2ST64_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_CMPST_B64::Inst_DS__DS_CMPST_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_cmpst_b64")
    {
    } // Inst_DS__DS_CMPST_B64

    Inst_DS__DS_CMPST_B64::~Inst_DS__DS_CMPST_B64()
    {
    } // ~Inst_DS__DS_CMPST_B64

    // tmp = MEM[ADDR];
    // src = DATA2;
    // cmp = DATA;
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    // Compare and store.
    void
    Inst_DS__DS_CMPST_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_CMPST_F64::Inst_DS__DS_CMPST_F64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_cmpst_f64")
    {
        setFlag(F64);
    } // Inst_DS__DS_CMPST_F64

    Inst_DS__DS_CMPST_F64::~Inst_DS__DS_CMPST_F64()
    {
    } // ~Inst_DS__DS_CMPST_F64

    // tmp = MEM[ADDR];
    // src = DATA2;
    // cmp = DATA;
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    void
    Inst_DS__DS_CMPST_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MIN_F64::Inst_DS__DS_MIN_F64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_f64")
    {
        setFlag(F64);
    } // Inst_DS__DS_MIN_F64

    Inst_DS__DS_MIN_F64::~Inst_DS__DS_MIN_F64()
    {
    } // ~Inst_DS__DS_MIN_F64

    // tmp = MEM[ADDR];
    // src = DATA;
    // cmp = DATA2;
    // MEM[ADDR] = (cmp < tmp) ? src : tmp.
    void
    Inst_DS__DS_MIN_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MAX_F64::Inst_DS__DS_MAX_F64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_f64")
    {
        setFlag(F64);
    } // Inst_DS__DS_MAX_F64

    Inst_DS__DS_MAX_F64::~Inst_DS__DS_MAX_F64()
    {
    } // ~Inst_DS__DS_MAX_F64

    // tmp = MEM[ADDR];
    // src = DATA;
    // cmp = DATA2;
    // MEM[ADDR] = (tmp > cmp) ? src : tmp.
    void
    Inst_DS__DS_MAX_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_DS__DS_ADD_RTN_U64::Inst_DS__DS_ADD_RTN_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_rtn_u64")
    {
    } // Inst_DS__DS_ADD_RTN_U64

    Inst_DS__DS_ADD_RTN_U64::~Inst_DS__DS_ADD_RTN_U64()
    {
    } // ~Inst_DS__DS_ADD_RTN_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_ADD_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_SUB_RTN_U64::Inst_DS__DS_SUB_RTN_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_sub_rtn_u64")
    {
    } // Inst_DS__DS_SUB_RTN_U64

    Inst_DS__DS_SUB_RTN_U64::~Inst_DS__DS_SUB_RTN_U64()
    {
    } // ~Inst_DS__DS_SUB_RTN_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] -= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_SUB_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_RSUB_RTN_U64::Inst_DS__DS_RSUB_RTN_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_rsub_rtn_u64")
    {
    } // Inst_DS__DS_RSUB_RTN_U64

    Inst_DS__DS_RSUB_RTN_U64::~Inst_DS__DS_RSUB_RTN_U64()
    {
    } // ~Inst_DS__DS_RSUB_RTN_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA - MEM[ADDR];
    // RETURN_DATA = tmp.
    // Subtraction with reversed operands.
    void
    Inst_DS__DS_RSUB_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_INC_RTN_U64::Inst_DS__DS_INC_RTN_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_inc_rtn_u64")
    {
    } // Inst_DS__DS_INC_RTN_U64

    Inst_DS__DS_INC_RTN_U64::~Inst_DS__DS_INC_RTN_U64()
    {
    } // ~Inst_DS__DS_INC_RTN_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_INC_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_DEC_RTN_U64::Inst_DS__DS_DEC_RTN_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_dec_rtn_u64")
    {
    } // Inst_DS__DS_DEC_RTN_U64

    Inst_DS__DS_DEC_RTN_U64::~Inst_DS__DS_DEC_RTN_U64()
    {
    } // ~Inst_DS__DS_DEC_RTN_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
    // (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_DEC_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MIN_RTN_I64::Inst_DS__DS_MIN_RTN_I64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_rtn_i64")
    {
    } // Inst_DS__DS_MIN_RTN_I64

    Inst_DS__DS_MIN_RTN_I64::~Inst_DS__DS_MIN_RTN_I64()
    {
    } // ~Inst_DS__DS_MIN_RTN_I64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_MIN_RTN_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MAX_RTN_I64::Inst_DS__DS_MAX_RTN_I64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_rtn_i64")
    {
    } // Inst_DS__DS_MAX_RTN_I64

    Inst_DS__DS_MAX_RTN_I64::~Inst_DS__DS_MAX_RTN_I64()
    {
    } // ~Inst_DS__DS_MAX_RTN_I64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_MAX_RTN_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MIN_RTN_U64::Inst_DS__DS_MIN_RTN_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_rtn_u64")
    {
    } // Inst_DS__DS_MIN_RTN_U64

    Inst_DS__DS_MIN_RTN_U64::~Inst_DS__DS_MIN_RTN_U64()
    {
    } // ~Inst_DS__DS_MIN_RTN_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_MIN_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MAX_RTN_U64::Inst_DS__DS_MAX_RTN_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_rtn_u64")
    {
    } // Inst_DS__DS_MAX_RTN_U64

    Inst_DS__DS_MAX_RTN_U64::~Inst_DS__DS_MAX_RTN_U64()
    {
    } // ~Inst_DS__DS_MAX_RTN_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_MAX_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_AND_RTN_B64::Inst_DS__DS_AND_RTN_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_and_rtn_b64")
    {
    } // Inst_DS__DS_AND_RTN_B64

    Inst_DS__DS_AND_RTN_B64::~Inst_DS__DS_AND_RTN_B64()
    {
    } // ~Inst_DS__DS_AND_RTN_B64

    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_AND_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_OR_RTN_B64::Inst_DS__DS_OR_RTN_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_or_rtn_b64")
    {
    } // Inst_DS__DS_OR_RTN_B64

    Inst_DS__DS_OR_RTN_B64::~Inst_DS__DS_OR_RTN_B64()
    {
    } // ~Inst_DS__DS_OR_RTN_B64

    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_OR_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_XOR_RTN_B64::Inst_DS__DS_XOR_RTN_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_xor_rtn_b64")
    {
    } // Inst_DS__DS_XOR_RTN_B64

    Inst_DS__DS_XOR_RTN_B64::~Inst_DS__DS_XOR_RTN_B64()
    {
    } // ~Inst_DS__DS_XOR_RTN_B64

    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_XOR_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MSKOR_RTN_B64::Inst_DS__DS_MSKOR_RTN_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_mskor_rtn_b64")
    {
    } // Inst_DS__DS_MSKOR_RTN_B64

    Inst_DS__DS_MSKOR_RTN_B64::~Inst_DS__DS_MSKOR_RTN_B64()
    {
    } // ~Inst_DS__DS_MSKOR_RTN_B64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (MEM[ADDR] & ~DATA) | DATA2;
    // RETURN_DATA = tmp.
    // Masked dword OR, D0 contains the mask and D1 contains the new value.
    void
    Inst_DS__DS_MSKOR_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_WRXCHG_RTN_B64::Inst_DS__DS_WRXCHG_RTN_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_wrxchg_rtn_b64")
    {
    } // Inst_DS__DS_WRXCHG_RTN_B64

    Inst_DS__DS_WRXCHG_RTN_B64::~Inst_DS__DS_WRXCHG_RTN_B64()
    {
    } // ~Inst_DS__DS_WRXCHG_RTN_B64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA;
    // RETURN_DATA = tmp.
    // Write-exchange operation.
    void
    Inst_DS__DS_WRXCHG_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_WRXCHG2_RTN_B64::Inst_DS__DS_WRXCHG2_RTN_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_wrxchg2_rtn_b64")
    {
    } // Inst_DS__DS_WRXCHG2_RTN_B64

    Inst_DS__DS_WRXCHG2_RTN_B64::~Inst_DS__DS_WRXCHG2_RTN_B64()
    {
    } // ~Inst_DS__DS_WRXCHG2_RTN_B64

    // Write-exchange 2 separate qwords.
    void
    Inst_DS__DS_WRXCHG2_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_WRXCHG2ST64_RTN_B64::Inst_DS__DS_WRXCHG2ST64_RTN_B64(
          InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_wrxchg2st64_rtn_b64")
    {
    } // Inst_DS__DS_WRXCHG2ST64_RTN_B64

    Inst_DS__DS_WRXCHG2ST64_RTN_B64::~Inst_DS__DS_WRXCHG2ST64_RTN_B64()
    {
    } // ~Inst_DS__DS_WRXCHG2ST64_RTN_B64

    // Write-exchange 2 qwords with a stride of 64 qwords.
    void
    Inst_DS__DS_WRXCHG2ST64_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_CMPST_RTN_B64::Inst_DS__DS_CMPST_RTN_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_cmpst_rtn_b64")
    {
    } // Inst_DS__DS_CMPST_RTN_B64

    Inst_DS__DS_CMPST_RTN_B64::~Inst_DS__DS_CMPST_RTN_B64()
    {
    } // ~Inst_DS__DS_CMPST_RTN_B64

    // tmp = MEM[ADDR];
    // src = DATA2;
    // cmp = DATA;
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    // Compare and store.
    void
    Inst_DS__DS_CMPST_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_CMPST_RTN_F64::Inst_DS__DS_CMPST_RTN_F64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_cmpst_rtn_f64")
    {
        setFlag(F64);
    } // Inst_DS__DS_CMPST_RTN_F64

    Inst_DS__DS_CMPST_RTN_F64::~Inst_DS__DS_CMPST_RTN_F64()
    {
    } // ~Inst_DS__DS_CMPST_RTN_F64

    // tmp = MEM[ADDR];
    // src = DATA2;
    // cmp = DATA;
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    void
    Inst_DS__DS_CMPST_RTN_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MIN_RTN_F64::Inst_DS__DS_MIN_RTN_F64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_rtn_f64")
    {
        setFlag(F64);
    } // Inst_DS__DS_MIN_RTN_F64

    Inst_DS__DS_MIN_RTN_F64::~Inst_DS__DS_MIN_RTN_F64()
    {
    } // ~Inst_DS__DS_MIN_RTN_F64

    // tmp = MEM[ADDR];
    // src = DATA;
    // cmp = DATA2;
    // MEM[ADDR] = (cmp < tmp) ? src : tmp.
    void
    Inst_DS__DS_MIN_RTN_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MAX_RTN_F64::Inst_DS__DS_MAX_RTN_F64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_rtn_f64")
    {
        setFlag(F64);
    } // Inst_DS__DS_MAX_RTN_F64

    Inst_DS__DS_MAX_RTN_F64::~Inst_DS__DS_MAX_RTN_F64()
    {
    } // ~Inst_DS__DS_MAX_RTN_F64

    // tmp = MEM[ADDR];
    // src = DATA;
    // cmp = DATA2;
    // MEM[ADDR] = (tmp > cmp) ? src : tmp.
    void
    Inst_DS__DS_MAX_RTN_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_DS__DS_READ_B64::Inst_DS__DS_READ_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read_b64")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ_B64

    Inst_DS__DS_READ_B64::~Inst_DS__DS_READ_B64()
    {
    } // ~Inst_DS__DS_READ_B64

    // RETURN_DATA = MEM[ADDR].
    void
    Inst_DS__DS_READ_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->rdLmReqsInPipe--;
        wf->outstandingReqsRdLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_READ_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0;
        Addr offset1 = instData.OFFSET1;
        Addr offset = (offset1 << 8) | offset0;

        initMemRead<VecElemU64>(gpuDynInst, offset);
    } // initiateAcc

    void
    Inst_DS__DS_READ_B64::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU64 vdst(gpuDynInst, extData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst[lane] = (reinterpret_cast<VecElemU64*>(
                    gpuDynInst->d_data))[lane];
            }
        }

        vdst.write();
    } // completeAcc
    Inst_DS__DS_READ2_B64::Inst_DS__DS_READ2_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read2_b64")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ2_B64

    Inst_DS__DS_READ2_B64::~Inst_DS__DS_READ2_B64()
    {
    } // ~Inst_DS__DS_READ2_B64

    // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 8];
    // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 8].
    void
    Inst_DS__DS_READ2_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->rdLmReqsInPipe--;
        wf->outstandingReqsRdLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_READ2_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0 * 8;
        Addr offset1 = instData.OFFSET1 * 8;

        initDualMemRead<VecElemU64>(gpuDynInst, offset0, offset1);
    } // initiateAcc

    void
    Inst_DS__DS_READ2_B64::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU64 vdst0(gpuDynInst, extData.VDST);
        VecOperandU64 vdst1(gpuDynInst, extData.VDST + 2);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst0[lane] = (reinterpret_cast<VecElemU64*>(
                    gpuDynInst->d_data))[lane * 2];
                vdst1[lane] = (reinterpret_cast<VecElemU64*>(
                    gpuDynInst->d_data))[lane * 2 + 1];
            }
        }

        vdst0.write();
        vdst1.write();
    } // completeAcc
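    /**
     * Note the destination register spacing in the dual reads: the b32
     * variants place their second result at VDST + 1, while the b64
     * variants use VDST + 2, since each 64-bit element occupies a pair of
     * consecutive 32-bit VGPRs.
     */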
    Inst_DS__DS_READ2ST64_B64::Inst_DS__DS_READ2ST64_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read2st64_b64")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ2ST64_B64

    Inst_DS__DS_READ2ST64_B64::~Inst_DS__DS_READ2ST64_B64()
    {
    } // ~Inst_DS__DS_READ2ST64_B64

    // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 8 * 64];
    // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 8 * 64].
    void
    Inst_DS__DS_READ2ST64_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->rdLmReqsInPipe--;
        wf->outstandingReqsRdLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_READ2ST64_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = (instData.OFFSET0 * 8 * 64);
        Addr offset1 = (instData.OFFSET1 * 8 * 64);

        initDualMemRead<VecElemU64>(gpuDynInst, offset0, offset1);
    } // initiateAcc

    void
    Inst_DS__DS_READ2ST64_B64::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU64 vdst0(gpuDynInst, extData.VDST);
        VecOperandU64 vdst1(gpuDynInst, extData.VDST + 2);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst0[lane] = (reinterpret_cast<VecElemU64*>(
                    gpuDynInst->d_data))[lane * 2];
                vdst1[lane] = (reinterpret_cast<VecElemU64*>(
                    gpuDynInst->d_data))[lane * 2 + 1];
            }
        }

        vdst0.write();
        vdst1.write();
    } // completeAcc
    Inst_DS__DS_CONDXCHG32_RTN_B64::Inst_DS__DS_CONDXCHG32_RTN_B64(
          InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_condxchg32_rtn_b64")
    {
    } // Inst_DS__DS_CONDXCHG32_RTN_B64

    Inst_DS__DS_CONDXCHG32_RTN_B64::~Inst_DS__DS_CONDXCHG32_RTN_B64()
    {
    } // ~Inst_DS__DS_CONDXCHG32_RTN_B64

    // Conditional write exchange.
    void
    Inst_DS__DS_CONDXCHG32_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_ADD_SRC2_U32::Inst_DS__DS_ADD_SRC2_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_src2_u32")
    {
    } // Inst_DS__DS_ADD_SRC2_U32

    Inst_DS__DS_ADD_SRC2_U32::~Inst_DS__DS_ADD_SRC2_U32()
    {
    } // ~Inst_DS__DS_ADD_SRC2_U32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] + MEM[B].
    void
    Inst_DS__DS_ADD_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
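
    // Decoding the SRC2 "B" address in the spec comment above (a worked
    // example, not normative ISA text): offset1[7] selects between an
    // address derived from A's own upper bits and a 16-bit immediate
    // {offset1[6],offset1[6:0],offset0} scaled by 4. For instance, with
    // offset1 = 0x00 and offset0 = 0x10 the immediate is 0x0010, so
    // B = A + 4 * 0x10 = A + 64, and this (unimplemented) operation would
    // compute MEM[A] += MEM[A + 64]. The same addressing applies to all
    // of the *_src2_* instructions below.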

    Inst_DS__DS_SUB_SRC2_U32::Inst_DS__DS_SUB_SRC2_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_sub_src2_u32")
    {
    } // Inst_DS__DS_SUB_SRC2_U32

    Inst_DS__DS_SUB_SRC2_U32::~Inst_DS__DS_SUB_SRC2_U32()
    {
    } // ~Inst_DS__DS_SUB_SRC2_U32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] - MEM[B].
    void
    Inst_DS__DS_SUB_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_RSUB_SRC2_U32::Inst_DS__DS_RSUB_SRC2_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_rsub_src2_u32")
    {
    } // Inst_DS__DS_RSUB_SRC2_U32

    Inst_DS__DS_RSUB_SRC2_U32::~Inst_DS__DS_RSUB_SRC2_U32()
    {
    } // ~Inst_DS__DS_RSUB_SRC2_U32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[B] - MEM[A].
    void
    Inst_DS__DS_RSUB_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_INC_SRC2_U32::Inst_DS__DS_INC_SRC2_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_inc_src2_u32")
    {
    } // Inst_DS__DS_INC_SRC2_U32

    Inst_DS__DS_INC_SRC2_U32::~Inst_DS__DS_INC_SRC2_U32()
    {
    } // ~Inst_DS__DS_INC_SRC2_U32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[A] >= MEM[B] ? 0 : MEM[A] + 1).
    void
    Inst_DS__DS_INC_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_DEC_SRC2_U32::Inst_DS__DS_DEC_SRC2_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_dec_src2_u32")
    {
    } // Inst_DS__DS_DEC_SRC2_U32

    Inst_DS__DS_DEC_SRC2_U32::~Inst_DS__DS_DEC_SRC2_U32()
    {
    } // ~Inst_DS__DS_DEC_SRC2_U32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[A] == 0 || MEM[A] > MEM[B] ? MEM[B] : MEM[A] - 1).
    void
    Inst_DS__DS_DEC_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MIN_SRC2_I32::Inst_DS__DS_MIN_SRC2_I32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_src2_i32")
    {
    } // Inst_DS__DS_MIN_SRC2_I32

    Inst_DS__DS_MIN_SRC2_I32::~Inst_DS__DS_MIN_SRC2_I32()
    {
    } // ~Inst_DS__DS_MIN_SRC2_I32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = min(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MIN_SRC2_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_SRC2_I32::Inst_DS__DS_MAX_SRC2_I32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_src2_i32")
    {
    } // Inst_DS__DS_MAX_SRC2_I32

    Inst_DS__DS_MAX_SRC2_I32::~Inst_DS__DS_MAX_SRC2_I32()
    {
    } // ~Inst_DS__DS_MAX_SRC2_I32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = max(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MAX_SRC2_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MIN_SRC2_U32::Inst_DS__DS_MIN_SRC2_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_src2_u32")
    {
    } // Inst_DS__DS_MIN_SRC2_U32

    Inst_DS__DS_MIN_SRC2_U32::~Inst_DS__DS_MIN_SRC2_U32()
    {
    } // ~Inst_DS__DS_MIN_SRC2_U32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = min(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MIN_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_SRC2_U32::Inst_DS__DS_MAX_SRC2_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_src2_u32")
    {
    } // Inst_DS__DS_MAX_SRC2_U32

    Inst_DS__DS_MAX_SRC2_U32::~Inst_DS__DS_MAX_SRC2_U32()
    {
    } // ~Inst_DS__DS_MAX_SRC2_U32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = max(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MAX_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_AND_SRC2_B32::Inst_DS__DS_AND_SRC2_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_and_src2_b32")
    {
    } // Inst_DS__DS_AND_SRC2_B32

    Inst_DS__DS_AND_SRC2_B32::~Inst_DS__DS_AND_SRC2_B32()
    {
    } // ~Inst_DS__DS_AND_SRC2_B32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] & MEM[B].
    void
    Inst_DS__DS_AND_SRC2_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_OR_SRC2_B32::Inst_DS__DS_OR_SRC2_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_or_src2_b32")
    {
    } // Inst_DS__DS_OR_SRC2_B32

    Inst_DS__DS_OR_SRC2_B32::~Inst_DS__DS_OR_SRC2_B32()
    {
    } // ~Inst_DS__DS_OR_SRC2_B32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] | MEM[B].
    void
    Inst_DS__DS_OR_SRC2_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_XOR_SRC2_B32::Inst_DS__DS_XOR_SRC2_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_xor_src2_b32")
    {
    } // Inst_DS__DS_XOR_SRC2_B32

    Inst_DS__DS_XOR_SRC2_B32::~Inst_DS__DS_XOR_SRC2_B32()
    {
    } // ~Inst_DS__DS_XOR_SRC2_B32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] ^ MEM[B].
    void
    Inst_DS__DS_XOR_SRC2_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_WRITE_SRC2_B32::Inst_DS__DS_WRITE_SRC2_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write_src2_b32")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE_SRC2_B32

    Inst_DS__DS_WRITE_SRC2_B32::~Inst_DS__DS_WRITE_SRC2_B32()
    {
    } // ~Inst_DS__DS_WRITE_SRC2_B32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[B].
    // Write dword.
    void
    Inst_DS__DS_WRITE_SRC2_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MIN_SRC2_F32::Inst_DS__DS_MIN_SRC2_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_src2_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_MIN_SRC2_F32

    Inst_DS__DS_MIN_SRC2_F32::~Inst_DS__DS_MIN_SRC2_F32()
    {
    } // ~Inst_DS__DS_MIN_SRC2_F32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[B] < MEM[A]) ? MEM[B] : MEM[A].
    void
    Inst_DS__DS_MIN_SRC2_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_SRC2_F32::Inst_DS__DS_MAX_SRC2_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_src2_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_MAX_SRC2_F32

    Inst_DS__DS_MAX_SRC2_F32::~Inst_DS__DS_MAX_SRC2_F32()
    {
    } // ~Inst_DS__DS_MAX_SRC2_F32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[B] > MEM[A]) ? MEM[B] : MEM[A].
    void
    Inst_DS__DS_MAX_SRC2_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_ADD_SRC2_F32::Inst_DS__DS_ADD_SRC2_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_src2_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_ADD_SRC2_F32

    Inst_DS__DS_ADD_SRC2_F32::~Inst_DS__DS_ADD_SRC2_F32()
    {
    } // ~Inst_DS__DS_ADD_SRC2_F32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[B] + MEM[A].
    void
    Inst_DS__DS_ADD_SRC2_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_GWS_SEMA_RELEASE_ALL::Inst_DS__DS_GWS_SEMA_RELEASE_ALL(
            InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_gws_sema_release_all")
    {
    } // Inst_DS__DS_GWS_SEMA_RELEASE_ALL

    Inst_DS__DS_GWS_SEMA_RELEASE_ALL::~Inst_DS__DS_GWS_SEMA_RELEASE_ALL()
    {
    } // ~Inst_DS__DS_GWS_SEMA_RELEASE_ALL

    void
    Inst_DS__DS_GWS_SEMA_RELEASE_ALL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_GWS_INIT::Inst_DS__DS_GWS_INIT(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_gws_init")
    {
    } // Inst_DS__DS_GWS_INIT

    Inst_DS__DS_GWS_INIT::~Inst_DS__DS_GWS_INIT()
    {
    } // ~Inst_DS__DS_GWS_INIT

    void
    Inst_DS__DS_GWS_INIT::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_GWS_SEMA_V::Inst_DS__DS_GWS_SEMA_V(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_gws_sema_v")
    {
    } // Inst_DS__DS_GWS_SEMA_V

    Inst_DS__DS_GWS_SEMA_V::~Inst_DS__DS_GWS_SEMA_V()
    {
    } // ~Inst_DS__DS_GWS_SEMA_V

    void
    Inst_DS__DS_GWS_SEMA_V::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_GWS_SEMA_BR::Inst_DS__DS_GWS_SEMA_BR(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_gws_sema_br")
    {
    } // Inst_DS__DS_GWS_SEMA_BR

    Inst_DS__DS_GWS_SEMA_BR::~Inst_DS__DS_GWS_SEMA_BR()
    {
    } // ~Inst_DS__DS_GWS_SEMA_BR

    void
    Inst_DS__DS_GWS_SEMA_BR::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_GWS_SEMA_P::Inst_DS__DS_GWS_SEMA_P(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_gws_sema_p")
    {
    } // Inst_DS__DS_GWS_SEMA_P

    Inst_DS__DS_GWS_SEMA_P::~Inst_DS__DS_GWS_SEMA_P()
    {
    } // ~Inst_DS__DS_GWS_SEMA_P

    void
    Inst_DS__DS_GWS_SEMA_P::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_GWS_BARRIER::Inst_DS__DS_GWS_BARRIER(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_gws_barrier")
    {
    } // Inst_DS__DS_GWS_BARRIER

    Inst_DS__DS_GWS_BARRIER::~Inst_DS__DS_GWS_BARRIER()
    {
    } // ~Inst_DS__DS_GWS_BARRIER

    void
    Inst_DS__DS_GWS_BARRIER::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_CONSUME::Inst_DS__DS_CONSUME(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_consume")
    {
    } // Inst_DS__DS_CONSUME

    Inst_DS__DS_CONSUME::~Inst_DS__DS_CONSUME()
    {
    } // ~Inst_DS__DS_CONSUME

    void
    Inst_DS__DS_CONSUME::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_APPEND::Inst_DS__DS_APPEND(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_append")
    {
    } // Inst_DS__DS_APPEND

    Inst_DS__DS_APPEND::~Inst_DS__DS_APPEND()
    {
    } // ~Inst_DS__DS_APPEND

    void
    Inst_DS__DS_APPEND::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_ORDERED_COUNT::Inst_DS__DS_ORDERED_COUNT(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_ordered_count")
    {
    } // Inst_DS__DS_ORDERED_COUNT

    Inst_DS__DS_ORDERED_COUNT::~Inst_DS__DS_ORDERED_COUNT()
    {
    } // ~Inst_DS__DS_ORDERED_COUNT

    void
    Inst_DS__DS_ORDERED_COUNT::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_ADD_SRC2_U64::Inst_DS__DS_ADD_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_src2_u64")
    {
    } // Inst_DS__DS_ADD_SRC2_U64

    Inst_DS__DS_ADD_SRC2_U64::~Inst_DS__DS_ADD_SRC2_U64()
    {
    } // ~Inst_DS__DS_ADD_SRC2_U64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] + MEM[B].
    void
    Inst_DS__DS_ADD_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_SUB_SRC2_U64::Inst_DS__DS_SUB_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_sub_src2_u64")
    {
    } // Inst_DS__DS_SUB_SRC2_U64

    Inst_DS__DS_SUB_SRC2_U64::~Inst_DS__DS_SUB_SRC2_U64()
    {
    } // ~Inst_DS__DS_SUB_SRC2_U64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] - MEM[B].
    void
    Inst_DS__DS_SUB_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_RSUB_SRC2_U64::Inst_DS__DS_RSUB_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_rsub_src2_u64")
    {
    } // Inst_DS__DS_RSUB_SRC2_U64

    Inst_DS__DS_RSUB_SRC2_U64::~Inst_DS__DS_RSUB_SRC2_U64()
    {
    } // ~Inst_DS__DS_RSUB_SRC2_U64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[B] - MEM[A].
    void
    Inst_DS__DS_RSUB_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_INC_SRC2_U64::Inst_DS__DS_INC_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_inc_src2_u64")
    {
    } // Inst_DS__DS_INC_SRC2_U64

    Inst_DS__DS_INC_SRC2_U64::~Inst_DS__DS_INC_SRC2_U64()
    {
    } // ~Inst_DS__DS_INC_SRC2_U64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[A] >= MEM[B] ? 0 : MEM[A] + 1).
    void
    Inst_DS__DS_INC_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_DEC_SRC2_U64::Inst_DS__DS_DEC_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_dec_src2_u64")
    {
    } // Inst_DS__DS_DEC_SRC2_U64

    Inst_DS__DS_DEC_SRC2_U64::~Inst_DS__DS_DEC_SRC2_U64()
    {
    } // ~Inst_DS__DS_DEC_SRC2_U64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[A] == 0 || MEM[A] > MEM[B] ? MEM[B] : MEM[A] - 1).
    void
    Inst_DS__DS_DEC_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MIN_SRC2_I64::Inst_DS__DS_MIN_SRC2_I64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_src2_i64")
    {
    } // Inst_DS__DS_MIN_SRC2_I64

    Inst_DS__DS_MIN_SRC2_I64::~Inst_DS__DS_MIN_SRC2_I64()
    {
    } // ~Inst_DS__DS_MIN_SRC2_I64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = min(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MIN_SRC2_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_SRC2_I64::Inst_DS__DS_MAX_SRC2_I64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_src2_i64")
    {
    } // Inst_DS__DS_MAX_SRC2_I64

    Inst_DS__DS_MAX_SRC2_I64::~Inst_DS__DS_MAX_SRC2_I64()
    {
    } // ~Inst_DS__DS_MAX_SRC2_I64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = max(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MAX_SRC2_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MIN_SRC2_U64::Inst_DS__DS_MIN_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_src2_u64")
    {
    } // Inst_DS__DS_MIN_SRC2_U64

    Inst_DS__DS_MIN_SRC2_U64::~Inst_DS__DS_MIN_SRC2_U64()
    {
    } // ~Inst_DS__DS_MIN_SRC2_U64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = min(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MIN_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_SRC2_U64::Inst_DS__DS_MAX_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_src2_u64")
    {
    } // Inst_DS__DS_MAX_SRC2_U64

    Inst_DS__DS_MAX_SRC2_U64::~Inst_DS__DS_MAX_SRC2_U64()
    {
    } // ~Inst_DS__DS_MAX_SRC2_U64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = max(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MAX_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_AND_SRC2_B64::Inst_DS__DS_AND_SRC2_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_and_src2_b64")
    {
    } // Inst_DS__DS_AND_SRC2_B64

    Inst_DS__DS_AND_SRC2_B64::~Inst_DS__DS_AND_SRC2_B64()
    {
    } // ~Inst_DS__DS_AND_SRC2_B64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] & MEM[B].
    void
    Inst_DS__DS_AND_SRC2_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_OR_SRC2_B64::Inst_DS__DS_OR_SRC2_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_or_src2_b64")
    {
    } // Inst_DS__DS_OR_SRC2_B64

    Inst_DS__DS_OR_SRC2_B64::~Inst_DS__DS_OR_SRC2_B64()
    {
    } // ~Inst_DS__DS_OR_SRC2_B64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] | MEM[B].
    void
    Inst_DS__DS_OR_SRC2_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_XOR_SRC2_B64::Inst_DS__DS_XOR_SRC2_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_xor_src2_b64")
    {
    } // Inst_DS__DS_XOR_SRC2_B64

    Inst_DS__DS_XOR_SRC2_B64::~Inst_DS__DS_XOR_SRC2_B64()
    {
    } // ~Inst_DS__DS_XOR_SRC2_B64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] ^ MEM[B].
    void
    Inst_DS__DS_XOR_SRC2_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_WRITE_SRC2_B64::Inst_DS__DS_WRITE_SRC2_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write_src2_b64")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE_SRC2_B64

    Inst_DS__DS_WRITE_SRC2_B64::~Inst_DS__DS_WRITE_SRC2_B64()
    {
    } // ~Inst_DS__DS_WRITE_SRC2_B64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[B].
    // Write qword.
    void
    Inst_DS__DS_WRITE_SRC2_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MIN_SRC2_F64::Inst_DS__DS_MIN_SRC2_F64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_src2_f64")
    {
        setFlag(F64);
    } // Inst_DS__DS_MIN_SRC2_F64

    Inst_DS__DS_MIN_SRC2_F64::~Inst_DS__DS_MIN_SRC2_F64()
    {
    } // ~Inst_DS__DS_MIN_SRC2_F64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[B] < MEM[A]) ? MEM[B] : MEM[A].
    void
    Inst_DS__DS_MIN_SRC2_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_SRC2_F64::Inst_DS__DS_MAX_SRC2_F64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_src2_f64")
    {
        setFlag(F64);
    } // Inst_DS__DS_MAX_SRC2_F64

    Inst_DS__DS_MAX_SRC2_F64::~Inst_DS__DS_MAX_SRC2_F64()
    {
    } // ~Inst_DS__DS_MAX_SRC2_F64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[B] > MEM[A]) ? MEM[B] : MEM[A].
    void
    Inst_DS__DS_MAX_SRC2_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_WRITE_B96::Inst_DS__DS_WRITE_B96(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write_b96")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE_B96

    Inst_DS__DS_WRITE_B96::~Inst_DS__DS_WRITE_B96()
    {
    } // ~Inst_DS__DS_WRITE_B96

    // {MEM[ADDR + 8], MEM[ADDR + 4], MEM[ADDR]} = DATA[95:0].
    // Tri-dword write.
    void
    Inst_DS__DS_WRITE_B96::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_WRITE_B128::Inst_DS__DS_WRITE_B128(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write_b128")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE_B128

    Inst_DS__DS_WRITE_B128::~Inst_DS__DS_WRITE_B128()
    {
    } // ~Inst_DS__DS_WRITE_B128

    // {MEM[ADDR + 12], MEM[ADDR + 8], MEM[ADDR + 4], MEM[ADDR]} = DATA[127:0].
    void
    Inst_DS__DS_WRITE_B128::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_READ_B96::Inst_DS__DS_READ_B96(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read_b96")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ_B96

    Inst_DS__DS_READ_B96::~Inst_DS__DS_READ_B96()
    {
    } // ~Inst_DS__DS_READ_B96

    // Tri-dword read.
    void
    Inst_DS__DS_READ_B96::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_READ_B128::Inst_DS__DS_READ_B128(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read_b128")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ_B128

    Inst_DS__DS_READ_B128::~Inst_DS__DS_READ_B128()
    {
    } // ~Inst_DS__DS_READ_B128

    void
    Inst_DS__DS_READ_B128::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MUBUF__BUFFER_LOAD_FORMAT_X
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_X(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_x")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_X

    Inst_MUBUF__BUFFER_LOAD_FORMAT_X::~Inst_MUBUF__BUFFER_LOAD_FORMAT_X()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_X

    // Untyped buffer load 1 dword with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XY(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_xy")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XY

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XY()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XY

    // Untyped buffer load 2 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ

    // Untyped buffer load 3 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW

    // Untyped buffer load 4 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_STORE_FORMAT_X
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_X(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_x")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_X

    Inst_MUBUF__BUFFER_STORE_FORMAT_X::~Inst_MUBUF__BUFFER_STORE_FORMAT_X()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_X

    // Untyped buffer store 1 dword with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_STORE_FORMAT_XY
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_XY(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_xy")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_XY

    Inst_MUBUF__BUFFER_STORE_FORMAT_XY::~Inst_MUBUF__BUFFER_STORE_FORMAT_XY()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XY

    // Untyped buffer store 2 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ

    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ

    // Untyped buffer store 3 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW

    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
        ::~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW

    // Untyped buffer store 4 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_d16_x")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
        ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X

    // Untyped buffer load 1 dword with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_d16_xy")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
        ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY

    // Untyped buffer load 2 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_d16_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
        ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ

    // Untyped buffer load 3 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_d16_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW
        ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW

    // Untyped buffer load 4 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_d16_x")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X
        ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X

    // Untyped buffer store 1 dword with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_d16_xy")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
        ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY

    // Untyped buffer store 2 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_d16_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
        ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ

    // Untyped buffer store 3 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_d16_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
        ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW

    // Untyped buffer store 4 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_UBYTE
        ::Inst_MUBUF__BUFFER_LOAD_UBYTE(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_ubyte")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_LOAD_UBYTE
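
    // When the instruction's LDS bit is set, the loaded data is returned
    // to LDS rather than to a VGPR, so the access is tagged as a group
    // segment reference; otherwise it is an ordinary global segment load.
    // The same selection recurs in the buffer loads and stores below.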

    Inst_MUBUF__BUFFER_LOAD_UBYTE::~Inst_MUBUF__BUFFER_LOAD_UBYTE()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_UBYTE

    // Untyped buffer load unsigned byte (zero extend to VGPR destination).
    void
    Inst_MUBUF__BUFFER_LOAD_UBYTE::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdLmReqsInPipe--;
            wf->outstandingReqsRdLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }
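
    // The four calcAddr cases above cover the MUBUF addressing modes: the
    // IDXEN and OFFEN bits each add one VGPR to the address calculation,
    // so VADDR supplies (index, offset) when both are set and the single
    // enabled value otherwise. Note how the operand order passed to
    // calcAddr flips to (addr1, addr0) once IDXEN is set, which is how
    // the helper tells the index VGPR apart from the offset VGPR; only
    // the VGPRs a mode actually consumes are read().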

    void
    Inst_MUBUF__BUFFER_LOAD_UBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<VecElemU8>(gpuDynInst);
    }

    void
    Inst_MUBUF__BUFFER_LOAD_UBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst(gpuDynInst, extData.VDATA);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                if (!oobMask[lane]) {
                    vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU8*>(
                        gpuDynInst->d_data))[lane]);
                } else {
                    vdst[lane] = 0;
                }
            }
        }

        vdst.write();
    }
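
    // oobMask flags lanes whose computed address fell outside the bounds
    // of the buffer described by the resource descriptor; following the
    // buffer-semantics convention this model implements, out-of-bounds
    // lanes return 0 instead of faulting.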

    Inst_MUBUF__BUFFER_LOAD_SBYTE
        ::Inst_MUBUF__BUFFER_LOAD_SBYTE(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_sbyte")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_SBYTE

    Inst_MUBUF__BUFFER_LOAD_SBYTE::~Inst_MUBUF__BUFFER_LOAD_SBYTE()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_SBYTE

    // Untyped buffer load signed byte (sign extend to VGPR destination).
    void
    Inst_MUBUF__BUFFER_LOAD_SBYTE::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_SBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_LOAD_SBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_USHORT
        ::Inst_MUBUF__BUFFER_LOAD_USHORT(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_ushort")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_LOAD_USHORT

    Inst_MUBUF__BUFFER_LOAD_USHORT::~Inst_MUBUF__BUFFER_LOAD_USHORT()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_USHORT

    // Untyped buffer load unsigned short (zero extend to VGPR destination).
    void
    Inst_MUBUF__BUFFER_LOAD_USHORT::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdLmReqsInPipe--;
            wf->outstandingReqsRdLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_USHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<VecElemU16>(gpuDynInst);
    }

    void
    Inst_MUBUF__BUFFER_LOAD_USHORT::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst(gpuDynInst, extData.VDATA);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                if (!oobMask[lane]) {
                    vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU16*>(
                        gpuDynInst->d_data))[lane]);
                } else {
                    vdst[lane] = 0;
                }
            }
        }

        vdst.write();
    }

    Inst_MUBUF__BUFFER_LOAD_SSHORT
        ::Inst_MUBUF__BUFFER_LOAD_SSHORT(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_sshort")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_SSHORT

    Inst_MUBUF__BUFFER_LOAD_SSHORT::~Inst_MUBUF__BUFFER_LOAD_SSHORT()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_SSHORT

    // Untyped buffer load signed short (sign extend to VGPR destination).
    void
    Inst_MUBUF__BUFFER_LOAD_SSHORT::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_SSHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_LOAD_SSHORT::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_DWORD
        ::Inst_MUBUF__BUFFER_LOAD_DWORD(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_dword")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_LOAD_DWORD

    Inst_MUBUF__BUFFER_LOAD_DWORD::~Inst_MUBUF__BUFFER_LOAD_DWORD()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_DWORD

    // Untyped buffer load dword.
    void
    Inst_MUBUF__BUFFER_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdLmReqsInPipe--;
            wf->outstandingReqsRdLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<VecElemU32>(gpuDynInst);
    }
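
    // Note: initMemRead is instantiated two ways in this file: with an
    // element type (initMemRead<VecElemU8/U16/U32>) when each lane loads a
    // single value, and with an integer dword count (initMemRead<2>, <3>,
    // <4>) for the multi-dword loads below, which stage all of a lane's
    // dwords contiguously in d_data.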

    void
    Inst_MUBUF__BUFFER_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst(gpuDynInst, extData.VDATA);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                if (!oobMask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane];
                } else {
                    vdst[lane] = 0;
                }
            }
        }

        vdst.write();
    }

    Inst_MUBUF__BUFFER_LOAD_DWORDX2
        ::Inst_MUBUF__BUFFER_LOAD_DWORDX2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_dwordx2")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_LOAD_DWORDX2

    Inst_MUBUF__BUFFER_LOAD_DWORDX2::~Inst_MUBUF__BUFFER_LOAD_DWORDX2()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX2

    // Untyped buffer load 2 dwords.
    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdLmReqsInPipe--;
            wf->outstandingReqsRdLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<2>(gpuDynInst);
    }

    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst0(gpuDynInst, extData.VDATA);
        VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                if (!oobMask[lane]) {
                    vdst0[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 2];
                    vdst1[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 2 + 1];
                } else {
                    vdst0[lane] = 0;
                    vdst1[lane] = 0;
                }
            }
        }

        vdst0.write();
        vdst1.write();
    }

    Inst_MUBUF__BUFFER_LOAD_DWORDX3
        ::Inst_MUBUF__BUFFER_LOAD_DWORDX3(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_dwordx3")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_LOAD_DWORDX3

    Inst_MUBUF__BUFFER_LOAD_DWORDX3::~Inst_MUBUF__BUFFER_LOAD_DWORDX3()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX3

    // Untyped buffer load 3 dwords.
    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX3::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdLmReqsInPipe--;
            wf->outstandingReqsRdLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<3>(gpuDynInst);
    }

    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst0(gpuDynInst, extData.VDATA);
        VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1);
        VecOperandU32 vdst2(gpuDynInst, extData.VDATA + 2);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                if (!oobMask[lane]) {
                    vdst0[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 3];
                    vdst1[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 3 + 1];
                    vdst2[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 3 + 2];
                } else {
                    vdst0[lane] = 0;
                    vdst1[lane] = 0;
                    vdst2[lane] = 0;
                }
            }
        }

        vdst0.write();
        vdst1.write();
        vdst2.write();
    }

    Inst_MUBUF__BUFFER_LOAD_DWORDX4
        ::Inst_MUBUF__BUFFER_LOAD_DWORDX4(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_dwordx4")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_LOAD_DWORDX4

    Inst_MUBUF__BUFFER_LOAD_DWORDX4::~Inst_MUBUF__BUFFER_LOAD_DWORDX4()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX4

    // Untyped buffer load 4 dwords.
    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdLmReqsInPipe--;
            wf->outstandingReqsRdLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<4>(gpuDynInst);
    }

    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst0(gpuDynInst, extData.VDATA);
        VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1);
        VecOperandU32 vdst2(gpuDynInst, extData.VDATA + 2);
        VecOperandU32 vdst3(gpuDynInst, extData.VDATA + 3);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                if (!oobMask[lane]) {
                    vdst0[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 4];
                    vdst1[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 4 + 1];
                    vdst2[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 4 + 2];
                    vdst3[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 4 + 3];
                } else {
                    vdst0[lane] = 0;
                    vdst1[lane] = 0;
                    vdst2[lane] = 0;
                    vdst3[lane] = 0;
                }
            }
        }

        vdst0.write();
        vdst1.write();
        vdst2.write();
        vdst3.write();
    }
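
    // A dwordx4 result occupies four consecutive VGPRs, so completeAcc
    // scatters d_data element [lane * 4 + i] into VDATA + i for i in 0..3,
    // zeroing all four destinations for out-of-bounds lanes.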

    Inst_MUBUF__BUFFER_STORE_BYTE
        ::Inst_MUBUF__BUFFER_STORE_BYTE(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_byte")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_STORE_BYTE

    Inst_MUBUF__BUFFER_STORE_BYTE::~Inst_MUBUF__BUFFER_STORE_BYTE()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_BYTE

    // Untyped buffer store byte.
    void
    Inst_MUBUF__BUFFER_STORE_BYTE::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrLmReqsInPipe--;
            wf->outstandingReqsWrLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }

    void
    Inst_MUBUF__BUFFER_STORE_BYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstVecOperandI8 data(gpuDynInst, extData.VDATA);
        data.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemI8*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        initMemWrite<VecElemI8>(gpuDynInst);
    }
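
    // Stores run the copy in the opposite direction from loads: initiateAcc
    // gathers each active lane's register value into the instruction's
    // d_data staging buffer and then calls initMemWrite to issue the actual
    // memory access, while completeAcc is empty because a store needs no
    // register writeback.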

    void
    Inst_MUBUF__BUFFER_STORE_BYTE::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_STORE_SHORT
        ::Inst_MUBUF__BUFFER_STORE_SHORT(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_short")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_STORE_SHORT

    Inst_MUBUF__BUFFER_STORE_SHORT::~Inst_MUBUF__BUFFER_STORE_SHORT()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_SHORT

    // Untyped buffer store short.
    void
    Inst_MUBUF__BUFFER_STORE_SHORT::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrLmReqsInPipe--;
            wf->outstandingReqsWrLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }

    void
    Inst_MUBUF__BUFFER_STORE_SHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstVecOperandI16 data(gpuDynInst, extData.VDATA);
        data.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemI16*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        initMemWrite<VecElemI16>(gpuDynInst);
    }
35911 Inst_MUBUF__BUFFER_STORE_SHORT::completeAcc(GPUDynInstPtr gpuDynInst
)
35915 Inst_MUBUF__BUFFER_STORE_DWORD::
35916 Inst_MUBUF__BUFFER_STORE_DWORD(InFmt_MUBUF
*iFmt
)
35917 : Inst_MUBUF(iFmt
, "buffer_store_dword")
35919 setFlag(MemoryRef
);
35921 if (instData
.LDS
) {
35922 setFlag(GroupSegment
);
35924 setFlag(GlobalSegment
);
35926 } // Inst_MUBUF__BUFFER_STORE_DWORD
35928 Inst_MUBUF__BUFFER_STORE_DWORD::~Inst_MUBUF__BUFFER_STORE_DWORD()
35930 } // ~Inst_MUBUF__BUFFER_STORE_DWORD

    // Untyped buffer store dword.
    void
    Inst_MUBUF__BUFFER_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrLmReqsInPipe--;
            wf->outstandingReqsWrLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstVecOperandU32 data(gpuDynInst, extData.VDATA);
        data.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        initMemWrite<VecElemU32>(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MUBUF__BUFFER_STORE_DWORDX2
        ::Inst_MUBUF__BUFFER_STORE_DWORDX2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_dwordx2")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_STORE_DWORDX2

    Inst_MUBUF__BUFFER_STORE_DWORDX2::~Inst_MUBUF__BUFFER_STORE_DWORDX2()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_DWORDX2

    // Untyped buffer store 2 dwords.
    void
    Inst_MUBUF__BUFFER_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
        ConstVecOperandU32 data0(gpuDynInst, extData.VDATA);
        ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1);

        rsrcDesc.read();
        offset.read();
        data0.read();
        data1.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrLmReqsInPipe--;
            wf->outstandingReqsWrLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4]
                    = data0[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 1]
                    = data1[lane];
            }
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemWrite<2>(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
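
    // Reader's note: unlike buffer_store_dword, which stages VDATA into
    // d_data in initiateAcc(), the x2/x3/x4 variants stage their dwords in
    // execute() (the loop above), spaced four dword slots per lane, and
    // initiateAcc() only starts the memory write via initMemWrite<N>.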

    Inst_MUBUF__BUFFER_STORE_DWORDX3
        ::Inst_MUBUF__BUFFER_STORE_DWORDX3(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_dwordx3")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_STORE_DWORDX3

    Inst_MUBUF__BUFFER_STORE_DWORDX3::~Inst_MUBUF__BUFFER_STORE_DWORDX3()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_DWORDX3

    // Untyped buffer store 3 dwords.
    void
    Inst_MUBUF__BUFFER_STORE_DWORDX3::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
        ConstVecOperandU32 data0(gpuDynInst, extData.VDATA);
        ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1);
        ConstVecOperandU32 data2(gpuDynInst, extData.VDATA + 2);

        rsrcDesc.read();
        offset.read();
        data0.read();
        data1.read();
        data2.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrLmReqsInPipe--;
            wf->outstandingReqsWrLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4]
                    = data0[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 1]
                    = data1[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 2]
                    = data2[lane];
            }
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemWrite<3>(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MUBUF__BUFFER_STORE_DWORDX4
        ::Inst_MUBUF__BUFFER_STORE_DWORDX4(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_dwordx4")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_STORE_DWORDX4

    Inst_MUBUF__BUFFER_STORE_DWORDX4::~Inst_MUBUF__BUFFER_STORE_DWORDX4()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_DWORDX4

    // Untyped buffer store 4 dwords.
    void
    Inst_MUBUF__BUFFER_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
        ConstVecOperandU32 data0(gpuDynInst, extData.VDATA);
        ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1);
        ConstVecOperandU32 data2(gpuDynInst, extData.VDATA + 2);
        ConstVecOperandU32 data3(gpuDynInst, extData.VDATA + 3);

        rsrcDesc.read();
        offset.read();
        data0.read();
        data1.read();
        data2.read();
        data3.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrLmReqsInPipe--;
            wf->outstandingReqsWrLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4]
                    = data0[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 1]
                    = data1[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 2]
                    = data2[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 3]
                    = data3[lane];
            }
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemWrite<4>(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MUBUF__BUFFER_STORE_LDS_DWORD
        ::Inst_MUBUF__BUFFER_STORE_LDS_DWORD(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_lds_dword")
    {
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_LDS_DWORD

    Inst_MUBUF__BUFFER_STORE_LDS_DWORD::~Inst_MUBUF__BUFFER_STORE_LDS_DWORD()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_LDS_DWORD

    // Store one DWORD from LDS memory to system memory without utilizing
    // VGPRs.
    void
    Inst_MUBUF__BUFFER_STORE_LDS_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_WBINVL1::Inst_MUBUF__BUFFER_WBINVL1(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_wbinvl1")
    {
        setFlag(MemoryRef);
        setFlag(GPUStaticInst::MemSync);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_WBINVL1

    Inst_MUBUF__BUFFER_WBINVL1::~Inst_MUBUF__BUFFER_WBINVL1()
    {
    } // ~Inst_MUBUF__BUFFER_WBINVL1

    // Write back and invalidate the shader L1.
    // Always returns ACK to shader.
    void
    Inst_MUBUF__BUFFER_WBINVL1::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;

            wf->outstandingReqsWrGm++;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_MUBUF__BUFFER_WBINVL1::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        injectGlobalMemFence(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_WBINVL1::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
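
    // Reader's note: the cache writeback-invalidate is charged against
    // both the read and the write global-memory counters in execute()
    // above, presumably because the fence must order both kinds of
    // outstanding access; it still counts as a single request in
    // wf->outstandingReqs.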

    Inst_MUBUF__BUFFER_WBINVL1_VOL
        ::Inst_MUBUF__BUFFER_WBINVL1_VOL(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_wbinvl1_vol") {
        /**
         * This instruction is the same as buffer_wbinvl1, except that it
         * only invalidates L1 shader lines whose MTYPE marks them for
         * system or group coherence. Since the L1 does not differentiate
         * between its cache lines, this instruction currently behaves
         * (and is implemented) exactly like buffer_wbinvl1.
         */
        setFlag(MemoryRef);
        setFlag(GPUStaticInst::MemSync);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_WBINVL1_VOL

    Inst_MUBUF__BUFFER_WBINVL1_VOL::~Inst_MUBUF__BUFFER_WBINVL1_VOL()
    {
    } // ~Inst_MUBUF__BUFFER_WBINVL1_VOL

    // Write back and invalidate the shader L1 only for lines that are marked
    // volatile. Always returns ACK to shader.
    void
    Inst_MUBUF__BUFFER_WBINVL1_VOL::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;

            wf->outstandingReqsWrGm++;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_MUBUF__BUFFER_WBINVL1_VOL::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        injectGlobalMemFence(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_WBINVL1_VOL::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MUBUF__BUFFER_ATOMIC_SWAP
        ::Inst_MUBUF__BUFFER_ATOMIC_SWAP(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_swap")
    {
        setFlag(AtomicExch);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_SWAP

    Inst_MUBUF__BUFFER_ATOMIC_SWAP::~Inst_MUBUF__BUFFER_ATOMIC_SWAP()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_SWAP

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
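
    // Reader's note for buffer_atomic_swap above and all buffer atomics
    // that follow: the GLC bit selects the ISA variant. GLC=1 returns the
    // pre-operation memory value to the destination VGPRs (AtomicReturn);
    // GLC=0 returns nothing (AtomicNoReturn).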

    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP
        ::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_cmpswap")
    {
        setFlag(AtomicCAS);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP

    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP

    // tmp = MEM[ADDR];
    // src = DATA[0];
    // cmp = DATA[1];
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_ADD
        ::Inst_MUBUF__BUFFER_ATOMIC_ADD(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_add")
    {
        setFlag(AtomicAdd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_ADD

    Inst_MUBUF__BUFFER_ATOMIC_ADD::~Inst_MUBUF__BUFFER_ATOMIC_ADD()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_ADD

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_SUB
        ::Inst_MUBUF__BUFFER_ATOMIC_SUB(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_sub")
    {
        setFlag(AtomicSub);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_SUB

    Inst_MUBUF__BUFFER_ATOMIC_SUB::~Inst_MUBUF__BUFFER_ATOMIC_SUB()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_SUB

    // tmp = MEM[ADDR];
    // MEM[ADDR] -= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_SMIN
        ::Inst_MUBUF__BUFFER_ATOMIC_SMIN(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_smin")
    {
        setFlag(AtomicMin);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_SMIN

    Inst_MUBUF__BUFFER_ATOMIC_SMIN::~Inst_MUBUF__BUFFER_ATOMIC_SMIN()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_SMIN

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_UMIN
        ::Inst_MUBUF__BUFFER_ATOMIC_UMIN(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_umin")
    {
        setFlag(AtomicMin);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_UMIN

    Inst_MUBUF__BUFFER_ATOMIC_UMIN::~Inst_MUBUF__BUFFER_ATOMIC_UMIN()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_UMIN

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_SMAX
        ::Inst_MUBUF__BUFFER_ATOMIC_SMAX(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_smax")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_SMAX

    Inst_MUBUF__BUFFER_ATOMIC_SMAX::~Inst_MUBUF__BUFFER_ATOMIC_SMAX()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_SMAX

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_UMAX
        ::Inst_MUBUF__BUFFER_ATOMIC_UMAX(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_umax")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_UMAX

    Inst_MUBUF__BUFFER_ATOMIC_UMAX::~Inst_MUBUF__BUFFER_ATOMIC_UMAX()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_UMAX

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_AND
        ::Inst_MUBUF__BUFFER_ATOMIC_AND(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_and")
    {
        setFlag(AtomicAnd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_AND

    Inst_MUBUF__BUFFER_ATOMIC_AND::~Inst_MUBUF__BUFFER_ATOMIC_AND()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_AND

    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_OR
        ::Inst_MUBUF__BUFFER_ATOMIC_OR(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_or")
    {
        setFlag(AtomicOr);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_OR

    Inst_MUBUF__BUFFER_ATOMIC_OR::~Inst_MUBUF__BUFFER_ATOMIC_OR()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_OR

    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_XOR
        ::Inst_MUBUF__BUFFER_ATOMIC_XOR(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_xor")
    {
        setFlag(AtomicXor);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_XOR

    Inst_MUBUF__BUFFER_ATOMIC_XOR::~Inst_MUBUF__BUFFER_ATOMIC_XOR()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_XOR

    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_INC
        ::Inst_MUBUF__BUFFER_ATOMIC_INC(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_inc")
    {
        setFlag(AtomicInc);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_INC

    Inst_MUBUF__BUFFER_ATOMIC_INC::~Inst_MUBUF__BUFFER_ATOMIC_INC()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_INC

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
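
    // Worked example of the wrapping increment: with DATA = 3 and
    // MEM[ADDR] initially 0, successive buffer_atomic_inc operations
    // store 1, 2, 3, 0, 1, ...; the cell counts modulo DATA + 1.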

    Inst_MUBUF__BUFFER_ATOMIC_DEC
        ::Inst_MUBUF__BUFFER_ATOMIC_DEC(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_dec")
    {
        setFlag(AtomicDec);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_DEC

    Inst_MUBUF__BUFFER_ATOMIC_DEC::~Inst_MUBUF__BUFFER_ATOMIC_DEC()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_DEC

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
    // (unsigned compare); RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
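
    // Worked example of the wrapping decrement: with DATA = 3 and
    // MEM[ADDR] initially 0, successive buffer_atomic_dec operations
    // store 3, 2, 1, 0, 3, ...; the wrap cases (tmp == 0 or tmp > DATA)
    // reload DATA, making this the mirror image of buffer_atomic_inc.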

    Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_swap_x2")
    {
        setFlag(AtomicExch);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2

    Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2::~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_cmpswap_x2")
    {
        setFlag(AtomicCAS);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2

    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
        ::~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2

    // tmp = MEM[ADDR];
    // src = DATA[0:1];
    // cmp = DATA[2:3];
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_ADD_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_ADD_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_add_x2")
    {
        setFlag(AtomicAdd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_ADD_X2

    Inst_MUBUF__BUFFER_ATOMIC_ADD_X2::~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_ADD_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_SUB_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_SUB_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_sub_x2")
    {
        setFlag(AtomicSub);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_SUB_X2

    Inst_MUBUF__BUFFER_ATOMIC_SUB_X2::~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] -= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_SUB_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_smin_x2")
    {
        setFlag(AtomicMin);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2

    Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2::~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_umin_x2")
    {
        setFlag(AtomicMin);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2

    Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2::~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_smax_x2")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2

    Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2::~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_umax_x2")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2

    Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2::~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_AND_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_AND_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_and_x2")
    {
        setFlag(AtomicAnd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_AND_X2

    Inst_MUBUF__BUFFER_ATOMIC_AND_X2::~Inst_MUBUF__BUFFER_ATOMIC_AND_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_AND_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_AND_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_OR_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_OR_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_or_x2")
    {
        setFlag(AtomicOr);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_OR_X2

    Inst_MUBUF__BUFFER_ATOMIC_OR_X2::~Inst_MUBUF__BUFFER_ATOMIC_OR_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_OR_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_OR_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_XOR_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_XOR_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_xor_x2")
    {
        setFlag(AtomicXor);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_XOR_X2

    Inst_MUBUF__BUFFER_ATOMIC_XOR_X2::~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_XOR_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_INC_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_INC_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_inc_x2")
    {
        setFlag(AtomicInc);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_INC_X2

    Inst_MUBUF__BUFFER_ATOMIC_INC_X2::~Inst_MUBUF__BUFFER_ATOMIC_INC_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_INC_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_INC_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_DEC_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_DEC_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_dec_x2")
    {
        setFlag(AtomicDec);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_DEC_X2

    Inst_MUBUF__BUFFER_ATOMIC_DEC_X2::~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
    // (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_DEC_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_X
        ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_X(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_load_format_x")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_X

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_X()
    {
    } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_X

    // Typed buffer load 1 dword with format conversion.
    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY
        ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_load_format_xy")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY()
    {
    } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY

    // Typed buffer load 2 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ
        ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_load_format_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ()
    {
    } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ

    // Typed buffer load 3 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW
        ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_load_format_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW
        ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW()
    {
    } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW

    // Typed buffer load 4 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_STORE_FORMAT_X
        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_X(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_store_format_x")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_X

    Inst_MTBUF__TBUFFER_STORE_FORMAT_X::~Inst_MTBUF__TBUFFER_STORE_FORMAT_X()
    {
    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_X

    // Typed buffer store 1 dword with format conversion.
    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_STORE_FORMAT_XY
        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XY(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_store_format_xy")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XY

    Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XY()
    {
    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XY

    // Typed buffer store 2 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ
        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_store_format_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ

    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ
        ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ()
    {
    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ

    // Typed buffer store 3 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW
        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_store_format_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW

    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW
        ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW()
    {
    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW

    // Typed buffer store 4 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X
        ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_x")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::
        ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X()
    {
    } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X

    // Typed buffer load 1 dword with format conversion.
    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY
        ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xy")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY
        ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY()
    {
    } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY

    // Typed buffer load 2 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ
        ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ
        ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ()
    {
    } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ

    // Typed buffer load 3 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW
        ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW
        ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW()
    {
    } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW

    // Typed buffer load 4 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X
        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_x")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X

    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X
        ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X()
    {
    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X

    // Typed buffer store 1 dword with format conversion.
    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY
        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xy")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY

    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY
        ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY()
    {
    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY

    // Typed buffer store 2 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ
        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ

    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ
        ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ()
    {
    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ

    // Typed buffer store 3 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW
        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW

    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW
        ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW()
    {
    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW

    // Typed buffer store 4 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::execute(
        GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MIMG__IMAGE_LOAD::Inst_MIMG__IMAGE_LOAD(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_load")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_LOAD

    Inst_MIMG__IMAGE_LOAD::~Inst_MIMG__IMAGE_LOAD()
    {
    } // ~Inst_MIMG__IMAGE_LOAD

    // Image memory load with format conversion specified in T#. No sampler.
    void
    Inst_MIMG__IMAGE_LOAD::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_LOAD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_LOAD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MIMG__IMAGE_LOAD_MIP::Inst_MIMG__IMAGE_LOAD_MIP(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_load_mip")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_LOAD_MIP

    Inst_MIMG__IMAGE_LOAD_MIP::~Inst_MIMG__IMAGE_LOAD_MIP()
    {
    } // ~Inst_MIMG__IMAGE_LOAD_MIP

    void
    Inst_MIMG__IMAGE_LOAD_MIP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_LOAD_MIP::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_LOAD_MIP::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MIMG__IMAGE_LOAD_PCK::Inst_MIMG__IMAGE_LOAD_PCK(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_load_pck")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_LOAD_PCK

    Inst_MIMG__IMAGE_LOAD_PCK::~Inst_MIMG__IMAGE_LOAD_PCK()
    {
    } // ~Inst_MIMG__IMAGE_LOAD_PCK

    void
    Inst_MIMG__IMAGE_LOAD_PCK::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_LOAD_PCK::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_LOAD_PCK::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MIMG__IMAGE_LOAD_PCK_SGN::Inst_MIMG__IMAGE_LOAD_PCK_SGN(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_load_pck_sgn")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_LOAD_PCK_SGN

    Inst_MIMG__IMAGE_LOAD_PCK_SGN::~Inst_MIMG__IMAGE_LOAD_PCK_SGN()
    {
    } // ~Inst_MIMG__IMAGE_LOAD_PCK_SGN

    // Image memory load with no format conversion and sign extension.
    void
    Inst_MIMG__IMAGE_LOAD_PCK_SGN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_LOAD_PCK_SGN::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_LOAD_PCK_SGN::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MIMG__IMAGE_LOAD_MIP_PCK::Inst_MIMG__IMAGE_LOAD_MIP_PCK(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_load_mip_pck")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_LOAD_MIP_PCK

    Inst_MIMG__IMAGE_LOAD_MIP_PCK::~Inst_MIMG__IMAGE_LOAD_MIP_PCK()
    {
    } // ~Inst_MIMG__IMAGE_LOAD_MIP_PCK

    // Image memory load with user-supplied mip level, no format conversion.
    void
    Inst_MIMG__IMAGE_LOAD_MIP_PCK::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_LOAD_MIP_PCK::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_LOAD_MIP_PCK::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_load_mip_pck_sgn")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN

    Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::~Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN()
    {
    } // ~Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN

    // Image memory load with user-supplied mip level, no format conversion,
    // and sign extension.
    void
    Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MIMG__IMAGE_STORE::Inst_MIMG__IMAGE_STORE(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_store")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_STORE

    Inst_MIMG__IMAGE_STORE::~Inst_MIMG__IMAGE_STORE()
    {
    } // ~Inst_MIMG__IMAGE_STORE

    // Image memory store with format conversion specified in T#. No sampler.
    void
    Inst_MIMG__IMAGE_STORE::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_STORE::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_STORE::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MIMG__IMAGE_STORE_MIP::Inst_MIMG__IMAGE_STORE_MIP(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_store_mip")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_STORE_MIP

    Inst_MIMG__IMAGE_STORE_MIP::~Inst_MIMG__IMAGE_STORE_MIP()
    {
    } // ~Inst_MIMG__IMAGE_STORE_MIP

    void
    Inst_MIMG__IMAGE_STORE_MIP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_STORE_MIP::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_STORE_MIP::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MIMG__IMAGE_STORE_PCK::Inst_MIMG__IMAGE_STORE_PCK(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_store_pck")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_STORE_PCK

    Inst_MIMG__IMAGE_STORE_PCK::~Inst_MIMG__IMAGE_STORE_PCK()
    {
    } // ~Inst_MIMG__IMAGE_STORE_PCK

    // Image memory store of packed data without format conversion.
    void
    Inst_MIMG__IMAGE_STORE_PCK::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_STORE_PCK::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_STORE_PCK::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MIMG__IMAGE_STORE_MIP_PCK::Inst_MIMG__IMAGE_STORE_MIP_PCK(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_store_mip_pck")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_STORE_MIP_PCK

    Inst_MIMG__IMAGE_STORE_MIP_PCK::~Inst_MIMG__IMAGE_STORE_MIP_PCK()
    {
    } // ~Inst_MIMG__IMAGE_STORE_MIP_PCK

    // Image memory store of packed data without format conversion.
    void
    Inst_MIMG__IMAGE_STORE_MIP_PCK::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_STORE_MIP_PCK::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_STORE_MIP_PCK::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MIMG__IMAGE_GET_RESINFO::Inst_MIMG__IMAGE_GET_RESINFO(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_get_resinfo")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GET_RESINFO

    Inst_MIMG__IMAGE_GET_RESINFO::~Inst_MIMG__IMAGE_GET_RESINFO()
    {
    } // ~Inst_MIMG__IMAGE_GET_RESINFO

    void
    Inst_MIMG__IMAGE_GET_RESINFO::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_ATOMIC_SWAP::Inst_MIMG__IMAGE_ATOMIC_SWAP(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_swap")
    {
        setFlag(AtomicExch);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_SWAP

    Inst_MIMG__IMAGE_ATOMIC_SWAP::~Inst_MIMG__IMAGE_ATOMIC_SWAP()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_SWAP

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::Inst_MIMG__IMAGE_ATOMIC_CMPSWAP(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_cmpswap")
    {
        setFlag(AtomicCAS);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_CMPSWAP

    Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::~Inst_MIMG__IMAGE_ATOMIC_CMPSWAP()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_CMPSWAP

    // tmp = MEM[ADDR];
    // src = DATA[0];
    // cmp = DATA[1];
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
38030 Inst_MIMG__IMAGE_ATOMIC_ADD::Inst_MIMG__IMAGE_ATOMIC_ADD(InFmt_MIMG
*iFmt
)
38031 : Inst_MIMG(iFmt
, "image_atomic_add")
38033 setFlag(AtomicAdd
);
38034 if (instData
.GLC
) {
38035 setFlag(AtomicReturn
);
38037 setFlag(AtomicNoReturn
);
38039 setFlag(MemoryRef
);
38040 setFlag(GlobalSegment
);
38041 } // Inst_MIMG__IMAGE_ATOMIC_ADD
38043 Inst_MIMG__IMAGE_ATOMIC_ADD::~Inst_MIMG__IMAGE_ATOMIC_ADD()
38045 } // ~Inst_MIMG__IMAGE_ATOMIC_ADD
38047 // tmp = MEM[ADDR];
38048 // MEM[ADDR] += DATA;
38049 // RETURN_DATA = tmp.
38051 Inst_MIMG__IMAGE_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst
)
38053 panicUnimplemented();
38056 Inst_MIMG__IMAGE_ATOMIC_SUB::Inst_MIMG__IMAGE_ATOMIC_SUB(InFmt_MIMG
*iFmt
)
38057 : Inst_MIMG(iFmt
, "image_atomic_sub")
38059 setFlag(AtomicSub
);
38060 if (instData
.GLC
) {
38061 setFlag(AtomicReturn
);
38063 setFlag(AtomicNoReturn
);
38065 setFlag(MemoryRef
);
38066 setFlag(GlobalSegment
);
38067 } // Inst_MIMG__IMAGE_ATOMIC_SUB
38069 Inst_MIMG__IMAGE_ATOMIC_SUB::~Inst_MIMG__IMAGE_ATOMIC_SUB()
38071 } // ~Inst_MIMG__IMAGE_ATOMIC_SUB
38073 // tmp = MEM[ADDR];
38074 // MEM[ADDR] -= DATA;
38075 // RETURN_DATA = tmp.
38077 Inst_MIMG__IMAGE_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst
)
38079 panicUnimplemented();
38082 Inst_MIMG__IMAGE_ATOMIC_SMIN::Inst_MIMG__IMAGE_ATOMIC_SMIN(
38084 : Inst_MIMG(iFmt
, "image_atomic_smin")
38086 setFlag(AtomicMin
);
38087 if (instData
.GLC
) {
38088 setFlag(AtomicReturn
);
38090 setFlag(AtomicNoReturn
);
38092 setFlag(MemoryRef
);
38093 setFlag(GlobalSegment
);
38094 } // Inst_MIMG__IMAGE_ATOMIC_SMIN
38096 Inst_MIMG__IMAGE_ATOMIC_SMIN::~Inst_MIMG__IMAGE_ATOMIC_SMIN()
38098 } // ~Inst_MIMG__IMAGE_ATOMIC_SMIN
38100 // tmp = MEM[ADDR];
38101 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
38102 // RETURN_DATA = tmp.
38104 Inst_MIMG__IMAGE_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst
)
38106 panicUnimplemented();
38109 Inst_MIMG__IMAGE_ATOMIC_UMIN::Inst_MIMG__IMAGE_ATOMIC_UMIN(
38111 : Inst_MIMG(iFmt
, "image_atomic_umin")
38113 setFlag(AtomicMin
);
38114 if (instData
.GLC
) {
38115 setFlag(AtomicReturn
);
38117 setFlag(AtomicNoReturn
);
38119 setFlag(MemoryRef
);
38120 setFlag(GlobalSegment
);
38121 } // Inst_MIMG__IMAGE_ATOMIC_UMIN
38123 Inst_MIMG__IMAGE_ATOMIC_UMIN::~Inst_MIMG__IMAGE_ATOMIC_UMIN()
38125 } // ~Inst_MIMG__IMAGE_ATOMIC_UMIN
38127 // tmp = MEM[ADDR];
38128 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
38129 // RETURN_DATA = tmp.
38131 Inst_MIMG__IMAGE_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst
)
38133 panicUnimplemented();
    Inst_MIMG__IMAGE_ATOMIC_SMAX::Inst_MIMG__IMAGE_ATOMIC_SMAX(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_smax")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_SMAX

    Inst_MIMG__IMAGE_ATOMIC_SMAX::~Inst_MIMG__IMAGE_ATOMIC_SMAX()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_SMAX

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_ATOMIC_UMAX::Inst_MIMG__IMAGE_ATOMIC_UMAX(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_umax")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_UMAX

    Inst_MIMG__IMAGE_ATOMIC_UMAX::~Inst_MIMG__IMAGE_ATOMIC_UMAX()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_UMAX

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_ATOMIC_AND::Inst_MIMG__IMAGE_ATOMIC_AND(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_and")
    {
        setFlag(AtomicAnd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_AND

    Inst_MIMG__IMAGE_ATOMIC_AND::~Inst_MIMG__IMAGE_ATOMIC_AND()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_AND

    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_ATOMIC_OR::Inst_MIMG__IMAGE_ATOMIC_OR(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_or")
    {
        setFlag(AtomicOr);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_OR

    Inst_MIMG__IMAGE_ATOMIC_OR::~Inst_MIMG__IMAGE_ATOMIC_OR()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_OR

    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_ATOMIC_XOR::Inst_MIMG__IMAGE_ATOMIC_XOR(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_xor")
    {
        setFlag(AtomicXor);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_XOR

    Inst_MIMG__IMAGE_ATOMIC_XOR::~Inst_MIMG__IMAGE_ATOMIC_XOR()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_XOR

    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_ATOMIC_INC::Inst_MIMG__IMAGE_ATOMIC_INC(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_inc")
    {
        setFlag(AtomicInc);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_INC

    Inst_MIMG__IMAGE_ATOMIC_INC::~Inst_MIMG__IMAGE_ATOMIC_INC()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_INC

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA = tmp.
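    // For example, with DATA == 3 successive increments walk the memory
    // value 0 -> 1 -> 2 -> 3 -> 0, wrapping to zero once tmp reaches DATA.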
    void
    Inst_MIMG__IMAGE_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_ATOMIC_DEC::Inst_MIMG__IMAGE_ATOMIC_DEC(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_dec")
    {
        setFlag(AtomicDec);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_DEC

    Inst_MIMG__IMAGE_ATOMIC_DEC::~Inst_MIMG__IMAGE_ATOMIC_DEC()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_DEC

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
    // (unsigned compare); RETURN_DATA = tmp.
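    // For example, with DATA == 3 successive decrements walk the memory
    // value 3 -> 2 -> 1 -> 0 -> 3, reloading DATA at zero or when tmp
    // exceeds DATA.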
    void
    Inst_MIMG__IMAGE_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE::Inst_MIMG__IMAGE_SAMPLE(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE

    Inst_MIMG__IMAGE_SAMPLE::~Inst_MIMG__IMAGE_SAMPLE()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE

    void
    Inst_MIMG__IMAGE_SAMPLE::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

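    // The sample/gather variants that follow are identical stubs: each
    // constructor only sets GlobalSegment, each destructor is empty, and
    // each execute() panics as unimplemented; only the opcode names differ.
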
    Inst_MIMG__IMAGE_SAMPLE_CL::Inst_MIMG__IMAGE_SAMPLE_CL(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_CL

    Inst_MIMG__IMAGE_SAMPLE_CL::~Inst_MIMG__IMAGE_SAMPLE_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_CL

    void
    Inst_MIMG__IMAGE_SAMPLE_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_D::Inst_MIMG__IMAGE_SAMPLE_D(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_d")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_D

    Inst_MIMG__IMAGE_SAMPLE_D::~Inst_MIMG__IMAGE_SAMPLE_D()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_D

    void
    Inst_MIMG__IMAGE_SAMPLE_D::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_D_CL::Inst_MIMG__IMAGE_SAMPLE_D_CL(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_d_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_D_CL

    Inst_MIMG__IMAGE_SAMPLE_D_CL::~Inst_MIMG__IMAGE_SAMPLE_D_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_D_CL

    void
    Inst_MIMG__IMAGE_SAMPLE_D_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_L::Inst_MIMG__IMAGE_SAMPLE_L(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_l")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_L

    Inst_MIMG__IMAGE_SAMPLE_L::~Inst_MIMG__IMAGE_SAMPLE_L()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_L

    void
    Inst_MIMG__IMAGE_SAMPLE_L::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_B::Inst_MIMG__IMAGE_SAMPLE_B(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_b")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_B

    Inst_MIMG__IMAGE_SAMPLE_B::~Inst_MIMG__IMAGE_SAMPLE_B()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_B

    void
    Inst_MIMG__IMAGE_SAMPLE_B::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_B_CL::Inst_MIMG__IMAGE_SAMPLE_B_CL(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_b_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_B_CL

    Inst_MIMG__IMAGE_SAMPLE_B_CL::~Inst_MIMG__IMAGE_SAMPLE_B_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_B_CL

    void
    Inst_MIMG__IMAGE_SAMPLE_B_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_LZ::Inst_MIMG__IMAGE_SAMPLE_LZ(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_lz")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_LZ

    Inst_MIMG__IMAGE_SAMPLE_LZ::~Inst_MIMG__IMAGE_SAMPLE_LZ()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_LZ

    void
    Inst_MIMG__IMAGE_SAMPLE_LZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C::Inst_MIMG__IMAGE_SAMPLE_C(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C

    Inst_MIMG__IMAGE_SAMPLE_C::~Inst_MIMG__IMAGE_SAMPLE_C()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C

    void
    Inst_MIMG__IMAGE_SAMPLE_C::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_CL::Inst_MIMG__IMAGE_SAMPLE_C_CL(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_CL

    Inst_MIMG__IMAGE_SAMPLE_C_CL::~Inst_MIMG__IMAGE_SAMPLE_C_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_CL

    void
    Inst_MIMG__IMAGE_SAMPLE_C_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_D::Inst_MIMG__IMAGE_SAMPLE_C_D(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_d")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_D

    Inst_MIMG__IMAGE_SAMPLE_C_D::~Inst_MIMG__IMAGE_SAMPLE_C_D()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_D

    void
    Inst_MIMG__IMAGE_SAMPLE_C_D::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_D_CL::Inst_MIMG__IMAGE_SAMPLE_C_D_CL(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_d_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_D_CL

    Inst_MIMG__IMAGE_SAMPLE_C_D_CL::~Inst_MIMG__IMAGE_SAMPLE_C_D_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_D_CL

    void
    Inst_MIMG__IMAGE_SAMPLE_C_D_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_L::Inst_MIMG__IMAGE_SAMPLE_C_L(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_l")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_L

    Inst_MIMG__IMAGE_SAMPLE_C_L::~Inst_MIMG__IMAGE_SAMPLE_C_L()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_L

    void
    Inst_MIMG__IMAGE_SAMPLE_C_L::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_B::Inst_MIMG__IMAGE_SAMPLE_C_B(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_b")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_B

    Inst_MIMG__IMAGE_SAMPLE_C_B::~Inst_MIMG__IMAGE_SAMPLE_C_B()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_B

    void
    Inst_MIMG__IMAGE_SAMPLE_C_B::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_B_CL::Inst_MIMG__IMAGE_SAMPLE_C_B_CL(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_b_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_B_CL

    Inst_MIMG__IMAGE_SAMPLE_C_B_CL::~Inst_MIMG__IMAGE_SAMPLE_C_B_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_B_CL

    void
    Inst_MIMG__IMAGE_SAMPLE_C_B_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_LZ::Inst_MIMG__IMAGE_SAMPLE_C_LZ(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_lz")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_LZ

    Inst_MIMG__IMAGE_SAMPLE_C_LZ::~Inst_MIMG__IMAGE_SAMPLE_C_LZ()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_LZ

    void
    Inst_MIMG__IMAGE_SAMPLE_C_LZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_O::Inst_MIMG__IMAGE_SAMPLE_O(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_O

    Inst_MIMG__IMAGE_SAMPLE_O::~Inst_MIMG__IMAGE_SAMPLE_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_O

    void
    Inst_MIMG__IMAGE_SAMPLE_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_CL_O::Inst_MIMG__IMAGE_SAMPLE_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_CL_O

    Inst_MIMG__IMAGE_SAMPLE_CL_O::~Inst_MIMG__IMAGE_SAMPLE_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_CL_O

    void
    Inst_MIMG__IMAGE_SAMPLE_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_D_O::Inst_MIMG__IMAGE_SAMPLE_D_O(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_d_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_D_O

    Inst_MIMG__IMAGE_SAMPLE_D_O::~Inst_MIMG__IMAGE_SAMPLE_D_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_D_O

    void
    Inst_MIMG__IMAGE_SAMPLE_D_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_D_CL_O::Inst_MIMG__IMAGE_SAMPLE_D_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_d_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_D_CL_O

    Inst_MIMG__IMAGE_SAMPLE_D_CL_O::~Inst_MIMG__IMAGE_SAMPLE_D_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_D_CL_O

    void
    Inst_MIMG__IMAGE_SAMPLE_D_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_L_O::Inst_MIMG__IMAGE_SAMPLE_L_O(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_l_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_L_O

    Inst_MIMG__IMAGE_SAMPLE_L_O::~Inst_MIMG__IMAGE_SAMPLE_L_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_L_O

    void
    Inst_MIMG__IMAGE_SAMPLE_L_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_B_O::Inst_MIMG__IMAGE_SAMPLE_B_O(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_b_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_B_O

    Inst_MIMG__IMAGE_SAMPLE_B_O::~Inst_MIMG__IMAGE_SAMPLE_B_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_B_O

    void
    Inst_MIMG__IMAGE_SAMPLE_B_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_B_CL_O::Inst_MIMG__IMAGE_SAMPLE_B_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_b_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_B_CL_O

    Inst_MIMG__IMAGE_SAMPLE_B_CL_O::~Inst_MIMG__IMAGE_SAMPLE_B_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_B_CL_O

    void
    Inst_MIMG__IMAGE_SAMPLE_B_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_LZ_O::Inst_MIMG__IMAGE_SAMPLE_LZ_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_lz_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_LZ_O

    Inst_MIMG__IMAGE_SAMPLE_LZ_O::~Inst_MIMG__IMAGE_SAMPLE_LZ_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_LZ_O

    void
    Inst_MIMG__IMAGE_SAMPLE_LZ_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_O::Inst_MIMG__IMAGE_SAMPLE_C_O(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_O

    Inst_MIMG__IMAGE_SAMPLE_C_O::~Inst_MIMG__IMAGE_SAMPLE_C_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_CL_O

    Inst_MIMG__IMAGE_SAMPLE_C_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_CL_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_D_O::Inst_MIMG__IMAGE_SAMPLE_C_D_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_d_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_D_O

    Inst_MIMG__IMAGE_SAMPLE_C_D_O::~Inst_MIMG__IMAGE_SAMPLE_C_D_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_D_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_D_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_d_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O

    Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_L_O::Inst_MIMG__IMAGE_SAMPLE_C_L_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_l_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_L_O

    Inst_MIMG__IMAGE_SAMPLE_C_L_O::~Inst_MIMG__IMAGE_SAMPLE_C_L_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_L_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_L_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_B_O::Inst_MIMG__IMAGE_SAMPLE_C_B_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_b_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_B_O

    Inst_MIMG__IMAGE_SAMPLE_C_B_O::~Inst_MIMG__IMAGE_SAMPLE_C_B_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_B_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_B_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_b_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O

    Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::Inst_MIMG__IMAGE_SAMPLE_C_LZ_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_lz_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_LZ_O

    Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::~Inst_MIMG__IMAGE_SAMPLE_C_LZ_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_LZ_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4::Inst_MIMG__IMAGE_GATHER4(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4

    Inst_MIMG__IMAGE_GATHER4::~Inst_MIMG__IMAGE_GATHER4()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4

    void
    Inst_MIMG__IMAGE_GATHER4::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_CL::Inst_MIMG__IMAGE_GATHER4_CL(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_CL

    Inst_MIMG__IMAGE_GATHER4_CL::~Inst_MIMG__IMAGE_GATHER4_CL()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_CL

    void
    Inst_MIMG__IMAGE_GATHER4_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_L::Inst_MIMG__IMAGE_GATHER4_L(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_l")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_L

    Inst_MIMG__IMAGE_GATHER4_L::~Inst_MIMG__IMAGE_GATHER4_L()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_L

    void
    Inst_MIMG__IMAGE_GATHER4_L::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_B::Inst_MIMG__IMAGE_GATHER4_B(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_b")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_B

    Inst_MIMG__IMAGE_GATHER4_B::~Inst_MIMG__IMAGE_GATHER4_B()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_B

    void
    Inst_MIMG__IMAGE_GATHER4_B::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_B_CL::Inst_MIMG__IMAGE_GATHER4_B_CL(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_b_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_B_CL

    Inst_MIMG__IMAGE_GATHER4_B_CL::~Inst_MIMG__IMAGE_GATHER4_B_CL()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_B_CL

    void
    Inst_MIMG__IMAGE_GATHER4_B_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_LZ::Inst_MIMG__IMAGE_GATHER4_LZ(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_lz")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_LZ

    Inst_MIMG__IMAGE_GATHER4_LZ::~Inst_MIMG__IMAGE_GATHER4_LZ()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_LZ

    void
    Inst_MIMG__IMAGE_GATHER4_LZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_C::Inst_MIMG__IMAGE_GATHER4_C(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C

    Inst_MIMG__IMAGE_GATHER4_C::~Inst_MIMG__IMAGE_GATHER4_C()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C

    void
    Inst_MIMG__IMAGE_GATHER4_C::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_C_CL::Inst_MIMG__IMAGE_GATHER4_C_CL(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_CL

    Inst_MIMG__IMAGE_GATHER4_C_CL::~Inst_MIMG__IMAGE_GATHER4_C_CL()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_CL

    void
    Inst_MIMG__IMAGE_GATHER4_C_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_C_L::Inst_MIMG__IMAGE_GATHER4_C_L(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_l")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_L

    Inst_MIMG__IMAGE_GATHER4_C_L::~Inst_MIMG__IMAGE_GATHER4_C_L()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_L

    void
    Inst_MIMG__IMAGE_GATHER4_C_L::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_C_B::Inst_MIMG__IMAGE_GATHER4_C_B(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_b")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_B

    Inst_MIMG__IMAGE_GATHER4_C_B::~Inst_MIMG__IMAGE_GATHER4_C_B()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_B

    void
    Inst_MIMG__IMAGE_GATHER4_C_B::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_C_B_CL::Inst_MIMG__IMAGE_GATHER4_C_B_CL(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_b_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_B_CL

    Inst_MIMG__IMAGE_GATHER4_C_B_CL::~Inst_MIMG__IMAGE_GATHER4_C_B_CL()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_B_CL

    void
    Inst_MIMG__IMAGE_GATHER4_C_B_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_C_LZ::Inst_MIMG__IMAGE_GATHER4_C_LZ(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_lz")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_LZ

    Inst_MIMG__IMAGE_GATHER4_C_LZ::~Inst_MIMG__IMAGE_GATHER4_C_LZ()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_LZ

    void
    Inst_MIMG__IMAGE_GATHER4_C_LZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_O::Inst_MIMG__IMAGE_GATHER4_O(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_O

    Inst_MIMG__IMAGE_GATHER4_O::~Inst_MIMG__IMAGE_GATHER4_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_O

    void
    Inst_MIMG__IMAGE_GATHER4_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_CL_O::Inst_MIMG__IMAGE_GATHER4_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_CL_O

    Inst_MIMG__IMAGE_GATHER4_CL_O::~Inst_MIMG__IMAGE_GATHER4_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_CL_O

    void
    Inst_MIMG__IMAGE_GATHER4_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_L_O::Inst_MIMG__IMAGE_GATHER4_L_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_l_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_L_O

    Inst_MIMG__IMAGE_GATHER4_L_O::~Inst_MIMG__IMAGE_GATHER4_L_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_L_O

    void
    Inst_MIMG__IMAGE_GATHER4_L_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_B_O::Inst_MIMG__IMAGE_GATHER4_B_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_b_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_B_O

    Inst_MIMG__IMAGE_GATHER4_B_O::~Inst_MIMG__IMAGE_GATHER4_B_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_B_O

    void
    Inst_MIMG__IMAGE_GATHER4_B_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_B_CL_O::Inst_MIMG__IMAGE_GATHER4_B_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_b_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_B_CL_O

    Inst_MIMG__IMAGE_GATHER4_B_CL_O::~Inst_MIMG__IMAGE_GATHER4_B_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_B_CL_O

    void
    Inst_MIMG__IMAGE_GATHER4_B_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_LZ_O::Inst_MIMG__IMAGE_GATHER4_LZ_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_lz_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_LZ_O

    Inst_MIMG__IMAGE_GATHER4_LZ_O::~Inst_MIMG__IMAGE_GATHER4_LZ_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_LZ_O

    void
    Inst_MIMG__IMAGE_GATHER4_LZ_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_C_O::Inst_MIMG__IMAGE_GATHER4_C_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_O

    Inst_MIMG__IMAGE_GATHER4_C_O::~Inst_MIMG__IMAGE_GATHER4_C_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_O

    void
    Inst_MIMG__IMAGE_GATHER4_C_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_C_CL_O::Inst_MIMG__IMAGE_GATHER4_C_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_CL_O

    Inst_MIMG__IMAGE_GATHER4_C_CL_O::~Inst_MIMG__IMAGE_GATHER4_C_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_CL_O

    void
    Inst_MIMG__IMAGE_GATHER4_C_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_C_L_O::Inst_MIMG__IMAGE_GATHER4_C_L_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_l_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_L_O

    Inst_MIMG__IMAGE_GATHER4_C_L_O::~Inst_MIMG__IMAGE_GATHER4_C_L_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_L_O

    void
    Inst_MIMG__IMAGE_GATHER4_C_L_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_C_B_O::Inst_MIMG__IMAGE_GATHER4_C_B_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_b_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_B_O

    Inst_MIMG__IMAGE_GATHER4_C_B_O::~Inst_MIMG__IMAGE_GATHER4_C_B_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_B_O

    void
    Inst_MIMG__IMAGE_GATHER4_C_B_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::Inst_MIMG__IMAGE_GATHER4_C_B_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_b_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_B_CL_O

    Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::~Inst_MIMG__IMAGE_GATHER4_C_B_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_B_CL_O

    void
    Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_C_LZ_O::Inst_MIMG__IMAGE_GATHER4_C_LZ_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_lz_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_LZ_O

    Inst_MIMG__IMAGE_GATHER4_C_LZ_O::~Inst_MIMG__IMAGE_GATHER4_C_LZ_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_LZ_O

    void
    Inst_MIMG__IMAGE_GATHER4_C_LZ_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GET_LOD::Inst_MIMG__IMAGE_GET_LOD(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_get_lod")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GET_LOD

    Inst_MIMG__IMAGE_GET_LOD::~Inst_MIMG__IMAGE_GET_LOD()
    {
    } // ~Inst_MIMG__IMAGE_GET_LOD

    void
    Inst_MIMG__IMAGE_GET_LOD::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_CD::Inst_MIMG__IMAGE_SAMPLE_CD(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_cd")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_CD

    Inst_MIMG__IMAGE_SAMPLE_CD::~Inst_MIMG__IMAGE_SAMPLE_CD()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_CD

    void
    Inst_MIMG__IMAGE_SAMPLE_CD::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_CD_CL::Inst_MIMG__IMAGE_SAMPLE_CD_CL(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_cd_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_CD_CL

    Inst_MIMG__IMAGE_SAMPLE_CD_CL::~Inst_MIMG__IMAGE_SAMPLE_CD_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_CD_CL

    void
    Inst_MIMG__IMAGE_SAMPLE_CD_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_CD::Inst_MIMG__IMAGE_SAMPLE_C_CD(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_cd")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_CD

    Inst_MIMG__IMAGE_SAMPLE_C_CD::~Inst_MIMG__IMAGE_SAMPLE_C_CD()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD

    void
    Inst_MIMG__IMAGE_SAMPLE_C_CD::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::Inst_MIMG__IMAGE_SAMPLE_C_CD_CL(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_cd_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_CD_CL

    Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL

    void
    Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_CD_O::Inst_MIMG__IMAGE_SAMPLE_CD_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_cd_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_CD_O

    Inst_MIMG__IMAGE_SAMPLE_CD_O::~Inst_MIMG__IMAGE_SAMPLE_CD_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_CD_O

    void
    Inst_MIMG__IMAGE_SAMPLE_CD_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::Inst_MIMG__IMAGE_SAMPLE_CD_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_cd_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_CD_CL_O

    Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::~Inst_MIMG__IMAGE_SAMPLE_CD_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_CD_CL_O

    void
    Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_CD_O::Inst_MIMG__IMAGE_SAMPLE_C_CD_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_cd_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_CD_O

    Inst_MIMG__IMAGE_SAMPLE_C_CD_O::~Inst_MIMG__IMAGE_SAMPLE_C_CD_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_CD_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_cd_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O

    Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_EXP__EXP::Inst_EXP__EXP(InFmt_EXP *iFmt)
        : Inst_EXP(iFmt, "exp")
    {
    } // Inst_EXP__EXP

    Inst_EXP__EXP::~Inst_EXP__EXP()
    {
    } // ~Inst_EXP__EXP

    void
    Inst_EXP__EXP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_FLAT__FLAT_LOAD_UBYTE::Inst_FLAT__FLAT_LOAD_UBYTE(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_load_ubyte")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_FLAT__FLAT_LOAD_UBYTE

    Inst_FLAT__FLAT_LOAD_UBYTE::~Inst_FLAT__FLAT_LOAD_UBYTE()
    {
    } // ~Inst_FLAT__FLAT_LOAD_UBYTE

    // Untyped buffer load unsigned byte (zero extend to VGPR destination).
    void
    Inst_FLAT__FLAT_LOAD_UBYTE::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

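        // With an all-zero exec mask the load touches no memory: undo the
        // issue-time bookkeeping and schedule the (empty) destination
        // write so dependent instructions are still released.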
        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->rdGmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            gpuDynInst->exec_mask = wf->execMask();
            wf->computeUnit->vrf[wf->simdId]->
                scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = gpuDynInst->wavefront()->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_LOAD_UBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<VecElemU8>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_LOAD_UBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst(gpuDynInst, extData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU8*>(
                    gpuDynInst->d_data))[lane]);
            }
        }

        vdst.write();
    }

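    // The remaining flat loads follow the same execute/initiateAcc/
    // completeAcc pattern as flat_load_ubyte; only the memory element type
    // and the per-lane widening in completeAcc differ.
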
    // --- Inst_FLAT__FLAT_LOAD_SBYTE class methods ---

    Inst_FLAT__FLAT_LOAD_SBYTE::Inst_FLAT__FLAT_LOAD_SBYTE(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_load_sbyte")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_FLAT__FLAT_LOAD_SBYTE

    Inst_FLAT__FLAT_LOAD_SBYTE::~Inst_FLAT__FLAT_LOAD_SBYTE()
    {
    } // ~Inst_FLAT__FLAT_LOAD_SBYTE

    // Untyped buffer load signed byte (sign extend to VGPR destination).
    void
    Inst_FLAT__FLAT_LOAD_SBYTE::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->rdGmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            gpuDynInst->exec_mask = wf->execMask();
            wf->computeUnit->vrf[wf->simdId]->
                scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = gpuDynInst->wavefront()->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_LOAD_SBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<VecElemI8>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_LOAD_SBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandI32 vdst(gpuDynInst, extData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst[lane] = (VecElemI32)((reinterpret_cast<VecElemI8*>(
                    gpuDynInst->d_data))[lane]);
            }
        }

        vdst.write();
    }

    Inst_FLAT__FLAT_LOAD_USHORT::Inst_FLAT__FLAT_LOAD_USHORT(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_load_ushort")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_FLAT__FLAT_LOAD_USHORT

    Inst_FLAT__FLAT_LOAD_USHORT::~Inst_FLAT__FLAT_LOAD_USHORT()
    {
    } // ~Inst_FLAT__FLAT_LOAD_USHORT

    // Untyped buffer load unsigned short (zero extend to VGPR destination).
    void
    Inst_FLAT__FLAT_LOAD_USHORT::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->rdGmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            gpuDynInst->exec_mask = wf->execMask();
            wf->computeUnit->vrf[wf->simdId]->
                scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = gpuDynInst->wavefront()->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_LOAD_USHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<VecElemU16>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_LOAD_USHORT::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst(gpuDynInst, extData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU16*>(
                    gpuDynInst->d_data))[lane]);
            }
        }

        vdst.write();
    }

    Inst_FLAT__FLAT_LOAD_SSHORT::Inst_FLAT__FLAT_LOAD_SSHORT(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_load_sshort")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_FLAT__FLAT_LOAD_SSHORT

    Inst_FLAT__FLAT_LOAD_SSHORT::~Inst_FLAT__FLAT_LOAD_SSHORT()
    {
    } // ~Inst_FLAT__FLAT_LOAD_SSHORT

    // Untyped buffer load signed short (sign extend to VGPR destination).
    void
    Inst_FLAT__FLAT_LOAD_SSHORT::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_FLAT__FLAT_LOAD_SSHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_FLAT__FLAT_LOAD_SSHORT::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_FLAT__FLAT_LOAD_DWORD::Inst_FLAT__FLAT_LOAD_DWORD(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_load_dword")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_FLAT__FLAT_LOAD_DWORD

    Inst_FLAT__FLAT_LOAD_DWORD::~Inst_FLAT__FLAT_LOAD_DWORD()
    {
    } // ~Inst_FLAT__FLAT_LOAD_DWORD

    // Untyped buffer load dword.
    void
    Inst_FLAT__FLAT_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->rdGmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            gpuDynInst->exec_mask = wf->execMask();
            wf->computeUnit->vrf[wf->simdId]->
                scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = gpuDynInst->wavefront()->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<VecElemU32>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst(gpuDynInst, extData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane];
            }
        }

        vdst.write();
    }

    Inst_FLAT__FLAT_LOAD_DWORDX2::Inst_FLAT__FLAT_LOAD_DWORDX2(
          InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_load_dwordx2")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_FLAT__FLAT_LOAD_DWORDX2

    Inst_FLAT__FLAT_LOAD_DWORDX2::~Inst_FLAT__FLAT_LOAD_DWORDX2()
    {
    } // ~Inst_FLAT__FLAT_LOAD_DWORDX2

    // Untyped buffer load 2 dwords.
    void
    Inst_FLAT__FLAT_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->rdGmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            gpuDynInst->exec_mask = wf->execMask();
            wf->computeUnit->vrf[wf->simdId]->
                scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = gpuDynInst->wavefront()->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<VecElemU64>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU64 vdst(gpuDynInst, extData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst[lane] = (reinterpret_cast<VecElemU64*>(
                    gpuDynInst->d_data))[lane];
            }
        }

        vdst.write();
    }

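    // For the multi-dword flat loads and stores below, d_data holds each
    // lane's dwords contiguously (lane-major), so lane N's i-th dword sits
    // at index N * width + i.
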
    Inst_FLAT__FLAT_LOAD_DWORDX3::Inst_FLAT__FLAT_LOAD_DWORDX3(
          InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_load_dwordx3")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_FLAT__FLAT_LOAD_DWORDX3

    Inst_FLAT__FLAT_LOAD_DWORDX3::~Inst_FLAT__FLAT_LOAD_DWORDX3()
    {
    } // ~Inst_FLAT__FLAT_LOAD_DWORDX3

    // Untyped buffer load 3 dwords.
    void
    Inst_FLAT__FLAT_LOAD_DWORDX3::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->rdGmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            gpuDynInst->exec_mask = wf->execMask();
            wf->computeUnit->vrf[wf->simdId]->
                scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_LOAD_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<3>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_LOAD_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst0(gpuDynInst, extData.VDST);
        VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
        VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst0[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 3];
                vdst1[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 3 + 1];
                vdst2[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 3 + 2];
            }
        }

        vdst0.write();
        vdst1.write();
        vdst2.write();
    }

    Inst_FLAT__FLAT_LOAD_DWORDX4::Inst_FLAT__FLAT_LOAD_DWORDX4(
          InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_load_dwordx4")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_FLAT__FLAT_LOAD_DWORDX4

    Inst_FLAT__FLAT_LOAD_DWORDX4::~Inst_FLAT__FLAT_LOAD_DWORDX4()
    {
    } // ~Inst_FLAT__FLAT_LOAD_DWORDX4

    // Untyped buffer load 4 dwords.
    void
    Inst_FLAT__FLAT_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->rdGmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            gpuDynInst->exec_mask = wf->execMask();
            wf->computeUnit->vrf[wf->simdId]->
                scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<4>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst0(gpuDynInst, extData.VDST);
        VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
        VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2);
        VecOperandU32 vdst3(gpuDynInst, extData.VDST + 3);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst0[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4];
                vdst1[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 1];
                vdst2[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 2];
                vdst3[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 3];
            }
        }

        vdst0.write();
        vdst1.write();
        vdst2.write();
        vdst3.write();
    }

    Inst_FLAT__FLAT_STORE_BYTE::Inst_FLAT__FLAT_STORE_BYTE(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_store_byte")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_FLAT__FLAT_STORE_BYTE

    Inst_FLAT__FLAT_STORE_BYTE::~Inst_FLAT__FLAT_STORE_BYTE()
    {
    } // ~Inst_FLAT__FLAT_STORE_BYTE

    // Untyped buffer store byte.
    void
    Inst_FLAT__FLAT_STORE_BYTE::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_STORE_BYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstVecOperandU8 data(gpuDynInst, extData.DATA);

        data.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU8*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        initMemWrite<VecElemU8>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_STORE_BYTE::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

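    // Flat stores mirror the load pattern: initiateAcc packs the source
    // VGPR data per lane into d_data before calling initMemWrite, and
    // completeAcc is empty because a store returns nothing to the register
    // file.
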
    Inst_FLAT__FLAT_STORE_SHORT::Inst_FLAT__FLAT_STORE_SHORT(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_store_short")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_FLAT__FLAT_STORE_SHORT

    Inst_FLAT__FLAT_STORE_SHORT::~Inst_FLAT__FLAT_STORE_SHORT()
    {
    } // ~Inst_FLAT__FLAT_STORE_SHORT

    // Untyped buffer store short.
    void
    Inst_FLAT__FLAT_STORE_SHORT::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_STORE_SHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstVecOperandU16 data(gpuDynInst, extData.DATA);

        data.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU16*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        initMemWrite<VecElemU16>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_STORE_SHORT::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_FLAT__FLAT_STORE_DWORD::Inst_FLAT__FLAT_STORE_DWORD(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_store_dword")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_FLAT__FLAT_STORE_DWORD

    Inst_FLAT__FLAT_STORE_DWORD::~Inst_FLAT__FLAT_STORE_DWORD()
    {
    } // ~Inst_FLAT__FLAT_STORE_DWORD

    // Untyped buffer store dword.
    void
    Inst_FLAT__FLAT_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstVecOperandU32 data(gpuDynInst, extData.DATA);

        data.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        initMemWrite<VecElemU32>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_FLAT__FLAT_STORE_DWORDX2::Inst_FLAT__FLAT_STORE_DWORDX2(
          InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_store_dwordx2")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_FLAT__FLAT_STORE_DWORDX2

    Inst_FLAT__FLAT_STORE_DWORDX2::~Inst_FLAT__FLAT_STORE_DWORDX2()
    {
    } // ~Inst_FLAT__FLAT_STORE_DWORDX2

    // Untyped buffer store 2 dwords.
    void
    Inst_FLAT__FLAT_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstVecOperandU64 data(gpuDynInst, extData.DATA);

        data.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU64*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        initMemWrite<VecElemU64>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_FLAT__FLAT_STORE_DWORDX3::Inst_FLAT__FLAT_STORE_DWORDX3(
          InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_store_dwordx3")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_FLAT__FLAT_STORE_DWORDX3

    Inst_FLAT__FLAT_STORE_DWORDX3::~Inst_FLAT__FLAT_STORE_DWORDX3()
    {
    } // ~Inst_FLAT__FLAT_STORE_DWORDX3

    // Untyped buffer store 3 dwords.
    void
    Inst_FLAT__FLAT_STORE_DWORDX3::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_STORE_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstVecOperandU32 data0(gpuDynInst, extData.DATA);
        ConstVecOperandU32 data1(gpuDynInst, extData.DATA + 1);
        ConstVecOperandU32 data2(gpuDynInst, extData.DATA + 2);

        data0.read();
        data1.read();
        data2.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 3] = data0[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 3 + 1] = data1[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 3 + 2] = data2[lane];
            }
        }

        initMemWrite<3>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_STORE_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_FLAT__FLAT_STORE_DWORDX4::Inst_FLAT__FLAT_STORE_DWORDX4(
          InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_store_dwordx4")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_FLAT__FLAT_STORE_DWORDX4

    Inst_FLAT__FLAT_STORE_DWORDX4::~Inst_FLAT__FLAT_STORE_DWORDX4()
    {
    } // ~Inst_FLAT__FLAT_STORE_DWORDX4

    // Untyped buffer store 4 dwords.
    void
    Inst_FLAT__FLAT_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstVecOperandU32 data0(gpuDynInst, extData.DATA);
        ConstVecOperandU32 data1(gpuDynInst, extData.DATA + 1);
        ConstVecOperandU32 data2(gpuDynInst, extData.DATA + 2);
        ConstVecOperandU32 data3(gpuDynInst, extData.DATA + 3);

        data0.read();
        data1.read();
        data2.read();
        data3.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4] = data0[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 1] = data1[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 2] = data2[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 3] = data3[lane];
            }
        }

        initMemWrite<4>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }


    Inst_FLAT__FLAT_ATOMIC_SWAP::Inst_FLAT__FLAT_ATOMIC_SWAP(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_swap")
    {
        setFlag(AtomicExch);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_SWAP

    Inst_FLAT__FLAT_ATOMIC_SWAP::~Inst_FLAT__FLAT_ATOMIC_SWAP()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_SWAP
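
    /**
     * Illustrative sketch, not referenced by the decoder: every FLAT atomic
     * constructor in this file keys its return behaviour off the GLC bit of
     * the encoding. GLC=1 makes the atomic return the pre-op memory value
     * to VDST (AtomicReturn); GLC=0 makes it fire-and-forget
     * (AtomicNoReturn). The enum and function names are hypothetical.
     */
    enum class ExampleAtomicKind { Returning, NonReturning };

    static inline ExampleAtomicKind
    exampleClassifyAtomic(bool glcBit)
    {
        return glcBit ? ExampleAtomicKind::Returning
                      : ExampleAtomicKind::NonReturning;
    }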

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA;
    // RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            if (instData.GLC) {
                gpuDynInst->exec_mask = wf->execMask();
                wf->computeUnit->vrf[wf->simdId]->
                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            }
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL ||
            gpuDynInst->executedAs() == Enums::SC_PRIVATE) {
            // TODO: additional address computation required for scratch
            panic_if(gpuDynInst->executedAs() == Enums::SC_PRIVATE,
                     "Flats to private aperture not tested yet\n");
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();

        ConstVecOperandU32 data(gpuDynInst, extData.DATA);

        data.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
                    = data[lane];
            }
        }
    }

    void
    Inst_FLAT__FLAT_ATOMIC_SWAP::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU32>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_ATOMIC_SWAP::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU32 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    }
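
    /**
     * Illustrative sketch, not referenced by the simulator: for a returning
     * atomic, completeAcc copies the pre-op memory values that the memory
     * system deposited in d_data back into the destination vector register,
     * one dword per lane. Hypothetical signature; the real code also
     * honours the exec mask.
     */
    static inline void
    exampleAtomicWriteback(VecElemU32 *vdst, const VecElemU32 *d_data)
    {
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            vdst[lane] = d_data[lane]; // old value observed by this lane
        }
    }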

    // --- Inst_FLAT__FLAT_ATOMIC_CMPSWAP class methods ---

    Inst_FLAT__FLAT_ATOMIC_CMPSWAP
        ::Inst_FLAT__FLAT_ATOMIC_CMPSWAP(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_cmpswap")
    {
        setFlag(AtomicCAS);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_CMPSWAP

    Inst_FLAT__FLAT_ATOMIC_CMPSWAP::~Inst_FLAT__FLAT_ATOMIC_CMPSWAP()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_CMPSWAP

    // tmp = MEM[ADDR];
    // src = DATA[0];
    // cmp = DATA[1];
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            if (instData.GLC) {
                gpuDynInst->exec_mask = wf->execMask();
                wf->computeUnit->vrf[wf->simdId]->
                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            }
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data(gpuDynInst, extData.DATA);
        ConstVecOperandU32 cmp(gpuDynInst, extData.DATA + 1);

        addr.read();
        data.read();
        cmp.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->x_data))[lane]
                    = data[lane];
                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
                    = cmp[lane];
            }
        }

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL ||
            gpuDynInst->executedAs() == Enums::SC_PRIVATE) {
            /**
             * TODO: If you encounter this panic, just remove this panic
             * and restart the simulation. It should just work fine but
             * this is to warn user that this path is never tested although
             * all the necessary logic is implemented
             */
            panic_if(gpuDynInst->executedAs() == Enums::SC_PRIVATE,
                     "Flats to private aperture not tested yet\n");
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_ATOMIC_CMPSWAP::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU32>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_ATOMIC_CMPSWAP::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU32 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    }
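
    /**
     * Illustrative sketch, not referenced by the simulator: the per-lane
     * semantics of flat_atomic_cmpswap. The swap source comes from DATA
     * (staged in x_data above) and the compare value from DATA + 1 (staged
     * in a_data); memory is updated only on a match, and the old value is
     * returned either way. The helper name is hypothetical.
     */
    static inline VecElemU32
    exampleCmpSwapLane(VecElemU32 &mem, VecElemU32 src, VecElemU32 cmp)
    {
        VecElemU32 tmp = mem;           // tmp = MEM[ADDR]
        mem = (tmp == cmp) ? src : tmp; // swap only when the compare hits
        return tmp;                     // RETURN_DATA[0] = tmp
    }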

    Inst_FLAT__FLAT_ATOMIC_ADD::Inst_FLAT__FLAT_ATOMIC_ADD(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_add")
    {
        setFlag(AtomicAdd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_ADD

    Inst_FLAT__FLAT_ATOMIC_ADD::~Inst_FLAT__FLAT_ATOMIC_ADD()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_ADD

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA;
    // RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            if (instData.GLC) {
                gpuDynInst->exec_mask = wf->execMask();
                wf->computeUnit->vrf[wf->simdId]->
                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            }
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data(gpuDynInst, extData.DATA);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
                    = data[lane];
            }
        }

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_ATOMIC_ADD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU32>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_ATOMIC_ADD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU32 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    }

    Inst_FLAT__FLAT_ATOMIC_SUB::Inst_FLAT__FLAT_ATOMIC_SUB(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_sub")
    {
        setFlag(AtomicSub);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_SUB

    Inst_FLAT__FLAT_ATOMIC_SUB::~Inst_FLAT__FLAT_ATOMIC_SUB()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_SUB

    // tmp = MEM[ADDR];
    // MEM[ADDR] -= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            if (instData.GLC) {
                gpuDynInst->exec_mask = wf->execMask();
                wf->computeUnit->vrf[wf->simdId]->
                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            }
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data(gpuDynInst, extData.DATA);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
                    = data[lane];
            }
        }

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_ATOMIC_SUB::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU32>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_ATOMIC_SUB::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU32 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    }

    Inst_FLAT__FLAT_ATOMIC_SMIN::Inst_FLAT__FLAT_ATOMIC_SMIN(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_smin")
    {
        setFlag(AtomicMin);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_SMIN

    Inst_FLAT__FLAT_ATOMIC_SMIN::~Inst_FLAT__FLAT_ATOMIC_SMIN()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_SMIN

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_FLAT__FLAT_ATOMIC_UMIN::Inst_FLAT__FLAT_ATOMIC_UMIN(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_umin")
    {
        setFlag(AtomicMin);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_UMIN

    Inst_FLAT__FLAT_ATOMIC_UMIN::~Inst_FLAT__FLAT_ATOMIC_UMIN()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_UMIN

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_FLAT__FLAT_ATOMIC_SMAX::Inst_FLAT__FLAT_ATOMIC_SMAX(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_smax")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_SMAX

    Inst_FLAT__FLAT_ATOMIC_SMAX::~Inst_FLAT__FLAT_ATOMIC_SMAX()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_SMAX

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_FLAT__FLAT_ATOMIC_UMAX::Inst_FLAT__FLAT_ATOMIC_UMAX(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_umax")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_UMAX

    Inst_FLAT__FLAT_ATOMIC_UMAX::~Inst_FLAT__FLAT_ATOMIC_UMAX()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_UMAX

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_FLAT__FLAT_ATOMIC_AND::Inst_FLAT__FLAT_ATOMIC_AND(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_and")
    {
        setFlag(AtomicAnd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_AND

    Inst_FLAT__FLAT_ATOMIC_AND::~Inst_FLAT__FLAT_ATOMIC_AND()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_AND

    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_FLAT__FLAT_ATOMIC_OR::Inst_FLAT__FLAT_ATOMIC_OR(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_or")
    {
        setFlag(AtomicOr);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_OR

    Inst_FLAT__FLAT_ATOMIC_OR::~Inst_FLAT__FLAT_ATOMIC_OR()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_OR

    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_FLAT__FLAT_ATOMIC_XOR::Inst_FLAT__FLAT_ATOMIC_XOR(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_xor")
    {
        setFlag(AtomicXor);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_XOR

    Inst_FLAT__FLAT_ATOMIC_XOR::~Inst_FLAT__FLAT_ATOMIC_XOR()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_XOR

    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_FLAT__FLAT_ATOMIC_INC::Inst_FLAT__FLAT_ATOMIC_INC(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_inc")
    {
        setFlag(AtomicInc);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_INC

    Inst_FLAT__FLAT_ATOMIC_INC::~Inst_FLAT__FLAT_ATOMIC_INC()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_INC

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            if (instData.GLC) {
                gpuDynInst->exec_mask = wf->execMask();
                wf->computeUnit->vrf[wf->simdId]->
                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            }
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data(gpuDynInst, extData.DATA);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
                    = data[lane];
            }
        }

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_ATOMIC_INC::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU32>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_ATOMIC_INC::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU32 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    }
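
    /**
     * Illustrative sketch, not referenced by the simulator: flat_atomic_inc
     * is a wrapping increment rather than a plain add. DATA is the wrap
     * bound: the counter resets to zero once it reaches the bound
     * (unsigned compare). The helper name is hypothetical.
     */
    static inline VecElemU32
    exampleAtomicIncLane(VecElemU32 &mem, VecElemU32 bound)
    {
        VecElemU32 tmp = mem;
        mem = (tmp >= bound) ? 0 : tmp + 1; // wrap to 0 at the bound
        return tmp;                         // RETURN_DATA = tmp
    }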

    Inst_FLAT__FLAT_ATOMIC_DEC::Inst_FLAT__FLAT_ATOMIC_DEC(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_dec")
    {
        setFlag(AtomicDec);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_DEC

    Inst_FLAT__FLAT_ATOMIC_DEC::~Inst_FLAT__FLAT_ATOMIC_DEC()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_DEC

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
    // (unsigned compare); RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            if (instData.GLC) {
                gpuDynInst->exec_mask = wf->execMask();
                wf->computeUnit->vrf[wf->simdId]->
                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            }
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data(gpuDynInst, extData.DATA);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
                    = data[lane];
            }
        }

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_ATOMIC_DEC::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU32>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_ATOMIC_DEC::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU32 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    }
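
    /**
     * Illustrative sketch, not referenced by the simulator: flat_atomic_dec
     * mirrors the wrapping increment. The counter reloads DATA when it is
     * already zero or exceeds DATA, and decrements otherwise (unsigned
     * compare). The helper name is hypothetical.
     */
    static inline VecElemU32
    exampleAtomicDecLane(VecElemU32 &mem, VecElemU32 bound)
    {
        VecElemU32 tmp = mem;
        mem = (tmp == 0 || tmp > bound) ? bound : tmp - 1;
        return tmp; // RETURN_DATA = tmp
    }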

    Inst_FLAT__FLAT_ATOMIC_SWAP_X2::Inst_FLAT__FLAT_ATOMIC_SWAP_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_swap_x2")
    {
        setFlag(AtomicExch);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_SWAP_X2

    Inst_FLAT__FLAT_ATOMIC_SWAP_X2::~Inst_FLAT__FLAT_ATOMIC_SWAP_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_SWAP_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_SWAP_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_cmpswap_x2")
    {
        setFlag(AtomicCAS);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2

    Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::~Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2

    // tmp = MEM[ADDR];
    // src = DATA[0:1];
    // cmp = DATA[2:3];
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            if (instData.GLC) {
                gpuDynInst->exec_mask = wf->execMask();
                wf->computeUnit->vrf[wf->simdId]->
                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            }
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU64 data(gpuDynInst, extData.DATA);
        ConstVecOperandU64 cmp(gpuDynInst, extData.DATA + 2);

        addr.read();
        data.read();
        cmp.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU64*>(gpuDynInst->x_data))[lane]
                    = data[lane];
                (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
                    = cmp[lane];
            }
        }

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL ||
            gpuDynInst->executedAs() == Enums::SC_PRIVATE) {
            /**
             * TODO: If you encounter this panic, just remove this panic
             * and restart the simulation. It should just work fine but
             * this is to warn user that this path is never tested although
             * all the necessary logic is implemented
             */
            panic_if(gpuDynInst->executedAs() == Enums::SC_PRIVATE,
                     "Flats to private aperture not tested yet\n");
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU64>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU64 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU64*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    }
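
    /**
     * Illustrative sketch, not referenced by the simulator: the _x2 atomics
     * operate on 64-bit values held in VGPR pairs. For cmpswap_x2 the swap
     * source occupies DATA[0:1] and the compare value DATA[2:3], which is
     * why the cmp operand above is built at extData.DATA + 2. The helper
     * name is hypothetical.
     */
    static inline VecElemU64
    exampleCmpSwapX2Lane(VecElemU64 &mem, VecElemU64 src, VecElemU64 cmp)
    {
        VecElemU64 tmp = mem;           // tmp = MEM[ADDR]
        mem = (tmp == cmp) ? src : tmp; // 64-bit compare-and-swap
        return tmp;                     // RETURN_DATA[0:1] = tmp
    }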

    Inst_FLAT__FLAT_ATOMIC_ADD_X2::Inst_FLAT__FLAT_ATOMIC_ADD_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_add_x2")
    {
        setFlag(AtomicAdd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_ADD_X2

    Inst_FLAT__FLAT_ATOMIC_ADD_X2::~Inst_FLAT__FLAT_ATOMIC_ADD_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_ADD_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_ADD_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            if (instData.GLC) {
                gpuDynInst->exec_mask = wf->execMask();
                wf->computeUnit->vrf[wf->simdId]->
                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            }
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU64 data(gpuDynInst, extData.DATA);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
                    = data[lane];
            }
        }

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_ATOMIC_ADD_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU64>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_ATOMIC_ADD_X2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU64 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU64*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    }

    Inst_FLAT__FLAT_ATOMIC_SUB_X2::Inst_FLAT__FLAT_ATOMIC_SUB_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_sub_x2")
    {
        setFlag(AtomicSub);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_SUB_X2

    Inst_FLAT__FLAT_ATOMIC_SUB_X2::~Inst_FLAT__FLAT_ATOMIC_SUB_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_SUB_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] -= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_SUB_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            if (instData.GLC) {
                gpuDynInst->exec_mask = wf->execMask();
                wf->computeUnit->vrf[wf->simdId]->
                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            }
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU64 data(gpuDynInst, extData.DATA);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
                    = data[lane];
            }
        }

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_ATOMIC_SUB_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU64>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_ATOMIC_SUB_X2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU64 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU64*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    }

    Inst_FLAT__FLAT_ATOMIC_SMIN_X2::Inst_FLAT__FLAT_ATOMIC_SMIN_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_smin_x2")
    {
        setFlag(AtomicMin);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_SMIN_X2

    Inst_FLAT__FLAT_ATOMIC_SMIN_X2::~Inst_FLAT__FLAT_ATOMIC_SMIN_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_SMIN_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_SMIN_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_FLAT__FLAT_ATOMIC_UMIN_X2::Inst_FLAT__FLAT_ATOMIC_UMIN_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_umin_x2")
    {
        setFlag(AtomicMin);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_UMIN_X2

    Inst_FLAT__FLAT_ATOMIC_UMIN_X2::~Inst_FLAT__FLAT_ATOMIC_UMIN_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_UMIN_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_UMIN_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_FLAT__FLAT_ATOMIC_SMAX_X2::Inst_FLAT__FLAT_ATOMIC_SMAX_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_smax_x2")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_SMAX_X2

    Inst_FLAT__FLAT_ATOMIC_SMAX_X2::~Inst_FLAT__FLAT_ATOMIC_SMAX_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_SMAX_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_SMAX_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_FLAT__FLAT_ATOMIC_UMAX_X2::Inst_FLAT__FLAT_ATOMIC_UMAX_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_umax_x2")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_UMAX_X2

    Inst_FLAT__FLAT_ATOMIC_UMAX_X2::~Inst_FLAT__FLAT_ATOMIC_UMAX_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_UMAX_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_UMAX_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_FLAT__FLAT_ATOMIC_AND_X2::Inst_FLAT__FLAT_ATOMIC_AND_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_and_x2")
    {
        setFlag(AtomicAnd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_AND_X2

    Inst_FLAT__FLAT_ATOMIC_AND_X2::~Inst_FLAT__FLAT_ATOMIC_AND_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_AND_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_AND_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_FLAT__FLAT_ATOMIC_OR_X2::Inst_FLAT__FLAT_ATOMIC_OR_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_or_x2")
    {
        setFlag(AtomicOr);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_OR_X2

    Inst_FLAT__FLAT_ATOMIC_OR_X2::~Inst_FLAT__FLAT_ATOMIC_OR_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_OR_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_OR_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_FLAT__FLAT_ATOMIC_XOR_X2::Inst_FLAT__FLAT_ATOMIC_XOR_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_xor_x2")
    {
        setFlag(AtomicXor);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_XOR_X2

    Inst_FLAT__FLAT_ATOMIC_XOR_X2::~Inst_FLAT__FLAT_ATOMIC_XOR_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_XOR_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_XOR_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_FLAT__FLAT_ATOMIC_INC_X2::Inst_FLAT__FLAT_ATOMIC_INC_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_inc_x2")
    {
        setFlag(AtomicInc);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_INC_X2

    Inst_FLAT__FLAT_ATOMIC_INC_X2::~Inst_FLAT__FLAT_ATOMIC_INC_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_INC_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_INC_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            if (instData.GLC) {
                gpuDynInst->exec_mask = wf->execMask();
                wf->computeUnit->vrf[wf->simdId]->
                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            }
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU64 data(gpuDynInst, extData.DATA);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
                    = data[lane];
            }
        }

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_ATOMIC_INC_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU64>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_ATOMIC_INC_X2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU64 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU64*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    }

    Inst_FLAT__FLAT_ATOMIC_DEC_X2::Inst_FLAT__FLAT_ATOMIC_DEC_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_dec_x2")
    {
        setFlag(AtomicDec);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_DEC_X2

    Inst_FLAT__FLAT_ATOMIC_DEC_X2::~Inst_FLAT__FLAT_ATOMIC_DEC_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_DEC_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
    // (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_DEC_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            if (instData.GLC) {
                gpuDynInst->exec_mask = wf->execMask();
                wf->computeUnit->vrf[wf->simdId]->
                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            }
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU64 data(gpuDynInst, extData.DATA);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
                    = data[lane];
            }
        }

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_ATOMIC_DEC_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU64>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_ATOMIC_DEC_X2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU64 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU64*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    }
} // namespace Gcn3ISA