arch, gpu-compute: Remove HSAIL related files
author Tony Gutierrez <anthony.gutierrez@amd.com>
Tue, 1 May 2018 21:34:29 +0000 (17:34 -0400)
committer Anthony Gutierrez <anthony.gutierrez@amd.com>
Wed, 17 Jun 2020 02:53:47 +0000 (02:53 +0000)
Change-Id: Iefba0a38d62da7598bbfe3fe6ff46454d35144b1
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/28410
Reviewed-by: Anthony Gutierrez <anthony.gutierrez@amd.com>
Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
Tested-by: kokoro <noreply+kokoro@google.com>
45 files changed:
MAINTAINERS
SConstruct
build_opts/HSAIL_X86 [deleted file]
src/arch/gcn3/SConscript
src/arch/hsail/Brig.h [deleted file]
src/arch/hsail/Brig_new.hpp [deleted file]
src/arch/hsail/SConscript [deleted file]
src/arch/hsail/SConsopts [deleted file]
src/arch/hsail/gen.py [deleted file]
src/arch/hsail/gpu_decoder.hh [deleted file]
src/arch/hsail/gpu_isa.hh [deleted file]
src/arch/hsail/gpu_types.hh [deleted file]
src/arch/hsail/insts/branch.cc [deleted file]
src/arch/hsail/insts/branch.hh [deleted file]
src/arch/hsail/insts/decl.hh [deleted file]
src/arch/hsail/insts/gpu_static_inst.cc [deleted file]
src/arch/hsail/insts/gpu_static_inst.hh [deleted file]
src/arch/hsail/insts/main.cc [deleted file]
src/arch/hsail/insts/mem.cc [deleted file]
src/arch/hsail/insts/mem.hh [deleted file]
src/arch/hsail/insts/mem_impl.hh [deleted file]
src/arch/hsail/insts/pseudo_inst.cc [deleted file]
src/arch/hsail/operand.cc [deleted file]
src/arch/hsail/operand.hh [deleted file]
src/gpu-compute/brig_object.cc [deleted file]
src/gpu-compute/brig_object.hh [deleted file]
src/gpu-compute/cl_driver.cc [deleted file]
src/gpu-compute/cl_driver.hh [deleted file]
src/gpu-compute/cl_event.hh [deleted file]
src/gpu-compute/condition_register_state.cc [deleted file]
src/gpu-compute/condition_register_state.hh [deleted file]
src/gpu-compute/hsa_code.hh [deleted file]
src/gpu-compute/hsa_kernel_info.hh [deleted file]
src/gpu-compute/hsa_object.cc [deleted file]
src/gpu-compute/hsa_object.hh [deleted file]
src/gpu-compute/hsail_code.cc [deleted file]
src/gpu-compute/hsail_code.hh [deleted file]
src/gpu-compute/kernel_cfg.cc [deleted file]
src/gpu-compute/kernel_cfg.hh [deleted file]
src/gpu-compute/ndrange.hh [deleted file]
src/gpu-compute/qstruct.hh [deleted file]
src/gpu-compute/vector_register_state.cc [deleted file]
src/gpu-compute/vector_register_state.hh [deleted file]
util/git-commit-msg.py
util/regress

diff --git a/MAINTAINERS b/MAINTAINERS
index 9a4d7fc51185f4abbd9355d07140b2c937962202..92c4ce8188969621f019987b02cc04d8aadc54dc 100644 (file)
@@ -29,7 +29,6 @@ arch-arm:
   Andreas Sandberg <andreas.sandberg@arm.com>
   Giacomo Travaglini <giacomo.travaglini@arm.com>
 arch-gcn3:
-arch-hsail:
   Tony Gutierrez <anthony.gutierrez@amd.com>
 arch-mips:
 arch-power:
diff --git a/SConstruct b/SConstruct
index 3a03af4974dea77b59392a8e9f2ecea5c9364874..4bc3d0e39ccd03202ccfc0354fa2547bb51a5aa6 100755 (executable)
@@ -989,7 +989,7 @@ all_gpu_isa_list.sort()
 
 sticky_vars.AddVariables(
     EnumVariable('TARGET_ISA', 'Target ISA', 'null', all_isa_list),
-    EnumVariable('TARGET_GPU_ISA', 'Target GPU ISA', 'hsail', all_gpu_isa_list),
+    EnumVariable('TARGET_GPU_ISA', 'Target GPU ISA', 'gcn3', all_gpu_isa_list),
     ListVariable('CPU_MODELS', 'CPU models',
                  sorted(n for n,m in CpuModel.dict.items() if m.default),
                  sorted(CpuModel.dict.keys())),
diff --git a/build_opts/HSAIL_X86 b/build_opts/HSAIL_X86
deleted file mode 100644 (file)
index 105f82c..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-PROTOCOL = 'GPU_RfO'
-TARGET_ISA = 'x86'
-TARGET_GPU_ISA = 'hsail'
-BUILD_GPU = True
-CPU_MODELS = 'AtomicSimpleCPU,O3CPU,TimingSimpleCPU'
diff --git a/src/arch/gcn3/SConscript b/src/arch/gcn3/SConscript
index f438cdb1018fbd9313fe9d9f724d07095407a154..da57bf552ca4d1e5c60c651250da87115a563ece 100644 (file)
@@ -37,6 +37,9 @@ import sys
 
 Import('*')
 
+if not env['BUILD_GPU']:
+    Return()
+
 if env['TARGET_GPU_ISA'] == 'gcn3':
     Source('decoder.cc')
     Source('insts/gpu_static_inst.cc')
diff --git a/src/arch/hsail/Brig.h b/src/arch/hsail/Brig.h
deleted file mode 100644 (file)
index b260157..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-// University of Illinois/NCSA
-// Open Source License
-//
-// Copyright (c) 2013, Advanced Micro Devices, Inc.
-// All rights reserved.
-//
-// Developed by:
-//
-//     HSA Team
-//
-//     Advanced Micro Devices, Inc
-//
-//     www.amd.com
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy of
-// this software and associated documentation files (the "Software"), to deal with
-// the Software without restriction, including without limitation the rights to
-// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-// of the Software, and to permit persons to whom the Software is furnished to do
-// so, subject to the following conditions:
-//
-//     * Redistributions of source code must retain the above copyright notice,
-//       this list of conditions and the following disclaimers.
-//
-//     * Redistributions in binary form must reproduce the above copyright notice,
-//       this list of conditions and the following disclaimers in the
-//       documentation and/or other materials provided with the distribution.
-//
-//     * Neither the names of the LLVM Team, University of Illinois at
-//       Urbana-Champaign, nor the names of its contributors may be used to
-//       endorse or promote products derived from this Software without specific
-//       prior written permission.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
-// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
-// SOFTWARE.
-#ifndef INTERNAL_BRIG_H
-#define INTERNAL_BRIG_H
-
-#include <stdint.h>
-
-namespace Brig {
-#include "Brig_new.hpp"
-
-// These typedefs provide some backward compatibility with earlier versions
-// of Brig.h, reducing the number of code changes. The distinct names also
-// increase legibility by showing the code's intent.
-typedef BrigBase BrigDirective;
-typedef BrigBase BrigOperand;
-
-enum BrigMemoryFenceSegments { // for internal use only
-    //.mnemo={ s/^BRIG_MEMORY_FENCE_SEGMENT_//;lc }
-    //.mnemo_token=_EMMemoryFenceSegments
-    //.mnemo_context=EInstModifierInstFenceContext
-    BRIG_MEMORY_FENCE_SEGMENT_GLOBAL = 0,
-    BRIG_MEMORY_FENCE_SEGMENT_GROUP = 1,
-    BRIG_MEMORY_FENCE_SEGMENT_IMAGE = 2,
-    BRIG_MEMORY_FENCE_SEGMENT_LAST = 3 //.skip
-};
-
-}
-
-#endif // defined(INTERNAL_BRIG_H)
diff --git a/src/arch/hsail/Brig_new.hpp b/src/arch/hsail/Brig_new.hpp
deleted file mode 100644 (file)
index 95fcf4d..0000000
+++ /dev/null
@@ -1,1589 +0,0 @@
-// University of Illinois/NCSA
-// Open Source License
-//
-// Copyright (c) 2013-2015, Advanced Micro Devices, Inc.
-// All rights reserved.
-//
-// Developed by:
-//
-//     HSA Team
-//
-//     Advanced Micro Devices, Inc
-//
-//     www.amd.com
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy of
-// this software and associated documentation files (the "Software"), to deal with
-// the Software without restriction, including without limitation the rights to
-// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-// of the Software, and to permit persons to whom the Software is furnished to do
-// so, subject to the following conditions:
-//
-//     * Redistributions of source code must retain the above copyright notice,
-//       this list of conditions and the following disclaimers.
-//
-//     * Redistributions in binary form must reproduce the above copyright notice,
-//       this list of conditions and the following disclaimers in the
-//       documentation and/or other materials provided with the distribution.
-//
-//     * Neither the names of the LLVM Team, University of Illinois at
-//       Urbana-Champaign, nor the names of its contributors may be used to
-//       endorse or promote products derived from this Software without specific
-//       prior written permission.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
-// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
-// SOFTWARE.
-
-//.ignore{
-
-#ifndef INCLUDED_BRIG_H
-#define INCLUDED_BRIG_H
-
-#include <stdint.h>
-
-enum BrigAuxDefs {
-  MAX_OPERANDS_NUM = 6
-};
-
-//}
-
-typedef uint32_t BrigVersion32_t;
-
-enum BrigVersion {
-
-    //.nowrap
-    //.nodump
-    //.nollvm
-
-    BRIG_VERSION_HSAIL_MAJOR = 1,
-    BRIG_VERSION_HSAIL_MINOR = 0,
-    BRIG_VERSION_BRIG_MAJOR  = 1,
-    BRIG_VERSION_BRIG_MINOR  = 0
-};
-
-typedef uint8_t BrigAlignment8_t;                           //.defValue=BRIG_ALIGNMENT_NONE
-
-typedef uint8_t BrigAllocation8_t;                          //.defValue=BRIG_ALLOCATION_NONE
-
-typedef uint8_t BrigAluModifier8_t;
-
-typedef uint8_t BrigAtomicOperation8_t;
-
-typedef uint32_t BrigCodeOffset32_t;                        //.defValue=0   //.wtype=ItemRef<Code>
-
-typedef uint8_t BrigCompareOperation8_t;
-
-typedef uint16_t BrigControlDirective16_t;
-
-typedef uint32_t BrigDataOffset32_t;
-
-typedef BrigDataOffset32_t BrigDataOffsetCodeList32_t;      //.wtype=ListRef<Code>      //.defValue=0
-
-typedef BrigDataOffset32_t BrigDataOffsetOperandList32_t;   //.wtype=ListRef<Operand>   //.defValue=0
-
-typedef BrigDataOffset32_t BrigDataOffsetString32_t;        //.wtype=StrRef             //.defValue=0
-
-typedef uint8_t BrigExecutableModifier8_t;
-
-typedef uint8_t BrigImageChannelOrder8_t;                   //.defValue=BRIG_CHANNEL_ORDER_UNKNOWN
-
-typedef uint8_t BrigImageChannelType8_t;                    //.defValue=BRIG_CHANNEL_TYPE_UNKNOWN
-
-typedef uint8_t BrigImageGeometry8_t;                       //.defValue=BRIG_GEOMETRY_UNKNOWN
-
-typedef uint8_t BrigImageQuery8_t;
-
-typedef uint16_t BrigKind16_t;
-
-typedef uint8_t BrigLinkage8_t;                             //.defValue=BRIG_LINKAGE_NONE
-
-typedef uint8_t BrigMachineModel8_t;                        //.defValue=BRIG_MACHINE_LARGE
-
-typedef uint8_t BrigMemoryModifier8_t;
-
-typedef uint8_t BrigMemoryOrder8_t;                         //.defValue=BRIG_MEMORY_ORDER_RELAXED
-
-typedef uint8_t BrigMemoryScope8_t;                         //.defValue=BRIG_MEMORY_SCOPE_SYSTEM
-
-typedef uint16_t BrigOpcode16_t;
-
-typedef uint32_t BrigOperandOffset32_t;                     //.defValue=0 //.wtype=ItemRef<Operand>
-
-typedef uint8_t BrigPack8_t;                                //.defValue=BRIG_PACK_NONE
-
-typedef uint8_t BrigProfile8_t;                             //.defValue=BRIG_PROFILE_FULL
-
-typedef uint16_t BrigRegisterKind16_t;
-
-typedef uint8_t BrigRound8_t;                               //.defValue=BRIG_ROUND_NONE
-
-typedef uint8_t BrigSamplerAddressing8_t;                   //.defValue=BRIG_ADDRESSING_CLAMP_TO_EDGE
-
-typedef uint8_t BrigSamplerCoordNormalization8_t;
-
-typedef uint8_t BrigSamplerFilter8_t;
-
-typedef uint8_t BrigSamplerQuery8_t;
-
-typedef uint32_t BrigSectionIndex32_t;
-
-typedef uint8_t BrigSegCvtModifier8_t;
-
-typedef uint8_t BrigSegment8_t;                             //.defValue=BRIG_SEGMENT_NONE
-
-typedef uint32_t BrigStringOffset32_t;                      //.defValue=0       //.wtype=StrRef
-
-typedef uint16_t BrigType16_t;
-
-typedef uint8_t BrigVariableModifier8_t;
-
-typedef uint8_t BrigWidth8_t;
-
-typedef uint32_t BrigExceptions32_t;
-
-enum BrigKind {
-
-    //.nollvm
-    //
-    //.wname={ s/^BRIG_KIND//; MACRO2Name($_) }
-    //.mnemo=$wname{ $wname }
-    //
-    //.sizeof=$wname{ "sizeof(".$structs->{"Brig".$wname}->{rawbrig}.")" }
-    //.sizeof_switch //.sizeof_proto="int size_of_brig_record(unsigned arg)" //.sizeof_default="return -1"
-    //
-    //.isBodyOnly={ "false" }
-    //.isBodyOnly_switch //.isBodyOnly_proto="bool isBodyOnly(Directive d)" //.isBodyOnly_arg="d.kind()"
-    //.isBodyOnly_default="assert(false); return false"
-    //
-    //.isToplevelOnly={ "false" }
-    //.isToplevelOnly_switch //.isToplevelOnly_proto="bool isToplevelOnly(Directive d)" //.isToplevelOnly_arg="d.kind()"
-    //.isToplevelOnly_default="assert(false); return false"
-
-    BRIG_KIND_NONE = 0x0000,                        //.skip
-
-    BRIG_KIND_DIRECTIVE_BEGIN = 0x1000,             //.skip
-    BRIG_KIND_DIRECTIVE_ARG_BLOCK_END = 0x1000,     //.isBodyOnly=true
-    BRIG_KIND_DIRECTIVE_ARG_BLOCK_START = 0x1001,   //.isBodyOnly=true
-    BRIG_KIND_DIRECTIVE_COMMENT = 0x1002,
-    BRIG_KIND_DIRECTIVE_CONTROL = 0x1003,           //.isBodyOnly=true
-    BRIG_KIND_DIRECTIVE_EXTENSION = 0x1004,         //.isToplevelOnly=true
-    BRIG_KIND_DIRECTIVE_FBARRIER = 0x1005,
-    BRIG_KIND_DIRECTIVE_FUNCTION = 0x1006,          //.isToplevelOnly=true
-    BRIG_KIND_DIRECTIVE_INDIRECT_FUNCTION = 0x1007, //.isToplevelOnly=true
-    BRIG_KIND_DIRECTIVE_KERNEL = 0x1008,            //.isToplevelOnly=true
-    BRIG_KIND_DIRECTIVE_LABEL = 0x1009,             //.isBodyOnly=true
-    BRIG_KIND_DIRECTIVE_LOC = 0x100a,
-    BRIG_KIND_DIRECTIVE_MODULE = 0x100b,            //.isToplevelOnly=true
-    BRIG_KIND_DIRECTIVE_PRAGMA = 0x100c,
-    BRIG_KIND_DIRECTIVE_SIGNATURE = 0x100d,         //.isToplevelOnly=true
-    BRIG_KIND_DIRECTIVE_VARIABLE = 0x100e,
-    BRIG_KIND_DIRECTIVE_END = 0x100f,               //.skip
-
-    BRIG_KIND_INST_BEGIN = 0x2000,                  //.skip
-    BRIG_KIND_INST_ADDR = 0x2000,
-    BRIG_KIND_INST_ATOMIC = 0x2001,
-    BRIG_KIND_INST_BASIC = 0x2002,
-    BRIG_KIND_INST_BR = 0x2003,
-    BRIG_KIND_INST_CMP = 0x2004,
-    BRIG_KIND_INST_CVT = 0x2005,
-    BRIG_KIND_INST_IMAGE = 0x2006,
-    BRIG_KIND_INST_LANE = 0x2007,
-    BRIG_KIND_INST_MEM = 0x2008,
-    BRIG_KIND_INST_MEM_FENCE = 0x2009,
-    BRIG_KIND_INST_MOD = 0x200a,
-    BRIG_KIND_INST_QUERY_IMAGE = 0x200b,
-    BRIG_KIND_INST_QUERY_SAMPLER = 0x200c,
-    BRIG_KIND_INST_QUEUE = 0x200d,
-    BRIG_KIND_INST_SEG = 0x200e,
-    BRIG_KIND_INST_SEG_CVT = 0x200f,
-    BRIG_KIND_INST_SIGNAL = 0x2010,
-    BRIG_KIND_INST_SOURCE_TYPE = 0x2011,
-    BRIG_KIND_INST_END = 0x2012,                    //.skip
-
-    BRIG_KIND_OPERAND_BEGIN = 0x3000,               //.skip
-    BRIG_KIND_OPERAND_ADDRESS = 0x3000,
-    BRIG_KIND_OPERAND_ALIGN = 0x3001,
-    BRIG_KIND_OPERAND_CODE_LIST = 0x3002,
-    BRIG_KIND_OPERAND_CODE_REF = 0x3003,
-    BRIG_KIND_OPERAND_CONSTANT_BYTES = 0x3004,
-    BRIG_KIND_OPERAND_RESERVED = 0x3005, //.skip
-    BRIG_KIND_OPERAND_CONSTANT_IMAGE = 0x3006,
-    BRIG_KIND_OPERAND_CONSTANT_OPERAND_LIST = 0x3007,
-    BRIG_KIND_OPERAND_CONSTANT_SAMPLER = 0x3008,
-    BRIG_KIND_OPERAND_OPERAND_LIST = 0x3009,
-    BRIG_KIND_OPERAND_REGISTER = 0x300a,
-    BRIG_KIND_OPERAND_STRING = 0x300b,
-    BRIG_KIND_OPERAND_WAVESIZE = 0x300c,
-    BRIG_KIND_OPERAND_END = 0x300d                  //.skip
-};
-
-enum BrigAlignment {
-
-    //.mnemo={ s/^BRIG_ALIGNMENT_//; lc }
-    //.mnemo_proto="const char* align2str(unsigned arg)"
-    //
-    //.bytes={ /(\d+)/ ? $1 : undef }
-    //.bytes_switch //.bytes_proto="unsigned align2num(unsigned arg)" //.bytes_default="assert(false); return -1"
-    //
-    //.rbytes=$bytes{ $bytes }
-    //.rbytes_switch //.rbytes_reverse //.rbytes_proto="BrigAlignment num2align(uint64_t arg)"
-    //.rbytes_default="return BRIG_ALIGNMENT_LAST"
-    //
-    //.print=$bytes{ $bytes>1 ? "_align($bytes)" : "" }
-
-    BRIG_ALIGNMENT_NONE = 0,                        //.no_mnemo
-    BRIG_ALIGNMENT_1 = 1,                           //.mnemo=""
-    BRIG_ALIGNMENT_2 = 2,
-    BRIG_ALIGNMENT_4 = 3,
-    BRIG_ALIGNMENT_8 = 4,
-    BRIG_ALIGNMENT_16 = 5,
-    BRIG_ALIGNMENT_32 = 6,
-    BRIG_ALIGNMENT_64 = 7,
-    BRIG_ALIGNMENT_128 = 8,
-    BRIG_ALIGNMENT_256 = 9,
-
-    BRIG_ALIGNMENT_LAST,                            //.skip
-    BRIG_ALIGNMENT_MAX = BRIG_ALIGNMENT_LAST - 1    //.skip
-};
-
-enum BrigAllocation {
-
-    //.mnemo={ s/^BRIG_ALLOCATION_//;lc }
-    //.mnemo_token=EAllocKind
-
-    BRIG_ALLOCATION_NONE = 0,       //.mnemo=""
-    BRIG_ALLOCATION_PROGRAM = 1,
-    BRIG_ALLOCATION_AGENT = 2,
-    BRIG_ALLOCATION_AUTOMATIC = 3
-};
-
-enum BrigAluModifierMask {
-    BRIG_ALU_FTZ = 1
-};
-
-enum BrigAtomicOperation {
-
-    //.tdcaption="Atomic Operations"
-    //
-    //.mnemo={ s/^BRIG_ATOMIC_//;lc }
-    //.mnemo_token=_EMAtomicOp
-    //.mnemo_context=EInstModifierInstAtomicContext
-    //
-    //.print=$mnemo{ "_$mnemo" }
-
-    BRIG_ATOMIC_ADD = 0,
-    BRIG_ATOMIC_AND = 1,
-    BRIG_ATOMIC_CAS = 2,
-    BRIG_ATOMIC_EXCH = 3,
-    BRIG_ATOMIC_LD = 4,
-    BRIG_ATOMIC_MAX = 5,
-    BRIG_ATOMIC_MIN = 6,
-    BRIG_ATOMIC_OR = 7,
-    BRIG_ATOMIC_ST = 8,
-    BRIG_ATOMIC_SUB = 9,
-    BRIG_ATOMIC_WRAPDEC = 10,
-    BRIG_ATOMIC_WRAPINC = 11,
-    BRIG_ATOMIC_XOR = 12,
-    BRIG_ATOMIC_WAIT_EQ = 13,
-    BRIG_ATOMIC_WAIT_NE = 14,
-    BRIG_ATOMIC_WAIT_LT = 15,
-    BRIG_ATOMIC_WAIT_GTE = 16,
-    BRIG_ATOMIC_WAITTIMEOUT_EQ = 17,
-    BRIG_ATOMIC_WAITTIMEOUT_NE = 18,
-    BRIG_ATOMIC_WAITTIMEOUT_LT = 19,
-    BRIG_ATOMIC_WAITTIMEOUT_GTE = 20
-};
-
-enum BrigCompareOperation {
-
-    //.tdcaption="Comparison Operators"
-    //
-    //.mnemo={ s/^BRIG_COMPARE_//;lc }
-    //.mnemo_token=_EMCompare
-    //
-    //.print=$mnemo{ "_$mnemo" }
-
-    BRIG_COMPARE_EQ = 0,
-    BRIG_COMPARE_NE = 1,
-    BRIG_COMPARE_LT = 2,
-    BRIG_COMPARE_LE = 3,
-    BRIG_COMPARE_GT = 4,
-    BRIG_COMPARE_GE = 5,
-    BRIG_COMPARE_EQU = 6,
-    BRIG_COMPARE_NEU = 7,
-    BRIG_COMPARE_LTU = 8,
-    BRIG_COMPARE_LEU = 9,
-    BRIG_COMPARE_GTU = 10,
-    BRIG_COMPARE_GEU = 11,
-    BRIG_COMPARE_NUM = 12,
-    BRIG_COMPARE_NAN = 13,
-    BRIG_COMPARE_SEQ = 14,
-    BRIG_COMPARE_SNE = 15,
-    BRIG_COMPARE_SLT = 16,
-    BRIG_COMPARE_SLE = 17,
-    BRIG_COMPARE_SGT = 18,
-    BRIG_COMPARE_SGE = 19,
-    BRIG_COMPARE_SGEU = 20,
-    BRIG_COMPARE_SEQU = 21,
-    BRIG_COMPARE_SNEU = 22,
-    BRIG_COMPARE_SLTU = 23,
-    BRIG_COMPARE_SLEU = 24,
-    BRIG_COMPARE_SNUM = 25,
-    BRIG_COMPARE_SNAN = 26,
-    BRIG_COMPARE_SGTU = 27
-};
-
-enum BrigControlDirective {
-
-    //.mnemo={ s/^BRIG_CONTROL_//;lc }
-    //.mnemo_token=EControl
-    //
-    //.print=$mnemo{ $mnemo }
-
-    BRIG_CONTROL_NONE = 0, //.skip
-    BRIG_CONTROL_ENABLEBREAKEXCEPTIONS = 1,
-    BRIG_CONTROL_ENABLEDETECTEXCEPTIONS = 2,
-    BRIG_CONTROL_MAXDYNAMICGROUPSIZE = 3,
-    BRIG_CONTROL_MAXFLATGRIDSIZE = 4,
-    BRIG_CONTROL_MAXFLATWORKGROUPSIZE = 5,
-    BRIG_CONTROL_REQUIREDDIM = 6,
-    BRIG_CONTROL_REQUIREDGRIDSIZE = 7,
-    BRIG_CONTROL_REQUIREDWORKGROUPSIZE = 8,
-    BRIG_CONTROL_REQUIRENOPARTIALWORKGROUPS = 9
-};
-
-enum BrigExecutableModifierMask {
-    //.nodump
-    BRIG_EXECUTABLE_DEFINITION = 1
-};
-
-enum BrigImageChannelOrder {
-
-    //.mnemo={ s/^BRIG_CHANNEL_ORDER_?//;lc }
-    //.mnemo_token=EImageOrder
-    //.mnemo_context=EImageOrderContext
-    //
-    //.print=$mnemo{ $mnemo }
-
-    BRIG_CHANNEL_ORDER_A = 0,
-    BRIG_CHANNEL_ORDER_R = 1,
-    BRIG_CHANNEL_ORDER_RX = 2,
-    BRIG_CHANNEL_ORDER_RG = 3,
-    BRIG_CHANNEL_ORDER_RGX = 4,
-    BRIG_CHANNEL_ORDER_RA = 5,
-    BRIG_CHANNEL_ORDER_RGB = 6,
-    BRIG_CHANNEL_ORDER_RGBX = 7,
-    BRIG_CHANNEL_ORDER_RGBA = 8,
-    BRIG_CHANNEL_ORDER_BGRA = 9,
-    BRIG_CHANNEL_ORDER_ARGB = 10,
-    BRIG_CHANNEL_ORDER_ABGR = 11,
-    BRIG_CHANNEL_ORDER_SRGB = 12,
-    BRIG_CHANNEL_ORDER_SRGBX = 13,
-    BRIG_CHANNEL_ORDER_SRGBA = 14,
-    BRIG_CHANNEL_ORDER_SBGRA = 15,
-    BRIG_CHANNEL_ORDER_INTENSITY = 16,
-    BRIG_CHANNEL_ORDER_LUMINANCE = 17,
-    BRIG_CHANNEL_ORDER_DEPTH = 18,
-    BRIG_CHANNEL_ORDER_DEPTH_STENCIL = 19,
-
-    // used internally
-    BRIG_CHANNEL_ORDER_UNKNOWN, //.mnemo="" // used when no order is specified
-
-    BRIG_CHANNEL_ORDER_FIRST_USER_DEFINED = 128 //.skip
-
-};
-
-enum BrigImageChannelType {
-
-    //.mnemo={ s/^BRIG_CHANNEL_TYPE_//;lc }
-    //.mnemo_token=EImageFormat
-    //
-    //.print=$mnemo{ $mnemo }
-
-    BRIG_CHANNEL_TYPE_SNORM_INT8 = 0,
-    BRIG_CHANNEL_TYPE_SNORM_INT16 = 1,
-    BRIG_CHANNEL_TYPE_UNORM_INT8 = 2,
-    BRIG_CHANNEL_TYPE_UNORM_INT16 = 3,
-    BRIG_CHANNEL_TYPE_UNORM_INT24 = 4,
-    BRIG_CHANNEL_TYPE_UNORM_SHORT_555 = 5,
-    BRIG_CHANNEL_TYPE_UNORM_SHORT_565 = 6,
-    BRIG_CHANNEL_TYPE_UNORM_INT_101010 = 7,
-    BRIG_CHANNEL_TYPE_SIGNED_INT8 = 8,
-    BRIG_CHANNEL_TYPE_SIGNED_INT16 = 9,
-    BRIG_CHANNEL_TYPE_SIGNED_INT32 = 10,
-    BRIG_CHANNEL_TYPE_UNSIGNED_INT8 = 11,
-    BRIG_CHANNEL_TYPE_UNSIGNED_INT16 = 12,
-    BRIG_CHANNEL_TYPE_UNSIGNED_INT32 = 13,
-    BRIG_CHANNEL_TYPE_HALF_FLOAT = 14,
-    BRIG_CHANNEL_TYPE_FLOAT = 15,
-
-    // used internally
-    BRIG_CHANNEL_TYPE_UNKNOWN, //.mnemo=""
-
-    BRIG_CHANNEL_TYPE_FIRST_USER_DEFINED = 128 //.skip
-};
-
-enum BrigImageGeometry {
-
-    //.tdcaption="Geometry"
-    //
-    //.mnemo={ s/^BRIG_GEOMETRY_//;lc }
-    //.mnemo_token=EImageGeometry
-    //
-    //.dim={/_([0-9]+D)(A)?/ ? $1+(defined $2?1:0) : undef}
-    //.dim_switch //.dim_proto="unsigned getBrigGeometryDim(unsigned geo)" //.dim_arg="geo"
-    //.dim_default="assert(0); return 0"
-    //
-    //.depth={/DEPTH$/?"true":"false"}
-    //.depth_switch //.depth_proto="bool isBrigGeometryDepth(unsigned geo)" //.depth_arg="geo"
-    //.depth_default="return false"
-
-    BRIG_GEOMETRY_1D = 0,
-    BRIG_GEOMETRY_2D = 1,
-    BRIG_GEOMETRY_3D = 2,
-    BRIG_GEOMETRY_1DA = 3,
-    BRIG_GEOMETRY_2DA = 4,
-    BRIG_GEOMETRY_1DB = 5,
-    BRIG_GEOMETRY_2DDEPTH = 6,
-    BRIG_GEOMETRY_2DADEPTH = 7,
-
-    // used internally
-    BRIG_GEOMETRY_UNKNOWN, //.mnemo=""
-
-    BRIG_GEOMETRY_FIRST_USER_DEFINED = 128 //.skip
-};
-
-enum BrigImageQuery {
-
-    //.mnemo={ s/^BRIG_IMAGE_QUERY_//;lc }
-    //
-    //.print=$mnemo{ $mnemo }
-
-    BRIG_IMAGE_QUERY_WIDTH = 0,
-    BRIG_IMAGE_QUERY_HEIGHT = 1,
-    BRIG_IMAGE_QUERY_DEPTH = 2,
-    BRIG_IMAGE_QUERY_ARRAY = 3,
-    BRIG_IMAGE_QUERY_CHANNELORDER = 4,
-    BRIG_IMAGE_QUERY_CHANNELTYPE = 5,
-    BRIG_IMAGE_QUERY_NUMMIPLEVELS = 6
-};
-
-enum BrigLinkage {
-
-    //.mnemo={ s/^BRIG_LINKAGE_//;s/NONE//;lc }
-
-    BRIG_LINKAGE_NONE = 0,
-    BRIG_LINKAGE_PROGRAM = 1,
-    BRIG_LINKAGE_MODULE = 2,
-    BRIG_LINKAGE_FUNCTION = 3,
-    BRIG_LINKAGE_ARG = 4
-};
-
-enum BrigMachineModel {
-
-    //.mnemo={ s/^BRIG_MACHINE_//; '$'.lc }
-    //.mnemo_token=ETargetMachine
-    //
-    //.print=$mnemo{ $mnemo }
-
-    BRIG_MACHINE_SMALL = 0,
-    BRIG_MACHINE_LARGE = 1,
-
-    BRIG_MACHINE_UNDEF = 2 //.skip
-};
-
-enum BrigMemoryModifierMask { //.tddef=0
-    BRIG_MEMORY_CONST = 1
-};
-
-enum BrigMemoryOrder {
-
-    //.mnemo={ s/^BRIG_MEMORY_ORDER_//; lc }
-    //.mnemo_token=_EMMemoryOrder
-    //
-    //.print=$mnemo{ "_$mnemo" }
-
-    BRIG_MEMORY_ORDER_NONE = 0,                 //.mnemo=""
-    BRIG_MEMORY_ORDER_RELAXED = 1,              //.mnemo=rlx
-    BRIG_MEMORY_ORDER_SC_ACQUIRE = 2,           //.mnemo=scacq
-    BRIG_MEMORY_ORDER_SC_RELEASE = 3,           //.mnemo=screl
-    BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE = 4,   //.mnemo=scar
-
-    BRIG_MEMORY_ORDER_LAST = 5 //.skip
-};
-
-enum BrigMemoryScope {
-
-    //.mnemo={ s/^BRIG_MEMORY_SCOPE_//; lc }
-    //.mnemo_token=_EMMemoryScope
-    //
-    //.print=$mnemo{ $mnemo }
-
-    BRIG_MEMORY_SCOPE_NONE = 0,         //.mnemo=""
-    BRIG_MEMORY_SCOPE_WORKITEM = 1,     //.mnemo=""
-    BRIG_MEMORY_SCOPE_WAVEFRONT = 2,    //.mnemo=wave
-    BRIG_MEMORY_SCOPE_WORKGROUP = 3,    //.mnemo=wg
-    BRIG_MEMORY_SCOPE_AGENT = 4,        //.mnemo=agent
-    BRIG_MEMORY_SCOPE_SYSTEM = 5,       //.mnemo=system
-
-    BRIG_MEMORY_SCOPE_LAST = 6 //.skip
-};
-
-enum BrigOpcode {
-
-    //.tdcaption="Instruction Opcodes"
-    //
-    //.k={ "BASIC" }
-    //.pscode=$k{ MACRO2Name("_".$k) }
-    //.opcodeparser=$pscode{ return $pscode && "parseMnemo$pscode" }
-    //.opcodeparser_incfile=ParserUtilities
-    //.opcodeparser_switch //.opcodeparser_proto="OpcodeParser getOpcodeParser(BrigOpcode16_t arg)" //.opcodeparser_default="return parseMnemoBasic"
-    //
-    //.psopnd={undef}
-    //.opndparser=$psopnd{ return $psopnd && "&Parser::parse$psopnd" }
-    //.opndparser_incfile=ParserUtilities
-    //.opndparser_switch //.opndparser_proto="Parser::OperandParser Parser::getOperandParser(BrigOpcode16_t arg)" //.opndparser_default="return &Parser::parseOperands"
-    //
-    //.mnemo={ s/^BRIG_OPCODE_//; s/GCN([^_])/GCN_$1/; lc }
-    //.mnemo_scanner=Instructions //.mnemo_token=EInstruction
-    //.mnemo_context=EDefaultContext
-    //
-    //.has_memory_order={undef}
-    //.semsupport=$has_memory_order{ return $has_memory_order && "true" }
-    //
-    //.hasType=$k{ return ($k and $k eq "BASIC_NO_TYPE") ? "false" : undef; }
-    //.hasType_switch //.hasType_proto="bool instHasType(BrigOpcode16_t arg)" //.hasType_default="return true"
-    //
-    //.opcodevis=$pscode{ s/^BRIG_OPCODE_//; sprintf("%-47s(","vis.visitOpcode_".$_) . ($pscode =~m/^(BasicOrMod|Nop)$/? "inst" : "HSAIL_ASM::Inst". ($pscode=~m/BasicNoType/? "Basic":$pscode) ."(inst)").")" }
-    //.opcodevis_switch //.opcodevis_proto="template <typename RetType, typename Visitor> RetType visitOpcode_gen(HSAIL_ASM::Inst inst, Visitor& vis)"
-    //.opcodevis_arg="inst.opcode()" //.opcodevis_default="return RetType()"
-    //.opcodevis_incfile=ItemUtils
-    //
-    //.ftz=$k{ return ($k eq "BASIC_OR_MOD" or $k eq "CMP" or $k eq "CVT") ? "true" : undef }
-    //.ftz_incfile=ItemUtils //.ftz_switch //.ftz_proto="inline bool instSupportsFtz(BrigOpcode16_t arg)" //.ftz_default="return false"
-    //
-    //.vecOpndIndex={undef}
-    //.vecOpndIndex_switch  //.vecOpndIndex_proto="int vecOpndIndex(BrigOpcode16_t arg)" //.vecOpndIndex_default="return -1"
-    //.vecOpndIndex_incfile=ParserUtilities
-    //
-    //.numdst={undef}
-    //.numdst_switch //.numdst_proto="int instNumDstOperands(BrigOpcode16_t arg)" //.numdst_default="return 1"
-    //
-    //.print=$mnemo{ $mnemo }
-
-    BRIG_OPCODE_NOP = 0,                    //.k=NOP            //.hasType=false
-    BRIG_OPCODE_ABS = 1,                    //.k=BASIC_OR_MOD
-    BRIG_OPCODE_ADD = 2,                    //.k=BASIC_OR_MOD
-    BRIG_OPCODE_BORROW = 3,
-    BRIG_OPCODE_CARRY = 4,
-    BRIG_OPCODE_CEIL = 5,                   //.k=BASIC_OR_MOD
-    BRIG_OPCODE_COPYSIGN = 6,               //.k=BASIC_OR_MOD
-    BRIG_OPCODE_DIV = 7,                    //.k=BASIC_OR_MOD
-    BRIG_OPCODE_FLOOR = 8,                  //.k=BASIC_OR_MOD
-    BRIG_OPCODE_FMA = 9,                    //.k=BASIC_OR_MOD
-    BRIG_OPCODE_FRACT = 10,                 //.k=BASIC_OR_MOD
-    BRIG_OPCODE_MAD = 11,                   //.k=BASIC_OR_MOD
-    BRIG_OPCODE_MAX = 12,                   //.k=BASIC_OR_MOD
-    BRIG_OPCODE_MIN = 13,                   //.k=BASIC_OR_MOD
-    BRIG_OPCODE_MUL = 14,                   //.k=BASIC_OR_MOD
-    BRIG_OPCODE_MULHI = 15,                 //.k=BASIC_OR_MOD
-    BRIG_OPCODE_NEG = 16,                   //.k=BASIC_OR_MOD
-    BRIG_OPCODE_REM = 17,
-    BRIG_OPCODE_RINT = 18,                  //.k=BASIC_OR_MOD
-    BRIG_OPCODE_SQRT = 19,                  //.k=BASIC_OR_MOD
-    BRIG_OPCODE_SUB = 20,                   //.k=BASIC_OR_MOD
-    BRIG_OPCODE_TRUNC = 21,                 //.k=BASIC_OR_MOD
-    BRIG_OPCODE_MAD24 = 22,
-    BRIG_OPCODE_MAD24HI = 23,
-    BRIG_OPCODE_MUL24 = 24,
-    BRIG_OPCODE_MUL24HI = 25,
-    BRIG_OPCODE_SHL = 26,
-    BRIG_OPCODE_SHR = 27,
-    BRIG_OPCODE_AND = 28,
-    BRIG_OPCODE_NOT = 29,
-    BRIG_OPCODE_OR = 30,
-    BRIG_OPCODE_POPCOUNT = 31,              //.k=SOURCE_TYPE
-    BRIG_OPCODE_XOR = 32,
-    BRIG_OPCODE_BITEXTRACT = 33,
-    BRIG_OPCODE_BITINSERT = 34,
-    BRIG_OPCODE_BITMASK = 35,
-    BRIG_OPCODE_BITREV = 36,
-    BRIG_OPCODE_BITSELECT = 37,
-    BRIG_OPCODE_FIRSTBIT = 38,              //.k=SOURCE_TYPE
-    BRIG_OPCODE_LASTBIT = 39,               //.k=SOURCE_TYPE
-    BRIG_OPCODE_COMBINE = 40,               //.k=SOURCE_TYPE    //.vecOpndIndex=1
-    BRIG_OPCODE_EXPAND = 41,                //.k=SOURCE_TYPE    //.vecOpndIndex=0
-    BRIG_OPCODE_LDA = 42,                   //.k=ADDR
-    BRIG_OPCODE_MOV = 43,
-    BRIG_OPCODE_SHUFFLE = 44,
-    BRIG_OPCODE_UNPACKHI = 45,
-    BRIG_OPCODE_UNPACKLO = 46,
-    BRIG_OPCODE_PACK = 47,                  //.k=SOURCE_TYPE
-    BRIG_OPCODE_UNPACK = 48,                //.k=SOURCE_TYPE
-    BRIG_OPCODE_CMOV = 49,
-    BRIG_OPCODE_CLASS = 50,                 //.k=SOURCE_TYPE
-    BRIG_OPCODE_NCOS = 51,
-    BRIG_OPCODE_NEXP2 = 52,
-    BRIG_OPCODE_NFMA = 53,
-    BRIG_OPCODE_NLOG2 = 54,
-    BRIG_OPCODE_NRCP = 55,
-    BRIG_OPCODE_NRSQRT = 56,
-    BRIG_OPCODE_NSIN = 57,
-    BRIG_OPCODE_NSQRT = 58,
-    BRIG_OPCODE_BITALIGN = 59,
-    BRIG_OPCODE_BYTEALIGN = 60,
-    BRIG_OPCODE_PACKCVT = 61,               //.k=SOURCE_TYPE
-    BRIG_OPCODE_UNPACKCVT = 62,             //.k=SOURCE_TYPE
-    BRIG_OPCODE_LERP = 63,
-    BRIG_OPCODE_SAD = 64,                   //.k=SOURCE_TYPE
-    BRIG_OPCODE_SADHI = 65,                 //.k=SOURCE_TYPE
-    BRIG_OPCODE_SEGMENTP = 66,              //.k=SEG_CVT
-    BRIG_OPCODE_FTOS = 67,                  //.k=SEG_CVT
-    BRIG_OPCODE_STOF = 68,                  //.k=SEG_CVT
-    BRIG_OPCODE_CMP = 69,                   //.k=CMP
-    BRIG_OPCODE_CVT = 70,                   //.k=CVT
-    BRIG_OPCODE_LD = 71,                    //.k=MEM            //.has_memory_order //.vecOpndIndex=0
-    BRIG_OPCODE_ST = 72,                    //.k=MEM            //.has_memory_order //.vecOpndIndex=0 //.numdst=0
-    BRIG_OPCODE_ATOMIC = 73,                //.k=ATOMIC
-    BRIG_OPCODE_ATOMICNORET = 74,           //.k=ATOMIC         //.numdst=0
-    BRIG_OPCODE_SIGNAL = 75,                //.k=SIGNAL
-    BRIG_OPCODE_SIGNALNORET = 76,           //.k=SIGNAL         //.numdst=0
-    BRIG_OPCODE_MEMFENCE = 77,              //.k=MEM_FENCE      //.numdst=0
-    BRIG_OPCODE_RDIMAGE = 78,               //.k=IMAGE          //.vecOpndIndex=0
-    BRIG_OPCODE_LDIMAGE = 79,               //.k=IMAGE          //.vecOpndIndex=0
-    BRIG_OPCODE_STIMAGE = 80,               //.k=IMAGE          //.vecOpndIndex=0 //.numdst=0
-    BRIG_OPCODE_IMAGEFENCE = 81,            //.k=BASIC_NO_TYPE
-    BRIG_OPCODE_QUERYIMAGE = 82,            //.k=QUERY_IMAGE
-    BRIG_OPCODE_QUERYSAMPLER = 83,          //.k=QUERY_SAMPLER
-    BRIG_OPCODE_CBR = 84,                   //.k=BR             //.numdst=0
-    BRIG_OPCODE_BR = 85,                    //.k=BR             //.numdst=0     //.hasType=false
-    BRIG_OPCODE_SBR = 86,                   //.k=BR             //.numdst=0     //.psopnd=SbrOperands
-    BRIG_OPCODE_BARRIER = 87,               //.k=BR             //.numdst=0     //.hasType=false
-    BRIG_OPCODE_WAVEBARRIER = 88,           //.k=BR             //.numdst=0     //.hasType=false
-    BRIG_OPCODE_ARRIVEFBAR = 89,            //.k=BR             //.numdst=0     //.hasType=false
-    BRIG_OPCODE_INITFBAR = 90,              //.k=BASIC_NO_TYPE  //.numdst=0     //.hasType=false
-    BRIG_OPCODE_JOINFBAR = 91,              //.k=BR             //.numdst=0     //.hasType=false
-    BRIG_OPCODE_LEAVEFBAR = 92,             //.k=BR             //.numdst=0     //.hasType=false
-    BRIG_OPCODE_RELEASEFBAR = 93,           //.k=BASIC_NO_TYPE  //.numdst=0
-    BRIG_OPCODE_WAITFBAR = 94,              //.k=BR             //.numdst=0     //.hasType=false
-    BRIG_OPCODE_LDF = 95,
-    BRIG_OPCODE_ACTIVELANECOUNT = 96,       //.k=LANE
-    BRIG_OPCODE_ACTIVELANEID = 97,          //.k=LANE
-    BRIG_OPCODE_ACTIVELANEMASK = 98,        //.k=LANE           //.vecOpndIndex=0
-    BRIG_OPCODE_ACTIVELANEPERMUTE = 99,     //.k=LANE
-    BRIG_OPCODE_CALL = 100,                 //.k=BR             //.psopnd=CallOperands //.numdst=0 //.hasType=false
-    BRIG_OPCODE_SCALL = 101,                //.k=BR             //.psopnd=CallOperands //.numdst=0
-    BRIG_OPCODE_ICALL = 102,                //.k=BR             //.psopnd=CallOperands //.numdst=0
-    BRIG_OPCODE_RET = 103,                  //.k=BASIC_NO_TYPE
-    BRIG_OPCODE_ALLOCA = 104,               //.k=MEM
-    BRIG_OPCODE_CURRENTWORKGROUPSIZE = 105,
-    BRIG_OPCODE_CURRENTWORKITEMFLATID = 106,
-    BRIG_OPCODE_DIM = 107,
-    BRIG_OPCODE_GRIDGROUPS = 108,
-    BRIG_OPCODE_GRIDSIZE = 109,
-    BRIG_OPCODE_PACKETCOMPLETIONSIG = 110,
-    BRIG_OPCODE_PACKETID = 111,
-    BRIG_OPCODE_WORKGROUPID = 112,
-    BRIG_OPCODE_WORKGROUPSIZE = 113,
-    BRIG_OPCODE_WORKITEMABSID = 114,
-    BRIG_OPCODE_WORKITEMFLATABSID = 115,
-    BRIG_OPCODE_WORKITEMFLATID = 116,
-    BRIG_OPCODE_WORKITEMID = 117,
-    BRIG_OPCODE_CLEARDETECTEXCEPT = 118,    //.numdst=0
-    BRIG_OPCODE_GETDETECTEXCEPT = 119,
-    BRIG_OPCODE_SETDETECTEXCEPT = 120,      //.numdst=0
-    BRIG_OPCODE_ADDQUEUEWRITEINDEX = 121,   //.k=QUEUE
-    BRIG_OPCODE_CASQUEUEWRITEINDEX = 122,   //.k=QUEUE
-    BRIG_OPCODE_LDQUEUEREADINDEX = 123,     //.k=QUEUE
-    BRIG_OPCODE_LDQUEUEWRITEINDEX = 124,    //.k=QUEUE
-    BRIG_OPCODE_STQUEUEREADINDEX = 125,     //.k=QUEUE      //.numdst=0
-    BRIG_OPCODE_STQUEUEWRITEINDEX = 126,    //.k=QUEUE      //.numdst=0
-    BRIG_OPCODE_CLOCK = 127,
-    BRIG_OPCODE_CUID = 128,
-    BRIG_OPCODE_DEBUGTRAP = 129,            //.numdst=0
-    BRIG_OPCODE_GROUPBASEPTR = 130,
-    BRIG_OPCODE_KERNARGBASEPTR = 131,
-    BRIG_OPCODE_LANEID = 132,
-    BRIG_OPCODE_MAXCUID = 133,
-    BRIG_OPCODE_MAXWAVEID = 134,
-    BRIG_OPCODE_NULLPTR = 135,              //.k=SEG
-    BRIG_OPCODE_WAVEID = 136,
-    BRIG_OPCODE_FIRST_USER_DEFINED = 32768, //.skip
-
-    BRIG_OPCODE_GCNMADU = (1u << 15) | 0,           //.k=BASIC_NO_TYPE
-    BRIG_OPCODE_GCNMADS = (1u << 15) | 1,           //.k=BASIC_NO_TYPE
-    BRIG_OPCODE_GCNMAX3 = (1u << 15) | 2,
-    BRIG_OPCODE_GCNMIN3 = (1u << 15) | 3,
-    BRIG_OPCODE_GCNMED3 = (1u << 15) | 4,
-    BRIG_OPCODE_GCNFLDEXP = (1u << 15) | 5,         //.k=BASIC_OR_MOD
-    BRIG_OPCODE_GCNFREXP_EXP = (1u << 15) | 6,      //.k=BASIC_OR_MOD
-    BRIG_OPCODE_GCNFREXP_MANT = (1u << 15) | 7,     //.k=BASIC_OR_MOD
-    BRIG_OPCODE_GCNTRIG_PREOP = (1u << 15) | 8,     //.k=BASIC_OR_MOD
-    BRIG_OPCODE_GCNBFM = (1u << 15) | 9,
-    BRIG_OPCODE_GCNLD = (1u << 15) | 10,            //.k=MEM            //.has_memory_order //.vecOpndIndex=0
-    BRIG_OPCODE_GCNST = (1u << 15) | 11,            //.k=MEM            //.has_memory_order //.vecOpndIndex=0
-    BRIG_OPCODE_GCNATOMIC = (1u << 15) | 12,        //.k=ATOMIC
-    BRIG_OPCODE_GCNATOMICNORET = (1u << 15) | 13,   //.k=ATOMIC         //.mnemo=gcn_atomicNoRet
-    BRIG_OPCODE_GCNSLEEP = (1u << 15) | 14,
-    BRIG_OPCODE_GCNPRIORITY = (1u << 15) | 15,
-    BRIG_OPCODE_GCNREGIONALLOC = (1u << 15) | 16,   //.k=BASIC_NO_TYPE //.mnemo=gcn_region_alloc
-    BRIG_OPCODE_GCNMSAD = (1u << 15) | 17,
-    BRIG_OPCODE_GCNQSAD = (1u << 15) | 18,
-    BRIG_OPCODE_GCNMQSAD = (1u << 15) | 19,
-    BRIG_OPCODE_GCNMQSAD4 = (1u << 15) | 20,        //.k=BASIC_NO_TYPE
-    BRIG_OPCODE_GCNSADW = (1u << 15) | 21,
-    BRIG_OPCODE_GCNSADD = (1u << 15) | 22,
-    BRIG_OPCODE_GCNCONSUME = (1u << 15) | 23,       //.k=ADDR           //.mnemo=gcn_atomic_consume
-    BRIG_OPCODE_GCNAPPEND = (1u << 15) | 24,        //.k=ADDR           //.mnemo=gcn_atomic_append
-    BRIG_OPCODE_GCNB4XCHG = (1u << 15) | 25,        //.mnemo=gcn_b4xchg
-    BRIG_OPCODE_GCNB32XCHG = (1u << 15) | 26,       //.mnemo=gcn_b32xchg
-    BRIG_OPCODE_GCNMAX = (1u << 15) | 27,
-    BRIG_OPCODE_GCNMIN = (1u << 15) | 28,
-    BRIG_OPCODE_GCNDIVRELAXED = (1u << 15) | 29,    //.k=BASIC_OR_MOD
-    BRIG_OPCODE_GCNDIVRELAXEDNARROW = (1u << 15) | 30,
-
-    BRIG_OPCODE_AMDRDIMAGELOD  = (1u << 15) | 31,    //.k=IMAGE //.mnemo=amd_rdimagelod  //.vecOpndIndex=0
-    BRIG_OPCODE_AMDRDIMAGEGRAD = (1u << 15) | 32,    //.k=IMAGE //.mnemo=amd_rdimagegrad //.vecOpndIndex=0
-    BRIG_OPCODE_AMDLDIMAGEMIP  = (1u << 15) | 33,    //.k=IMAGE //.mnemo=amd_ldimagemip //.vecOpndIndex=0
-    BRIG_OPCODE_AMDSTIMAGEMIP  = (1u << 15) | 34,    //.k=IMAGE //.mnemo=amd_stimagemip //.vecOpndIndex=0 //.numdst=0
-    BRIG_OPCODE_AMDQUERYIMAGE  = (1u << 15) | 35     //.k=QUERY_IMAGE //.mnemo=amd_queryimage
-};
-
-enum BrigPack {
-
-    //.tdcaption="Packing"
-    //
-    //.mnemo={ s/^BRIG_PACK_//;s/SAT$/_sat/;lc }
-    //.mnemo_token=_EMPacking
-    //
-    //.print=$mnemo{ "_$mnemo" }
-
-    BRIG_PACK_NONE = 0, //.mnemo=""
-    BRIG_PACK_PP = 1,
-    BRIG_PACK_PS = 2,
-    BRIG_PACK_SP = 3,
-    BRIG_PACK_SS = 4,
-    BRIG_PACK_S = 5,
-    BRIG_PACK_P = 6,
-    BRIG_PACK_PPSAT = 7,
-    BRIG_PACK_PSSAT = 8,
-    BRIG_PACK_SPSAT = 9,
-    BRIG_PACK_SSSAT = 10,
-    BRIG_PACK_SSAT = 11,
-    BRIG_PACK_PSAT = 12
-};
-
-enum BrigProfile {
-
-    //.mnemo={ s/^BRIG_PROFILE_//;'$'.lc }
-    //.mnemo_token=ETargetProfile
-    //
-    //.print=$mnemo{ $mnemo }
-
-    BRIG_PROFILE_BASE = 0,
-    BRIG_PROFILE_FULL = 1,
-
-    BRIG_PROFILE_UNDEF = 2 //.skip
-};
-
-enum BrigRegisterKind {
-
-    //.mnemo={ s/^BRIG_REGISTER_KIND_//;'$'.lc(substr($_,0,1)) }
-    //
-    //.bits={ }
-    //.bits_switch //.bits_proto="unsigned getRegBits(BrigRegisterKind16_t arg)" //.bits_default="return (unsigned)-1"
-    //
-    //.nollvm
-
-    BRIG_REGISTER_KIND_CONTROL = 0, //.bits=1
-    BRIG_REGISTER_KIND_SINGLE = 1,  //.bits=32
-    BRIG_REGISTER_KIND_DOUBLE = 2,  //.bits=64
-    BRIG_REGISTER_KIND_QUAD = 3     //.bits=128
-};
-
-enum BrigRound {
-
-    //.mnemo={}
-    //.mnemo_fn=round2str //.mnemo_token=_EMRound
-    //
-    //.sat={/_SAT$/? "true" : "false"}
-    //.sat_switch //.sat_proto="bool isSatRounding(unsigned rounding)" //.sat_arg="rounding"
-    //.sat_default="return false"
-    //
-    //.sig={/_SIGNALING_/? "true" : "false"}
-    //.sig_switch //.sig_proto="bool isSignalingRounding(unsigned rounding)" //.sig_arg="rounding"
-    //.sig_default="return false"
-    //
-    //.int={/_INTEGER_/? "true" : "false"}
-    //.int_switch //.int_proto="bool isIntRounding(unsigned rounding)" //.int_arg="rounding"
-    //.int_default="return false"
-    //
-    //.flt={/_FLOAT_/? "true" : "false"}
-    //.flt_switch //.flt_proto="bool isFloatRounding(unsigned rounding)" //.flt_arg="rounding"
-    //.flt_default="return false"
-    //
-    //.print=$mnemo{ "_$mnemo" }
-
-    BRIG_ROUND_NONE = 0,                                    //.no_mnemo
-    BRIG_ROUND_FLOAT_DEFAULT = 1,                           //.no_mnemo
-    BRIG_ROUND_FLOAT_NEAR_EVEN = 2,                         //.mnemo=near
-    BRIG_ROUND_FLOAT_ZERO = 3,                              //.mnemo=zero
-    BRIG_ROUND_FLOAT_PLUS_INFINITY = 4,                     //.mnemo=up
-    BRIG_ROUND_FLOAT_MINUS_INFINITY = 5,                    //.mnemo=down
-    BRIG_ROUND_INTEGER_NEAR_EVEN = 6,                       //.mnemo=neari
-    BRIG_ROUND_INTEGER_ZERO = 7,                            //.mnemo=zeroi
-    BRIG_ROUND_INTEGER_PLUS_INFINITY = 8,                   //.mnemo=upi
-    BRIG_ROUND_INTEGER_MINUS_INFINITY = 9,                  //.mnemo=downi
-    BRIG_ROUND_INTEGER_NEAR_EVEN_SAT = 10,                  //.mnemo=neari_sat
-    BRIG_ROUND_INTEGER_ZERO_SAT = 11,                       //.mnemo=zeroi_sat
-    BRIG_ROUND_INTEGER_PLUS_INFINITY_SAT = 12,              //.mnemo=upi_sat
-    BRIG_ROUND_INTEGER_MINUS_INFINITY_SAT = 13,             //.mnemo=downi_sat
-    BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN = 14,            //.mnemo=sneari
-    BRIG_ROUND_INTEGER_SIGNALING_ZERO = 15,                 //.mnemo=szeroi
-    BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY = 16,        //.mnemo=supi
-    BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY = 17,       //.mnemo=sdowni
-    BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN_SAT = 18,        //.mnemo=sneari_sat
-    BRIG_ROUND_INTEGER_SIGNALING_ZERO_SAT = 19,             //.mnemo=szeroi_sat
-    BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY_SAT = 20,    //.mnemo=supi_sat
-    BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY_SAT = 21    //.mnemo=sdowni_sat
-};
-
-enum BrigSamplerAddressing {
-
-    //.mnemo={ s/^BRIG_ADDRESSING_//;lc }
-    //.mnemo_token=ESamplerAddressingMode
-
-    BRIG_ADDRESSING_UNDEFINED = 0,
-    BRIG_ADDRESSING_CLAMP_TO_EDGE = 1,
-    BRIG_ADDRESSING_CLAMP_TO_BORDER = 2,
-    BRIG_ADDRESSING_REPEAT = 3,
-    BRIG_ADDRESSING_MIRRORED_REPEAT = 4,
-
-    BRIG_ADDRESSING_FIRST_USER_DEFINED = 128 //.skip
-};
-
-enum BrigSamplerCoordNormalization {
-
-    //.mnemo={ s/^BRIG_COORD_//;lc }
-    //.mnemo_token=ESamplerCoord
-    //
-    //.print=$mnemo{ $mnemo }
-
-    BRIG_COORD_UNNORMALIZED = 0,
-    BRIG_COORD_NORMALIZED = 1
-};
-
-enum BrigSamplerFilter {
-
-    //.mnemo={ s/^BRIG_FILTER_//;lc }
-    //
-    //.print=$mnemo{ $mnemo }
-
-    BRIG_FILTER_NEAREST = 0,
-    BRIG_FILTER_LINEAR = 1,
-
-    BRIG_FILTER_FIRST_USER_DEFINED = 128 //.skip
-};
-
-enum BrigSamplerQuery {
-
-    //.mnemo={ s/^BRIG_SAMPLER_QUERY_//;lc }
-    //.mnemo_token=_EMSamplerQuery
-    //
-    //.print=$mnemo{ $mnemo }
-
-    BRIG_SAMPLER_QUERY_ADDRESSING = 0,
-    BRIG_SAMPLER_QUERY_COORD = 1,
-    BRIG_SAMPLER_QUERY_FILTER = 2
-};
-
-enum BrigSectionIndex {
-
-    //.nollvm
-    //
-    //.mnemo={ s/^BRIG_SECTION_INDEX_/HSA_/;lc }
-
-    BRIG_SECTION_INDEX_DATA = 0,
-    BRIG_SECTION_INDEX_CODE = 1,
-    BRIG_SECTION_INDEX_OPERAND = 2,
-    BRIG_SECTION_INDEX_BEGIN_IMPLEMENTATION_DEFINED = 3,
-
-    // used internally
-    BRIG_SECTION_INDEX_IMPLEMENTATION_DEFINED = BRIG_SECTION_INDEX_BEGIN_IMPLEMENTATION_DEFINED //.skip
-};
-
-enum BrigSegCvtModifierMask {
-    BRIG_SEG_CVT_NONULL = 1         //.mnemo="nonull" //.print="_nonull"
-};
-
-enum BrigSegment {
-
-    //.mnemo={ s/^BRIG_SEGMENT_//;lc}
-    //.mnemo_token=_EMSegment
-    //.mnemo_context=EInstModifierContext
-    //
-    //.print=$mnemo{ $mnemo ? "_$mnemo" : "" }
-
-    BRIG_SEGMENT_NONE = 0, //.mnemo=""
-    BRIG_SEGMENT_FLAT = 1, //.mnemo=""
-    BRIG_SEGMENT_GLOBAL = 2,
-    BRIG_SEGMENT_READONLY = 3,
-    BRIG_SEGMENT_KERNARG = 4,
-    BRIG_SEGMENT_GROUP = 5,
-    BRIG_SEGMENT_PRIVATE = 6,
-    BRIG_SEGMENT_SPILL = 7,
-    BRIG_SEGMENT_ARG = 8,
-
-    BRIG_SEGMENT_FIRST_USER_DEFINED = 128, //.skip
-
-    BRIG_SEGMENT_AMD_GCN = 9, //.mnemo="region"
-};
-
-enum BrigPackedTypeBits {
-
-    //.nodump
-    //
-    //.nollvm
-
-    BRIG_TYPE_BASE_SIZE  = 5,
-    BRIG_TYPE_PACK_SIZE  = 2,
-    BRIG_TYPE_ARRAY_SIZE = 1,
-
-    BRIG_TYPE_BASE_SHIFT  = 0,
-    BRIG_TYPE_PACK_SHIFT  = BRIG_TYPE_BASE_SHIFT + BRIG_TYPE_BASE_SIZE,
-    BRIG_TYPE_ARRAY_SHIFT = BRIG_TYPE_PACK_SHIFT + BRIG_TYPE_PACK_SIZE,
-
-    BRIG_TYPE_BASE_MASK  = ((1 << BRIG_TYPE_BASE_SIZE)  - 1) << BRIG_TYPE_BASE_SHIFT,
-    BRIG_TYPE_PACK_MASK  = ((1 << BRIG_TYPE_PACK_SIZE)  - 1) << BRIG_TYPE_PACK_SHIFT,
-    BRIG_TYPE_ARRAY_MASK = ((1 << BRIG_TYPE_ARRAY_SIZE) - 1) << BRIG_TYPE_ARRAY_SHIFT,
-
-    BRIG_TYPE_PACK_NONE = 0 << BRIG_TYPE_PACK_SHIFT,
-    BRIG_TYPE_PACK_32   = 1 << BRIG_TYPE_PACK_SHIFT,
-    BRIG_TYPE_PACK_64   = 2 << BRIG_TYPE_PACK_SHIFT,
-    BRIG_TYPE_PACK_128  = 3 << BRIG_TYPE_PACK_SHIFT,
-
-    BRIG_TYPE_ARRAY     = 1 << BRIG_TYPE_ARRAY_SHIFT
-};
-
-enum BrigType {
-
-    //.numBits={ /ARRAY$/ ? undef : /([0-9]+)X([0-9]+)/ ? $1*$2 : /([0-9]+)/ ? $1 : undef }
-    //.numBits_switch //.numBits_proto="unsigned getBrigTypeNumBits(unsigned arg)" //.numBits_default="assert(0); return 0"
-    //.numBytes=$numBits{ $numBits > 1 ? $numBits/8 : undef }
-    //.numBytes_switch //.numBytes_proto="unsigned getBrigTypeNumBytes(unsigned arg)" //.numBytes_default="assert(0); return 0"
-    //
-    //.mnemo={ s/^BRIG_TYPE_//;lc }
-    //.mnemo_token=_EMType
-    //
-    //.array={/ARRAY$/?"true":"false"}
-    //.array_switch //.array_proto="bool isArrayType(unsigned type)" //.array_arg="type"
-    //.array_default="return false"
-    //
-    //.a2e={/(.*)_ARRAY$/? $1 : "BRIG_TYPE_NONE"}
-    //.a2e_switch //.a2e_proto="unsigned arrayType2elementType(unsigned type)" //.a2e_arg="type"
-    //.a2e_default="return BRIG_TYPE_NONE"
-    //
-    //.e2a={/_ARRAY$/? "BRIG_TYPE_NONE" : /_NONE$/ ? "BRIG_TYPE_NONE" : /_B1$/ ? "BRIG_TYPE_NONE" : $_ . "_ARRAY"}
-    //.e2a_switch //.e2a_proto="unsigned elementType2arrayType(unsigned type)" //.e2a_arg="type"
-    //.e2a_default="return BRIG_TYPE_NONE"
-    //
-    //.t2s={s/^BRIG_TYPE_//;lc s/_ARRAY$/[]/;lc}
-    //.t2s_switch //.t2s_proto="const char* type2name(unsigned type)" //.t2s_arg="type"
-    //.t2s_default="return NULL"
-    //
-    //.dispatch_switch //.dispatch_incfile=TemplateUtilities
-    //.dispatch_proto="template<typename RetType, typename Visitor>\nRetType dispatchByType_gen(unsigned type, Visitor& v)"
-    //.dispatch={ /ARRAY$/ ? "v.visitNone(type)" : /^BRIG_TYPE_([BUSF]|SIG)[0-9]+/ ? "v.template visit< BrigTypeTraits<$_> >()" : "v.visitNone(type)" }
-    //.dispatch_arg="type" //.dispatch_default="return v.visitNone(type)"
-    //
-    //- .tdname=BrigType
-    //
-    //.print=$mnemo{ "_$mnemo" }
-
-    BRIG_TYPE_NONE  = 0,  //.mnemo=""       //.print=""
-    BRIG_TYPE_U8    = 1,  //.ctype=uint8_t
-    BRIG_TYPE_U16   = 2,  //.ctype=uint16_t
-    BRIG_TYPE_U32   = 3,  //.ctype=uint32_t
-    BRIG_TYPE_U64   = 4,  //.ctype=uint64_t
-    BRIG_TYPE_S8    = 5,  //.ctype=int8_t
-    BRIG_TYPE_S16   = 6,  //.ctype=int16_t
-    BRIG_TYPE_S32   = 7,  //.ctype=int32_t
-    BRIG_TYPE_S64   = 8,  //.ctype=int64_t
-    BRIG_TYPE_F16   = 9,  //.ctype=f16_t
-    BRIG_TYPE_F32   = 10, //.ctype=float
-    BRIG_TYPE_F64   = 11, //.ctype=double
-    BRIG_TYPE_B1    = 12, //.ctype=bool     //.numBytes=1
-    BRIG_TYPE_B8    = 13, //.ctype=uint8_t
-    BRIG_TYPE_B16   = 14, //.ctype=uint16_t
-    BRIG_TYPE_B32   = 15, //.ctype=uint32_t
-    BRIG_TYPE_B64   = 16, //.ctype=uint64_t
-    BRIG_TYPE_B128  = 17, //.ctype=b128_t
-    BRIG_TYPE_SAMP  = 18, //.mnemo=samp     //.numBits=64
-    BRIG_TYPE_ROIMG = 19, //.mnemo=roimg    //.numBits=64
-    BRIG_TYPE_WOIMG = 20, //.mnemo=woimg    //.numBits=64
-    BRIG_TYPE_RWIMG = 21, //.mnemo=rwimg    //.numBits=64
-    BRIG_TYPE_SIG32 = 22, //.mnemo=sig32    //.numBits=64
-    BRIG_TYPE_SIG64 = 23, //.mnemo=sig64    //.numBits=64
-
-    BRIG_TYPE_U8X4  = BRIG_TYPE_U8  | BRIG_TYPE_PACK_32,  //.ctype=uint8_t
-    BRIG_TYPE_U8X8  = BRIG_TYPE_U8  | BRIG_TYPE_PACK_64,  //.ctype=uint8_t
-    BRIG_TYPE_U8X16 = BRIG_TYPE_U8  | BRIG_TYPE_PACK_128, //.ctype=uint8_t
-    BRIG_TYPE_U16X2 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_32,  //.ctype=uint16_t
-    BRIG_TYPE_U16X4 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_64,  //.ctype=uint16_t
-    BRIG_TYPE_U16X8 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_128, //.ctype=uint16_t
-    BRIG_TYPE_U32X2 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_64,  //.ctype=uint32_t
-    BRIG_TYPE_U32X4 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_128, //.ctype=uint32_t
-    BRIG_TYPE_U64X2 = BRIG_TYPE_U64 | BRIG_TYPE_PACK_128, //.ctype=uint64_t
-    BRIG_TYPE_S8X4  = BRIG_TYPE_S8  | BRIG_TYPE_PACK_32,  //.ctype=int8_t
-    BRIG_TYPE_S8X8  = BRIG_TYPE_S8  | BRIG_TYPE_PACK_64,  //.ctype=int8_t
-    BRIG_TYPE_S8X16 = BRIG_TYPE_S8  | BRIG_TYPE_PACK_128, //.ctype=int8_t
-    BRIG_TYPE_S16X2 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_32,  //.ctype=int16_t
-    BRIG_TYPE_S16X4 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_64,  //.ctype=int16_t
-    BRIG_TYPE_S16X8 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_128, //.ctype=int16_t
-    BRIG_TYPE_S32X2 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_64,  //.ctype=int32_t
-    BRIG_TYPE_S32X4 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_128, //.ctype=int32_t
-    BRIG_TYPE_S64X2 = BRIG_TYPE_S64 | BRIG_TYPE_PACK_128, //.ctype=int64_t
-    BRIG_TYPE_F16X2 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_32,  //.ctype=f16_t
-    BRIG_TYPE_F16X4 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_64,  //.ctype=f16_t
-    BRIG_TYPE_F16X8 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_128, //.ctype=f16_t
-    BRIG_TYPE_F32X2 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_64,  //.ctype=float
-    BRIG_TYPE_F32X4 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_128, //.ctype=float
-    BRIG_TYPE_F64X2 = BRIG_TYPE_F64 | BRIG_TYPE_PACK_128, //.ctype=double
-
-    BRIG_TYPE_U8_ARRAY    = BRIG_TYPE_U8    | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_U16_ARRAY   = BRIG_TYPE_U16   | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_U32_ARRAY   = BRIG_TYPE_U32   | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_U64_ARRAY   = BRIG_TYPE_U64   | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_S8_ARRAY    = BRIG_TYPE_S8    | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_S16_ARRAY   = BRIG_TYPE_S16   | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_S32_ARRAY   = BRIG_TYPE_S32   | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_S64_ARRAY   = BRIG_TYPE_S64   | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_F16_ARRAY   = BRIG_TYPE_F16   | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_F32_ARRAY   = BRIG_TYPE_F32   | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_F64_ARRAY   = BRIG_TYPE_F64   | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_B8_ARRAY    = BRIG_TYPE_B8    | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_B16_ARRAY   = BRIG_TYPE_B16   | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_B32_ARRAY   = BRIG_TYPE_B32   | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_B64_ARRAY   = BRIG_TYPE_B64   | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_B128_ARRAY  = BRIG_TYPE_B128  | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_SAMP_ARRAY  = BRIG_TYPE_SAMP  | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_ROIMG_ARRAY = BRIG_TYPE_ROIMG | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_WOIMG_ARRAY = BRIG_TYPE_WOIMG | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_RWIMG_ARRAY = BRIG_TYPE_RWIMG | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_SIG32_ARRAY = BRIG_TYPE_SIG32 | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_SIG64_ARRAY = BRIG_TYPE_SIG64 | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_U8X4_ARRAY  = BRIG_TYPE_U8X4  | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_U8X8_ARRAY  = BRIG_TYPE_U8X8  | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_U8X16_ARRAY = BRIG_TYPE_U8X16 | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_U16X2_ARRAY = BRIG_TYPE_U16X2 | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_U16X4_ARRAY = BRIG_TYPE_U16X4 | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_U16X8_ARRAY = BRIG_TYPE_U16X8 | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_U32X2_ARRAY = BRIG_TYPE_U32X2 | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_U32X4_ARRAY = BRIG_TYPE_U32X4 | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_U64X2_ARRAY = BRIG_TYPE_U64X2 | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_S8X4_ARRAY  = BRIG_TYPE_S8X4  | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_S8X8_ARRAY  = BRIG_TYPE_S8X8  | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_S8X16_ARRAY = BRIG_TYPE_S8X16 | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_S16X2_ARRAY = BRIG_TYPE_S16X2 | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_S16X4_ARRAY = BRIG_TYPE_S16X4 | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_S16X8_ARRAY = BRIG_TYPE_S16X8 | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_S32X2_ARRAY = BRIG_TYPE_S32X2 | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_S32X4_ARRAY = BRIG_TYPE_S32X4 | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_S64X2_ARRAY = BRIG_TYPE_S64X2 | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_F16X2_ARRAY = BRIG_TYPE_F16X2 | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_F16X4_ARRAY = BRIG_TYPE_F16X4 | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_F16X8_ARRAY = BRIG_TYPE_F16X8 | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_F32X2_ARRAY = BRIG_TYPE_F32X2 | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_F32X4_ARRAY = BRIG_TYPE_F32X4 | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-    BRIG_TYPE_F64X2_ARRAY = BRIG_TYPE_F64X2 | BRIG_TYPE_ARRAY,  //.mnemo=""     //.print=""
-
-    // Used internally
-    BRIG_TYPE_INVALID = (unsigned) -1 //.skip
-};
-
-enum BrigVariableModifierMask {
-
-    //.nodump
-
-    BRIG_VARIABLE_DEFINITION = 1,
-    BRIG_VARIABLE_CONST = 2
-};
-
-enum BrigWidth {
-
-    //.tddef=1
-    //
-    //.print={ s/^BRIG_WIDTH_//; "_width($_)" }
-
-    BRIG_WIDTH_NONE = 0,
-    BRIG_WIDTH_1 = 1,
-    BRIG_WIDTH_2 = 2,
-    BRIG_WIDTH_4 = 3,
-    BRIG_WIDTH_8 = 4,
-    BRIG_WIDTH_16 = 5,
-    BRIG_WIDTH_32 = 6,
-    BRIG_WIDTH_64 = 7,
-    BRIG_WIDTH_128 = 8,
-    BRIG_WIDTH_256 = 9,
-    BRIG_WIDTH_512 = 10,
-    BRIG_WIDTH_1024 = 11,
-    BRIG_WIDTH_2048 = 12,
-    BRIG_WIDTH_4096 = 13,
-    BRIG_WIDTH_8192 = 14,
-    BRIG_WIDTH_16384 = 15,
-    BRIG_WIDTH_32768 = 16,
-    BRIG_WIDTH_65536 = 17,
-    BRIG_WIDTH_131072 = 18,
-    BRIG_WIDTH_262144 = 19,
-    BRIG_WIDTH_524288 = 20,
-    BRIG_WIDTH_1048576 = 21,
-    BRIG_WIDTH_2097152 = 22,
-    BRIG_WIDTH_4194304 = 23,
-    BRIG_WIDTH_8388608 = 24,
-    BRIG_WIDTH_16777216 = 25,
-    BRIG_WIDTH_33554432 = 26,
-    BRIG_WIDTH_67108864 = 27,
-    BRIG_WIDTH_134217728 = 28,
-    BRIG_WIDTH_268435456 = 29,
-    BRIG_WIDTH_536870912 = 30,
-    BRIG_WIDTH_1073741824 = 31,
-    BRIG_WIDTH_2147483648 = 32,
-    BRIG_WIDTH_WAVESIZE = 33,
-    BRIG_WIDTH_ALL = 34,
-
-    BRIG_WIDTH_LAST //.skip
-};
-
-struct BrigUInt64 { //.isroot //.standalone
-    uint32_t lo;     //.defValue=0
-    uint32_t hi;     //.defValue=0
-
-    //+hcode KLASS& operator=(uint64_t rhs);
-    //+hcode operator uint64_t();
-    //+implcode inline KLASS& KLASS::operator=(uint64_t rhs) { lo() = (uint32_t)rhs; hi() = (uint32_t)(rhs >> 32); return *this; }
-    //+implcode inline KLASS::operator uint64_t() { return ((uint64_t)hi()) << 32 | lo(); }
-};
-
-struct BrigAluModifier { //.isroot //.standalone
-    BrigAluModifier8_t allBits; //.defValue=0
-    //^^ bool ftz; //.wtype=BitValRef<0>
-};
-
-struct BrigBase { //.nowrap
-    uint16_t byteCount;
-    BrigKind16_t kind;
-};
-
-//.alias Code:Base { //.generic //.isroot //.section=BRIG_SECTION_INDEX_CODE };
-//.alias Directive:Code { //.generic };
-//.alias Operand:Base { //.generic //.isroot //.section=BRIG_SECTION_INDEX_OPERAND };
-
-struct BrigData {
-    //.nowrap
-    uint32_t byteCount;
-    uint8_t bytes[1];
-};
-
-struct BrigExecutableModifier { //.isroot //.standalone
-    BrigExecutableModifier8_t allBits; //.defValue=0
-    //^^ bool isDefinition; //.wtype=BitValRef<0>
-};
-
-struct BrigMemoryModifier { //.isroot //.standalone
-    BrigMemoryModifier8_t allBits; //.defValue=0
-    //^^ bool isConst; //.wtype=BitValRef<0>
-};
-
-struct BrigSegCvtModifier { //.isroot //.standalone
-    BrigSegCvtModifier8_t allBits; //.defValue=0
-    //^^ bool isNoNull; //.wtype=BitValRef<0>
-};
-
-struct BrigVariableModifier { //.isroot //.standalone
-    BrigVariableModifier8_t allBits;    //.defValue=0
-
-    //^^ bool isDefinition;     //.wtype=BitValRef<0>
-    //^^ bool isConst;          //.wtype=BitValRef<1>
-};
-
-struct BrigDirectiveArgBlockEnd {
-    BrigBase base;
-};
-
-struct BrigDirectiveArgBlockStart {
-    BrigBase base;
-};
-
-struct BrigDirectiveComment {
-    BrigBase base;
-    BrigDataOffsetString32_t name;
-};
-
-struct BrigDirectiveControl {
-    BrigBase base;
-    BrigControlDirective16_t control;
-    uint16_t reserved; //.defValue=0
-    BrigDataOffsetOperandList32_t operands;
-};
-
-struct BrigDirectiveExecutable { //.generic
-    BrigBase base;
-    BrigDataOffsetString32_t name;
-    uint16_t outArgCount; //.defValue=0
-    uint16_t inArgCount;  //.defValue=0
-    BrigCodeOffset32_t firstInArg;
-    BrigCodeOffset32_t firstCodeBlockEntry;
-    BrigCodeOffset32_t nextModuleEntry;
-    BrigExecutableModifier modifier; //.acc=subItem<ExecutableModifier> //.wtype=ExecutableModifier
-    BrigLinkage8_t linkage;
-    uint16_t reserved; //.defValue=0
-};
-
-//.alias DirectiveKernel:DirectiveExecutable { };
-//.alias DirectiveFunction:DirectiveExecutable { };
-//.alias DirectiveSignature:DirectiveExecutable { };
-//.alias DirectiveIndirectFunction:DirectiveExecutable { };
-
-struct BrigDirectiveExtension {
-    BrigBase base;
-    BrigDataOffsetString32_t name;
-};
-
-struct BrigDirectiveFbarrier {
-    BrigBase base;
-    BrigDataOffsetString32_t name;
-    BrigVariableModifier modifier; //.acc=subItem<VariableModifier> //.wtype=VariableModifier
-    BrigLinkage8_t linkage;
-    uint16_t reserved; //.defValue=0
-};
-
-struct BrigDirectiveLabel {
-    BrigBase base;
-    BrigDataOffsetString32_t name;
-};
-
-struct BrigDirectiveLoc {
-    BrigBase base;
-    BrigDataOffsetString32_t filename;
-    uint32_t line;
-    uint32_t column; //.defValue=1
-};
-
-struct BrigDirectiveNone { //.enum=BRIG_KIND_NONE
-    BrigBase base;
-};
-
-struct BrigDirectivePragma {
-    BrigBase base;
-    BrigDataOffsetOperandList32_t operands;
-};
-
-struct BrigDirectiveVariable {
-    BrigBase base;
-    BrigDataOffsetString32_t name;
-    BrigOperandOffset32_t init;
-    BrigType16_t type;
-
-    //+hcode bool isArray();
-    //+implcode inline bool KLASS::isArray() { return isArrayType(type()); }
-
-    //+hcode unsigned elementType();
-    //+implcode inline unsigned KLASS::elementType() { return isArray()? arrayType2elementType(type()) : type(); }
-
-    BrigSegment8_t segment;
-    BrigAlignment8_t align;
-    BrigUInt64 dim; //.acc=subItem<UInt64> //.wtype=UInt64
-    BrigVariableModifier modifier; //.acc=subItem<VariableModifier> //.wtype=VariableModifier
-    BrigLinkage8_t linkage;
-    BrigAllocation8_t allocation;
-    uint8_t reserved; //.defValue=0
-};
-
-struct BrigDirectiveModule {
-    BrigBase base;
-    BrigDataOffsetString32_t name;
-    BrigVersion32_t hsailMajor;         //.wtype=ValRef<uint32_t>
-    BrigVersion32_t hsailMinor;         //.wtype=ValRef<uint32_t>
-    BrigProfile8_t profile;
-    BrigMachineModel8_t machineModel;
-    BrigRound8_t defaultFloatRound;
-    uint8_t reserved;                   //.defValue=0
-};
-
-struct BrigInstBase { //.wname=Inst //.generic //.parent=BrigCode
-    BrigBase base;
-    BrigOpcode16_t opcode;
-    BrigType16_t type;
-    BrigDataOffsetOperandList32_t operands;
-
-    //+hcode Operand operand(int index);
-    //+implcode inline Operand KLASS::operand(int index) { return operands()[index]; }
-};
-
-struct BrigInstAddr {
-    BrigInstBase base;
-    BrigSegment8_t segment;
-    uint8_t reserved[3]; //.defValue=0
-};
-
-struct BrigInstAtomic {
-    BrigInstBase base;
-    BrigSegment8_t segment;
-    BrigMemoryOrder8_t memoryOrder;
-    BrigMemoryScope8_t memoryScope;
-    BrigAtomicOperation8_t atomicOperation;
-    uint8_t equivClass;
-    uint8_t reserved[3]; //.defValue=0
-};
-
-struct BrigInstBasic {
-    BrigInstBase base;
-};
-
-struct BrigInstBr {
-    BrigInstBase base;
-    BrigWidth8_t width;
-    uint8_t reserved[3]; //.defValue=0
-};
-
-struct BrigInstCmp {
-    BrigInstBase base;
-    BrigType16_t sourceType;
-    BrigAluModifier modifier; //.acc=subItem<AluModifier> //.wtype=AluModifier
-    BrigCompareOperation8_t compare;
-    BrigPack8_t pack;
-    uint8_t reserved[3]; //.defValue=0
-};
-
-struct BrigInstCvt {
-    BrigInstBase base;
-    BrigType16_t sourceType;
-    BrigAluModifier modifier; //.acc=subItem<AluModifier> //.wtype=AluModifier
-    BrigRound8_t round;
-};
-
-struct BrigInstImage {
-    BrigInstBase base;
-    BrigType16_t imageType;
-    BrigType16_t coordType;
-    BrigImageGeometry8_t geometry;
-    uint8_t equivClass;
-    uint16_t reserved; //.defValue=0
-};
-
-struct BrigInstLane {
-    BrigInstBase base;
-    BrigType16_t sourceType;
-    BrigWidth8_t width;
-    uint8_t reserved; //.defValue=0
-};
-
-struct BrigInstMem {
-    BrigInstBase base;
-    BrigSegment8_t segment;
-    BrigAlignment8_t align;
-    uint8_t equivClass;
-    BrigWidth8_t width;
-    BrigMemoryModifier modifier; //.acc=subItem<MemoryModifier> //.wtype=MemoryModifier
-    uint8_t reserved[3]; //.defValue=0
-};
-
-struct BrigInstMemFence {
-    BrigInstBase base;
-    BrigMemoryOrder8_t memoryOrder;
-    BrigMemoryScope8_t globalSegmentMemoryScope;
-    BrigMemoryScope8_t groupSegmentMemoryScope;
-    BrigMemoryScope8_t imageSegmentMemoryScope;
-};
-
-struct BrigInstMod {
-    BrigInstBase base;
-    BrigAluModifier modifier; //.acc=subItem<AluModifier> //.wtype=AluModifier
-    BrigRound8_t round;
-    BrigPack8_t pack;
-    uint8_t reserved; //.defValue=0
-};
-
-struct BrigInstQueryImage {
-    BrigInstBase base;
-    BrigType16_t imageType;
-    BrigImageGeometry8_t geometry;
-    BrigImageQuery8_t imageQuery;
-};
-
-struct BrigInstQuerySampler {
-    BrigInstBase base;
-    BrigSamplerQuery8_t samplerQuery;
-    uint8_t reserved[3]; //.defValue=0
-};
-
-struct BrigInstQueue {
-    BrigInstBase base;
-    BrigSegment8_t segment;
-    BrigMemoryOrder8_t memoryOrder;
-    uint16_t reserved; //.defValue=0
-};
-
-struct BrigInstSeg {
-    BrigInstBase base;
-    BrigSegment8_t segment;
-    uint8_t reserved[3]; //.defValue=0
-};
-
-struct BrigInstSegCvt {
-    BrigInstBase base;
-    BrigType16_t sourceType;
-    BrigSegment8_t segment;
-    BrigSegCvtModifier modifier; //.acc=subItem<SegCvtModifier> //.wtype=SegCvtModifier
-};
-
-struct BrigInstSignal {
-    BrigInstBase base;
-    BrigType16_t signalType;
-    BrigMemoryOrder8_t memoryOrder;
-    BrigAtomicOperation8_t signalOperation;
-};
-
-struct BrigInstSourceType {
-    BrigInstBase base;
-    BrigType16_t sourceType;
-    uint16_t reserved; //.defValue=0
-};
-
-typedef BrigInstSourceType BrigInstPopcount;
-
-struct BrigOperandAddress {
-    BrigBase base;
-    BrigCodeOffset32_t symbol; //.wtype=ItemRef<DirectiveVariable>
-    BrigOperandOffset32_t reg; //.wtype=ItemRef<OperandRegister>
-    BrigUInt64 offset; //.acc=subItem<UInt64> //.wtype=UInt64
-};
-
-struct BrigOperandAlign {
-    BrigBase base;
-    BrigAlignment8_t align;
-    uint8_t reserved[3]; //.defValue=0
-};
-
-struct BrigOperandCodeList {
-    BrigBase base;
-    BrigDataOffsetCodeList32_t elements;
-
-    //+hcode unsigned elementCount();
-    //+implcode inline unsigned KLASS::elementCount() { return elements().size(); }
-    //+hcode Code elements(int index);
-    //+implcode inline Code KLASS::elements(int index) { return elements()[index]; }
-};
-
-struct BrigOperandCodeRef {
-    BrigBase base;
-    BrigCodeOffset32_t ref;
-};
-
-struct BrigOperandConstantBytes {
-    BrigBase base;
-    BrigType16_t type; //.defValue=0
-    uint16_t reserved; //.defValue=0
-    BrigDataOffsetString32_t bytes;
-};
-
-struct BrigOperandConstantOperandList {
-    BrigBase base;
-    BrigType16_t type;
-    uint16_t reserved; //.defValue=0
-    BrigDataOffsetOperandList32_t elements;
-
-    //+hcode unsigned elementCount();
-    //+implcode inline unsigned KLASS::elementCount() { return elements().size(); }
-    //+hcode Operand elements(int index);
-    //+implcode inline Operand KLASS::elements(int index) { return elements()[index]; }
-};
-
-struct BrigOperandConstantImage {
-    BrigBase base;
-    BrigType16_t type;
-    BrigImageGeometry8_t geometry;
-    BrigImageChannelOrder8_t channelOrder;
-    BrigImageChannelType8_t channelType;
-    uint8_t reserved[3]; //.defValue=0
-    BrigUInt64 width;    //.acc=subItem<UInt64> //.wtype=UInt64
-    BrigUInt64 height;   //.acc=subItem<UInt64> //.wtype=UInt64
-    BrigUInt64 depth;    //.acc=subItem<UInt64> //.wtype=UInt64
-    BrigUInt64 array;    //.acc=subItem<UInt64> //.wtype=UInt64
-};
-
-struct BrigOperandOperandList {
-    BrigBase base;
-    BrigDataOffsetOperandList32_t elements;
-
-    //+hcode unsigned elementCount();
-    //+implcode inline unsigned KLASS::elementCount() { return elements().size(); }
-    //+hcode Operand elements(int index);
-    //+implcode inline Operand KLASS::elements(int index) { return elements()[index]; }
-};
-
-struct BrigOperandRegister {
-    BrigBase base;
-    BrigRegisterKind16_t regKind;
-    uint16_t regNum;
-};
-
-struct BrigOperandConstantSampler {
-    BrigBase base;
-    BrigType16_t type;
-    BrigSamplerCoordNormalization8_t coord;
-    BrigSamplerFilter8_t filter;
-    BrigSamplerAddressing8_t addressing;
-    uint8_t reserved[3]; //.defValue=0
-};
-
-struct BrigOperandString {
-    BrigBase base;
-    BrigDataOffsetString32_t string;
-};
-
-struct BrigOperandWavesize {
-    BrigBase base;
-};
-
-//.ignore{
-
-enum BrigExceptionsMask {
-    BRIG_EXCEPTIONS_INVALID_OPERATION = 1 << 0,
-    BRIG_EXCEPTIONS_DIVIDE_BY_ZERO = 1 << 1,
-    BRIG_EXCEPTIONS_OVERFLOW = 1 << 2,
-    BRIG_EXCEPTIONS_UNDERFLOW = 1 << 3,
-    BRIG_EXCEPTIONS_INEXACT = 1 << 4,
-
-    BRIG_EXCEPTIONS_FIRST_USER_DEFINED = 1 << 16
-};
-
-struct BrigSectionHeader {
-    uint64_t byteCount;
-    uint32_t headerByteCount;
-    uint32_t nameLength;
-    uint8_t name[1];
-};
-
-#define MODULE_IDENTIFICATION_LENGTH (8)
-
-struct BrigModuleHeader {
-    char identification[MODULE_IDENTIFICATION_LENGTH];
-    BrigVersion32_t brigMajor;
-    BrigVersion32_t brigMinor;
-    uint64_t byteCount;
-    uint8_t hash[64];
-    uint32_t reserved;
-    uint32_t sectionCount;
-    uint64_t sectionIndex;
-};
-
-typedef BrigModuleHeader* BrigModule_t;
-
-#endif // defined(INCLUDED_BRIG_H)
-//}
diff --git a/src/arch/hsail/SConscript b/src/arch/hsail/SConscript
deleted file mode 100644 (file)
index 251c103..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-# -*- mode:python -*-
-
-#  Copyright (c) 2015 Advanced Micro Devices, Inc.
-#  All rights reserved.
-#
-#  For use for simulation and test purposes only
-#
-#  Redistribution and use in source and binary forms, with or without
-#  modification, are permitted provided that the following conditions are met:
-#
-#  1. Redistributions of source code must retain the above copyright notice,
-#  this list of conditions and the following disclaimer.
-#
-#  2. Redistributions in binary form must reproduce the above copyright notice,
-#  this list of conditions and the following disclaimer in the documentation
-#  and/or other materials provided with the distribution.
-#
-#  3. Neither the name of the copyright holder nor the names of its contributors
-#  may be used to endorse or promote products derived from this software
-#  without specific prior written permission.
-#
-#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-#  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-#  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-#  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-#  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-#  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-#  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-#  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-#  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-#  POSSIBILITY OF SUCH DAMAGE.
-#
-#  Author: Anthony Gutierrez
-#
-
-Import('*')
-
-if not env['BUILD_GPU']:
-    Return()
-
-if env['TARGET_GPU_ISA'] == 'hsail':
-    env.Command(['insts/gen_decl.hh', 'gpu_decoder.cc', 'insts/gen_exec.cc'],
-                'gen.py', '$SOURCE $TARGETS')
-
-    Source('gpu_decoder.cc')
-    Source('insts/branch.cc')
-    Source('insts/gen_exec.cc')
-    Source('insts/gpu_static_inst.cc')
-    Source('insts/main.cc')
-    Source('insts/pseudo_inst.cc')
-    Source('insts/mem.cc')
-    Source('operand.cc')
diff --git a/src/arch/hsail/SConsopts b/src/arch/hsail/SConsopts
deleted file mode 100644 (file)
index 641963c..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-# -*- mode:python -*-
-
-#
-#  Copyright (c) 2015 Advanced Micro Devices, Inc.
-#  All rights reserved.
-#
-#  For use for simulation and test purposes only
-#
-#  Redistribution and use in source and binary forms, with or without
-#  modification, are permitted provided that the following conditions are met:
-#
-#  1. Redistributions of source code must retain the above copyright notice,
-#  this list of conditions and the following disclaimer.
-#
-#  2. Redistributions in binary form must reproduce the above copyright notice,
-#  this list of conditions and the following disclaimer in the documentation
-#  and/or other materials provided with the distribution.
-#
-#  3. Neither the name of the copyright holder nor the names of its contributors
-#  may be used to endorse or promote products derived from this software
-#  without specific prior written permission.
-#
-#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-#  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-#  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-#  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-#  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-#  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-#  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-#  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-#  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-#  POSSIBILITY OF SUCH DAMAGE.
-#
-#  Author: Anthony Gutierrez
-#
-
-Import('*')
-
-all_gpu_isa_list.append('hsail')
diff --git a/src/arch/hsail/gen.py b/src/arch/hsail/gen.py
deleted file mode 100755 (executable)
index 5640424..0000000
+++ /dev/null
@@ -1,912 +0,0 @@
-#!/usr/bin/env python
-#  Copyright (c) 2015 Advanced Micro Devices, Inc.
-#  All rights reserved.
-#
-#  For use for simulation and test purposes only
-#
-#  Redistribution and use in source and binary forms, with or without
-#  modification, are permitted provided that the following conditions are met:
-#
-#  1. Redistributions of source code must retain the above copyright notice,
-#  this list of conditions and the following disclaimer.
-#
-#  2. Redistributions in binary form must reproduce the above copyright notice,
-#  this list of conditions and the following disclaimer in the documentation
-#  and/or other materials provided with the distribution.
-#
-#  3. Neither the name of the copyright holder nor the names of its contributors
-#  may be used to endorse or promote products derived from this software
-#  without specific prior written permission.
-#
-#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-#  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-#  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-#  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-#  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-#  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-#  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-#  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-#  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-#  POSSIBILITY OF SUCH DAMAGE.
-#
-#  Author: Steve Reinhardt
-#
-
-from __future__ import print_function
-
-import sys, re
-
-from m5.util import code_formatter
-
-if len(sys.argv) != 4:
-    print("Error: need 3 args (file names)")
-    sys.exit(0)
-
-header_code = code_formatter()
-decoder_code = code_formatter()
-exec_code = code_formatter()
-
-###############
-#
-# Generate file prologs (includes etc.)
-#
-###############
-
-header_code('''
-#include "arch/hsail/insts/decl.hh"
-#include "base/bitfield.hh"
-#include "gpu-compute/hsail_code.hh"
-#include "gpu-compute/wavefront.hh"
-
-namespace HsailISA
-{
-''')
-header_code.indent()
-
-decoder_code('''
-#include "arch/hsail/gpu_decoder.hh"
-#include "arch/hsail/insts/branch.hh"
-#include "arch/hsail/insts/decl.hh"
-#include "arch/hsail/insts/gen_decl.hh"
-#include "arch/hsail/insts/mem.hh"
-#include "arch/hsail/insts/mem_impl.hh"
-#include "gpu-compute/brig_object.hh"
-
-namespace HsailISA
-{
-    std::vector<GPUStaticInst*> Decoder::decodedInsts;
-
-    GPUStaticInst*
-    Decoder::decode(MachInst machInst)
-    {
-        using namespace Brig;
-
-        const BrigInstBase *ib = machInst.brigInstBase;
-        const BrigObject *obj = machInst.brigObj;
-
-        switch(ib->opcode) {
-''')
-decoder_code.indent()
-decoder_code.indent()
-
-exec_code('''
-#include "arch/hsail/insts/gen_decl.hh"
-#include "base/intmath.hh"
-
-namespace HsailISA
-{
-''')
-exec_code.indent()
-
-###############
-#
-# Define code templates for class declarations (for header file)
-#
-###############
-
-# Basic header template for an instruction stub.
-header_template_stub = '''
-class $class_name : public $base_class
-{
-  public:
-    typedef $base_class Base;
-
-    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
-       : Base(ib, obj, "$opcode")
-    {
-    }
-
-    void execute(GPUDynInstPtr gpuDynInst);
-};
-
-'''
-
-# Basic header template for an instruction with no template parameters.
-header_template_nodt = '''
-class $class_name : public $base_class
-{
-  public:
-    typedef $base_class Base;
-
-    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
-       : Base(ib, obj, "$opcode")
-    {
-    }
-
-    void execute(GPUDynInstPtr gpuDynInst);
-};
-
-'''
-
-# Basic header template for an instruction with a single DataType
-# template parameter.
-header_template_1dt = '''
-template<typename DataType>
-class $class_name : public $base_class<DataType>
-{
-  public:
-    typedef $base_class<DataType> Base;
-    typedef typename DataType::CType CType;
-
-    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
-       : Base(ib, obj, "$opcode")
-    {
-    }
-
-    void execute(GPUDynInstPtr gpuDynInst);
-};
-
-'''
-
-header_template_1dt_noexec = '''
-template<typename DataType>
-class $class_name : public $base_class<DataType>
-{
-  public:
-    typedef $base_class<DataType> Base;
-    typedef typename DataType::CType CType;
-
-    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
-       : Base(ib, obj, "$opcode")
-    {
-    }
-};
-
-'''
-
-# Same as header_template_1dt, except the base class has a second
-# template parameter NumSrcOperands to allow a variable number of
-# source operands.  Note that since this is implemented with an array,
-# it only works for instructions where all sources are of the same
-# type (like most arithmetics).
-header_template_1dt_varsrcs = '''
-template<typename DataType>
-class $class_name : public $base_class<DataType, $num_srcs>
-{
-  public:
-    typedef $base_class<DataType, $num_srcs> Base;
-    typedef typename DataType::CType CType;
-
-    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
-       : Base(ib, obj, "$opcode")
-    {
-    }
-
-    void execute(GPUDynInstPtr gpuDynInst);
-};
-
-'''
-
-# Header template for instruction with two DataType template
-# parameters, one for the dest and one for the source.  This is used
-# by compare and convert.
-header_template_2dt = '''
-template<typename DestDataType, class SrcDataType>
-class $class_name : public $base_class<DestDataType, SrcDataType>
-{
-  public:
-    typedef $base_class<DestDataType, SrcDataType> Base;
-    typedef typename DestDataType::CType DestCType;
-    typedef typename SrcDataType::CType SrcCType;
-
-    $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
-       : Base(ib, obj, "$opcode")
-    {
-    }
-
-    void execute(GPUDynInstPtr gpuDynInst);
-};
-
-'''
-
-header_templates = {
-    'ArithInst': header_template_1dt_varsrcs,
-    'CmovInst': header_template_1dt,
-    'ClassInst': header_template_1dt,
-    'ShiftInst': header_template_1dt,
-    'ExtractInsertInst': header_template_1dt,
-    'CmpInst': header_template_2dt,
-    'CvtInst': header_template_2dt,
-    'PopcountInst': header_template_2dt,
-    'LdInst': '',
-    'StInst': '',
-    'SpecialInstNoSrc': header_template_nodt,
-    'SpecialInst1Src': header_template_nodt,
-    'SpecialInstNoSrcNoDest': '',
-    'Stub': header_template_stub,
-}
-
-###############
-#
-# Define code templates for exec functions
-#
-###############
-
-# exec function body
-exec_template_stub = '''
-void
-$class_name::execute(GPUDynInstPtr gpuDynInst)
-{
-    fatal("instruction unimplemented %s\\n", gpuDynInst->disassemble());
-}
-
-'''
-exec_template_nodt_nosrc = '''
-void
-$class_name::execute(GPUDynInstPtr gpuDynInst)
-{
-    Wavefront *w = gpuDynInst->wavefront();
-
-    typedef Base::DestCType DestCType;
-
-    const VectorMask &mask = w->getPred();
-
-    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-        if (mask[lane]) {
-            DestCType dest_val = $expr;
-            this->dest.set(w, lane, dest_val);
-        }
-    }
-}
-
-'''
-
-exec_template_nodt_1src = '''
-void
-$class_name::execute(GPUDynInstPtr gpuDynInst)
-{
-    Wavefront *w = gpuDynInst->wavefront();
-
-    typedef Base::DestCType DestCType;
-    typedef Base::SrcCType  SrcCType;
-
-    const VectorMask &mask = w->getPred();
-
-    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-        if (mask[lane]) {
-            SrcCType src_val0 = this->src0.get<SrcCType>(w, lane);
-            DestCType dest_val = $expr;
-
-            this->dest.set(w, lane, dest_val);
-        }
-    }
-}
-
-'''
-
-exec_template_1dt_varsrcs = '''
-template<typename DataType>
-void
-$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
-{
-    Wavefront *w = gpuDynInst->wavefront();
-
-    const VectorMask &mask = w->getPred();
-
-    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-        if (mask[lane]) {
-            CType dest_val;
-            if ($dest_is_src_flag) {
-                dest_val = this->dest.template get<CType>(w, lane);
-            }
-
-            CType src_val[$num_srcs];
-
-            for (int i = 0; i < $num_srcs; ++i) {
-                src_val[i] = this->src[i].template get<CType>(w, lane);
-            }
-
-            dest_val = (CType)($expr);
-
-            this->dest.set(w, lane, dest_val);
-        }
-    }
-}
-
-'''
-
-exec_template_1dt_3srcs = '''
-template<typename DataType>
-void
-$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
-{
-    Wavefront *w = gpuDynInst->wavefront();
-
-    typedef typename Base::Src0CType Src0T;
-    typedef typename Base::Src1CType Src1T;
-    typedef typename Base::Src2CType Src2T;
-
-    const VectorMask &mask = w->getPred();
-
-    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-        if (mask[lane]) {
-            CType dest_val;
-
-            if ($dest_is_src_flag) {
-                dest_val = this->dest.template get<CType>(w, lane);
-            }
-
-            Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
-            Src1T src_val1 = this->src1.template get<Src1T>(w, lane);
-            Src2T src_val2 = this->src2.template get<Src2T>(w, lane);
-
-            dest_val = $expr;
-
-            this->dest.set(w, lane, dest_val);
-        }
-    }
-}
-
-'''
-
-exec_template_1dt_2src_1dest = '''
-template<typename DataType>
-void
-$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
-{
-    Wavefront *w = gpuDynInst->wavefront();
-
-    typedef typename Base::DestCType DestT;
-    typedef CType Src0T;
-    typedef typename Base::Src1CType Src1T;
-
-    const VectorMask &mask = w->getPred();
-
-    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-        if (mask[lane]) {
-            DestT dest_val;
-            if ($dest_is_src_flag) {
-                dest_val = this->dest.template get<DestT>(w, lane);
-            }
-            Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
-            Src1T src_val1 = this->src1.template get<Src1T>(w, lane);
-
-            dest_val = $expr;
-
-            this->dest.set(w, lane, dest_val);
-        }
-    }
-}
-
-'''
-
-exec_template_shift = '''
-template<typename DataType>
-void
-$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
-{
-    Wavefront *w = gpuDynInst->wavefront();
-
-    const VectorMask &mask = w->getPred();
-    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-        if (mask[lane]) {
-            CType dest_val;
-
-            if ($dest_is_src_flag) {
-                dest_val = this->dest.template get<CType>(w, lane);
-            }
-
-            CType src_val0 = this->src0.template get<CType>(w, lane);
-            uint32_t src_val1 = this->src1.template get<uint32_t>(w, lane);
-
-            dest_val = $expr;
-
-            this->dest.set(w, lane, dest_val);
-        }
-    }
-}
-
-'''
-
-exec_template_2dt = '''
-template<typename DestDataType, class SrcDataType>
-void
-$class_name<DestDataType, SrcDataType>::execute(GPUDynInstPtr gpuDynInst)
-{
-    Wavefront *w = gpuDynInst->wavefront();
-
-    const VectorMask &mask = w->getPred();
-
-    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-        if (mask[lane]) {
-            DestCType dest_val;
-            SrcCType src_val[$num_srcs];
-
-            for (int i = 0; i < $num_srcs; ++i) {
-                src_val[i] = this->src[i].template get<SrcCType>(w, lane);
-            }
-
-            dest_val = $expr;
-
-            this->dest.set(w, lane, dest_val);
-        }
-    }
-}
-
-'''
-
-exec_templates = {
-    'ArithInst': exec_template_1dt_varsrcs,
-    'CmovInst': exec_template_1dt_3srcs,
-    'ExtractInsertInst': exec_template_1dt_3srcs,
-    'ClassInst': exec_template_1dt_2src_1dest,
-    'CmpInst': exec_template_2dt,
-    'CvtInst': exec_template_2dt,
-    'PopcountInst': exec_template_2dt,
-    'LdInst': '',
-    'StInst': '',
-    'SpecialInstNoSrc': exec_template_nodt_nosrc,
-    'SpecialInst1Src': exec_template_nodt_1src,
-    'SpecialInstNoSrcNoDest': '',
-    'Stub': exec_template_stub,
-}
-
-###############
-#
-# Define code templates for the decoder cases
-#
-###############
-
-# decode template for nodt-opcode case
-decode_nodt_template = '''
-  case BRIG_OPCODE_$brig_opcode_upper: return $constructor(ib, obj);'''
-
-decode_case_prolog_class_inst = '''
-  case BRIG_OPCODE_$brig_opcode_upper:
-    {
-        //const BrigOperandBase *baseOp = obj->getOperand(ib->operands[1]);
-        BrigType16_t type = ((BrigInstSourceType*)ib)->sourceType;
-        //switch (baseOp->kind) {
-        //    case BRIG_OPERAND_REG:
-        //        type = ((const BrigOperandReg*)baseOp)->type;
-        //        break;
-        //    case BRIG_OPERAND_IMMED:
-        //        type = ((const BrigOperandImmed*)baseOp)->type;
-        //        break;
-        //    default:
-        //        fatal("CLASS unrecognized kind of operand %d\\n",
-        //               baseOp->kind);
-        //}
-        switch (type) {'''
-
-# common prolog for 1dt- or 2dt-opcode case: switch on data type
-decode_case_prolog = '''
-  case BRIG_OPCODE_$brig_opcode_upper:
-    {
-        switch (ib->type) {'''
-
-# single-level decode case entry (for 1dt opcodes)
-decode_case_entry = \
-'      case BRIG_TYPE_$type_name: return $constructor(ib, obj);'
-
-decode_store_prolog = \
-'      case BRIG_TYPE_$type_name: {'
-
-decode_store_case_epilog = '''
-    }'''
-
-decode_store_case_entry = \
-'          return $constructor(ib, obj);'
-
-# common epilog for type switch
-decode_case_epilog = '''
-          default: fatal("$brig_opcode_upper: unrecognized type %d\\n",
-              ib->type);
-        }
-    }
-    break;'''
-
-# Additional templates for nested decode on a second type field (for
-# compare and convert).  These are used in place of the
-# decode_case_entry template to create a second-level switch on on the
-# second type field inside each case of the first-level type switch.
-# Because the name and location of the second type can vary, the Brig
-# instruction type must be provided in $brig_type, and the name of the
-# second type field must be provided in $type_field.
-decode_case2_prolog = '''
-        case BRIG_TYPE_$type_name:
-          switch (((Brig$brig_type*)ib)->$type2_field) {'''
-
-decode_case2_entry = \
-'          case BRIG_TYPE_$type2_name: return $constructor(ib, obj);'
-
-decode_case2_epilog = '''
-          default: fatal("$brig_opcode_upper: unrecognized $type2_field %d\\n",
-                         ((Brig$brig_type*)ib)->$type2_field);
-        }
-        break;'''
-
-# Figure out how many source operands an expr needs by looking for the
-# highest-numbered srcN value referenced.  Since sources are numbered
-# starting at 0, the return value is N+1.
-def num_src_operands(expr):
-    if expr.find('src2') != -1:
-        return 3
-    elif expr.find('src1') != -1:
-        return 2
-    elif expr.find('src0') != -1:
-        return 1
-    else:
-        return 0
-
-###############
-#
-# Define final code generation methods
-#
-# The gen_nodt, and gen_1dt, and gen_2dt methods are the interface for
-# generating actual instructions.
-#
-###############
-
-# Generate class declaration, exec function, and decode switch case
-# for an brig_opcode with a single-level type switch.  The 'types'
-# parameter is a list or tuple of types for which the instruction
-# should be instantiated.
-def gen(brig_opcode, types=None, expr=None, base_class='ArithInst',
-        type2_info=None, constructor_prefix='new ', is_store=False):
-    brig_opcode_upper = brig_opcode.upper()
-    class_name = brig_opcode
-    opcode = class_name.lower()
-
-    if base_class == 'ArithInst':
-        # note that expr must be provided with ArithInst so we can
-        # derive num_srcs for the template
-        assert expr
-
-    if expr:
-        # Derive several bits of info from expr.  If expr is not used,
-        # this info will be irrelevant.
-        num_srcs = num_src_operands(expr)
-        # if the RHS expression includes 'dest', then we're doing an RMW
-        # on the reg and we need to treat it like a source
-        dest_is_src = expr.find('dest') != -1
-        dest_is_src_flag = str(dest_is_src).lower() # for C++
-        if base_class in ['ShiftInst']:
-            expr = re.sub(r'\bsrc(\d)\b', r'src_val\1', expr)
-        elif base_class in ['ArithInst', 'CmpInst', 'CvtInst', 'PopcountInst']:
-            expr = re.sub(r'\bsrc(\d)\b', r'src_val[\1]', expr)
-        else:
-            expr = re.sub(r'\bsrc(\d)\b', r'src_val\1', expr)
-        expr = re.sub(r'\bdest\b', r'dest_val', expr)
-
-    # Strip template arguments off of base class before looking up
-    # appropriate templates
-    base_class_base = re.sub(r'<.*>$', '', base_class)
-    header_code(header_templates[base_class_base])
-
-    if base_class.startswith('SpecialInst') or base_class.startswith('Stub'):
-        exec_code(exec_templates[base_class_base])
-    elif base_class.startswith('ShiftInst'):
-        header_code(exec_template_shift)
-    else:
-        header_code(exec_templates[base_class_base])
-
-    if not types or isinstance(types, str):
-        # Just a single type
-        constructor = constructor_prefix + class_name
-        decoder_code(decode_nodt_template)
-    else:
-        # multiple types, need at least one level of decode
-        if brig_opcode == 'Class':
-            decoder_code(decode_case_prolog_class_inst)
-        else:
-            decoder_code(decode_case_prolog)
-        if not type2_info:
-            if not is_store:
-                # single list of types, to basic one-level decode
-                for type_name in types:
-                    full_class_name = '%s<%s>' % (class_name, type_name.upper())
-                    constructor = constructor_prefix + full_class_name
-                    decoder_code(decode_case_entry)
-            else:
-                # single list of types, to basic one-level decode
-                for type_name in types:
-                    decoder_code(decode_store_prolog)
-                    type_size = int(re.findall(r'[0-9]+', type_name)[0])
-                    src_size = 32
-                    type_type = type_name[0]
-                    full_class_name = '%s<%s,%s>' % (class_name, \
-                                                     type_name.upper(), \
-                                                     '%s%d' % \
-                                                     (type_type.upper(), \
-                                                     type_size))
-                    constructor = constructor_prefix + full_class_name
-                    decoder_code(decode_store_case_entry)
-                    decoder_code(decode_store_case_epilog)
-        else:
-            # need secondary type switch (convert, compare)
-            # unpack extra info on second switch
-            (type2_field, types2) = type2_info
-            brig_type = 'Inst%s' % brig_opcode
-            for type_name in types:
-                decoder_code(decode_case2_prolog)
-                fmt = '%s<%s,%%s>' % (class_name, type_name.upper())
-                for type2_name in types2:
-                    full_class_name = fmt % type2_name.upper()
-                    constructor = constructor_prefix + full_class_name
-                    decoder_code(decode_case2_entry)
-
-                decoder_code(decode_case2_epilog)
-
-        decoder_code(decode_case_epilog)
-
-###############
-#
-# Generate instructions
-#
-###############
-
-# handy abbreviations for common sets of types
-
-# arithmetic ops are typically defined only on 32- and 64-bit sizes
-arith_int_types = ('S32', 'U32', 'S64', 'U64')
-arith_float_types = ('F32', 'F64')
-arith_types = arith_int_types + arith_float_types
-
-bit_types = ('B1', 'B32', 'B64')
-
-all_int_types = ('S8', 'U8', 'S16', 'U16') + arith_int_types
-
-# I think you might be able to do 'f16' memory ops too, but we'll
-# ignore them for now.
-mem_types = all_int_types + arith_float_types
-mem_atom_types = all_int_types + ('B32', 'B64')
-
-##### Arithmetic & logical operations
-gen('Add', arith_types, 'src0 + src1')
-gen('Sub', arith_types, 'src0 - src1')
-gen('Mul', arith_types, 'src0 * src1')
-gen('Div', arith_types, 'src0 / src1')
-gen('Min', arith_types, 'std::min(src0, src1)')
-gen('Max', arith_types, 'std::max(src0, src1)')
-gen('Gcnmin', arith_types, 'std::min(src0, src1)')
-
-gen('CopySign', arith_float_types,
-    'src1 < 0 ? -std::abs(src0) : std::abs(src0)')
-gen('Sqrt', arith_float_types, 'sqrt(src0)')
-gen('Floor', arith_float_types, 'floor(src0)')
-
-# "fast" sqrt... same as slow for us
-gen('Nsqrt', arith_float_types, 'sqrt(src0)')
-gen('Nrsqrt', arith_float_types, '1.0/sqrt(src0)')
-gen('Nrcp', arith_float_types, '1.0/src0')
-gen('Fract', arith_float_types,
-    '(src0 >= 0.0)?(src0-floor(src0)):(floor(src0)-src0)')
-
-gen('Ncos', arith_float_types, 'cos(src0)');
-gen('Nsin', arith_float_types, 'sin(src0)');
-
-gen('And', bit_types, 'src0 & src1')
-gen('Or', bit_types,  'src0 | src1')
-gen('Xor', bit_types, 'src0 ^ src1')
-
-gen('Bitselect', bit_types, '(src1 & src0) | (src2 & ~(uint64_t)src0)')
-gen('Popcount', ('U32',), '__builtin_popcount(src0)', 'PopcountInst', \
-    ('sourceType', ('B32', 'B64')))
-
-gen('Shl', arith_int_types, 'src0 << (unsigned)src1', 'ShiftInst')
-gen('Shr', arith_int_types, 'src0 >> (unsigned)src1', 'ShiftInst')
-
-# gen('Mul_hi', types=('s32','u32', '??'))
-# gen('Mul24', types=('s32','u32', '??'))
-gen('Rem', arith_int_types, 'src0 - ((src0 / src1) * src1)')
-
-gen('Abs', arith_types, 'std::abs(src0)')
-gen('Neg', arith_types, '-src0')
-
-gen('Mov', bit_types + arith_types, 'src0')
-gen('Not', bit_types, 'heynot(src0)')
-
-# mad and fma differ only in rounding behavior, which we don't emulate
-# also there's an integer form of mad, but not of fma
-gen('Mad', arith_types, 'src0 * src1 + src2')
-gen('Fma', arith_float_types, 'src0 * src1 + src2')
-
-#native floating point operations
-gen('Nfma', arith_float_types, 'src0 * src1 + src2')
-
-gen('Cmov', bit_types, 'src0 ? src1 : src2', 'CmovInst')
-gen('BitAlign', bit_types, '(src0 << src2)|(src1 >> (32 - src2))')
-gen('ByteAlign', bit_types, '(src0 << 8 * src2)|(src1 >> (32 - 8 * src2))')
-
-# see base/bitfield.hh
-gen('BitExtract', arith_int_types, 'bits(src0, src1, src1 + src2 - 1)',
-    'ExtractInsertInst')
-
-gen('BitInsert', arith_int_types, 'insertBits(dest, src1, src2, src0)',
-    'ExtractInsertInst')
-
-##### Compare
-gen('Cmp', ('B1', 'S32', 'U32', 'F32'), 'compare(src0, src1, this->cmpOp)',
-    'CmpInst', ('sourceType', arith_types + bit_types))
-gen('Class', arith_float_types, 'fpclassify(src0,src1)','ClassInst')
-
-##### Conversion
-
-# Conversion operations are only defined on B1, not B32 or B64
-cvt_types = ('B1',) + mem_types
-
-gen('Cvt', cvt_types, 'src0', 'CvtInst', ('sourceType', cvt_types))
-
-
-##### Load & Store
-gen('Lda', mem_types, base_class = 'LdInst', constructor_prefix='decode')
-gen('Ld', mem_types, base_class = 'LdInst', constructor_prefix='decode')
-gen('St', mem_types, base_class = 'StInst', constructor_prefix='decode',
-    is_store=True)
-gen('Atomic', mem_atom_types, base_class='StInst', constructor_prefix='decode')
-gen('AtomicNoRet', mem_atom_types, base_class='StInst',
-    constructor_prefix='decode')
-
-gen('Cbr', base_class = 'LdInst', constructor_prefix='decode')
-gen('Br', base_class = 'LdInst', constructor_prefix='decode')
-
-##### Special operations
-def gen_special(brig_opcode, expr, dest_type='U32'):
-    num_srcs = num_src_operands(expr)
-    if num_srcs == 0:
-        base_class = 'SpecialInstNoSrc<%s>' % dest_type
-    elif num_srcs == 1:
-        base_class = 'SpecialInst1Src<%s>' % dest_type
-    else:
-        assert false
-
-    gen(brig_opcode, None, expr, base_class)
-
-gen_special('WorkItemId', 'w->workItemId[src0][lane]')
-gen_special('WorkItemAbsId',
-    'w->workItemId[src0][lane] + (w->workGroupId[src0] * w->workGroupSz[src0])')
-gen_special('WorkGroupId', 'w->workGroupId[src0]')
-gen_special('WorkGroupSize', 'w->workGroupSz[src0]')
-gen_special('CurrentWorkGroupSize', 'w->workGroupSz[src0]')
-gen_special('GridSize', 'w->gridSz[src0]')
-gen_special('GridGroups',
-    'divCeil(w->gridSz[src0],w->workGroupSz[src0])')
-gen_special('LaneId', 'lane')
-gen_special('WaveId', 'w->wfId')
-gen_special('Clock', 'w->computeUnit->shader->tick_cnt', 'U64')
-
-# gen_special('CU'', ')
-
-gen('Ret', base_class='SpecialInstNoSrcNoDest')
-gen('Barrier', base_class='SpecialInstNoSrcNoDest')
-gen('MemFence', base_class='SpecialInstNoSrcNoDest')
-
-# Map magic instructions to the BrigSyscall opcode
-# Magic instructions are defined in magic.hh
-#
-# In the future, real HSA kernel system calls can be implemented and coexist
-# with magic instructions.
-gen('Call', base_class='SpecialInstNoSrcNoDest')
-
-# Stubs for unimplemented instructions:
-# These may need to be implemented at some point in the future, but
-# for now we just match the instructions with their operands.
-#
-# By defining stubs for these instructions, we can work with
-# applications that have them in dead/unused code paths.
-#
-# Needed for rocm-hcc compilations for HSA backends since
-# builtins-hsail library is `cat`d onto the generated kernels.
-# The builtins-hsail library consists of handcoded hsail functions
-# that __might__ be needed by the rocm-hcc compiler in certain binaries.
-gen('Bitmask', base_class='Stub')
-gen('Bitrev', base_class='Stub')
-gen('Firstbit', base_class='Stub')
-gen('Lastbit', base_class='Stub')
-gen('Unpacklo', base_class='Stub')
-gen('Unpackhi', base_class='Stub')
-gen('Pack', base_class='Stub')
-gen('Unpack', base_class='Stub')
-gen('Lerp', base_class='Stub')
-gen('Packcvt', base_class='Stub')
-gen('Unpackcvt', base_class='Stub')
-gen('Sad', base_class='Stub')
-gen('Sadhi', base_class='Stub')
-gen('Activelanecount', base_class='Stub')
-gen('Activelaneid', base_class='Stub')
-gen('Activelanemask', base_class='Stub')
-gen('Activelanepermute', base_class='Stub')
-gen('Groupbaseptr', base_class='Stub')
-gen('Signalnoret', base_class='Stub')
-
-###############
-#
-# Generate file epilogs
-#
-###############
-header_code('''
-template<>
-inline void
-Abs<U32>::execute(GPUDynInstPtr gpuDynInst)
-{
-    Wavefront *w = gpuDynInst->wavefront();
-
-    const VectorMask &mask = w->getPred();
-
-    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-        if (mask[lane]) {
-            CType dest_val;
-            CType src_val;
-
-            src_val = this->src[0].template get<CType>(w, lane);
-
-            dest_val = (CType)(src_val);
-
-            this->dest.set(w, lane, dest_val);
-        }
-    }
-}
-
-template<>
-inline void
-Abs<U64>::execute(GPUDynInstPtr gpuDynInst)
-{
-    Wavefront *w = gpuDynInst->wavefront();
-
-    const VectorMask &mask = w->getPred();
-
-    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-        if (mask[lane]) {
-            CType dest_val;
-            CType src_val;
-
-            src_val = this->src[0].template get<CType>(w, lane);
-
-            dest_val = (CType)(src_val);
-
-            this->dest.set(w, lane, dest_val);
-        }
-    }
-}
-''')
-
-header_code.dedent()
-header_code('''
-} // namespace HsailISA
-''')
-
-# close off main decode switch
-decoder_code.dedent()
-decoder_code.dedent()
-decoder_code('''
-          default: fatal("unrecognized Brig opcode %d\\n", ib->opcode);
-        } // end switch(ib->opcode)
-    } // end decode()
-} // namespace HsailISA
-''')
-
-exec_code.dedent()
-exec_code('''
-} // namespace HsailISA
-''')
-
-###############
-#
-# Output accumulated code to files
-#
-###############
-header_code.write(sys.argv[1])
-decoder_code.write(sys.argv[2])
-exec_code.write(sys.argv[3])
diff --git a/src/arch/hsail/gpu_decoder.hh b/src/arch/hsail/gpu_decoder.hh
deleted file mode 100644 (file)
index 98a6896..0000000
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Anthony Gutierrez
- */
-
-#ifndef __ARCH_HSAIL_GPU_DECODER_HH__
-#define __ARCH_HSAIL_GPU_DECODER_HH__
-
-#include <vector>
-
-#include "arch/hsail/gpu_types.hh"
-
-class BrigObject;
-class GPUStaticInst;
-
-namespace Brig
-{
-    class BrigInstBase;
-}
-
-namespace HsailISA
-{
-    class Decoder
-    {
-      public:
-        GPUStaticInst* decode(MachInst machInst);
-
-        GPUStaticInst*
-        decode(RawMachInst inst)
-        {
-            return inst < decodedInsts.size() ? decodedInsts.at(inst) : nullptr;
-        }
-
-        RawMachInst
-        saveInst(GPUStaticInst *decodedInst)
-        {
-            decodedInsts.push_back(decodedInst);
-
-            return decodedInsts.size() - 1;
-        }
-
-      private:
-        static std::vector<GPUStaticInst*> decodedInsts;
-    };
-} // namespace HsailISA
-
-#endif // __ARCH_HSAIL_GPU_DECODER_HH__
diff --git a/src/arch/hsail/gpu_isa.hh b/src/arch/hsail/gpu_isa.hh
deleted file mode 100644 (file)
index 75063cb..0000000
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2016 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __ARCH_HSAIL_GPU_ISA_HH__
-#define __ARCH_HSAIL_GPU_ISA_HH__
-
-#include <cstdint>
-
-#include "arch/hsail/gpu_types.hh"
-#include "base/logging.hh"
-#include "base/types.hh"
-#include "gpu-compute/misc.hh"
-
-namespace HsailISA
-{
-    class GPUISA
-    {
-      public:
-        GPUISA()
-        {
-        }
-
-        void
-        writeMiscReg(int opIdx, RegVal operandVal)
-        {
-            fatal("HSAIL does not implement misc registers yet\n");
-        }
-
-        RegVal
-        readMiscReg(int opIdx) const
-        {
-            fatal("HSAIL does not implement misc registers yet\n");
-        }
-
-        bool hasScalarUnit() const { return false; }
-
-        uint32_t
-        advancePC(uint32_t old_pc, GPUDynInstPtr gpuDynInst)
-        {
-            return old_pc + sizeof(RawMachInst);
-        }
-    };
-}
-
-#endif // __ARCH_HSAIL_GPU_ISA_HH__
diff --git a/src/arch/hsail/gpu_types.hh b/src/arch/hsail/gpu_types.hh
deleted file mode 100644 (file)
index 7b6689d..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Anthony Gutierrez
- */
-
-#ifndef __ARCH_HSAIL_GPU_TYPES_HH__
-#define __ARCH_HSAIL_GPU_TYPES_HH__
-
-#include <cstdint>
-
-namespace Brig
-{
-    class BrigInstBase;
-}
-
-class BrigObject;
-
-namespace HsailISA
-{
-    // A raw machine instruction represents the raw bits that
-    // our model uses to represent an actual instruction. In
-    // the case of HSAIL this is just an index into a list of
-    // instruction objects.
-    typedef uint32_t RawMachInst;
-
-    // The MachInst is a representation of an instruction
-    // that has more information than just the machine code.
-    // For HSAIL the actual machine code is a BrigInstBase
-    // and the BrigObject contains more pertinent
-    // information related to operaands, etc.
-
-    struct MachInst
-    {
-        const Brig::BrigInstBase *brigInstBase;
-        const BrigObject *brigObj;
-    };
-}
-
-#endif // __ARCH_HSAIL_GPU_TYPES_HH__
diff --git a/src/arch/hsail/insts/branch.cc b/src/arch/hsail/insts/branch.cc
deleted file mode 100644 (file)
index d65279c..0000000
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Anthony Gutierrez
- */
-
-#include "arch/hsail/insts/branch.hh"
-
-#include "gpu-compute/hsail_code.hh"
-
-namespace HsailISA
-{
-    GPUStaticInst*
-    decodeBrn(const Brig::BrigInstBase *ib, const BrigObject *obj)
-    {
-        // Detect direct vs indirect branch by seeing whether we have a
-        // register operand.
-        unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
-        const Brig::BrigOperand *reg = obj->getOperand(op_offs);
-
-        if (reg->kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
-            return new BrnIndirectInst(ib, obj);
-        } else {
-            return new BrnDirectInst(ib, obj);
-        }
-    }
-
-    GPUStaticInst*
-    decodeCbr(const Brig::BrigInstBase *ib, const BrigObject *obj)
-    {
-        // Detect direct vs indirect branch by seeing whether we have a
-        // second register operand (after the condition).
-        unsigned op_offs = obj->getOperandPtr(ib->operands, 1);
-        const Brig::BrigOperand *reg = obj->getOperand(op_offs);
-
-        if (reg->kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
-            return new CbrIndirectInst(ib, obj);
-        } else {
-            return new CbrDirectInst(ib, obj);
-        }
-    }
-
-    GPUStaticInst*
-    decodeBr(const Brig::BrigInstBase *ib, const BrigObject *obj)
-    {
-        // Detect direct vs indirect branch by seeing whether we have a
-        // second register operand (after the condition).
-        unsigned op_offs = obj->getOperandPtr(ib->operands, 1);
-        const Brig::BrigOperand *reg = obj->getOperand(op_offs);
-
-        if (reg->kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
-            return new BrIndirectInst(ib, obj);
-        } else {
-            return new BrDirectInst(ib, obj);
-        }
-    }
-} // namespace HsailISA
diff --git a/src/arch/hsail/insts/branch.hh b/src/arch/hsail/insts/branch.hh
deleted file mode 100644 (file)
index 79603f4..0000000
+++ /dev/null
@@ -1,441 +0,0 @@
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Steve Reinhardt
- */
-
-#ifndef __ARCH_HSAIL_INSTS_BRANCH_HH__
-#define __ARCH_HSAIL_INSTS_BRANCH_HH__
-
-#include "arch/hsail/insts/gpu_static_inst.hh"
-#include "arch/hsail/operand.hh"
-#include "gpu-compute/gpu_dyn_inst.hh"
-#include "gpu-compute/wavefront.hh"
-
-namespace HsailISA
-{
-
-    // The main difference between a direct branch and an indirect branch
-    // is whether the target is a register or a label, so we can share a
-    // lot of code if we template the base implementation on that type.
-    template<typename TargetType>
-    class BrnInstBase : public HsailGPUStaticInst
-    {
-    public:
-        void generateDisassembly() override;
-
-        Brig::BrigWidth8_t width;
-        TargetType target;
-
-        BrnInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
-           : HsailGPUStaticInst(obj, "brn")
-        {
-            setFlag(Branch);
-            setFlag(UnconditionalJump);
-            width = ((Brig::BrigInstBr*)ib)->width;
-            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
-            target.init(op_offs, obj);
-        }
-
-        uint32_t getTargetPc()  override { return target.getTarget(0, 0); }
-
-        bool isVectorRegister(int operandIndex) override {
-            assert(operandIndex >= 0 && operandIndex < getNumOperands());
-            return target.isVectorRegister();
-        }
-        bool isCondRegister(int operandIndex) override {
-            assert(operandIndex >= 0 && operandIndex < getNumOperands());
-            return target.isCondRegister();
-        }
-        bool isScalarRegister(int operandIndex) override {
-            assert(operandIndex >= 0 && operandIndex < getNumOperands());
-            return target.isScalarRegister();
-        }
-
-        bool isSrcOperand(int operandIndex) override {
-            assert(operandIndex >= 0 && operandIndex < getNumOperands());
-            return true;
-        }
-
-        bool isDstOperand(int operandIndex) override {
-            return false;
-        }
-
-        int getOperandSize(int operandIndex) override {
-            assert(operandIndex >= 0 && operandIndex < getNumOperands());
-            return target.opSize();
-        }
-
-        int
-        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
-        {
-            assert(operandIndex >= 0 && operandIndex < getNumOperands());
-            return target.regIndex();
-        }
-
-        int getNumOperands() override {
-            return 1;
-        }
-
-        void execute(GPUDynInstPtr gpuDynInst) override;
-    };
-
-    template<typename TargetType>
-    void
-    BrnInstBase<TargetType>::generateDisassembly()
-    {
-        std::string widthClause;
-
-        if (width != 1) {
-            widthClause = csprintf("_width(%d)", width);
-        }
-
-        disassembly = csprintf("%s%s %s", opcode, widthClause,
-                               target.disassemble());
-    }
-
-    template<typename TargetType>
-    void
-    BrnInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
-    {
-        Wavefront *w = gpuDynInst->wavefront();
-
-        if (getTargetPc() == w->rpc()) {
-            w->popFromReconvergenceStack();
-        } else {
-            // Rpc and execution mask remain the same
-            w->pc(getTargetPc());
-        }
-    }
-
-    class BrnDirectInst : public BrnInstBase<LabelOperand>
-    {
-      public:
-        BrnDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
-            : BrnInstBase<LabelOperand>(ib, obj)
-        {
-        }
-        int numSrcRegOperands() { return 0; }
-        int numDstRegOperands() { return 0; }
-    };
-
-    class BrnIndirectInst : public BrnInstBase<SRegOperand>
-    {
-      public:
-        BrnIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
-            : BrnInstBase<SRegOperand>(ib, obj)
-        {
-        }
-        int numSrcRegOperands() { return target.isVectorRegister(); }
-        int numDstRegOperands() { return 0; }
-    };
-
-    GPUStaticInst* decodeBrn(const Brig::BrigInstBase *ib,
-                             const BrigObject *obj);
-
-    template<typename TargetType>
-    class CbrInstBase : public HsailGPUStaticInst
-    {
-      public:
-        void generateDisassembly() override;
-
-        Brig::BrigWidth8_t width;
-        CRegOperand cond;
-        TargetType target;
-
-        CbrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
-           : HsailGPUStaticInst(obj, "cbr")
-        {
-            setFlag(Branch);
-            width = ((Brig::BrigInstBr *)ib)->width;
-            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
-            cond.init(op_offs, obj);
-            op_offs = obj->getOperandPtr(ib->operands, 1);
-            target.init(op_offs, obj);
-        }
-
-        uint32_t getTargetPc() override { return target.getTarget(0, 0); }
-
-        void execute(GPUDynInstPtr gpuDynInst) override;
-        // Assumption: Target is operand 0, Condition Register is operand 1
-        bool isVectorRegister(int operandIndex) override {
-            assert(operandIndex >= 0 && operandIndex < getNumOperands());
-            if (!operandIndex)
-                return target.isVectorRegister();
-            else
-                return false;
-        }
-        bool isCondRegister(int operandIndex) override {
-            assert(operandIndex >= 0 && operandIndex < getNumOperands());
-            if (!operandIndex)
-                return target.isCondRegister();
-            else
-                return true;
-        }
-        bool isScalarRegister(int operandIndex) override {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (!operandIndex)
-                return target.isScalarRegister();
-            else
-                return false;
-        }
-        bool isSrcOperand(int operandIndex) override {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (operandIndex == 0)
-                return true;
-            return false;
-        }
-        // both Condition Register and Target are source operands
-        bool isDstOperand(int operandIndex) override {
-            return false;
-        }
-        int getOperandSize(int operandIndex) override {
-            assert(operandIndex >= 0 && operandIndex < getNumOperands());
-            if (!operandIndex)
-                return target.opSize();
-            else
-                return 1;
-        }
-        int
-        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
-        {
-            assert(operandIndex >= 0 && operandIndex < getNumOperands());
-            if (!operandIndex)
-                return target.regIndex();
-            else
-                return -1;
-         }
-
-        // Operands = Target, Condition Register
-        int getNumOperands() override {
-            return 2;
-        }
-    };
-
-    template<typename TargetType>
-    void
-    CbrInstBase<TargetType>::generateDisassembly()
-    {
-        std::string widthClause;
-
-        if (width != 1) {
-            widthClause = csprintf("_width(%d)", width);
-        }
-
-        disassembly = csprintf("%s%s %s,%s", opcode, widthClause,
-                               cond.disassemble(), target.disassemble());
-    }
-
-    template<typename TargetType>
-    void
-    CbrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
-    {
-        Wavefront *w = gpuDynInst->wavefront();
-
-        const uint32_t curr_pc M5_VAR_USED = w->pc();
-        const uint32_t curr_rpc = w->rpc();
-        const VectorMask curr_mask = w->execMask();
-
-        /**
-         * TODO: can we move this pop outside the instruction, and
-         * into the wavefront?
-         */
-        w->popFromReconvergenceStack();
-
-        // immediate post-dominator instruction
-        const uint32_t rpc = static_cast<uint32_t>(ipdInstNum());
-        if (curr_rpc != rpc) {
-            w->pushToReconvergenceStack(rpc, curr_rpc, curr_mask);
-        }
-
-        // taken branch
-        const uint32_t true_pc = getTargetPc();
-        VectorMask true_mask;
-        for (unsigned int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-            true_mask[lane] = cond.get<bool>(w, lane) & curr_mask[lane];
-        }
-
-        // not taken branch
-        const uint32_t false_pc = nextInstAddr();
-        assert(true_pc != false_pc);
-        if (false_pc != rpc && true_mask.count() < curr_mask.count()) {
-            VectorMask false_mask = curr_mask & ~true_mask;
-            w->pushToReconvergenceStack(false_pc, rpc, false_mask);
-        }
-
-        if (true_pc != rpc && true_mask.count()) {
-            w->pushToReconvergenceStack(true_pc, rpc, true_mask);
-        }
-        assert(w->pc() != curr_pc);
-    }
-
-
-    class CbrDirectInst : public CbrInstBase<LabelOperand>
-    {
-      public:
-        CbrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
-            : CbrInstBase<LabelOperand>(ib, obj)
-        {
-        }
-        // the source operand of a conditional branch is a Condition
-        // Register which is not stored in the VRF
-        // so we do not count it as a source-register operand
-        // even though, formally, it is one.
-        int numSrcRegOperands() { return 0; }
-        int numDstRegOperands() { return 0; }
-    };
-
-    class CbrIndirectInst : public CbrInstBase<SRegOperand>
-    {
-      public:
-        CbrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
-            : CbrInstBase<SRegOperand>(ib, obj)
-        {
-        }
-        // one source operand of the conditional indirect branch is a Condition
-        // register which is not stored in the VRF so we do not count it
-        // as a source-register operand even though, formally, it is one.
-        int numSrcRegOperands() { return target.isVectorRegister(); }
-        int numDstRegOperands() { return 0; }
-    };
-
-    GPUStaticInst* decodeCbr(const Brig::BrigInstBase *ib,
-                             const BrigObject *obj);
-
-    template<typename TargetType>
-    class BrInstBase : public HsailGPUStaticInst
-    {
-      public:
-        void generateDisassembly() override;
-
-        ImmOperand<uint32_t> width;
-        TargetType target;
-
-        BrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
-           : HsailGPUStaticInst(obj, "br")
-        {
-            setFlag(Branch);
-            setFlag(UnconditionalJump);
-            width.init(((Brig::BrigInstBr *)ib)->width, obj);
-            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
-            target.init(op_offs, obj);
-        }
-
-        uint32_t getTargetPc() override { return target.getTarget(0, 0); }
-
-        void execute(GPUDynInstPtr gpuDynInst) override;
-        bool isVectorRegister(int operandIndex) override {
-            assert(operandIndex >= 0 && operandIndex < getNumOperands());
-            return target.isVectorRegister();
-        }
-        bool isCondRegister(int operandIndex) override {
-            assert(operandIndex >= 0 && operandIndex < getNumOperands());
-            return target.isCondRegister();
-        }
-        bool isScalarRegister(int operandIndex) override {
-            assert(operandIndex >= 0 && operandIndex < getNumOperands());
-            return target.isScalarRegister();
-        }
-        bool isSrcOperand(int operandIndex) override {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            return true;
-        }
-        bool isDstOperand(int operandIndex) override { return false; }
-        int getOperandSize(int operandIndex) override {
-            assert(operandIndex >= 0 && operandIndex < getNumOperands());
-            return target.opSize();
-        }
-        int
-        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
-        {
-            assert(operandIndex >= 0 && operandIndex < getNumOperands());
-            return target.regIndex();
-        }
-        int getNumOperands() override { return 1; }
-    };
-
-    template<typename TargetType>
-    void
-    BrInstBase<TargetType>::generateDisassembly()
-    {
-        std::string widthClause;
-
-        if (width.bits != 1) {
-            widthClause = csprintf("_width(%d)", width.bits);
-        }
-
-        disassembly = csprintf("%s%s %s", opcode, widthClause,
-                               target.disassemble());
-    }
-
-    template<typename TargetType>
-    void
-    BrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
-    {
-        Wavefront *w = gpuDynInst->wavefront();
-
-        if (getTargetPc() == w->rpc()) {
-            w->popFromReconvergenceStack();
-        } else {
-            // Rpc and execution mask remain the same
-            w->pc(getTargetPc());
-        }
-    }
-
-    class BrDirectInst : public BrInstBase<LabelOperand>
-    {
-      public:
-        BrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
-            : BrInstBase<LabelOperand>(ib, obj)
-        {
-        }
-
-        int numSrcRegOperands() { return 0; }
-        int numDstRegOperands() { return 0; }
-    };
-
-    class BrIndirectInst : public BrInstBase<SRegOperand>
-    {
-      public:
-        BrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
-            : BrInstBase<SRegOperand>(ib, obj)
-        {
-        }
-        int numSrcRegOperands() { return target.isVectorRegister(); }
-        int numDstRegOperands() { return 0; }
-    };
-
-    GPUStaticInst* decodeBr(const Brig::BrigInstBase *ib,
-                            const BrigObject *obj);
-} // namespace HsailISA
-
-#endif // __ARCH_HSAIL_INSTS_BRANCH_HH__
diff --git a/src/arch/hsail/insts/decl.hh b/src/arch/hsail/insts/decl.hh
deleted file mode 100644 (file)
index 3132a42..0000000
+++ /dev/null
@@ -1,1298 +0,0 @@
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Steve Reinhardt
- */
-
-#ifndef __ARCH_HSAIL_INSTS_DECL_HH__
-#define __ARCH_HSAIL_INSTS_DECL_HH__
-
-#include <cmath>
-
-#include "arch/hsail/insts/gpu_static_inst.hh"
-#include "arch/hsail/operand.hh"
-#include "debug/HSAIL.hh"
-#include "gpu-compute/gpu_dyn_inst.hh"
-#include "gpu-compute/shader.hh"
-
-namespace HsailISA
-{
-    template<typename _DestOperand, typename _SrcOperand>
-    class HsailOperandType
-    {
-      public:
-        typedef _DestOperand DestOperand;
-        typedef _SrcOperand SrcOperand;
-    };
-
-    typedef HsailOperandType<CRegOperand, CRegOrImmOperand> CRegOperandType;
-    typedef HsailOperandType<SRegOperand, SRegOrImmOperand> SRegOperandType;
-    typedef HsailOperandType<DRegOperand, DRegOrImmOperand> DRegOperandType;
-
-    // The IsBits parameter serves only to disambiguate tbhe B* types from
-    // the U* types, which otherwise would be identical (and
-    // indistinguishable).
-    template<typename _OperandType, typename _CType, Enums::MemType _memType,
-             vgpr_type _vgprType, int IsBits=0>
-    class HsailDataType
-    {
-      public:
-        typedef _OperandType OperandType;
-        typedef _CType CType;
-        static const Enums::MemType memType = _memType;
-        static const vgpr_type vgprType = _vgprType;
-        static const char *label;
-    };
-
-    typedef HsailDataType<CRegOperandType, bool, Enums::M_U8, VT_32, 1> B1;
-    typedef HsailDataType<SRegOperandType, uint8_t, Enums::M_U8, VT_32, 1> B8;
-
-    typedef HsailDataType<SRegOperandType, uint16_t,
-                          Enums::M_U16, VT_32, 1> B16;
-
-    typedef HsailDataType<SRegOperandType, uint32_t,
-                          Enums::M_U32, VT_32, 1> B32;
-
-    typedef HsailDataType<DRegOperandType, uint64_t,
-                          Enums::M_U64, VT_64, 1> B64;
-
-    typedef HsailDataType<SRegOperandType, int8_t, Enums::M_S8, VT_32> S8;
-    typedef HsailDataType<SRegOperandType, int16_t, Enums::M_S16, VT_32> S16;
-    typedef HsailDataType<SRegOperandType, int32_t, Enums::M_S32, VT_32> S32;
-    typedef HsailDataType<DRegOperandType, int64_t, Enums::M_S64, VT_64> S64;
-
-    typedef HsailDataType<SRegOperandType, uint8_t, Enums::M_U8, VT_32> U8;
-    typedef HsailDataType<SRegOperandType, uint16_t, Enums::M_U16, VT_32> U16;
-    typedef HsailDataType<SRegOperandType, uint32_t, Enums::M_U32, VT_32> U32;
-    typedef HsailDataType<DRegOperandType, uint64_t, Enums::M_U64, VT_64> U64;
-
-    typedef HsailDataType<SRegOperandType, float, Enums::M_F32, VT_32> F32;
-    typedef HsailDataType<DRegOperandType, double, Enums::M_F64, VT_64> F64;
-
-    template<typename DestOperandType, typename SrcOperandType,
-             int NumSrcOperands>
-    class CommonInstBase : public HsailGPUStaticInst
-    {
-      protected:
-        typename DestOperandType::DestOperand dest;
-        typename SrcOperandType::SrcOperand src[NumSrcOperands];
-
-        void
-        generateDisassembly()
-        {
-            disassembly = csprintf("%s%s %s", opcode, opcode_suffix(),
-                                   dest.disassemble());
-
-            for (int i = 0; i < NumSrcOperands; ++i) {
-                disassembly += ",";
-                disassembly += src[i].disassemble();
-            }
-        }
-
-        virtual std::string opcode_suffix() = 0;
-
-      public:
-        CommonInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
-                       const char *opcode)
-            : HsailGPUStaticInst(obj, opcode)
-        {
-            setFlag(ALU);
-
-            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
-
-            dest.init(op_offs, obj);
-
-            for (int i = 0; i < NumSrcOperands; ++i) {
-                op_offs = obj->getOperandPtr(ib->operands, i + 1);
-                src[i].init(op_offs, obj);
-            }
-        }
-
-        bool isVectorRegister(int operandIndex) {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (operandIndex < NumSrcOperands)
-                return src[operandIndex].isVectorRegister();
-            else
-                return dest.isVectorRegister();
-        }
-        bool isCondRegister(int operandIndex) {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (operandIndex < NumSrcOperands)
-                return src[operandIndex].isCondRegister();
-            else
-                return dest.isCondRegister();
-        }
-        bool isScalarRegister(int operandIndex) {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (operandIndex < NumSrcOperands)
-                return src[operandIndex].isScalarRegister();
-            else
-                return dest.isScalarRegister();
-        }
-        bool isSrcOperand(int operandIndex) {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (operandIndex < NumSrcOperands)
-                return true;
-            return false;
-        }
-
-        bool isDstOperand(int operandIndex) {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (operandIndex >= NumSrcOperands)
-                return true;
-            return false;
-        }
-        int getOperandSize(int operandIndex) {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (operandIndex < NumSrcOperands)
-                return src[operandIndex].opSize();
-            else
-                return dest.opSize();
-        }
-        int
-        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
-        {
-            assert(operandIndex >= 0 && operandIndex < getNumOperands());
-
-            if (operandIndex < NumSrcOperands)
-                return src[operandIndex].regIndex();
-            else
-                return dest.regIndex();
-        }
-        int numSrcRegOperands() {
-            int operands = 0;
-            for (int i = 0; i < NumSrcOperands; i++) {
-                if (src[i].isVectorRegister()) {
-                    operands++;
-                }
-            }
-            return operands;
-        }
-        int numDstRegOperands() { return dest.isVectorRegister(); }
-        int getNumOperands() { return NumSrcOperands + 1; }
-    };
-
-    template<typename DataType, int NumSrcOperands>
-    class ArithInst : public CommonInstBase<typename DataType::OperandType,
-                                            typename DataType::OperandType,
-                                            NumSrcOperands>
-    {
-      public:
-        std::string opcode_suffix() { return csprintf("_%s", DataType::label); }
-
-        ArithInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
-                  const char *opcode)
-            : CommonInstBase<typename DataType::OperandType,
-                             typename DataType::OperandType,
-                             NumSrcOperands>(ib, obj, opcode)
-        {
-        }
-    };
-
-    template<typename DestOperandType, typename Src0OperandType,
-             typename Src1OperandType, typename Src2OperandType>
-    class ThreeNonUniformSourceInstBase : public HsailGPUStaticInst
-    {
-      protected:
-        typename DestOperandType::DestOperand dest;
-        typename Src0OperandType::SrcOperand  src0;
-        typename Src1OperandType::SrcOperand  src1;
-        typename Src2OperandType::SrcOperand  src2;
-
-        void
-        generateDisassembly()
-        {
-            disassembly = csprintf("%s %s,%s,%s,%s", opcode, dest.disassemble(),
-                                   src0.disassemble(), src1.disassemble(),
-                                   src2.disassemble());
-        }
-
-      public:
-        ThreeNonUniformSourceInstBase(const Brig::BrigInstBase *ib,
-                                      const BrigObject *obj,
-                                      const char *opcode)
-            : HsailGPUStaticInst(obj, opcode)
-        {
-            setFlag(ALU);
-
-            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
-            dest.init(op_offs, obj);
-
-            op_offs = obj->getOperandPtr(ib->operands, 1);
-            src0.init(op_offs, obj);
-
-            op_offs = obj->getOperandPtr(ib->operands, 2);
-            src1.init(op_offs, obj);
-
-            op_offs = obj->getOperandPtr(ib->operands, 3);
-            src2.init(op_offs, obj);
-        }
-
-        bool isVectorRegister(int operandIndex) {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (!operandIndex)
-                return src0.isVectorRegister();
-            else if (operandIndex == 1)
-                return src1.isVectorRegister();
-            else if (operandIndex == 2)
-                return src2.isVectorRegister();
-            else
-                return dest.isVectorRegister();
-        }
-        bool isCondRegister(int operandIndex) {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (!operandIndex)
-                return src0.isCondRegister();
-            else if (operandIndex == 1)
-                return src1.isCondRegister();
-            else if (operandIndex == 2)
-                return src2.isCondRegister();
-            else
-                return dest.isCondRegister();
-        }
-        bool isScalarRegister(int operandIndex) {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (!operandIndex)
-                return src0.isScalarRegister();
-            else if (operandIndex == 1)
-                return src1.isScalarRegister();
-            else if (operandIndex == 2)
-                return src2.isScalarRegister();
-            else
-                return dest.isScalarRegister();
-        }
-        bool isSrcOperand(int operandIndex) {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (operandIndex < 3)
-                return true;
-            else
-                return false;
-        }
-        bool isDstOperand(int operandIndex) {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (operandIndex >= 3)
-                return true;
-            else
-                return false;
-        }
-        int getOperandSize(int operandIndex) {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (!operandIndex)
-                return src0.opSize();
-            else if (operandIndex == 1)
-                return src1.opSize();
-            else if (operandIndex == 2)
-                return src2.opSize();
-            else
-                return dest.opSize();
-        }
-
-        int
-        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (!operandIndex)
-                return src0.regIndex();
-            else if (operandIndex == 1)
-                return src1.regIndex();
-            else if (operandIndex == 2)
-                return src2.regIndex();
-            else
-                return dest.regIndex();
-        }
-
-        int numSrcRegOperands() {
-            int operands = 0;
-            if (src0.isVectorRegister()) {
-                operands++;
-            }
-            if (src1.isVectorRegister()) {
-                operands++;
-            }
-            if (src2.isVectorRegister()) {
-                operands++;
-            }
-            return operands;
-        }
-        int numDstRegOperands() { return dest.isVectorRegister(); }
-        int getNumOperands() { return 4; }
-    };
-
-    template<typename DestDataType, typename Src0DataType,
-             typename Src1DataType, typename Src2DataType>
-    class ThreeNonUniformSourceInst :
-        public ThreeNonUniformSourceInstBase<typename DestDataType::OperandType,
-                                             typename Src0DataType::OperandType,
-                                             typename Src1DataType::OperandType,
-                                             typename Src2DataType::OperandType>
-    {
-      public:
-        typedef typename DestDataType::CType DestCType;
-        typedef typename Src0DataType::CType Src0CType;
-        typedef typename Src1DataType::CType Src1CType;
-        typedef typename Src2DataType::CType Src2CType;
-
-        ThreeNonUniformSourceInst(const Brig::BrigInstBase *ib,
-                                  const BrigObject *obj, const char *opcode)
-            : ThreeNonUniformSourceInstBase<typename DestDataType::OperandType,
-                                         typename Src0DataType::OperandType,
-                                         typename Src1DataType::OperandType,
-                                         typename Src2DataType::OperandType>(ib,
-                                                                    obj, opcode)
-        {
-        }
-    };
-
-    template<typename DataType>
-    class CmovInst : public ThreeNonUniformSourceInst<DataType, B1,
-                                                      DataType, DataType>
-    {
-      public:
-        CmovInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
-                 const char *opcode)
-            : ThreeNonUniformSourceInst<DataType, B1, DataType,
-                                        DataType>(ib, obj, opcode)
-        {
-        }
-    };
-
-    template<typename DataType>
-    class ExtractInsertInst : public ThreeNonUniformSourceInst<DataType,
-                                                               DataType, U32,
-                                                               U32>
-    {
-      public:
-        ExtractInsertInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
-                          const char *opcode)
-            : ThreeNonUniformSourceInst<DataType, DataType, U32,
-                                        U32>(ib, obj, opcode)
-        {
-        }
-    };
-
-    template<typename DestOperandType, typename Src0OperandType,
-             typename Src1OperandType>
-    class TwoNonUniformSourceInstBase : public HsailGPUStaticInst
-    {
-      protected:
-        typename DestOperandType::DestOperand dest;
-        typename Src0OperandType::SrcOperand src0;
-        typename Src1OperandType::SrcOperand src1;
-
-        void
-        generateDisassembly()
-        {
-            disassembly = csprintf("%s %s,%s,%s", opcode, dest.disassemble(),
-                                   src0.disassemble(), src1.disassemble());
-        }
-
-
-      public:
-        TwoNonUniformSourceInstBase(const Brig::BrigInstBase *ib,
-                                    const BrigObject *obj, const char *opcode)
-            : HsailGPUStaticInst(obj, opcode)
-        {
-            setFlag(ALU);
-
-            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
-            dest.init(op_offs, obj);
-
-            op_offs = obj->getOperandPtr(ib->operands, 1);
-            src0.init(op_offs, obj);
-
-            op_offs = obj->getOperandPtr(ib->operands, 2);
-            src1.init(op_offs, obj);
-        }
-        bool isVectorRegister(int operandIndex) {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (!operandIndex)
-                return src0.isVectorRegister();
-            else if (operandIndex == 1)
-                return src1.isVectorRegister();
-            else
-                return dest.isVectorRegister();
-        }
-        bool isCondRegister(int operandIndex) {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (!operandIndex)
-                return src0.isCondRegister();
-            else if (operandIndex == 1)
-                return src1.isCondRegister();
-            else
-                return dest.isCondRegister();
-        }
-        bool isScalarRegister(int operandIndex) {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (!operandIndex)
-                return src0.isScalarRegister();
-            else if (operandIndex == 1)
-                return src1.isScalarRegister();
-            else
-                return dest.isScalarRegister();
-        }
-        bool isSrcOperand(int operandIndex) {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (operandIndex < 2)
-                return true;
-            else
-                return false;
-        }
-        bool isDstOperand(int operandIndex) {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (operandIndex >= 2)
-                return true;
-            else
-                return false;
-        }
-        int getOperandSize(int operandIndex) {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (!operandIndex)
-                return src0.opSize();
-            else if (operandIndex == 1)
-                return src1.opSize();
-            else
-                return dest.opSize();
-        }
-
-        int
-        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (!operandIndex)
-                return src0.regIndex();
-            else if (operandIndex == 1)
-                return src1.regIndex();
-            else
-                return dest.regIndex();
-        }
-
-        int numSrcRegOperands() {
-            int operands = 0;
-            if (src0.isVectorRegister()) {
-                operands++;
-            }
-            if (src1.isVectorRegister()) {
-                operands++;
-            }
-            return operands;
-        }
-        int numDstRegOperands() { return dest.isVectorRegister(); }
-        int getNumOperands() { return 3; }
-    };
-
-    template<typename DestDataType, typename Src0DataType,
-             typename Src1DataType>
-    class TwoNonUniformSourceInst :
-        public TwoNonUniformSourceInstBase<typename DestDataType::OperandType,
-                                           typename Src0DataType::OperandType,
-                                           typename Src1DataType::OperandType>
-    {
-      public:
-        typedef typename DestDataType::CType DestCType;
-        typedef typename Src0DataType::CType Src0CType;
-        typedef typename Src1DataType::CType Src1CType;
-
-        TwoNonUniformSourceInst(const Brig::BrigInstBase *ib,
-                                const BrigObject *obj, const char *opcode)
-            : TwoNonUniformSourceInstBase<typename DestDataType::OperandType,
-                                         typename Src0DataType::OperandType,
-                                         typename Src1DataType::OperandType>(ib,
-                                                                    obj, opcode)
-        {
-        }
-    };
-
-    // helper function for ClassInst
-    template<typename T>
-    bool
-    fpclassify(T src0, uint32_t src1)
-    {
-        int fpclass = std::fpclassify(src0);
-
-        if ((src1 & 0x3) && (fpclass == FP_NAN)) {
-            return true;
-        }
-
-        if (src0 <= -0.0) {
-            if ((src1 & 0x4) && fpclass == FP_INFINITE)
-                return true;
-            if ((src1 & 0x8) && fpclass == FP_NORMAL)
-                return true;
-            if ((src1 & 0x10) && fpclass == FP_SUBNORMAL)
-                return true;
-            if ((src1 & 0x20) && fpclass == FP_ZERO)
-                return true;
-        } else {
-            if ((src1 & 0x40) && fpclass == FP_ZERO)
-                return true;
-            if ((src1 & 0x80) && fpclass == FP_SUBNORMAL)
-                return true;
-            if ((src1 & 0x100) && fpclass == FP_NORMAL)
-                return true;
-            if ((src1 & 0x200) && fpclass == FP_INFINITE)
-                return true;
-        }
-        return false;
-    }
-
-    template<typename DataType>
-    class ClassInst : public TwoNonUniformSourceInst<B1, DataType, U32>
-    {
-      public:
-        ClassInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
-                  const char *opcode)
-            : TwoNonUniformSourceInst<B1, DataType, U32>(ib, obj, opcode)
-        {
-        }
-    };
-
-    template<typename DataType>
-    class ShiftInst : public TwoNonUniformSourceInst<DataType, DataType, U32>
-    {
-      public:
-        ShiftInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
-                  const char *opcode)
-            : TwoNonUniformSourceInst<DataType, DataType, U32>(ib, obj, opcode)
-        {
-        }
-    };
-
-    // helper function for CmpInst
-    template<typename T>
-    bool
-    compare(T src0, T src1, Brig::BrigCompareOperation cmpOp)
-    {
-        using namespace Brig;
-
-        switch (cmpOp) {
-          case BRIG_COMPARE_EQ:
-          case BRIG_COMPARE_EQU:
-          case BRIG_COMPARE_SEQ:
-          case BRIG_COMPARE_SEQU:
-            return (src0 == src1);
-
-          case BRIG_COMPARE_NE:
-          case BRIG_COMPARE_NEU:
-          case BRIG_COMPARE_SNE:
-          case BRIG_COMPARE_SNEU:
-            return (src0 != src1);
-
-          case BRIG_COMPARE_LT:
-          case BRIG_COMPARE_LTU:
-          case BRIG_COMPARE_SLT:
-          case BRIG_COMPARE_SLTU:
-            return (src0 < src1);
-
-          case BRIG_COMPARE_LE:
-          case BRIG_COMPARE_LEU:
-          case BRIG_COMPARE_SLE:
-          case BRIG_COMPARE_SLEU:
-            return (src0 <= src1);
-
-          case BRIG_COMPARE_GT:
-          case BRIG_COMPARE_GTU:
-          case BRIG_COMPARE_SGT:
-          case BRIG_COMPARE_SGTU:
-            return (src0 > src1);
-
-          case BRIG_COMPARE_GE:
-          case BRIG_COMPARE_GEU:
-          case BRIG_COMPARE_SGE:
-          case BRIG_COMPARE_SGEU:
-            return (src0 >= src1);
-
-          case BRIG_COMPARE_NUM:
-          case BRIG_COMPARE_SNUM:
-            return (src0 == src0) || (src1 == src1);
-
-          case BRIG_COMPARE_NAN:
-          case BRIG_COMPARE_SNAN:
-            return (src0 != src0) || (src1 != src1);
-
-          default:
-            fatal("Bad cmpOp value %d\n", (int)cmpOp);
-        }
-    }
-
-    template<typename T>
-    int32_t
-    firstbit(T src0)
-    {
-        if (!src0)
-            return -1;
-
-        //handle positive and negative numbers
-        T tmp = ((int64_t)src0 < 0) ? (~src0) : (src0);
-
-        //the starting pos is MSB
-        int pos = 8 * sizeof(T) - 1;
-        int cnt = 0;
-
-        //search the first bit set to 1
-        while (!(tmp & (1 << pos))) {
-            ++cnt;
-            --pos;
-        }
-        return cnt;
-    }
-
-    const char* cmpOpToString(Brig::BrigCompareOperation cmpOp);
-
-    template<typename DestOperandType, typename SrcOperandType>
-    class CmpInstBase : public CommonInstBase<DestOperandType, SrcOperandType,
-                                              2>
-    {
-      protected:
-        Brig::BrigCompareOperation cmpOp;
-
-      public:
-        CmpInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
-                    const char *_opcode)
-            : CommonInstBase<DestOperandType, SrcOperandType, 2>(ib, obj,
-                                                                 _opcode)
-        {
-            assert(ib->base.kind == Brig::BRIG_KIND_INST_CMP);
-            Brig::BrigInstCmp *i = (Brig::BrigInstCmp*)ib;
-            cmpOp = (Brig::BrigCompareOperation)i->compare;
-        }
-    };
-
-    template<typename DestDataType, typename SrcDataType>
-    class CmpInst : public CmpInstBase<typename DestDataType::OperandType,
-                                       typename SrcDataType::OperandType>
-    {
-      public:
-        std::string
-        opcode_suffix()
-        {
-            return csprintf("_%s_%s_%s", cmpOpToString(this->cmpOp),
-                            DestDataType::label, SrcDataType::label);
-        }
-
-        CmpInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
-                const char *_opcode)
-            : CmpInstBase<typename DestDataType::OperandType,
-                          typename SrcDataType::OperandType>(ib, obj, _opcode)
-        {
-        }
-    };
-
-    template<typename DestDataType, typename SrcDataType>
-    class CvtInst : public CommonInstBase<typename DestDataType::OperandType,
-                                          typename SrcDataType::OperandType, 1>
-    {
-      public:
-        std::string opcode_suffix()
-        {
-            return csprintf("_%s_%s", DestDataType::label, SrcDataType::label);
-        }
-
-        CvtInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
-                const char *_opcode)
-            : CommonInstBase<typename DestDataType::OperandType,
-                             typename SrcDataType::OperandType,
-                             1>(ib, obj, _opcode)
-        {
-        }
-    };
-
-    template<typename DestDataType, typename SrcDataType>
-    class PopcountInst :
-        public CommonInstBase<typename DestDataType::OperandType,
-                              typename SrcDataType::OperandType, 1>
-    {
-      public:
-        std::string opcode_suffix()
-        {
-            return csprintf("_%s_%s", DestDataType::label, SrcDataType::label);
-        }
-
-        PopcountInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
-                     const char *_opcode)
-            : CommonInstBase<typename DestDataType::OperandType,
-                             typename SrcDataType::OperandType,
-                             1>(ib, obj, _opcode)
-        {
-        }
-    };
-
-    class Stub : public HsailGPUStaticInst
-    {
-      public:
-        Stub(const Brig::BrigInstBase *ib, const BrigObject *obj,
-             const char *_opcode)
-            : HsailGPUStaticInst(obj, _opcode)
-        {
-        }
-
-        void generateDisassembly() override
-        {
-            disassembly = csprintf("%s", opcode);
-        }
-
-        bool isVectorRegister(int operandIndex) override { return false; }
-        bool isCondRegister(int operandIndex) override { return false; }
-        bool isScalarRegister(int operandIndex) override { return false; }
-        bool isSrcOperand(int operandIndex) override { return false; }
-        bool isDstOperand(int operandIndex) override { return false; }
-        int getOperandSize(int operandIndex) override { return 0; }
-
-        int
-        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
-        {
-            return -1;
-        }
-
-        int numSrcRegOperands() override { return 0; }
-        int numDstRegOperands() override { return 0; }
-        int getNumOperands() override { return 0; }
-    };
-
-    class SpecialInstNoSrcNoDest : public HsailGPUStaticInst
-    {
-      public:
-        SpecialInstNoSrcNoDest(const Brig::BrigInstBase *ib,
-                               const BrigObject *obj, const char *_opcode)
-            : HsailGPUStaticInst(obj, _opcode)
-        {
-        }
-
-        bool isVectorRegister(int operandIndex) override { return false; }
-        bool isCondRegister(int operandIndex) override { return false; }
-        bool isScalarRegister(int operandIndex) override { return false; }
-        bool isSrcOperand(int operandIndex) override { return false; }
-        bool isDstOperand(int operandIndex) override { return false; }
-        int getOperandSize(int operandIndex) override { return 0; }
-
-        int
-        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
-        {
-            return -1;
-        }
-
-        int numSrcRegOperands() override { return 0; }
-        int numDstRegOperands() override { return 0; }
-        int getNumOperands() override { return 0; }
-    };
-
-    template<typename DestOperandType>
-    class SpecialInstNoSrcBase : public HsailGPUStaticInst
-    {
-      protected:
-        typename DestOperandType::DestOperand dest;
-
-        void generateDisassembly()
-        {
-            disassembly = csprintf("%s %s", opcode, dest.disassemble());
-        }
-
-      public:
-        SpecialInstNoSrcBase(const Brig::BrigInstBase *ib,
-                             const BrigObject *obj, const char *_opcode)
-            : HsailGPUStaticInst(obj, _opcode)
-        {
-            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
-            dest.init(op_offs, obj);
-        }
-
-        bool isVectorRegister(int operandIndex) {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            return dest.isVectorRegister();
-        }
-        bool isCondRegister(int operandIndex) {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            return dest.isCondRegister();
-        }
-        bool isScalarRegister(int operandIndex) {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            return dest.isScalarRegister();
-        }
-        bool isSrcOperand(int operandIndex) { return false; }
-        bool isDstOperand(int operandIndex) { return true; }
-        int getOperandSize(int operandIndex) {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            return dest.opSize();
-        }
-
-        int
-        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            return dest.regIndex();
-        }
-
-        int numSrcRegOperands() { return 0; }
-        int numDstRegOperands() { return dest.isVectorRegister(); }
-        int getNumOperands() { return 1; }
-    };
-
-    template<typename DestDataType>
-    class SpecialInstNoSrc :
-        public SpecialInstNoSrcBase<typename DestDataType::OperandType>
-    {
-      public:
-        typedef typename DestDataType::CType DestCType;
-
-        SpecialInstNoSrc(const Brig::BrigInstBase *ib, const BrigObject *obj,
-                         const char *_opcode)
-            : SpecialInstNoSrcBase<typename DestDataType::OperandType>(ib, obj,
-                                                                       _opcode)
-        {
-        }
-    };
-
-    template<typename DestOperandType>
-    class SpecialInst1SrcBase : public HsailGPUStaticInst
-    {
-      protected:
-        typedef int SrcCType;  // used in execute() template
-
-        typename DestOperandType::DestOperand dest;
-        ImmOperand<SrcCType> src0;
-
-        void
-        generateDisassembly()
-        {
-            disassembly = csprintf("%s %s,%s", opcode, dest.disassemble(),
-                                   src0.disassemble());
-        }
-
-      public:
-        SpecialInst1SrcBase(const Brig::BrigInstBase *ib,
-                            const BrigObject *obj, const char *_opcode)
-            : HsailGPUStaticInst(obj, _opcode)
-        {
-            setFlag(ALU);
-
-            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
-            dest.init(op_offs, obj);
-
-            op_offs = obj->getOperandPtr(ib->operands, 1);
-            src0.init(op_offs, obj);
-        }
-        bool isVectorRegister(int operandIndex) {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            return dest.isVectorRegister();
-        }
-        bool isCondRegister(int operandIndex) {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            return dest.isCondRegister();
-        }
-        bool isScalarRegister(int operandIndex) {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            return dest.isScalarRegister();
-        }
-        bool isSrcOperand(int operandIndex) { return false; }
-        bool isDstOperand(int operandIndex) { return true; }
-        int getOperandSize(int operandIndex) {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            return dest.opSize();
-        }
-
-        int
-        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            return dest.regIndex();
-        }
-
-        int numSrcRegOperands() { return 0; }
-        int numDstRegOperands() { return dest.isVectorRegister(); }
-        int getNumOperands() { return 1; }
-    };
-
-    template<typename DestDataType>
-    class SpecialInst1Src :
-        public SpecialInst1SrcBase<typename DestDataType::OperandType>
-    {
-      public:
-        typedef typename DestDataType::CType DestCType;
-
-        SpecialInst1Src(const Brig::BrigInstBase *ib, const BrigObject *obj,
-                        const char *_opcode)
-            : SpecialInst1SrcBase<typename DestDataType::OperandType>(ib, obj,
-                                                                      _opcode)
-        {
-        }
-    };
-
-    class Ret : public SpecialInstNoSrcNoDest
-    {
-      public:
-        typedef SpecialInstNoSrcNoDest Base;
-
-        Ret(const Brig::BrigInstBase *ib, const BrigObject *obj)
-           : Base(ib, obj, "ret")
-        {
-            setFlag(GPUStaticInst::Return);
-        }
-
-        void execute(GPUDynInstPtr gpuDynInst);
-    };
-
-    class Barrier : public SpecialInstNoSrcNoDest
-    {
-      public:
-        typedef SpecialInstNoSrcNoDest Base;
-        uint8_t width;
-
-        Barrier(const Brig::BrigInstBase *ib, const BrigObject *obj)
-            : Base(ib, obj, "barrier")
-        {
-            setFlag(GPUStaticInst::MemBarrier);
-            assert(ib->base.kind == Brig::BRIG_KIND_INST_BR);
-            width = (uint8_t)((Brig::BrigInstBr*)ib)->width;
-        }
-
-        void execute(GPUDynInstPtr gpuDynInst);
-    };
-
-    class MemFence : public SpecialInstNoSrcNoDest
-    {
-      public:
-        typedef SpecialInstNoSrcNoDest Base;
-
-        Brig::BrigMemoryOrder memFenceMemOrder;
-        Brig::BrigMemoryScope memFenceScopeSegGroup;
-        Brig::BrigMemoryScope memFenceScopeSegGlobal;
-        Brig::BrigMemoryScope memFenceScopeSegImage;
-
-        MemFence(const Brig::BrigInstBase *ib, const BrigObject *obj)
-            : Base(ib, obj, "memfence")
-        {
-            assert(ib->base.kind == Brig::BRIG_KIND_INST_MEM_FENCE);
-
-            memFenceScopeSegGlobal = (Brig::BrigMemoryScope)
-                ((Brig::BrigInstMemFence*)ib)->globalSegmentMemoryScope;
-
-            memFenceScopeSegGroup = (Brig::BrigMemoryScope)
-                ((Brig::BrigInstMemFence*)ib)->groupSegmentMemoryScope;
-
-            memFenceScopeSegImage = (Brig::BrigMemoryScope)
-                ((Brig::BrigInstMemFence*)ib)->imageSegmentMemoryScope;
-
-            memFenceMemOrder = (Brig::BrigMemoryOrder)
-                ((Brig::BrigInstMemFence*)ib)->memoryOrder;
-
-            setFlag(MemoryRef);
-            setFlag(GPUStaticInst::MemFence);
-
-            switch (memFenceMemOrder) {
-              case Brig::BRIG_MEMORY_ORDER_NONE:
-                setFlag(NoOrder);
-                break;
-              case Brig::BRIG_MEMORY_ORDER_RELAXED:
-                setFlag(RelaxedOrder);
-                break;
-              case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE:
-                setFlag(Acquire);
-                break;
-              case Brig::BRIG_MEMORY_ORDER_SC_RELEASE:
-                setFlag(Release);
-                break;
-              case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
-                setFlag(AcquireRelease);
-                break;
-              default:
-                fatal("MemInst has bad BrigMemoryOrder\n");
-            }
-
-            // set inst flags based on scopes
-            if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE &&
-                memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) {
-                setFlag(GPUStaticInst::GlobalSegment);
-
-                /**
-                 * A memory fence that has scope for
-                 * both segments will use the global
-                 * segment, and be executed in the
-                 * global memory pipeline, therefore,
-                 * we set the segment to match the
-                 * global scope only
-                 */
-                switch (memFenceScopeSegGlobal) {
-                  case Brig::BRIG_MEMORY_SCOPE_NONE:
-                    setFlag(NoScope);
-                    break;
-                  case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
-                    setFlag(WorkitemScope);
-                    break;
-                  case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
-                    setFlag(WorkgroupScope);
-                    break;
-                  case Brig::BRIG_MEMORY_SCOPE_AGENT:
-                    setFlag(DeviceScope);
-                    break;
-                  case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
-                    setFlag(SystemScope);
-                    break;
-                  default:
-                    fatal("MemFence has bad global scope type\n");
-                }
-            } else if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE) {
-                setFlag(GPUStaticInst::GlobalSegment);
-
-                switch (memFenceScopeSegGlobal) {
-                  case Brig::BRIG_MEMORY_SCOPE_NONE:
-                    setFlag(NoScope);
-                    break;
-                  case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
-                    setFlag(WorkitemScope);
-                    break;
-                  case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
-                    setFlag(WorkgroupScope);
-                    break;
-                  case Brig::BRIG_MEMORY_SCOPE_AGENT:
-                    setFlag(DeviceScope);
-                    break;
-                  case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
-                    setFlag(SystemScope);
-                    break;
-                  default:
-                    fatal("MemFence has bad global scope type\n");
-                }
-            } else if (memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) {
-                setFlag(GPUStaticInst::GroupSegment);
-
-                switch (memFenceScopeSegGroup) {
-                  case Brig::BRIG_MEMORY_SCOPE_NONE:
-                    setFlag(NoScope);
-                    break;
-                  case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
-                    setFlag(WorkitemScope);
-                    break;
-                  case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
-                    setFlag(WorkgroupScope);
-                    break;
-                  case Brig::BRIG_MEMORY_SCOPE_AGENT:
-                    setFlag(DeviceScope);
-                    break;
-                  case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
-                    setFlag(SystemScope);
-                    break;
-                  default:
-                    fatal("MemFence has bad group scope type\n");
-                }
-            } else {
-                fatal("MemFence constructor: bad scope specifiers\n");
-            }
-        }
-
-        void
-        initiateAcc(GPUDynInstPtr gpuDynInst)
-        {
-            Wavefront *wave = gpuDynInst->wavefront();
-            wave->computeUnit->injectGlobalMemFence(gpuDynInst);
-        }
-
-        void
-        execute(GPUDynInstPtr gpuDynInst)
-        {
-            Wavefront *w = gpuDynInst->wavefront();
-            // 2 cases:
-            //   * memfence to a sequentially consistent memory (e.g., LDS).
-            //     These can be handled as no-ops.
-            //   * memfence to a relaxed consistency cache (e.g., Hermes, Viper,
-            //     etc.). We send a packet, tagged with the memory order and
-            //     scope, and let the GPU coalescer handle it.
-
-            if (isGlobalSeg()) {
-                gpuDynInst->simdId = w->simdId;
-                gpuDynInst->wfSlotId = w->wfSlotId;
-                gpuDynInst->wfDynId = w->wfDynId;
-                gpuDynInst->kern_id = w->kernId;
-                gpuDynInst->cu_id = w->computeUnit->cu_id;
-
-                gpuDynInst->useContinuation = false;
-                GlobalMemPipeline* gmp = &(w->computeUnit->globalMemoryPipe);
-                gmp->issueRequest(gpuDynInst);
-
-                w->wrGmReqsInPipe--;
-                w->rdGmReqsInPipe--;
-                w->memReqsInPipe--;
-                w->outstandingReqs++;
-            } else if (isGroupSeg()) {
-                // no-op
-            } else {
-                fatal("MemFence execute: bad op type\n");
-            }
-        }
-    };
-
-    class Call : public HsailGPUStaticInst
-    {
-      public:
-        // private helper functions
-        void calcAddr(Wavefront* w, GPUDynInstPtr m);
-
-        void
-        generateDisassembly()
-        {
-            if (dest.disassemble() == "") {
-                disassembly = csprintf("%s %s (%s)", opcode, src0.disassemble(),
-                                       src1.disassemble());
-            } else {
-                disassembly = csprintf("%s %s (%s) (%s)", opcode,
-                                       src0.disassemble(), dest.disassemble(),
-                                       src1.disassemble());
-            }
-        }
-
-        bool
-        isPseudoOp()
-        {
-            std::string func_name = src0.disassemble();
-            if (func_name.find("__gem5_hsail_op") != std::string::npos) {
-                return true;
-            }
-            return false;
-        }
-
-        // member variables
-        ListOperand dest;
-        FunctionRefOperand src0;
-        ListOperand src1;
-        HsailCode *func_ptr;
-
-        // exec function for pseudo instructions mapped on top of call opcode
-        void execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst);
-
-        // user-defined pseudo instructions
-        void MagicPrintLane(Wavefront *w);
-        void MagicPrintLane64(Wavefront *w);
-        void MagicPrintWF32(Wavefront *w);
-        void MagicPrintWF64(Wavefront *w);
-        void MagicPrintWFFloat(Wavefront *w);
-        void MagicSimBreak(Wavefront *w);
-        void MagicPrefixSum(Wavefront *w);
-        void MagicReduction(Wavefront *w);
-        void MagicMaskLower(Wavefront *w);
-        void MagicMaskUpper(Wavefront *w);
-        void MagicJoinWFBar(Wavefront *w);
-        void MagicWaitWFBar(Wavefront *w);
-        void MagicPanic(Wavefront *w);
-
-        void MagicAtomicNRAddGlobalU32Reg(Wavefront *w,
-                                          GPUDynInstPtr gpuDynInst);
-
-        void MagicAtomicNRAddGroupU32Reg(Wavefront *w,
-                                         GPUDynInstPtr gpuDynInst);
-
-        void MagicLoadGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst);
-
-        void MagicXactCasLd(Wavefront *w);
-        void MagicMostSigThread(Wavefront *w);
-        void MagicMostSigBroadcast(Wavefront *w);
-
-        void MagicPrintWF32ID(Wavefront *w);
-        void MagicPrintWFID64(Wavefront *w);
-
-        Call(const Brig::BrigInstBase *ib, const BrigObject *obj)
-            : HsailGPUStaticInst(obj, "call")
-        {
-            setFlag(ALU);
-            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
-            dest.init(op_offs, obj);
-            op_offs = obj->getOperandPtr(ib->operands, 1);
-            src0.init(op_offs, obj);
-
-            func_ptr = nullptr;
-            std::string func_name = src0.disassemble();
-            if (!isPseudoOp()) {
-                func_ptr = dynamic_cast<HsailCode*>(obj->
-                                                    getFunction(func_name));
-
-                if (!func_ptr)
-                    fatal("call::exec cannot find function: %s\n", func_name);
-            }
-
-            op_offs = obj->getOperandPtr(ib->operands, 2);
-            src1.init(op_offs, obj);
-        }
-
-        bool isVectorRegister(int operandIndex) { return false; }
-        bool isCondRegister(int operandIndex) { return false; }
-        bool isScalarRegister(int operandIndex) { return false; }
-        bool isSrcOperand(int operandIndex) { return false; }
-        bool isDstOperand(int operandIndex) { return false; }
-        int getOperandSize(int operandIndex) { return 0; }
-
-        int
-        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
-        {
-            return -1;
-        }
-
-        void
-        execute(GPUDynInstPtr gpuDynInst)
-        {
-            Wavefront *w = gpuDynInst->wavefront();
-
-            std::string func_name = src0.disassemble();
-            if (isPseudoOp()) {
-                execPseudoInst(w, gpuDynInst);
-            } else {
-                fatal("Native HSAIL functions are not yet implemented: %s\n",
-                      func_name);
-            }
-        }
-        int numSrcRegOperands() { return 0; }
-        int numDstRegOperands() { return 0; }
-        int getNumOperands() { return 2; }
-    };
-
-    template<typename T> T heynot(T arg) { return ~arg; }
-    template<> inline bool heynot<bool>(bool arg) { return !arg; }
-
-
-    /* Explicitly declare template static member variables to avoid
-     * warnings in some clang versions
-     */
-    template<> const char *B1::label;
-    template<> const char *B8::label;
-    template<> const char *B16::label;
-    template<> const char *B32::label;
-    template<> const char *B64::label;
-    template<> const char *S8::label;
-    template<> const char *S16::label;
-    template<> const char *S32::label;
-    template<> const char *S64::label;
-    template<> const char *U8::label;
-    template<> const char *U16::label;
-    template<> const char *U32::label;
-    template<> const char *U64::label;
-    template<> const char *F32::label;
-    template<> const char *F64::label;
-
-} // namespace HsailISA
-
-#endif // __ARCH_HSAIL_INSTS_DECL_HH__
diff --git a/src/arch/hsail/insts/gpu_static_inst.cc b/src/arch/hsail/insts/gpu_static_inst.cc
deleted file mode 100644 (file)
index dba2756..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Anthony Gutierrez
- */
-
-#include "arch/hsail/insts/gpu_static_inst.hh"
-
-#include "gpu-compute/brig_object.hh"
-
-namespace HsailISA
-{
-    HsailGPUStaticInst::HsailGPUStaticInst(const BrigObject *obj,
-                                           const std::string &opcode)
-        : GPUStaticInst(opcode), hsailCode(obj->currentCode)
-    {
-    }
-
-    void
-    HsailGPUStaticInst::generateDisassembly()
-    {
-        disassembly = opcode;
-    }
-} // namespace HsailISA
diff --git a/src/arch/hsail/insts/gpu_static_inst.hh b/src/arch/hsail/insts/gpu_static_inst.hh
deleted file mode 100644 (file)
index 0bddcac..0000000
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Anthony Gutierrez
- */
-
-#ifndef __ARCH_HSAIL_INSTS_GPU_STATIC_INST_HH__
-#define __ARCH_HSAIL_INSTS_GPU_STATIC_INST_HH__
-
-/*
- * @file gpu_static_inst.hh
- *
- * Defines the base class representing HSAIL GPU static instructions.
- */
-
-#include "arch/hsail/gpu_types.hh"
-#include "gpu-compute/gpu_static_inst.hh"
-
-class BrigObject;
-class HsailCode;
-
-namespace HsailISA
-{
-    class HsailGPUStaticInst : public GPUStaticInst
-    {
-      public:
-        HsailGPUStaticInst(const BrigObject *obj, const std::string &opcode);
-        void generateDisassembly() override;
-        int instSize() const override { return sizeof(RawMachInst); }
-        bool isValid() const override { return true; }
-
-      protected:
-        HsailCode *hsailCode;
-    };
-} // namespace HsailISA
-
-#endif // __ARCH_HSAIL_INSTS_GPU_STATIC_INST_HH__
diff --git a/src/arch/hsail/insts/main.cc b/src/arch/hsail/insts/main.cc
deleted file mode 100644 (file)
index 783689d..0000000
+++ /dev/null
@@ -1,209 +0,0 @@
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Steve Reinhardt
- */
-
-#include "arch/hsail/insts/decl.hh"
-#include "debug/GPUExec.hh"
-#include "gpu-compute/dispatcher.hh"
-#include "gpu-compute/simple_pool_manager.hh"
-
-namespace HsailISA
-{
-    template<> const char *B1::label = "b1";
-    template<> const char *B8::label = "b8";
-    template<> const char *B16::label = "b16";
-    template<> const char *B32::label = "b32";
-    template<> const char *B64::label = "b64";
-
-    template<> const char *S8::label = "s8";
-    template<> const char *S16::label = "s16";
-    template<> const char *S32::label = "s32";
-    template<> const char *S64::label = "s64";
-
-    template<> const char *U8::label = "u8";
-    template<> const char *U16::label = "u16";
-    template<> const char *U32::label = "u32";
-    template<> const char *U64::label = "u64";
-
-    template<> const char *F32::label = "f32";
-    template<> const char *F64::label = "f64";
-
-    const char*
-    cmpOpToString(Brig::BrigCompareOperation cmpOp)
-    {
-        using namespace Brig;
-
-        switch (cmpOp) {
-          case BRIG_COMPARE_EQ:
-            return "eq";
-          case BRIG_COMPARE_NE:
-            return "ne";
-          case BRIG_COMPARE_LT:
-            return "lt";
-          case BRIG_COMPARE_LE:
-            return "le";
-          case BRIG_COMPARE_GT:
-            return "gt";
-          case BRIG_COMPARE_GE:
-            return "ge";
-          case BRIG_COMPARE_EQU:
-            return "equ";
-          case BRIG_COMPARE_NEU:
-            return "neu";
-          case BRIG_COMPARE_LTU:
-            return "ltu";
-          case BRIG_COMPARE_LEU:
-            return "leu";
-          case BRIG_COMPARE_GTU:
-            return "gtu";
-          case BRIG_COMPARE_GEU:
-            return "geu";
-          case BRIG_COMPARE_NUM:
-            return "num";
-          case BRIG_COMPARE_NAN:
-            return "nan";
-          case BRIG_COMPARE_SEQ:
-            return "seq";
-          case BRIG_COMPARE_SNE:
-            return "sne";
-          case BRIG_COMPARE_SLT:
-            return "slt";
-          case BRIG_COMPARE_SLE:
-            return "sle";
-          case BRIG_COMPARE_SGT:
-            return "sgt";
-          case BRIG_COMPARE_SGE:
-            return "sge";
-          case BRIG_COMPARE_SGEU:
-            return "sgeu";
-          case BRIG_COMPARE_SEQU:
-            return "sequ";
-          case BRIG_COMPARE_SNEU:
-            return "sneu";
-          case BRIG_COMPARE_SLTU:
-            return "sltu";
-          case BRIG_COMPARE_SLEU:
-            return "sleu";
-          case BRIG_COMPARE_SNUM:
-            return "snum";
-          case BRIG_COMPARE_SNAN:
-            return "snan";
-          case BRIG_COMPARE_SGTU:
-            return "sgtu";
-          default:
-            return "unknown";
-        }
-    }
-
-    void
-    Ret::execute(GPUDynInstPtr gpuDynInst)
-    {
-        Wavefront *w = gpuDynInst->wavefront();
-
-        const VectorMask &mask = w->getPred();
-
-        // mask off completed work-items
-        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-            if (mask[lane]) {
-                w->initMask[lane] = 0;
-            }
-
-        }
-
-        // delete extra instructions fetched for completed work-items
-        w->instructionBuffer.erase(w->instructionBuffer.begin() + 1,
-                                   w->instructionBuffer.end());
-        if (w->pendingFetch) {
-            w->dropFetch = true;
-        }
-
-        // if all work-items have completed, then wave-front is done
-        if (w->initMask.none()) {
-            w->status = Wavefront::S_STOPPED;
-
-            int32_t refCount = w->computeUnit->getLds().
-                                   decreaseRefCounter(w->dispatchId, w->wgId);
-
-            DPRINTF(GPUExec, "CU%d: decrease ref ctr WG[%d] to [%d]\n",
-                            w->computeUnit->cu_id, w->wgId, refCount);
-
-            // free the vector registers of the completed wavefront
-            w->computeUnit->vectorRegsReserved[w->simdId] -=
-                w->reservedVectorRegs;
-
-            assert(w->computeUnit->vectorRegsReserved[w->simdId] >= 0);
-
-            uint32_t endIndex = (w->startVgprIndex +
-                                 w->reservedVectorRegs - 1) %
-                w->computeUnit->vrf[w->simdId]->numRegs();
-
-            w->computeUnit->vrf[w->simdId]->manager->
-                freeRegion(w->startVgprIndex, endIndex);
-
-            w->reservedVectorRegs = 0;
-            w->startVgprIndex = 0;
-            w->computeUnit->completedWfs++;
-
-            DPRINTF(GPUExec, "Doing return for CU%d: WF[%d][%d][%d]\n",
-                    w->computeUnit->cu_id, w->simdId, w->wfSlotId, w->wfDynId);
-
-            if (!refCount) {
-                setFlag(SystemScope);
-                setFlag(Release);
-                setFlag(GlobalSegment);
-                // Notify Memory System of Kernel Completion
-                // Kernel End = isKernel + isRelease
-                w->status = Wavefront::S_RETURNING;
-                GPUDynInstPtr local_mempacket = gpuDynInst;
-                local_mempacket->useContinuation = false;
-                local_mempacket->simdId = w->simdId;
-                local_mempacket->wfSlotId = w->wfSlotId;
-                local_mempacket->wfDynId = w->wfDynId;
-                w->computeUnit->injectGlobalMemFence(local_mempacket, true);
-            } else {
-                w->computeUnit->shader->dispatcher->scheduleDispatch();
-            }
-        }
-    }
-
-    void
-    Barrier::execute(GPUDynInstPtr gpuDynInst)
-    {
-        Wavefront *w = gpuDynInst->wavefront();
-
-        assert(w->barrierCnt == w->oldBarrierCnt);
-        w->barrierCnt = w->oldBarrierCnt + 1;
-        w->stalledAtBarrier = true;
-    }
-} // namespace HsailISA
diff --git a/src/arch/hsail/insts/mem.cc b/src/arch/hsail/insts/mem.cc
deleted file mode 100644 (file)
index 6a69288..0000000
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Steve Reinhardt
- */
-
-#include "arch/hsail/insts/mem.hh"
-
-#include "arch/hsail/Brig.h"
-
-using namespace Brig;
-
-namespace HsailISA
-{
-    const char* atomicOpToString(BrigAtomicOperation brigOp);
-
-    const char*
-    atomicOpToString(BrigAtomicOperation brigOp)
-    {
-        switch (brigOp) {
-          case BRIG_ATOMIC_AND:
-            return "and";
-          case BRIG_ATOMIC_OR:
-            return "or";
-          case BRIG_ATOMIC_XOR:
-            return "xor";
-          case BRIG_ATOMIC_CAS:
-            return "cas";
-          case BRIG_ATOMIC_EXCH:
-            return "exch";
-          case BRIG_ATOMIC_ADD:
-            return "add";
-          case BRIG_ATOMIC_WRAPINC:
-            return "inc";
-          case BRIG_ATOMIC_WRAPDEC:
-            return "dec";
-          case BRIG_ATOMIC_MIN:
-            return "min";
-          case BRIG_ATOMIC_MAX:
-            return "max";
-          case BRIG_ATOMIC_SUB:
-            return "sub";
-          default:
-            return "unknown";
-        }
-    }
-} // namespace HsailISA
diff --git a/src/arch/hsail/insts/mem.hh b/src/arch/hsail/insts/mem.hh
deleted file mode 100644 (file)
index 0c8f6ca..0000000
+++ /dev/null
@@ -1,1777 +0,0 @@
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Steve Reinhardt
- */
-
-#ifndef __ARCH_HSAIL_INSTS_MEM_HH__
-#define __ARCH_HSAIL_INSTS_MEM_HH__
-
-#include <type_traits>
-
-#include "arch/hsail/insts/decl.hh"
-#include "arch/hsail/insts/gpu_static_inst.hh"
-#include "arch/hsail/operand.hh"
-#include "gpu-compute/compute_unit.hh"
-
-namespace HsailISA
-{
-    class MemInst
-    {
-      public:
-        MemInst() : size(0), addr_operand(nullptr) { }
-
-        MemInst(Enums::MemType m_type)
-        {
-            if (m_type == Enums::M_U64 ||
-                m_type == Enums::M_S64 ||
-                m_type == Enums::M_F64) {
-                size = 8;
-            } else if (m_type == Enums::M_U32 ||
-                       m_type == Enums::M_S32 ||
-                       m_type == Enums::M_F32) {
-                size = 4;
-            } else if (m_type == Enums::M_U16 ||
-                       m_type == Enums::M_S16 ||
-                       m_type == Enums::M_F16) {
-                size = 2;
-            } else {
-                size = 1;
-            }
-
-            addr_operand = nullptr;
-        }
-
-        void
-        init_addr(AddrOperandBase *_addr_operand)
-        {
-            addr_operand = _addr_operand;
-        }
-
-      private:
-        int size;
-        AddrOperandBase *addr_operand;
-
-      public:
-        int getMemOperandSize() { return size; }
-        AddrOperandBase *getAddressOperand() { return addr_operand; }
-    };
-
-    template<typename DestOperandType, typename AddrOperandType>
-    class LdaInstBase : public HsailGPUStaticInst
-    {
-      public:
-        typename DestOperandType::DestOperand dest;
-        AddrOperandType addr;
-
-        LdaInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
-                    const char *_opcode)
-           : HsailGPUStaticInst(obj, _opcode)
-        {
-            using namespace Brig;
-
-            setFlag(ALU);
-
-            unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
-            dest.init(op_offs, obj);
-            op_offs = obj->getOperandPtr(ib->operands, 1);
-            addr.init(op_offs, obj);
-        }
-
-        int numSrcRegOperands() override
-        { return(this->addr.isVectorRegister()); }
-        int numDstRegOperands() override
-        { return dest.isVectorRegister(); }
-        bool isVectorRegister(int operandIndex) override
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            return((operandIndex == 0) ? dest.isVectorRegister() :
-                   this->addr.isVectorRegister());
-        }
-        bool isCondRegister(int operandIndex) override
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            return((operandIndex == 0) ? dest.isCondRegister() :
-                   this->addr.isCondRegister());
-        }
-        bool isScalarRegister(int operandIndex) override
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            return((operandIndex == 0) ? dest.isScalarRegister() :
-                   this->addr.isScalarRegister());
-        }
-        bool isSrcOperand(int operandIndex) override
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (operandIndex > 0)
-                return(this->addr.isVectorRegister());
-            return false;
-        }
-        bool isDstOperand(int operandIndex) override {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            return(operandIndex == 0);
-        }
-        int getOperandSize(int operandIndex) override
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            return((operandIndex == 0) ? dest.opSize() :
-                   this->addr.opSize());
-        }
-        int
-        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            return((operandIndex == 0) ? dest.regIndex() :
-                   this->addr.regIndex());
-        }
-        int getNumOperands() override
-        {
-            if (this->addr.isVectorRegister())
-                return 2;
-            return 1;
-        }
-    };
-
-    template<typename DestDataType, typename AddrOperandType>
-    class LdaInst :
-        public LdaInstBase<typename DestDataType::OperandType, AddrOperandType>,
-        public MemInst
-    {
-      public:
-        void generateDisassembly();
-
-        LdaInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
-                        const char *_opcode)
-            : LdaInstBase<typename DestDataType::OperandType,
-                          AddrOperandType>(ib, obj, _opcode)
-        {
-            init_addr(&this->addr);
-        }
-
-        void execute(GPUDynInstPtr gpuDynInst);
-    };
-
-    template<typename DataType>
-    GPUStaticInst*
-    decodeLda(const Brig::BrigInstBase *ib, const BrigObject *obj)
-    {
-        unsigned op_offs = obj->getOperandPtr(ib->operands, 1);
-        BrigRegOperandInfo regDataType = findRegDataType(op_offs, obj);
-
-        if (regDataType.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
-            return new LdaInst<DataType, NoRegAddrOperand>(ib, obj, "ldas");
-        } else if (regDataType.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
-            // V2/V4 not allowed
-            switch (regDataType.regKind) {
-              case Brig::BRIG_REGISTER_KIND_SINGLE:
-                return new LdaInst<DataType, SRegAddrOperand>(ib, obj, "ldas");
-              case Brig::BRIG_REGISTER_KIND_DOUBLE:
-                return new LdaInst<DataType, DRegAddrOperand>(ib, obj, "ldas");
-              default:
-                fatal("Bad ldas register operand type %d\n", regDataType.type);
-            }
-        } else {
-            fatal("Bad ldas register operand kind %d\n", regDataType.kind);
-        }
-    }
-
-    template<typename MemOperandType, typename DestOperandType,
-             typename AddrOperandType>
-    class LdInstBase : public HsailGPUStaticInst
-    {
-      public:
-        Brig::BrigWidth8_t width;
-        typename DestOperandType::DestOperand dest;
-        AddrOperandType addr;
-
-        Brig::BrigSegment segment;
-        Brig::BrigMemoryOrder memoryOrder;
-        Brig::BrigMemoryScope memoryScope;
-        unsigned int equivClass;
-
-        LdInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
-                   const char *_opcode)
-           : HsailGPUStaticInst(obj, _opcode)
-        {
-            using namespace Brig;
-
-            setFlag(MemoryRef);
-            setFlag(Load);
-
-            if (ib->opcode == BRIG_OPCODE_LD) {
-                const BrigInstMem *ldst = (const BrigInstMem*)ib;
-
-                segment = (BrigSegment)ldst->segment;
-                memoryOrder = BRIG_MEMORY_ORDER_NONE;
-                memoryScope = BRIG_MEMORY_SCOPE_NONE;
-                equivClass = ldst->equivClass;
-
-                width = ldst->width;
-                unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
-                const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
-                if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
-                    dest.init(op_offs, obj);
-
-                op_offs = obj->getOperandPtr(ib->operands, 1);
-                addr.init(op_offs, obj);
-            } else {
-                const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
-
-                segment = (BrigSegment)at->segment;
-                memoryOrder = (BrigMemoryOrder)at->memoryOrder;
-                memoryScope = (BrigMemoryScope)at->memoryScope;
-                equivClass = 0;
-
-                width = BRIG_WIDTH_1;
-                unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
-                const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
-
-                if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
-                    dest.init(op_offs, obj);
-
-                op_offs = obj->getOperandPtr(ib->operands,1);
-                addr.init(op_offs, obj);
-            }
-
-            switch (memoryOrder) {
-              case BRIG_MEMORY_ORDER_NONE:
-                setFlag(NoOrder);
-                break;
-              case BRIG_MEMORY_ORDER_RELAXED:
-                setFlag(RelaxedOrder);
-                break;
-              case BRIG_MEMORY_ORDER_SC_ACQUIRE:
-                setFlag(Acquire);
-                break;
-              case BRIG_MEMORY_ORDER_SC_RELEASE:
-                setFlag(Release);
-                break;
-              case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
-                setFlag(AcquireRelease);
-                break;
-              default:
-                fatal("LdInst has bad memory order type\n");
-            }
-
-            switch (memoryScope) {
-              case BRIG_MEMORY_SCOPE_NONE:
-                setFlag(NoScope);
-                break;
-              case BRIG_MEMORY_SCOPE_WORKITEM:
-                setFlag(WorkitemScope);
-                break;
-              case BRIG_MEMORY_SCOPE_WORKGROUP:
-                setFlag(WorkgroupScope);
-                break;
-              case BRIG_MEMORY_SCOPE_AGENT:
-                setFlag(DeviceScope);
-                break;
-              case BRIG_MEMORY_SCOPE_SYSTEM:
-                setFlag(SystemScope);
-                break;
-              default:
-                fatal("LdInst has bad memory scope type\n");
-            }
-
-            switch (segment) {
-              case BRIG_SEGMENT_GLOBAL:
-                setFlag(GlobalSegment);
-                break;
-              case BRIG_SEGMENT_GROUP:
-                setFlag(GroupSegment);
-                break;
-              case BRIG_SEGMENT_PRIVATE:
-                setFlag(PrivateSegment);
-                break;
-              case BRIG_SEGMENT_READONLY:
-                setFlag(ReadOnlySegment);
-                break;
-              case BRIG_SEGMENT_SPILL:
-                setFlag(SpillSegment);
-                break;
-              case BRIG_SEGMENT_FLAT:
-                setFlag(Flat);
-                break;
-              case BRIG_SEGMENT_KERNARG:
-                setFlag(KernArgSegment);
-                break;
-              case BRIG_SEGMENT_ARG:
-                setFlag(ArgSegment);
-                break;
-              default:
-                panic("Ld: segment %d not supported\n", segment);
-            }
-        }
-
-        int numSrcRegOperands() override
-        { return(this->addr.isVectorRegister()); }
-        int numDstRegOperands() override { return dest.isVectorRegister(); }
-        int getNumOperands() override
-        {
-            if (this->addr.isVectorRegister())
-                return 2;
-            else
-                return 1;
-        }
-        bool isVectorRegister(int operandIndex) override
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            return((operandIndex == 0) ? dest.isVectorRegister() :
-                   this->addr.isVectorRegister());
-        }
-        bool isCondRegister(int operandIndex) override
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            return((operandIndex == 0) ? dest.isCondRegister() :
-                   this->addr.isCondRegister());
-        }
-        bool isScalarRegister(int operandIndex) override
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            return((operandIndex == 0) ? dest.isScalarRegister() :
-                   this->addr.isScalarRegister());
-        }
-        bool isSrcOperand(int operandIndex) override
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (operandIndex > 0)
-                return(this->addr.isVectorRegister());
-            return false;
-        }
-        bool isDstOperand(int operandIndex) override
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            return(operandIndex == 0);
-        }
-        int getOperandSize(int operandIndex) override
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            return((operandIndex == 0) ? dest.opSize() :
-                   this->addr.opSize());
-        }
-        int
-        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            return((operandIndex == 0) ? dest.regIndex() :
-                   this->addr.regIndex());
-        }
-    };
-
-    template<typename MemDataType, typename DestDataType,
-             typename AddrOperandType>
-    class LdInst :
-        public LdInstBase<typename MemDataType::CType,
-                          typename DestDataType::OperandType, AddrOperandType>,
-        public MemInst
-    {
-        typename DestDataType::OperandType::DestOperand dest_vect[4];
-        uint16_t num_dest_operands;
-        void generateDisassembly() override;
-
-      public:
-        LdInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
-               const char *_opcode)
-            : LdInstBase<typename MemDataType::CType,
-                         typename DestDataType::OperandType,
-                         AddrOperandType>(ib, obj, _opcode),
-              MemInst(MemDataType::memType)
-        {
-            init_addr(&this->addr);
-
-            unsigned op_offs = obj->getOperandPtr(ib->operands,0);
-            const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
-
-            if (brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
-                const Brig::BrigOperandOperandList *brigRegVecOp =
-                    (const Brig::BrigOperandOperandList*)brigOp;
-
-                num_dest_operands =
-                    *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;
-
-                assert(num_dest_operands <= 4);
-            } else {
-                num_dest_operands = 1;
-            }
-
-            if (num_dest_operands > 1) {
-                assert(brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
-
-                for (int i = 0; i < num_dest_operands; ++i) {
-                    dest_vect[i].init_from_vect(op_offs, obj, i);
-                }
-            }
-        }
-
-        void
-        initiateAcc(GPUDynInstPtr gpuDynInst) override
-        {
-            typedef typename MemDataType::CType c0;
-
-            gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
-
-            if (num_dest_operands > 1) {
-                for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i)
-                    if (gpuDynInst->exec_mask[i])
-                        gpuDynInst->statusVector.push_back(num_dest_operands);
-                    else
-                        gpuDynInst->statusVector.push_back(0);
-            }
-
-            for (int k = 0; k < num_dest_operands; ++k) {
-
-                c0 *d = &((c0*)gpuDynInst->d_data)
-                    [k * gpuDynInst->computeUnit()->wfSize()];
-
-                for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
-                    if (gpuDynInst->exec_mask[i]) {
-                        Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);
-
-                        if (this->isLocalMem()) {
-                            // load from shared memory
-                            *d = gpuDynInst->wavefront()->ldsChunk->
-                                read<c0>(vaddr);
-                        } else {
-                            RequestPtr req = std::make_shared<Request>(
-                                vaddr, sizeof(c0), 0,
-                                gpuDynInst->computeUnit()->masterId(),
-                                0, gpuDynInst->wfDynId);
-
-                            gpuDynInst->setRequestFlags(req);
-                            PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
-                            pkt->dataStatic(d);
-
-                            if (gpuDynInst->computeUnit()->shader->
-                                separate_acquire_release &&
-                                gpuDynInst->isAcquire()) {
-                                // if this load has acquire semantics,
-                                // set the response continuation function
-                                // to perform an Acquire request
-                                gpuDynInst->execContinuation =
-                                    &GPUStaticInst::execLdAcq;
-
-                                gpuDynInst->useContinuation = true;
-                            } else {
-                                // the request will be finished when
-                                // the load completes
-                                gpuDynInst->useContinuation = false;
-                            }
-                            // translation is performed in sendRequest()
-                            gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
-                                                                   i, pkt);
-                        }
-                    }
-                    ++d;
-                }
-            }
-
-            gpuDynInst->updateStats();
-        }
-
-        void
-        completeAcc(GPUDynInstPtr gpuDynInst) override
-        {
-            typedef typename MemDataType::CType c1;
-
-            constexpr bool is_vt_32 = DestDataType::vgprType == VT_32;
-
-            /**
-              * this code essentially replaces the long if-else chain
-              * that was in used GlobalMemPipeline::exec() to infer the
-              * size (single/double) and type (floating point/integer) of
-              * the destination register. this is needed for load
-              * instructions because the loaded value and the
-              * destination type can be of different sizes, and we also
-              * need to know if the value we're writing back is floating
-              * point and signed/unsigned, so we can properly cast the
-              * writeback value
-              */
-            typedef typename std::conditional<is_vt_32,
-                typename std::conditional<std::is_floating_point<c1>::value,
-                    float, typename std::conditional<std::is_signed<c1>::value,
-                    int32_t, uint32_t>::type>::type,
-                typename std::conditional<std::is_floating_point<c1>::value,
-                    double, typename std::conditional<std::is_signed<c1>::value,
-                    int64_t, uint64_t>::type>::type>::type c0;
-
-
-            Wavefront *w = gpuDynInst->wavefront();
-
-            std::vector<uint32_t> regVec;
-            // iterate over number of destination register operands since
-            // this is a load
-            for (int k = 0; k < num_dest_operands; ++k) {
-                assert((sizeof(c1) * num_dest_operands)
-                       <= MAX_WIDTH_FOR_MEM_INST);
-
-                int dst = this->dest.regIndex() + k;
-                if (num_dest_operands > MAX_REGS_FOR_NON_VEC_MEM_INST)
-                    dst = dest_vect[k].regIndex();
-                // virtual->physical VGPR mapping
-                int physVgpr = w->remap(dst, sizeof(c0), 1);
-                // save the physical VGPR index
-                regVec.push_back(physVgpr);
-
-                c1 *p1 =
-                    &((c1*)gpuDynInst->d_data)[k * w->computeUnit->wfSize()];
-
-                for (int i = 0; i < w->computeUnit->wfSize(); ++i) {
-                    if (gpuDynInst->exec_mask[i]) {
-                        DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: "
-                                "$%s%d <- %d global ld done (src = wavefront "
-                                "ld inst)\n", w->computeUnit->cu_id, w->simdId,
-                                w->wfSlotId, i, sizeof(c0) == 4 ? "s" : "d",
-                                dst, *p1);
-                        // write the value into the physical VGPR. This is a
-                        // purely functional operation. No timing is modeled.
-                        w->computeUnit->vrf[w->simdId]->write<c0>(physVgpr,
-                                                                    *p1, i);
-                    }
-                    ++p1;
-                }
-            }
-
-            // Schedule the write operation of the load data on the VRF.
-            // This simply models the timing aspect of the VRF write operation.
-            // It does not modify the physical VGPR.
-            int loadVrfBankConflictCycles = gpuDynInst->computeUnit()->
-                vrf[w->simdId]->exec(gpuDynInst->seqNum(), w, regVec,
-                                     sizeof(c0), gpuDynInst->time);
-
-            if (this->isGlobalMem()) {
-                gpuDynInst->computeUnit()->globalMemoryPipe
-                    .incLoadVRFBankConflictCycles(loadVrfBankConflictCycles);
-            } else {
-                assert(this->isLocalMem());
-                gpuDynInst->computeUnit()->localMemoryPipe
-                    .incLoadVRFBankConflictCycles(loadVrfBankConflictCycles);
-            }
-        }
-
-      private:
-        void
-        execLdAcq(GPUDynInstPtr gpuDynInst) override
-        {
-            // after the load has complete and if the load has acquire
-            // semantics, issue an acquire request.
-            if (!this->isLocalMem()) {
-                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
-                    && gpuDynInst->isAcquire()) {
-                    gpuDynInst->statusBitVector = VectorMask(1);
-                    gpuDynInst->useContinuation = false;
-                    // create request
-                    RequestPtr req = std::make_shared<Request>(0, 0, 0,
-                                  gpuDynInst->computeUnit()->masterId(),
-                                  0, gpuDynInst->wfDynId);
-                    req->setFlags(Request::ACQUIRE);
-                    gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
-                }
-            }
-        }
-
-      public:
-        bool isVectorRegister(int operandIndex) override
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if ((num_dest_operands != getNumOperands()) &&
-                (operandIndex == (getNumOperands()-1)))
-                return(this->addr.isVectorRegister());
-            if (num_dest_operands > 1) {
-                return dest_vect[operandIndex].isVectorRegister();
-            }
-            else if (num_dest_operands == 1) {
-                return LdInstBase<typename MemDataType::CType,
-                       typename DestDataType::OperandType,
-                       AddrOperandType>::dest.isVectorRegister();
-            }
-            return false;
-        }
-        bool isCondRegister(int operandIndex) override
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if ((num_dest_operands != getNumOperands()) &&
-                (operandIndex == (getNumOperands()-1)))
-                return(this->addr.isCondRegister());
-            if (num_dest_operands > 1)
-                return dest_vect[operandIndex].isCondRegister();
-            else if (num_dest_operands == 1)
-                return LdInstBase<typename MemDataType::CType,
-                       typename DestDataType::OperandType,
-                       AddrOperandType>::dest.isCondRegister();
-            return false;
-        }
-        bool isScalarRegister(int operandIndex) override
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if ((num_dest_operands != getNumOperands()) &&
-                (operandIndex == (getNumOperands()-1)))
-                return(this->addr.isScalarRegister());
-            if (num_dest_operands > 1)
-                return dest_vect[operandIndex].isScalarRegister();
-            else if (num_dest_operands == 1)
-                return LdInstBase<typename MemDataType::CType,
-                       typename DestDataType::OperandType,
-                       AddrOperandType>::dest.isScalarRegister();
-            return false;
-        }
-        bool isSrcOperand(int operandIndex) override
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if ((num_dest_operands != getNumOperands()) &&
-                (operandIndex == (getNumOperands()-1)))
-                return(this->addr.isVectorRegister());
-            return false;
-        }
-        bool isDstOperand(int operandIndex) override
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if ((num_dest_operands != getNumOperands()) &&
-                (operandIndex == (getNumOperands()-1)))
-                return false;
-            return true;
-        }
-        int getOperandSize(int operandIndex) override
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if ((num_dest_operands != getNumOperands()) &&
-                (operandIndex == (getNumOperands()-1)))
-                return(this->addr.opSize());
-            if (num_dest_operands > 1)
-                return(dest_vect[operandIndex].opSize());
-            else if (num_dest_operands == 1)
-                return(LdInstBase<typename MemDataType::CType,
-                       typename DestDataType::OperandType,
-                       AddrOperandType>::dest.opSize());
-            return 0;
-        }
-        int
-        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if ((num_dest_operands != getNumOperands()) &&
-                (operandIndex == (getNumOperands()-1)))
-                return(this->addr.regIndex());
-            if (num_dest_operands > 1)
-                return(dest_vect[operandIndex].regIndex());
-            else if (num_dest_operands == 1)
-                return(LdInstBase<typename MemDataType::CType,
-                       typename DestDataType::OperandType,
-                       AddrOperandType>::dest.regIndex());
-            return -1;
-        }
-        int getNumOperands() override
-        {
-            if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
-                return(num_dest_operands+1);
-            else
-                return(num_dest_operands);
-        }
-        void execute(GPUDynInstPtr gpuDynInst) override;
-    };
-
-    template<typename MemDT, typename DestDT>
-    GPUStaticInst*
-    decodeLd2(const Brig::BrigInstBase *ib, const BrigObject *obj)
-    {
-        unsigned op_offs = obj->getOperandPtr(ib->operands,1);
-        BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);
-
-        if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
-            return new LdInst<MemDT, DestDT, NoRegAddrOperand>(ib, obj, "ld");
-        } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
-                   tmp.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
-            switch (tmp.regKind) {
-              case Brig::BRIG_REGISTER_KIND_SINGLE:
-                return new LdInst<MemDT, DestDT,
-                                  SRegAddrOperand>(ib, obj, "ld");
-              case Brig::BRIG_REGISTER_KIND_DOUBLE:
-                return new LdInst<MemDT, DestDT,
-                                  DRegAddrOperand>(ib, obj, "ld");
-              default:
-                fatal("Bad ld register operand type %d\n", tmp.regKind);
-            }
-        } else {
-            fatal("Bad ld register operand kind %d\n", tmp.kind);
-        }
-    }
-
-    template<typename MemDT>
-    GPUStaticInst*
-    decodeLd(const Brig::BrigInstBase *ib, const BrigObject *obj)
-    {
-        unsigned op_offs = obj->getOperandPtr(ib->operands,0);
-        BrigRegOperandInfo dest = findRegDataType(op_offs, obj);
-
-        assert(dest.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
-               dest.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
-        switch(dest.regKind) {
-          case Brig::BRIG_REGISTER_KIND_SINGLE:
-            switch (ib->type) {
-              case Brig::BRIG_TYPE_B8:
-              case Brig::BRIG_TYPE_B16:
-              case Brig::BRIG_TYPE_B32:
-                return decodeLd2<MemDT, B32>(ib, obj);
-              case Brig::BRIG_TYPE_U8:
-              case Brig::BRIG_TYPE_U16:
-              case Brig::BRIG_TYPE_U32:
-                return decodeLd2<MemDT, U32>(ib, obj);
-              case Brig::BRIG_TYPE_S8:
-              case Brig::BRIG_TYPE_S16:
-              case Brig::BRIG_TYPE_S32:
-                return decodeLd2<MemDT, S32>(ib, obj);
-              case Brig::BRIG_TYPE_F16:
-              case Brig::BRIG_TYPE_F32:
-                return decodeLd2<MemDT, U32>(ib, obj);
-              default:
-                fatal("Bad ld register operand type %d, %d\n",
-                      dest.regKind, ib->type);
-            };
-          case Brig::BRIG_REGISTER_KIND_DOUBLE:
-            switch (ib->type) {
-              case Brig::BRIG_TYPE_B64:
-                return decodeLd2<MemDT, B64>(ib, obj);
-              case Brig::BRIG_TYPE_U64:
-                return decodeLd2<MemDT, U64>(ib, obj);
-              case Brig::BRIG_TYPE_S64:
-                return decodeLd2<MemDT, S64>(ib, obj);
-              case Brig::BRIG_TYPE_F64:
-                return decodeLd2<MemDT, U64>(ib, obj);
-              default:
-                fatal("Bad ld register operand type %d, %d\n",
-                      dest.regKind, ib->type);
-            };
-          default:
-            fatal("Bad ld register operand type %d, %d\n", dest.regKind,
-                  ib->type);
-        }
-    }
-    /**
-     * Shared decode state for HSAIL store instructions.
-     *
-     * Keeps the value operand (src), the address operand (addr) and the
-     * BRIG segment, memory scope, memory order and equivalence class read
-     * from the instruction, and mirrors those attributes into instruction
-     * flags through setFlag(). In every operand query below, operand
-     * index 0 refers to src and any other valid index refers to addr.
-     */
-    template<typename MemDataType, typename SrcOperandType,
-             typename AddrOperandType>
-    class StInstBase : public HsailGPUStaticInst
-    {
-      public:
-        typename SrcOperandType::SrcOperand src;
-        AddrOperandType addr;
-
-        Brig::BrigSegment segment;
-        Brig::BrigMemoryScope memoryScope;
-        Brig::BrigMemoryOrder memoryOrder;
-        unsigned int equivClass;
-        /**
-         * Read the operands and memory attributes out of a BRIG instruction.
-         *
-         * A BRIG_OPCODE_ST instruction is read as a BrigInstMem with the
-         * value in operand 0 and the address in operand 1. Any other opcode
-         * is read as a BrigInstAtomic with the address in operand 0 and the
-         * value in operand 1. An unknown memory order or scope ends in
-         * fatal(); an unsupported segment ends in panic().
-         *
-         * @param ib BRIG instruction to decode.
-         * @param obj BRIG object used to look up the operands.
-         * @param _opcode opcode string passed on to HsailGPUStaticInst.
-         */
-        StInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
-                   const char *_opcode)
-           : HsailGPUStaticInst(obj, _opcode)
-        {
-            using namespace Brig;
-
-            setFlag(MemoryRef);
-            setFlag(Store);
-
-            if (ib->opcode == BRIG_OPCODE_ST) {
-                const BrigInstMem *ldst = (const BrigInstMem*)ib;
-                // memory order and scope are forced to NONE for a plain st
-                segment = (BrigSegment)ldst->segment;
-                memoryOrder = BRIG_MEMORY_ORDER_NONE;
-                memoryScope = BRIG_MEMORY_SCOPE_NONE;
-                equivClass = ldst->equivClass;
-
-                unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
-                const BrigOperand *baseOp = obj->getOperand(op_offs);
-                // src is only initialized for constant or register operands
-                if ((baseOp->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) ||
-                    (baseOp->kind == BRIG_KIND_OPERAND_REGISTER)) {
-                    src.init(op_offs, obj);
-                }
-
-                op_offs = obj->getOperandPtr(ib->operands, 1);
-                addr.init(op_offs, obj);
-            } else {
-                const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
-                // order and scope come from the atomic encoding itself
-                segment = (BrigSegment)at->segment;
-                memoryScope = (BrigMemoryScope)at->memoryScope;
-                memoryOrder = (BrigMemoryOrder)at->memoryOrder;
-                equivClass = 0;
-
-                unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
-                addr.init(op_offs, obj);
-
-                op_offs = obj->getOperandPtr(ib->operands, 1);
-                src.init(op_offs, obj);
-            }
-            // mirror the memory order into an instruction flag
-            switch (memoryOrder) {
-              case BRIG_MEMORY_ORDER_NONE:
-                setFlag(NoOrder);
-                break;
-              case BRIG_MEMORY_ORDER_RELAXED:
-                setFlag(RelaxedOrder);
-                break;
-              case BRIG_MEMORY_ORDER_SC_ACQUIRE:
-                setFlag(Acquire);
-                break;
-              case BRIG_MEMORY_ORDER_SC_RELEASE:
-                setFlag(Release);
-                break;
-              case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
-                setFlag(AcquireRelease);
-                break;
-              default:
-                fatal("StInst has bad memory order type\n");
-            }
-            // mirror the memory scope into an instruction flag
-            switch (memoryScope) {
-              case BRIG_MEMORY_SCOPE_NONE:
-                setFlag(NoScope);
-                break;
-              case BRIG_MEMORY_SCOPE_WORKITEM:
-                setFlag(WorkitemScope);
-                break;
-              case BRIG_MEMORY_SCOPE_WORKGROUP:
-                setFlag(WorkgroupScope);
-                break;
-              case BRIG_MEMORY_SCOPE_AGENT:
-                setFlag(DeviceScope);
-                break;
-              case BRIG_MEMORY_SCOPE_SYSTEM:
-                setFlag(SystemScope);
-                break;
-              default:
-                fatal("StInst has bad memory scope type\n");
-            }
-            // mirror the segment into an instruction flag
-            switch (segment) {
-              case BRIG_SEGMENT_GLOBAL:
-                setFlag(GlobalSegment);
-                break;
-              case BRIG_SEGMENT_GROUP:
-                setFlag(GroupSegment);
-                break;
-              case BRIG_SEGMENT_PRIVATE:
-                setFlag(PrivateSegment);
-                break;
-              case BRIG_SEGMENT_READONLY:
-                setFlag(ReadOnlySegment);
-                break;
-              case BRIG_SEGMENT_SPILL:
-                setFlag(SpillSegment);
-                break;
-              case BRIG_SEGMENT_FLAT:
-                setFlag(Flat);
-                break;
-              case BRIG_SEGMENT_ARG:
-                setFlag(ArgSegment);
-                break;
-              default:
-                panic("St: segment %d not supported\n", segment);
-            }
-        }
-        // a store reports no destination register operands
-        int numDstRegOperands() override { return 0; }
-        int numSrcRegOperands() override
-        {
-            return src.isVectorRegister() + this->addr.isVectorRegister();
-        }
-        int getNumOperands() override  // addr counts only when in a register
-        {
-            if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
-                return 2;
-            else
-                return 1;
-        }
-        bool isVectorRegister(int operandIndex) override
-        {
-            assert(operandIndex >= 0 && operandIndex < getNumOperands());
-            return !operandIndex ? src.isVectorRegister() :
-                   this->addr.isVectorRegister();
-        }
-        bool isCondRegister(int operandIndex) override
-        {
-            assert(operandIndex >= 0 && operandIndex < getNumOperands());
-            return !operandIndex ? src.isCondRegister() :
-                   this->addr.isCondRegister();
-        }
-        bool isScalarRegister(int operandIndex) override
-        {
-            assert(operandIndex >= 0 && operandIndex < getNumOperands());
-            return !operandIndex ? src.isScalarRegister() :
-                   this->addr.isScalarRegister();
-        }
-        bool isSrcOperand(int operandIndex) override  // every operand is a source
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            return true;
-        }
-        // NOTE(review): unlike its siblings this does not range-check the index
-        bool isDstOperand(int operandIndex) override { return false; }
-        int getOperandSize(int operandIndex) override
-        {
-            assert(operandIndex >= 0 && operandIndex < getNumOperands());
-            return !operandIndex ? src.opSize() : this->addr.opSize();
-        }
-        int
-        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
-        {
-            assert(operandIndex >= 0 && operandIndex < getNumOperands());
-            return !operandIndex ? src.regIndex() : this->addr.regIndex();
-        }
-    };
-
-    /**
-     * Concrete HSAIL store instruction.
-     *
-     * Extends StInstBase with support for an operand-list source of up to
-     * four elements (src_vect) and with the memory-access hooks
-     * initiateAcc(), completeAcc() and execSt(). num_src_operands is 1 for
-     * a single source and the list length for an operand-list source.
-     */
-    template<typename MemDataType, typename SrcDataType,
-             typename AddrOperandType>
-    class StInst :
-        public StInstBase<MemDataType, typename SrcDataType::OperandType,
-                          AddrOperandType>,
-        public MemInst
-    {
-      public:
-        typename SrcDataType::OperandType::SrcOperand src_vect[4];
-        uint16_t num_src_operands;
-        void generateDisassembly() override;
-        /**
-         * Decode a store and work out how many source operands it has.
-         *
-         * @param ib BRIG instruction to decode.
-         * @param obj BRIG object used to look up the operands.
-         * @param _opcode opcode string passed on to the base class.
-         * @param srcIdx index of the source operand within ib->operands.
-         */
-        StInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
-                        const char *_opcode, int srcIdx)
-            : StInstBase<MemDataType, typename SrcDataType::OperandType,
-                         AddrOperandType>(ib, obj, _opcode),
-              MemInst(SrcDataType::memType)
-        {
-            init_addr(&this->addr);
-
-            BrigRegOperandInfo rinfo;
-            unsigned op_offs = obj->getOperandPtr(ib->operands,srcIdx);
-            const Brig::BrigOperand *baseOp = obj->getOperand(op_offs);
-            // NOTE(review): rinfo is assigned below but not read again here
-            if (baseOp->kind == Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES) {
-                const Brig::BrigOperandConstantBytes *op =
-                    (Brig::BrigOperandConstantBytes*)baseOp;
-
-                rinfo = BrigRegOperandInfo((Brig::BrigKind16_t)op->base.kind,
-                                           Brig::BRIG_TYPE_NONE);
-            } else {
-                rinfo = findRegDataType(op_offs, obj);
-            }
-
-            if (baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
-                const Brig::BrigOperandOperandList *brigRegVecOp =
-                    (const Brig::BrigOperandOperandList*)baseOp;
-                // NOTE(review): looks like a byte count over 4-byte entries
-                num_src_operands =
-                    *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;
-
-                assert(num_src_operands <= 4);
-            } else {
-                num_src_operands = 1;
-            }
-            // src_vect is only populated for multi-element sources
-            if (num_src_operands > 1) {
-                assert(baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
-
-                for (int i = 0; i < num_src_operands; ++i) {
-                    src_vect[i].init_from_vect(op_offs, obj, i);
-                }
-            }
-        }
-        /**
-         * Start the store. For a non-local store with release semantics,
-         * when the shader has separate_acquire_release set, a RELEASE
-         * request is injected through injectGlobalMemFence() and execSt is
-         * registered as the continuation. Otherwise execSt() runs at once.
-         */
-        void
-        initiateAcc(GPUDynInstPtr gpuDynInst) override
-        {
-            // before performing a store, check if this store has
-            // release semantics, and if so issue a release first
-            if (!this->isLocalMem()) {
-                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
-                    && gpuDynInst->isRelease()) {
-
-                    gpuDynInst->statusBitVector = VectorMask(1);
-                    gpuDynInst->execContinuation = &GPUStaticInst::execSt;
-                    gpuDynInst->useContinuation = true;
-                    // create request
-                    RequestPtr req = std::make_shared<Request>(0, 0, 0,
-                                  gpuDynInst->computeUnit()->masterId(),
-                                  0, gpuDynInst->wfDynId);
-                    req->setFlags(Request::RELEASE);
-                    gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
-
-                    return;
-                }
-            }
-
-            // if there is no release semantic, perform stores immediately
-            execSt(gpuDynInst);
-        }
-
-        // stores don't write anything back, so there is nothing
-        // to do here. we only override this method to avoid the
-        // fatal in the base class implementation
-        void completeAcc(GPUDynInstPtr gpuDynInst) override { }
-
-      private:
-        // execSt may be called through a continuation
-        // if the store had release semantics. see comment for
-        // execSt in gpu_static_inst.hh
-        void
-        execSt(GPUDynInstPtr gpuDynInst) override
-        {
-            typedef typename MemDataType::CType c0;
-
-            gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
-            // multi-operand stores record num_src_operands per active lane
-            if (num_src_operands > 1) {
-                for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i)
-                    if (gpuDynInst->exec_mask[i])
-                        gpuDynInst->statusVector.push_back(num_src_operands);
-                    else
-                        gpuDynInst->statusVector.push_back(0);
-            }
-            // d_data is read as wfSize() elements per source operand
-            for (int k = 0; k < num_src_operands; ++k) {
-                c0 *d = &((c0*)gpuDynInst->d_data)
-                    [k * gpuDynInst->computeUnit()->wfSize()];
-
-                for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
-                    if (gpuDynInst->exec_mask[i]) {
-                        Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);
-
-                        if (this->isLocalMem()) {
-                            //store to shared memory
-                            gpuDynInst->wavefront()->ldsChunk->write<c0>(vaddr,
-                                                                         *d);
-                        } else {
-                            RequestPtr req = std::make_shared<Request>(
-                                vaddr, sizeof(c0), 0,
-                                gpuDynInst->computeUnit()->masterId(),
-                                0, gpuDynInst->wfDynId);
-
-                            gpuDynInst->setRequestFlags(req);
-                            PacketPtr pkt = new Packet(req, MemCmd::WriteReq);
-                            pkt->dataStatic<c0>(d);
-
-                            // translation is performed in sendRequest()
-                            // the request will be finished when the store completes
-                            gpuDynInst->useContinuation = false;
-                            gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
-                                                                   i, pkt);
-
-                        }
-                    }
-                    ++d;  // advance once per lane, whether or not it is active
-                }
-            }
-
-            gpuDynInst->updateStats();
-        }
-        /*
-         * Operand queries. Index num_src_operands refers to the address
-         * operand; lower indices refer to src_vect[] when there is more
-         * than one source, otherwise to the single base-class src.
-         */
-      public:
-        bool isVectorRegister(int operandIndex) override
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (operandIndex == num_src_operands)
-                return this->addr.isVectorRegister();
-            if (num_src_operands > 1)
-                return src_vect[operandIndex].isVectorRegister();
-            else if (num_src_operands == 1)
-                return StInstBase<MemDataType,
-                       typename SrcDataType::OperandType,
-                       AddrOperandType>::src.isVectorRegister();
-            return false;
-        }
-        bool isCondRegister(int operandIndex) override
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (operandIndex == num_src_operands)
-                return this->addr.isCondRegister();
-            if (num_src_operands > 1)
-                return src_vect[operandIndex].isCondRegister();
-            else if (num_src_operands == 1)
-                return StInstBase<MemDataType,
-                       typename SrcDataType::OperandType,
-                       AddrOperandType>::src.isCondRegister();
-            return false;
-        }
-        bool isScalarRegister(int operandIndex) override
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (operandIndex == num_src_operands)
-                return this->addr.isScalarRegister();
-            if (num_src_operands > 1)
-                return src_vect[operandIndex].isScalarRegister();
-            else if (num_src_operands == 1)
-                return StInstBase<MemDataType,
-                       typename SrcDataType::OperandType,
-                       AddrOperandType>::src.isScalarRegister();
-            return false;
-        }
-        bool isSrcOperand(int operandIndex) override  // every operand is a source
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            return true;
-        }
-        bool isDstOperand(int operandIndex) override { return false; }
-        int getOperandSize(int operandIndex) override
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (operandIndex == num_src_operands)
-                return this->addr.opSize();
-            if (num_src_operands > 1)
-                return src_vect[operandIndex].opSize();
-            else if (num_src_operands == 1)
-                return StInstBase<MemDataType,
-                       typename SrcDataType::OperandType,
-                       AddrOperandType>::src.opSize();
-            return 0;
-        }
-        int
-        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (operandIndex == num_src_operands)
-                return this->addr.regIndex();
-            if (num_src_operands > 1)
-                return src_vect[operandIndex].regIndex();
-            else if (num_src_operands == 1)
-                return StInstBase<MemDataType,
-                       typename SrcDataType::OperandType,
-                       AddrOperandType>::src.regIndex();
-            return -1;
-        }
-        int getNumOperands() override  // addr counts only when in a register
-        {
-            if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
-                return num_src_operands + 1;
-            else
-                return num_src_operands;
-        }
-        void execute(GPUDynInstPtr gpuDynInst) override;
-    };
-    /**
-     * Create the StInst specialization that matches the address operand.
-     *
-     * For a store the source is operand 0 and the address operand 1; for
-     * BRIG_OPCODE_ATOMIC and BRIG_OPCODE_ATOMICNORET the two are swapped.
-     * The address operand's kind and register kind select
-     * NoRegAddrOperand, SRegAddrOperand or DRegAddrOperand. Anything else
-     * ends in fatal().
-     *
-     * @param ib BRIG instruction to decode.
-     * @param obj BRIG object used to look up the operands.
-     * @return a newly allocated StInst.
-     */
-    template<typename DataType, typename SrcDataType>
-    GPUStaticInst*
-    decodeSt(const Brig::BrigInstBase *ib, const BrigObject *obj)
-    {
-        int srcIdx = 0;
-        int destIdx = 1;  // despite the name, this indexes the address operand
-        if (ib->opcode == Brig::BRIG_OPCODE_ATOMIC ||
-            ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET) {
-            srcIdx = 1;
-            destIdx = 0;
-        }
-        unsigned op_offs = obj->getOperandPtr(ib->operands,destIdx);
-
-        BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);
-
-        if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
-            return new StInst<DataType, SrcDataType,
-                              NoRegAddrOperand>(ib, obj, "st", srcIdx);
-        } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
-            // V2/V4 not allowed
-            switch (tmp.regKind) {
-              case Brig::BRIG_REGISTER_KIND_SINGLE:
-                return new StInst<DataType, SrcDataType,
-                                  SRegAddrOperand>(ib, obj, "st", srcIdx);
-              case Brig::BRIG_REGISTER_KIND_DOUBLE:
-                return new StInst<DataType, SrcDataType,
-                                  DRegAddrOperand>(ib, obj, "st", srcIdx);
-              default:  // NOTE(review): switch is on regKind, message prints type
-                fatal("Bad st register operand type %d\n", tmp.type);
-            }
-        } else {
-            fatal("Bad st register operand kind %d\n", tmp.kind);
-        }
-    }
-    /**
-     * Shared decode state for HSAIL atomic instructions.
-     *
-     * Keeps the destination operand, NumSrcOperands source operands, the
-     * address operand and the BRIG segment, memory order, memory scope,
-     * atomic operation and opcode read from the instruction, and mirrors
-     * those attributes into instruction flags through setFlag(). HasDst
-     * selects which BRIG operand layout the constructor reads.
-     */
-    template<typename OperandType, typename AddrOperandType, int NumSrcOperands,
-             bool HasDst>
-    class AtomicInstBase : public HsailGPUStaticInst
-    {
-      public:
-        typename OperandType::DestOperand dest;
-        typename OperandType::SrcOperand src[NumSrcOperands];
-        AddrOperandType addr;
-
-        Brig::BrigSegment segment;
-        Brig::BrigMemoryOrder memoryOrder;
-        Brig::BrigAtomicOperation atomicOperation;
-        Brig::BrigMemoryScope memoryScope;
-        Brig::BrigOpcode opcode;
-        /**
-         * Read the operands and memory attributes out of a BRIG atomic.
-         *
-         * The instruction is read as a BrigInstAtomic and its opcode is
-         * asserted to be BRIG_OPCODE_ATOMIC or BRIG_OPCODE_ATOMICNORET.
-         * An unknown memory order, memory scope or atomic operation ends
-         * in fatal(); an unsupported segment ends in panic().
-         *
-         * @param ib BRIG instruction to decode.
-         * @param obj BRIG object used to look up the operands.
-         * @param _opcode opcode string passed on to HsailGPUStaticInst.
-         */
-        AtomicInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
-                       const char *_opcode)
-           : HsailGPUStaticInst(obj, _opcode)
-        {
-            using namespace Brig;
-
-            const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
-
-            segment = (BrigSegment)at->segment;
-            memoryScope = (BrigMemoryScope)at->memoryScope;
-            memoryOrder = (BrigMemoryOrder)at->memoryOrder;
-            atomicOperation = (BrigAtomicOperation)at->atomicOperation;
-            opcode = (BrigOpcode)ib->opcode;
-
-            assert(opcode == Brig::BRIG_OPCODE_ATOMICNORET ||
-                   opcode == Brig::BRIG_OPCODE_ATOMIC);
-
-            setFlag(MemoryRef);
-            // BRIG_OPCODE_ATOMIC returns a value, ATOMICNORET does not
-            if (opcode == Brig::BRIG_OPCODE_ATOMIC) {
-                setFlag(AtomicReturn);
-            } else {
-                setFlag(AtomicNoReturn);
-            }
-            // mirror the memory order into an instruction flag
-            switch (memoryOrder) {
-              case BRIG_MEMORY_ORDER_NONE:
-                setFlag(NoOrder);
-                break;
-              case BRIG_MEMORY_ORDER_RELAXED:
-                setFlag(RelaxedOrder);
-                break;
-              case BRIG_MEMORY_ORDER_SC_ACQUIRE:
-                setFlag(Acquire);
-                break;
-              case BRIG_MEMORY_ORDER_SC_RELEASE:
-                setFlag(Release);
-                break;
-              case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
-                setFlag(AcquireRelease);
-                break;
-              default:
-                fatal("AtomicInst has bad memory order type\n");
-            }
-            // mirror the memory scope into an instruction flag
-            switch (memoryScope) {
-              case BRIG_MEMORY_SCOPE_NONE:
-                setFlag(NoScope);
-                break;
-              case BRIG_MEMORY_SCOPE_WORKITEM:
-                setFlag(WorkitemScope);
-                break;
-              case BRIG_MEMORY_SCOPE_WORKGROUP:
-                setFlag(WorkgroupScope);
-                break;
-              case BRIG_MEMORY_SCOPE_AGENT:
-                setFlag(DeviceScope);
-                break;
-              case BRIG_MEMORY_SCOPE_SYSTEM:
-                setFlag(SystemScope);
-                break;
-              default:
-                fatal("AtomicInst has bad memory scope type\n");
-            }
-            // mirror the atomic operation into an instruction flag
-            switch (atomicOperation) {
-              case Brig::BRIG_ATOMIC_AND:
-                setFlag(AtomicAnd);
-                break;
-              case Brig::BRIG_ATOMIC_OR:
-                setFlag(AtomicOr);
-                break;
-              case Brig::BRIG_ATOMIC_XOR:
-                setFlag(AtomicXor);
-                break;
-              case Brig::BRIG_ATOMIC_CAS:
-                setFlag(AtomicCAS);
-                break;
-              case Brig::BRIG_ATOMIC_EXCH:
-                setFlag(AtomicExch);
-                break;
-              case Brig::BRIG_ATOMIC_ADD:
-                setFlag(AtomicAdd);
-                break;
-              case Brig::BRIG_ATOMIC_WRAPINC:
-                setFlag(AtomicInc);
-                break;
-              case Brig::BRIG_ATOMIC_WRAPDEC:
-                setFlag(AtomicDec);
-                break;
-              case Brig::BRIG_ATOMIC_MIN:
-                setFlag(AtomicMin);
-                break;
-              case Brig::BRIG_ATOMIC_MAX:
-                setFlag(AtomicMax);
-                break;
-              case Brig::BRIG_ATOMIC_SUB:
-                setFlag(AtomicSub);
-                break;
-              default:
-                fatal("Bad BrigAtomicOperation code %d\n", atomicOperation);
-            }
-            // only the global, group and flat segments are accepted
-            switch (segment) {
-              case BRIG_SEGMENT_GLOBAL:
-                setFlag(GlobalSegment);
-                break;
-              case BRIG_SEGMENT_GROUP:
-                setFlag(GroupSegment);
-                break;
-              case BRIG_SEGMENT_FLAT:
-                setFlag(Flat);
-                break;
-              default:
-                panic("Atomic: segment %d not supported\n", segment);
-            }
-            // with a destination: operand 0 dest, 1 addr, 2.. sources
-            if (HasDst) {
-                unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
-                dest.init(op_offs, obj);
-
-                op_offs = obj->getOperandPtr(ib->operands, 1);
-                addr.init(op_offs, obj);
-
-                for (int i = 0; i < NumSrcOperands; ++i) {
-                    op_offs = obj->getOperandPtr(ib->operands, i + 2);
-                    src[i].init(op_offs, obj);
-                }
-            } else {
-                // without a destination: operand 0 addr, 1.. sources
-                unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
-                addr.init(op_offs, obj);
-
-                for (int i = 0; i < NumSrcOperands; ++i) {
-                    op_offs = obj->getOperandPtr(ib->operands, i + 1);
-                    src[i].init(op_offs, obj);
-                }
-            }
-        }
-        /*
-         * Operand queries. Indices [0, NumSrcOperands) refer to src[],
-         * index NumSrcOperands to addr and any higher index to dest.
-         * NOTE(review): unlike StInstBase these are not marked override
-         * here; confirm whether that is intentional.
-         */
-        int numSrcRegOperands()
-        {
-            int operands = 0;
-            for (int i = 0; i < NumSrcOperands; i++) {
-                if (src[i].isVectorRegister()) {
-                    operands++;
-                }
-            }
-            if (addr.isVectorRegister())
-                operands++;
-            return operands;
-        }
-        int numDstRegOperands() { return dest.isVectorRegister(); }
-        int getNumOperands()  // NOTE(review): HasDst is not consulted here
-        {
-            if (addr.isVectorRegister())
-                return(NumSrcOperands + 2);
-            return(NumSrcOperands + 1);
-        }
-        bool isVectorRegister(int operandIndex)
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (operandIndex < NumSrcOperands)
-                return src[operandIndex].isVectorRegister();
-            else if (operandIndex == NumSrcOperands)
-                return(addr.isVectorRegister());
-            else
-                return dest.isVectorRegister();
-        }
-        bool isCondRegister(int operandIndex)
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (operandIndex < NumSrcOperands)
-                return src[operandIndex].isCondRegister();
-            else if (operandIndex == NumSrcOperands)
-                return(addr.isCondRegister());
-            else
-                return dest.isCondRegister();
-        }
-        bool isScalarRegister(int operandIndex)
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (operandIndex < NumSrcOperands)
-                return src[operandIndex].isScalarRegister();
-            else if (operandIndex == NumSrcOperands)
-                return(addr.isScalarRegister());
-            else
-                return dest.isScalarRegister();
-        }
-        bool isSrcOperand(int operandIndex)
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (operandIndex < NumSrcOperands)
-                return true;
-            else if (operandIndex == NumSrcOperands)
-                return(addr.isVectorRegister());  // addr is a source only as a vector register
-            else
-                return false;
-        }
-        bool isDstOperand(int operandIndex)  // NOTE(review): no range assert here
-        {
-            if (operandIndex <= NumSrcOperands)
-                return false;
-            else
-                return true;
-        }
-        int getOperandSize(int operandIndex)
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (operandIndex < NumSrcOperands)
-                return(src[operandIndex].opSize());
-            else if (operandIndex == NumSrcOperands)
-                return(addr.opSize());
-            else
-                return(dest.opSize());
-        }
-        int
-        getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
-        {
-            assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
-            if (operandIndex < NumSrcOperands)
-                return(src[operandIndex].regIndex());
-            else if (operandIndex == NumSrcOperands)
-                return(addr.regIndex());
-            else
-                return(dest.regIndex());
-            return -1;  // not reachable: every branch above returns
-        }
-    };
-
-    template<typename MemDataType, typename AddrOperandType, int NumSrcOperands,
-             bool HasDst>
-    class AtomicInst :
-        public AtomicInstBase<typename MemDataType::OperandType,
-                              AddrOperandType, NumSrcOperands, HasDst>,
-        public MemInst
-    {
-      public:
-        void generateDisassembly() override;
-        /**
-         * Decode an atomic through AtomicInstBase, construct MemInst with
-         * MemDataType::memType and pass the address operand to init_addr().
-         *
-         * @param ib BRIG instruction to decode.
-         * @param obj BRIG object used to look up the operands.
-         * @param _opcode opcode string passed on to AtomicInstBase.
-         */
-        AtomicInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
-                   const char *_opcode)
-            : AtomicInstBase<typename MemDataType::OperandType, AddrOperandType,
-                             NumSrcOperands, HasDst>
-                (ib, obj, _opcode),
-              MemInst(MemDataType::memType)
-        {
-            init_addr(&this->addr);
-        }
-        /**
-         * Start the atomic. For a non-local atomic with release or
-         * acquire-release semantics, when the shader has
-         * separate_acquire_release set, a RELEASE request is injected
-         * through injectGlobalMemFence() and execAtomic is registered as
-         * the continuation. Otherwise execAtomic() runs at once.
-         */
-        void
-        initiateAcc(GPUDynInstPtr gpuDynInst) override
-        {
-            // before doing the RMW, check if this atomic has
-            // release semantics, and if so issue a release first
-            if (!this->isLocalMem()) {
-                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
-                    && (gpuDynInst->isRelease()
-                    || gpuDynInst->isAcquireRelease())) {
-
-                    gpuDynInst->statusBitVector = VectorMask(1);
-
-                    gpuDynInst->execContinuation = &GPUStaticInst::execAtomic;
-                    gpuDynInst->useContinuation = true;
-
-                    // create request
-                    RequestPtr req = std::make_shared<Request>(0, 0, 0,
-                                  gpuDynInst->computeUnit()->masterId(),
-                                  0, gpuDynInst->wfDynId);
-                    req->setFlags(Request::RELEASE);
-                    gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
-
-                    return;
-                }
-            }
-
-            // if there is no release semantic, execute the RMW immediately
-            execAtomic(gpuDynInst);
-
-        }
-        /**
-         * Finish the atomic. Does nothing unless isAtomicRet() is true. In
-         * that case, the value in d_data for each active lane is written
-         * to the destination register in the vector register file, the
-         * register-file write is then scheduled through vrf->exec(), and
-         * the cycle count it returns is added to the global or local
-         * memory pipe via incLoadVRFBankConflictCycles().
-         */
-        void
-        completeAcc(GPUDynInstPtr gpuDynInst) override
-        {
-            // if this is not an atomic return op, then we
-            // have nothing more to do.
-            if (this->isAtomicRet()) {
-                // the size of the src operands and the
-                // memory being operated on must match
-                // for HSAIL atomics - this assumption may
-                // not apply to all ISAs
-                typedef typename MemDataType::CType CType;
-
-                Wavefront *w = gpuDynInst->wavefront();
-                int dst = this->dest.regIndex();
-                std::vector<uint32_t> regVec;
-                // virtual->physical VGPR mapping
-                int physVgpr = w->remap(dst, sizeof(CType), 1);
-                regVec.push_back(physVgpr);
-                CType *p1 = &((CType*)gpuDynInst->d_data)[0];
-
-                for (int i = 0; i < w->computeUnit->wfSize(); ++i) {
-                    if (gpuDynInst->exec_mask[i]) {
-                        DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: "
-                                "$%s%d <- %d global ld done (src = wavefront "
-                                "ld inst)\n", w->computeUnit->cu_id, w->simdId,
-                                w->wfSlotId, i, sizeof(CType) == 4 ? "s" : "d",
-                                dst, *p1);
-                        // write the value into the physical VGPR. This is a
-                        // purely functional operation. No timing is modeled.
-                        w->computeUnit->vrf[w->simdId]->write<CType>(physVgpr, *p1, i);
-                    }
-                    ++p1;  // advance once per lane, whether or not it is active
-                }
-
-                // Schedule the write operation of the load data on the VRF.
-                // This simply models the timing aspect of the VRF write operation.
-                // It does not modify the physical VGPR.
-                int loadVrfBankConflictCycles = gpuDynInst->computeUnit()->
-                    vrf[w->simdId]->exec(gpuDynInst->seqNum(), w, regVec,
-                                         sizeof(CType), gpuDynInst->time);
-
-                if (this->isGlobalMem()) {
-                    gpuDynInst->computeUnit()->globalMemoryPipe
-                        .incLoadVRFBankConflictCycles(loadVrfBankConflictCycles);
-                } else {
-                    assert(this->isLocalMem());
-                    gpuDynInst->computeUnit()->localMemoryPipe
-                        .incLoadVRFBankConflictCycles(loadVrfBankConflictCycles);
-                }
-            }
-        }
-
-        void execute(GPUDynInstPtr gpuDynInst) override;
-
-      private:
-        // execAtomic may be called through a continuation
-        // if the RMW had release semantics. see comment for
-        // execContinuation in gpu_dyn_inst.hh
-        void
-        execAtomic(GPUDynInstPtr gpuDynInst) override
-        {
-            gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
-
-            typedef typename MemDataType::CType c0;
-
-            c0 *d = &((c0*) gpuDynInst->d_data)[0];
-            c0 *e = &((c0*) gpuDynInst->a_data)[0];
-            c0 *f = &((c0*) gpuDynInst->x_data)[0];
-
-            for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
-                if (gpuDynInst->exec_mask[i]) {
-                    Addr vaddr = gpuDynInst->addr[i];
-
-                    if (this->isLocalMem()) {
-                        Wavefront *wavefront = gpuDynInst->wavefront();
-                        *d = wavefront->ldsChunk->read<c0>(vaddr);
-
-                        if (this->isAtomicAdd()) {
-                            wavefront->ldsChunk->write<c0>(vaddr,
-                            wavefront->ldsChunk->read<c0>(vaddr) + (*e));
-                        } else if (this->isAtomicSub()) {
-                            wavefront->ldsChunk->write<c0>(vaddr,
-                            wavefront->ldsChunk->read<c0>(vaddr) - (*e));
-                        } else if (this->isAtomicMax()) {
-                            wavefront->ldsChunk->write<c0>(vaddr,
-                            std::max(wavefront->ldsChunk->read<c0>(vaddr),
-                            (*e)));
-                        } else if (this->isAtomicMin()) {
-                            wavefront->ldsChunk->write<c0>(vaddr,
-                            std::min(wavefront->ldsChunk->read<c0>(vaddr),
-                            (*e)));
-                        } else if (this->isAtomicAnd()) {
-                            wavefront->ldsChunk->write<c0>(vaddr,
-                            wavefront->ldsChunk->read<c0>(vaddr) & (*e));
-                        } else if (this->isAtomicOr()) {
-                            wavefront->ldsChunk->write<c0>(vaddr,
-                            wavefront->ldsChunk->read<c0>(vaddr) | (*e));
-                        } else if (this->isAtomicXor()) {
-                            wavefront->ldsChunk->write<c0>(vaddr,
-                            wavefront->ldsChunk->read<c0>(vaddr) ^ (*e));
-                        } else if (this->isAtomicInc()) {
-                            wavefront->ldsChunk->write<c0>(vaddr,
-                            wavefront->ldsChunk->read<c0>(vaddr) + 1);
-                        } else if (this->isAtomicDec()) {
-                            wavefront->ldsChunk->write<c0>(vaddr,
-                            wavefront->ldsChunk->read<c0>(vaddr) - 1);
-                        } else if (this->isAtomicExch()) {
-                            wavefront->ldsChunk->write<c0>(vaddr, (*e));
-                        } else if (this->isAtomicCAS()) {
-                            wavefront->ldsChunk->write<c0>(vaddr,
-                            (wavefront->ldsChunk->read<c0>(vaddr) == (*e)) ?
-                            (*f) : wavefront->ldsChunk->read<c0>(vaddr));
-                        } else {
-                            fatal("Unrecognized or invalid HSAIL atomic op "
-                                  "type.\n");
-                        }
-                    } else {
-                        RequestPtr req =
-                            std::make_shared<Request>(vaddr, sizeof(c0), 0,
-                                        gpuDynInst->computeUnit()->masterId(),
-                                        0, gpuDynInst->wfDynId,
-                                        gpuDynInst->makeAtomicOpFunctor<c0>(e,
-                                        f));
-
-                        gpuDynInst->setRequestFlags(req);
-                        PacketPtr pkt = new Packet(req, MemCmd::SwapReq);
-                        pkt->dataStatic(d);
-
-                        if (gpuDynInst->computeUnit()->shader->
-                            separate_acquire_release &&
-                            (gpuDynInst->isAcquire())) {
-                            // if this atomic has acquire semantics,
-                            // schedule the continuation to perform an
-                            // acquire after the RMW completes
-                            gpuDynInst->execContinuation =
-                                &GPUStaticInst::execAtomicAcq;
-
-                            gpuDynInst->useContinuation = true;
-                        } else {
-                            // the request will be finished when the RMW completes
-                            gpuDynInst->useContinuation = false;
-                        }
-                        // translation is performed in sendRequest()
-                        gpuDynInst->computeUnit()->sendRequest(gpuDynInst, i,
-                                                               pkt);
-                    }
-                }
-
-                ++d;
-                ++e;
-                ++f;
-            }
-
-            gpuDynInst->updateStats();
-        }
-
-        // execAtomicACq will always be called through a continuation.
-        // see comment for execContinuation in gpu_dyn_inst.hh
-        void
-        execAtomicAcq(GPUDynInstPtr gpuDynInst) override
-        {
-            // after performing the RMW, check to see if this instruction
-            // has acquire semantics, and if so, issue an acquire
-            if (!this->isLocalMem()) {
-                if (gpuDynInst->computeUnit()->shader->separate_acquire_release
-                     && gpuDynInst->isAcquire()) {
-                    gpuDynInst->statusBitVector = VectorMask(1);
-
-                    // the request will be finished when
-                    // the acquire completes
-                    gpuDynInst->useContinuation = false;
-                    // create request
-                    RequestPtr req = std::make_shared<Request>(0, 0, 0,
-                                  gpuDynInst->computeUnit()->masterId(),
-                                  0, gpuDynInst->wfDynId);
-                    req->setFlags(Request::ACQUIRE);
-                    gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
-                }
-            }
-        }
-    };
-
-    template<typename DataType, typename AddrOperandType, int NumSrcOperands>
-    GPUStaticInst*
-    constructAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
-    {
-        const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
-
-        if (at->atomicOperation == Brig::BRIG_ATOMIC_LD) {
-            return decodeLd<DataType>(ib, obj);
-        } else if (at->atomicOperation == Brig::BRIG_ATOMIC_ST) {
-            switch (ib->type) {
-              case Brig::BRIG_TYPE_B8:
-                return decodeSt<S8,S8>(ib, obj);
-              case Brig::BRIG_TYPE_B16:
-                return decodeSt<S16,S16>(ib, obj);
-              case Brig::BRIG_TYPE_B32:
-                return decodeSt<S32,S32>(ib, obj);
-              case Brig::BRIG_TYPE_B64:
-                return decodeSt<S64,S64>(ib, obj);
-              default: fatal("AtomicSt: Operand type mismatch %d\n", ib->type);
-            }
-        } else {
-            if ((Brig::BrigOpcode)ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET)
-                return new AtomicInst<DataType, AddrOperandType,
-                    NumSrcOperands, false>(ib, obj, "atomicnoret");
-            else
-                return new AtomicInst<DataType, AddrOperandType,
-                    NumSrcOperands, true>(ib, obj, "atomic");
-        }
-    }
-
-    template<typename DataType, int NumSrcOperands>
-    GPUStaticInst*
-    decodeAtomicHelper(const Brig::BrigInstBase *ib, const BrigObject *obj)
-    {
-        unsigned addrIndex = (Brig::BrigOpcode)ib->opcode ==
-            Brig::BRIG_OPCODE_ATOMICNORET ? 0 : 1;
-
-        unsigned op_offs = obj->getOperandPtr(ib->operands,addrIndex);
-
-        BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);
-
-        if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
-            return constructAtomic<DataType, NoRegAddrOperand,
-                                   NumSrcOperands>(ib, obj);
-        } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
-            // V2/V4 not allowed
-            switch (tmp.regKind) {
-              case Brig::BRIG_REGISTER_KIND_SINGLE:
-                  return constructAtomic<DataType, SRegAddrOperand,
-                                         NumSrcOperands>(ib, obj);
-              case Brig::BRIG_REGISTER_KIND_DOUBLE:
-                return constructAtomic<DataType, DRegAddrOperand,
-                                       NumSrcOperands>(ib, obj);
-              default:
-                fatal("Bad atomic register operand type %d\n", tmp.type);
-            }
-        } else {
-            fatal("Bad atomic register operand kind %d\n", tmp.kind);
-        }
-    }
-
-
-    template<typename DataType>
-    GPUStaticInst*
-    decodeAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
-    {
-        const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
-
-        if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
-            return decodeAtomicHelper<DataType, 2>(ib, obj);
-        } else {
-            return decodeAtomicHelper<DataType, 1>(ib, obj);
-        }
-    }
-
-    template<typename DataType>
-    GPUStaticInst*
-    decodeAtomicNoRet(const Brig::BrigInstBase *ib, const BrigObject *obj)
-    {
-        const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
-        if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
-            return decodeAtomicHelper<DataType, 2>(ib, obj);
-        } else {
-            return decodeAtomicHelper<DataType, 1>(ib, obj);
-        }
-    }
-} // namespace HsailISA
-
-#endif // __ARCH_HSAIL_INSTS_MEM_HH__
diff --git a/src/arch/hsail/insts/mem_impl.hh b/src/arch/hsail/insts/mem_impl.hh
deleted file mode 100644 (file)
index dbda664..0000000
+++ /dev/null
@@ -1,648 +0,0 @@
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Steve Reinhardt
- */
-
-#include "gpu-compute/hsail_code.hh"
-
-// defined in code.cc, but not worth sucking in all of code.h for this
-// at this point
-extern const char *segmentNames[];
-
-namespace HsailISA
-{
-    template<typename DestDataType, typename AddrRegOperandType>
-    void
-    LdaInst<DestDataType, AddrRegOperandType>::generateDisassembly()
-    {
-        this->disassembly = csprintf("%s_%s %s,%s", this->opcode,
-                                     DestDataType::label,
-                                     this->dest.disassemble(),
-                                     this->addr.disassemble());
-    }
-
-    template<typename DestDataType, typename AddrRegOperandType>
-    void
-    LdaInst<DestDataType, AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
-    {
-        Wavefront *w = gpuDynInst->wavefront();
-
-        typedef typename DestDataType::CType CType M5_VAR_USED;
-        const VectorMask &mask = w->getPred();
-        std::vector<Addr> addr_vec;
-        addr_vec.resize(w->computeUnit->wfSize(), (Addr)0);
-        this->addr.calcVector(w, addr_vec);
-
-        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-            if (mask[lane]) {
-                this->dest.set(w, lane, addr_vec[lane]);
-            }
-        }
-        addr_vec.clear();
-    }
-
-    template<typename MemDataType, typename DestDataType,
-             typename AddrRegOperandType>
-    void
-    LdInst<MemDataType, DestDataType, AddrRegOperandType>::generateDisassembly()
-    {
-        switch (num_dest_operands) {
-          case 1:
-            this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode,
-                                         segmentNames[this->segment],
-                                         MemDataType::label,
-                                         this->dest.disassemble(),
-                                         this->addr.disassemble());
-            break;
-          case 2:
-            this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode,
-                                         segmentNames[this->segment],
-                                         MemDataType::label,
-                                         this->dest_vect[0].disassemble(),
-                                         this->dest_vect[1].disassemble(),
-                                         this->addr.disassemble());
-            break;
-          case 3:
-            this->disassembly = csprintf("%s_%s_%s (%s,%s,%s), %s", this->opcode,
-                                         segmentNames[this->segment],
-                                         MemDataType::label,
-                                         this->dest_vect[0].disassemble(),
-                                         this->dest_vect[1].disassemble(),
-                                         this->dest_vect[2].disassemble(),
-                                         this->addr.disassemble());
-            break;
-          case 4:
-            this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s",
-                                         this->opcode,
-                                         segmentNames[this->segment],
-                                         MemDataType::label,
-                                         this->dest_vect[0].disassemble(),
-                                         this->dest_vect[1].disassemble(),
-                                         this->dest_vect[2].disassemble(),
-                                         this->dest_vect[3].disassemble(),
-                                         this->addr.disassemble());
-            break;
-          default:
-            fatal("Bad ld register dest operand, num vector operands: %d \n",
-                  num_dest_operands);
-            break;
-        }
-    }
-
-    static Addr
-    calcPrivAddr(Addr addr, Wavefront *w, int lane, GPUStaticInst *i)
-    {
-        // what is the size of the object we are accessing??
-        // NOTE: the compiler doesn't generate enough information
-        // to do this yet..have to just line up all the private
-        // work-item spaces back to back for now
-        /*
-        StorageElement* se =
-            i->parent->findSymbol(Brig::BrigPrivateSpace, addr);
-        assert(se);
-
-        return w->wfSlotId * w->privSizePerItem * w->computeUnit->wfSize() +
-            se->offset * w->computeUnit->wfSize() +
-            lane * se->size;
-        */
-
-        // addressing strategy: interleave the private spaces of
-        // work-items in a wave-front on 8 byte granularity.
-        // this won't be perfect coalescing like the spill space
-        // strategy, but it's better than nothing. The spill space
-        // strategy won't work with private because the same address
-        // may be accessed by different sized loads/stores.
-
-        // Note: I'm assuming that the largest load/store to private
-        // is 8 bytes. If it is larger, the stride will have to increase
-
-        Addr addr_div8 = addr / 8;
-        Addr addr_mod8 = addr % 8;
-
-        Addr ret = addr_div8 * 8 * w->computeUnit->wfSize() + lane * 8 +
-            addr_mod8 + w->privBase;
-
-        assert(ret < w->privBase +
-               (w->privSizePerItem * w->computeUnit->wfSize()));
-
-        return ret;
-    }
-
-    template<typename MemDataType, typename DestDataType,
-             typename AddrRegOperandType>
-    void
-    LdInst<MemDataType, DestDataType,
-           AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
-    {
-        Wavefront *w = gpuDynInst->wavefront();
-
-        typedef typename MemDataType::CType MemCType;
-        const VectorMask &mask = w->getPred();
-
-        // Kernarg references are handled uniquely for now (no Memory Request
-        // is used), so special-case them up front.  Someday we should
-        // make this more realistic, at which we should get rid of this
-        // block and fold this case into the switch below.
-        if (this->segment == Brig::BRIG_SEGMENT_KERNARG) {
-            MemCType val;
-
-            // I assume no vector ld for kernargs
-            assert(num_dest_operands == 1);
-
-            // assuming for the moment that we'll never do register
-            // offsets into kernarg space... just to make life simpler
-            uint64_t address = this->addr.calcUniform();
-
-            val = *(MemCType*)&w->kernelArgs[address];
-
-            DPRINTF(HSAIL, "ld_kernarg [%d] -> %d\n", address, val);
-
-            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-                if (mask[lane]) {
-                    this->dest.set(w, lane, val);
-                }
-            }
-
-            return;
-        } else if (this->segment == Brig::BRIG_SEGMENT_ARG) {
-            uint64_t address = this->addr.calcUniform();
-            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-                if (mask[lane]) {
-                    MemCType val = w->readCallArgMem<MemCType>(lane, address);
-
-                    DPRINTF(HSAIL, "ld_arg [%d] -> %llu\n", address,
-                            (unsigned long long)val);
-
-                    this->dest.set(w, lane, val);
-                }
-            }
-
-            return;
-        }
-
-        GPUDynInstPtr m = gpuDynInst;
-
-        this->addr.calcVector(w, m->addr);
-
-        m->m_type = MemDataType::memType;
-        m->v_type = DestDataType::vgprType;
-
-        m->exec_mask = w->execMask();
-        m->statusBitVector = 0;
-        m->equiv = this->equivClass;
-
-        if (num_dest_operands == 1) {
-            m->dst_reg = this->dest.regIndex();
-            m->n_reg = 1;
-        } else {
-            m->n_reg = num_dest_operands;
-            for (int i = 0; i < num_dest_operands; ++i) {
-                m->dst_reg_vec[i] = this->dest_vect[i].regIndex();
-            }
-        }
-
-        m->simdId = w->simdId;
-        m->wfSlotId = w->wfSlotId;
-        m->wfDynId = w->wfDynId;
-        m->kern_id = w->kernId;
-        m->cu_id = w->computeUnit->cu_id;
-        m->latency.init(&w->computeUnit->shader->tick_cnt);
-
-        switch (this->segment) {
-          case Brig::BRIG_SEGMENT_GLOBAL:
-            m->pipeId = GLBMEM_PIPE;
-            m->latency.set(w->computeUnit->shader->ticks(1));
-
-            // this is a complete hack to get around a compiler bug
-            // (the compiler currently generates global access for private
-            //  addresses (starting from 0). We need to add the private offset)
-            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-                if (m->addr[lane] < w->privSizePerItem) {
-                    if (mask[lane]) {
-                        // what is the size of the object we are accessing?
-                        // find base for for this wavefront
-
-                        // calcPrivAddr will fail if accesses are unaligned
-                        assert(!((sizeof(MemCType) - 1) & m->addr[lane]));
-
-                        Addr privAddr = calcPrivAddr(m->addr[lane], w, lane,
-                                                     this);
-
-                        m->addr[lane] = privAddr;
-                    }
-                }
-            }
-
-            w->computeUnit->globalMemoryPipe.issueRequest(m);
-            w->outstandingReqsRdGm++;
-            w->rdGmReqsInPipe--;
-            break;
-
-          case Brig::BRIG_SEGMENT_SPILL:
-            assert(num_dest_operands == 1);
-            m->pipeId = GLBMEM_PIPE;
-            m->latency.set(w->computeUnit->shader->ticks(1));
-            {
-                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-                    //  note: this calculation will NOT WORK if the compiler
-                    //  ever generates loads/stores to the same address with
-                    //  different widths (e.g., a ld_u32 addr and a ld_u16 addr)
-                    if (mask[lane]) {
-                        assert(m->addr[lane] < w->spillSizePerItem);
-
-                        m->addr[lane] = m->addr[lane] * w->spillWidth +
-                                        lane * sizeof(MemCType) + w->spillBase;
-
-                        w->lastAddr[lane] = m->addr[lane];
-                    }
-                }
-            }
-
-            w->computeUnit->globalMemoryPipe.issueRequest(m);
-            w->outstandingReqsRdGm++;
-            w->rdGmReqsInPipe--;
-            break;
-
-          case Brig::BRIG_SEGMENT_GROUP:
-            m->pipeId = LDSMEM_PIPE;
-            m->latency.set(w->computeUnit->shader->ticks(24));
-            w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
-            w->outstandingReqsRdLm++;
-            w->rdLmReqsInPipe--;
-            break;
-
-          case Brig::BRIG_SEGMENT_READONLY:
-            m->pipeId = GLBMEM_PIPE;
-            m->latency.set(w->computeUnit->shader->ticks(1));
-
-            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-                if (mask[lane]) {
-                    assert(m->addr[lane] + sizeof(MemCType) <= w->roSize);
-                    m->addr[lane] += w->roBase;
-                }
-            }
-
-            w->computeUnit->globalMemoryPipe.issueRequest(m);
-            w->outstandingReqsRdGm++;
-            w->rdGmReqsInPipe--;
-            break;
-
-          case Brig::BRIG_SEGMENT_PRIVATE:
-            m->pipeId = GLBMEM_PIPE;
-            m->latency.set(w->computeUnit->shader->ticks(1));
-            {
-                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-                    if (mask[lane]) {
-                        assert(m->addr[lane] < w->privSizePerItem);
-
-                        m->addr[lane] = m->addr[lane] +
-                            lane * sizeof(MemCType) + w->privBase;
-                    }
-                }
-            }
-            w->computeUnit->globalMemoryPipe.issueRequest(m);
-            w->outstandingReqsRdGm++;
-            w->rdGmReqsInPipe--;
-            break;
-
-          default:
-            fatal("Load to unsupported segment %d %llxe\n", this->segment,
-                  m->addr[0]);
-        }
-
-        w->outstandingReqs++;
-        w->memReqsInPipe--;
-    }
-
-    template<typename OperationType, typename SrcDataType,
-             typename AddrRegOperandType>
-    void
-    StInst<OperationType, SrcDataType,
-           AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
-    {
-        Wavefront *w = gpuDynInst->wavefront();
-
-        typedef typename OperationType::CType CType;
-
-        const VectorMask &mask = w->getPred();
-
-        // arg references are handled uniquely for now (no Memory Request
-        // is used), so special-case them up front.  Someday we should
-        // make this more realistic, at which we should get rid of this
-        // block and fold this case into the switch below.
-        if (this->segment == Brig::BRIG_SEGMENT_ARG) {
-            uint64_t address = this->addr.calcUniform();
-
-            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-                if (mask[lane]) {
-                    CType data = this->src.template get<CType>(w, lane);
-                    DPRINTF(HSAIL, "st_arg [%d] <- %d\n", address, data);
-                    w->writeCallArgMem<CType>(lane, address, data);
-                }
-            }
-
-            return;
-        }
-
-        GPUDynInstPtr m = gpuDynInst;
-
-        m->exec_mask = w->execMask();
-
-        this->addr.calcVector(w, m->addr);
-
-        if (num_src_operands == 1) {
-            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-                if (mask[lane]) {
-                    ((CType*)m->d_data)[lane] =
-                        this->src.template get<CType>(w, lane);
-                }
-            }
-        } else {
-            for (int k= 0; k < num_src_operands; ++k) {
-                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-                    if (mask[lane]) {
-                        ((CType*)m->d_data)[k * w->computeUnit->wfSize() + lane] =
-                            this->src_vect[k].template get<CType>(w, lane);
-                    }
-                }
-            }
-        }
-
-        m->m_type = OperationType::memType;
-        m->v_type = OperationType::vgprType;
-
-        m->statusBitVector = 0;
-        m->equiv = this->equivClass;
-
-        if (num_src_operands == 1) {
-            m->n_reg = 1;
-        } else {
-            m->n_reg = num_src_operands;
-        }
-
-        m->simdId = w->simdId;
-        m->wfSlotId = w->wfSlotId;
-        m->wfDynId = w->wfDynId;
-        m->kern_id = w->kernId;
-        m->cu_id = w->computeUnit->cu_id;
-        m->latency.init(&w->computeUnit->shader->tick_cnt);
-
-        switch (this->segment) {
-          case Brig::BRIG_SEGMENT_GLOBAL:
-            m->pipeId = GLBMEM_PIPE;
-            m->latency.set(w->computeUnit->shader->ticks(1));
-
-            // this is a complete hack to get around a compiler bug
-            // (the compiler currently generates global access for private
-            //  addresses (starting from 0). We need to add the private offset)
-            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-                if (mask[lane]) {
-                    if (m->addr[lane] < w->privSizePerItem) {
-
-                        // calcPrivAddr will fail if accesses are unaligned
-                        assert(!((sizeof(CType)-1) & m->addr[lane]));
-
-                        Addr privAddr = calcPrivAddr(m->addr[lane], w, lane,
-                                                     this);
-
-                        m->addr[lane] = privAddr;
-                    }
-                }
-            }
-
-            w->computeUnit->globalMemoryPipe.issueRequest(m);
-            w->outstandingReqsWrGm++;
-            w->wrGmReqsInPipe--;
-            break;
-
-          case Brig::BRIG_SEGMENT_SPILL:
-            assert(num_src_operands == 1);
-            m->pipeId = GLBMEM_PIPE;
-            m->latency.set(w->computeUnit->shader->ticks(1));
-            {
-                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-                    if (mask[lane]) {
-                        assert(m->addr[lane] < w->spillSizePerItem);
-
-                        m->addr[lane] = m->addr[lane] * w->spillWidth +
-                                        lane * sizeof(CType) + w->spillBase;
-                    }
-                }
-            }
-
-            w->computeUnit->globalMemoryPipe.issueRequest(m);
-            w->outstandingReqsWrGm++;
-            w->wrGmReqsInPipe--;
-            break;
-
-          case Brig::BRIG_SEGMENT_GROUP:
-            m->pipeId = LDSMEM_PIPE;
-            m->latency.set(w->computeUnit->shader->ticks(24));
-            w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
-            w->outstandingReqsWrLm++;
-            w->wrLmReqsInPipe--;
-            break;
-
-          case Brig::BRIG_SEGMENT_PRIVATE:
-            m->pipeId = GLBMEM_PIPE;
-            m->latency.set(w->computeUnit->shader->ticks(1));
-            {
-                for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-                    if (mask[lane]) {
-                        assert(m->addr[lane] < w->privSizePerItem);
-                        m->addr[lane] = m->addr[lane] + lane *
-                            sizeof(CType)+w->privBase;
-                    }
-                }
-            }
-
-            w->computeUnit->globalMemoryPipe.issueRequest(m);
-            w->outstandingReqsWrGm++;
-            w->wrGmReqsInPipe--;
-            break;
-
-          default:
-            fatal("Store to unsupported segment %d\n", this->segment);
-        }
-
-        w->outstandingReqs++;
-        w->memReqsInPipe--;
-    }
-
-    template<typename OperationType, typename SrcDataType,
-             typename AddrRegOperandType>
-    void
-    StInst<OperationType, SrcDataType,
-           AddrRegOperandType>::generateDisassembly()
-    {
-        switch (num_src_operands) {
-          case 1:
-            this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode,
-                                         segmentNames[this->segment],
-                                         OperationType::label,
-                                         this->src.disassemble(),
-                                         this->addr.disassemble());
-            break;
-          case 2:
-            this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode,
-                                         segmentNames[this->segment],
-                                         OperationType::label,
-                                         this->src_vect[0].disassemble(),
-                                         this->src_vect[1].disassemble(),
-                                         this->addr.disassemble());
-            break;
-          case 4:
-            this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s",
-                                         this->opcode,
-                                         segmentNames[this->segment],
-                                         OperationType::label,
-                                         this->src_vect[0].disassemble(),
-                                         this->src_vect[1].disassemble(),
-                                         this->src_vect[2].disassemble(),
-                                         this->src_vect[3].disassemble(),
-                                         this->addr.disassemble());
-            break;
-          default: fatal("Bad ld register src operand, num vector operands: "
-                         "%d \n", num_src_operands);
-            break;
-        }
-    }
-
-    template<typename DataType, typename AddrRegOperandType, int NumSrcOperands,
-             bool HasDst>
-    void
-    AtomicInst<DataType, AddrRegOperandType, NumSrcOperands,
-        HasDst>::execute(GPUDynInstPtr gpuDynInst)
-    {
-        typedef typename DataType::CType CType;
-
-        Wavefront *w = gpuDynInst->wavefront();
-
-        GPUDynInstPtr m = gpuDynInst;
-
-        this->addr.calcVector(w, m->addr);
-
-        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-            ((CType *)m->a_data)[lane] =
-                this->src[0].template get<CType>(w, lane);
-        }
-
-        // load second source operand for CAS
-        if (NumSrcOperands > 1) {
-            for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-                ((CType*)m->x_data)[lane] =
-                    this->src[1].template get<CType>(w, lane);
-            }
-        }
-
-        assert(NumSrcOperands <= 2);
-
-        m->m_type = DataType::memType;
-        m->v_type = DataType::vgprType;
-
-        m->exec_mask = w->execMask();
-        m->statusBitVector = 0;
-        m->equiv = 0;  // atomics don't have an equivalence class operand
-        m->n_reg = 1;
-
-        if (HasDst) {
-            m->dst_reg = this->dest.regIndex();
-        }
-
-        m->simdId = w->simdId;
-        m->wfSlotId = w->wfSlotId;
-        m->wfDynId = w->wfDynId;
-        m->kern_id = w->kernId;
-        m->cu_id = w->computeUnit->cu_id;
-        m->latency.init(&w->computeUnit->shader->tick_cnt);
-
-        switch (this->segment) {
-          case Brig::BRIG_SEGMENT_GLOBAL:
-            m->latency.set(w->computeUnit->shader->ticks(64));
-            m->pipeId = GLBMEM_PIPE;
-
-            w->computeUnit->globalMemoryPipe.issueRequest(m);
-            w->outstandingReqsWrGm++;
-            w->wrGmReqsInPipe--;
-            w->outstandingReqsRdGm++;
-            w->rdGmReqsInPipe--;
-            break;
-
-          case Brig::BRIG_SEGMENT_GROUP:
-            m->pipeId = LDSMEM_PIPE;
-            m->latency.set(w->computeUnit->shader->ticks(24));
-            w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
-            w->outstandingReqsWrLm++;
-            w->wrLmReqsInPipe--;
-            w->outstandingReqsRdLm++;
-            w->rdLmReqsInPipe--;
-            break;
-
-          default:
-            fatal("Atomic op to unsupported segment %d\n",
-                  this->segment);
-        }
-
-        w->outstandingReqs++;
-        w->memReqsInPipe--;
-    }
-
-    const char* atomicOpToString(Brig::BrigAtomicOperation atomicOp);
-
-    template<typename DataType, typename AddrRegOperandType, int NumSrcOperands,
-             bool HasDst>
-    void
-    AtomicInst<DataType, AddrRegOperandType, NumSrcOperands,
-               HasDst>::generateDisassembly()
-    {
-        if (HasDst) {
-            this->disassembly =
-                csprintf("%s_%s_%s_%s %s,%s", this->opcode,
-                         atomicOpToString(this->atomicOperation),
-                         segmentNames[this->segment],
-                         DataType::label, this->dest.disassemble(),
-                         this->addr.disassemble());
-        } else {
-            this->disassembly =
-                csprintf("%s_%s_%s_%s %s", this->opcode,
-                         atomicOpToString(this->atomicOperation),
-                         segmentNames[this->segment],
-                         DataType::label, this->addr.disassemble());
-        }
-
-        for (int i = 0; i < NumSrcOperands; ++i) {
-            this->disassembly += ",";
-            this->disassembly += this->src[i].disassemble();
-        }
-    }
-} // namespace HsailISA
diff --git a/src/arch/hsail/insts/pseudo_inst.cc b/src/arch/hsail/insts/pseudo_inst.cc
deleted file mode 100644 (file)
index 580328a..0000000
+++ /dev/null
@@ -1,791 +0,0 @@
-/*
- * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Marc Orr
- */
-
-#include <csignal>
-
-#include "arch/hsail/insts/decl.hh"
-#include "arch/hsail/insts/mem.hh"
-
-namespace HsailISA
-{
-    // Pseudo (or magic) instructions are overloaded on the hsail call
-    // instruction, because of its flexible parameter signature.
-
-    // To add a new magic instruction:
-    // 1. Add an entry to the enum.
-    // 2. Implement it in the switch statement below (Call::exec).
-    // 3. Add a utility function to hsa/hsail-gpu-compute/util/magicinst.h,
-    //    so its easy to call from an OpenCL kernel.
-
-    // This enum should be identical to the enum in
-    // hsa/hsail-gpu-compute/util/magicinst.h
-    enum
-    {
-        MAGIC_PRINT_WF_32 = 0,
-        MAGIC_PRINT_WF_64,
-        MAGIC_PRINT_LANE,
-        MAGIC_PRINT_LANE_64,
-        MAGIC_PRINT_WF_FLOAT,
-        MAGIC_SIM_BREAK,
-        MAGIC_PREF_SUM,
-        MAGIC_REDUCTION,
-        MAGIC_MASKLANE_LOWER,
-        MAGIC_MASKLANE_UPPER,
-        MAGIC_JOIN_WF_BAR,
-        MAGIC_WAIT_WF_BAR,
-        MAGIC_PANIC,
-        MAGIC_ATOMIC_NR_ADD_GLOBAL_U32_REG,
-        MAGIC_ATOMIC_NR_ADD_GROUP_U32_REG,
-        MAGIC_LOAD_GLOBAL_U32_REG,
-        MAGIC_XACT_CAS_LD,
-        MAGIC_MOST_SIG_THD,
-        MAGIC_MOST_SIG_BROADCAST,
-        MAGIC_PRINT_WFID_32,
-        MAGIC_PRINT_WFID_64
-    };
-
-    void
-    Call::execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst)
-    {
-        const VectorMask &mask = w->getPred();
-
-        int op = 0;
-        bool got_op = false;
-
-        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-            if (mask[lane]) {
-                int src_val0 = src1.get<int>(w, lane, 0);
-                if (got_op) {
-                    if (src_val0 != op) {
-                        fatal("Multiple magic instructions per PC not "
-                              "supported\n");
-                    }
-                } else {
-                    op = src_val0;
-                    got_op = true;
-                }
-            }
-        }
-
-        switch(op) {
-          case MAGIC_PRINT_WF_32:
-            MagicPrintWF32(w);
-            break;
-          case MAGIC_PRINT_WF_64:
-            MagicPrintWF64(w);
-            break;
-          case MAGIC_PRINT_LANE:
-            MagicPrintLane(w);
-            break;
-          case MAGIC_PRINT_LANE_64:
-            MagicPrintLane64(w);
-            break;
-          case MAGIC_PRINT_WF_FLOAT:
-            MagicPrintWFFloat(w);
-            break;
-          case MAGIC_SIM_BREAK:
-            MagicSimBreak(w);
-            break;
-          case MAGIC_PREF_SUM:
-            MagicPrefixSum(w);
-            break;
-          case MAGIC_REDUCTION:
-            MagicReduction(w);
-            break;
-          case MAGIC_MASKLANE_LOWER:
-            MagicMaskLower(w);
-            break;
-          case MAGIC_MASKLANE_UPPER:
-            MagicMaskUpper(w);
-            break;
-          case MAGIC_JOIN_WF_BAR:
-            MagicJoinWFBar(w);
-            break;
-          case MAGIC_WAIT_WF_BAR:
-            MagicWaitWFBar(w);
-            break;
-          case MAGIC_PANIC:
-            MagicPanic(w);
-            break;
-
-          // atomic instructions
-          case MAGIC_ATOMIC_NR_ADD_GLOBAL_U32_REG:
-            MagicAtomicNRAddGlobalU32Reg(w, gpuDynInst);
-            break;
-
-          case MAGIC_ATOMIC_NR_ADD_GROUP_U32_REG:
-            MagicAtomicNRAddGroupU32Reg(w, gpuDynInst);
-            break;
-
-          case MAGIC_LOAD_GLOBAL_U32_REG:
-            MagicLoadGlobalU32Reg(w, gpuDynInst);
-            break;
-
-          case MAGIC_XACT_CAS_LD:
-            MagicXactCasLd(w);
-            break;
-
-          case MAGIC_MOST_SIG_THD:
-            MagicMostSigThread(w);
-            break;
-
-          case MAGIC_MOST_SIG_BROADCAST:
-            MagicMostSigBroadcast(w);
-            break;
-
-          case MAGIC_PRINT_WFID_32:
-            MagicPrintWF32ID(w);
-            break;
-
-          case MAGIC_PRINT_WFID_64:
-            MagicPrintWFID64(w);
-            break;
-
-          default: fatal("unrecognized magic instruction: %d\n", op);
-        }
-    }
-
-    void
-    Call::MagicPrintLane(Wavefront *w)
-    {
-    #if TRACING_ON
-        const VectorMask &mask = w->getPred();
-        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-            if (mask[lane]) {
-                int src_val1 = src1.get<int>(w, lane, 1);
-                int src_val2 = src1.get<int>(w, lane, 2);
-                if (src_val2) {
-                    DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: 0x%x\n",
-                             disassemble(), w->computeUnit->cu_id, w->simdId,
-                             w->wfSlotId, lane, src_val1);
-                } else {
-                    DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: %d\n",
-                             disassemble(), w->computeUnit->cu_id, w->simdId,
-                             w->wfSlotId, lane, src_val1);
-                }
-            }
-        }
-    #endif
-    }
-
-    void
-    Call::MagicPrintLane64(Wavefront *w)
-    {
-    #if TRACING_ON
-        const VectorMask &mask = w->getPred();
-        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-            if (mask[lane]) {
-                int64_t src_val1 = src1.get<int64_t>(w, lane, 1);
-                int src_val2 = src1.get<int>(w, lane, 2);
-                if (src_val2) {
-                    DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: 0x%x\n",
-                             disassemble(), w->computeUnit->cu_id, w->simdId,
-                             w->wfSlotId, lane, src_val1);
-                } else {
-                    DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: %d\n",
-                             disassemble(), w->computeUnit->cu_id, w->simdId,
-                             w->wfSlotId, lane, src_val1);
-                }
-            }
-        }
-    #endif
-    }
-
-    void
-    Call::MagicPrintWF32(Wavefront *w)
-    {
-    #if TRACING_ON
-        const VectorMask &mask = w->getPred();
-        std::string res_str;
-        res_str = csprintf("krl_prt (%s)\n", disassemble());
-
-        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-            if (!(lane & 7)) {
-                res_str += csprintf("DB%03d: ", (int)w->wfDynId);
-            }
-
-            if (mask[lane]) {
-                int src_val1 = src1.get<int>(w, lane, 1);
-                int src_val2 = src1.get<int>(w, lane, 2);
-
-                if (src_val2) {
-                    res_str += csprintf("%08x", src_val1);
-                } else {
-                    res_str += csprintf("%08d", src_val1);
-                }
-            } else {
-                res_str += csprintf("xxxxxxxx");
-            }
-
-            if ((lane & 7) == 7) {
-                res_str += csprintf("\n");
-            } else {
-                res_str += csprintf(" ");
-            }
-        }
-
-        res_str += "\n\n";
-        DPRINTFN(res_str.c_str());
-    #endif
-    }
-
-    void
-    Call::MagicPrintWF32ID(Wavefront *w)
-    {
-    #if TRACING_ON
-        const VectorMask &mask = w->getPred();
-        std::string res_str;
-        int src_val3 = -1;
-        res_str = csprintf("krl_prt (%s)\n", disassemble());
-
-        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-            if (!(lane & 7)) {
-                res_str += csprintf("DB%03d: ", (int)w->wfDynId);
-            }
-
-            if (mask[lane]) {
-                int src_val1 = src1.get<int>(w, lane, 1);
-                int src_val2 = src1.get<int>(w, lane, 2);
-                src_val3 = src1.get<int>(w, lane, 3);
-
-                if (src_val2) {
-                    res_str += csprintf("%08x", src_val1);
-                } else {
-                    res_str += csprintf("%08d", src_val1);
-                }
-            } else {
-                res_str += csprintf("xxxxxxxx");
-            }
-
-            if ((lane & 7) == 7) {
-                res_str += csprintf("\n");
-            } else {
-                res_str += csprintf(" ");
-            }
-        }
-
-        res_str += "\n\n";
-        if (w->wfDynId == src_val3) {
-            DPRINTFN(res_str.c_str());
-        }
-    #endif
-    }
-
-    void
-    Call::MagicPrintWF64(Wavefront *w)
-    {
-    #if TRACING_ON
-        const VectorMask &mask = w->getPred();
-        std::string res_str;
-        res_str = csprintf("krl_prt (%s)\n", disassemble());
-
-        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-            if (!(lane & 3)) {
-                res_str += csprintf("DB%03d: ", (int)w->wfDynId);
-            }
-
-            if (mask[lane]) {
-                int64_t src_val1 = src1.get<int64_t>(w, lane, 1);
-                int src_val2 = src1.get<int>(w, lane, 2);
-
-                if (src_val2) {
-                    res_str += csprintf("%016x", src_val1);
-                } else {
-                    res_str += csprintf("%016d", src_val1);
-                }
-            } else {
-                res_str += csprintf("xxxxxxxxxxxxxxxx");
-            }
-
-            if ((lane & 3) == 3) {
-                res_str += csprintf("\n");
-            } else {
-                res_str += csprintf(" ");
-            }
-        }
-
-        res_str += "\n\n";
-        DPRINTFN(res_str.c_str());
-    #endif
-    }
-
-    void
-    Call::MagicPrintWFID64(Wavefront *w)
-    {
-    #if TRACING_ON
-        const VectorMask &mask = w->getPred();
-        std::string res_str;
-        int src_val3 = -1;
-        res_str = csprintf("krl_prt (%s)\n", disassemble());
-
-        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-            if (!(lane & 3)) {
-                res_str += csprintf("DB%03d: ", (int)w->wfDynId);
-            }
-
-            if (mask[lane]) {
-                int64_t src_val1 = src1.get<int64_t>(w, lane, 1);
-                int src_val2 = src1.get<int>(w, lane, 2);
-                src_val3 = src1.get<int>(w, lane, 3);
-
-                if (src_val2) {
-                    res_str += csprintf("%016x", src_val1);
-                } else {
-                    res_str += csprintf("%016d", src_val1);
-                }
-            } else {
-                res_str += csprintf("xxxxxxxxxxxxxxxx");
-            }
-
-            if ((lane & 3) == 3) {
-                res_str += csprintf("\n");
-            } else {
-                res_str += csprintf(" ");
-            }
-        }
-
-        res_str += "\n\n";
-        if (w->wfDynId == src_val3) {
-            DPRINTFN(res_str.c_str());
-        }
-    #endif
-    }
-
-    void
-    Call::MagicPrintWFFloat(Wavefront *w)
-    {
-    #if TRACING_ON
-        const VectorMask &mask = w->getPred();
-        std::string res_str;
-        res_str = csprintf("krl_prt (%s)\n", disassemble());
-
-        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-            if (!(lane & 7)) {
-                res_str += csprintf("DB%03d: ", (int)w->wfDynId);
-            }
-
-            if (mask[lane]) {
-                float src_val1 = src1.get<float>(w, lane, 1);
-                res_str += csprintf("%08f", src_val1);
-            } else {
-                res_str += csprintf("xxxxxxxx");
-            }
-
-            if ((lane & 7) == 7) {
-                res_str += csprintf("\n");
-            } else {
-                res_str += csprintf(" ");
-            }
-        }
-
-        res_str += "\n\n";
-        DPRINTFN(res_str.c_str());
-    #endif
-    }
-
-    // raises a signal that GDB will catch
-    // when done with the break, type "signal 0" in gdb to continue
-    void
-    Call::MagicSimBreak(Wavefront *w)
-    {
-        std::string res_str;
-        // print out state for this wavefront and then break
-        res_str = csprintf("Breakpoint encountered for wavefront %i\n",
-                           w->wfSlotId);
-
-        res_str += csprintf("  Kern ID: %i\n", w->kernId);
-        res_str += csprintf("  Phase ID: %i\n", w->simdId);
-        res_str += csprintf("  Executing on CU #%i\n", w->computeUnit->cu_id);
-        res_str += csprintf("  Exec mask: ");
-
-        for (int i = w->computeUnit->wfSize() - 1; i >= 0; --i) {
-            if (w->execMask(i))
-                res_str += "1";
-            else
-                res_str += "0";
-
-            if ((i & 7) == 7)
-                res_str += " ";
-        }
-
-        res_str += csprintf("(0x%016llx)\n", w->execMask().to_ullong());
-
-        res_str += "\nHelpful debugging hints:\n";
-        res_str += "   Check out w->s_reg / w->d_reg for register state\n";
-
-        res_str += "\n\n";
-        DPRINTFN(res_str.c_str());
-        fflush(stdout);
-
-        raise(SIGTRAP);
-    }
-
-    void
-    Call::MagicPrefixSum(Wavefront *w)
-    {
-        const VectorMask &mask = w->getPred();
-        int res = 0;
-
-        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-            if (mask[lane]) {
-                int src_val1 = src1.get<int>(w, lane, 1);
-                dest.set<int>(w, lane, res);
-                res += src_val1;
-            }
-        }
-    }
-
-    void
-    Call::MagicReduction(Wavefront *w)
-    {
-        // reduction magic instruction
-        //   The reduction instruction takes up to 64 inputs (one from
-        //   each thread in a WF) and sums them. It returns the sum to
-        //   each thread in the WF.
-        const VectorMask &mask = w->getPred();
-        int res = 0;
-
-        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-            if (mask[lane]) {
-                int src_val1 = src1.get<int>(w, lane, 1);
-                res += src_val1;
-            }
-        }
-
-        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-            if (mask[lane]) {
-                dest.set<int>(w, lane, res);
-            }
-        }
-    }
-
-    void
-    Call::MagicMaskLower(Wavefront *w)
-    {
-        const VectorMask &mask = w->getPred();
-        int res = 0;
-
-        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-            if (mask[lane]) {
-                int src_val1 = src1.get<int>(w, lane, 1);
-
-                if (src_val1) {
-                    if (lane < (w->computeUnit->wfSize()/2)) {
-                        res = res | ((uint32_t)(1) << lane);
-                    }
-                }
-            }
-        }
-
-        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-            if (mask[lane]) {
-                dest.set<int>(w, lane, res);
-            }
-        }
-    }
-
-    void
-    Call::MagicMaskUpper(Wavefront *w)
-    {
-        const VectorMask &mask = w->getPred();
-        int res = 0;
-        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-            if (mask[lane]) {
-                int src_val1 = src1.get<int>(w, lane, 1);
-
-                if (src_val1) {
-                    if (lane >= (w->computeUnit->wfSize()/2)) {
-                        res = res | ((uint32_t)(1) <<
-                                     (lane - (w->computeUnit->wfSize()/2)));
-                    }
-                }
-            }
-        }
-
-        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-            if (mask[lane]) {
-                dest.set<int>(w, lane, res);
-            }
-        }
-    }
-
-    void
-    Call::MagicJoinWFBar(Wavefront *w)
-    {
-        const VectorMask &mask = w->getPred();
-        int max_cnt = 0;
-
-        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-            if (mask[lane]) {
-                w->barCnt[lane]++;
-
-                if (w->barCnt[lane] > max_cnt) {
-                    max_cnt = w->barCnt[lane];
-                }
-            }
-        }
-
-        if (max_cnt > w->maxBarCnt) {
-            w->maxBarCnt = max_cnt;
-        }
-    }
-
-    void
-    Call::MagicWaitWFBar(Wavefront *w)
-    {
-        const VectorMask &mask = w->getPred();
-        int max_cnt = 0;
-
-        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-            if (mask[lane]) {
-                w->barCnt[lane]--;
-            }
-
-            if (w->barCnt[lane] > max_cnt) {
-                max_cnt = w->barCnt[lane];
-            }
-        }
-
-        if (max_cnt < w->maxBarCnt) {
-            w->maxBarCnt = max_cnt;
-        }
-
-        w->instructionBuffer.erase(w->instructionBuffer.begin() + 1,
-                                   w->instructionBuffer.end());
-        if (w->pendingFetch)
-            w->dropFetch = true;
-    }
-
-    void
-    Call::MagicPanic(Wavefront *w)
-    {
-        const VectorMask &mask = w->getPred();
-
-        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-            if (mask[lane]) {
-                int src_val1 = src1.get<int>(w, lane, 1);
-                panic("OpenCL Code failed assertion #%d. Triggered by lane %s",
-                      src_val1, lane);
-            }
-        }
-    }
-
-    void
-    Call::calcAddr(Wavefront *w, GPUDynInstPtr m)
-    {
-        // the address is in src1 | src2
-        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-            int src_val1 = src1.get<int>(w, lane, 1);
-            int src_val2 = src1.get<int>(w, lane, 2);
-            Addr addr = (((Addr) src_val1) << 32) | ((Addr) src_val2);
-
-            m->addr[lane] = addr;
-        }
-
-    }
-
-    void
-    Call::MagicAtomicNRAddGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst)
-    {
-        GPUDynInstPtr m = gpuDynInst;
-
-        calcAddr(w, m);
-
-        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-            ((int*)m->a_data)[lane] = src1.get<int>(w, lane, 3);
-        }
-
-        setFlag(AtomicNoReturn);
-        setFlag(AtomicAdd);
-        setFlag(NoScope);
-        setFlag(NoOrder);
-        setFlag(GlobalSegment);
-
-        m->m_type = U32::memType;
-        m->v_type = U32::vgprType;
-
-        m->exec_mask = w->execMask();
-        m->statusBitVector = 0;
-        m->equiv = 0;  // atomics don't have an equivalence class operand
-        m->n_reg = 1;
-
-        m->simdId = w->simdId;
-        m->wfSlotId = w->wfSlotId;
-        m->wfDynId = w->wfDynId;
-        m->latency.init(&w->computeUnit->shader->tick_cnt);
-
-        m->pipeId = GLBMEM_PIPE;
-        m->latency.set(w->computeUnit->shader->ticks(64));
-        w->computeUnit->globalMemoryPipe.issueRequest(m);
-        w->outstandingReqsWrGm++;
-        w->wrGmReqsInPipe--;
-        w->outstandingReqsRdGm++;
-        w->rdGmReqsInPipe--;
-        w->outstandingReqs++;
-        w->memReqsInPipe--;
-    }
-
-    void
-    Call::MagicAtomicNRAddGroupU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst)
-    {
-        GPUDynInstPtr m = gpuDynInst;
-        calcAddr(w, m);
-
-        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-            ((int*)m->a_data)[lane] = src1.get<int>(w, lane, 1);
-        }
-
-        setFlag(AtomicNoReturn);
-        setFlag(AtomicAdd);
-        setFlag(NoScope);
-        setFlag(NoOrder);
-        setFlag(GlobalSegment);
-
-        m->m_type = U32::memType;
-        m->v_type = U32::vgprType;
-
-        m->exec_mask = w->execMask();
-        m->statusBitVector = 0;
-        m->equiv = 0;  // atomics don't have an equivalence class operand
-        m->n_reg = 1;
-
-        m->simdId = w->simdId;
-        m->wfSlotId = w->wfSlotId;
-        m->wfDynId = w->wfDynId;
-        m->latency.init(&w->computeUnit->shader->tick_cnt);
-
-        m->pipeId = GLBMEM_PIPE;
-        m->latency.set(w->computeUnit->shader->ticks(64));
-        w->computeUnit->globalMemoryPipe.issueRequest(m);
-        w->outstandingReqsWrGm++;
-        w->wrGmReqsInPipe--;
-        w->outstandingReqsRdGm++;
-        w->rdGmReqsInPipe--;
-        w->outstandingReqs++;
-        w->memReqsInPipe--;
-    }
-
-    void
-    Call::MagicLoadGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst)
-    {
-        GPUDynInstPtr m = gpuDynInst;
-        // calculate the address
-        calcAddr(w, m);
-
-        setFlag(Load);
-        setFlag(NoScope);
-        setFlag(NoOrder);
-        setFlag(GlobalSegment);
-
-        m->m_type = U32::memType;  //MemDataType::memType;
-        m->v_type = U32::vgprType; //DestDataType::vgprType;
-
-        m->exec_mask = w->execMask();
-        m->statusBitVector = 0;
-        m->equiv = 0;
-        m->n_reg = 1;
-
-        // FIXME
-        //m->dst_reg = this->dest.regIndex();
-
-        m->simdId = w->simdId;
-        m->wfSlotId = w->wfSlotId;
-        m->wfDynId = w->wfDynId;
-        m->latency.init(&w->computeUnit->shader->tick_cnt);
-
-        m->pipeId = GLBMEM_PIPE;
-        m->latency.set(w->computeUnit->shader->ticks(1));
-        w->computeUnit->globalMemoryPipe.issueRequest(m);
-        w->outstandingReqsRdGm++;
-        w->rdGmReqsInPipe--;
-        w->outstandingReqs++;
-        w->memReqsInPipe--;
-    }
-
-    void
-    Call::MagicXactCasLd(Wavefront *w)
-    {
-        const VectorMask &mask = w->getPred();
-        int src_val1 = 0;
-
-        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-            if (mask[lane]) {
-                src_val1 = src1.get<int>(w, lane, 1);
-                break;
-            }
-        }
-
-        if (!w->computeUnit->xactCasLoadMap.count(src_val1)) {
-            w->computeUnit->xactCasLoadMap[src_val1] = ComputeUnit::waveQueue();
-            w->computeUnit->xactCasLoadMap[src_val1].waveIDQueue.clear();
-        }
-
-        w->computeUnit->xactCasLoadMap[src_val1].waveIDQueue
-            .push_back(ComputeUnit::waveIdentifier(w->simdId, w->wfSlotId));
-    }
-
-    void
-    Call::MagicMostSigThread(Wavefront *w)
-    {
-        const VectorMask &mask = w->getPred();
-        unsigned mst = true;
-
-        for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) {
-            if (mask[lane]) {
-                dest.set<int>(w, lane, mst);
-                mst = false;
-            }
-        }
-    }
-
-    void
-    Call::MagicMostSigBroadcast(Wavefront *w)
-    {
-        const VectorMask &mask = w->getPred();
-        int res = 0;
-        bool got_res = false;
-
-        for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) {
-            if (mask[lane]) {
-                if (!got_res) {
-                    res = src1.get<int>(w, lane, 1);
-                    got_res = true;
-                }
-                dest.set<int>(w, lane, res);
-            }
-        }
-    }
-
-} // namespace HsailISA
diff --git a/src/arch/hsail/operand.cc b/src/arch/hsail/operand.cc
deleted file mode 100644 (file)
index 993d352..0000000
+++ /dev/null
@@ -1,468 +0,0 @@
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Steve Reinhardt
- */
-
-#include "arch/hsail/operand.hh"
-
-using namespace Brig;
-
-bool
-BaseRegOperand::init(unsigned opOffset, const BrigObject *obj,
-                     unsigned &maxRegIdx, char _regFileChar)
-{
-    regFileChar = _regFileChar;
-    const BrigOperand *brigOp = obj->getOperand(opOffset);
-
-    if (brigOp->kind != BRIG_KIND_OPERAND_REGISTER)
-        return false;
-
-    const BrigOperandRegister *brigRegOp = (const BrigOperandRegister*)brigOp;
-
-    regIdx = brigRegOp->regNum;
-
-    DPRINTF(GPUReg, "Operand: regNum: %d, kind: %d\n", regIdx,
-            brigRegOp->regKind);
-
-    maxRegIdx = std::max(maxRegIdx, regIdx);
-
-    return true;
-}
-
-void
-ListOperand::init(unsigned opOffset, const BrigObject *obj)
-{
-    const BrigOperand *brigOp = (const BrigOperand*)obj->getOperand(opOffset);
-
-    switch (brigOp->kind) {
-      case BRIG_KIND_OPERAND_CODE_LIST:
-        {
-            const BrigOperandCodeList *opList =
-                (const BrigOperandCodeList*)brigOp;
-
-            const Brig::BrigData *oprnd_data =
-                obj->getBrigBaseData(opList->elements);
-
-            // Note: for calls Dest list of operands could be size of 0.
-            elementCount = oprnd_data->byteCount / 4;
-
-            DPRINTF(GPUReg, "Operand Code List: # elements: %d\n",
-                    elementCount);
-
-            for (int i = 0; i < elementCount; ++i) {
-                unsigned *data_offset =
-                    (unsigned*)obj->getData(opList->elements + 4 * (i + 1));
-
-                const BrigDirectiveVariable *p =
-                    (const BrigDirectiveVariable*)obj->
-                    getCodeSectionEntry(*data_offset);
-
-                StorageElement *se = obj->currentCode->storageMap->
-                    findSymbol(BRIG_SEGMENT_ARG, p);
-
-                assert(se);
-                callArgs.push_back(se);
-            }
-        }
-        break;
-      default:
-        fatal("ListOperand: bad operand kind %d\n", brigOp->kind);
-    }
-}
-
-std::string
-ListOperand::disassemble()
-{
-    std::string res_str("");
-
-    for (auto it : callArgs) {
-        res_str += csprintf("%s ", it->name.c_str());
-    }
-
-    return res_str;
-}
-
-void
-FunctionRefOperand::init(unsigned opOffset, const BrigObject *obj)
-{
-    const BrigOperand *baseOp = obj->getOperand(opOffset);
-
-    if (baseOp->kind != BRIG_KIND_OPERAND_CODE_REF) {
-        fatal("FunctionRefOperand: bad operand kind %d\n", baseOp->kind);
-    }
-
-    const BrigOperandCodeRef *brigOp = (const BrigOperandCodeRef*)baseOp;
-
-    const BrigDirectiveExecutable *p =
-        (const BrigDirectiveExecutable*)obj->getCodeSectionEntry(brigOp->ref);
-
-    func_name = obj->getString(p->name);
-}
-
-std::string
-FunctionRefOperand::disassemble()
-{
-    DPRINTF(GPUReg, "Operand Func-ref name: %s\n", func_name);
-
-    return csprintf("%s", func_name);
-}
-
-bool
-BaseRegOperand::init_from_vect(unsigned opOffset, const BrigObject *obj,
-                               int at, unsigned &maxRegIdx, char _regFileChar)
-{
-    regFileChar = _regFileChar;
-    const BrigOperand *brigOp = obj->getOperand(opOffset);
-
-    if (brigOp->kind != BRIG_KIND_OPERAND_OPERAND_LIST)
-        return false;
-
-
-    const Brig::BrigOperandOperandList *brigRegVecOp =
-         (const Brig::BrigOperandOperandList*)brigOp;
-
-    unsigned *data_offset =
-        (unsigned*)obj->getData(brigRegVecOp->elements + 4 * (at + 1));
-
-    const BrigOperand *p =
-        (const BrigOperand*)obj->getOperand(*data_offset);
-    if (p->kind != BRIG_KIND_OPERAND_REGISTER) {
-        return false;
-    }
-
-    const BrigOperandRegister *brigRegOp =(const BrigOperandRegister*)p;
-
-    regIdx = brigRegOp->regNum;
-
-    DPRINTF(GPUReg, "Operand: regNum: %d, kind: %d \n", regIdx,
-            brigRegOp->regKind);
-
-    maxRegIdx = std::max(maxRegIdx, regIdx);
-
-    return true;
-}
-
-void
-BaseRegOperand::initWithStrOffset(unsigned strOffset, const BrigObject *obj,
-                     unsigned &maxRegIdx, char _regFileChar)
-{
-    const char *name = obj->getString(strOffset);
-    char *endptr;
-    regIdx = strtoul(name + 2, &endptr, 10);
-
-    if (name[0] != '$' || name[1] != _regFileChar) {
-        fatal("register operand parse error on \"%s\"\n", name);
-    }
-
-    maxRegIdx = std::max(maxRegIdx, regIdx);
-}
-
-unsigned SRegOperand::maxRegIdx;
-unsigned DRegOperand::maxRegIdx;
-unsigned CRegOperand::maxRegIdx;
-
-std::string
-SRegOperand::disassemble()
-{
-    return csprintf("$s%d", regIdx);
-}
-
-std::string
-DRegOperand::disassemble()
-{
-    return csprintf("$d%d", regIdx);
-}
-
-std::string
-CRegOperand::disassemble()
-{
-    return csprintf("$c%d", regIdx);
-}
-
-BrigRegOperandInfo
-findRegDataType(unsigned opOffset, const BrigObject *obj)
-{
-    const BrigOperand *baseOp = obj->getOperand(opOffset);
-
-    switch (baseOp->kind) {
-      case BRIG_KIND_OPERAND_REGISTER:
-        {
-            const BrigOperandRegister *op = (BrigOperandRegister*)baseOp;
-
-            return BrigRegOperandInfo((BrigKind16_t)baseOp->kind,
-                                      (BrigRegisterKind)op->regKind);
-        }
-        break;
-
-      case BRIG_KIND_OPERAND_WAVESIZE:
-        {
-            BrigRegisterKind reg_kind = BRIG_REGISTER_KIND_DOUBLE;
-            return BrigRegOperandInfo((BrigKind16_t)baseOp->kind, reg_kind);
-        }
-
-      case BRIG_KIND_OPERAND_OPERAND_LIST:
-        {
-            const BrigOperandOperandList *op =
-               (BrigOperandOperandList*)baseOp;
-            const BrigData *data_p = (BrigData*)obj->getData(op->elements);
-
-
-            int num_operands = 0;
-            BrigRegisterKind reg_kind = (BrigRegisterKind)0;
-            for (int offset = 0; offset < data_p->byteCount; offset += 4) {
-                const BrigOperand *op_p = (const BrigOperand *)
-                   obj->getOperand(((int *)data_p->bytes)[offset/4]);
-
-                if (op_p->kind == BRIG_KIND_OPERAND_REGISTER) {
-                    const BrigOperandRegister *brigRegOp =
-                       (const BrigOperandRegister*)op_p;
-                    reg_kind = (BrigRegisterKind)brigRegOp->regKind;
-                } else if (op_p->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) {
-                    uint16_t num_bytes =
-                       ((Brig::BrigOperandConstantBytes*)op_p)->base.byteCount
-                           - sizeof(BrigBase);
-                    if (num_bytes == sizeof(uint32_t)) {
-                        reg_kind = BRIG_REGISTER_KIND_SINGLE;
-                    } else if (num_bytes == sizeof(uint64_t)) {
-                        reg_kind = BRIG_REGISTER_KIND_DOUBLE;
-                    } else {
-                        fatal("OperandList: bad operand size %d\n", num_bytes);
-                    }
-                } else if (op_p->kind == BRIG_KIND_OPERAND_WAVESIZE) {
-                    reg_kind = BRIG_REGISTER_KIND_DOUBLE;
-                } else {
-                    fatal("OperandList: bad operand kind %d\n", op_p->kind);
-                }
-
-                num_operands++;
-            }
-            assert(baseOp->kind == BRIG_KIND_OPERAND_OPERAND_LIST);
-
-            return BrigRegOperandInfo((BrigKind16_t)baseOp->kind, reg_kind);
-        }
-        break;
-
-      case BRIG_KIND_OPERAND_ADDRESS:
-        {
-            const BrigOperandAddress *op = (BrigOperandAddress*)baseOp;
-
-            if (!op->reg) {
-                BrigType type = BRIG_TYPE_NONE;
-
-                if (op->symbol) {
-                    const BrigDirective *dir = (BrigDirective*)
-                        obj->getCodeSectionEntry(op->symbol);
-
-                    assert(dir->kind == BRIG_KIND_DIRECTIVE_VARIABLE);
-
-                    const BrigDirectiveVariable *sym =
-                       (const BrigDirectiveVariable*)dir;
-
-                    type = (BrigType)sym->type;
-                }
-                return BrigRegOperandInfo(BRIG_KIND_OPERAND_ADDRESS,
-                                          (BrigType)type);
-            } else {
-                const BrigOperandAddress *b = (const BrigOperandAddress*)baseOp;
-                const BrigOperand *reg = obj->getOperand(b->reg);
-                const BrigOperandRegister *rop = (BrigOperandRegister*)reg;
-
-                return BrigRegOperandInfo(BRIG_KIND_OPERAND_REGISTER,
-                                          (BrigRegisterKind)rop->regKind);
-            }
-        }
-        break;
-
-     default:
-       fatal("AddrOperand: bad operand kind %d\n", baseOp->kind);
-       break;
-   }
-}
-
-void
-AddrOperandBase::parseAddr(const BrigOperandAddress *op, const BrigObject *obj)
-{
-    assert(op->base.kind == BRIG_KIND_OPERAND_ADDRESS);
-
-    const BrigDirective *d =
-        (BrigDirective*)obj->getCodeSectionEntry(op->symbol);
-
-    /**
-     * HSAIL does not properly handle immediate offsets for instruction types
-     * that utilize them. It currently only supports instructions that use
-     * variables instead. Again, these pop up in code that is never executed
-     * (i.e. the HCC AMP codes) so we just hack it here to let us pass through
-     * the HSAIL object initialization. If such code is ever called, we would
-     * have to implement this properly.
-     */
-    if (d->kind != BRIG_KIND_DIRECTIVE_VARIABLE) {
-        warn("HSAIL implementation does not support instructions with "
-             "address calculations where the operand is not a variable\n");
-    }
-
-    const BrigDirectiveVariable *sym = (BrigDirectiveVariable*)d;
-    name = obj->getString(sym->name);
-
-    if (sym->segment != BRIG_SEGMENT_ARG) {
-        storageElement =
-            obj->currentCode->storageMap->findSymbol(sym->segment, name);
-        offset = 0;
-    } else {
-        // sym->name does not work for BRIG_SEGMENT_ARG for the following case:
-        //
-        //     void foo(int a);
-        //     void bar(double a);
-        //
-        //     foo(...) --> arg_u32 %param_p0;
-        //                  st_arg_u32 $s0, [%param_p0];
-        //                  call &foo (%param_p0);
-        //     bar(...) --> arg_f64 %param_p0;
-        //                  st_arg_u64 $d0, [%param_p0];
-        //                  call &foo (%param_p0);
-        //
-        //  Both functions use the same variable name (param_p0)!!!
-        //
-        //  Maybe this is a bug in the compiler (I don't know).
-        //
-        // Solution:
-        // Use directive pointer (BrigDirectiveVariable) to differentiate 2
-        // versions of param_p0.
-        //
-        // Note this solution is kind of stupid, because we are pulling stuff
-        // out of the brig binary via the directive pointer and putting it into
-        // the symbol table, but now we are indexing the symbol table by the
-        // brig directive pointer! It makes the symbol table sort of pointless.
-        // But I don't want to mess with the rest of the infrastructure, so
-        // let's go with this for now.
-        //
-        // When we update the compiler again, we should see if this problem goes
-        // away. If so, we can fold some of this functionality into the code for
-        // kernel arguments. If not, maybe we can index the symbol name on a
-        // hash of the variable AND function name
-        storageElement = obj->currentCode->
-                 storageMap->findSymbol((Brig::BrigSegment)sym->segment, sym);
-
-        assert(storageElement);
-    }
-}
-
-uint64_t
-AddrOperandBase::calcUniformBase()
-{
-    // start with offset, will be 0 if not specified
-    uint64_t address = offset;
-
-    // add in symbol value if specified
-    if (storageElement) {
-        address += storageElement->offset;
-    }
-
-    return address;
-}
-
-std::string
-AddrOperandBase::disassemble(std::string reg_disassembly)
-{
-    std::string disasm;
-
-    if (offset || reg_disassembly != "") {
-        disasm += "[";
-
-        if (reg_disassembly != "") {
-            disasm += reg_disassembly;
-
-            if (offset > 0) {
-                disasm += "+";
-            }
-        }
-
-        if (offset) {
-            disasm += csprintf("%d", offset);
-        }
-
-        disasm += "]";
-    } else if (name) {
-        disasm += csprintf("[%s]", name);
-    }
-
-    return disasm;
-}
-
-void
-NoRegAddrOperand::init(unsigned opOffset, const BrigObject *obj)
-{
-    const BrigOperand *baseOp = obj->getOperand(opOffset);
-
-    if (baseOp->kind == BRIG_KIND_OPERAND_ADDRESS) {
-        BrigOperandAddress *addrOp = (BrigOperandAddress*)baseOp;
-        parseAddr(addrOp, obj);
-        offset = (uint64_t(addrOp->offset.hi) << 32) |
-                  uint64_t(addrOp->offset.lo);
-    } else {
-        fatal("NoRegAddrOperand: bad operand kind %d\n", baseOp->kind);
-    }
-
-}
-
-std::string
-NoRegAddrOperand::disassemble()
-{
-    return AddrOperandBase::disassemble(std::string(""));
-}
-
-void
-LabelOperand::init(unsigned opOffset, const BrigObject *obj)
-{
-    const BrigOperandCodeRef *op =
-        (const BrigOperandCodeRef*)obj->getOperand(opOffset);
-
-    assert(op->base.kind == BRIG_KIND_OPERAND_CODE_REF);
-
-    const BrigDirective *dir =
-        (const BrigDirective*)obj->getCodeSectionEntry(op->ref);
-
-    assert(dir->kind == BRIG_KIND_DIRECTIVE_LABEL);
-    label = obj->currentCode->refLabel((BrigDirectiveLabel*)dir, obj);
-}
-
-uint32_t
-LabelOperand::getTarget(Wavefront *w, int lane)
-{
-    return label->get();
-}
-
-std::string
-LabelOperand::disassemble()
-{
-    return label->name;
-}
diff --git a/src/arch/hsail/operand.hh b/src/arch/hsail/operand.hh
deleted file mode 100644 (file)
index 3fbb099..0000000
+++ /dev/null
@@ -1,796 +0,0 @@
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Steve Reinhardt
- */
-
-#ifndef __ARCH_HSAIL_OPERAND_HH__
-#define __ARCH_HSAIL_OPERAND_HH__
-
-/**
- *  @file operand.hh
- *
- *  Defines classes encapsulating HSAIL instruction operands.
- */
-
-#include <limits>
-#include <string>
-
-#include "arch/hsail/Brig.h"
-#include "base/trace.hh"
-#include "base/types.hh"
-#include "debug/GPUReg.hh"
-#include "enums/RegisterType.hh"
-#include "gpu-compute/brig_object.hh"
-#include "gpu-compute/compute_unit.hh"
-#include "gpu-compute/hsail_code.hh"
-#include "gpu-compute/shader.hh"
-#include "gpu-compute/vector_register_file.hh"
-#include "gpu-compute/wavefront.hh"
-
-class Label;
-class StorageElement;
-
-class BaseOperand
-{
-  public:
-    Enums::RegisterType registerType;
-    uint32_t regOperandSize;
-    BaseOperand() { registerType = Enums::RT_NONE; regOperandSize = 0; }
-    bool isVectorRegister() { return registerType == Enums::RT_VECTOR; }
-    bool isScalarRegister() { return registerType == Enums::RT_SCALAR; }
-    bool isCondRegister() { return registerType == Enums::RT_CONDITION; }
-    unsigned int regIndex() { return 0; }
-    uint32_t opSize() { return regOperandSize; }
-    virtual ~BaseOperand() { }
-};
-
-class BrigRegOperandInfo
-{
-  public:
-    Brig::BrigKind16_t kind;
-    Brig::BrigType type;
-    Brig::BrigRegisterKind regKind;
-
-    BrigRegOperandInfo(Brig::BrigKind16_t _kind,
-                       Brig::BrigRegisterKind _regKind)
-        : kind(_kind), regKind(_regKind)
-    {
-    }
-
-    BrigRegOperandInfo(Brig::BrigKind16_t _kind, Brig::BrigType _type)
-        : kind(_kind), type(_type)
-    {
-    }
-
-    BrigRegOperandInfo() : kind(Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES),
-                           type(Brig::BRIG_TYPE_NONE)
-    {
-    }
-};
-
-BrigRegOperandInfo findRegDataType(unsigned opOffset, const BrigObject *obj);
-
-class BaseRegOperand : public BaseOperand
-{
-  public:
-    unsigned regIdx;
-    char regFileChar;
-
-    bool init(unsigned opOffset, const BrigObject *obj,
-              unsigned &maxRegIdx, char _regFileChar);
-
-    bool init_from_vect(unsigned opOffset, const BrigObject *obj, int at,
-                        unsigned &maxRegIdx, char _regFileChar);
-
-    void initWithStrOffset(unsigned strOffset, const BrigObject *obj,
-                           unsigned &maxRegIdx, char _regFileChar);
-    unsigned int regIndex() { return regIdx; }
-};
-
-class SRegOperand : public BaseRegOperand
-{
-  public:
-    static unsigned maxRegIdx;
-
-    bool
-    init(unsigned opOffset, const BrigObject *obj)
-    {
-        regOperandSize = sizeof(uint32_t);
-        registerType = Enums::RT_VECTOR;
-
-        return BaseRegOperand::init(opOffset, obj, maxRegIdx, 's');
-    }
-
-    bool
-    init_from_vect(unsigned opOffset, const BrigObject *obj, int at)
-    {
-        regOperandSize = sizeof(uint32_t);
-        registerType = Enums::RT_VECTOR;
-
-        return BaseRegOperand::init_from_vect(opOffset, obj, at, maxRegIdx,
-                                              's');
-    }
-
-    void
-    initWithStrOffset(unsigned strOffset, const BrigObject *obj)
-    {
-        regOperandSize = sizeof(uint32_t);
-        registerType = Enums::RT_VECTOR;
-
-        return BaseRegOperand::initWithStrOffset(strOffset, obj, maxRegIdx,
-                                                 's');
-    }
-
-    template<typename OperandType>
-    OperandType
-    get(Wavefront *w, int lane)
-    {
-        assert(sizeof(OperandType) <= sizeof(uint32_t));
-        assert(regIdx < w->maxSpVgprs);
-        // if OperandType is smaller than 32-bit, we truncate the value
-        OperandType ret;
-        uint32_t vgprIdx;
-
-        switch (sizeof(OperandType)) {
-          case 1: // 1 byte operand
-              vgprIdx = w->remap(regIdx, 1, 1);
-              ret = (w->computeUnit->vrf[w->simdId]->
-                      read<uint32_t>(vgprIdx, lane)) & 0xff;
-            break;
-          case 2: // 2 byte operand
-              vgprIdx = w->remap(regIdx, 2, 1);
-              ret = (w->computeUnit->vrf[w->simdId]->
-                      read<uint32_t>(vgprIdx, lane)) & 0xffff;
-            break;
-          case 4: // 4 byte operand
-              vgprIdx = w->remap(regIdx,sizeof(OperandType), 1);
-              ret = w->computeUnit->vrf[w->simdId]->
-                  read<OperandType>(vgprIdx, lane);
-            break;
-          default:
-            panic("Bad OperandType\n");
-            break;
-        }
-
-        return (OperandType)ret;
-    }
-
-    // special get method for compatibility with LabelOperand
-    uint32_t
-    getTarget(Wavefront *w, int lane)
-    {
-        return get<uint32_t>(w, lane);
-    }
-
-    template<typename OperandType>
-    void set(Wavefront *w, int lane, OperandType &val);
-    std::string disassemble();
-};
-
-template<typename OperandType>
-void
-SRegOperand::set(Wavefront *w, int lane, OperandType &val)
-{
-    DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: $s%d <- %d\n",
-            w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane, regIdx, val);
-
-    assert(sizeof(OperandType) == sizeof(uint32_t));
-    assert(regIdx < w->maxSpVgprs);
-    uint32_t vgprIdx = w->remap(regIdx, sizeof(OperandType), 1);
-    w->computeUnit->vrf[w->simdId]->write<OperandType>(vgprIdx,val,lane);
-}
-
-template<>
-inline void
-SRegOperand::set(Wavefront *w, int lane, uint64_t &val)
-{
-    DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: $s%d <- %d\n",
-            w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane, regIdx, val);
-
-    assert(regIdx < w->maxSpVgprs);
-    uint32_t vgprIdx = w->remap(regIdx, sizeof(uint32_t), 1);
-    w->computeUnit->vrf[w->simdId]->write<uint32_t>(vgprIdx, val, lane);
-}
-
-class DRegOperand : public BaseRegOperand
-{
-  public:
-    static unsigned maxRegIdx;
-
-    bool
-    init(unsigned opOffset, const BrigObject *obj)
-    {
-        regOperandSize = sizeof(uint64_t);
-        registerType = Enums::RT_VECTOR;
-
-        return BaseRegOperand::init(opOffset, obj, maxRegIdx, 'd');
-    }
-
-    bool
-    init_from_vect(unsigned opOffset, const BrigObject *obj, int at)
-    {
-        regOperandSize = sizeof(uint64_t);
-        registerType = Enums::RT_VECTOR;
-
-        return BaseRegOperand::init_from_vect(opOffset, obj, at, maxRegIdx,
-                                              'd');
-    }
-
-    void
-    initWithStrOffset(unsigned strOffset, const BrigObject *obj)
-    {
-        regOperandSize = sizeof(uint64_t);
-        registerType = Enums::RT_VECTOR;
-
-        return BaseRegOperand::initWithStrOffset(strOffset, obj, maxRegIdx,
-                                                 'd');
-    }
-
-    template<typename OperandType>
-    OperandType
-    get(Wavefront *w, int lane)
-    {
-        assert(sizeof(OperandType) <= sizeof(uint64_t));
-        // TODO: this check is valid only for HSAIL
-        assert(regIdx < w->maxDpVgprs);
-        uint32_t vgprIdx = w->remap(regIdx, sizeof(OperandType), 1);
-
-        return w->computeUnit->vrf[w->simdId]->read<OperandType>(vgprIdx,lane);
-    }
-
-    template<typename OperandType>
-    void
-    set(Wavefront *w, int lane, OperandType &val)
-    {
-        DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: $d%d <- %d\n",
-                w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane, regIdx,
-                val);
-
-        assert(sizeof(OperandType) <= sizeof(uint64_t));
-        // TODO: this check is valid only for HSAIL
-        assert(regIdx < w->maxDpVgprs);
-        uint32_t vgprIdx = w->remap(regIdx, sizeof(OperandType), 1);
-        w->computeUnit->vrf[w->simdId]->write<OperandType>(vgprIdx,val,lane);
-    }
-
-    std::string disassemble();
-};
-
-class CRegOperand : public BaseRegOperand
-{
-  public:
-    static unsigned maxRegIdx;
-
-    bool
-    init(unsigned opOffset, const BrigObject *obj)
-    {
-        regOperandSize = sizeof(uint8_t);
-        registerType = Enums::RT_CONDITION;
-
-        return BaseRegOperand::init(opOffset, obj, maxRegIdx, 'c');
-    }
-
-    bool
-    init_from_vect(unsigned opOffset, const BrigObject *obj, int at)
-    {
-        regOperandSize = sizeof(uint8_t);
-        registerType = Enums::RT_CONDITION;
-
-        return BaseRegOperand::init_from_vect(opOffset, obj, at, maxRegIdx,
-                                              'c');
-    }
-
-    void
-    initWithStrOffset(unsigned strOffset, const BrigObject *obj)
-    {
-        regOperandSize = sizeof(uint8_t);
-        registerType = Enums::RT_CONDITION;
-
-        return BaseRegOperand::initWithStrOffset(strOffset, obj, maxRegIdx,
-                                                 'c');
-    }
-
-    template<typename OperandType>
-    OperandType
-    get(Wavefront *w, int lane)
-    {
-        assert(regIdx < w->condRegState->numRegs());
-
-        return w->condRegState->read<OperandType>((int)regIdx, lane);
-    }
-
-    template<typename OperandType>
-    void
-    set(Wavefront *w, int lane, OperandType &val)
-    {
-        DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: $c%d <- %d\n",
-                w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane, regIdx,
-                val);
-
-        assert(regIdx < w->condRegState->numRegs());
-        w->condRegState->write<OperandType>(regIdx,lane,val);
-    }
-
-    std::string disassemble();
-};
-
-template<typename T>
-class ImmOperand : public BaseOperand
-{
-  private:
-    uint16_t kind;
-  public:
-    T bits;
-
-    bool init(unsigned opOffset, const BrigObject *obj);
-    bool init_from_vect(unsigned opOffset, const BrigObject *obj, int at);
-    std::string disassemble();
-
-    template<typename OperandType>
-    OperandType
-    get(Wavefront *w)
-    {
-        assert(sizeof(OperandType) <= sizeof(T));
-        panic_if(w == nullptr, "WF pointer needs to be set");
-
-        switch (kind) {
-          // immediate operand is WF size
-          case Brig::BRIG_KIND_OPERAND_WAVESIZE:
-            return (OperandType)w->computeUnit->wfSize();
-            break;
-
-          default:
-            return *(OperandType*)&bits;
-            break;
-        }
-    }
-
-    // This version of get() takes a WF* and a lane id for
-    // compatibility with the register-based get() methods.
-    template<typename OperandType>
-    OperandType
-    get(Wavefront *w, int lane)
-    {
-        return get<OperandType>(w);
-    }
-};
-
-template<typename T>
-bool
-ImmOperand<T>::init(unsigned opOffset, const BrigObject *obj)
-{
-    const Brig::BrigOperand *brigOp = obj->getOperand(opOffset);
-
-    switch (brigOp->kind) {
-      // this is immediate operand
-      case Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES:
-        {
-            DPRINTF(GPUReg, "sizeof(T): %lu, byteCount: %d\n", sizeof(T),
-                    brigOp->byteCount);
-
-            auto cbptr = (Brig::BrigOperandConstantBytes*)brigOp;
-
-            bits = *((T*)(obj->getData(cbptr->bytes + 4)));
-            kind = brigOp->kind;
-            return true;
-        }
-        break;
-
-      case Brig::BRIG_KIND_OPERAND_WAVESIZE:
-        kind = brigOp->kind;
-        bits = std::numeric_limits<unsigned long long>::digits;
-        return true;
-
-      default:
-        kind = Brig::BRIG_KIND_NONE;
-        return false;
-    }
-}
-
-template <typename T>
-bool
-ImmOperand<T>::init_from_vect(unsigned opOffset, const BrigObject *obj, int at)
-{
-    const Brig::BrigOperand *brigOp = obj->getOperand(opOffset);
-
-    if (brigOp->kind != Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
-        kind = Brig::BRIG_KIND_NONE;
-        return false;
-    }
-
-
-    const Brig::BrigOperandOperandList *brigVecOp =
-         (const Brig::BrigOperandOperandList *)brigOp;
-
-    unsigned *data_offset =
-        (unsigned *)obj->getData(brigVecOp->elements + 4 * (at + 1));
-
-    const Brig::BrigOperand *p =
-        (const Brig::BrigOperand *)obj->getOperand(*data_offset);
-
-    if (p->kind != Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES) {
-        kind = Brig::BRIG_KIND_NONE;
-        return false;
-    }
-
-    return init(*data_offset, obj);
-}
-template<typename T>
-std::string
-ImmOperand<T>::disassemble()
-{
-    return csprintf("0x%08x", bits);
-}
-
-template<typename RegOperand, typename T>
-class RegOrImmOperand : public BaseOperand
-{
-  private:
-    bool is_imm;
-
-  public:
-    void setImm(const bool value) { is_imm = value; }
-
-    ImmOperand<T> imm_op;
-    RegOperand reg_op;
-
-    RegOrImmOperand() { is_imm = false; }
-    void init(unsigned opOffset, const BrigObject *obj);
-    void init_from_vect(unsigned opOffset, const BrigObject *obj, int at);
-    std::string disassemble();
-
-    template<typename OperandType>
-    OperandType
-    get(Wavefront *w, int lane)
-    {
-        return is_imm ?  imm_op.template get<OperandType>(w) :
-                         reg_op.template get<OperandType>(w, lane);
-    }
-
-    uint32_t
-    opSize()
-    {
-        if (!is_imm) {
-            return reg_op.opSize();
-        }
-
-        return 0;
-    }
-
-    bool
-    isVectorRegister()
-    {
-        if (!is_imm) {
-            return reg_op.registerType == Enums::RT_VECTOR;
-        }
-        return false;
-    }
-
-    bool
-    isCondRegister()
-    {
-        if (!is_imm) {
-            return reg_op.registerType == Enums::RT_CONDITION;
-        }
-
-        return false;
-    }
-
-    bool
-    isScalarRegister()
-    {
-        if (!is_imm) {
-            return reg_op.registerType == Enums::RT_SCALAR;
-        }
-
-        return false;
-    }
-
-    unsigned int
-    regIndex()
-    {
-        if (!is_imm) {
-            return reg_op.regIndex();
-        }
-        return 0;
-    }
-};
-
-template<typename RegOperand, typename T>
-void
-RegOrImmOperand<RegOperand, T>::init(unsigned opOffset, const BrigObject *obj)
-{
-    is_imm = false;
-
-    if (reg_op.init(opOffset, obj)) {
-        return;
-    }
-
-    if (imm_op.init(opOffset, obj)) {
-        is_imm = true;
-        return;
-    }
-
-    fatal("RegOrImmOperand::init(): bad operand kind %d\n",
-          obj->getOperand(opOffset)->kind);
-}
-
-template<typename RegOperand, typename T>
-void
-RegOrImmOperand<RegOperand, T>::init_from_vect(unsigned opOffset,
-                                               const BrigObject *obj, int at)
-{
-    if (reg_op.init_from_vect(opOffset, obj, at)) {
-        is_imm = false;
-
-        return;
-    }
-
-    if (imm_op.init_from_vect(opOffset, obj, at)) {
-        is_imm = true;
-
-        return;
-    }
-
-    fatal("RegOrImmOperand::init(): bad operand kind %d\n",
-          obj->getOperand(opOffset)->kind);
-}
-
-template<typename RegOperand, typename T>
-std::string
-RegOrImmOperand<RegOperand, T>::disassemble()
-{
-    return is_imm ? imm_op.disassemble() : reg_op.disassemble();
-}
-
-typedef RegOrImmOperand<SRegOperand, uint32_t> SRegOrImmOperand;
-typedef RegOrImmOperand<DRegOperand, uint64_t> DRegOrImmOperand;
-typedef RegOrImmOperand<CRegOperand, bool> CRegOrImmOperand;
-
-class AddrOperandBase : public BaseOperand
-{
-  protected:
-    // helper function for init()
-    void parseAddr(const Brig::BrigOperandAddress *op, const BrigObject *obj);
-
-    // helper function for disassemble()
-    std::string disassemble(std::string reg_disassembly);
-    uint64_t calcUniformBase();
-
-  public:
-    virtual void calcVector(Wavefront *w, std::vector<Addr> &addrVec) = 0;
-    virtual uint64_t calcLane(Wavefront *w, int lane=0) = 0;
-
-    int64_t offset;
-    const char *name = nullptr;
-    StorageElement *storageElement;
-};
-
-template<typename RegOperandType>
-class RegAddrOperand : public AddrOperandBase
-{
-  public:
-    RegOperandType reg;
-    void init(unsigned opOffset, const BrigObject *obj);
-    uint64_t calcUniform();
-    void calcVector(Wavefront *w, std::vector<Addr> &addrVec);
-    uint64_t calcLane(Wavefront *w, int lane=0);
-    uint32_t opSize() { return reg.opSize(); }
-    bool isVectorRegister() { return reg.registerType == Enums::RT_VECTOR; }
-    bool isCondRegister() { return reg.registerType == Enums::RT_CONDITION; }
-    bool isScalarRegister() { return reg.registerType == Enums::RT_SCALAR; }
-    unsigned int regIndex() { return reg.regIndex(); }
-    std::string disassemble();
-};
-
-template<typename RegOperandType>
-void
-RegAddrOperand<RegOperandType>::init(unsigned opOffset, const BrigObject *obj)
-{
-    using namespace Brig;
-
-    const BrigOperand *baseOp = obj->getOperand(opOffset);
-
-    switch (baseOp->kind) {
-      case BRIG_KIND_OPERAND_ADDRESS:
-        {
-            const BrigOperandAddress *op = (BrigOperandAddress*)baseOp;
-            storageElement = nullptr;
-
-            reg.init(op->reg, obj);
-
-            if (reg.regFileChar == 's') {
-                // if the address expression is 32b, then the hi
-                // bits of the offset must be set to 0 in the BRIG
-                assert(!op->offset.hi);
-                /**
-                 * the offset field of an HSAIL instruction may be negative
-                 * so here we cast the raw bits we get from the BRIG file to
-                 * a signed type to avoid address calculation errors
-                 */
-                offset = (int32_t)(op->offset.lo);
-                reg.regOperandSize = sizeof(uint32_t);
-                registerType = Enums::RT_VECTOR;
-            }
-            else if (reg.regFileChar == 'd') {
-                offset = (int64_t)(((uint64_t)(op->offset.hi) << 32)
-                    | (uint64_t)(op->offset.lo));
-                reg.regOperandSize = sizeof(uint64_t);
-                registerType = Enums::RT_VECTOR;
-            }
-        }
-        break;
-
-      default:
-        fatal("RegAddrOperand: bad operand kind %d\n", baseOp->kind);
-        break;
-    }
-}
-
-template<typename RegOperandType>
-uint64_t
-RegAddrOperand<RegOperandType>::calcUniform()
-{
-    fatal("can't do calcUniform() on register-based address\n");
-
-    return 0;
-}
-
-template<typename RegOperandType>
-void
-RegAddrOperand<RegOperandType>::calcVector(Wavefront *w,
-                                           std::vector<Addr> &addrVec)
-{
-    Addr address = calcUniformBase();
-
-    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
-        if (w->execMask(lane)) {
-            if (reg.regFileChar == 's') {
-                addrVec[lane] = address + reg.template get<uint32_t>(w, lane);
-            } else {
-                addrVec[lane] = address + reg.template get<Addr>(w, lane);
-            }
-        }
-    }
-}
-
-template<typename RegOperandType>
-uint64_t
-RegAddrOperand<RegOperandType>::calcLane(Wavefront *w, int lane)
-{
-    Addr address = calcUniformBase();
-
-    return address + reg.template get<Addr>(w, lane);
-}
-
-template<typename RegOperandType>
-std::string
-RegAddrOperand<RegOperandType>::disassemble()
-{
-    return AddrOperandBase::disassemble(reg.disassemble());
-}
-
-typedef RegAddrOperand<SRegOperand> SRegAddrOperand;
-typedef RegAddrOperand<DRegOperand> DRegAddrOperand;
-
-class NoRegAddrOperand : public AddrOperandBase
-{
-  public:
-    void init(unsigned opOffset, const BrigObject *obj);
-    uint64_t calcUniform();
-    void calcVector(Wavefront *w, std::vector<Addr> &addrVec);
-    uint64_t calcLane(Wavefront *w, int lane=0);
-    std::string disassemble();
-};
-
-inline uint64_t
-NoRegAddrOperand::calcUniform()
-{
-    return AddrOperandBase::calcUniformBase();
-}
-
-inline uint64_t
-NoRegAddrOperand::calcLane(Wavefront *w, int lane)
-{
-    return calcUniform();
-}
-
-inline void
-NoRegAddrOperand::calcVector(Wavefront *w, std::vector<Addr> &addrVec)
-{
-    uint64_t address = calcUniformBase();
-
-    for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane)
-        addrVec[lane] = address;
-}
-
-class LabelOperand : public BaseOperand
-{
-  public:
-    Label *label;
-
-    void init(unsigned opOffset, const BrigObject *obj);
-    std::string disassemble();
-
-    // special get method for compatibility with SRegOperand
-    uint32_t getTarget(Wavefront *w, int lane);
-
-};
-
-class ListOperand : public BaseOperand
-{
-  public:
-    int elementCount;
-    std::vector<StorageElement*> callArgs;
-
-    int
-    getSrcOperand(int idx)
-    {
-        DPRINTF(GPUReg, "getSrcOperand, idx: %d, sz_args: %d\n", idx,
-                callArgs.size());
-
-        return callArgs.at(idx)->offset;
-    }
-
-    void init(unsigned opOffset, const BrigObject *obj);
-
-    std::string disassemble();
-
-    template<typename OperandType>
-    OperandType
-    get(Wavefront *w, int lane, int arg_idx)
-    {
-        return w->readCallArgMem<OperandType>(lane, getSrcOperand(arg_idx));
-    }
-
-    template<typename OperandType>
-    void
-    set(Wavefront *w, int lane, OperandType val)
-    {
-        w->writeCallArgMem<OperandType>(lane, getSrcOperand(0), val);
-        DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: arg[%d] <- %d\n",
-                w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane,
-                getSrcOperand(0), val);
-    }
-};
-
-class FunctionRefOperand : public BaseOperand
-{
-  public:
-    const char *func_name;
-
-    void init(unsigned opOffset, const BrigObject *obj);
-    std::string disassemble();
-};
-
-#endif // __ARCH_HSAIL_OPERAND_HH__
diff --git a/src/gpu-compute/brig_object.cc b/src/gpu-compute/brig_object.cc
deleted file mode 100644 (file)
index 6211598..0000000
+++ /dev/null
@@ -1,476 +0,0 @@
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Steve Reinhardt, Anthony Gutierrez
- */
-
-#include "gpu-compute/brig_object.hh"
-
-#include <fcntl.h>
-#include <sys/mman.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-#include <cassert>
-#include <cstddef>
-#include <cstdlib>
-
-#include "arch/hsail/Brig.h"
-#include "base/logging.hh"
-#include "base/trace.hh"
-#include "debug/BRIG.hh"
-#include "debug/HSAILObject.hh"
-#include "debug/HSALoader.hh"
-
-using namespace Brig;
-
-std::vector<std::function<HsaObject*(const std::string&, int, uint8_t*)>>
-    HsaObject::tryFileFuncs = { BrigObject::tryFile };
-
-extern int getBrigDataTypeBytes(BrigType16_t t);
-
-const char *BrigObject::sectionNames[] =
-{
-    "hsa_data",
-    "hsa_code",
-    "hsa_operand",
-    ".shstrtab"
-};
-
-const char *segmentNames[] =
-{
-    "none",
-    "flat",
-    "global",
-    "readonly",
-    "kernarg",
-    "group",
-    "private",
-    "spill",
-    "args"
-};
-
-const uint8_t*
-BrigObject::getSectionOffset(enum SectionIndex sec, int offs) const
-{
-    // allow offs == size for dummy end pointers
-    assert(offs <= sectionInfo[sec].size);
-
-    return sectionInfo[sec].ptr + offs;
-}
-
-const char*
-BrigObject::getString(int offs) const
-{
-    return (const char*)(getSectionOffset(DataSectionIndex, offs) + 4);
-}
-
-const BrigBase*
-BrigObject::getCodeSectionEntry(int offs) const
-{
-    return (const BrigBase*)getSectionOffset(CodeSectionIndex, offs);
-}
-
-const BrigData*
-BrigObject::getBrigBaseData(int offs) const
-{
-    return (Brig::BrigData*)(getSectionOffset(DataSectionIndex, offs));
-}
-
-const uint8_t*
-BrigObject::getData(int offs) const
-{
-    return getSectionOffset(DataSectionIndex, offs);
-}
-
-const BrigOperand*
-BrigObject::getOperand(int offs) const
-{
-    return (const BrigOperand*)getSectionOffset(OperandsSectionIndex, offs);
-}
-
-unsigned
-BrigObject::getOperandPtr(int offs, int index) const
-{
-    unsigned *op_offs = (unsigned*)(getData(offs + 4 * (index + 1)));
-
-    return *op_offs;
-}
-
-const BrigInstBase*
-BrigObject::getInst(int offs) const
-{
-    return (const BrigInstBase*)getSectionOffset(CodeSectionIndex, offs);
-}
-
-HsaCode*
-BrigObject::getKernel(const std::string &name) const
-{
-    return nullptr;
-}
-
-HsaCode*
-BrigObject::getFunction(const std::string &name) const
-{
-    for (int i = 0; i < functions.size(); ++i) {
-        if (functions[i]->name() == name) {
-            return functions[i];
-        }
-    }
-
-    return nullptr;
-}
-
-void
-BrigObject::processDirectives(const BrigBase *dirPtr, const BrigBase *endPtr,
-                              StorageMap *storageMap)
-{
-    while (dirPtr < endPtr) {
-        if (!dirPtr->byteCount) {
-            fatal("Bad directive size 0\n");
-        }
-
-        // calculate next pointer now so we can override it if needed
-        const BrigBase *nextDirPtr = brigNext(dirPtr);
-
-        DPRINTF(HSAILObject, "Code section entry kind: #%x, byte count: %d\n",
-                dirPtr->kind, dirPtr->byteCount);
-
-        switch (dirPtr->kind) {
-          case BRIG_KIND_DIRECTIVE_FUNCTION:
-            {
-                const BrigDirectiveExecutable *p M5_VAR_USED =
-                    reinterpret_cast<const BrigDirectiveExecutable*>(dirPtr);
-
-                DPRINTF(HSAILObject,"DIRECTIVE_FUNCTION: %s offset: "
-                        "%d next: %d\n", getString(p->name),
-                        p->firstCodeBlockEntry, p->nextModuleEntry);
-
-                if (p->firstCodeBlockEntry != p->nextModuleEntry) {
-                    // Function calls are not supported. We allow the BRIG
-                    // object file to create stubs, but the function calls will
-                    // not work properly if the application makes use of them.
-                    warn("HSA function invocations are unsupported.\n");
-
-                    const char *name = getString(p->name);
-
-                    HsailCode *code_obj = nullptr;
-
-                    for (int i = 0; i < functions.size(); ++i) {
-                        if (functions[i]->name() == name) {
-                            code_obj = functions[i];
-                            break;
-                        }
-                    }
-
-                    if (!code_obj) {
-                        // create new local storage map for kernel-local symbols
-                        code_obj = new HsailCode(name, p, this,
-                                                 new StorageMap(storageMap));
-                        functions.push_back(code_obj);
-                    } else {
-                        panic("Multiple definition of Function!!: %s\n",
-                              getString(p->name));
-                    }
-                }
-
-                nextDirPtr = getCodeSectionEntry(p->nextModuleEntry);
-            }
-            break;
-
-          case BRIG_KIND_DIRECTIVE_KERNEL:
-            {
-                const BrigDirectiveExecutable *p =
-                    reinterpret_cast<const BrigDirectiveExecutable*>(dirPtr);
-
-                DPRINTF(HSAILObject,"DIRECTIVE_KERNEL: %s offset: %d count: "
-                        "next: %d\n", getString(p->name),
-                        p->firstCodeBlockEntry, p->nextModuleEntry);
-
-                const char *name = getString(p->name);
-
-                if (name[0] == '&')
-                    name++;
-
-                std::string str = name;
-                char *temp;
-                int len = str.length();
-
-                if (str[len - 1] >= 'a' && str[len - 1] <= 'z') {
-                    temp = new char[str.size() + 1];
-                    std::copy(str.begin(), str.end() , temp);
-                    temp[str.size()] = '\0';
-                } else {
-                    temp = new char[str.size()];
-                    std::copy(str.begin(), str.end() - 1 , temp);
-                    temp[str.size() - 1 ] = '\0';
-                }
-
-                std::string kernel_name = temp;
-                delete[] temp;
-
-                HsailCode *code_obj = nullptr;
-
-                for (const auto &kernel : kernels) {
-                    if (kernel->name() == kernel_name) {
-                        code_obj = kernel;
-                        break;
-                    }
-                }
-
-                if (!code_obj) {
-                    // create new local storage map for kernel-local symbols
-                    code_obj = new HsailCode(kernel_name, p, this,
-                                             new StorageMap(storageMap));
-
-                    kernels.push_back(code_obj);
-                }
-
-                nextDirPtr = getCodeSectionEntry(p->nextModuleEntry);
-            }
-            break;
-
-          case BRIG_KIND_DIRECTIVE_VARIABLE:
-            {
-                const BrigDirectiveVariable *p =
-                    reinterpret_cast<const BrigDirectiveVariable*>(dirPtr);
-
-                uint64_t readonlySize_old =
-                    storageMap->getSize(BRIG_SEGMENT_READONLY);
-
-                StorageElement* se = storageMap->addSymbol(p, this);
-
-                DPRINTF(HSAILObject, "DIRECTIVE_VARIABLE, symbol %s\n",
-                        getString(p->name));
-
-                if (p->segment == BRIG_SEGMENT_READONLY) {
-                    // readonly memory has initialization data
-                    uint8_t* readonlyData_old = readonlyData;
-
-                    readonlyData =
-                        new uint8_t[storageMap->getSize(BRIG_SEGMENT_READONLY)];
-
-                    if (p->init) {
-                        if ((p->type == BRIG_TYPE_ROIMG) ||
-                            (p->type == BRIG_TYPE_WOIMG) ||
-                            (p->type == BRIG_TYPE_SAMP) ||
-                            (p->type == BRIG_TYPE_SIG32) ||
-                            (p->type == BRIG_TYPE_SIG64)) {
-                            panic("Read only data type not supported: %s\n",
-                                  getString(p->name));
-                        }
-
-                        const BrigOperand *brigOp = getOperand(p->init);
-                        assert(brigOp->kind ==
-                               BRIG_KIND_OPERAND_CONSTANT_BYTES);
-
-                        const Brig::BrigData *operand_data M5_VAR_USED =
-                            getBrigBaseData(((BrigOperandConstantBytes*)
-                                            brigOp)->bytes);
-
-                        assert((operand_data->byteCount / 4) > 0);
-
-                        uint8_t *symbol_data =
-                            (uint8_t*)getData(((BrigOperandConstantBytes*)
-                                              brigOp)->bytes + 4);
-
-                        // copy the old data and add the new data
-                        if (readonlySize_old > 0) {
-                            memcpy(readonlyData, readonlyData_old,
-                                   readonlySize_old);
-                        }
-
-                        memcpy(readonlyData + se->offset, symbol_data,
-                               se->size);
-
-                        delete[] readonlyData_old;
-                   }
-                }
-            }
-            break;
-
-          case BRIG_KIND_DIRECTIVE_LABEL:
-            {
-              const BrigDirectiveLabel M5_VAR_USED *p =
-                    reinterpret_cast<const BrigDirectiveLabel*>(dirPtr);
-
-              panic("Label directives cannot be at the module level: %s\n",
-                    getString(p->name));
-
-            }
-            break;
-
-          case BRIG_KIND_DIRECTIVE_COMMENT:
-            {
-              const BrigDirectiveComment M5_VAR_USED *p =
-                  reinterpret_cast<const BrigDirectiveComment*>(dirPtr);
-
-              DPRINTF(HSAILObject, "DIRECTIVE_COMMENT: %s\n",
-                      getString(p->name));
-            }
-            break;
-
-          case BRIG_KIND_DIRECTIVE_LOC:
-            {
-                DPRINTF(HSAILObject, "BRIG_DIRECTIVE_LOC\n");
-            }
-            break;
-
-          case BRIG_KIND_DIRECTIVE_MODULE:
-            {
-                const BrigDirectiveModule M5_VAR_USED *p =
-                    reinterpret_cast<const BrigDirectiveModule*>(dirPtr);
-
-                DPRINTF(HSAILObject, "BRIG_DIRECTIVE_MODULE: %s\n",
-                        getString(p->name));
-            }
-            break;
-
-          case BRIG_KIND_DIRECTIVE_CONTROL:
-            {
-                DPRINTF(HSAILObject, "DIRECTIVE_CONTROL\n");
-            }
-            break;
-
-          case BRIG_KIND_DIRECTIVE_PRAGMA:
-            {
-                DPRINTF(HSAILObject, "DIRECTIVE_PRAGMA\n");
-            }
-            break;
-
-          case BRIG_KIND_DIRECTIVE_EXTENSION:
-            {
-                DPRINTF(HSAILObject, "DIRECTIVE_EXTENSION\n");
-            }
-            break;
-
-          case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START:
-            {
-                DPRINTF(HSAILObject, "DIRECTIVE_ARG_BLOCK_START\n");
-            }
-            break;
-
-          case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END:
-            {
-                DPRINTF(HSAILObject, "DIRECTIVE_ARG_BLOCK_END\n");
-            }
-            break;
-          default:
-            if (dirPtr->kind >= BRIG_KIND_INST_BEGIN &&
-                dirPtr->kind <= BRIG_KIND_INST_END)
-                break;
-
-            if (dirPtr->kind >= BRIG_KIND_OPERAND_BEGIN &&
-                dirPtr->kind <= BRIG_KIND_OPERAND_END)
-                break;
-
-            warn("Unknown Brig directive kind: %d\n", dirPtr->kind);
-            break;
-        }
-
-        dirPtr = nextDirPtr;
-    }
-}
-
-HsaObject*
-BrigObject::tryFile(const std::string &fname, int len, uint8_t *fileData)
-{
-    const char *brig_ident = "HSA BRIG";
-
-    if (memcmp(brig_ident, fileData, MODULE_IDENTIFICATION_LENGTH))
-        return nullptr;
-
-    return new BrigObject(fname, len, fileData);
-}
-
-BrigObject::BrigObject(const std::string &fname, int len, uint8_t *fileData)
-    : HsaObject(fname), storageMap(new StorageMap())
-{
-    const char *brig_ident = "HSA BRIG";
-    BrigModuleHeader *mod_hdr = (BrigModuleHeader*)fileData;
-
-    fatal_if(memcmp(brig_ident, mod_hdr, MODULE_IDENTIFICATION_LENGTH),
-             "%s is not a BRIG file\n", fname);
-
-    if (mod_hdr->brigMajor != BRIG_VERSION_BRIG_MAJOR ||
-        mod_hdr->brigMinor != BRIG_VERSION_BRIG_MINOR) {
-        fatal("%s: BRIG version mismatch, %d.%d != %d.%d\n",
-              fname, mod_hdr->brigMajor, mod_hdr->brigMinor,
-              BRIG_VERSION_BRIG_MAJOR, BRIG_VERSION_BRIG_MINOR);
-    }
-
-    fatal_if(mod_hdr->sectionCount != NumSectionIndices, "%s: BRIG section "
-             "count (%d) != expected value (%d)\n", fname,
-             mod_hdr->sectionCount, NumSectionIndices);
-
-    for (int i = 0; i < NumSectionIndices; ++i) {
-        sectionInfo[i].ptr = nullptr;
-    }
-
-    uint64_t *sec_idx_table = (uint64_t*)(fileData + mod_hdr->sectionIndex);
-    for (int sec_idx = 0; sec_idx < mod_hdr->sectionCount; ++sec_idx) {
-        uint8_t *sec_hdr_byte_ptr = fileData + sec_idx_table[sec_idx];
-        BrigSectionHeader *sec_hdr = (BrigSectionHeader*)sec_hdr_byte_ptr;
-
-        // It doesn't look like cprintf supports string precision values,
-        // but if this breaks, the right answer is to fix that
-        DPRINTF(HSAILObject, "found section %.*s\n", sec_hdr->nameLength,
-                sec_hdr->name);
-
-        sectionInfo[sec_idx].ptr = new uint8_t[sec_hdr->byteCount];
-        memcpy(sectionInfo[sec_idx].ptr, sec_hdr_byte_ptr, sec_hdr->byteCount);
-        sectionInfo[sec_idx].size = sec_hdr->byteCount;
-    }
-
-    BrigSectionHeader *code_hdr =
-        (BrigSectionHeader*)sectionInfo[CodeSectionIndex].ptr;
-
-    DPRINTF(HSAILObject, "Code section hdr, count: %d, hdr count: %d, "
-            "name len: %d\n", code_hdr->byteCount, code_hdr->headerByteCount,
-            code_hdr->nameLength);
-
-    // start at offset 4 to skip initial null entry (see Brig spec)
-    processDirectives(getCodeSectionEntry(code_hdr->headerByteCount),
-                      getCodeSectionEntry(sectionInfo[CodeSectionIndex].size),
-                      storageMap);
-
-    delete[] fileData;
-
-    DPRINTF(HSALoader, "BRIG object %s loaded.\n", fname);
-}
-
-BrigObject::~BrigObject()
-{
-    for (int i = 0; i < NumSectionIndices; ++i)
-        if (sectionInfo[i].ptr)
-            delete[] sectionInfo[i].ptr;
-}
diff --git a/src/gpu-compute/brig_object.hh b/src/gpu-compute/brig_object.hh
deleted file mode 100644 (file)
index 59a5859..0000000
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Steve Reinhardt, Anthony Gutierrez
- */
-
-#ifndef __BRIG_OBJECT_HH__
-#define __BRIG_OBJECT_HH__
-
-#include <cassert>
-#include <cstdint>
-#include <string>
-#include <vector>
-
-#include "arch/hsail/Brig.h"
-#include "gpu-compute/hsa_object.hh"
-#include "gpu-compute/hsail_code.hh"
-
-class LabelMap;
-class StorageMap;
-
-/* @class BrigObject
- * this class implements the BRIG loader object, and
- * is used when the simulator directly executes HSAIL.
- * this class is responsible for extracting all
- * information about kernels contained in BRIG format
- * and converts them to HsailCode objects that are
- * usable by the simulator and emulated runtime.
- */
-
-class BrigObject final : public HsaObject
-{
-  public:
-    enum SectionIndex
-    {
-        DataSectionIndex,
-        CodeSectionIndex,
-        OperandsSectionIndex,
-        NumSectionIndices
-    };
-
-    static const char *sectionNames[];
-
-    struct SectionInfo
-    {
-        uint8_t *ptr;
-        int size;
-    };
-
-    static HsaObject* tryFile(const std::string &fname, int len,
-                              uint8_t *fileData);
-
-    SectionInfo sectionInfo[NumSectionIndices];
-    const uint8_t *getSectionOffset(enum SectionIndex sec, int offs) const;
-
-    std::vector<HsailCode*> kernels;
-    std::vector<HsailCode*> functions;
-    std::string kern_block_name;
-
-    void processDirectives(const Brig::BrigBase *dirPtr,
-                           const Brig::BrigBase *endPtr,
-                           StorageMap *storageMap);
-
-    BrigObject(const std::string &fname, int len, uint8_t *fileData);
-    ~BrigObject();
-
-    // eventually these will need to be per-kernel not per-object-file
-    StorageMap *storageMap;
-    LabelMap *labelMap;
-
-    const char* getString(int offs) const;
-    const Brig::BrigData* getBrigBaseData(int offs) const;
-    const uint8_t* getData(int offs) const;
-    const Brig::BrigBase* getCodeSectionEntry(int offs) const;
-    const Brig::BrigOperand* getOperand(int offs) const;
-    unsigned getOperandPtr(int offs, int index) const;
-    const Brig::BrigInstBase* getInst(int offs) const;
-
-    HsaCode* getKernel(const std::string &name) const override;
-    HsaCode* getFunction(const std::string &name) const override;
-
-    int numKernels() const override { return kernels.size(); }
-
-    HsaCode* getKernel(int i) const override { return kernels[i]; }
-
-    // pointer to the current kernel/function we're processing, so elements
-    // under construction can reference it.  kinda ugly, but easier
-    // than passing it all over for the few places it's needed.
-    mutable HsailCode *currentCode;
-};
-
-// Utility function to bump Brig item pointer to next element given
-// item size in bytes.  Really just an add but with lots of casting.
-template<typename T>
-T*
-brigNext(T *ptr)
-{
-    Brig::BrigBase *base_ptr = (Brig::BrigBase*)ptr;
-    int size = base_ptr->byteCount;
-    assert(size);
-
-    return (T*)((uint8_t*)ptr + size);
-}
-
-#endif // __BRIG_OBJECT_HH__
diff --git a/src/gpu-compute/cl_driver.cc b/src/gpu-compute/cl_driver.cc
deleted file mode 100644 (file)
index ee86017..0000000
+++ /dev/null
@@ -1,279 +0,0 @@
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Anthony Gutierrez
- */
-
-#include "gpu-compute/cl_driver.hh"
-
-#include <memory>
-
-#include "base/intmath.hh"
-#include "cpu/thread_context.hh"
-#include "gpu-compute/dispatcher.hh"
-#include "gpu-compute/hsa_code.hh"
-#include "gpu-compute/hsa_kernel_info.hh"
-#include "gpu-compute/hsa_object.hh"
-#include "params/ClDriver.hh"
-#include "sim/process.hh"
-#include "sim/syscall_emul_buf.hh"
-
-ClDriver::ClDriver(ClDriverParams *p)
-    : EmulatedDriver(p), hsaCode(0)
-{
-    for (const auto &codeFile : p->codefile)
-        codeFiles.push_back(&codeFile);
-
-    maxFuncArgsSize = 0;
-
-    for (int i = 0; i < codeFiles.size(); ++i) {
-        HsaObject *obj = HsaObject::createHsaObject(*codeFiles[i]);
-
-        for (int k = 0; k < obj->numKernels(); ++k) {
-            assert(obj->getKernel(k));
-            kernels.push_back(obj->getKernel(k));
-            kernels.back()->setReadonlyData((uint8_t*)obj->readonlyData);
-            int kern_funcargs_size = kernels.back()->funcarg_size;
-            maxFuncArgsSize = maxFuncArgsSize < kern_funcargs_size ?
-                kern_funcargs_size : maxFuncArgsSize;
-        }
-    }
-
-    int name_offs = 0;
-    int code_offs = 0;
-
-    for (int i = 0; i < kernels.size(); ++i) {
-        kernelInfo.push_back(HsaKernelInfo());
-        HsaCode *k = kernels[i];
-
-        k->generateHsaKernelInfo(&kernelInfo[i]);
-
-        kernelInfo[i].name_offs = name_offs;
-        kernelInfo[i].code_offs = code_offs;
-
-        name_offs += k->name().size() + 1;
-        code_offs += k->numInsts() * sizeof(TheGpuISA::RawMachInst);
-    }
-}
-
-void
-ClDriver::handshake(GpuDispatcher *_dispatcher)
-{
-    dispatcher = _dispatcher;
-    dispatcher->setFuncargsSize(maxFuncArgsSize);
-}
-
-int
-ClDriver::open(ThreadContext *tc, int mode, int flags)
-{
-    auto p = tc->getProcessPtr();
-    std::shared_ptr<DeviceFDEntry> fdp;
-    fdp = std::make_shared<DeviceFDEntry>(this, filename);
-    int tgt_fd = p->fds->allocFD(fdp);
-    return tgt_fd;
-}
-
-int
-ClDriver::ioctl(ThreadContext *tc, unsigned req, Addr buf_addr)
-{
-    switch (req) {
-      case HSA_GET_SIZES:
-        {
-            TypedBufferArg<HsaDriverSizes> sizes(buf_addr);
-            sizes->num_kernels = kernels.size();
-            sizes->string_table_size = 0;
-            sizes->code_size = 0;
-            sizes->readonly_size = 0;
-
-            if (kernels.size() > 0) {
-                // all kernels will share the same read-only memory
-                sizes->readonly_size =
-                    kernels[0]->getSize(HsaCode::MemorySegment::READONLY);
-                // check our assumption
-                for (int i = 1; i<kernels.size(); ++i) {
-                    assert(sizes->readonly_size ==
-                    kernels[i]->getSize(HsaCode::MemorySegment::READONLY));
-                }
-            }
-
-            for (int i = 0; i < kernels.size(); ++i) {
-                HsaCode *k = kernels[i];
-                // add one for terminating '\0'
-                sizes->string_table_size += k->name().size() + 1;
-                sizes->code_size +=
-                    k->numInsts() * sizeof(TheGpuISA::RawMachInst);
-            }
-
-            sizes.copyOut(tc->getVirtProxy());
-        }
-        break;
-
-      case HSA_GET_KINFO:
-        {
-            TypedBufferArg<HsaKernelInfo>
-                kinfo(buf_addr, sizeof(HsaKernelInfo) * kernels.size());
-
-            for (int i = 0; i < kernels.size(); ++i) {
-                HsaKernelInfo *ki = &kinfo[i];
-                ki->name_offs = kernelInfo[i].name_offs;
-                ki->code_offs = kernelInfo[i].code_offs;
-                ki->sRegCount = kernelInfo[i].sRegCount;
-                ki->dRegCount = kernelInfo[i].dRegCount;
-                ki->cRegCount = kernelInfo[i].cRegCount;
-                ki->static_lds_size  = kernelInfo[i].static_lds_size;
-                ki->private_mem_size = kernelInfo[i].private_mem_size;
-                ki->spill_mem_size   = kernelInfo[i].spill_mem_size;
-            }
-
-            kinfo.copyOut(tc->getVirtProxy());
-        }
-        break;
-
-      case HSA_GET_STRINGS:
-        {
-            int string_table_size = 0;
-            for (int i = 0; i < kernels.size(); ++i) {
-                HsaCode *k = kernels[i];
-                string_table_size += k->name().size() + 1;
-            }
-
-            BufferArg buf(buf_addr, string_table_size);
-            char *bufp = (char*)buf.bufferPtr();
-
-            for (int i = 0; i < kernels.size(); ++i) {
-                HsaCode *k = kernels[i];
-                const char *n = k->name().c_str();
-
-                // idiomatic string copy
-                while ((*bufp++ = *n++));
-            }
-
-            assert(bufp - (char *)buf.bufferPtr() == string_table_size);
-
-            buf.copyOut(tc->getVirtProxy());
-        }
-        break;
-
-      case HSA_GET_READONLY_DATA:
-        {
-            // we can pick any kernel --- they share the same
-            // readonly segment (this assumption is checked in GET_SIZES)
-            uint64_t size =
-                kernels.back()->getSize(HsaCode::MemorySegment::READONLY);
-            BufferArg data(buf_addr, size);
-            char *datap = (char *)data.bufferPtr();
-            memcpy(datap,
-                   kernels.back()->readonly_data,
-                   size);
-            data.copyOut(tc->getVirtProxy());
-        }
-        break;
-
-      case HSA_GET_CODE:
-        {
-            // set hsaCode pointer
-            hsaCode = buf_addr;
-            int code_size = 0;
-
-            for (int i = 0; i < kernels.size(); ++i) {
-                HsaCode *k = kernels[i];
-                code_size += k->numInsts() * sizeof(TheGpuISA::RawMachInst);
-            }
-
-            TypedBufferArg<TheGpuISA::RawMachInst> buf(buf_addr, code_size);
-            TheGpuISA::RawMachInst *bufp = buf;
-
-            int buf_idx = 0;
-
-            for (int i = 0; i < kernels.size(); ++i) {
-                HsaCode *k = kernels[i];
-
-                for (int j = 0; j < k->numInsts(); ++j) {
-                    bufp[buf_idx] = k->insts()->at(j);
-                    ++buf_idx;
-                }
-            }
-
-            buf.copyOut(tc->getVirtProxy());
-        }
-        break;
-
-      case HSA_GET_CU_CNT:
-        {
-            BufferArg buf(buf_addr, sizeof(uint32_t));
-            *((uint32_t*)buf.bufferPtr()) = dispatcher->getNumCUs();
-            buf.copyOut(tc->getVirtProxy());
-        }
-        break;
-
-      case HSA_GET_VSZ:
-        {
-            BufferArg buf(buf_addr, sizeof(uint32_t));
-            *((uint32_t*)buf.bufferPtr()) = dispatcher->wfSize();
-            buf.copyOut(tc->getVirtProxy());
-        }
-        break;
-      case HSA_GET_HW_STATIC_CONTEXT_SIZE:
-        {
-            BufferArg buf(buf_addr, sizeof(uint32_t));
-            *((uint32_t*)buf.bufferPtr()) = dispatcher->getStaticContextSize();
-            buf.copyOut(tc->getVirtProxy());
-        }
-        break;
-
-      default:
-        fatal("ClDriver: bad ioctl %d\n", req);
-    }
-
-    return 0;
-}
-
-const char*
-ClDriver::codeOffToKernelName(uint64_t code_ptr)
-{
-    assert(hsaCode);
-    uint32_t code_offs = code_ptr - hsaCode;
-
-    for (int i = 0; i < kernels.size(); ++i) {
-        if (code_offs == kernelInfo[i].code_offs) {
-            return kernels[i]->name().c_str();
-        }
-    }
-
-    return nullptr;
-}
-
-ClDriver*
-ClDriverParams::create()
-{
-    return new ClDriver(this);
-}
diff --git a/src/gpu-compute/cl_driver.hh b/src/gpu-compute/cl_driver.hh
deleted file mode 100644 (file)
index bc7b749..0000000
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Anthony Gutierrez
- */
-
-#ifndef __CL_DRIVER_HH__
-#define __CL_DRIVER_HH__
-
-#include <vector>
-
-#include "gpu-compute/hsa_kernel_info.hh"
-#include "sim/emul_driver.hh"
-
-class GpuDispatcher;
-class HsaCode;
-class Process;
-class ThreadContext;
-
-struct ClDriverParams;
-
-class ClDriver final : public EmulatedDriver
-{
-  public:
-    ClDriver(ClDriverParams *p);
-    void handshake(GpuDispatcher *_dispatcher);
-    int open(ThreadContext *tc, int mode, int flags);
-    int ioctl(ThreadContext *tc, unsigned req, Addr buf);
-    const char* codeOffToKernelName(uint64_t code_ptr);
-
-  private:
-    GpuDispatcher *dispatcher;
-
-    std::vector<const std::string*> codeFiles;
-
-    // All the kernels we know about
-    std::vector<HsaCode*> kernels;
-    std::vector<HsaCode*> functions;
-
-    std::vector<HsaKernelInfo> kernelInfo;
-
-    // maximum size necessary for function arguments
-    int maxFuncArgsSize;
-    // The host virtual address for the kernel code
-    uint64_t hsaCode;
-};
-
-#endif // __CL_DRIVER_HH__
diff --git a/src/gpu-compute/cl_event.hh b/src/gpu-compute/cl_event.hh
deleted file mode 100644 (file)
index 9722600..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __GPU_CL_EVENT_HH__
-#define __GPU_CL_EVENT_HH__
-
-struct HsaQueueEntry;
-
-class _cl_event {
-  public:
-    _cl_event() : done(false), hsaTaskPtr(nullptr), start(0), end(0) { }
-
-    volatile bool done;
-    HsaQueueEntry *hsaTaskPtr;
-    uint64_t start;
-    uint64_t end;
-};
-
-#endif // __GPU_CL_EVENT_HH__
diff --git a/src/gpu-compute/condition_register_state.cc b/src/gpu-compute/condition_register_state.cc
deleted file mode 100644 (file)
index 08555bb..0000000
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: John Kalamatianos
- */
-
-#include "gpu-compute/condition_register_state.hh"
-
-#include "gpu-compute/compute_unit.hh"
-#include "gpu-compute/gpu_static_inst.hh"
-#include "gpu-compute/shader.hh"
-#include "gpu-compute/wavefront.hh"
-
-ConditionRegisterState::ConditionRegisterState()
-{
-    computeUnit = nullptr;
-    c_reg.clear();
-    busy.clear();
-}
-
-void
-ConditionRegisterState::setParent(ComputeUnit *_computeUnit)
-{
-    computeUnit = _computeUnit;
-    _name = computeUnit->name() + ".CondRegState";
-}
-
-void
-ConditionRegisterState::init(uint32_t _size)
-{
-    c_reg.resize(_size);
-    busy.resize(_size, 0);
-}
-
-void
-ConditionRegisterState::exec(GPUDynInstPtr ii, Wavefront *w)
-{
-    // iterate over all operands
-    for (auto i = 0; i < ii->getNumOperands(); ++i) {
-        // is this a condition register destination operand?
-        if (ii->isCondRegister(i) && ii->isDstOperand(i)) {
-            // mark the register as busy
-            markReg(ii->getRegisterIndex(i, ii), 1);
-            uint32_t pipeLen =  w->computeUnit->spBypassLength();
-
-            // schedule an event for marking the register as ready
-            w->computeUnit->
-                registerEvent(w->simdId, ii->getRegisterIndex(i, ii),
-                              ii->getOperandSize(i),
-                              w->computeUnit->shader->tick_cnt +
-                              w->computeUnit->shader->ticks(pipeLen), 0);
-        }
-    }
-}
diff --git a/src/gpu-compute/condition_register_state.hh b/src/gpu-compute/condition_register_state.hh
deleted file mode 100644 (file)
index 2d3f5e1..0000000
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: John Kalamatianos
- */
-
-#ifndef __CONDITION_REGISTER_STATE_HH__
-#define __CONDITION_REGISTER_STATE_HH__
-
-#include <string>
-#include <vector>
-
-#include "gpu-compute/misc.hh"
-
-class ComputeUnit;
-class GPUStaticInst;
-class Shader;
-class Wavefront;
-
-// Condition Register State (used only when executing HSAIL)
-class ConditionRegisterState
-{
-  public:
-    ConditionRegisterState();
-    void init(uint32_t _size);
-    const std::string name() const { return _name; }
-    void setParent(ComputeUnit *_computeUnit);
-    void regStats() { }
-
-    template<typename T>
-    T
-    read(int regIdx, int threadId)
-    {
-        bool tmp = c_reg[regIdx][threadId];
-        T *p0 = (T*)(&tmp);
-
-        return *p0;
-    }
-
-    template<typename T>
-    void
-    write(int regIdx, int threadId, T value)
-    {
-        c_reg[regIdx][threadId] = (bool)(value & 0x01);
-    }
-
-    void
-    markReg(int regIdx, uint8_t value)
-    {
-        busy.at(regIdx) = value;
-    }
-
-    uint8_t
-    regBusy(int idx)
-    {
-        uint8_t status = busy.at(idx);
-        return status;
-    }
-
-    int numRegs() { return c_reg.size(); }
-    void exec(GPUDynInstPtr ii, Wavefront *w);
-
-  private:
-    ComputeUnit* computeUnit;
-    std::string _name;
-    // Condition Register state
-    std::vector<VectorMask> c_reg;
-    // flag indicating if a register is busy
-    std::vector<uint8_t> busy;
-};
-
-#endif
diff --git a/src/gpu-compute/hsa_code.hh b/src/gpu-compute/hsa_code.hh
deleted file mode 100644 (file)
index 9f358e2..0000000
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Anthony Gutierrez
- */
-
-#ifndef __HSA_CODE_HH__
-#define __HSA_CODE_HH__
-
-#include <string>
-#include <vector>
-
-#include "arch/gpu_types.hh"
-#include "config/the_gpu_isa.hh"
-
-class HsaKernelInfo;
-
-/* @class HsaCode
- * base code object for the set of HSA kernels associated
- * with a single application. this class provides the common
- * methods for creating, accessing, and storing information
- * about kernel and variable symbols, symbol name, memory
- * segment sizes, and instruction count, etc.
- */
-
-class HsaCode
-{
-  public:
-    HsaCode(const std::string &name) : readonly_data(nullptr), funcarg_size(0),
-                                       _name(name)
-    {
-    }
-
-    enum class MemorySegment {
-        NONE,
-        FLAT,
-        GLOBAL,
-        READONLY,
-        KERNARG,
-        GROUP,
-        PRIVATE,
-        SPILL,
-        ARG,
-        EXTSPACE0
-    };
-
-    const std::string& name() const { return _name; }
-    int numInsts() const { return _insts.size(); }
-    std::vector<TheGpuISA::RawMachInst>* insts() { return &_insts; }
-
-    void
-    setReadonlyData(uint8_t *_readonly_data)
-    {
-        readonly_data = _readonly_data;
-    }
-
-    virtual int getSize(MemorySegment segment) const = 0;
-    virtual void generateHsaKernelInfo(HsaKernelInfo *hsaKernelInfo) const = 0;
-
-    uint8_t *readonly_data;
-    int funcarg_size;
-
-  protected:
-    // An array that stores instruction indices (0 through kernel size)
-    // for a kernel passed to code object constructor as an argument.
-    std::vector<TheGpuISA::RawMachInst> _insts;
-
-  private:
-    const std::string _name;
-};
-
-#endif // __HSA_CODE_HH__
diff --git a/src/gpu-compute/hsa_kernel_info.hh b/src/gpu-compute/hsa_kernel_info.hh
deleted file mode 100644 (file)
index 4151695..0000000
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Steve Reinhardt
- */
-
-#ifndef __HSA_KERNEL_INFO_HH__
-#define __HSA_KERNEL_INFO_HH__
-
-// This file defines the public interface between the HSA emulated
-// driver and application programs.
-
-#include <cstdint>
-
-static const int HSA_GET_SIZES = 0x4801;
-static const int HSA_GET_KINFO = 0x4802;
-static const int HSA_GET_STRINGS = 0x4803;
-static const int HSA_GET_CODE = 0x4804;
-static const int HSA_GET_READONLY_DATA = 0x4805;
-static const int HSA_GET_CU_CNT = 0x4806;
-static const int HSA_GET_VSZ = 0x4807;
-static const int HSA_GET_HW_STATIC_CONTEXT_SIZE = 0x4808;
-
-// Return value (via buffer ptr) for HSA_GET_SIZES
-struct HsaDriverSizes
-{
-    uint32_t num_kernels;
-    uint32_t string_table_size;
-    uint32_t code_size;
-    uint32_t readonly_size;
-};
-
-// HSA_GET_KINFO returns an array of num_kernels of these structs
-struct HsaKernelInfo
-{
-    // byte offset into string table
-    uint32_t name_offs;
-    // byte offset into code array
-    uint32_t code_offs;
-    uint32_t static_lds_size;
-    uint32_t private_mem_size;
-    uint32_t spill_mem_size;
-    // Number of s registers
-    uint32_t sRegCount;
-    // Number of d registers
-    uint32_t dRegCount;
-    // Number of c registers
-    uint32_t cRegCount;
-};
-
-#endif // __HSA_KERNEL_INFO_HH__
diff --git a/src/gpu-compute/hsa_object.cc b/src/gpu-compute/hsa_object.cc
deleted file mode 100644 (file)
index ac734a4..0000000
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Anthony Gutierrez
- */
-
-#include "gpu-compute/hsa_object.hh"
-
-#include <cassert>
-#include <fstream>
-
-#include "base/logging.hh"
-
-HsaObject::HsaObject(const std::string &fname)
-    : readonlyData(nullptr), filename(fname)
-{
-}
-
-HsaObject*
-HsaObject::createHsaObject(const std::string &fname)
-{
-    HsaObject *hsaObj = nullptr;
-    uint8_t *file_data = nullptr;
-    int file_length = 0;
-
-    std::ifstream code_file(fname, std::ifstream::ate | std::ifstream::in |
-                            std::ifstream::binary);
-
-    assert(code_file.is_open());
-    assert(code_file.good());
-
-    file_length = code_file.tellg();
-    code_file.seekg(0, code_file.beg);
-    file_data = new uint8_t[file_length];
-    code_file.read((char*)file_data, file_length);
-    code_file.close();
-
-    for (const auto &tryFile : tryFileFuncs) {
-        if ((hsaObj = tryFile(fname, file_length, file_data))) {
-            return hsaObj;
-        }
-    }
-
-    delete[] file_data;
-    fatal("Unknown HSA object type for file: %s.\n", fname);
-
-    return nullptr;
-}
diff --git a/src/gpu-compute/hsa_object.hh b/src/gpu-compute/hsa_object.hh
deleted file mode 100644 (file)
index 1f08f5d..0000000
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Anthony Gutierrez
- */
-
-#ifndef __HSA_OBJECT_HH__
-#define __HSA_OBJECT_HH__
-
-#include <functional>
-#include <string>
-#include <vector>
-
-class HsaCode;
-
-/* @class HsaObject
- * base loader object for HSA kernels. this class provides
- * the base method definitions for loading, storing, and
- * accessing HSA kernel objects into the simulator.
- */
-
-class HsaObject
-{
-  public:
-    HsaObject(const std::string &fileName);
-
-    static HsaObject* createHsaObject(const std::string &fname);
-    static std::vector<std::function<HsaObject*(const std::string&, int,
-                                                uint8_t*)>> tryFileFuncs;
-
-    virtual HsaCode* getKernel(const std::string &name) const = 0;
-    virtual HsaCode* getKernel(int i) const = 0;
-    virtual HsaCode* getFunction(const std::string &name) const = 0;
-    virtual int numKernels() const = 0;
-
-    const std::string& name() const { return filename; }
-
-    uint8_t *readonlyData;
-
-
-  protected:
-    const std::string filename;
-};
-
-#endif // __HSA_OBJECT_HH__
diff --git a/src/gpu-compute/hsail_code.cc b/src/gpu-compute/hsail_code.cc
deleted file mode 100644 (file)
index a5b47b1..0000000
+++ /dev/null
@@ -1,460 +0,0 @@
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Steve Reinhardt
- */
-
-#include "gpu-compute/hsail_code.hh"
-
-#include "arch/gpu_types.hh"
-#include "arch/hsail/Brig.h"
-#include "arch/hsail/operand.hh"
-#include "config/the_gpu_isa.hh"
-#include "debug/BRIG.hh"
-#include "debug/HSAILObject.hh"
-#include "gpu-compute/brig_object.hh"
-#include "gpu-compute/gpu_static_inst.hh"
-#include "gpu-compute/kernel_cfg.hh"
-
-using namespace Brig;
-
-int getBrigDataTypeBytes(BrigType16_t t);
-
-HsailCode::HsailCode(const std::string &name_str)
-    : HsaCode(name_str), private_size(-1), readonly_size(-1)
-{
-}
-
-void
-HsailCode::init(const BrigDirectiveExecutable *code_dir, const BrigObject *obj,
-                StorageMap *objStorageMap)
-{
-    storageMap = objStorageMap;
-
-    // set pointer so that decoding process can find this kernel context when
-    // needed
-    obj->currentCode = this;
-
-    if (code_dir->base.kind != BRIG_KIND_DIRECTIVE_FUNCTION &&
-        code_dir->base.kind != BRIG_KIND_DIRECTIVE_KERNEL) {
-        fatal("unexpected directive kind %d inside kernel/function init\n",
-              code_dir->base.kind);
-    }
-
-    DPRINTF(HSAILObject, "Initializing code, first code block entry is: %d\n",
-            code_dir->firstCodeBlockEntry);
-
-    // clear these static vars so we can properly track the max index
-    // for this kernel
-    SRegOperand::maxRegIdx = 0;
-    DRegOperand::maxRegIdx = 0;
-    CRegOperand::maxRegIdx = 0;
-    setPrivateSize(0);
-
-    const BrigBase *entryPtr = brigNext((BrigBase*)code_dir);
-    const BrigBase *endPtr =
-        obj->getCodeSectionEntry(code_dir->nextModuleEntry);
-
-    // the instruction's byte address (relative to the base addr
-    // of the code section)
-    int inst_addr = 0;
-    // the index that points to the instruction in the instruction
-    // array
-    int inst_idx = 0;
-    std::vector<GPUStaticInst*> instructions;
-    int funcarg_size_scope = 0;
-
-    // walk through instructions in code section and directives in
-    // directive section in parallel, processing directives that apply
-    // when we reach the relevant code point.
-    while (entryPtr < endPtr) {
-        switch (entryPtr->kind) {
-          case BRIG_KIND_DIRECTIVE_VARIABLE:
-           {
-                const BrigDirectiveVariable *sym =
-                    (const BrigDirectiveVariable*)entryPtr;
-
-                DPRINTF(HSAILObject,"Initializing code, directive is "
-                        "kind_variable, symbol is: %s\n",
-                        obj->getString(sym->name));
-
-                StorageElement *se = storageMap->addSymbol(sym, obj);
-
-                if (sym->segment == BRIG_SEGMENT_PRIVATE) {
-                    setPrivateSize(se->size);
-                } else { // spill
-                    funcarg_size_scope += se->size;
-                }
-            }
-            break;
-
-          case BRIG_KIND_DIRECTIVE_LABEL:
-            {
-                const BrigDirectiveLabel *lbl =
-                    (const BrigDirectiveLabel*)entryPtr;
-
-                DPRINTF(HSAILObject,"Initializing code, directive is "
-                        "kind_label, label is: %s \n",
-                        obj->getString(lbl->name));
-
-                labelMap.addLabel(lbl, inst_addr, obj);
-            }
-            break;
-
-          case BRIG_KIND_DIRECTIVE_PRAGMA:
-            {
-                DPRINTF(HSAILObject, "Initializing code, directive "
-                        "is kind_pragma\n");
-            }
-            break;
-
-          case BRIG_KIND_DIRECTIVE_COMMENT:
-            {
-                DPRINTF(HSAILObject, "Initializing code, directive is "
-                        "kind_comment\n");
-            }
-            break;
-
-          case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START:
-            {
-                DPRINTF(HSAILObject, "Initializing code, directive is "
-                        "kind_arg_block_start\n");
-
-                storageMap->resetOffset(BRIG_SEGMENT_ARG);
-                funcarg_size_scope = 0;
-            }
-            break;
-
-          case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END:
-            {
-                DPRINTF(HSAILObject, "Initializing code, directive is "
-                        "kind_arg_block_end\n");
-
-                funcarg_size = funcarg_size < funcarg_size_scope ?
-                                              funcarg_size_scope : funcarg_size;
-            }
-            break;
-
-          case BRIG_KIND_DIRECTIVE_END:
-            DPRINTF(HSAILObject, "Initializing code, dircetive is "
-                    "kind_end\n");
-
-            break;
-
-          default:
-            if (entryPtr->kind >= BRIG_KIND_INST_BEGIN &&
-                entryPtr->kind <= BRIG_KIND_INST_END) {
-
-                BrigInstBase *instPtr = (BrigInstBase*)entryPtr;
-                TheGpuISA::MachInst machInst = { instPtr, obj };
-                GPUStaticInst *iptr = decoder.decode(machInst);
-
-                if (iptr) {
-                    DPRINTF(HSAILObject, "Initializing code, processing inst "
-                            "byte addr #%d idx %d: OPCODE=%d\n", inst_addr,
-                            inst_idx, instPtr->opcode);
-
-                    TheGpuISA::RawMachInst raw_inst = decoder.saveInst(iptr);
-                    iptr->instNum(inst_idx);
-                    iptr->instAddr(inst_addr);
-                    _insts.push_back(raw_inst);
-                    instructions.push_back(iptr);
-                }
-                inst_addr += sizeof(TheGpuISA::RawMachInst);
-                ++inst_idx;
-            } else if (entryPtr->kind >= BRIG_KIND_OPERAND_BEGIN &&
-                       entryPtr->kind < BRIG_KIND_OPERAND_END) {
-                warn("unexpected operand entry in code segment\n");
-            } else {
-                // there are surely some more cases we will need to handle,
-                // but we'll deal with them as we find them.
-                fatal("unexpected directive kind %d inside kernel scope\n",
-                      entryPtr->kind);
-            }
-        }
-
-        entryPtr = brigNext(entryPtr);
-    }
-
-    // compute Control Flow Graph for current kernel
-    ControlFlowInfo::assignImmediatePostDominators(instructions);
-
-    max_sreg = SRegOperand::maxRegIdx;
-    max_dreg = DRegOperand::maxRegIdx;
-    max_creg = CRegOperand::maxRegIdx;
-
-    obj->currentCode = nullptr;
-}
-
-HsailCode::HsailCode(const std::string &name_str,
-                     const BrigDirectiveExecutable *code_dir,
-                     const BrigObject *obj, StorageMap *objStorageMap)
-    : HsaCode(name_str), private_size(-1), readonly_size(-1)
-{
-    init(code_dir, obj, objStorageMap);
-}
-
-void
-LabelMap::addLabel(const Brig::BrigDirectiveLabel *lblDir, int inst_index,
-                   const BrigObject *obj)
-{
-    std::string lbl_name = obj->getString(lblDir->name);
-    Label &lbl = map[lbl_name];
-
-    if (lbl.defined()) {
-        fatal("Attempt to redefine existing label %s\n", lbl_name);
-    }
-
-    lbl.define(lbl_name, inst_index);
-    DPRINTF(HSAILObject, "label %s = %d\n", lbl_name, inst_index);
-}
-
-Label*
-LabelMap::refLabel(const Brig::BrigDirectiveLabel *lblDir,
-                   const BrigObject *obj)
-{
-    std::string name = obj->getString(lblDir->name);
-    Label &lbl = map[name];
-    lbl.checkName(name);
-
-    return &lbl;
-}
-
-int
-getBrigDataTypeBytes(BrigType16_t t)
-{
-    switch (t) {
-      case BRIG_TYPE_S8:
-      case BRIG_TYPE_U8:
-      case BRIG_TYPE_B8:
-        return 1;
-
-      case BRIG_TYPE_S16:
-      case BRIG_TYPE_U16:
-      case BRIG_TYPE_B16:
-      case BRIG_TYPE_F16:
-        return 2;
-
-      case BRIG_TYPE_S32:
-      case BRIG_TYPE_U32:
-      case BRIG_TYPE_B32:
-      case BRIG_TYPE_F32:
-        return 4;
-
-      case BRIG_TYPE_S64:
-      case BRIG_TYPE_U64:
-      case BRIG_TYPE_B64:
-      case BRIG_TYPE_F64:
-        return 8;
-
-      case BRIG_TYPE_B1:
-
-      default:
-        fatal("unhandled symbol data type %d", t);
-        return 0;
-    }
-}
-
-StorageElement*
-StorageSpace::addSymbol(const BrigDirectiveVariable *sym,
-                        const BrigObject *obj)
-{
-    const char *sym_name = obj->getString(sym->name);
-    uint64_t size = 0;
-    uint64_t offset = 0;
-
-    if (sym->type & BRIG_TYPE_ARRAY) {
-        size = getBrigDataTypeBytes(sym->type & ~BRIG_TYPE_ARRAY);
-        size *= (((uint64_t)sym->dim.hi) << 32 | (uint64_t)sym->dim.lo);
-
-        offset = roundUp(nextOffset, getBrigDataTypeBytes(sym->type &
-                         ~BRIG_TYPE_ARRAY));
-    } else {
-        size = getBrigDataTypeBytes(sym->type);
-        offset = roundUp(nextOffset, getBrigDataTypeBytes(sym->type));
-    }
-
-    nextOffset = offset + size;
-
-    DPRINTF(HSAILObject, "Adding SYMBOL %s size %d offset %#x, init: %d\n",
-            sym_name, size, offset, sym->init);
-
-    StorageElement* se = new StorageElement(sym_name, offset, size, sym);
-    elements.push_back(se);
-    elements_by_addr.insert(AddrRange(offset, offset + size - 1), se);
-    elements_by_brigptr[sym] = se;
-
-    return se;
-}
-
-StorageElement*
-StorageSpace::findSymbol(std::string name)
-{
-    for (auto it : elements) {
-        if (it->name == name) {
-            return it;
-        }
-    }
-
-    return nullptr;
-}
-
-StorageElement*
-StorageSpace::findSymbol(uint64_t addr)
-{
-    assert(elements_by_addr.size() > 0);
-
-    auto se = elements_by_addr.contains(addr);
-
-    if (se == elements_by_addr.end()) {
-        return nullptr;
-    } else {
-        return se->second;
-    }
-}
-
-StorageElement*
-StorageSpace::findSymbol(const BrigDirectiveVariable *brigptr)
-{
-    assert(elements_by_brigptr.size() > 0);
-
-    auto se = elements_by_brigptr.find(brigptr);
-
-    if (se == elements_by_brigptr.end()) {
-        return nullptr;
-    } else {
-        return se->second;
-    }
-}
-
-StorageMap::StorageMap(StorageMap *outerScope)
-    : outerScopeMap(outerScope)
-{
-    for (int i = 0; i < NumSegments; ++i)
-        space[i] = new StorageSpace((BrigSegment)i);
-}
-
-StorageElement*
-StorageMap::addSymbol(const BrigDirectiveVariable *sym, const BrigObject *obj)
-{
-    BrigSegment8_t segment = sym->segment;
-
-    assert(segment >= Brig::BRIG_SEGMENT_FLAT);
-    assert(segment < NumSegments);
-
-    return space[segment]->addSymbol(sym, obj);
-}
-
-int
-StorageMap::getSize(Brig::BrigSegment segment)
-{
-    assert(segment > Brig::BRIG_SEGMENT_GLOBAL);
-    assert(segment < NumSegments);
-
-    if (segment != Brig::BRIG_SEGMENT_GROUP &&
-        segment != Brig::BRIG_SEGMENT_READONLY) {
-        return space[segment]->getSize();
-    } else {
-        int ret = space[segment]->getSize();
-
-        if (outerScopeMap) {
-            ret += outerScopeMap->getSize(segment);
-        }
-
-        return ret;
-    }
-}
-
-void
-StorageMap::resetOffset(Brig::BrigSegment segment)
-{
-    space[segment]->resetOffset();
-}
-
-StorageElement*
-StorageMap::findSymbol(BrigSegment segment, std::string name)
-{
-    StorageElement *se = space[segment]->findSymbol(name);
-
-    if (se)
-        return se;
-
-    if (outerScopeMap)
-        return outerScopeMap->findSymbol(segment, name);
-
-    return nullptr;
-}
-
-StorageElement*
-StorageMap::findSymbol(Brig::BrigSegment segment, uint64_t addr)
-{
-    StorageSpace *sp = space[segment];
-
-    if (!sp) {
-        // there is no memory in segment?
-        return nullptr;
-    }
-
-    StorageElement *se = sp->findSymbol(addr);
-
-    if (se)
-        return se;
-
-    if (outerScopeMap)
-        return outerScopeMap->findSymbol(segment, addr);
-
-    return nullptr;
-
-}
-
-StorageElement*
-StorageMap::findSymbol(Brig::BrigSegment segment,
-                       const BrigDirectiveVariable *brigptr)
-{
-    StorageSpace *sp = space[segment];
-
-    if (!sp) {
-        // there is no memory in segment?
-        return nullptr;
-    }
-
-    StorageElement *se = sp->findSymbol(brigptr);
-
-    if (se)
-        return se;
-
-    if (outerScopeMap)
-        return outerScopeMap->findSymbol(segment, brigptr);
-
-    return nullptr;
-
-}
diff --git a/src/gpu-compute/hsail_code.hh b/src/gpu-compute/hsail_code.hh
deleted file mode 100644 (file)
index 9919461..0000000
+++ /dev/null
@@ -1,445 +0,0 @@
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Steve Reinhardt
- */
-
-#ifndef __HSAIL_CODE_HH__
-#define __HSAIL_CODE_HH__
-
-#include <cassert>
-#include <list>
-#include <map>
-#include <string>
-#include <vector>
-
-#include "arch/gpu_decoder.hh"
-#include "arch/hsail/Brig.h"
-#include "base/addr_range_map.hh"
-#include "base/intmath.hh"
-#include "config/the_gpu_isa.hh"
-#include "gpu-compute/hsa_code.hh"
-#include "gpu-compute/hsa_kernel_info.hh"
-#include "gpu-compute/misc.hh"
-
-class BrigObject;
-class GPUStaticInst;
-
-inline int
-popcount(uint64_t src, int sz)
-{
-    int cnt = 0;
-
-    for (int i = 0; i < sz; ++i) {
-        if (src & 1)
-            ++cnt;
-        src >>= 1;
-    }
-
-    return cnt;
-}
-
-inline int
-firstbit(uint64_t src, int sz)
-{
-    int i;
-
-    for (i = 0; i < sz; ++i) {
-        if (src & 1)
-            break;
-        src >>= 1;
-    }
-
-    return i;
-}
-
-inline int
-lastbit(uint64_t src, int sz)
-{
-    int i0 = -1;
-
-    for (int i = 0; i < sz; ++i) {
-        if (src & 1)
-            i0 = i;
-        src >>= 1;
-    }
-
-    return i0;
-}
-
-inline int
-signbit(uint64_t src, int sz)
-{
-    int i0 = -1;
-
-    if (src & (1 << (sz - 1))) {
-        for (int i = 0; i < sz - 1; ++i) {
-            if (!(src & 1))
-                i0 = i;
-            src >>= 1;
-        }
-    } else {
-        for (int i = 0; i < sz - 1; ++i) {
-            if (src & 1)
-                i0 = i;
-            src >>= 1;
-        }
-    }
-
-    return i0;
-}
-
-inline uint64_t
-bitrev(uint64_t src, int sz)
-{
-    uint64_t r = 0;
-
-    for (int i = 0; i < sz; ++i) {
-        r <<= 1;
-        if (src & 1)
-            r |= 1;
-        src >>= 1;
-    }
-
-    return r;
-}
-
-inline uint64_t
-mul_hi(uint32_t a, uint32_t b)
-{
-    return ((uint64_t)a * (uint64_t)b) >> 32;
-}
-
-inline uint64_t
-mul_hi(int32_t a, int32_t b)
-{
-    return ((int64_t)a * (int64_t)b) >> 32;
-}
-
-inline uint64_t
-mul_hi(uint64_t a, uint64_t b)
-{
-    return ((uint64_t)a * (uint64_t)b) >> 32;
-}
-
-inline uint64_t
-mul_hi(int64_t a, int64_t b)
-{
-    return ((int64_t)a * (int64_t)b) >> 32;
-}
-
-inline uint64_t
-mul_hi(double a, double b)
-{
-    return 0;
-}
-
-class Label
-{
-  public:
-    std::string name;
-    int value;
-
-    Label() : value(-1)
-    {
-    }
-
-    bool defined() { return value != -1; }
-
-    void
-    checkName(std::string &_name)
-    {
-        if (name.empty()) {
-            name = _name;
-        } else {
-            assert(name == _name);
-        }
-    }
-
-    void
-    define(std::string &_name, int _value)
-    {
-        assert(!defined());
-        assert(_value != -1);
-        value = _value;
-        checkName(_name);
-    }
-
-    int
-    get()
-    {
-        assert(defined());
-        return value;
-    }
-};
-
-class LabelMap
-{
-    std::map<std::string, Label> map;
-
-  public:
-    LabelMap() { }
-
-    void addLabel(const Brig::BrigDirectiveLabel *lbl, int inst_index,
-                  const BrigObject *obj);
-
-    Label *refLabel(const Brig::BrigDirectiveLabel *lbl,
-                    const BrigObject *obj);
-};
-
-const int NumSegments = Brig::BRIG_SEGMENT_AMD_GCN;
-
-extern const char *segmentNames[];
-
-class StorageElement
-{
-  public:
-    std::string name;
-    uint64_t offset;
-
-    uint64_t size;
-    const Brig::BrigDirectiveVariable *brigSymbol;
-    StorageElement(const char *_name, uint64_t _offset, int _size,
-                   const Brig::BrigDirectiveVariable *sym)
-        : name(_name), offset(_offset), size(_size), brigSymbol(sym)
-    {
-    }
-};
-
-class StorageSpace
-{
-    typedef std::map<const Brig::BrigDirectiveVariable*, StorageElement*>
-            DirVarToSE_map;
-
-    std::list<StorageElement*> elements;
-    AddrRangeMap<StorageElement*> elements_by_addr;
-    DirVarToSE_map elements_by_brigptr;
-
-    uint64_t nextOffset;
-
-  public:
-    StorageSpace(Brig::BrigSegment _class) : nextOffset(0)
-    {
-    }
-
-    StorageElement *addSymbol(const Brig::BrigDirectiveVariable *sym,
-                              const BrigObject *obj);
-
-    StorageElement* findSymbol(std::string name);
-    StorageElement* findSymbol(uint64_t addr);
-    StorageElement* findSymbol(const Brig::BrigDirectiveVariable *brigptr);
-
-    int getSize() { return nextOffset; }
-    void resetOffset() { nextOffset = 0; }
-};
-
-class StorageMap
-{
-    StorageMap *outerScopeMap;
-    StorageSpace *space[NumSegments];
-
-  public:
-    StorageMap(StorageMap *outerScope = nullptr);
-
-    StorageElement *addSymbol(const Brig::BrigDirectiveVariable *sym,
-                              const BrigObject *obj);
-
-    StorageElement* findSymbol(Brig::BrigSegment segment, std::string name);
-    StorageElement* findSymbol(Brig::BrigSegment segment, uint64_t addr);
-
-    StorageElement* findSymbol(Brig::BrigSegment segment,
-                               const Brig::BrigDirectiveVariable *brigptr);
-
-    // overloaded version to avoid casting
-    StorageElement*
-    findSymbol(Brig::BrigSegment8_t segment, std::string name)
-    {
-        return findSymbol((Brig::BrigSegment)segment, name);
-    }
-
-    int getSize(Brig::BrigSegment segment);
-    void resetOffset(Brig::BrigSegment segment);
-};
-
-typedef enum
-{
-    BT_DEFAULT,
-    BT_B8,
-    BT_U8,
-    BT_U16,
-    BT_U32,
-    BT_U64,
-    BT_S8,
-    BT_S16,
-    BT_S32,
-    BT_S64,
-    BT_F16,
-    BT_F32,
-    BT_F64,
-    BT_NULL
-} base_type_e;
-
-/* @class HsailCode
- * the HsailCode class is used to store information
- * about HSA kernels stored in the BRIG format. it holds
- * all information about a kernel, function, or variable
- * symbol and provides methods for accessing that
- * information.
- */
-
-class HsailCode final : public HsaCode
-{
-  public:
-    TheGpuISA::Decoder decoder;
-
-    StorageMap *storageMap;
-    LabelMap labelMap;
-    uint32_t kernarg_start;
-    uint32_t kernarg_end;
-    int32_t private_size;
-
-    int32_t readonly_size;
-
-    // We track the maximum register index used for each register
-    // class when we load the code so we can size the register files
-    // appropriately (i.e., one more than the max index).
-    uint32_t max_creg;    // maximum c-register index
-    uint32_t max_sreg;    // maximum s-register index
-    uint32_t max_dreg;    // maximum d-register index
-
-    HsailCode(const std::string &name_str,
-              const Brig::BrigDirectiveExecutable *code_dir,
-              const BrigObject *obj,
-              StorageMap *objStorageMap);
-
-    // this version is used to create a placeholder when
-    // we encounter a kernel-related directive before the
-    // kernel itself
-    HsailCode(const std::string &name_str);
-
-    void init(const Brig::BrigDirectiveExecutable *code_dir,
-              const BrigObject *obj, StorageMap *objStorageMap);
-
-    void
-    generateHsaKernelInfo(HsaKernelInfo *hsaKernelInfo) const
-    {
-        hsaKernelInfo->sRegCount = max_sreg + 1;
-        hsaKernelInfo->dRegCount = max_dreg + 1;
-        hsaKernelInfo->cRegCount = max_creg + 1;
-
-        hsaKernelInfo->static_lds_size = getSize(Brig::BRIG_SEGMENT_GROUP);
-
-        hsaKernelInfo->private_mem_size =
-            roundUp(getSize(Brig::BRIG_SEGMENT_PRIVATE), 8);
-
-        hsaKernelInfo->spill_mem_size =
-            roundUp(getSize(Brig::BRIG_SEGMENT_SPILL), 8);
-    }
-
-    int
-    getSize(MemorySegment segment) const
-    {
-        Brig::BrigSegment brigSeg;
-
-        switch (segment) {
-          case MemorySegment::NONE:
-            brigSeg = Brig::BRIG_SEGMENT_NONE;
-            break;
-          case MemorySegment::FLAT:
-            brigSeg = Brig::BRIG_SEGMENT_FLAT;
-            break;
-          case MemorySegment::GLOBAL:
-            brigSeg = Brig::BRIG_SEGMENT_GLOBAL;
-            break;
-          case MemorySegment::READONLY:
-            brigSeg = Brig::BRIG_SEGMENT_READONLY;
-            break;
-          case MemorySegment::KERNARG:
-            brigSeg = Brig::BRIG_SEGMENT_KERNARG;
-            break;
-          case MemorySegment::GROUP:
-            brigSeg = Brig::BRIG_SEGMENT_GROUP;
-            break;
-          case MemorySegment::PRIVATE:
-            brigSeg = Brig::BRIG_SEGMENT_PRIVATE;
-            break;
-          case MemorySegment::SPILL:
-            brigSeg = Brig::BRIG_SEGMENT_SPILL;
-            break;
-          case MemorySegment::ARG:
-            brigSeg = Brig::BRIG_SEGMENT_ARG;
-            break;
-          case MemorySegment::EXTSPACE0:
-            brigSeg = Brig::BRIG_SEGMENT_AMD_GCN;
-            break;
-          default:
-            fatal("Unknown BrigSegment type.\n");
-        }
-
-        return getSize(brigSeg);
-    }
-
-  private:
-    int
-    getSize(Brig::BrigSegment segment) const
-    {
-        if (segment == Brig::BRIG_SEGMENT_PRIVATE) {
-            // with the code generated by new HSA compiler the assertion
-            // does not hold anymore..
-            //assert(private_size != -1);
-            return private_size;
-        } else {
-            return storageMap->getSize(segment);
-        }
-    }
-
-  public:
-    StorageElement*
-    findSymbol(Brig::BrigSegment segment, uint64_t addr)
-    {
-        return storageMap->findSymbol(segment, addr);
-    }
-
-    void
-    setPrivateSize(int32_t _private_size)
-    {
-        private_size = _private_size;
-    }
-
-    Label*
-    refLabel(const Brig::BrigDirectiveLabel *lbl, const BrigObject *obj)
-    {
-        return labelMap.refLabel(lbl, obj);
-    }
-};
-
-#endif // __HSAIL_CODE_HH__
diff --git a/src/gpu-compute/kernel_cfg.cc b/src/gpu-compute/kernel_cfg.cc
deleted file mode 100644 (file)
index de518ec..0000000
+++ /dev/null
@@ -1,295 +0,0 @@
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Steve Reinhardt
- */
-
-#include "gpu-compute/kernel_cfg.hh"
-
-#include <algorithm>
-#include <cassert>
-#include <cstdio>
-#include <cstring>
-#include <iostream>
-#include <iterator>
-#include <map>
-#include <string>
-
-#include "gpu-compute/gpu_static_inst.hh"
-
-void
-ControlFlowInfo::assignImmediatePostDominators(
-        const std::vector<GPUStaticInst*>& instructions)
-{
-    ControlFlowInfo cfg(instructions);
-    cfg.findImmediatePostDominators();
-}
-
-
-ControlFlowInfo::ControlFlowInfo(const std::vector<GPUStaticInst*>& insts) :
-        instructions(insts)
-{
-    createBasicBlocks();
-    connectBasicBlocks();
-}
-
-BasicBlock*
-ControlFlowInfo::basicBlock(int inst_addr) const {
-    for (auto& block: basicBlocks) {
-       int first_block_addr = block->firstInstruction->instAddr();
-       if (inst_addr >= first_block_addr && inst_addr <
-           first_block_addr + block->size * sizeof(TheGpuISA::RawMachInst)) {
-           return block.get();
-       }
-    }
-    return nullptr;
-}
-
-
-GPUStaticInst*
-ControlFlowInfo::lastInstruction(const BasicBlock* block) const
-{
-    if (block->isExit()) {
-        return nullptr;
-    }
-
-    return instructions.at(block->firstInstruction->instNum() +
-                           block->size - 1);
-}
-
-BasicBlock*
-ControlFlowInfo::postDominator(const BasicBlock* block) const
-{
-    if (block->isExit()) {
-        return nullptr;
-    }
-    return basicBlock(lastInstruction(block)->ipdInstNum());
-}
-
-void
-ControlFlowInfo::createBasicBlocks()
-{
-    assert(!instructions.empty());
-    std::set<int> leaders;
-    // first instruction is a leader
-    leaders.insert(0);
-    for (const auto &instruction : instructions) {
-        if (instruction->isBranch()) {
-            const int target_pc = instruction->getTargetPc();
-            leaders.insert(target_pc);
-            leaders.insert(instruction->nextInstAddr());
-        }
-    }
-
-    size_t block_size = 0;
-    for (const auto &instruction : instructions) {
-        if (leaders.find(instruction->instAddr()) != leaders.end()) {
-            uint32_t id = basicBlocks.size();
-            if (id > 0) {
-                basicBlocks.back()->size = block_size;
-            }
-            block_size = 0;
-            basicBlocks.emplace_back(new BasicBlock(id, instruction));
-        }
-        block_size++;
-    }
-    basicBlocks.back()->size = block_size;
-    // exit basic block
-    basicBlocks.emplace_back(new BasicBlock(basicBlocks.size(), nullptr));
-}
-
-void
-ControlFlowInfo::connectBasicBlocks()
-{
-    BasicBlock* exit_bb = basicBlocks.back().get();
-    for (auto& bb : basicBlocks) {
-        if (bb->isExit()) {
-            break;
-        }
-        GPUStaticInst* last = lastInstruction(bb.get());
-        if (last->isReturn()) {
-            bb->successorIds.insert(exit_bb->id);
-            continue;
-        }
-        if (last->isBranch()) {
-            const uint32_t target_pc = last->getTargetPc();
-            BasicBlock* target_bb = basicBlock(target_pc);
-            bb->successorIds.insert(target_bb->id);
-        }
-
-        // Unconditional jump instructions have a unique successor
-        if (!last->isUnconditionalJump()) {
-            BasicBlock* next_bb = basicBlock(last->nextInstAddr());
-            bb->successorIds.insert(next_bb->id);
-        }
-    }
-}
-
-
-// In-place set intersection
-static void
-intersect(std::set<uint32_t>& a, const std::set<uint32_t>& b)
-{
-    std::set<uint32_t>::iterator it = a.begin();
-    while (it != a.end()) {
-        it = b.find(*it) != b.end() ? ++it : a.erase(it);
-    }
-}
-
-
-void
-ControlFlowInfo::findPostDominators()
-{
-    // the only postdominator of the exit block is itself
-    basicBlocks.back()->postDominatorIds.insert(basicBlocks.back()->id);
-    //copy all basic blocks to all postdominator lists except for exit block
-    for (auto& block : basicBlocks) {
-        if (!block->isExit()) {
-            for (uint32_t i = 0; i < basicBlocks.size(); i++) {
-                block->postDominatorIds.insert(i);
-            }
-        }
-    }
-
-    bool change = true;
-    while (change) {
-        change = false;
-        for (int h = basicBlocks.size() - 2; h >= 0; --h) {
-            size_t num_postdominators =
-                    basicBlocks[h]->postDominatorIds.size();
-            for (int s : basicBlocks[h]->successorIds) {
-                intersect(basicBlocks[h]->postDominatorIds,
-                          basicBlocks[s]->postDominatorIds);
-            }
-            basicBlocks[h]->postDominatorIds.insert(h);
-            change |= (num_postdominators
-                    != basicBlocks[h]->postDominatorIds.size());
-        }
-    }
-}
-
-
-// In-place set difference
-static void
-setDifference(std::set<uint32_t>&a,
-           const std::set<uint32_t>& b, uint32_t exception)
-{
-    for (uint32_t b_elem : b) {
-        if (b_elem != exception) {
-            a.erase(b_elem);
-        }
-    }
-}
-
-void
-ControlFlowInfo::findImmediatePostDominators()
-{
-    assert(basicBlocks.size() > 1); // Entry and exit blocks must be present
-
-    findPostDominators();
-
-    for (auto& basicBlock : basicBlocks) {
-        if (basicBlock->isExit()) {
-            continue;
-        }
-        std::set<uint32_t> candidates = basicBlock->postDominatorIds;
-        candidates.erase(basicBlock->id);
-        for (uint32_t postDominatorId : basicBlock->postDominatorIds) {
-            if (postDominatorId != basicBlock->id) {
-                setDifference(candidates,
-                           basicBlocks[postDominatorId]->postDominatorIds,
-                           postDominatorId);
-            }
-        }
-        assert(candidates.size() == 1);
-        GPUStaticInst* last_instruction = lastInstruction(basicBlock.get());
-        BasicBlock* ipd_block = basicBlocks[*(candidates.begin())].get();
-        if (!ipd_block->isExit()) {
-            GPUStaticInst* ipd_first_inst = ipd_block->firstInstruction;
-            last_instruction->ipdInstNum(ipd_first_inst->instAddr());
-        } else {
-            last_instruction->ipdInstNum(last_instruction->nextInstAddr());
-        }
-    }
-}
-
-void
-ControlFlowInfo::printPostDominators() const
-{
-    for (auto& block : basicBlocks) {
-        std::cout << "PD(" << block->id << ") = {";
-        std::copy(block->postDominatorIds.begin(),
-                  block->postDominatorIds.end(),
-                  std::ostream_iterator<uint32_t>(std::cout, ", "));
-        std::cout << "}" << std::endl;
-    }
-}
-
-void
-ControlFlowInfo::printImmediatePostDominators() const
-{
-    for (const auto& block : basicBlocks) {
-        if (block->isExit()) {
-            continue;
-        }
-        std::cout << "IPD(" << block->id << ") = ";
-        std::cout << postDominator(block.get())->id << ", ";
-    }
-    std::cout << std::endl;
-}
-void
-ControlFlowInfo::printBasicBlocks() const
-{
-    for (GPUStaticInst* inst : instructions) {
-        int inst_addr = inst->instAddr();
-        std::cout << inst_addr << " [" << basicBlock(inst_addr)->id
-                << "]: " << inst->disassemble();
-        if (inst->isBranch()) {
-            std::cout << ", PC = " << inst->getTargetPc();
-        }
-        std::cout << std::endl;
-    }
-}
-
-void
-ControlFlowInfo::printBasicBlockDot() const
-{
-    printf("digraph {\n");
-    for (const auto& basic_block : basicBlocks) {
-        printf("\t");
-        for (uint32_t successorId : basic_block->successorIds) {
-            printf("%d -> %d; ", basic_block->id, successorId);
-        }
-        printf("\n");
-    }
-    printf("}\n");
-}
diff --git a/src/gpu-compute/kernel_cfg.hh b/src/gpu-compute/kernel_cfg.hh
deleted file mode 100644 (file)
index d4959c8..0000000
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Steve Reinhardt
- */
-
-#ifndef __KERNEL_CFG_HH__
-#define __KERNEL_CFG_HH__
-
-#include <cstddef>
-#include <cstdint>
-#include <memory>
-#include <set>
-#include <vector>
-
-
-class GPUStaticInst;
-class HsailCode;
-
-struct BasicBlock
-{
-    BasicBlock(uint32_t num, GPUStaticInst* begin) :
-            id(num), size(0), firstInstruction(begin)
-    {
-    }
-
-    bool
-    isEntry() const
-    {
-        return !id;
-    }
-
-    bool
-    isExit() const
-    {
-        return !size;
-    }
-
-    /**
-     * Unique identifier for the block within a given kernel.
-     */
-    const uint32_t id;
-
-    /**
-     * Number of instructions contained in the block
-     */
-    size_t size;
-
-    /**
-     * Pointer to first instruction of the block.
-     */
-    GPUStaticInst* firstInstruction;
-
-    /**
-     * Identifiers of the blocks that follow (are reachable from) this block.
-     */
-    std::set<uint32_t> successorIds;
-
-    /**
-     * Identifiers of the blocks that will be visited from this block.
-     */
-    std::set<uint32_t> postDominatorIds;
-};
-
-class ControlFlowInfo
-{
-public:
-
-    /**
-     * Compute immediate post-dominator instruction for kernel instructions.
-     */
-    static void assignImmediatePostDominators(
-            const std::vector<GPUStaticInst*>& instructions);
-
-private:
-    ControlFlowInfo(const std::vector<GPUStaticInst*>& instructions);
-
-    GPUStaticInst* lastInstruction(const BasicBlock* block) const;
-
-    BasicBlock* basicBlock(int inst_addr) const;
-
-    BasicBlock* postDominator(const BasicBlock* block) const;
-
-    void createBasicBlocks();
-
-    void connectBasicBlocks();
-
-    void findPostDominators();
-
-    void findImmediatePostDominators();
-
-    void printBasicBlocks() const;
-
-    void printBasicBlockDot() const;
-
-    void printPostDominators() const;
-
-    void printImmediatePostDominators() const;
-
-    std::vector<std::unique_ptr<BasicBlock>> basicBlocks;
-    std::vector<GPUStaticInst*> instructions;
-};
-
-#endif // __KERNEL_CFG_HH__
diff --git a/src/gpu-compute/ndrange.hh b/src/gpu-compute/ndrange.hh
deleted file mode 100644 (file)
index db6dc45..0000000
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Steve Reinhardt
- */
-
-#ifndef __NDRANGE_HH__
-#define __NDRANGE_HH__
-
-#include "base/types.hh"
-#include "gpu-compute/qstruct.hh"
-
-struct NDRange
-{
-    // copy of the queue entry provided at dispatch
-    HsaQueueEntry q;
-
-    // The current workgroup id (3 dimensions)
-    int wgId[3];
-    // The number of workgroups in each dimension
-    int numWg[3];
-    // The total number of workgroups
-    int numWgTotal;
-
-    // The number of completed work groups
-    int numWgCompleted;
-    // The global workgroup ID
-    uint32_t globalWgId;
-
-    // flag indicating whether all work groups have been launched
-    bool wg_disp_rem;
-    // kernel complete
-    bool execDone;
-    bool userDoorBellSet;
-    volatile bool *addrToNotify;
-    volatile uint32_t *numDispLeft;
-    int dispatchId;
-    int curCid; // Current context id
-};
-
-#endif // __NDRANGE_HH__
diff --git a/src/gpu-compute/qstruct.hh b/src/gpu-compute/qstruct.hh
deleted file mode 100644 (file)
index b400dc0..0000000
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Brad Beckmann, Marc Orr
- */
-
-#ifndef __Q_STRUCT_HH__
-#define __Q_STRUCT_HH__
-
-#include <bitset>
-#include <cstdint>
-
-// Maximum number of arguments
-static const int KER_NUM_ARGS = 32;
-// Kernel argument buffer size
-static const int KER_ARGS_LENGTH = 512;
-
-class LdsChunk;
-struct NDRange;
-
-// Be very careful of alignment in this structure. The structure
-// must compile to the same layout in both 32-bit and 64-bit mode.
-struct HsaQueueEntry
-{
-    // Base pointer for array of instruction pointers
-    uint64_t code_ptr;
-    // Grid Size (3 dimensions)
-    uint32_t gdSize[3];
-    // Workgroup Size (3 dimensions)
-    uint32_t wgSize[3];
-    uint16_t sRegCount;
-    uint16_t dRegCount;
-    uint16_t cRegCount;
-    uint64_t privMemStart;
-    uint32_t privMemPerItem;
-    uint32_t privMemTotal;
-    uint64_t spillMemStart;
-    uint32_t spillMemPerItem;
-    uint32_t spillMemTotal;
-    uint64_t roMemStart;
-    uint32_t roMemTotal;
-    // Size (in bytes) of LDS
-    uint32_t ldsSize;
-    // Virtual Memory Id (unused right now)
-    uint32_t vmId;
-
-    // Pointer to dependency chain (unused now)
-    uint64_t depends;
-
-    // pointer to bool
-    uint64_t addrToNotify;
-    // pointer to uint32_t
-    uint64_t numDispLeft;
-
-    // variables to pass arguments when running in standalone mode,
-    // will be removed when run.py and sh.cpp have been updated to
-    // use args and offset arrays
-    uint64_t arg1;
-    uint64_t arg2;
-    uint64_t arg3;
-    uint64_t arg4;
-
-    // variables to pass arguments when running in cpu+gpu mode
-    uint8_t args[KER_ARGS_LENGTH];
-    uint16_t offsets[KER_NUM_ARGS];
-    uint16_t num_args;
-};
-
-// State that needs to be passed between the simulation and simulated app, a
-// pointer to this struct can be passed through the depends field in the
-// HsaQueueEntry struct
-struct HostState
-{
-    // cl_event* has original HsaQueueEntry for init
-    uint64_t event;
-};
-
-// Total number of HSA queues
-static const int HSAQ_NQUEUES = 8;
-
-// These values will eventually live in memory mapped registers
-// and be settable by the kernel mode driver.
-
-// Number of entries in each HSA queue
-static const int HSAQ_SIZE = 64;
-// Address of first HSA queue index
-static const int HSAQ_INDX_BASE = 0x10000ll;
-// Address of first HSA queue
-static const int HSAQ_BASE = 0x11000ll;
-// Suggested start of HSA code
-static const int HSA_CODE_BASE = 0x18000ll;
-
-// These are shortcuts for deriving the address of a specific
-// HSA queue or queue index
-#define HSAQ(n) (HSAQ_BASE + HSAQ_SIZE * sizeof(struct fsaQueue) * n)
-#define HSAQE(n,i) (HSAQ_BASE + (HSAQ_SIZE * n + i) * sizeof(struct fsaQueue))
-#define HSAQ_RI(n) (HSAQ_INDX_BASE + sizeof(int) * (n * 3 + 0))
-#define HSAQ_WI(n) (HSAQ_INDX_BASE + sizeof(int) * (n * 3 + 1))
-#define HSAQ_CI(n) (HSAQ_INDX_BASE + sizeof(int) * (n * 3 + 2))
-
-/*
- * Example code for writing to a queue
- *
- * void
- * ToQueue(int n,struct fsaQueue *val)
- * {
- *     int wi = *(int*)HSAQ_WI(n);
- *     int ri = *(int*)HSAQ_RI(n);
- *     int ci = *(int*)HSAQ_CI(n);
- *
- *     if (ci - ri < HSAQ_SIZE) {
- *         (*(int*)HSAQ_CI(n))++;
- *         *(HsaQueueEntry*)(HSAQE(n, (wi % HSAQ_SIZE))) = *val;
- *         (*(int*)HSAQ_WI(n))++;
- *     }
- * }
- */
-
-#endif // __Q_STRUCT_HH__
diff --git a/src/gpu-compute/vector_register_state.cc b/src/gpu-compute/vector_register_state.cc
deleted file mode 100644 (file)
index e177d3b..0000000
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: John Kalamatianos
- */
-
-#include "gpu-compute/vector_register_state.hh"
-
-#include <limits>
-
-#include "gpu-compute/compute_unit.hh"
-
-VecRegisterState::VecRegisterState() : computeUnit(nullptr)
-{
-    s_reg.clear();
-    d_reg.clear();
-}
-
-void
-VecRegisterState::setParent(ComputeUnit *_computeUnit)
-{
-    computeUnit = _computeUnit;
-    _name = computeUnit->name() + ".VecRegState";
-}
-
-void
-VecRegisterState::init(uint32_t _size, uint32_t wf_size)
-{
-    s_reg.resize(_size);
-    fatal_if(wf_size > std::numeric_limits<unsigned long long>::digits ||
-             wf_size <= 0,
-             "WF size is larger than the host can support or is zero");
-    fatal_if((wf_size & (wf_size - 1)) != 0,
-             "Wavefront size should be a power of 2");
-    for (int i = 0; i < s_reg.size(); ++i) {
-        s_reg[i].resize(wf_size, 0);
-    }
-    d_reg.resize(_size);
-    for (int i = 0; i < d_reg.size(); ++i) {
-        d_reg[i].resize(wf_size, 0);
-    }
-}
diff --git a/src/gpu-compute/vector_register_state.hh b/src/gpu-compute/vector_register_state.hh
deleted file mode 100644 (file)
index 97a0d8e..0000000
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: John Kalamatianos
- */
-
-#ifndef __VECTOR_REGISTER_STATE_HH__
-#define __VECTOR_REGISTER_STATE_HH__
-
-#include <array>
-#include <cassert>
-#include <string>
-#include <vector>
-
-#include "gpu-compute/misc.hh"
-
-class ComputeUnit;
-
-// Vector Register State per SIMD unit (contents of the vector
-// registers in the VRF of the SIMD)
-class VecRegisterState
-{
-  public:
-    VecRegisterState();
-    void init(uint32_t _size, uint32_t wf_size);
-
-    const std::string& name() const { return _name; }
-    void setParent(ComputeUnit *_computeUnit);
-    void regStats() { }
-
-    // Access methods
-    template<typename T>
-    T
-    read(int regIdx, int threadId=0) {
-        T *p0;
-        assert(sizeof(T) == 4 || sizeof(T) == 8);
-        if (sizeof(T) == 4) {
-            p0 = (T*)(&s_reg[regIdx][threadId]);
-        } else {
-            p0 = (T*)(&d_reg[regIdx][threadId]);
-        }
-
-        return *p0;
-    }
-
-    template<typename T>
-    void
-    write(unsigned int regIdx, T value, int threadId=0) {
-        T *p0;
-        assert(sizeof(T) == 4 || sizeof(T) == 8);
-        if (sizeof(T) == 4) {
-            p0 = (T*)(&s_reg[regIdx][threadId]);
-        } else {
-            p0 = (T*)(&d_reg[regIdx][threadId]);
-        }
-
-        *p0 = value;
-    }
-
-    // (Single Precision) Vector Register File size.
-    int regSize() { return s_reg.size(); }
-
-  private:
-    ComputeUnit *computeUnit;
-    std::string _name;
-    // 32-bit Single Precision Vector Register State
-    std::vector<std::vector<uint32_t>> s_reg;
-    // 64-bit Double Precision Vector Register State
-    std::vector<std::vector<uint64_t>> d_reg;
-};
-
-#endif // __VECTOR_REGISTER_STATE_HH__
index 500c5c71d5000d3884df7609ad23bdeb83abb564..d33b5b0149d6a3689dbb32052aa13392bd5dcbdf 100755 (executable)
@@ -88,7 +88,7 @@ def _validateTags(commit_header):
     # @todo this is error prone, and should be extracted automatically from
     #       a file
 
-    valid_tags = ["arch", "arch-arm", "arch-gcn3", "arch-hsail",
+    valid_tags = ["arch", "arch-arm", "arch-gcn3",
         "arch-mips", "arch-power", "arch-riscv", "arch-sparc", "arch-x86",
         "base", "configs", "cpu", "cpu-kvm", "cpu-minor", "cpu-o3",
         "cpu-simple", "dev", "dev-arm", "dev-virtio", "ext", "fastmodel",
index 1f1404c94e64d106646d43c082c94a8af27e233c..e7187757c669d4dc0127c9e7e1ea21d1ab953fee 100755 (executable)
@@ -49,7 +49,7 @@ add_option('--builds',
            'SPARC,' \
            'X86,X86_MESI_Two_Level,' \
            'RISCV,' \
-           'HSAIL_X86',
+           'GCN3_X86',
            help="comma-separated build targets to test (default: '%default')")
 add_option('--modes',
            default='se,fs',