From 9d51dec937906e0acf6cb3210eeac9d25e0be46c Mon Sep 17 00:00:00 2001 From: Tony Gutierrez Date: Tue, 1 May 2018 17:34:29 -0400 Subject: [PATCH] arch, gpu-compute: Remove HSAIL related files Change-Id: Iefba0a38d62da7598bbfe3fe6ff46454d35144b1 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/28410 Reviewed-by: Anthony Gutierrez Maintainer: Anthony Gutierrez Tested-by: kokoro --- MAINTAINERS | 1 - SConstruct | 2 +- build_opts/HSAIL_X86 | 5 - src/arch/gcn3/SConscript | 3 + src/arch/hsail/Brig.h | 67 - src/arch/hsail/Brig_new.hpp | 1589 ----------------- src/arch/hsail/SConscript | 53 - src/arch/hsail/SConsopts | 40 - src/arch/hsail/gen.py | 912 ---------- src/arch/hsail/gpu_decoder.hh | 77 - src/arch/hsail/gpu_isa.hh | 75 - src/arch/hsail/gpu_types.hh | 69 - src/arch/hsail/insts/branch.cc | 86 - src/arch/hsail/insts/branch.hh | 441 ----- src/arch/hsail/insts/decl.hh | 1298 -------------- src/arch/hsail/insts/gpu_static_inst.cc | 53 - src/arch/hsail/insts/gpu_static_inst.hh | 66 - src/arch/hsail/insts/main.cc | 209 --- src/arch/hsail/insts/mem.cc | 76 - src/arch/hsail/insts/mem.hh | 1777 ------------------- src/arch/hsail/insts/mem_impl.hh | 648 ------- src/arch/hsail/insts/pseudo_inst.cc | 791 --------- src/arch/hsail/operand.cc | 468 ----- src/arch/hsail/operand.hh | 796 --------- src/gpu-compute/brig_object.cc | 476 ----- src/gpu-compute/brig_object.hh | 134 -- src/gpu-compute/cl_driver.cc | 279 --- src/gpu-compute/cl_driver.hh | 77 - src/gpu-compute/cl_event.hh | 49 - src/gpu-compute/condition_register_state.cc | 83 - src/gpu-compute/condition_register_state.hh | 101 -- src/gpu-compute/hsa_code.hh | 101 -- src/gpu-compute/hsa_kernel_info.hh | 80 - src/gpu-compute/hsa_object.cc | 77 - src/gpu-compute/hsa_object.hh | 74 - src/gpu-compute/hsail_code.cc | 460 ----- src/gpu-compute/hsail_code.hh | 445 ----- src/gpu-compute/kernel_cfg.cc | 295 --- src/gpu-compute/kernel_cfg.hh | 133 -- src/gpu-compute/ndrange.hh | 70 - src/gpu-compute/qstruct.hh | 148 -- src/gpu-compute/vector_register_state.cc | 71 - src/gpu-compute/vector_register_state.hh | 101 -- util/git-commit-msg.py | 2 +- util/regress | 2 +- 45 files changed, 6 insertions(+), 12854 deletions(-) delete mode 100644 build_opts/HSAIL_X86 delete mode 100644 src/arch/hsail/Brig.h delete mode 100644 src/arch/hsail/Brig_new.hpp delete mode 100644 src/arch/hsail/SConscript delete mode 100644 src/arch/hsail/SConsopts delete mode 100755 src/arch/hsail/gen.py delete mode 100644 src/arch/hsail/gpu_decoder.hh delete mode 100644 src/arch/hsail/gpu_isa.hh delete mode 100644 src/arch/hsail/gpu_types.hh delete mode 100644 src/arch/hsail/insts/branch.cc delete mode 100644 src/arch/hsail/insts/branch.hh delete mode 100644 src/arch/hsail/insts/decl.hh delete mode 100644 src/arch/hsail/insts/gpu_static_inst.cc delete mode 100644 src/arch/hsail/insts/gpu_static_inst.hh delete mode 100644 src/arch/hsail/insts/main.cc delete mode 100644 src/arch/hsail/insts/mem.cc delete mode 100644 src/arch/hsail/insts/mem.hh delete mode 100644 src/arch/hsail/insts/mem_impl.hh delete mode 100644 src/arch/hsail/insts/pseudo_inst.cc delete mode 100644 src/arch/hsail/operand.cc delete mode 100644 src/arch/hsail/operand.hh delete mode 100644 src/gpu-compute/brig_object.cc delete mode 100644 src/gpu-compute/brig_object.hh delete mode 100644 src/gpu-compute/cl_driver.cc delete mode 100644 src/gpu-compute/cl_driver.hh delete mode 100644 src/gpu-compute/cl_event.hh delete mode 100644 src/gpu-compute/condition_register_state.cc delete mode 100644 src/gpu-compute/condition_register_state.hh delete mode 100644 src/gpu-compute/hsa_code.hh delete mode 100644 src/gpu-compute/hsa_kernel_info.hh delete mode 100644 src/gpu-compute/hsa_object.cc delete mode 100644 src/gpu-compute/hsa_object.hh delete mode 100644 src/gpu-compute/hsail_code.cc delete mode 100644 src/gpu-compute/hsail_code.hh delete mode 100644 src/gpu-compute/kernel_cfg.cc delete mode 100644 src/gpu-compute/kernel_cfg.hh delete mode 100644 src/gpu-compute/ndrange.hh delete mode 100644 src/gpu-compute/qstruct.hh delete mode 100644 src/gpu-compute/vector_register_state.cc delete mode 100644 src/gpu-compute/vector_register_state.hh diff --git a/MAINTAINERS b/MAINTAINERS index 9a4d7fc51..92c4ce818 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -29,7 +29,6 @@ arch-arm: Andreas Sandberg Giacomo Travaglini arch-gcn3: -arch-hsail: Tony Gutierrez arch-mips: arch-power: diff --git a/SConstruct b/SConstruct index 3a03af497..4bc3d0e39 100755 --- a/SConstruct +++ b/SConstruct @@ -989,7 +989,7 @@ all_gpu_isa_list.sort() sticky_vars.AddVariables( EnumVariable('TARGET_ISA', 'Target ISA', 'null', all_isa_list), - EnumVariable('TARGET_GPU_ISA', 'Target GPU ISA', 'hsail', all_gpu_isa_list), + EnumVariable('TARGET_GPU_ISA', 'Target GPU ISA', 'gcn3', all_gpu_isa_list), ListVariable('CPU_MODELS', 'CPU models', sorted(n for n,m in CpuModel.dict.items() if m.default), sorted(CpuModel.dict.keys())), diff --git a/build_opts/HSAIL_X86 b/build_opts/HSAIL_X86 deleted file mode 100644 index 105f82cbd..000000000 --- a/build_opts/HSAIL_X86 +++ /dev/null @@ -1,5 +0,0 @@ -PROTOCOL = 'GPU_RfO' -TARGET_ISA = 'x86' -TARGET_GPU_ISA = 'hsail' -BUILD_GPU = True -CPU_MODELS = 'AtomicSimpleCPU,O3CPU,TimingSimpleCPU' diff --git a/src/arch/gcn3/SConscript b/src/arch/gcn3/SConscript index f438cdb10..da57bf552 100644 --- a/src/arch/gcn3/SConscript +++ b/src/arch/gcn3/SConscript @@ -37,6 +37,9 @@ import sys Import('*') +if not env['BUILD_GPU']: + Return() + if env['TARGET_GPU_ISA'] == 'gcn3': Source('decoder.cc') Source('insts/gpu_static_inst.cc') diff --git a/src/arch/hsail/Brig.h b/src/arch/hsail/Brig.h deleted file mode 100644 index b260157ab..000000000 --- a/src/arch/hsail/Brig.h +++ /dev/null @@ -1,67 +0,0 @@ -// University of Illinois/NCSA -// Open Source License -// -// Copyright (c) 2013, Advanced Micro Devices, Inc. -// All rights reserved. -// -// Developed by: -// -// HSA Team -// -// Advanced Micro Devices, Inc -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of -// this software and associated documentation files (the "Software"), to deal with -// the Software without restriction, including without limitation the rights to -// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -// of the Software, and to permit persons to whom the Software is furnished to do -// so, subject to the following conditions: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimers in the -// documentation and/or other materials provided with the distribution. -// -// * Neither the names of the LLVM Team, University of Illinois at -// Urbana-Champaign, nor the names of its contributors may be used to -// endorse or promote products derived from this Software without specific -// prior written permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE -// SOFTWARE. -#ifndef INTERNAL_BRIG_H -#define INTERNAL_BRIG_H - -#include - -namespace Brig { -#include "Brig_new.hpp" - -// These typedefs provide some backward compatibility with earlier versions -// of Brig.h, reducing the number of code changes. The distinct names also -// increase legibility by showing the code's intent. -typedef BrigBase BrigDirective; -typedef BrigBase BrigOperand; - -enum BrigMemoryFenceSegments { // for internal use only - //.mnemo={ s/^BRIG_MEMORY_FENCE_SEGMENT_//;lc } - //.mnemo_token=_EMMemoryFenceSegments - //.mnemo_context=EInstModifierInstFenceContext - BRIG_MEMORY_FENCE_SEGMENT_GLOBAL = 0, - BRIG_MEMORY_FENCE_SEGMENT_GROUP = 1, - BRIG_MEMORY_FENCE_SEGMENT_IMAGE = 2, - BRIG_MEMORY_FENCE_SEGMENT_LAST = 3 //.skip -}; - -} - -#endif // defined(INTERNAL_BRIG_H) diff --git a/src/arch/hsail/Brig_new.hpp b/src/arch/hsail/Brig_new.hpp deleted file mode 100644 index 95fcf4d46..000000000 --- a/src/arch/hsail/Brig_new.hpp +++ /dev/null @@ -1,1589 +0,0 @@ -// University of Illinois/NCSA -// Open Source License -// -// Copyright (c) 2013-2015, Advanced Micro Devices, Inc. -// All rights reserved. -// -// Developed by: -// -// HSA Team -// -// Advanced Micro Devices, Inc -// -// www.amd.com -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of -// this software and associated documentation files (the "Software"), to deal with -// the Software without restriction, including without limitation the rights to -// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -// of the Software, and to permit persons to whom the Software is furnished to do -// so, subject to the following conditions: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// -// * Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimers in the -// documentation and/or other materials provided with the distribution. -// -// * Neither the names of the LLVM Team, University of Illinois at -// Urbana-Champaign, nor the names of its contributors may be used to -// endorse or promote products derived from this Software without specific -// prior written permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE -// SOFTWARE. - -//.ignore{ - -#ifndef INCLUDED_BRIG_H -#define INCLUDED_BRIG_H - -#include - -enum BrigAuxDefs { - MAX_OPERANDS_NUM = 6 -}; - -//} - -typedef uint32_t BrigVersion32_t; - -enum BrigVersion { - - //.nowrap - //.nodump - //.nollvm - - BRIG_VERSION_HSAIL_MAJOR = 1, - BRIG_VERSION_HSAIL_MINOR = 0, - BRIG_VERSION_BRIG_MAJOR = 1, - BRIG_VERSION_BRIG_MINOR = 0 -}; - -typedef uint8_t BrigAlignment8_t; //.defValue=BRIG_ALIGNMENT_NONE - -typedef uint8_t BrigAllocation8_t; //.defValue=BRIG_ALLOCATION_NONE - -typedef uint8_t BrigAluModifier8_t; - -typedef uint8_t BrigAtomicOperation8_t; - -typedef uint32_t BrigCodeOffset32_t; //.defValue=0 //.wtype=ItemRef - -typedef uint8_t BrigCompareOperation8_t; - -typedef uint16_t BrigControlDirective16_t; - -typedef uint32_t BrigDataOffset32_t; - -typedef BrigDataOffset32_t BrigDataOffsetCodeList32_t; //.wtype=ListRef //.defValue=0 - -typedef BrigDataOffset32_t BrigDataOffsetOperandList32_t; //.wtype=ListRef //.defValue=0 - -typedef BrigDataOffset32_t BrigDataOffsetString32_t; //.wtype=StrRef //.defValue=0 - -typedef uint8_t BrigExecutableModifier8_t; - -typedef uint8_t BrigImageChannelOrder8_t; //.defValue=BRIG_CHANNEL_ORDER_UNKNOWN - -typedef uint8_t BrigImageChannelType8_t; //.defValue=BRIG_CHANNEL_TYPE_UNKNOWN - -typedef uint8_t BrigImageGeometry8_t; //.defValue=BRIG_GEOMETRY_UNKNOWN - -typedef uint8_t BrigImageQuery8_t; - -typedef uint16_t BrigKind16_t; - -typedef uint8_t BrigLinkage8_t; //.defValue=BRIG_LINKAGE_NONE - -typedef uint8_t BrigMachineModel8_t; //.defValue=BRIG_MACHINE_LARGE - -typedef uint8_t BrigMemoryModifier8_t; - -typedef uint8_t BrigMemoryOrder8_t; //.defValue=BRIG_MEMORY_ORDER_RELAXED - -typedef uint8_t BrigMemoryScope8_t; //.defValue=BRIG_MEMORY_SCOPE_SYSTEM - -typedef uint16_t BrigOpcode16_t; - -typedef uint32_t BrigOperandOffset32_t; //.defValue=0 //.wtype=ItemRef - -typedef uint8_t BrigPack8_t; //.defValue=BRIG_PACK_NONE - -typedef uint8_t BrigProfile8_t; //.defValue=BRIG_PROFILE_FULL - -typedef uint16_t BrigRegisterKind16_t; - -typedef uint8_t BrigRound8_t; //.defValue=BRIG_ROUND_NONE - -typedef uint8_t BrigSamplerAddressing8_t; //.defValue=BRIG_ADDRESSING_CLAMP_TO_EDGE - -typedef uint8_t BrigSamplerCoordNormalization8_t; - -typedef uint8_t BrigSamplerFilter8_t; - -typedef uint8_t BrigSamplerQuery8_t; - -typedef uint32_t BrigSectionIndex32_t; - -typedef uint8_t BrigSegCvtModifier8_t; - -typedef uint8_t BrigSegment8_t; //.defValue=BRIG_SEGMENT_NONE - -typedef uint32_t BrigStringOffset32_t; //.defValue=0 //.wtype=StrRef - -typedef uint16_t BrigType16_t; - -typedef uint8_t BrigVariableModifier8_t; - -typedef uint8_t BrigWidth8_t; - -typedef uint32_t BrigExceptions32_t; - -enum BrigKind { - - //.nollvm - // - //.wname={ s/^BRIG_KIND//; MACRO2Name($_) } - //.mnemo=$wname{ $wname } - // - //.sizeof=$wname{ "sizeof(".$structs->{"Brig".$wname}->{rawbrig}.")" } - //.sizeof_switch //.sizeof_proto="int size_of_brig_record(unsigned arg)" //.sizeof_default="return -1" - // - //.isBodyOnly={ "false" } - //.isBodyOnly_switch //.isBodyOnly_proto="bool isBodyOnly(Directive d)" //.isBodyOnly_arg="d.kind()" - //.isBodyOnly_default="assert(false); return false" - // - //.isToplevelOnly={ "false" } - //.isToplevelOnly_switch //.isToplevelOnly_proto="bool isToplevelOnly(Directive d)" //.isToplevelOnly_arg="d.kind()" - //.isToplevelOnly_default="assert(false); return false" - - BRIG_KIND_NONE = 0x0000, //.skip - - BRIG_KIND_DIRECTIVE_BEGIN = 0x1000, //.skip - BRIG_KIND_DIRECTIVE_ARG_BLOCK_END = 0x1000, //.isBodyOnly=true - BRIG_KIND_DIRECTIVE_ARG_BLOCK_START = 0x1001, //.isBodyOnly=true - BRIG_KIND_DIRECTIVE_COMMENT = 0x1002, - BRIG_KIND_DIRECTIVE_CONTROL = 0x1003, //.isBodyOnly=true - BRIG_KIND_DIRECTIVE_EXTENSION = 0x1004, //.isToplevelOnly=true - BRIG_KIND_DIRECTIVE_FBARRIER = 0x1005, - BRIG_KIND_DIRECTIVE_FUNCTION = 0x1006, //.isToplevelOnly=true - BRIG_KIND_DIRECTIVE_INDIRECT_FUNCTION = 0x1007, //.isToplevelOnly=true - BRIG_KIND_DIRECTIVE_KERNEL = 0x1008, //.isToplevelOnly=true - BRIG_KIND_DIRECTIVE_LABEL = 0x1009, //.isBodyOnly=true - BRIG_KIND_DIRECTIVE_LOC = 0x100a, - BRIG_KIND_DIRECTIVE_MODULE = 0x100b, //.isToplevelOnly=true - BRIG_KIND_DIRECTIVE_PRAGMA = 0x100c, - BRIG_KIND_DIRECTIVE_SIGNATURE = 0x100d, //.isToplevelOnly=true - BRIG_KIND_DIRECTIVE_VARIABLE = 0x100e, - BRIG_KIND_DIRECTIVE_END = 0x100f, //.skip - - BRIG_KIND_INST_BEGIN = 0x2000, //.skip - BRIG_KIND_INST_ADDR = 0x2000, - BRIG_KIND_INST_ATOMIC = 0x2001, - BRIG_KIND_INST_BASIC = 0x2002, - BRIG_KIND_INST_BR = 0x2003, - BRIG_KIND_INST_CMP = 0x2004, - BRIG_KIND_INST_CVT = 0x2005, - BRIG_KIND_INST_IMAGE = 0x2006, - BRIG_KIND_INST_LANE = 0x2007, - BRIG_KIND_INST_MEM = 0x2008, - BRIG_KIND_INST_MEM_FENCE = 0x2009, - BRIG_KIND_INST_MOD = 0x200a, - BRIG_KIND_INST_QUERY_IMAGE = 0x200b, - BRIG_KIND_INST_QUERY_SAMPLER = 0x200c, - BRIG_KIND_INST_QUEUE = 0x200d, - BRIG_KIND_INST_SEG = 0x200e, - BRIG_KIND_INST_SEG_CVT = 0x200f, - BRIG_KIND_INST_SIGNAL = 0x2010, - BRIG_KIND_INST_SOURCE_TYPE = 0x2011, - BRIG_KIND_INST_END = 0x2012, //.skip - - BRIG_KIND_OPERAND_BEGIN = 0x3000, //.skip - BRIG_KIND_OPERAND_ADDRESS = 0x3000, - BRIG_KIND_OPERAND_ALIGN = 0x3001, - BRIG_KIND_OPERAND_CODE_LIST = 0x3002, - BRIG_KIND_OPERAND_CODE_REF = 0x3003, - BRIG_KIND_OPERAND_CONSTANT_BYTES = 0x3004, - BRIG_KIND_OPERAND_RESERVED = 0x3005, //.skip - BRIG_KIND_OPERAND_CONSTANT_IMAGE = 0x3006, - BRIG_KIND_OPERAND_CONSTANT_OPERAND_LIST = 0x3007, - BRIG_KIND_OPERAND_CONSTANT_SAMPLER = 0x3008, - BRIG_KIND_OPERAND_OPERAND_LIST = 0x3009, - BRIG_KIND_OPERAND_REGISTER = 0x300a, - BRIG_KIND_OPERAND_STRING = 0x300b, - BRIG_KIND_OPERAND_WAVESIZE = 0x300c, - BRIG_KIND_OPERAND_END = 0x300d //.skip -}; - -enum BrigAlignment { - - //.mnemo={ s/^BRIG_ALIGNMENT_//; lc } - //.mnemo_proto="const char* align2str(unsigned arg)" - // - //.bytes={ /(\d+)/ ? $1 : undef } - //.bytes_switch //.bytes_proto="unsigned align2num(unsigned arg)" //.bytes_default="assert(false); return -1" - // - //.rbytes=$bytes{ $bytes } - //.rbytes_switch //.rbytes_reverse //.rbytes_proto="BrigAlignment num2align(uint64_t arg)" - //.rbytes_default="return BRIG_ALIGNMENT_LAST" - // - //.print=$bytes{ $bytes>1 ? "_align($bytes)" : "" } - - BRIG_ALIGNMENT_NONE = 0, //.no_mnemo - BRIG_ALIGNMENT_1 = 1, //.mnemo="" - BRIG_ALIGNMENT_2 = 2, - BRIG_ALIGNMENT_4 = 3, - BRIG_ALIGNMENT_8 = 4, - BRIG_ALIGNMENT_16 = 5, - BRIG_ALIGNMENT_32 = 6, - BRIG_ALIGNMENT_64 = 7, - BRIG_ALIGNMENT_128 = 8, - BRIG_ALIGNMENT_256 = 9, - - BRIG_ALIGNMENT_LAST, //.skip - BRIG_ALIGNMENT_MAX = BRIG_ALIGNMENT_LAST - 1 //.skip -}; - -enum BrigAllocation { - - //.mnemo={ s/^BRIG_ALLOCATION_//;lc } - //.mnemo_token=EAllocKind - - BRIG_ALLOCATION_NONE = 0, //.mnemo="" - BRIG_ALLOCATION_PROGRAM = 1, - BRIG_ALLOCATION_AGENT = 2, - BRIG_ALLOCATION_AUTOMATIC = 3 -}; - -enum BrigAluModifierMask { - BRIG_ALU_FTZ = 1 -}; - -enum BrigAtomicOperation { - - //.tdcaption="Atomic Operations" - // - //.mnemo={ s/^BRIG_ATOMIC_//;lc } - //.mnemo_token=_EMAtomicOp - //.mnemo_context=EInstModifierInstAtomicContext - // - //.print=$mnemo{ "_$mnemo" } - - BRIG_ATOMIC_ADD = 0, - BRIG_ATOMIC_AND = 1, - BRIG_ATOMIC_CAS = 2, - BRIG_ATOMIC_EXCH = 3, - BRIG_ATOMIC_LD = 4, - BRIG_ATOMIC_MAX = 5, - BRIG_ATOMIC_MIN = 6, - BRIG_ATOMIC_OR = 7, - BRIG_ATOMIC_ST = 8, - BRIG_ATOMIC_SUB = 9, - BRIG_ATOMIC_WRAPDEC = 10, - BRIG_ATOMIC_WRAPINC = 11, - BRIG_ATOMIC_XOR = 12, - BRIG_ATOMIC_WAIT_EQ = 13, - BRIG_ATOMIC_WAIT_NE = 14, - BRIG_ATOMIC_WAIT_LT = 15, - BRIG_ATOMIC_WAIT_GTE = 16, - BRIG_ATOMIC_WAITTIMEOUT_EQ = 17, - BRIG_ATOMIC_WAITTIMEOUT_NE = 18, - BRIG_ATOMIC_WAITTIMEOUT_LT = 19, - BRIG_ATOMIC_WAITTIMEOUT_GTE = 20 -}; - -enum BrigCompareOperation { - - //.tdcaption="Comparison Operators" - // - //.mnemo={ s/^BRIG_COMPARE_//;lc } - //.mnemo_token=_EMCompare - // - //.print=$mnemo{ "_$mnemo" } - - BRIG_COMPARE_EQ = 0, - BRIG_COMPARE_NE = 1, - BRIG_COMPARE_LT = 2, - BRIG_COMPARE_LE = 3, - BRIG_COMPARE_GT = 4, - BRIG_COMPARE_GE = 5, - BRIG_COMPARE_EQU = 6, - BRIG_COMPARE_NEU = 7, - BRIG_COMPARE_LTU = 8, - BRIG_COMPARE_LEU = 9, - BRIG_COMPARE_GTU = 10, - BRIG_COMPARE_GEU = 11, - BRIG_COMPARE_NUM = 12, - BRIG_COMPARE_NAN = 13, - BRIG_COMPARE_SEQ = 14, - BRIG_COMPARE_SNE = 15, - BRIG_COMPARE_SLT = 16, - BRIG_COMPARE_SLE = 17, - BRIG_COMPARE_SGT = 18, - BRIG_COMPARE_SGE = 19, - BRIG_COMPARE_SGEU = 20, - BRIG_COMPARE_SEQU = 21, - BRIG_COMPARE_SNEU = 22, - BRIG_COMPARE_SLTU = 23, - BRIG_COMPARE_SLEU = 24, - BRIG_COMPARE_SNUM = 25, - BRIG_COMPARE_SNAN = 26, - BRIG_COMPARE_SGTU = 27 -}; - -enum BrigControlDirective { - - //.mnemo={ s/^BRIG_CONTROL_//;lc } - //.mnemo_token=EControl - // - //.print=$mnemo{ $mnemo } - - BRIG_CONTROL_NONE = 0, //.skip - BRIG_CONTROL_ENABLEBREAKEXCEPTIONS = 1, - BRIG_CONTROL_ENABLEDETECTEXCEPTIONS = 2, - BRIG_CONTROL_MAXDYNAMICGROUPSIZE = 3, - BRIG_CONTROL_MAXFLATGRIDSIZE = 4, - BRIG_CONTROL_MAXFLATWORKGROUPSIZE = 5, - BRIG_CONTROL_REQUIREDDIM = 6, - BRIG_CONTROL_REQUIREDGRIDSIZE = 7, - BRIG_CONTROL_REQUIREDWORKGROUPSIZE = 8, - BRIG_CONTROL_REQUIRENOPARTIALWORKGROUPS = 9 -}; - -enum BrigExecutableModifierMask { - //.nodump - BRIG_EXECUTABLE_DEFINITION = 1 -}; - -enum BrigImageChannelOrder { - - //.mnemo={ s/^BRIG_CHANNEL_ORDER_?//;lc } - //.mnemo_token=EImageOrder - //.mnemo_context=EImageOrderContext - // - //.print=$mnemo{ $mnemo } - - BRIG_CHANNEL_ORDER_A = 0, - BRIG_CHANNEL_ORDER_R = 1, - BRIG_CHANNEL_ORDER_RX = 2, - BRIG_CHANNEL_ORDER_RG = 3, - BRIG_CHANNEL_ORDER_RGX = 4, - BRIG_CHANNEL_ORDER_RA = 5, - BRIG_CHANNEL_ORDER_RGB = 6, - BRIG_CHANNEL_ORDER_RGBX = 7, - BRIG_CHANNEL_ORDER_RGBA = 8, - BRIG_CHANNEL_ORDER_BGRA = 9, - BRIG_CHANNEL_ORDER_ARGB = 10, - BRIG_CHANNEL_ORDER_ABGR = 11, - BRIG_CHANNEL_ORDER_SRGB = 12, - BRIG_CHANNEL_ORDER_SRGBX = 13, - BRIG_CHANNEL_ORDER_SRGBA = 14, - BRIG_CHANNEL_ORDER_SBGRA = 15, - BRIG_CHANNEL_ORDER_INTENSITY = 16, - BRIG_CHANNEL_ORDER_LUMINANCE = 17, - BRIG_CHANNEL_ORDER_DEPTH = 18, - BRIG_CHANNEL_ORDER_DEPTH_STENCIL = 19, - - // used internally - BRIG_CHANNEL_ORDER_UNKNOWN, //.mnemo="" // used when no order is specified - - BRIG_CHANNEL_ORDER_FIRST_USER_DEFINED = 128 //.skip - -}; - -enum BrigImageChannelType { - - //.mnemo={ s/^BRIG_CHANNEL_TYPE_//;lc } - //.mnemo_token=EImageFormat - // - //.print=$mnemo{ $mnemo } - - BRIG_CHANNEL_TYPE_SNORM_INT8 = 0, - BRIG_CHANNEL_TYPE_SNORM_INT16 = 1, - BRIG_CHANNEL_TYPE_UNORM_INT8 = 2, - BRIG_CHANNEL_TYPE_UNORM_INT16 = 3, - BRIG_CHANNEL_TYPE_UNORM_INT24 = 4, - BRIG_CHANNEL_TYPE_UNORM_SHORT_555 = 5, - BRIG_CHANNEL_TYPE_UNORM_SHORT_565 = 6, - BRIG_CHANNEL_TYPE_UNORM_INT_101010 = 7, - BRIG_CHANNEL_TYPE_SIGNED_INT8 = 8, - BRIG_CHANNEL_TYPE_SIGNED_INT16 = 9, - BRIG_CHANNEL_TYPE_SIGNED_INT32 = 10, - BRIG_CHANNEL_TYPE_UNSIGNED_INT8 = 11, - BRIG_CHANNEL_TYPE_UNSIGNED_INT16 = 12, - BRIG_CHANNEL_TYPE_UNSIGNED_INT32 = 13, - BRIG_CHANNEL_TYPE_HALF_FLOAT = 14, - BRIG_CHANNEL_TYPE_FLOAT = 15, - - // used internally - BRIG_CHANNEL_TYPE_UNKNOWN, //.mnemo="" - - BRIG_CHANNEL_TYPE_FIRST_USER_DEFINED = 128 //.skip -}; - -enum BrigImageGeometry { - - //.tdcaption="Geometry" - // - //.mnemo={ s/^BRIG_GEOMETRY_//;lc } - //.mnemo_token=EImageGeometry - // - //.dim={/_([0-9]+D)(A)?/ ? $1+(defined $2?1:0) : undef} - //.dim_switch //.dim_proto="unsigned getBrigGeometryDim(unsigned geo)" //.dim_arg="geo" - //.dim_default="assert(0); return 0" - // - //.depth={/DEPTH$/?"true":"false"} - //.depth_switch //.depth_proto="bool isBrigGeometryDepth(unsigned geo)" //.depth_arg="geo" - //.depth_default="return false" - - BRIG_GEOMETRY_1D = 0, - BRIG_GEOMETRY_2D = 1, - BRIG_GEOMETRY_3D = 2, - BRIG_GEOMETRY_1DA = 3, - BRIG_GEOMETRY_2DA = 4, - BRIG_GEOMETRY_1DB = 5, - BRIG_GEOMETRY_2DDEPTH = 6, - BRIG_GEOMETRY_2DADEPTH = 7, - - // used internally - BRIG_GEOMETRY_UNKNOWN, //.mnemo="" - - BRIG_GEOMETRY_FIRST_USER_DEFINED = 128 //.skip -}; - -enum BrigImageQuery { - - //.mnemo={ s/^BRIG_IMAGE_QUERY_//;lc } - // - //.print=$mnemo{ $mnemo } - - BRIG_IMAGE_QUERY_WIDTH = 0, - BRIG_IMAGE_QUERY_HEIGHT = 1, - BRIG_IMAGE_QUERY_DEPTH = 2, - BRIG_IMAGE_QUERY_ARRAY = 3, - BRIG_IMAGE_QUERY_CHANNELORDER = 4, - BRIG_IMAGE_QUERY_CHANNELTYPE = 5, - BRIG_IMAGE_QUERY_NUMMIPLEVELS = 6 -}; - -enum BrigLinkage { - - //.mnemo={ s/^BRIG_LINKAGE_//;s/NONE//;lc } - - BRIG_LINKAGE_NONE = 0, - BRIG_LINKAGE_PROGRAM = 1, - BRIG_LINKAGE_MODULE = 2, - BRIG_LINKAGE_FUNCTION = 3, - BRIG_LINKAGE_ARG = 4 -}; - -enum BrigMachineModel { - - //.mnemo={ s/^BRIG_MACHINE_//; '$'.lc } - //.mnemo_token=ETargetMachine - // - //.print=$mnemo{ $mnemo } - - BRIG_MACHINE_SMALL = 0, - BRIG_MACHINE_LARGE = 1, - - BRIG_MACHINE_UNDEF = 2 //.skip -}; - -enum BrigMemoryModifierMask { //.tddef=0 - BRIG_MEMORY_CONST = 1 -}; - -enum BrigMemoryOrder { - - //.mnemo={ s/^BRIG_MEMORY_ORDER_//; lc } - //.mnemo_token=_EMMemoryOrder - // - //.print=$mnemo{ "_$mnemo" } - - BRIG_MEMORY_ORDER_NONE = 0, //.mnemo="" - BRIG_MEMORY_ORDER_RELAXED = 1, //.mnemo=rlx - BRIG_MEMORY_ORDER_SC_ACQUIRE = 2, //.mnemo=scacq - BRIG_MEMORY_ORDER_SC_RELEASE = 3, //.mnemo=screl - BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE = 4, //.mnemo=scar - - BRIG_MEMORY_ORDER_LAST = 5 //.skip -}; - -enum BrigMemoryScope { - - //.mnemo={ s/^BRIG_MEMORY_SCOPE_//; lc } - //.mnemo_token=_EMMemoryScope - // - //.print=$mnemo{ $mnemo } - - BRIG_MEMORY_SCOPE_NONE = 0, //.mnemo="" - BRIG_MEMORY_SCOPE_WORKITEM = 1, //.mnemo="" - BRIG_MEMORY_SCOPE_WAVEFRONT = 2, //.mnemo=wave - BRIG_MEMORY_SCOPE_WORKGROUP = 3, //.mnemo=wg - BRIG_MEMORY_SCOPE_AGENT = 4, //.mnemo=agent - BRIG_MEMORY_SCOPE_SYSTEM = 5, //.mnemo=system - - BRIG_MEMORY_SCOPE_LAST = 6 //.skip -}; - -enum BrigOpcode { - - //.tdcaption="Instruction Opcodes" - // - //.k={ "BASIC" } - //.pscode=$k{ MACRO2Name("_".$k) } - //.opcodeparser=$pscode{ return $pscode && "parseMnemo$pscode" } - //.opcodeparser_incfile=ParserUtilities - //.opcodeparser_switch //.opcodeparser_proto="OpcodeParser getOpcodeParser(BrigOpcode16_t arg)" //.opcodeparser_default="return parseMnemoBasic" - // - //.psopnd={undef} - //.opndparser=$psopnd{ return $psopnd && "&Parser::parse$psopnd" } - //.opndparser_incfile=ParserUtilities - //.opndparser_switch //.opndparser_proto="Parser::OperandParser Parser::getOperandParser(BrigOpcode16_t arg)" //.opndparser_default="return &Parser::parseOperands" - // - //.mnemo={ s/^BRIG_OPCODE_//; s/GCN([^_])/GCN_$1/; lc } - //.mnemo_scanner=Instructions //.mnemo_token=EInstruction - //.mnemo_context=EDefaultContext - // - //.has_memory_order={undef} - //.semsupport=$has_memory_order{ return $has_memory_order && "true" } - // - //.hasType=$k{ return ($k and $k eq "BASIC_NO_TYPE") ? "false" : undef; } - //.hasType_switch //.hasType_proto="bool instHasType(BrigOpcode16_t arg)" //.hasType_default="return true" - // - //.opcodevis=$pscode{ s/^BRIG_OPCODE_//; sprintf("%-47s(","vis.visitOpcode_".$_) . ($pscode =~m/^(BasicOrMod|Nop)$/? "inst" : "HSAIL_ASM::Inst". ($pscode=~m/BasicNoType/? "Basic":$pscode) ."(inst)").")" } - //.opcodevis_switch //.opcodevis_proto="template RetType visitOpcode_gen(HSAIL_ASM::Inst inst, Visitor& vis)" - //.opcodevis_arg="inst.opcode()" //.opcodevis_default="return RetType()" - //.opcodevis_incfile=ItemUtils - // - //.ftz=$k{ return ($k eq "BASIC_OR_MOD" or $k eq "CMP" or $k eq "CVT") ? "true" : undef } - //.ftz_incfile=ItemUtils //.ftz_switch //.ftz_proto="inline bool instSupportsFtz(BrigOpcode16_t arg)" //.ftz_default="return false" - // - //.vecOpndIndex={undef} - //.vecOpndIndex_switch //.vecOpndIndex_proto="int vecOpndIndex(BrigOpcode16_t arg)" //.vecOpndIndex_default="return -1" - //.vecOpndIndex_incfile=ParserUtilities - // - //.numdst={undef} - //.numdst_switch //.numdst_proto="int instNumDstOperands(BrigOpcode16_t arg)" //.numdst_default="return 1" - // - //.print=$mnemo{ $mnemo } - - BRIG_OPCODE_NOP = 0, //.k=NOP //.hasType=false - BRIG_OPCODE_ABS = 1, //.k=BASIC_OR_MOD - BRIG_OPCODE_ADD = 2, //.k=BASIC_OR_MOD - BRIG_OPCODE_BORROW = 3, - BRIG_OPCODE_CARRY = 4, - BRIG_OPCODE_CEIL = 5, //.k=BASIC_OR_MOD - BRIG_OPCODE_COPYSIGN = 6, //.k=BASIC_OR_MOD - BRIG_OPCODE_DIV = 7, //.k=BASIC_OR_MOD - BRIG_OPCODE_FLOOR = 8, //.k=BASIC_OR_MOD - BRIG_OPCODE_FMA = 9, //.k=BASIC_OR_MOD - BRIG_OPCODE_FRACT = 10, //.k=BASIC_OR_MOD - BRIG_OPCODE_MAD = 11, //.k=BASIC_OR_MOD - BRIG_OPCODE_MAX = 12, //.k=BASIC_OR_MOD - BRIG_OPCODE_MIN = 13, //.k=BASIC_OR_MOD - BRIG_OPCODE_MUL = 14, //.k=BASIC_OR_MOD - BRIG_OPCODE_MULHI = 15, //.k=BASIC_OR_MOD - BRIG_OPCODE_NEG = 16, //.k=BASIC_OR_MOD - BRIG_OPCODE_REM = 17, - BRIG_OPCODE_RINT = 18, //.k=BASIC_OR_MOD - BRIG_OPCODE_SQRT = 19, //.k=BASIC_OR_MOD - BRIG_OPCODE_SUB = 20, //.k=BASIC_OR_MOD - BRIG_OPCODE_TRUNC = 21, //.k=BASIC_OR_MOD - BRIG_OPCODE_MAD24 = 22, - BRIG_OPCODE_MAD24HI = 23, - BRIG_OPCODE_MUL24 = 24, - BRIG_OPCODE_MUL24HI = 25, - BRIG_OPCODE_SHL = 26, - BRIG_OPCODE_SHR = 27, - BRIG_OPCODE_AND = 28, - BRIG_OPCODE_NOT = 29, - BRIG_OPCODE_OR = 30, - BRIG_OPCODE_POPCOUNT = 31, //.k=SOURCE_TYPE - BRIG_OPCODE_XOR = 32, - BRIG_OPCODE_BITEXTRACT = 33, - BRIG_OPCODE_BITINSERT = 34, - BRIG_OPCODE_BITMASK = 35, - BRIG_OPCODE_BITREV = 36, - BRIG_OPCODE_BITSELECT = 37, - BRIG_OPCODE_FIRSTBIT = 38, //.k=SOURCE_TYPE - BRIG_OPCODE_LASTBIT = 39, //.k=SOURCE_TYPE - BRIG_OPCODE_COMBINE = 40, //.k=SOURCE_TYPE //.vecOpndIndex=1 - BRIG_OPCODE_EXPAND = 41, //.k=SOURCE_TYPE //.vecOpndIndex=0 - BRIG_OPCODE_LDA = 42, //.k=ADDR - BRIG_OPCODE_MOV = 43, - BRIG_OPCODE_SHUFFLE = 44, - BRIG_OPCODE_UNPACKHI = 45, - BRIG_OPCODE_UNPACKLO = 46, - BRIG_OPCODE_PACK = 47, //.k=SOURCE_TYPE - BRIG_OPCODE_UNPACK = 48, //.k=SOURCE_TYPE - BRIG_OPCODE_CMOV = 49, - BRIG_OPCODE_CLASS = 50, //.k=SOURCE_TYPE - BRIG_OPCODE_NCOS = 51, - BRIG_OPCODE_NEXP2 = 52, - BRIG_OPCODE_NFMA = 53, - BRIG_OPCODE_NLOG2 = 54, - BRIG_OPCODE_NRCP = 55, - BRIG_OPCODE_NRSQRT = 56, - BRIG_OPCODE_NSIN = 57, - BRIG_OPCODE_NSQRT = 58, - BRIG_OPCODE_BITALIGN = 59, - BRIG_OPCODE_BYTEALIGN = 60, - BRIG_OPCODE_PACKCVT = 61, //.k=SOURCE_TYPE - BRIG_OPCODE_UNPACKCVT = 62, //.k=SOURCE_TYPE - BRIG_OPCODE_LERP = 63, - BRIG_OPCODE_SAD = 64, //.k=SOURCE_TYPE - BRIG_OPCODE_SADHI = 65, //.k=SOURCE_TYPE - BRIG_OPCODE_SEGMENTP = 66, //.k=SEG_CVT - BRIG_OPCODE_FTOS = 67, //.k=SEG_CVT - BRIG_OPCODE_STOF = 68, //.k=SEG_CVT - BRIG_OPCODE_CMP = 69, //.k=CMP - BRIG_OPCODE_CVT = 70, //.k=CVT - BRIG_OPCODE_LD = 71, //.k=MEM //.has_memory_order //.vecOpndIndex=0 - BRIG_OPCODE_ST = 72, //.k=MEM //.has_memory_order //.vecOpndIndex=0 //.numdst=0 - BRIG_OPCODE_ATOMIC = 73, //.k=ATOMIC - BRIG_OPCODE_ATOMICNORET = 74, //.k=ATOMIC //.numdst=0 - BRIG_OPCODE_SIGNAL = 75, //.k=SIGNAL - BRIG_OPCODE_SIGNALNORET = 76, //.k=SIGNAL //.numdst=0 - BRIG_OPCODE_MEMFENCE = 77, //.k=MEM_FENCE //.numdst=0 - BRIG_OPCODE_RDIMAGE = 78, //.k=IMAGE //.vecOpndIndex=0 - BRIG_OPCODE_LDIMAGE = 79, //.k=IMAGE //.vecOpndIndex=0 - BRIG_OPCODE_STIMAGE = 80, //.k=IMAGE //.vecOpndIndex=0 //.numdst=0 - BRIG_OPCODE_IMAGEFENCE = 81, //.k=BASIC_NO_TYPE - BRIG_OPCODE_QUERYIMAGE = 82, //.k=QUERY_IMAGE - BRIG_OPCODE_QUERYSAMPLER = 83, //.k=QUERY_SAMPLER - BRIG_OPCODE_CBR = 84, //.k=BR //.numdst=0 - BRIG_OPCODE_BR = 85, //.k=BR //.numdst=0 //.hasType=false - BRIG_OPCODE_SBR = 86, //.k=BR //.numdst=0 //.psopnd=SbrOperands - BRIG_OPCODE_BARRIER = 87, //.k=BR //.numdst=0 //.hasType=false - BRIG_OPCODE_WAVEBARRIER = 88, //.k=BR //.numdst=0 //.hasType=false - BRIG_OPCODE_ARRIVEFBAR = 89, //.k=BR //.numdst=0 //.hasType=false - BRIG_OPCODE_INITFBAR = 90, //.k=BASIC_NO_TYPE //.numdst=0 //.hasType=false - BRIG_OPCODE_JOINFBAR = 91, //.k=BR //.numdst=0 //.hasType=false - BRIG_OPCODE_LEAVEFBAR = 92, //.k=BR //.numdst=0 //.hasType=false - BRIG_OPCODE_RELEASEFBAR = 93, //.k=BASIC_NO_TYPE //.numdst=0 - BRIG_OPCODE_WAITFBAR = 94, //.k=BR //.numdst=0 //.hasType=false - BRIG_OPCODE_LDF = 95, - BRIG_OPCODE_ACTIVELANECOUNT = 96, //.k=LANE - BRIG_OPCODE_ACTIVELANEID = 97, //.k=LANE - BRIG_OPCODE_ACTIVELANEMASK = 98, //.k=LANE //.vecOpndIndex=0 - BRIG_OPCODE_ACTIVELANEPERMUTE = 99, //.k=LANE - BRIG_OPCODE_CALL = 100, //.k=BR //.psopnd=CallOperands //.numdst=0 //.hasType=false - BRIG_OPCODE_SCALL = 101, //.k=BR //.psopnd=CallOperands //.numdst=0 - BRIG_OPCODE_ICALL = 102, //.k=BR //.psopnd=CallOperands //.numdst=0 - BRIG_OPCODE_RET = 103, //.k=BASIC_NO_TYPE - BRIG_OPCODE_ALLOCA = 104, //.k=MEM - BRIG_OPCODE_CURRENTWORKGROUPSIZE = 105, - BRIG_OPCODE_CURRENTWORKITEMFLATID = 106, - BRIG_OPCODE_DIM = 107, - BRIG_OPCODE_GRIDGROUPS = 108, - BRIG_OPCODE_GRIDSIZE = 109, - BRIG_OPCODE_PACKETCOMPLETIONSIG = 110, - BRIG_OPCODE_PACKETID = 111, - BRIG_OPCODE_WORKGROUPID = 112, - BRIG_OPCODE_WORKGROUPSIZE = 113, - BRIG_OPCODE_WORKITEMABSID = 114, - BRIG_OPCODE_WORKITEMFLATABSID = 115, - BRIG_OPCODE_WORKITEMFLATID = 116, - BRIG_OPCODE_WORKITEMID = 117, - BRIG_OPCODE_CLEARDETECTEXCEPT = 118, //.numdst=0 - BRIG_OPCODE_GETDETECTEXCEPT = 119, - BRIG_OPCODE_SETDETECTEXCEPT = 120, //.numdst=0 - BRIG_OPCODE_ADDQUEUEWRITEINDEX = 121, //.k=QUEUE - BRIG_OPCODE_CASQUEUEWRITEINDEX = 122, //.k=QUEUE - BRIG_OPCODE_LDQUEUEREADINDEX = 123, //.k=QUEUE - BRIG_OPCODE_LDQUEUEWRITEINDEX = 124, //.k=QUEUE - BRIG_OPCODE_STQUEUEREADINDEX = 125, //.k=QUEUE //.numdst=0 - BRIG_OPCODE_STQUEUEWRITEINDEX = 126, //.k=QUEUE //.numdst=0 - BRIG_OPCODE_CLOCK = 127, - BRIG_OPCODE_CUID = 128, - BRIG_OPCODE_DEBUGTRAP = 129, //.numdst=0 - BRIG_OPCODE_GROUPBASEPTR = 130, - BRIG_OPCODE_KERNARGBASEPTR = 131, - BRIG_OPCODE_LANEID = 132, - BRIG_OPCODE_MAXCUID = 133, - BRIG_OPCODE_MAXWAVEID = 134, - BRIG_OPCODE_NULLPTR = 135, //.k=SEG - BRIG_OPCODE_WAVEID = 136, - BRIG_OPCODE_FIRST_USER_DEFINED = 32768, //.skip - - BRIG_OPCODE_GCNMADU = (1u << 15) | 0, //.k=BASIC_NO_TYPE - BRIG_OPCODE_GCNMADS = (1u << 15) | 1, //.k=BASIC_NO_TYPE - BRIG_OPCODE_GCNMAX3 = (1u << 15) | 2, - BRIG_OPCODE_GCNMIN3 = (1u << 15) | 3, - BRIG_OPCODE_GCNMED3 = (1u << 15) | 4, - BRIG_OPCODE_GCNFLDEXP = (1u << 15) | 5, //.k=BASIC_OR_MOD - BRIG_OPCODE_GCNFREXP_EXP = (1u << 15) | 6, //.k=BASIC_OR_MOD - BRIG_OPCODE_GCNFREXP_MANT = (1u << 15) | 7, //.k=BASIC_OR_MOD - BRIG_OPCODE_GCNTRIG_PREOP = (1u << 15) | 8, //.k=BASIC_OR_MOD - BRIG_OPCODE_GCNBFM = (1u << 15) | 9, - BRIG_OPCODE_GCNLD = (1u << 15) | 10, //.k=MEM //.has_memory_order //.vecOpndIndex=0 - BRIG_OPCODE_GCNST = (1u << 15) | 11, //.k=MEM //.has_memory_order //.vecOpndIndex=0 - BRIG_OPCODE_GCNATOMIC = (1u << 15) | 12, //.k=ATOMIC - BRIG_OPCODE_GCNATOMICNORET = (1u << 15) | 13, //.k=ATOMIC //.mnemo=gcn_atomicNoRet - BRIG_OPCODE_GCNSLEEP = (1u << 15) | 14, - BRIG_OPCODE_GCNPRIORITY = (1u << 15) | 15, - BRIG_OPCODE_GCNREGIONALLOC = (1u << 15) | 16, //.k=BASIC_NO_TYPE //.mnemo=gcn_region_alloc - BRIG_OPCODE_GCNMSAD = (1u << 15) | 17, - BRIG_OPCODE_GCNQSAD = (1u << 15) | 18, - BRIG_OPCODE_GCNMQSAD = (1u << 15) | 19, - BRIG_OPCODE_GCNMQSAD4 = (1u << 15) | 20, //.k=BASIC_NO_TYPE - BRIG_OPCODE_GCNSADW = (1u << 15) | 21, - BRIG_OPCODE_GCNSADD = (1u << 15) | 22, - BRIG_OPCODE_GCNCONSUME = (1u << 15) | 23, //.k=ADDR //.mnemo=gcn_atomic_consume - BRIG_OPCODE_GCNAPPEND = (1u << 15) | 24, //.k=ADDR //.mnemo=gcn_atomic_append - BRIG_OPCODE_GCNB4XCHG = (1u << 15) | 25, //.mnemo=gcn_b4xchg - BRIG_OPCODE_GCNB32XCHG = (1u << 15) | 26, //.mnemo=gcn_b32xchg - BRIG_OPCODE_GCNMAX = (1u << 15) | 27, - BRIG_OPCODE_GCNMIN = (1u << 15) | 28, - BRIG_OPCODE_GCNDIVRELAXED = (1u << 15) | 29, //.k=BASIC_OR_MOD - BRIG_OPCODE_GCNDIVRELAXEDNARROW = (1u << 15) | 30, - - BRIG_OPCODE_AMDRDIMAGELOD = (1u << 15) | 31, //.k=IMAGE //.mnemo=amd_rdimagelod //.vecOpndIndex=0 - BRIG_OPCODE_AMDRDIMAGEGRAD = (1u << 15) | 32, //.k=IMAGE //.mnemo=amd_rdimagegrad //.vecOpndIndex=0 - BRIG_OPCODE_AMDLDIMAGEMIP = (1u << 15) | 33, //.k=IMAGE //.mnemo=amd_ldimagemip //.vecOpndIndex=0 - BRIG_OPCODE_AMDSTIMAGEMIP = (1u << 15) | 34, //.k=IMAGE //.mnemo=amd_stimagemip //.vecOpndIndex=0 //.numdst=0 - BRIG_OPCODE_AMDQUERYIMAGE = (1u << 15) | 35 //.k=QUERY_IMAGE //.mnemo=amd_queryimage -}; - -enum BrigPack { - - //.tdcaption="Packing" - // - //.mnemo={ s/^BRIG_PACK_//;s/SAT$/_sat/;lc } - //.mnemo_token=_EMPacking - // - //.print=$mnemo{ "_$mnemo" } - - BRIG_PACK_NONE = 0, //.mnemo="" - BRIG_PACK_PP = 1, - BRIG_PACK_PS = 2, - BRIG_PACK_SP = 3, - BRIG_PACK_SS = 4, - BRIG_PACK_S = 5, - BRIG_PACK_P = 6, - BRIG_PACK_PPSAT = 7, - BRIG_PACK_PSSAT = 8, - BRIG_PACK_SPSAT = 9, - BRIG_PACK_SSSAT = 10, - BRIG_PACK_SSAT = 11, - BRIG_PACK_PSAT = 12 -}; - -enum BrigProfile { - - //.mnemo={ s/^BRIG_PROFILE_//;'$'.lc } - //.mnemo_token=ETargetProfile - // - //.print=$mnemo{ $mnemo } - - BRIG_PROFILE_BASE = 0, - BRIG_PROFILE_FULL = 1, - - BRIG_PROFILE_UNDEF = 2 //.skip -}; - -enum BrigRegisterKind { - - //.mnemo={ s/^BRIG_REGISTER_KIND_//;'$'.lc(substr($_,0,1)) } - // - //.bits={ } - //.bits_switch //.bits_proto="unsigned getRegBits(BrigRegisterKind16_t arg)" //.bits_default="return (unsigned)-1" - // - //.nollvm - - BRIG_REGISTER_KIND_CONTROL = 0, //.bits=1 - BRIG_REGISTER_KIND_SINGLE = 1, //.bits=32 - BRIG_REGISTER_KIND_DOUBLE = 2, //.bits=64 - BRIG_REGISTER_KIND_QUAD = 3 //.bits=128 -}; - -enum BrigRound { - - //.mnemo={} - //.mnemo_fn=round2str //.mnemo_token=_EMRound - // - //.sat={/_SAT$/? "true" : "false"} - //.sat_switch //.sat_proto="bool isSatRounding(unsigned rounding)" //.sat_arg="rounding" - //.sat_default="return false" - // - //.sig={/_SIGNALING_/? "true" : "false"} - //.sig_switch //.sig_proto="bool isSignalingRounding(unsigned rounding)" //.sig_arg="rounding" - //.sig_default="return false" - // - //.int={/_INTEGER_/? "true" : "false"} - //.int_switch //.int_proto="bool isIntRounding(unsigned rounding)" //.int_arg="rounding" - //.int_default="return false" - // - //.flt={/_FLOAT_/? "true" : "false"} - //.flt_switch //.flt_proto="bool isFloatRounding(unsigned rounding)" //.flt_arg="rounding" - //.flt_default="return false" - // - //.print=$mnemo{ "_$mnemo" } - - BRIG_ROUND_NONE = 0, //.no_mnemo - BRIG_ROUND_FLOAT_DEFAULT = 1, //.no_mnemo - BRIG_ROUND_FLOAT_NEAR_EVEN = 2, //.mnemo=near - BRIG_ROUND_FLOAT_ZERO = 3, //.mnemo=zero - BRIG_ROUND_FLOAT_PLUS_INFINITY = 4, //.mnemo=up - BRIG_ROUND_FLOAT_MINUS_INFINITY = 5, //.mnemo=down - BRIG_ROUND_INTEGER_NEAR_EVEN = 6, //.mnemo=neari - BRIG_ROUND_INTEGER_ZERO = 7, //.mnemo=zeroi - BRIG_ROUND_INTEGER_PLUS_INFINITY = 8, //.mnemo=upi - BRIG_ROUND_INTEGER_MINUS_INFINITY = 9, //.mnemo=downi - BRIG_ROUND_INTEGER_NEAR_EVEN_SAT = 10, //.mnemo=neari_sat - BRIG_ROUND_INTEGER_ZERO_SAT = 11, //.mnemo=zeroi_sat - BRIG_ROUND_INTEGER_PLUS_INFINITY_SAT = 12, //.mnemo=upi_sat - BRIG_ROUND_INTEGER_MINUS_INFINITY_SAT = 13, //.mnemo=downi_sat - BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN = 14, //.mnemo=sneari - BRIG_ROUND_INTEGER_SIGNALING_ZERO = 15, //.mnemo=szeroi - BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY = 16, //.mnemo=supi - BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY = 17, //.mnemo=sdowni - BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN_SAT = 18, //.mnemo=sneari_sat - BRIG_ROUND_INTEGER_SIGNALING_ZERO_SAT = 19, //.mnemo=szeroi_sat - BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY_SAT = 20, //.mnemo=supi_sat - BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY_SAT = 21 //.mnemo=sdowni_sat -}; - -enum BrigSamplerAddressing { - - //.mnemo={ s/^BRIG_ADDRESSING_//;lc } - //.mnemo_token=ESamplerAddressingMode - - BRIG_ADDRESSING_UNDEFINED = 0, - BRIG_ADDRESSING_CLAMP_TO_EDGE = 1, - BRIG_ADDRESSING_CLAMP_TO_BORDER = 2, - BRIG_ADDRESSING_REPEAT = 3, - BRIG_ADDRESSING_MIRRORED_REPEAT = 4, - - BRIG_ADDRESSING_FIRST_USER_DEFINED = 128 //.skip -}; - -enum BrigSamplerCoordNormalization { - - //.mnemo={ s/^BRIG_COORD_//;lc } - //.mnemo_token=ESamplerCoord - // - //.print=$mnemo{ $mnemo } - - BRIG_COORD_UNNORMALIZED = 0, - BRIG_COORD_NORMALIZED = 1 -}; - -enum BrigSamplerFilter { - - //.mnemo={ s/^BRIG_FILTER_//;lc } - // - //.print=$mnemo{ $mnemo } - - BRIG_FILTER_NEAREST = 0, - BRIG_FILTER_LINEAR = 1, - - BRIG_FILTER_FIRST_USER_DEFINED = 128 //.skip -}; - -enum BrigSamplerQuery { - - //.mnemo={ s/^BRIG_SAMPLER_QUERY_//;lc } - //.mnemo_token=_EMSamplerQuery - // - //.print=$mnemo{ $mnemo } - - BRIG_SAMPLER_QUERY_ADDRESSING = 0, - BRIG_SAMPLER_QUERY_COORD = 1, - BRIG_SAMPLER_QUERY_FILTER = 2 -}; - -enum BrigSectionIndex { - - //.nollvm - // - //.mnemo={ s/^BRIG_SECTION_INDEX_/HSA_/;lc } - - BRIG_SECTION_INDEX_DATA = 0, - BRIG_SECTION_INDEX_CODE = 1, - BRIG_SECTION_INDEX_OPERAND = 2, - BRIG_SECTION_INDEX_BEGIN_IMPLEMENTATION_DEFINED = 3, - - // used internally - BRIG_SECTION_INDEX_IMPLEMENTATION_DEFINED = BRIG_SECTION_INDEX_BEGIN_IMPLEMENTATION_DEFINED //.skip -}; - -enum BrigSegCvtModifierMask { - BRIG_SEG_CVT_NONULL = 1 //.mnemo="nonull" //.print="_nonull" -}; - -enum BrigSegment { - - //.mnemo={ s/^BRIG_SEGMENT_//;lc} - //.mnemo_token=_EMSegment - //.mnemo_context=EInstModifierContext - // - //.print=$mnemo{ $mnemo ? "_$mnemo" : "" } - - BRIG_SEGMENT_NONE = 0, //.mnemo="" - BRIG_SEGMENT_FLAT = 1, //.mnemo="" - BRIG_SEGMENT_GLOBAL = 2, - BRIG_SEGMENT_READONLY = 3, - BRIG_SEGMENT_KERNARG = 4, - BRIG_SEGMENT_GROUP = 5, - BRIG_SEGMENT_PRIVATE = 6, - BRIG_SEGMENT_SPILL = 7, - BRIG_SEGMENT_ARG = 8, - - BRIG_SEGMENT_FIRST_USER_DEFINED = 128, //.skip - - BRIG_SEGMENT_AMD_GCN = 9, //.mnemo="region" -}; - -enum BrigPackedTypeBits { - - //.nodump - // - //.nollvm - - BRIG_TYPE_BASE_SIZE = 5, - BRIG_TYPE_PACK_SIZE = 2, - BRIG_TYPE_ARRAY_SIZE = 1, - - BRIG_TYPE_BASE_SHIFT = 0, - BRIG_TYPE_PACK_SHIFT = BRIG_TYPE_BASE_SHIFT + BRIG_TYPE_BASE_SIZE, - BRIG_TYPE_ARRAY_SHIFT = BRIG_TYPE_PACK_SHIFT + BRIG_TYPE_PACK_SIZE, - - BRIG_TYPE_BASE_MASK = ((1 << BRIG_TYPE_BASE_SIZE) - 1) << BRIG_TYPE_BASE_SHIFT, - BRIG_TYPE_PACK_MASK = ((1 << BRIG_TYPE_PACK_SIZE) - 1) << BRIG_TYPE_PACK_SHIFT, - BRIG_TYPE_ARRAY_MASK = ((1 << BRIG_TYPE_ARRAY_SIZE) - 1) << BRIG_TYPE_ARRAY_SHIFT, - - BRIG_TYPE_PACK_NONE = 0 << BRIG_TYPE_PACK_SHIFT, - BRIG_TYPE_PACK_32 = 1 << BRIG_TYPE_PACK_SHIFT, - BRIG_TYPE_PACK_64 = 2 << BRIG_TYPE_PACK_SHIFT, - BRIG_TYPE_PACK_128 = 3 << BRIG_TYPE_PACK_SHIFT, - - BRIG_TYPE_ARRAY = 1 << BRIG_TYPE_ARRAY_SHIFT -}; - -enum BrigType { - - //.numBits={ /ARRAY$/ ? undef : /([0-9]+)X([0-9]+)/ ? $1*$2 : /([0-9]+)/ ? $1 : undef } - //.numBits_switch //.numBits_proto="unsigned getBrigTypeNumBits(unsigned arg)" //.numBits_default="assert(0); return 0" - //.numBytes=$numBits{ $numBits > 1 ? $numBits/8 : undef } - //.numBytes_switch //.numBytes_proto="unsigned getBrigTypeNumBytes(unsigned arg)" //.numBytes_default="assert(0); return 0" - // - //.mnemo={ s/^BRIG_TYPE_//;lc } - //.mnemo_token=_EMType - // - //.array={/ARRAY$/?"true":"false"} - //.array_switch //.array_proto="bool isArrayType(unsigned type)" //.array_arg="type" - //.array_default="return false" - // - //.a2e={/(.*)_ARRAY$/? $1 : "BRIG_TYPE_NONE"} - //.a2e_switch //.a2e_proto="unsigned arrayType2elementType(unsigned type)" //.a2e_arg="type" - //.a2e_default="return BRIG_TYPE_NONE" - // - //.e2a={/_ARRAY$/? "BRIG_TYPE_NONE" : /_NONE$/ ? "BRIG_TYPE_NONE" : /_B1$/ ? "BRIG_TYPE_NONE" : $_ . "_ARRAY"} - //.e2a_switch //.e2a_proto="unsigned elementType2arrayType(unsigned type)" //.e2a_arg="type" - //.e2a_default="return BRIG_TYPE_NONE" - // - //.t2s={s/^BRIG_TYPE_//;lc s/_ARRAY$/[]/;lc} - //.t2s_switch //.t2s_proto="const char* type2name(unsigned type)" //.t2s_arg="type" - //.t2s_default="return NULL" - // - //.dispatch_switch //.dispatch_incfile=TemplateUtilities - //.dispatch_proto="template\nRetType dispatchByType_gen(unsigned type, Visitor& v)" - //.dispatch={ /ARRAY$/ ? "v.visitNone(type)" : /^BRIG_TYPE_([BUSF]|SIG)[0-9]+/ ? "v.template visit< BrigTypeTraits<$_> >()" : "v.visitNone(type)" } - //.dispatch_arg="type" //.dispatch_default="return v.visitNone(type)" - // - //- .tdname=BrigType - // - //.print=$mnemo{ "_$mnemo" } - - BRIG_TYPE_NONE = 0, //.mnemo="" //.print="" - BRIG_TYPE_U8 = 1, //.ctype=uint8_t - BRIG_TYPE_U16 = 2, //.ctype=uint16_t - BRIG_TYPE_U32 = 3, //.ctype=uint32_t - BRIG_TYPE_U64 = 4, //.ctype=uint64_t - BRIG_TYPE_S8 = 5, //.ctype=int8_t - BRIG_TYPE_S16 = 6, //.ctype=int16_t - BRIG_TYPE_S32 = 7, //.ctype=int32_t - BRIG_TYPE_S64 = 8, //.ctype=int64_t - BRIG_TYPE_F16 = 9, //.ctype=f16_t - BRIG_TYPE_F32 = 10, //.ctype=float - BRIG_TYPE_F64 = 11, //.ctype=double - BRIG_TYPE_B1 = 12, //.ctype=bool //.numBytes=1 - BRIG_TYPE_B8 = 13, //.ctype=uint8_t - BRIG_TYPE_B16 = 14, //.ctype=uint16_t - BRIG_TYPE_B32 = 15, //.ctype=uint32_t - BRIG_TYPE_B64 = 16, //.ctype=uint64_t - BRIG_TYPE_B128 = 17, //.ctype=b128_t - BRIG_TYPE_SAMP = 18, //.mnemo=samp //.numBits=64 - BRIG_TYPE_ROIMG = 19, //.mnemo=roimg //.numBits=64 - BRIG_TYPE_WOIMG = 20, //.mnemo=woimg //.numBits=64 - BRIG_TYPE_RWIMG = 21, //.mnemo=rwimg //.numBits=64 - BRIG_TYPE_SIG32 = 22, //.mnemo=sig32 //.numBits=64 - BRIG_TYPE_SIG64 = 23, //.mnemo=sig64 //.numBits=64 - - BRIG_TYPE_U8X4 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_32, //.ctype=uint8_t - BRIG_TYPE_U8X8 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_64, //.ctype=uint8_t - BRIG_TYPE_U8X16 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_128, //.ctype=uint8_t - BRIG_TYPE_U16X2 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_32, //.ctype=uint16_t - BRIG_TYPE_U16X4 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_64, //.ctype=uint16_t - BRIG_TYPE_U16X8 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_128, //.ctype=uint16_t - BRIG_TYPE_U32X2 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_64, //.ctype=uint32_t - BRIG_TYPE_U32X4 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_128, //.ctype=uint32_t - BRIG_TYPE_U64X2 = BRIG_TYPE_U64 | BRIG_TYPE_PACK_128, //.ctype=uint64_t - BRIG_TYPE_S8X4 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_32, //.ctype=int8_t - BRIG_TYPE_S8X8 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_64, //.ctype=int8_t - BRIG_TYPE_S8X16 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_128, //.ctype=int8_t - BRIG_TYPE_S16X2 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_32, //.ctype=int16_t - BRIG_TYPE_S16X4 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_64, //.ctype=int16_t - BRIG_TYPE_S16X8 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_128, //.ctype=int16_t - BRIG_TYPE_S32X2 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_64, //.ctype=int32_t - BRIG_TYPE_S32X4 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_128, //.ctype=int32_t - BRIG_TYPE_S64X2 = BRIG_TYPE_S64 | BRIG_TYPE_PACK_128, //.ctype=int64_t - BRIG_TYPE_F16X2 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_32, //.ctype=f16_t - BRIG_TYPE_F16X4 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_64, //.ctype=f16_t - BRIG_TYPE_F16X8 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_128, //.ctype=f16_t - BRIG_TYPE_F32X2 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_64, //.ctype=float - BRIG_TYPE_F32X4 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_128, //.ctype=float - BRIG_TYPE_F64X2 = BRIG_TYPE_F64 | BRIG_TYPE_PACK_128, //.ctype=double - - BRIG_TYPE_U8_ARRAY = BRIG_TYPE_U8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_U16_ARRAY = BRIG_TYPE_U16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_U32_ARRAY = BRIG_TYPE_U32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_U64_ARRAY = BRIG_TYPE_U64 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_S8_ARRAY = BRIG_TYPE_S8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_S16_ARRAY = BRIG_TYPE_S16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_S32_ARRAY = BRIG_TYPE_S32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_S64_ARRAY = BRIG_TYPE_S64 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_F16_ARRAY = BRIG_TYPE_F16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_F32_ARRAY = BRIG_TYPE_F32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_F64_ARRAY = BRIG_TYPE_F64 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_B8_ARRAY = BRIG_TYPE_B8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_B16_ARRAY = BRIG_TYPE_B16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_B32_ARRAY = BRIG_TYPE_B32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_B64_ARRAY = BRIG_TYPE_B64 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_B128_ARRAY = BRIG_TYPE_B128 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_SAMP_ARRAY = BRIG_TYPE_SAMP | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_ROIMG_ARRAY = BRIG_TYPE_ROIMG | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_WOIMG_ARRAY = BRIG_TYPE_WOIMG | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_RWIMG_ARRAY = BRIG_TYPE_RWIMG | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_SIG32_ARRAY = BRIG_TYPE_SIG32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_SIG64_ARRAY = BRIG_TYPE_SIG64 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_U8X4_ARRAY = BRIG_TYPE_U8X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_U8X8_ARRAY = BRIG_TYPE_U8X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_U8X16_ARRAY = BRIG_TYPE_U8X16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_U16X2_ARRAY = BRIG_TYPE_U16X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_U16X4_ARRAY = BRIG_TYPE_U16X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_U16X8_ARRAY = BRIG_TYPE_U16X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_U32X2_ARRAY = BRIG_TYPE_U32X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_U32X4_ARRAY = BRIG_TYPE_U32X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_U64X2_ARRAY = BRIG_TYPE_U64X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_S8X4_ARRAY = BRIG_TYPE_S8X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_S8X8_ARRAY = BRIG_TYPE_S8X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_S8X16_ARRAY = BRIG_TYPE_S8X16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_S16X2_ARRAY = BRIG_TYPE_S16X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_S16X4_ARRAY = BRIG_TYPE_S16X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_S16X8_ARRAY = BRIG_TYPE_S16X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_S32X2_ARRAY = BRIG_TYPE_S32X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_S32X4_ARRAY = BRIG_TYPE_S32X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_S64X2_ARRAY = BRIG_TYPE_S64X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_F16X2_ARRAY = BRIG_TYPE_F16X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_F16X4_ARRAY = BRIG_TYPE_F16X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_F16X8_ARRAY = BRIG_TYPE_F16X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_F32X2_ARRAY = BRIG_TYPE_F32X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_F32X4_ARRAY = BRIG_TYPE_F32X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - BRIG_TYPE_F64X2_ARRAY = BRIG_TYPE_F64X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" - - // Used internally - BRIG_TYPE_INVALID = (unsigned) -1 //.skip -}; - -enum BrigVariableModifierMask { - - //.nodump - - BRIG_VARIABLE_DEFINITION = 1, - BRIG_VARIABLE_CONST = 2 -}; - -enum BrigWidth { - - //.tddef=1 - // - //.print={ s/^BRIG_WIDTH_//; "_width($_)" } - - BRIG_WIDTH_NONE = 0, - BRIG_WIDTH_1 = 1, - BRIG_WIDTH_2 = 2, - BRIG_WIDTH_4 = 3, - BRIG_WIDTH_8 = 4, - BRIG_WIDTH_16 = 5, - BRIG_WIDTH_32 = 6, - BRIG_WIDTH_64 = 7, - BRIG_WIDTH_128 = 8, - BRIG_WIDTH_256 = 9, - BRIG_WIDTH_512 = 10, - BRIG_WIDTH_1024 = 11, - BRIG_WIDTH_2048 = 12, - BRIG_WIDTH_4096 = 13, - BRIG_WIDTH_8192 = 14, - BRIG_WIDTH_16384 = 15, - BRIG_WIDTH_32768 = 16, - BRIG_WIDTH_65536 = 17, - BRIG_WIDTH_131072 = 18, - BRIG_WIDTH_262144 = 19, - BRIG_WIDTH_524288 = 20, - BRIG_WIDTH_1048576 = 21, - BRIG_WIDTH_2097152 = 22, - BRIG_WIDTH_4194304 = 23, - BRIG_WIDTH_8388608 = 24, - BRIG_WIDTH_16777216 = 25, - BRIG_WIDTH_33554432 = 26, - BRIG_WIDTH_67108864 = 27, - BRIG_WIDTH_134217728 = 28, - BRIG_WIDTH_268435456 = 29, - BRIG_WIDTH_536870912 = 30, - BRIG_WIDTH_1073741824 = 31, - BRIG_WIDTH_2147483648 = 32, - BRIG_WIDTH_WAVESIZE = 33, - BRIG_WIDTH_ALL = 34, - - BRIG_WIDTH_LAST //.skip -}; - -struct BrigUInt64 { //.isroot //.standalone - uint32_t lo; //.defValue=0 - uint32_t hi; //.defValue=0 - - //+hcode KLASS& operator=(uint64_t rhs); - //+hcode operator uint64_t(); - //+implcode inline KLASS& KLASS::operator=(uint64_t rhs) { lo() = (uint32_t)rhs; hi() = (uint32_t)(rhs >> 32); return *this; } - //+implcode inline KLASS::operator uint64_t() { return ((uint64_t)hi()) << 32 | lo(); } -}; - -struct BrigAluModifier { //.isroot //.standalone - BrigAluModifier8_t allBits; //.defValue=0 - //^^ bool ftz; //.wtype=BitValRef<0> -}; - -struct BrigBase { //.nowrap - uint16_t byteCount; - BrigKind16_t kind; -}; - -//.alias Code:Base { //.generic //.isroot //.section=BRIG_SECTION_INDEX_CODE }; -//.alias Directive:Code { //.generic }; -//.alias Operand:Base { //.generic //.isroot //.section=BRIG_SECTION_INDEX_OPERAND }; - -struct BrigData { - //.nowrap - uint32_t byteCount; - uint8_t bytes[1]; -}; - -struct BrigExecutableModifier { //.isroot //.standalone - BrigExecutableModifier8_t allBits; //.defValue=0 - //^^ bool isDefinition; //.wtype=BitValRef<0> -}; - -struct BrigMemoryModifier { //.isroot //.standalone - BrigMemoryModifier8_t allBits; //.defValue=0 - //^^ bool isConst; //.wtype=BitValRef<0> -}; - -struct BrigSegCvtModifier { //.isroot //.standalone - BrigSegCvtModifier8_t allBits; //.defValue=0 - //^^ bool isNoNull; //.wtype=BitValRef<0> -}; - -struct BrigVariableModifier { //.isroot //.standalone - BrigVariableModifier8_t allBits; //.defValue=0 - - //^^ bool isDefinition; //.wtype=BitValRef<0> - //^^ bool isConst; //.wtype=BitValRef<1> -}; - -struct BrigDirectiveArgBlockEnd { - BrigBase base; -}; - -struct BrigDirectiveArgBlockStart { - BrigBase base; -}; - -struct BrigDirectiveComment { - BrigBase base; - BrigDataOffsetString32_t name; -}; - -struct BrigDirectiveControl { - BrigBase base; - BrigControlDirective16_t control; - uint16_t reserved; //.defValue=0 - BrigDataOffsetOperandList32_t operands; -}; - -struct BrigDirectiveExecutable { //.generic - BrigBase base; - BrigDataOffsetString32_t name; - uint16_t outArgCount; //.defValue=0 - uint16_t inArgCount; //.defValue=0 - BrigCodeOffset32_t firstInArg; - BrigCodeOffset32_t firstCodeBlockEntry; - BrigCodeOffset32_t nextModuleEntry; - BrigExecutableModifier modifier; //.acc=subItem //.wtype=ExecutableModifier - BrigLinkage8_t linkage; - uint16_t reserved; //.defValue=0 -}; - -//.alias DirectiveKernel:DirectiveExecutable { }; -//.alias DirectiveFunction:DirectiveExecutable { }; -//.alias DirectiveSignature:DirectiveExecutable { }; -//.alias DirectiveIndirectFunction:DirectiveExecutable { }; - -struct BrigDirectiveExtension { - BrigBase base; - BrigDataOffsetString32_t name; -}; - -struct BrigDirectiveFbarrier { - BrigBase base; - BrigDataOffsetString32_t name; - BrigVariableModifier modifier; //.acc=subItem //.wtype=VariableModifier - BrigLinkage8_t linkage; - uint16_t reserved; //.defValue=0 -}; - -struct BrigDirectiveLabel { - BrigBase base; - BrigDataOffsetString32_t name; -}; - -struct BrigDirectiveLoc { - BrigBase base; - BrigDataOffsetString32_t filename; - uint32_t line; - uint32_t column; //.defValue=1 -}; - -struct BrigDirectiveNone { //.enum=BRIG_KIND_NONE - BrigBase base; -}; - -struct BrigDirectivePragma { - BrigBase base; - BrigDataOffsetOperandList32_t operands; -}; - -struct BrigDirectiveVariable { - BrigBase base; - BrigDataOffsetString32_t name; - BrigOperandOffset32_t init; - BrigType16_t type; - - //+hcode bool isArray(); - //+implcode inline bool KLASS::isArray() { return isArrayType(type()); } - - //+hcode unsigned elementType(); - //+implcode inline unsigned KLASS::elementType() { return isArray()? arrayType2elementType(type()) : type(); } - - BrigSegment8_t segment; - BrigAlignment8_t align; - BrigUInt64 dim; //.acc=subItem //.wtype=UInt64 - BrigVariableModifier modifier; //.acc=subItem //.wtype=VariableModifier - BrigLinkage8_t linkage; - BrigAllocation8_t allocation; - uint8_t reserved; //.defValue=0 -}; - -struct BrigDirectiveModule { - BrigBase base; - BrigDataOffsetString32_t name; - BrigVersion32_t hsailMajor; //.wtype=ValRef - BrigVersion32_t hsailMinor; //.wtype=ValRef - BrigProfile8_t profile; - BrigMachineModel8_t machineModel; - BrigRound8_t defaultFloatRound; - uint8_t reserved; //.defValue=0 -}; - -struct BrigInstBase { //.wname=Inst //.generic //.parent=BrigCode - BrigBase base; - BrigOpcode16_t opcode; - BrigType16_t type; - BrigDataOffsetOperandList32_t operands; - - //+hcode Operand operand(int index); - //+implcode inline Operand KLASS::operand(int index) { return operands()[index]; } -}; - -struct BrigInstAddr { - BrigInstBase base; - BrigSegment8_t segment; - uint8_t reserved[3]; //.defValue=0 -}; - -struct BrigInstAtomic { - BrigInstBase base; - BrigSegment8_t segment; - BrigMemoryOrder8_t memoryOrder; - BrigMemoryScope8_t memoryScope; - BrigAtomicOperation8_t atomicOperation; - uint8_t equivClass; - uint8_t reserved[3]; //.defValue=0 -}; - -struct BrigInstBasic { - BrigInstBase base; -}; - -struct BrigInstBr { - BrigInstBase base; - BrigWidth8_t width; - uint8_t reserved[3]; //.defValue=0 -}; - -struct BrigInstCmp { - BrigInstBase base; - BrigType16_t sourceType; - BrigAluModifier modifier; //.acc=subItem //.wtype=AluModifier - BrigCompareOperation8_t compare; - BrigPack8_t pack; - uint8_t reserved[3]; //.defValue=0 -}; - -struct BrigInstCvt { - BrigInstBase base; - BrigType16_t sourceType; - BrigAluModifier modifier; //.acc=subItem //.wtype=AluModifier - BrigRound8_t round; -}; - -struct BrigInstImage { - BrigInstBase base; - BrigType16_t imageType; - BrigType16_t coordType; - BrigImageGeometry8_t geometry; - uint8_t equivClass; - uint16_t reserved; //.defValue=0 -}; - -struct BrigInstLane { - BrigInstBase base; - BrigType16_t sourceType; - BrigWidth8_t width; - uint8_t reserved; //.defValue=0 -}; - -struct BrigInstMem { - BrigInstBase base; - BrigSegment8_t segment; - BrigAlignment8_t align; - uint8_t equivClass; - BrigWidth8_t width; - BrigMemoryModifier modifier; //.acc=subItem //.wtype=MemoryModifier - uint8_t reserved[3]; //.defValue=0 -}; - -struct BrigInstMemFence { - BrigInstBase base; - BrigMemoryOrder8_t memoryOrder; - BrigMemoryScope8_t globalSegmentMemoryScope; - BrigMemoryScope8_t groupSegmentMemoryScope; - BrigMemoryScope8_t imageSegmentMemoryScope; -}; - -struct BrigInstMod { - BrigInstBase base; - BrigAluModifier modifier; //.acc=subItem //.wtype=AluModifier - BrigRound8_t round; - BrigPack8_t pack; - uint8_t reserved; //.defValue=0 -}; - -struct BrigInstQueryImage { - BrigInstBase base; - BrigType16_t imageType; - BrigImageGeometry8_t geometry; - BrigImageQuery8_t imageQuery; -}; - -struct BrigInstQuerySampler { - BrigInstBase base; - BrigSamplerQuery8_t samplerQuery; - uint8_t reserved[3]; //.defValue=0 -}; - -struct BrigInstQueue { - BrigInstBase base; - BrigSegment8_t segment; - BrigMemoryOrder8_t memoryOrder; - uint16_t reserved; //.defValue=0 -}; - -struct BrigInstSeg { - BrigInstBase base; - BrigSegment8_t segment; - uint8_t reserved[3]; //.defValue=0 -}; - -struct BrigInstSegCvt { - BrigInstBase base; - BrigType16_t sourceType; - BrigSegment8_t segment; - BrigSegCvtModifier modifier; //.acc=subItem //.wtype=SegCvtModifier -}; - -struct BrigInstSignal { - BrigInstBase base; - BrigType16_t signalType; - BrigMemoryOrder8_t memoryOrder; - BrigAtomicOperation8_t signalOperation; -}; - -struct BrigInstSourceType { - BrigInstBase base; - BrigType16_t sourceType; - uint16_t reserved; //.defValue=0 -}; - -typedef BrigInstSourceType BrigInstPopcount; - -struct BrigOperandAddress { - BrigBase base; - BrigCodeOffset32_t symbol; //.wtype=ItemRef - BrigOperandOffset32_t reg; //.wtype=ItemRef - BrigUInt64 offset; //.acc=subItem //.wtype=UInt64 -}; - -struct BrigOperandAlign { - BrigBase base; - BrigAlignment8_t align; - uint8_t reserved[3]; //.defValue=0 -}; - -struct BrigOperandCodeList { - BrigBase base; - BrigDataOffsetCodeList32_t elements; - - //+hcode unsigned elementCount(); - //+implcode inline unsigned KLASS::elementCount() { return elements().size(); } - //+hcode Code elements(int index); - //+implcode inline Code KLASS::elements(int index) { return elements()[index]; } -}; - -struct BrigOperandCodeRef { - BrigBase base; - BrigCodeOffset32_t ref; -}; - -struct BrigOperandConstantBytes { - BrigBase base; - BrigType16_t type; //.defValue=0 - uint16_t reserved; //.defValue=0 - BrigDataOffsetString32_t bytes; -}; - -struct BrigOperandConstantOperandList { - BrigBase base; - BrigType16_t type; - uint16_t reserved; //.defValue=0 - BrigDataOffsetOperandList32_t elements; - - //+hcode unsigned elementCount(); - //+implcode inline unsigned KLASS::elementCount() { return elements().size(); } - //+hcode Operand elements(int index); - //+implcode inline Operand KLASS::elements(int index) { return elements()[index]; } -}; - -struct BrigOperandConstantImage { - BrigBase base; - BrigType16_t type; - BrigImageGeometry8_t geometry; - BrigImageChannelOrder8_t channelOrder; - BrigImageChannelType8_t channelType; - uint8_t reserved[3]; //.defValue=0 - BrigUInt64 width; //.acc=subItem //.wtype=UInt64 - BrigUInt64 height; //.acc=subItem //.wtype=UInt64 - BrigUInt64 depth; //.acc=subItem //.wtype=UInt64 - BrigUInt64 array; //.acc=subItem //.wtype=UInt64 -}; - -struct BrigOperandOperandList { - BrigBase base; - BrigDataOffsetOperandList32_t elements; - - //+hcode unsigned elementCount(); - //+implcode inline unsigned KLASS::elementCount() { return elements().size(); } - //+hcode Operand elements(int index); - //+implcode inline Operand KLASS::elements(int index) { return elements()[index]; } -}; - -struct BrigOperandRegister { - BrigBase base; - BrigRegisterKind16_t regKind; - uint16_t regNum; -}; - -struct BrigOperandConstantSampler { - BrigBase base; - BrigType16_t type; - BrigSamplerCoordNormalization8_t coord; - BrigSamplerFilter8_t filter; - BrigSamplerAddressing8_t addressing; - uint8_t reserved[3]; //.defValue=0 -}; - -struct BrigOperandString { - BrigBase base; - BrigDataOffsetString32_t string; -}; - -struct BrigOperandWavesize { - BrigBase base; -}; - -//.ignore{ - -enum BrigExceptionsMask { - BRIG_EXCEPTIONS_INVALID_OPERATION = 1 << 0, - BRIG_EXCEPTIONS_DIVIDE_BY_ZERO = 1 << 1, - BRIG_EXCEPTIONS_OVERFLOW = 1 << 2, - BRIG_EXCEPTIONS_UNDERFLOW = 1 << 3, - BRIG_EXCEPTIONS_INEXACT = 1 << 4, - - BRIG_EXCEPTIONS_FIRST_USER_DEFINED = 1 << 16 -}; - -struct BrigSectionHeader { - uint64_t byteCount; - uint32_t headerByteCount; - uint32_t nameLength; - uint8_t name[1]; -}; - -#define MODULE_IDENTIFICATION_LENGTH (8) - -struct BrigModuleHeader { - char identification[MODULE_IDENTIFICATION_LENGTH]; - BrigVersion32_t brigMajor; - BrigVersion32_t brigMinor; - uint64_t byteCount; - uint8_t hash[64]; - uint32_t reserved; - uint32_t sectionCount; - uint64_t sectionIndex; -}; - -typedef BrigModuleHeader* BrigModule_t; - -#endif // defined(INCLUDED_BRIG_H) -//} diff --git a/src/arch/hsail/SConscript b/src/arch/hsail/SConscript deleted file mode 100644 index 251c103fd..000000000 --- a/src/arch/hsail/SConscript +++ /dev/null @@ -1,53 +0,0 @@ -# -*- mode:python -*- - -# Copyright (c) 2015 Advanced Micro Devices, Inc. -# All rights reserved. -# -# For use for simulation and test purposes only -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its contributors -# may be used to endorse or promote products derived from this software -# without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -# -# Author: Anthony Gutierrez -# - -Import('*') - -if not env['BUILD_GPU']: - Return() - -if env['TARGET_GPU_ISA'] == 'hsail': - env.Command(['insts/gen_decl.hh', 'gpu_decoder.cc', 'insts/gen_exec.cc'], - 'gen.py', '$SOURCE $TARGETS') - - Source('gpu_decoder.cc') - Source('insts/branch.cc') - Source('insts/gen_exec.cc') - Source('insts/gpu_static_inst.cc') - Source('insts/main.cc') - Source('insts/pseudo_inst.cc') - Source('insts/mem.cc') - Source('operand.cc') diff --git a/src/arch/hsail/SConsopts b/src/arch/hsail/SConsopts deleted file mode 100644 index 641963c82..000000000 --- a/src/arch/hsail/SConsopts +++ /dev/null @@ -1,40 +0,0 @@ -# -*- mode:python -*- - -# -# Copyright (c) 2015 Advanced Micro Devices, Inc. -# All rights reserved. -# -# For use for simulation and test purposes only -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its contributors -# may be used to endorse or promote products derived from this software -# without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -# -# Author: Anthony Gutierrez -# - -Import('*') - -all_gpu_isa_list.append('hsail') diff --git a/src/arch/hsail/gen.py b/src/arch/hsail/gen.py deleted file mode 100755 index 56404248b..000000000 --- a/src/arch/hsail/gen.py +++ /dev/null @@ -1,912 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) 2015 Advanced Micro Devices, Inc. -# All rights reserved. -# -# For use for simulation and test purposes only -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its contributors -# may be used to endorse or promote products derived from this software -# without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -# -# Author: Steve Reinhardt -# - -from __future__ import print_function - -import sys, re - -from m5.util import code_formatter - -if len(sys.argv) != 4: - print("Error: need 3 args (file names)") - sys.exit(0) - -header_code = code_formatter() -decoder_code = code_formatter() -exec_code = code_formatter() - -############### -# -# Generate file prologs (includes etc.) -# -############### - -header_code(''' -#include "arch/hsail/insts/decl.hh" -#include "base/bitfield.hh" -#include "gpu-compute/hsail_code.hh" -#include "gpu-compute/wavefront.hh" - -namespace HsailISA -{ -''') -header_code.indent() - -decoder_code(''' -#include "arch/hsail/gpu_decoder.hh" -#include "arch/hsail/insts/branch.hh" -#include "arch/hsail/insts/decl.hh" -#include "arch/hsail/insts/gen_decl.hh" -#include "arch/hsail/insts/mem.hh" -#include "arch/hsail/insts/mem_impl.hh" -#include "gpu-compute/brig_object.hh" - -namespace HsailISA -{ - std::vector Decoder::decodedInsts; - - GPUStaticInst* - Decoder::decode(MachInst machInst) - { - using namespace Brig; - - const BrigInstBase *ib = machInst.brigInstBase; - const BrigObject *obj = machInst.brigObj; - - switch(ib->opcode) { -''') -decoder_code.indent() -decoder_code.indent() - -exec_code(''' -#include "arch/hsail/insts/gen_decl.hh" -#include "base/intmath.hh" - -namespace HsailISA -{ -''') -exec_code.indent() - -############### -# -# Define code templates for class declarations (for header file) -# -############### - -# Basic header template for an instruction stub. -header_template_stub = ''' -class $class_name : public $base_class -{ - public: - typedef $base_class Base; - - $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj) - : Base(ib, obj, "$opcode") - { - } - - void execute(GPUDynInstPtr gpuDynInst); -}; - -''' - -# Basic header template for an instruction with no template parameters. -header_template_nodt = ''' -class $class_name : public $base_class -{ - public: - typedef $base_class Base; - - $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj) - : Base(ib, obj, "$opcode") - { - } - - void execute(GPUDynInstPtr gpuDynInst); -}; - -''' - -# Basic header template for an instruction with a single DataType -# template parameter. -header_template_1dt = ''' -template -class $class_name : public $base_class -{ - public: - typedef $base_class Base; - typedef typename DataType::CType CType; - - $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj) - : Base(ib, obj, "$opcode") - { - } - - void execute(GPUDynInstPtr gpuDynInst); -}; - -''' - -header_template_1dt_noexec = ''' -template -class $class_name : public $base_class -{ - public: - typedef $base_class Base; - typedef typename DataType::CType CType; - - $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj) - : Base(ib, obj, "$opcode") - { - } -}; - -''' - -# Same as header_template_1dt, except the base class has a second -# template parameter NumSrcOperands to allow a variable number of -# source operands. Note that since this is implemented with an array, -# it only works for instructions where all sources are of the same -# type (like most arithmetics). -header_template_1dt_varsrcs = ''' -template -class $class_name : public $base_class -{ - public: - typedef $base_class Base; - typedef typename DataType::CType CType; - - $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj) - : Base(ib, obj, "$opcode") - { - } - - void execute(GPUDynInstPtr gpuDynInst); -}; - -''' - -# Header template for instruction with two DataType template -# parameters, one for the dest and one for the source. This is used -# by compare and convert. -header_template_2dt = ''' -template -class $class_name : public $base_class -{ - public: - typedef $base_class Base; - typedef typename DestDataType::CType DestCType; - typedef typename SrcDataType::CType SrcCType; - - $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj) - : Base(ib, obj, "$opcode") - { - } - - void execute(GPUDynInstPtr gpuDynInst); -}; - -''' - -header_templates = { - 'ArithInst': header_template_1dt_varsrcs, - 'CmovInst': header_template_1dt, - 'ClassInst': header_template_1dt, - 'ShiftInst': header_template_1dt, - 'ExtractInsertInst': header_template_1dt, - 'CmpInst': header_template_2dt, - 'CvtInst': header_template_2dt, - 'PopcountInst': header_template_2dt, - 'LdInst': '', - 'StInst': '', - 'SpecialInstNoSrc': header_template_nodt, - 'SpecialInst1Src': header_template_nodt, - 'SpecialInstNoSrcNoDest': '', - 'Stub': header_template_stub, -} - -############### -# -# Define code templates for exec functions -# -############### - -# exec function body -exec_template_stub = ''' -void -$class_name::execute(GPUDynInstPtr gpuDynInst) -{ - fatal("instruction unimplemented %s\\n", gpuDynInst->disassemble()); -} - -''' -exec_template_nodt_nosrc = ''' -void -$class_name::execute(GPUDynInstPtr gpuDynInst) -{ - Wavefront *w = gpuDynInst->wavefront(); - - typedef Base::DestCType DestCType; - - const VectorMask &mask = w->getPred(); - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - DestCType dest_val = $expr; - this->dest.set(w, lane, dest_val); - } - } -} - -''' - -exec_template_nodt_1src = ''' -void -$class_name::execute(GPUDynInstPtr gpuDynInst) -{ - Wavefront *w = gpuDynInst->wavefront(); - - typedef Base::DestCType DestCType; - typedef Base::SrcCType SrcCType; - - const VectorMask &mask = w->getPred(); - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - SrcCType src_val0 = this->src0.get(w, lane); - DestCType dest_val = $expr; - - this->dest.set(w, lane, dest_val); - } - } -} - -''' - -exec_template_1dt_varsrcs = ''' -template -void -$class_name::execute(GPUDynInstPtr gpuDynInst) -{ - Wavefront *w = gpuDynInst->wavefront(); - - const VectorMask &mask = w->getPred(); - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - CType dest_val; - if ($dest_is_src_flag) { - dest_val = this->dest.template get(w, lane); - } - - CType src_val[$num_srcs]; - - for (int i = 0; i < $num_srcs; ++i) { - src_val[i] = this->src[i].template get(w, lane); - } - - dest_val = (CType)($expr); - - this->dest.set(w, lane, dest_val); - } - } -} - -''' - -exec_template_1dt_3srcs = ''' -template -void -$class_name::execute(GPUDynInstPtr gpuDynInst) -{ - Wavefront *w = gpuDynInst->wavefront(); - - typedef typename Base::Src0CType Src0T; - typedef typename Base::Src1CType Src1T; - typedef typename Base::Src2CType Src2T; - - const VectorMask &mask = w->getPred(); - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - CType dest_val; - - if ($dest_is_src_flag) { - dest_val = this->dest.template get(w, lane); - } - - Src0T src_val0 = this->src0.template get(w, lane); - Src1T src_val1 = this->src1.template get(w, lane); - Src2T src_val2 = this->src2.template get(w, lane); - - dest_val = $expr; - - this->dest.set(w, lane, dest_val); - } - } -} - -''' - -exec_template_1dt_2src_1dest = ''' -template -void -$class_name::execute(GPUDynInstPtr gpuDynInst) -{ - Wavefront *w = gpuDynInst->wavefront(); - - typedef typename Base::DestCType DestT; - typedef CType Src0T; - typedef typename Base::Src1CType Src1T; - - const VectorMask &mask = w->getPred(); - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - DestT dest_val; - if ($dest_is_src_flag) { - dest_val = this->dest.template get(w, lane); - } - Src0T src_val0 = this->src0.template get(w, lane); - Src1T src_val1 = this->src1.template get(w, lane); - - dest_val = $expr; - - this->dest.set(w, lane, dest_val); - } - } -} - -''' - -exec_template_shift = ''' -template -void -$class_name::execute(GPUDynInstPtr gpuDynInst) -{ - Wavefront *w = gpuDynInst->wavefront(); - - const VectorMask &mask = w->getPred(); - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - CType dest_val; - - if ($dest_is_src_flag) { - dest_val = this->dest.template get(w, lane); - } - - CType src_val0 = this->src0.template get(w, lane); - uint32_t src_val1 = this->src1.template get(w, lane); - - dest_val = $expr; - - this->dest.set(w, lane, dest_val); - } - } -} - -''' - -exec_template_2dt = ''' -template -void -$class_name::execute(GPUDynInstPtr gpuDynInst) -{ - Wavefront *w = gpuDynInst->wavefront(); - - const VectorMask &mask = w->getPred(); - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - DestCType dest_val; - SrcCType src_val[$num_srcs]; - - for (int i = 0; i < $num_srcs; ++i) { - src_val[i] = this->src[i].template get(w, lane); - } - - dest_val = $expr; - - this->dest.set(w, lane, dest_val); - } - } -} - -''' - -exec_templates = { - 'ArithInst': exec_template_1dt_varsrcs, - 'CmovInst': exec_template_1dt_3srcs, - 'ExtractInsertInst': exec_template_1dt_3srcs, - 'ClassInst': exec_template_1dt_2src_1dest, - 'CmpInst': exec_template_2dt, - 'CvtInst': exec_template_2dt, - 'PopcountInst': exec_template_2dt, - 'LdInst': '', - 'StInst': '', - 'SpecialInstNoSrc': exec_template_nodt_nosrc, - 'SpecialInst1Src': exec_template_nodt_1src, - 'SpecialInstNoSrcNoDest': '', - 'Stub': exec_template_stub, -} - -############### -# -# Define code templates for the decoder cases -# -############### - -# decode template for nodt-opcode case -decode_nodt_template = ''' - case BRIG_OPCODE_$brig_opcode_upper: return $constructor(ib, obj);''' - -decode_case_prolog_class_inst = ''' - case BRIG_OPCODE_$brig_opcode_upper: - { - //const BrigOperandBase *baseOp = obj->getOperand(ib->operands[1]); - BrigType16_t type = ((BrigInstSourceType*)ib)->sourceType; - //switch (baseOp->kind) { - // case BRIG_OPERAND_REG: - // type = ((const BrigOperandReg*)baseOp)->type; - // break; - // case BRIG_OPERAND_IMMED: - // type = ((const BrigOperandImmed*)baseOp)->type; - // break; - // default: - // fatal("CLASS unrecognized kind of operand %d\\n", - // baseOp->kind); - //} - switch (type) {''' - -# common prolog for 1dt- or 2dt-opcode case: switch on data type -decode_case_prolog = ''' - case BRIG_OPCODE_$brig_opcode_upper: - { - switch (ib->type) {''' - -# single-level decode case entry (for 1dt opcodes) -decode_case_entry = \ -' case BRIG_TYPE_$type_name: return $constructor(ib, obj);' - -decode_store_prolog = \ -' case BRIG_TYPE_$type_name: {' - -decode_store_case_epilog = ''' - }''' - -decode_store_case_entry = \ -' return $constructor(ib, obj);' - -# common epilog for type switch -decode_case_epilog = ''' - default: fatal("$brig_opcode_upper: unrecognized type %d\\n", - ib->type); - } - } - break;''' - -# Additional templates for nested decode on a second type field (for -# compare and convert). These are used in place of the -# decode_case_entry template to create a second-level switch on on the -# second type field inside each case of the first-level type switch. -# Because the name and location of the second type can vary, the Brig -# instruction type must be provided in $brig_type, and the name of the -# second type field must be provided in $type_field. -decode_case2_prolog = ''' - case BRIG_TYPE_$type_name: - switch (((Brig$brig_type*)ib)->$type2_field) {''' - -decode_case2_entry = \ -' case BRIG_TYPE_$type2_name: return $constructor(ib, obj);' - -decode_case2_epilog = ''' - default: fatal("$brig_opcode_upper: unrecognized $type2_field %d\\n", - ((Brig$brig_type*)ib)->$type2_field); - } - break;''' - -# Figure out how many source operands an expr needs by looking for the -# highest-numbered srcN value referenced. Since sources are numbered -# starting at 0, the return value is N+1. -def num_src_operands(expr): - if expr.find('src2') != -1: - return 3 - elif expr.find('src1') != -1: - return 2 - elif expr.find('src0') != -1: - return 1 - else: - return 0 - -############### -# -# Define final code generation methods -# -# The gen_nodt, and gen_1dt, and gen_2dt methods are the interface for -# generating actual instructions. -# -############### - -# Generate class declaration, exec function, and decode switch case -# for an brig_opcode with a single-level type switch. The 'types' -# parameter is a list or tuple of types for which the instruction -# should be instantiated. -def gen(brig_opcode, types=None, expr=None, base_class='ArithInst', - type2_info=None, constructor_prefix='new ', is_store=False): - brig_opcode_upper = brig_opcode.upper() - class_name = brig_opcode - opcode = class_name.lower() - - if base_class == 'ArithInst': - # note that expr must be provided with ArithInst so we can - # derive num_srcs for the template - assert expr - - if expr: - # Derive several bits of info from expr. If expr is not used, - # this info will be irrelevant. - num_srcs = num_src_operands(expr) - # if the RHS expression includes 'dest', then we're doing an RMW - # on the reg and we need to treat it like a source - dest_is_src = expr.find('dest') != -1 - dest_is_src_flag = str(dest_is_src).lower() # for C++ - if base_class in ['ShiftInst']: - expr = re.sub(r'\bsrc(\d)\b', r'src_val\1', expr) - elif base_class in ['ArithInst', 'CmpInst', 'CvtInst', 'PopcountInst']: - expr = re.sub(r'\bsrc(\d)\b', r'src_val[\1]', expr) - else: - expr = re.sub(r'\bsrc(\d)\b', r'src_val\1', expr) - expr = re.sub(r'\bdest\b', r'dest_val', expr) - - # Strip template arguments off of base class before looking up - # appropriate templates - base_class_base = re.sub(r'<.*>$', '', base_class) - header_code(header_templates[base_class_base]) - - if base_class.startswith('SpecialInst') or base_class.startswith('Stub'): - exec_code(exec_templates[base_class_base]) - elif base_class.startswith('ShiftInst'): - header_code(exec_template_shift) - else: - header_code(exec_templates[base_class_base]) - - if not types or isinstance(types, str): - # Just a single type - constructor = constructor_prefix + class_name - decoder_code(decode_nodt_template) - else: - # multiple types, need at least one level of decode - if brig_opcode == 'Class': - decoder_code(decode_case_prolog_class_inst) - else: - decoder_code(decode_case_prolog) - if not type2_info: - if not is_store: - # single list of types, to basic one-level decode - for type_name in types: - full_class_name = '%s<%s>' % (class_name, type_name.upper()) - constructor = constructor_prefix + full_class_name - decoder_code(decode_case_entry) - else: - # single list of types, to basic one-level decode - for type_name in types: - decoder_code(decode_store_prolog) - type_size = int(re.findall(r'[0-9]+', type_name)[0]) - src_size = 32 - type_type = type_name[0] - full_class_name = '%s<%s,%s>' % (class_name, \ - type_name.upper(), \ - '%s%d' % \ - (type_type.upper(), \ - type_size)) - constructor = constructor_prefix + full_class_name - decoder_code(decode_store_case_entry) - decoder_code(decode_store_case_epilog) - else: - # need secondary type switch (convert, compare) - # unpack extra info on second switch - (type2_field, types2) = type2_info - brig_type = 'Inst%s' % brig_opcode - for type_name in types: - decoder_code(decode_case2_prolog) - fmt = '%s<%s,%%s>' % (class_name, type_name.upper()) - for type2_name in types2: - full_class_name = fmt % type2_name.upper() - constructor = constructor_prefix + full_class_name - decoder_code(decode_case2_entry) - - decoder_code(decode_case2_epilog) - - decoder_code(decode_case_epilog) - -############### -# -# Generate instructions -# -############### - -# handy abbreviations for common sets of types - -# arithmetic ops are typically defined only on 32- and 64-bit sizes -arith_int_types = ('S32', 'U32', 'S64', 'U64') -arith_float_types = ('F32', 'F64') -arith_types = arith_int_types + arith_float_types - -bit_types = ('B1', 'B32', 'B64') - -all_int_types = ('S8', 'U8', 'S16', 'U16') + arith_int_types - -# I think you might be able to do 'f16' memory ops too, but we'll -# ignore them for now. -mem_types = all_int_types + arith_float_types -mem_atom_types = all_int_types + ('B32', 'B64') - -##### Arithmetic & logical operations -gen('Add', arith_types, 'src0 + src1') -gen('Sub', arith_types, 'src0 - src1') -gen('Mul', arith_types, 'src0 * src1') -gen('Div', arith_types, 'src0 / src1') -gen('Min', arith_types, 'std::min(src0, src1)') -gen('Max', arith_types, 'std::max(src0, src1)') -gen('Gcnmin', arith_types, 'std::min(src0, src1)') - -gen('CopySign', arith_float_types, - 'src1 < 0 ? -std::abs(src0) : std::abs(src0)') -gen('Sqrt', arith_float_types, 'sqrt(src0)') -gen('Floor', arith_float_types, 'floor(src0)') - -# "fast" sqrt... same as slow for us -gen('Nsqrt', arith_float_types, 'sqrt(src0)') -gen('Nrsqrt', arith_float_types, '1.0/sqrt(src0)') -gen('Nrcp', arith_float_types, '1.0/src0') -gen('Fract', arith_float_types, - '(src0 >= 0.0)?(src0-floor(src0)):(floor(src0)-src0)') - -gen('Ncos', arith_float_types, 'cos(src0)'); -gen('Nsin', arith_float_types, 'sin(src0)'); - -gen('And', bit_types, 'src0 & src1') -gen('Or', bit_types, 'src0 | src1') -gen('Xor', bit_types, 'src0 ^ src1') - -gen('Bitselect', bit_types, '(src1 & src0) | (src2 & ~(uint64_t)src0)') -gen('Popcount', ('U32',), '__builtin_popcount(src0)', 'PopcountInst', \ - ('sourceType', ('B32', 'B64'))) - -gen('Shl', arith_int_types, 'src0 << (unsigned)src1', 'ShiftInst') -gen('Shr', arith_int_types, 'src0 >> (unsigned)src1', 'ShiftInst') - -# gen('Mul_hi', types=('s32','u32', '??')) -# gen('Mul24', types=('s32','u32', '??')) -gen('Rem', arith_int_types, 'src0 - ((src0 / src1) * src1)') - -gen('Abs', arith_types, 'std::abs(src0)') -gen('Neg', arith_types, '-src0') - -gen('Mov', bit_types + arith_types, 'src0') -gen('Not', bit_types, 'heynot(src0)') - -# mad and fma differ only in rounding behavior, which we don't emulate -# also there's an integer form of mad, but not of fma -gen('Mad', arith_types, 'src0 * src1 + src2') -gen('Fma', arith_float_types, 'src0 * src1 + src2') - -#native floating point operations -gen('Nfma', arith_float_types, 'src0 * src1 + src2') - -gen('Cmov', bit_types, 'src0 ? src1 : src2', 'CmovInst') -gen('BitAlign', bit_types, '(src0 << src2)|(src1 >> (32 - src2))') -gen('ByteAlign', bit_types, '(src0 << 8 * src2)|(src1 >> (32 - 8 * src2))') - -# see base/bitfield.hh -gen('BitExtract', arith_int_types, 'bits(src0, src1, src1 + src2 - 1)', - 'ExtractInsertInst') - -gen('BitInsert', arith_int_types, 'insertBits(dest, src1, src2, src0)', - 'ExtractInsertInst') - -##### Compare -gen('Cmp', ('B1', 'S32', 'U32', 'F32'), 'compare(src0, src1, this->cmpOp)', - 'CmpInst', ('sourceType', arith_types + bit_types)) -gen('Class', arith_float_types, 'fpclassify(src0,src1)','ClassInst') - -##### Conversion - -# Conversion operations are only defined on B1, not B32 or B64 -cvt_types = ('B1',) + mem_types - -gen('Cvt', cvt_types, 'src0', 'CvtInst', ('sourceType', cvt_types)) - - -##### Load & Store -gen('Lda', mem_types, base_class = 'LdInst', constructor_prefix='decode') -gen('Ld', mem_types, base_class = 'LdInst', constructor_prefix='decode') -gen('St', mem_types, base_class = 'StInst', constructor_prefix='decode', - is_store=True) -gen('Atomic', mem_atom_types, base_class='StInst', constructor_prefix='decode') -gen('AtomicNoRet', mem_atom_types, base_class='StInst', - constructor_prefix='decode') - -gen('Cbr', base_class = 'LdInst', constructor_prefix='decode') -gen('Br', base_class = 'LdInst', constructor_prefix='decode') - -##### Special operations -def gen_special(brig_opcode, expr, dest_type='U32'): - num_srcs = num_src_operands(expr) - if num_srcs == 0: - base_class = 'SpecialInstNoSrc<%s>' % dest_type - elif num_srcs == 1: - base_class = 'SpecialInst1Src<%s>' % dest_type - else: - assert false - - gen(brig_opcode, None, expr, base_class) - -gen_special('WorkItemId', 'w->workItemId[src0][lane]') -gen_special('WorkItemAbsId', - 'w->workItemId[src0][lane] + (w->workGroupId[src0] * w->workGroupSz[src0])') -gen_special('WorkGroupId', 'w->workGroupId[src0]') -gen_special('WorkGroupSize', 'w->workGroupSz[src0]') -gen_special('CurrentWorkGroupSize', 'w->workGroupSz[src0]') -gen_special('GridSize', 'w->gridSz[src0]') -gen_special('GridGroups', - 'divCeil(w->gridSz[src0],w->workGroupSz[src0])') -gen_special('LaneId', 'lane') -gen_special('WaveId', 'w->wfId') -gen_special('Clock', 'w->computeUnit->shader->tick_cnt', 'U64') - -# gen_special('CU'', ') - -gen('Ret', base_class='SpecialInstNoSrcNoDest') -gen('Barrier', base_class='SpecialInstNoSrcNoDest') -gen('MemFence', base_class='SpecialInstNoSrcNoDest') - -# Map magic instructions to the BrigSyscall opcode -# Magic instructions are defined in magic.hh -# -# In the future, real HSA kernel system calls can be implemented and coexist -# with magic instructions. -gen('Call', base_class='SpecialInstNoSrcNoDest') - -# Stubs for unimplemented instructions: -# These may need to be implemented at some point in the future, but -# for now we just match the instructions with their operands. -# -# By defining stubs for these instructions, we can work with -# applications that have them in dead/unused code paths. -# -# Needed for rocm-hcc compilations for HSA backends since -# builtins-hsail library is `cat`d onto the generated kernels. -# The builtins-hsail library consists of handcoded hsail functions -# that __might__ be needed by the rocm-hcc compiler in certain binaries. -gen('Bitmask', base_class='Stub') -gen('Bitrev', base_class='Stub') -gen('Firstbit', base_class='Stub') -gen('Lastbit', base_class='Stub') -gen('Unpacklo', base_class='Stub') -gen('Unpackhi', base_class='Stub') -gen('Pack', base_class='Stub') -gen('Unpack', base_class='Stub') -gen('Lerp', base_class='Stub') -gen('Packcvt', base_class='Stub') -gen('Unpackcvt', base_class='Stub') -gen('Sad', base_class='Stub') -gen('Sadhi', base_class='Stub') -gen('Activelanecount', base_class='Stub') -gen('Activelaneid', base_class='Stub') -gen('Activelanemask', base_class='Stub') -gen('Activelanepermute', base_class='Stub') -gen('Groupbaseptr', base_class='Stub') -gen('Signalnoret', base_class='Stub') - -############### -# -# Generate file epilogs -# -############### -header_code(''' -template<> -inline void -Abs::execute(GPUDynInstPtr gpuDynInst) -{ - Wavefront *w = gpuDynInst->wavefront(); - - const VectorMask &mask = w->getPred(); - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - CType dest_val; - CType src_val; - - src_val = this->src[0].template get(w, lane); - - dest_val = (CType)(src_val); - - this->dest.set(w, lane, dest_val); - } - } -} - -template<> -inline void -Abs::execute(GPUDynInstPtr gpuDynInst) -{ - Wavefront *w = gpuDynInst->wavefront(); - - const VectorMask &mask = w->getPred(); - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - CType dest_val; - CType src_val; - - src_val = this->src[0].template get(w, lane); - - dest_val = (CType)(src_val); - - this->dest.set(w, lane, dest_val); - } - } -} -''') - -header_code.dedent() -header_code(''' -} // namespace HsailISA -''') - -# close off main decode switch -decoder_code.dedent() -decoder_code.dedent() -decoder_code(''' - default: fatal("unrecognized Brig opcode %d\\n", ib->opcode); - } // end switch(ib->opcode) - } // end decode() -} // namespace HsailISA -''') - -exec_code.dedent() -exec_code(''' -} // namespace HsailISA -''') - -############### -# -# Output accumulated code to files -# -############### -header_code.write(sys.argv[1]) -decoder_code.write(sys.argv[2]) -exec_code.write(sys.argv[3]) diff --git a/src/arch/hsail/gpu_decoder.hh b/src/arch/hsail/gpu_decoder.hh deleted file mode 100644 index 98a689664..000000000 --- a/src/arch/hsail/gpu_decoder.hh +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: Anthony Gutierrez - */ - -#ifndef __ARCH_HSAIL_GPU_DECODER_HH__ -#define __ARCH_HSAIL_GPU_DECODER_HH__ - -#include - -#include "arch/hsail/gpu_types.hh" - -class BrigObject; -class GPUStaticInst; - -namespace Brig -{ - class BrigInstBase; -} - -namespace HsailISA -{ - class Decoder - { - public: - GPUStaticInst* decode(MachInst machInst); - - GPUStaticInst* - decode(RawMachInst inst) - { - return inst < decodedInsts.size() ? decodedInsts.at(inst) : nullptr; - } - - RawMachInst - saveInst(GPUStaticInst *decodedInst) - { - decodedInsts.push_back(decodedInst); - - return decodedInsts.size() - 1; - } - - private: - static std::vector decodedInsts; - }; -} // namespace HsailISA - -#endif // __ARCH_HSAIL_GPU_DECODER_HH__ diff --git a/src/arch/hsail/gpu_isa.hh b/src/arch/hsail/gpu_isa.hh deleted file mode 100644 index 75063cb5d..000000000 --- a/src/arch/hsail/gpu_isa.hh +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2016 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __ARCH_HSAIL_GPU_ISA_HH__ -#define __ARCH_HSAIL_GPU_ISA_HH__ - -#include - -#include "arch/hsail/gpu_types.hh" -#include "base/logging.hh" -#include "base/types.hh" -#include "gpu-compute/misc.hh" - -namespace HsailISA -{ - class GPUISA - { - public: - GPUISA() - { - } - - void - writeMiscReg(int opIdx, RegVal operandVal) - { - fatal("HSAIL does not implement misc registers yet\n"); - } - - RegVal - readMiscReg(int opIdx) const - { - fatal("HSAIL does not implement misc registers yet\n"); - } - - bool hasScalarUnit() const { return false; } - - uint32_t - advancePC(uint32_t old_pc, GPUDynInstPtr gpuDynInst) - { - return old_pc + sizeof(RawMachInst); - } - }; -} - -#endif // __ARCH_HSAIL_GPU_ISA_HH__ diff --git a/src/arch/hsail/gpu_types.hh b/src/arch/hsail/gpu_types.hh deleted file mode 100644 index 7b6689d67..000000000 --- a/src/arch/hsail/gpu_types.hh +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: Anthony Gutierrez - */ - -#ifndef __ARCH_HSAIL_GPU_TYPES_HH__ -#define __ARCH_HSAIL_GPU_TYPES_HH__ - -#include - -namespace Brig -{ - class BrigInstBase; -} - -class BrigObject; - -namespace HsailISA -{ - // A raw machine instruction represents the raw bits that - // our model uses to represent an actual instruction. In - // the case of HSAIL this is just an index into a list of - // instruction objects. - typedef uint32_t RawMachInst; - - // The MachInst is a representation of an instruction - // that has more information than just the machine code. - // For HSAIL the actual machine code is a BrigInstBase - // and the BrigObject contains more pertinent - // information related to operaands, etc. - - struct MachInst - { - const Brig::BrigInstBase *brigInstBase; - const BrigObject *brigObj; - }; -} - -#endif // __ARCH_HSAIL_GPU_TYPES_HH__ diff --git a/src/arch/hsail/insts/branch.cc b/src/arch/hsail/insts/branch.cc deleted file mode 100644 index d65279cc8..000000000 --- a/src/arch/hsail/insts/branch.cc +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: Anthony Gutierrez - */ - -#include "arch/hsail/insts/branch.hh" - -#include "gpu-compute/hsail_code.hh" - -namespace HsailISA -{ - GPUStaticInst* - decodeBrn(const Brig::BrigInstBase *ib, const BrigObject *obj) - { - // Detect direct vs indirect branch by seeing whether we have a - // register operand. - unsigned op_offs = obj->getOperandPtr(ib->operands, 0); - const Brig::BrigOperand *reg = obj->getOperand(op_offs); - - if (reg->kind == Brig::BRIG_KIND_OPERAND_REGISTER) { - return new BrnIndirectInst(ib, obj); - } else { - return new BrnDirectInst(ib, obj); - } - } - - GPUStaticInst* - decodeCbr(const Brig::BrigInstBase *ib, const BrigObject *obj) - { - // Detect direct vs indirect branch by seeing whether we have a - // second register operand (after the condition). - unsigned op_offs = obj->getOperandPtr(ib->operands, 1); - const Brig::BrigOperand *reg = obj->getOperand(op_offs); - - if (reg->kind == Brig::BRIG_KIND_OPERAND_REGISTER) { - return new CbrIndirectInst(ib, obj); - } else { - return new CbrDirectInst(ib, obj); - } - } - - GPUStaticInst* - decodeBr(const Brig::BrigInstBase *ib, const BrigObject *obj) - { - // Detect direct vs indirect branch by seeing whether we have a - // second register operand (after the condition). - unsigned op_offs = obj->getOperandPtr(ib->operands, 1); - const Brig::BrigOperand *reg = obj->getOperand(op_offs); - - if (reg->kind == Brig::BRIG_KIND_OPERAND_REGISTER) { - return new BrIndirectInst(ib, obj); - } else { - return new BrDirectInst(ib, obj); - } - } -} // namespace HsailISA diff --git a/src/arch/hsail/insts/branch.hh b/src/arch/hsail/insts/branch.hh deleted file mode 100644 index 79603f408..000000000 --- a/src/arch/hsail/insts/branch.hh +++ /dev/null @@ -1,441 +0,0 @@ -/* - * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: Steve Reinhardt - */ - -#ifndef __ARCH_HSAIL_INSTS_BRANCH_HH__ -#define __ARCH_HSAIL_INSTS_BRANCH_HH__ - -#include "arch/hsail/insts/gpu_static_inst.hh" -#include "arch/hsail/operand.hh" -#include "gpu-compute/gpu_dyn_inst.hh" -#include "gpu-compute/wavefront.hh" - -namespace HsailISA -{ - - // The main difference between a direct branch and an indirect branch - // is whether the target is a register or a label, so we can share a - // lot of code if we template the base implementation on that type. - template - class BrnInstBase : public HsailGPUStaticInst - { - public: - void generateDisassembly() override; - - Brig::BrigWidth8_t width; - TargetType target; - - BrnInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj) - : HsailGPUStaticInst(obj, "brn") - { - setFlag(Branch); - setFlag(UnconditionalJump); - width = ((Brig::BrigInstBr*)ib)->width; - unsigned op_offs = obj->getOperandPtr(ib->operands, 0); - target.init(op_offs, obj); - } - - uint32_t getTargetPc() override { return target.getTarget(0, 0); } - - bool isVectorRegister(int operandIndex) override { - assert(operandIndex >= 0 && operandIndex < getNumOperands()); - return target.isVectorRegister(); - } - bool isCondRegister(int operandIndex) override { - assert(operandIndex >= 0 && operandIndex < getNumOperands()); - return target.isCondRegister(); - } - bool isScalarRegister(int operandIndex) override { - assert(operandIndex >= 0 && operandIndex < getNumOperands()); - return target.isScalarRegister(); - } - - bool isSrcOperand(int operandIndex) override { - assert(operandIndex >= 0 && operandIndex < getNumOperands()); - return true; - } - - bool isDstOperand(int operandIndex) override { - return false; - } - - int getOperandSize(int operandIndex) override { - assert(operandIndex >= 0 && operandIndex < getNumOperands()); - return target.opSize(); - } - - int - getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override - { - assert(operandIndex >= 0 && operandIndex < getNumOperands()); - return target.regIndex(); - } - - int getNumOperands() override { - return 1; - } - - void execute(GPUDynInstPtr gpuDynInst) override; - }; - - template - void - BrnInstBase::generateDisassembly() - { - std::string widthClause; - - if (width != 1) { - widthClause = csprintf("_width(%d)", width); - } - - disassembly = csprintf("%s%s %s", opcode, widthClause, - target.disassemble()); - } - - template - void - BrnInstBase::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *w = gpuDynInst->wavefront(); - - if (getTargetPc() == w->rpc()) { - w->popFromReconvergenceStack(); - } else { - // Rpc and execution mask remain the same - w->pc(getTargetPc()); - } - } - - class BrnDirectInst : public BrnInstBase - { - public: - BrnDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) - : BrnInstBase(ib, obj) - { - } - int numSrcRegOperands() { return 0; } - int numDstRegOperands() { return 0; } - }; - - class BrnIndirectInst : public BrnInstBase - { - public: - BrnIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) - : BrnInstBase(ib, obj) - { - } - int numSrcRegOperands() { return target.isVectorRegister(); } - int numDstRegOperands() { return 0; } - }; - - GPUStaticInst* decodeBrn(const Brig::BrigInstBase *ib, - const BrigObject *obj); - - template - class CbrInstBase : public HsailGPUStaticInst - { - public: - void generateDisassembly() override; - - Brig::BrigWidth8_t width; - CRegOperand cond; - TargetType target; - - CbrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj) - : HsailGPUStaticInst(obj, "cbr") - { - setFlag(Branch); - width = ((Brig::BrigInstBr *)ib)->width; - unsigned op_offs = obj->getOperandPtr(ib->operands, 0); - cond.init(op_offs, obj); - op_offs = obj->getOperandPtr(ib->operands, 1); - target.init(op_offs, obj); - } - - uint32_t getTargetPc() override { return target.getTarget(0, 0); } - - void execute(GPUDynInstPtr gpuDynInst) override; - // Assumption: Target is operand 0, Condition Register is operand 1 - bool isVectorRegister(int operandIndex) override { - assert(operandIndex >= 0 && operandIndex < getNumOperands()); - if (!operandIndex) - return target.isVectorRegister(); - else - return false; - } - bool isCondRegister(int operandIndex) override { - assert(operandIndex >= 0 && operandIndex < getNumOperands()); - if (!operandIndex) - return target.isCondRegister(); - else - return true; - } - bool isScalarRegister(int operandIndex) override { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (!operandIndex) - return target.isScalarRegister(); - else - return false; - } - bool isSrcOperand(int operandIndex) override { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (operandIndex == 0) - return true; - return false; - } - // both Condition Register and Target are source operands - bool isDstOperand(int operandIndex) override { - return false; - } - int getOperandSize(int operandIndex) override { - assert(operandIndex >= 0 && operandIndex < getNumOperands()); - if (!operandIndex) - return target.opSize(); - else - return 1; - } - int - getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override - { - assert(operandIndex >= 0 && operandIndex < getNumOperands()); - if (!operandIndex) - return target.regIndex(); - else - return -1; - } - - // Operands = Target, Condition Register - int getNumOperands() override { - return 2; - } - }; - - template - void - CbrInstBase::generateDisassembly() - { - std::string widthClause; - - if (width != 1) { - widthClause = csprintf("_width(%d)", width); - } - - disassembly = csprintf("%s%s %s,%s", opcode, widthClause, - cond.disassemble(), target.disassemble()); - } - - template - void - CbrInstBase::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *w = gpuDynInst->wavefront(); - - const uint32_t curr_pc M5_VAR_USED = w->pc(); - const uint32_t curr_rpc = w->rpc(); - const VectorMask curr_mask = w->execMask(); - - /** - * TODO: can we move this pop outside the instruction, and - * into the wavefront? - */ - w->popFromReconvergenceStack(); - - // immediate post-dominator instruction - const uint32_t rpc = static_cast(ipdInstNum()); - if (curr_rpc != rpc) { - w->pushToReconvergenceStack(rpc, curr_rpc, curr_mask); - } - - // taken branch - const uint32_t true_pc = getTargetPc(); - VectorMask true_mask; - for (unsigned int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - true_mask[lane] = cond.get(w, lane) & curr_mask[lane]; - } - - // not taken branch - const uint32_t false_pc = nextInstAddr(); - assert(true_pc != false_pc); - if (false_pc != rpc && true_mask.count() < curr_mask.count()) { - VectorMask false_mask = curr_mask & ~true_mask; - w->pushToReconvergenceStack(false_pc, rpc, false_mask); - } - - if (true_pc != rpc && true_mask.count()) { - w->pushToReconvergenceStack(true_pc, rpc, true_mask); - } - assert(w->pc() != curr_pc); - } - - - class CbrDirectInst : public CbrInstBase - { - public: - CbrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) - : CbrInstBase(ib, obj) - { - } - // the source operand of a conditional branch is a Condition - // Register which is not stored in the VRF - // so we do not count it as a source-register operand - // even though, formally, it is one. - int numSrcRegOperands() { return 0; } - int numDstRegOperands() { return 0; } - }; - - class CbrIndirectInst : public CbrInstBase - { - public: - CbrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) - : CbrInstBase(ib, obj) - { - } - // one source operand of the conditional indirect branch is a Condition - // register which is not stored in the VRF so we do not count it - // as a source-register operand even though, formally, it is one. - int numSrcRegOperands() { return target.isVectorRegister(); } - int numDstRegOperands() { return 0; } - }; - - GPUStaticInst* decodeCbr(const Brig::BrigInstBase *ib, - const BrigObject *obj); - - template - class BrInstBase : public HsailGPUStaticInst - { - public: - void generateDisassembly() override; - - ImmOperand width; - TargetType target; - - BrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj) - : HsailGPUStaticInst(obj, "br") - { - setFlag(Branch); - setFlag(UnconditionalJump); - width.init(((Brig::BrigInstBr *)ib)->width, obj); - unsigned op_offs = obj->getOperandPtr(ib->operands, 0); - target.init(op_offs, obj); - } - - uint32_t getTargetPc() override { return target.getTarget(0, 0); } - - void execute(GPUDynInstPtr gpuDynInst) override; - bool isVectorRegister(int operandIndex) override { - assert(operandIndex >= 0 && operandIndex < getNumOperands()); - return target.isVectorRegister(); - } - bool isCondRegister(int operandIndex) override { - assert(operandIndex >= 0 && operandIndex < getNumOperands()); - return target.isCondRegister(); - } - bool isScalarRegister(int operandIndex) override { - assert(operandIndex >= 0 && operandIndex < getNumOperands()); - return target.isScalarRegister(); - } - bool isSrcOperand(int operandIndex) override { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - return true; - } - bool isDstOperand(int operandIndex) override { return false; } - int getOperandSize(int operandIndex) override { - assert(operandIndex >= 0 && operandIndex < getNumOperands()); - return target.opSize(); - } - int - getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override - { - assert(operandIndex >= 0 && operandIndex < getNumOperands()); - return target.regIndex(); - } - int getNumOperands() override { return 1; } - }; - - template - void - BrInstBase::generateDisassembly() - { - std::string widthClause; - - if (width.bits != 1) { - widthClause = csprintf("_width(%d)", width.bits); - } - - disassembly = csprintf("%s%s %s", opcode, widthClause, - target.disassemble()); - } - - template - void - BrInstBase::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *w = gpuDynInst->wavefront(); - - if (getTargetPc() == w->rpc()) { - w->popFromReconvergenceStack(); - } else { - // Rpc and execution mask remain the same - w->pc(getTargetPc()); - } - } - - class BrDirectInst : public BrInstBase - { - public: - BrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) - : BrInstBase(ib, obj) - { - } - - int numSrcRegOperands() { return 0; } - int numDstRegOperands() { return 0; } - }; - - class BrIndirectInst : public BrInstBase - { - public: - BrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj) - : BrInstBase(ib, obj) - { - } - int numSrcRegOperands() { return target.isVectorRegister(); } - int numDstRegOperands() { return 0; } - }; - - GPUStaticInst* decodeBr(const Brig::BrigInstBase *ib, - const BrigObject *obj); -} // namespace HsailISA - -#endif // __ARCH_HSAIL_INSTS_BRANCH_HH__ diff --git a/src/arch/hsail/insts/decl.hh b/src/arch/hsail/insts/decl.hh deleted file mode 100644 index 3132a425a..000000000 --- a/src/arch/hsail/insts/decl.hh +++ /dev/null @@ -1,1298 +0,0 @@ -/* - * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: Steve Reinhardt - */ - -#ifndef __ARCH_HSAIL_INSTS_DECL_HH__ -#define __ARCH_HSAIL_INSTS_DECL_HH__ - -#include - -#include "arch/hsail/insts/gpu_static_inst.hh" -#include "arch/hsail/operand.hh" -#include "debug/HSAIL.hh" -#include "gpu-compute/gpu_dyn_inst.hh" -#include "gpu-compute/shader.hh" - -namespace HsailISA -{ - template - class HsailOperandType - { - public: - typedef _DestOperand DestOperand; - typedef _SrcOperand SrcOperand; - }; - - typedef HsailOperandType CRegOperandType; - typedef HsailOperandType SRegOperandType; - typedef HsailOperandType DRegOperandType; - - // The IsBits parameter serves only to disambiguate tbhe B* types from - // the U* types, which otherwise would be identical (and - // indistinguishable). - template - class HsailDataType - { - public: - typedef _OperandType OperandType; - typedef _CType CType; - static const Enums::MemType memType = _memType; - static const vgpr_type vgprType = _vgprType; - static const char *label; - }; - - typedef HsailDataType B1; - typedef HsailDataType B8; - - typedef HsailDataType B16; - - typedef HsailDataType B32; - - typedef HsailDataType B64; - - typedef HsailDataType S8; - typedef HsailDataType S16; - typedef HsailDataType S32; - typedef HsailDataType S64; - - typedef HsailDataType U8; - typedef HsailDataType U16; - typedef HsailDataType U32; - typedef HsailDataType U64; - - typedef HsailDataType F32; - typedef HsailDataType F64; - - template - class CommonInstBase : public HsailGPUStaticInst - { - protected: - typename DestOperandType::DestOperand dest; - typename SrcOperandType::SrcOperand src[NumSrcOperands]; - - void - generateDisassembly() - { - disassembly = csprintf("%s%s %s", opcode, opcode_suffix(), - dest.disassemble()); - - for (int i = 0; i < NumSrcOperands; ++i) { - disassembly += ","; - disassembly += src[i].disassemble(); - } - } - - virtual std::string opcode_suffix() = 0; - - public: - CommonInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, - const char *opcode) - : HsailGPUStaticInst(obj, opcode) - { - setFlag(ALU); - - unsigned op_offs = obj->getOperandPtr(ib->operands, 0); - - dest.init(op_offs, obj); - - for (int i = 0; i < NumSrcOperands; ++i) { - op_offs = obj->getOperandPtr(ib->operands, i + 1); - src[i].init(op_offs, obj); - } - } - - bool isVectorRegister(int operandIndex) { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (operandIndex < NumSrcOperands) - return src[operandIndex].isVectorRegister(); - else - return dest.isVectorRegister(); - } - bool isCondRegister(int operandIndex) { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (operandIndex < NumSrcOperands) - return src[operandIndex].isCondRegister(); - else - return dest.isCondRegister(); - } - bool isScalarRegister(int operandIndex) { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (operandIndex < NumSrcOperands) - return src[operandIndex].isScalarRegister(); - else - return dest.isScalarRegister(); - } - bool isSrcOperand(int operandIndex) { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (operandIndex < NumSrcOperands) - return true; - return false; - } - - bool isDstOperand(int operandIndex) { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (operandIndex >= NumSrcOperands) - return true; - return false; - } - int getOperandSize(int operandIndex) { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (operandIndex < NumSrcOperands) - return src[operandIndex].opSize(); - else - return dest.opSize(); - } - int - getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) - { - assert(operandIndex >= 0 && operandIndex < getNumOperands()); - - if (operandIndex < NumSrcOperands) - return src[operandIndex].regIndex(); - else - return dest.regIndex(); - } - int numSrcRegOperands() { - int operands = 0; - for (int i = 0; i < NumSrcOperands; i++) { - if (src[i].isVectorRegister()) { - operands++; - } - } - return operands; - } - int numDstRegOperands() { return dest.isVectorRegister(); } - int getNumOperands() { return NumSrcOperands + 1; } - }; - - template - class ArithInst : public CommonInstBase - { - public: - std::string opcode_suffix() { return csprintf("_%s", DataType::label); } - - ArithInst(const Brig::BrigInstBase *ib, const BrigObject *obj, - const char *opcode) - : CommonInstBase(ib, obj, opcode) - { - } - }; - - template - class ThreeNonUniformSourceInstBase : public HsailGPUStaticInst - { - protected: - typename DestOperandType::DestOperand dest; - typename Src0OperandType::SrcOperand src0; - typename Src1OperandType::SrcOperand src1; - typename Src2OperandType::SrcOperand src2; - - void - generateDisassembly() - { - disassembly = csprintf("%s %s,%s,%s,%s", opcode, dest.disassemble(), - src0.disassemble(), src1.disassemble(), - src2.disassemble()); - } - - public: - ThreeNonUniformSourceInstBase(const Brig::BrigInstBase *ib, - const BrigObject *obj, - const char *opcode) - : HsailGPUStaticInst(obj, opcode) - { - setFlag(ALU); - - unsigned op_offs = obj->getOperandPtr(ib->operands, 0); - dest.init(op_offs, obj); - - op_offs = obj->getOperandPtr(ib->operands, 1); - src0.init(op_offs, obj); - - op_offs = obj->getOperandPtr(ib->operands, 2); - src1.init(op_offs, obj); - - op_offs = obj->getOperandPtr(ib->operands, 3); - src2.init(op_offs, obj); - } - - bool isVectorRegister(int operandIndex) { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (!operandIndex) - return src0.isVectorRegister(); - else if (operandIndex == 1) - return src1.isVectorRegister(); - else if (operandIndex == 2) - return src2.isVectorRegister(); - else - return dest.isVectorRegister(); - } - bool isCondRegister(int operandIndex) { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (!operandIndex) - return src0.isCondRegister(); - else if (operandIndex == 1) - return src1.isCondRegister(); - else if (operandIndex == 2) - return src2.isCondRegister(); - else - return dest.isCondRegister(); - } - bool isScalarRegister(int operandIndex) { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (!operandIndex) - return src0.isScalarRegister(); - else if (operandIndex == 1) - return src1.isScalarRegister(); - else if (operandIndex == 2) - return src2.isScalarRegister(); - else - return dest.isScalarRegister(); - } - bool isSrcOperand(int operandIndex) { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (operandIndex < 3) - return true; - else - return false; - } - bool isDstOperand(int operandIndex) { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (operandIndex >= 3) - return true; - else - return false; - } - int getOperandSize(int operandIndex) { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (!operandIndex) - return src0.opSize(); - else if (operandIndex == 1) - return src1.opSize(); - else if (operandIndex == 2) - return src2.opSize(); - else - return dest.opSize(); - } - - int - getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (!operandIndex) - return src0.regIndex(); - else if (operandIndex == 1) - return src1.regIndex(); - else if (operandIndex == 2) - return src2.regIndex(); - else - return dest.regIndex(); - } - - int numSrcRegOperands() { - int operands = 0; - if (src0.isVectorRegister()) { - operands++; - } - if (src1.isVectorRegister()) { - operands++; - } - if (src2.isVectorRegister()) { - operands++; - } - return operands; - } - int numDstRegOperands() { return dest.isVectorRegister(); } - int getNumOperands() { return 4; } - }; - - template - class ThreeNonUniformSourceInst : - public ThreeNonUniformSourceInstBase - { - public: - typedef typename DestDataType::CType DestCType; - typedef typename Src0DataType::CType Src0CType; - typedef typename Src1DataType::CType Src1CType; - typedef typename Src2DataType::CType Src2CType; - - ThreeNonUniformSourceInst(const Brig::BrigInstBase *ib, - const BrigObject *obj, const char *opcode) - : ThreeNonUniformSourceInstBase(ib, - obj, opcode) - { - } - }; - - template - class CmovInst : public ThreeNonUniformSourceInst - { - public: - CmovInst(const Brig::BrigInstBase *ib, const BrigObject *obj, - const char *opcode) - : ThreeNonUniformSourceInst(ib, obj, opcode) - { - } - }; - - template - class ExtractInsertInst : public ThreeNonUniformSourceInst - { - public: - ExtractInsertInst(const Brig::BrigInstBase *ib, const BrigObject *obj, - const char *opcode) - : ThreeNonUniformSourceInst(ib, obj, opcode) - { - } - }; - - template - class TwoNonUniformSourceInstBase : public HsailGPUStaticInst - { - protected: - typename DestOperandType::DestOperand dest; - typename Src0OperandType::SrcOperand src0; - typename Src1OperandType::SrcOperand src1; - - void - generateDisassembly() - { - disassembly = csprintf("%s %s,%s,%s", opcode, dest.disassemble(), - src0.disassemble(), src1.disassemble()); - } - - - public: - TwoNonUniformSourceInstBase(const Brig::BrigInstBase *ib, - const BrigObject *obj, const char *opcode) - : HsailGPUStaticInst(obj, opcode) - { - setFlag(ALU); - - unsigned op_offs = obj->getOperandPtr(ib->operands, 0); - dest.init(op_offs, obj); - - op_offs = obj->getOperandPtr(ib->operands, 1); - src0.init(op_offs, obj); - - op_offs = obj->getOperandPtr(ib->operands, 2); - src1.init(op_offs, obj); - } - bool isVectorRegister(int operandIndex) { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (!operandIndex) - return src0.isVectorRegister(); - else if (operandIndex == 1) - return src1.isVectorRegister(); - else - return dest.isVectorRegister(); - } - bool isCondRegister(int operandIndex) { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (!operandIndex) - return src0.isCondRegister(); - else if (operandIndex == 1) - return src1.isCondRegister(); - else - return dest.isCondRegister(); - } - bool isScalarRegister(int operandIndex) { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (!operandIndex) - return src0.isScalarRegister(); - else if (operandIndex == 1) - return src1.isScalarRegister(); - else - return dest.isScalarRegister(); - } - bool isSrcOperand(int operandIndex) { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (operandIndex < 2) - return true; - else - return false; - } - bool isDstOperand(int operandIndex) { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (operandIndex >= 2) - return true; - else - return false; - } - int getOperandSize(int operandIndex) { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (!operandIndex) - return src0.opSize(); - else if (operandIndex == 1) - return src1.opSize(); - else - return dest.opSize(); - } - - int - getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (!operandIndex) - return src0.regIndex(); - else if (operandIndex == 1) - return src1.regIndex(); - else - return dest.regIndex(); - } - - int numSrcRegOperands() { - int operands = 0; - if (src0.isVectorRegister()) { - operands++; - } - if (src1.isVectorRegister()) { - operands++; - } - return operands; - } - int numDstRegOperands() { return dest.isVectorRegister(); } - int getNumOperands() { return 3; } - }; - - template - class TwoNonUniformSourceInst : - public TwoNonUniformSourceInstBase - { - public: - typedef typename DestDataType::CType DestCType; - typedef typename Src0DataType::CType Src0CType; - typedef typename Src1DataType::CType Src1CType; - - TwoNonUniformSourceInst(const Brig::BrigInstBase *ib, - const BrigObject *obj, const char *opcode) - : TwoNonUniformSourceInstBase(ib, - obj, opcode) - { - } - }; - - // helper function for ClassInst - template - bool - fpclassify(T src0, uint32_t src1) - { - int fpclass = std::fpclassify(src0); - - if ((src1 & 0x3) && (fpclass == FP_NAN)) { - return true; - } - - if (src0 <= -0.0) { - if ((src1 & 0x4) && fpclass == FP_INFINITE) - return true; - if ((src1 & 0x8) && fpclass == FP_NORMAL) - return true; - if ((src1 & 0x10) && fpclass == FP_SUBNORMAL) - return true; - if ((src1 & 0x20) && fpclass == FP_ZERO) - return true; - } else { - if ((src1 & 0x40) && fpclass == FP_ZERO) - return true; - if ((src1 & 0x80) && fpclass == FP_SUBNORMAL) - return true; - if ((src1 & 0x100) && fpclass == FP_NORMAL) - return true; - if ((src1 & 0x200) && fpclass == FP_INFINITE) - return true; - } - return false; - } - - template - class ClassInst : public TwoNonUniformSourceInst - { - public: - ClassInst(const Brig::BrigInstBase *ib, const BrigObject *obj, - const char *opcode) - : TwoNonUniformSourceInst(ib, obj, opcode) - { - } - }; - - template - class ShiftInst : public TwoNonUniformSourceInst - { - public: - ShiftInst(const Brig::BrigInstBase *ib, const BrigObject *obj, - const char *opcode) - : TwoNonUniformSourceInst(ib, obj, opcode) - { - } - }; - - // helper function for CmpInst - template - bool - compare(T src0, T src1, Brig::BrigCompareOperation cmpOp) - { - using namespace Brig; - - switch (cmpOp) { - case BRIG_COMPARE_EQ: - case BRIG_COMPARE_EQU: - case BRIG_COMPARE_SEQ: - case BRIG_COMPARE_SEQU: - return (src0 == src1); - - case BRIG_COMPARE_NE: - case BRIG_COMPARE_NEU: - case BRIG_COMPARE_SNE: - case BRIG_COMPARE_SNEU: - return (src0 != src1); - - case BRIG_COMPARE_LT: - case BRIG_COMPARE_LTU: - case BRIG_COMPARE_SLT: - case BRIG_COMPARE_SLTU: - return (src0 < src1); - - case BRIG_COMPARE_LE: - case BRIG_COMPARE_LEU: - case BRIG_COMPARE_SLE: - case BRIG_COMPARE_SLEU: - return (src0 <= src1); - - case BRIG_COMPARE_GT: - case BRIG_COMPARE_GTU: - case BRIG_COMPARE_SGT: - case BRIG_COMPARE_SGTU: - return (src0 > src1); - - case BRIG_COMPARE_GE: - case BRIG_COMPARE_GEU: - case BRIG_COMPARE_SGE: - case BRIG_COMPARE_SGEU: - return (src0 >= src1); - - case BRIG_COMPARE_NUM: - case BRIG_COMPARE_SNUM: - return (src0 == src0) || (src1 == src1); - - case BRIG_COMPARE_NAN: - case BRIG_COMPARE_SNAN: - return (src0 != src0) || (src1 != src1); - - default: - fatal("Bad cmpOp value %d\n", (int)cmpOp); - } - } - - template - int32_t - firstbit(T src0) - { - if (!src0) - return -1; - - //handle positive and negative numbers - T tmp = ((int64_t)src0 < 0) ? (~src0) : (src0); - - //the starting pos is MSB - int pos = 8 * sizeof(T) - 1; - int cnt = 0; - - //search the first bit set to 1 - while (!(tmp & (1 << pos))) { - ++cnt; - --pos; - } - return cnt; - } - - const char* cmpOpToString(Brig::BrigCompareOperation cmpOp); - - template - class CmpInstBase : public CommonInstBase - { - protected: - Brig::BrigCompareOperation cmpOp; - - public: - CmpInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, - const char *_opcode) - : CommonInstBase(ib, obj, - _opcode) - { - assert(ib->base.kind == Brig::BRIG_KIND_INST_CMP); - Brig::BrigInstCmp *i = (Brig::BrigInstCmp*)ib; - cmpOp = (Brig::BrigCompareOperation)i->compare; - } - }; - - template - class CmpInst : public CmpInstBase - { - public: - std::string - opcode_suffix() - { - return csprintf("_%s_%s_%s", cmpOpToString(this->cmpOp), - DestDataType::label, SrcDataType::label); - } - - CmpInst(const Brig::BrigInstBase *ib, const BrigObject *obj, - const char *_opcode) - : CmpInstBase(ib, obj, _opcode) - { - } - }; - - template - class CvtInst : public CommonInstBase - { - public: - std::string opcode_suffix() - { - return csprintf("_%s_%s", DestDataType::label, SrcDataType::label); - } - - CvtInst(const Brig::BrigInstBase *ib, const BrigObject *obj, - const char *_opcode) - : CommonInstBase(ib, obj, _opcode) - { - } - }; - - template - class PopcountInst : - public CommonInstBase - { - public: - std::string opcode_suffix() - { - return csprintf("_%s_%s", DestDataType::label, SrcDataType::label); - } - - PopcountInst(const Brig::BrigInstBase *ib, const BrigObject *obj, - const char *_opcode) - : CommonInstBase(ib, obj, _opcode) - { - } - }; - - class Stub : public HsailGPUStaticInst - { - public: - Stub(const Brig::BrigInstBase *ib, const BrigObject *obj, - const char *_opcode) - : HsailGPUStaticInst(obj, _opcode) - { - } - - void generateDisassembly() override - { - disassembly = csprintf("%s", opcode); - } - - bool isVectorRegister(int operandIndex) override { return false; } - bool isCondRegister(int operandIndex) override { return false; } - bool isScalarRegister(int operandIndex) override { return false; } - bool isSrcOperand(int operandIndex) override { return false; } - bool isDstOperand(int operandIndex) override { return false; } - int getOperandSize(int operandIndex) override { return 0; } - - int - getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override - { - return -1; - } - - int numSrcRegOperands() override { return 0; } - int numDstRegOperands() override { return 0; } - int getNumOperands() override { return 0; } - }; - - class SpecialInstNoSrcNoDest : public HsailGPUStaticInst - { - public: - SpecialInstNoSrcNoDest(const Brig::BrigInstBase *ib, - const BrigObject *obj, const char *_opcode) - : HsailGPUStaticInst(obj, _opcode) - { - } - - bool isVectorRegister(int operandIndex) override { return false; } - bool isCondRegister(int operandIndex) override { return false; } - bool isScalarRegister(int operandIndex) override { return false; } - bool isSrcOperand(int operandIndex) override { return false; } - bool isDstOperand(int operandIndex) override { return false; } - int getOperandSize(int operandIndex) override { return 0; } - - int - getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override - { - return -1; - } - - int numSrcRegOperands() override { return 0; } - int numDstRegOperands() override { return 0; } - int getNumOperands() override { return 0; } - }; - - template - class SpecialInstNoSrcBase : public HsailGPUStaticInst - { - protected: - typename DestOperandType::DestOperand dest; - - void generateDisassembly() - { - disassembly = csprintf("%s %s", opcode, dest.disassemble()); - } - - public: - SpecialInstNoSrcBase(const Brig::BrigInstBase *ib, - const BrigObject *obj, const char *_opcode) - : HsailGPUStaticInst(obj, _opcode) - { - unsigned op_offs = obj->getOperandPtr(ib->operands, 0); - dest.init(op_offs, obj); - } - - bool isVectorRegister(int operandIndex) { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - return dest.isVectorRegister(); - } - bool isCondRegister(int operandIndex) { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - return dest.isCondRegister(); - } - bool isScalarRegister(int operandIndex) { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - return dest.isScalarRegister(); - } - bool isSrcOperand(int operandIndex) { return false; } - bool isDstOperand(int operandIndex) { return true; } - int getOperandSize(int operandIndex) { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - return dest.opSize(); - } - - int - getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - return dest.regIndex(); - } - - int numSrcRegOperands() { return 0; } - int numDstRegOperands() { return dest.isVectorRegister(); } - int getNumOperands() { return 1; } - }; - - template - class SpecialInstNoSrc : - public SpecialInstNoSrcBase - { - public: - typedef typename DestDataType::CType DestCType; - - SpecialInstNoSrc(const Brig::BrigInstBase *ib, const BrigObject *obj, - const char *_opcode) - : SpecialInstNoSrcBase(ib, obj, - _opcode) - { - } - }; - - template - class SpecialInst1SrcBase : public HsailGPUStaticInst - { - protected: - typedef int SrcCType; // used in execute() template - - typename DestOperandType::DestOperand dest; - ImmOperand src0; - - void - generateDisassembly() - { - disassembly = csprintf("%s %s,%s", opcode, dest.disassemble(), - src0.disassemble()); - } - - public: - SpecialInst1SrcBase(const Brig::BrigInstBase *ib, - const BrigObject *obj, const char *_opcode) - : HsailGPUStaticInst(obj, _opcode) - { - setFlag(ALU); - - unsigned op_offs = obj->getOperandPtr(ib->operands, 0); - dest.init(op_offs, obj); - - op_offs = obj->getOperandPtr(ib->operands, 1); - src0.init(op_offs, obj); - } - bool isVectorRegister(int operandIndex) { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - return dest.isVectorRegister(); - } - bool isCondRegister(int operandIndex) { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - return dest.isCondRegister(); - } - bool isScalarRegister(int operandIndex) { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - return dest.isScalarRegister(); - } - bool isSrcOperand(int operandIndex) { return false; } - bool isDstOperand(int operandIndex) { return true; } - int getOperandSize(int operandIndex) { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - return dest.opSize(); - } - - int - getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - return dest.regIndex(); - } - - int numSrcRegOperands() { return 0; } - int numDstRegOperands() { return dest.isVectorRegister(); } - int getNumOperands() { return 1; } - }; - - template - class SpecialInst1Src : - public SpecialInst1SrcBase - { - public: - typedef typename DestDataType::CType DestCType; - - SpecialInst1Src(const Brig::BrigInstBase *ib, const BrigObject *obj, - const char *_opcode) - : SpecialInst1SrcBase(ib, obj, - _opcode) - { - } - }; - - class Ret : public SpecialInstNoSrcNoDest - { - public: - typedef SpecialInstNoSrcNoDest Base; - - Ret(const Brig::BrigInstBase *ib, const BrigObject *obj) - : Base(ib, obj, "ret") - { - setFlag(GPUStaticInst::Return); - } - - void execute(GPUDynInstPtr gpuDynInst); - }; - - class Barrier : public SpecialInstNoSrcNoDest - { - public: - typedef SpecialInstNoSrcNoDest Base; - uint8_t width; - - Barrier(const Brig::BrigInstBase *ib, const BrigObject *obj) - : Base(ib, obj, "barrier") - { - setFlag(GPUStaticInst::MemBarrier); - assert(ib->base.kind == Brig::BRIG_KIND_INST_BR); - width = (uint8_t)((Brig::BrigInstBr*)ib)->width; - } - - void execute(GPUDynInstPtr gpuDynInst); - }; - - class MemFence : public SpecialInstNoSrcNoDest - { - public: - typedef SpecialInstNoSrcNoDest Base; - - Brig::BrigMemoryOrder memFenceMemOrder; - Brig::BrigMemoryScope memFenceScopeSegGroup; - Brig::BrigMemoryScope memFenceScopeSegGlobal; - Brig::BrigMemoryScope memFenceScopeSegImage; - - MemFence(const Brig::BrigInstBase *ib, const BrigObject *obj) - : Base(ib, obj, "memfence") - { - assert(ib->base.kind == Brig::BRIG_KIND_INST_MEM_FENCE); - - memFenceScopeSegGlobal = (Brig::BrigMemoryScope) - ((Brig::BrigInstMemFence*)ib)->globalSegmentMemoryScope; - - memFenceScopeSegGroup = (Brig::BrigMemoryScope) - ((Brig::BrigInstMemFence*)ib)->groupSegmentMemoryScope; - - memFenceScopeSegImage = (Brig::BrigMemoryScope) - ((Brig::BrigInstMemFence*)ib)->imageSegmentMemoryScope; - - memFenceMemOrder = (Brig::BrigMemoryOrder) - ((Brig::BrigInstMemFence*)ib)->memoryOrder; - - setFlag(MemoryRef); - setFlag(GPUStaticInst::MemFence); - - switch (memFenceMemOrder) { - case Brig::BRIG_MEMORY_ORDER_NONE: - setFlag(NoOrder); - break; - case Brig::BRIG_MEMORY_ORDER_RELAXED: - setFlag(RelaxedOrder); - break; - case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE: - setFlag(Acquire); - break; - case Brig::BRIG_MEMORY_ORDER_SC_RELEASE: - setFlag(Release); - break; - case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE: - setFlag(AcquireRelease); - break; - default: - fatal("MemInst has bad BrigMemoryOrder\n"); - } - - // set inst flags based on scopes - if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE && - memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) { - setFlag(GPUStaticInst::GlobalSegment); - - /** - * A memory fence that has scope for - * both segments will use the global - * segment, and be executed in the - * global memory pipeline, therefore, - * we set the segment to match the - * global scope only - */ - switch (memFenceScopeSegGlobal) { - case Brig::BRIG_MEMORY_SCOPE_NONE: - setFlag(NoScope); - break; - case Brig::BRIG_MEMORY_SCOPE_WORKITEM: - setFlag(WorkitemScope); - break; - case Brig::BRIG_MEMORY_SCOPE_WORKGROUP: - setFlag(WorkgroupScope); - break; - case Brig::BRIG_MEMORY_SCOPE_AGENT: - setFlag(DeviceScope); - break; - case Brig::BRIG_MEMORY_SCOPE_SYSTEM: - setFlag(SystemScope); - break; - default: - fatal("MemFence has bad global scope type\n"); - } - } else if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE) { - setFlag(GPUStaticInst::GlobalSegment); - - switch (memFenceScopeSegGlobal) { - case Brig::BRIG_MEMORY_SCOPE_NONE: - setFlag(NoScope); - break; - case Brig::BRIG_MEMORY_SCOPE_WORKITEM: - setFlag(WorkitemScope); - break; - case Brig::BRIG_MEMORY_SCOPE_WORKGROUP: - setFlag(WorkgroupScope); - break; - case Brig::BRIG_MEMORY_SCOPE_AGENT: - setFlag(DeviceScope); - break; - case Brig::BRIG_MEMORY_SCOPE_SYSTEM: - setFlag(SystemScope); - break; - default: - fatal("MemFence has bad global scope type\n"); - } - } else if (memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) { - setFlag(GPUStaticInst::GroupSegment); - - switch (memFenceScopeSegGroup) { - case Brig::BRIG_MEMORY_SCOPE_NONE: - setFlag(NoScope); - break; - case Brig::BRIG_MEMORY_SCOPE_WORKITEM: - setFlag(WorkitemScope); - break; - case Brig::BRIG_MEMORY_SCOPE_WORKGROUP: - setFlag(WorkgroupScope); - break; - case Brig::BRIG_MEMORY_SCOPE_AGENT: - setFlag(DeviceScope); - break; - case Brig::BRIG_MEMORY_SCOPE_SYSTEM: - setFlag(SystemScope); - break; - default: - fatal("MemFence has bad group scope type\n"); - } - } else { - fatal("MemFence constructor: bad scope specifiers\n"); - } - } - - void - initiateAcc(GPUDynInstPtr gpuDynInst) - { - Wavefront *wave = gpuDynInst->wavefront(); - wave->computeUnit->injectGlobalMemFence(gpuDynInst); - } - - void - execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *w = gpuDynInst->wavefront(); - // 2 cases: - // * memfence to a sequentially consistent memory (e.g., LDS). - // These can be handled as no-ops. - // * memfence to a relaxed consistency cache (e.g., Hermes, Viper, - // etc.). We send a packet, tagged with the memory order and - // scope, and let the GPU coalescer handle it. - - if (isGlobalSeg()) { - gpuDynInst->simdId = w->simdId; - gpuDynInst->wfSlotId = w->wfSlotId; - gpuDynInst->wfDynId = w->wfDynId; - gpuDynInst->kern_id = w->kernId; - gpuDynInst->cu_id = w->computeUnit->cu_id; - - gpuDynInst->useContinuation = false; - GlobalMemPipeline* gmp = &(w->computeUnit->globalMemoryPipe); - gmp->issueRequest(gpuDynInst); - - w->wrGmReqsInPipe--; - w->rdGmReqsInPipe--; - w->memReqsInPipe--; - w->outstandingReqs++; - } else if (isGroupSeg()) { - // no-op - } else { - fatal("MemFence execute: bad op type\n"); - } - } - }; - - class Call : public HsailGPUStaticInst - { - public: - // private helper functions - void calcAddr(Wavefront* w, GPUDynInstPtr m); - - void - generateDisassembly() - { - if (dest.disassemble() == "") { - disassembly = csprintf("%s %s (%s)", opcode, src0.disassemble(), - src1.disassemble()); - } else { - disassembly = csprintf("%s %s (%s) (%s)", opcode, - src0.disassemble(), dest.disassemble(), - src1.disassemble()); - } - } - - bool - isPseudoOp() - { - std::string func_name = src0.disassemble(); - if (func_name.find("__gem5_hsail_op") != std::string::npos) { - return true; - } - return false; - } - - // member variables - ListOperand dest; - FunctionRefOperand src0; - ListOperand src1; - HsailCode *func_ptr; - - // exec function for pseudo instructions mapped on top of call opcode - void execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst); - - // user-defined pseudo instructions - void MagicPrintLane(Wavefront *w); - void MagicPrintLane64(Wavefront *w); - void MagicPrintWF32(Wavefront *w); - void MagicPrintWF64(Wavefront *w); - void MagicPrintWFFloat(Wavefront *w); - void MagicSimBreak(Wavefront *w); - void MagicPrefixSum(Wavefront *w); - void MagicReduction(Wavefront *w); - void MagicMaskLower(Wavefront *w); - void MagicMaskUpper(Wavefront *w); - void MagicJoinWFBar(Wavefront *w); - void MagicWaitWFBar(Wavefront *w); - void MagicPanic(Wavefront *w); - - void MagicAtomicNRAddGlobalU32Reg(Wavefront *w, - GPUDynInstPtr gpuDynInst); - - void MagicAtomicNRAddGroupU32Reg(Wavefront *w, - GPUDynInstPtr gpuDynInst); - - void MagicLoadGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst); - - void MagicXactCasLd(Wavefront *w); - void MagicMostSigThread(Wavefront *w); - void MagicMostSigBroadcast(Wavefront *w); - - void MagicPrintWF32ID(Wavefront *w); - void MagicPrintWFID64(Wavefront *w); - - Call(const Brig::BrigInstBase *ib, const BrigObject *obj) - : HsailGPUStaticInst(obj, "call") - { - setFlag(ALU); - unsigned op_offs = obj->getOperandPtr(ib->operands, 0); - dest.init(op_offs, obj); - op_offs = obj->getOperandPtr(ib->operands, 1); - src0.init(op_offs, obj); - - func_ptr = nullptr; - std::string func_name = src0.disassemble(); - if (!isPseudoOp()) { - func_ptr = dynamic_cast(obj-> - getFunction(func_name)); - - if (!func_ptr) - fatal("call::exec cannot find function: %s\n", func_name); - } - - op_offs = obj->getOperandPtr(ib->operands, 2); - src1.init(op_offs, obj); - } - - bool isVectorRegister(int operandIndex) { return false; } - bool isCondRegister(int operandIndex) { return false; } - bool isScalarRegister(int operandIndex) { return false; } - bool isSrcOperand(int operandIndex) { return false; } - bool isDstOperand(int operandIndex) { return false; } - int getOperandSize(int operandIndex) { return 0; } - - int - getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) - { - return -1; - } - - void - execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *w = gpuDynInst->wavefront(); - - std::string func_name = src0.disassemble(); - if (isPseudoOp()) { - execPseudoInst(w, gpuDynInst); - } else { - fatal("Native HSAIL functions are not yet implemented: %s\n", - func_name); - } - } - int numSrcRegOperands() { return 0; } - int numDstRegOperands() { return 0; } - int getNumOperands() { return 2; } - }; - - template T heynot(T arg) { return ~arg; } - template<> inline bool heynot(bool arg) { return !arg; } - - - /* Explicitly declare template static member variables to avoid - * warnings in some clang versions - */ - template<> const char *B1::label; - template<> const char *B8::label; - template<> const char *B16::label; - template<> const char *B32::label; - template<> const char *B64::label; - template<> const char *S8::label; - template<> const char *S16::label; - template<> const char *S32::label; - template<> const char *S64::label; - template<> const char *U8::label; - template<> const char *U16::label; - template<> const char *U32::label; - template<> const char *U64::label; - template<> const char *F32::label; - template<> const char *F64::label; - -} // namespace HsailISA - -#endif // __ARCH_HSAIL_INSTS_DECL_HH__ diff --git a/src/arch/hsail/insts/gpu_static_inst.cc b/src/arch/hsail/insts/gpu_static_inst.cc deleted file mode 100644 index dba2756d0..000000000 --- a/src/arch/hsail/insts/gpu_static_inst.cc +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: Anthony Gutierrez - */ - -#include "arch/hsail/insts/gpu_static_inst.hh" - -#include "gpu-compute/brig_object.hh" - -namespace HsailISA -{ - HsailGPUStaticInst::HsailGPUStaticInst(const BrigObject *obj, - const std::string &opcode) - : GPUStaticInst(opcode), hsailCode(obj->currentCode) - { - } - - void - HsailGPUStaticInst::generateDisassembly() - { - disassembly = opcode; - } -} // namespace HsailISA diff --git a/src/arch/hsail/insts/gpu_static_inst.hh b/src/arch/hsail/insts/gpu_static_inst.hh deleted file mode 100644 index 0bddcac80..000000000 --- a/src/arch/hsail/insts/gpu_static_inst.hh +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: Anthony Gutierrez - */ - -#ifndef __ARCH_HSAIL_INSTS_GPU_STATIC_INST_HH__ -#define __ARCH_HSAIL_INSTS_GPU_STATIC_INST_HH__ - -/* - * @file gpu_static_inst.hh - * - * Defines the base class representing HSAIL GPU static instructions. - */ - -#include "arch/hsail/gpu_types.hh" -#include "gpu-compute/gpu_static_inst.hh" - -class BrigObject; -class HsailCode; - -namespace HsailISA -{ - class HsailGPUStaticInst : public GPUStaticInst - { - public: - HsailGPUStaticInst(const BrigObject *obj, const std::string &opcode); - void generateDisassembly() override; - int instSize() const override { return sizeof(RawMachInst); } - bool isValid() const override { return true; } - - protected: - HsailCode *hsailCode; - }; -} // namespace HsailISA - -#endif // __ARCH_HSAIL_INSTS_GPU_STATIC_INST_HH__ diff --git a/src/arch/hsail/insts/main.cc b/src/arch/hsail/insts/main.cc deleted file mode 100644 index 783689dd5..000000000 --- a/src/arch/hsail/insts/main.cc +++ /dev/null @@ -1,209 +0,0 @@ -/* - * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: Steve Reinhardt - */ - -#include "arch/hsail/insts/decl.hh" -#include "debug/GPUExec.hh" -#include "gpu-compute/dispatcher.hh" -#include "gpu-compute/simple_pool_manager.hh" - -namespace HsailISA -{ - template<> const char *B1::label = "b1"; - template<> const char *B8::label = "b8"; - template<> const char *B16::label = "b16"; - template<> const char *B32::label = "b32"; - template<> const char *B64::label = "b64"; - - template<> const char *S8::label = "s8"; - template<> const char *S16::label = "s16"; - template<> const char *S32::label = "s32"; - template<> const char *S64::label = "s64"; - - template<> const char *U8::label = "u8"; - template<> const char *U16::label = "u16"; - template<> const char *U32::label = "u32"; - template<> const char *U64::label = "u64"; - - template<> const char *F32::label = "f32"; - template<> const char *F64::label = "f64"; - - const char* - cmpOpToString(Brig::BrigCompareOperation cmpOp) - { - using namespace Brig; - - switch (cmpOp) { - case BRIG_COMPARE_EQ: - return "eq"; - case BRIG_COMPARE_NE: - return "ne"; - case BRIG_COMPARE_LT: - return "lt"; - case BRIG_COMPARE_LE: - return "le"; - case BRIG_COMPARE_GT: - return "gt"; - case BRIG_COMPARE_GE: - return "ge"; - case BRIG_COMPARE_EQU: - return "equ"; - case BRIG_COMPARE_NEU: - return "neu"; - case BRIG_COMPARE_LTU: - return "ltu"; - case BRIG_COMPARE_LEU: - return "leu"; - case BRIG_COMPARE_GTU: - return "gtu"; - case BRIG_COMPARE_GEU: - return "geu"; - case BRIG_COMPARE_NUM: - return "num"; - case BRIG_COMPARE_NAN: - return "nan"; - case BRIG_COMPARE_SEQ: - return "seq"; - case BRIG_COMPARE_SNE: - return "sne"; - case BRIG_COMPARE_SLT: - return "slt"; - case BRIG_COMPARE_SLE: - return "sle"; - case BRIG_COMPARE_SGT: - return "sgt"; - case BRIG_COMPARE_SGE: - return "sge"; - case BRIG_COMPARE_SGEU: - return "sgeu"; - case BRIG_COMPARE_SEQU: - return "sequ"; - case BRIG_COMPARE_SNEU: - return "sneu"; - case BRIG_COMPARE_SLTU: - return "sltu"; - case BRIG_COMPARE_SLEU: - return "sleu"; - case BRIG_COMPARE_SNUM: - return "snum"; - case BRIG_COMPARE_SNAN: - return "snan"; - case BRIG_COMPARE_SGTU: - return "sgtu"; - default: - return "unknown"; - } - } - - void - Ret::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *w = gpuDynInst->wavefront(); - - const VectorMask &mask = w->getPred(); - - // mask off completed work-items - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - w->initMask[lane] = 0; - } - - } - - // delete extra instructions fetched for completed work-items - w->instructionBuffer.erase(w->instructionBuffer.begin() + 1, - w->instructionBuffer.end()); - if (w->pendingFetch) { - w->dropFetch = true; - } - - // if all work-items have completed, then wave-front is done - if (w->initMask.none()) { - w->status = Wavefront::S_STOPPED; - - int32_t refCount = w->computeUnit->getLds(). - decreaseRefCounter(w->dispatchId, w->wgId); - - DPRINTF(GPUExec, "CU%d: decrease ref ctr WG[%d] to [%d]\n", - w->computeUnit->cu_id, w->wgId, refCount); - - // free the vector registers of the completed wavefront - w->computeUnit->vectorRegsReserved[w->simdId] -= - w->reservedVectorRegs; - - assert(w->computeUnit->vectorRegsReserved[w->simdId] >= 0); - - uint32_t endIndex = (w->startVgprIndex + - w->reservedVectorRegs - 1) % - w->computeUnit->vrf[w->simdId]->numRegs(); - - w->computeUnit->vrf[w->simdId]->manager-> - freeRegion(w->startVgprIndex, endIndex); - - w->reservedVectorRegs = 0; - w->startVgprIndex = 0; - w->computeUnit->completedWfs++; - - DPRINTF(GPUExec, "Doing return for CU%d: WF[%d][%d][%d]\n", - w->computeUnit->cu_id, w->simdId, w->wfSlotId, w->wfDynId); - - if (!refCount) { - setFlag(SystemScope); - setFlag(Release); - setFlag(GlobalSegment); - // Notify Memory System of Kernel Completion - // Kernel End = isKernel + isRelease - w->status = Wavefront::S_RETURNING; - GPUDynInstPtr local_mempacket = gpuDynInst; - local_mempacket->useContinuation = false; - local_mempacket->simdId = w->simdId; - local_mempacket->wfSlotId = w->wfSlotId; - local_mempacket->wfDynId = w->wfDynId; - w->computeUnit->injectGlobalMemFence(local_mempacket, true); - } else { - w->computeUnit->shader->dispatcher->scheduleDispatch(); - } - } - } - - void - Barrier::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *w = gpuDynInst->wavefront(); - - assert(w->barrierCnt == w->oldBarrierCnt); - w->barrierCnt = w->oldBarrierCnt + 1; - w->stalledAtBarrier = true; - } -} // namespace HsailISA diff --git a/src/arch/hsail/insts/mem.cc b/src/arch/hsail/insts/mem.cc deleted file mode 100644 index 6a6928838..000000000 --- a/src/arch/hsail/insts/mem.cc +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: Steve Reinhardt - */ - -#include "arch/hsail/insts/mem.hh" - -#include "arch/hsail/Brig.h" - -using namespace Brig; - -namespace HsailISA -{ - const char* atomicOpToString(BrigAtomicOperation brigOp); - - const char* - atomicOpToString(BrigAtomicOperation brigOp) - { - switch (brigOp) { - case BRIG_ATOMIC_AND: - return "and"; - case BRIG_ATOMIC_OR: - return "or"; - case BRIG_ATOMIC_XOR: - return "xor"; - case BRIG_ATOMIC_CAS: - return "cas"; - case BRIG_ATOMIC_EXCH: - return "exch"; - case BRIG_ATOMIC_ADD: - return "add"; - case BRIG_ATOMIC_WRAPINC: - return "inc"; - case BRIG_ATOMIC_WRAPDEC: - return "dec"; - case BRIG_ATOMIC_MIN: - return "min"; - case BRIG_ATOMIC_MAX: - return "max"; - case BRIG_ATOMIC_SUB: - return "sub"; - default: - return "unknown"; - } - } -} // namespace HsailISA diff --git a/src/arch/hsail/insts/mem.hh b/src/arch/hsail/insts/mem.hh deleted file mode 100644 index 0c8f6ca1d..000000000 --- a/src/arch/hsail/insts/mem.hh +++ /dev/null @@ -1,1777 +0,0 @@ -/* - * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: Steve Reinhardt - */ - -#ifndef __ARCH_HSAIL_INSTS_MEM_HH__ -#define __ARCH_HSAIL_INSTS_MEM_HH__ - -#include - -#include "arch/hsail/insts/decl.hh" -#include "arch/hsail/insts/gpu_static_inst.hh" -#include "arch/hsail/operand.hh" -#include "gpu-compute/compute_unit.hh" - -namespace HsailISA -{ - class MemInst - { - public: - MemInst() : size(0), addr_operand(nullptr) { } - - MemInst(Enums::MemType m_type) - { - if (m_type == Enums::M_U64 || - m_type == Enums::M_S64 || - m_type == Enums::M_F64) { - size = 8; - } else if (m_type == Enums::M_U32 || - m_type == Enums::M_S32 || - m_type == Enums::M_F32) { - size = 4; - } else if (m_type == Enums::M_U16 || - m_type == Enums::M_S16 || - m_type == Enums::M_F16) { - size = 2; - } else { - size = 1; - } - - addr_operand = nullptr; - } - - void - init_addr(AddrOperandBase *_addr_operand) - { - addr_operand = _addr_operand; - } - - private: - int size; - AddrOperandBase *addr_operand; - - public: - int getMemOperandSize() { return size; } - AddrOperandBase *getAddressOperand() { return addr_operand; } - }; - - template - class LdaInstBase : public HsailGPUStaticInst - { - public: - typename DestOperandType::DestOperand dest; - AddrOperandType addr; - - LdaInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, - const char *_opcode) - : HsailGPUStaticInst(obj, _opcode) - { - using namespace Brig; - - setFlag(ALU); - - unsigned op_offs = obj->getOperandPtr(ib->operands, 0); - dest.init(op_offs, obj); - op_offs = obj->getOperandPtr(ib->operands, 1); - addr.init(op_offs, obj); - } - - int numSrcRegOperands() override - { return(this->addr.isVectorRegister()); } - int numDstRegOperands() override - { return dest.isVectorRegister(); } - bool isVectorRegister(int operandIndex) override - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - return((operandIndex == 0) ? dest.isVectorRegister() : - this->addr.isVectorRegister()); - } - bool isCondRegister(int operandIndex) override - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - return((operandIndex == 0) ? dest.isCondRegister() : - this->addr.isCondRegister()); - } - bool isScalarRegister(int operandIndex) override - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - return((operandIndex == 0) ? dest.isScalarRegister() : - this->addr.isScalarRegister()); - } - bool isSrcOperand(int operandIndex) override - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (operandIndex > 0) - return(this->addr.isVectorRegister()); - return false; - } - bool isDstOperand(int operandIndex) override { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - return(operandIndex == 0); - } - int getOperandSize(int operandIndex) override - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - return((operandIndex == 0) ? dest.opSize() : - this->addr.opSize()); - } - int - getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - return((operandIndex == 0) ? dest.regIndex() : - this->addr.regIndex()); - } - int getNumOperands() override - { - if (this->addr.isVectorRegister()) - return 2; - return 1; - } - }; - - template - class LdaInst : - public LdaInstBase, - public MemInst - { - public: - void generateDisassembly(); - - LdaInst(const Brig::BrigInstBase *ib, const BrigObject *obj, - const char *_opcode) - : LdaInstBase(ib, obj, _opcode) - { - init_addr(&this->addr); - } - - void execute(GPUDynInstPtr gpuDynInst); - }; - - template - GPUStaticInst* - decodeLda(const Brig::BrigInstBase *ib, const BrigObject *obj) - { - unsigned op_offs = obj->getOperandPtr(ib->operands, 1); - BrigRegOperandInfo regDataType = findRegDataType(op_offs, obj); - - if (regDataType.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) { - return new LdaInst(ib, obj, "ldas"); - } else if (regDataType.kind == Brig::BRIG_KIND_OPERAND_REGISTER) { - // V2/V4 not allowed - switch (regDataType.regKind) { - case Brig::BRIG_REGISTER_KIND_SINGLE: - return new LdaInst(ib, obj, "ldas"); - case Brig::BRIG_REGISTER_KIND_DOUBLE: - return new LdaInst(ib, obj, "ldas"); - default: - fatal("Bad ldas register operand type %d\n", regDataType.type); - } - } else { - fatal("Bad ldas register operand kind %d\n", regDataType.kind); - } - } - - template - class LdInstBase : public HsailGPUStaticInst - { - public: - Brig::BrigWidth8_t width; - typename DestOperandType::DestOperand dest; - AddrOperandType addr; - - Brig::BrigSegment segment; - Brig::BrigMemoryOrder memoryOrder; - Brig::BrigMemoryScope memoryScope; - unsigned int equivClass; - - LdInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, - const char *_opcode) - : HsailGPUStaticInst(obj, _opcode) - { - using namespace Brig; - - setFlag(MemoryRef); - setFlag(Load); - - if (ib->opcode == BRIG_OPCODE_LD) { - const BrigInstMem *ldst = (const BrigInstMem*)ib; - - segment = (BrigSegment)ldst->segment; - memoryOrder = BRIG_MEMORY_ORDER_NONE; - memoryScope = BRIG_MEMORY_SCOPE_NONE; - equivClass = ldst->equivClass; - - width = ldst->width; - unsigned op_offs = obj->getOperandPtr(ib->operands, 0); - const Brig::BrigOperand *brigOp = obj->getOperand(op_offs); - if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER) - dest.init(op_offs, obj); - - op_offs = obj->getOperandPtr(ib->operands, 1); - addr.init(op_offs, obj); - } else { - const BrigInstAtomic *at = (const BrigInstAtomic*)ib; - - segment = (BrigSegment)at->segment; - memoryOrder = (BrigMemoryOrder)at->memoryOrder; - memoryScope = (BrigMemoryScope)at->memoryScope; - equivClass = 0; - - width = BRIG_WIDTH_1; - unsigned op_offs = obj->getOperandPtr(ib->operands, 0); - const Brig::BrigOperand *brigOp = obj->getOperand(op_offs); - - if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER) - dest.init(op_offs, obj); - - op_offs = obj->getOperandPtr(ib->operands,1); - addr.init(op_offs, obj); - } - - switch (memoryOrder) { - case BRIG_MEMORY_ORDER_NONE: - setFlag(NoOrder); - break; - case BRIG_MEMORY_ORDER_RELAXED: - setFlag(RelaxedOrder); - break; - case BRIG_MEMORY_ORDER_SC_ACQUIRE: - setFlag(Acquire); - break; - case BRIG_MEMORY_ORDER_SC_RELEASE: - setFlag(Release); - break; - case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE: - setFlag(AcquireRelease); - break; - default: - fatal("LdInst has bad memory order type\n"); - } - - switch (memoryScope) { - case BRIG_MEMORY_SCOPE_NONE: - setFlag(NoScope); - break; - case BRIG_MEMORY_SCOPE_WORKITEM: - setFlag(WorkitemScope); - break; - case BRIG_MEMORY_SCOPE_WORKGROUP: - setFlag(WorkgroupScope); - break; - case BRIG_MEMORY_SCOPE_AGENT: - setFlag(DeviceScope); - break; - case BRIG_MEMORY_SCOPE_SYSTEM: - setFlag(SystemScope); - break; - default: - fatal("LdInst has bad memory scope type\n"); - } - - switch (segment) { - case BRIG_SEGMENT_GLOBAL: - setFlag(GlobalSegment); - break; - case BRIG_SEGMENT_GROUP: - setFlag(GroupSegment); - break; - case BRIG_SEGMENT_PRIVATE: - setFlag(PrivateSegment); - break; - case BRIG_SEGMENT_READONLY: - setFlag(ReadOnlySegment); - break; - case BRIG_SEGMENT_SPILL: - setFlag(SpillSegment); - break; - case BRIG_SEGMENT_FLAT: - setFlag(Flat); - break; - case BRIG_SEGMENT_KERNARG: - setFlag(KernArgSegment); - break; - case BRIG_SEGMENT_ARG: - setFlag(ArgSegment); - break; - default: - panic("Ld: segment %d not supported\n", segment); - } - } - - int numSrcRegOperands() override - { return(this->addr.isVectorRegister()); } - int numDstRegOperands() override { return dest.isVectorRegister(); } - int getNumOperands() override - { - if (this->addr.isVectorRegister()) - return 2; - else - return 1; - } - bool isVectorRegister(int operandIndex) override - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - return((operandIndex == 0) ? dest.isVectorRegister() : - this->addr.isVectorRegister()); - } - bool isCondRegister(int operandIndex) override - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - return((operandIndex == 0) ? dest.isCondRegister() : - this->addr.isCondRegister()); - } - bool isScalarRegister(int operandIndex) override - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - return((operandIndex == 0) ? dest.isScalarRegister() : - this->addr.isScalarRegister()); - } - bool isSrcOperand(int operandIndex) override - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (operandIndex > 0) - return(this->addr.isVectorRegister()); - return false; - } - bool isDstOperand(int operandIndex) override - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - return(operandIndex == 0); - } - int getOperandSize(int operandIndex) override - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - return((operandIndex == 0) ? dest.opSize() : - this->addr.opSize()); - } - int - getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - return((operandIndex == 0) ? dest.regIndex() : - this->addr.regIndex()); - } - }; - - template - class LdInst : - public LdInstBase, - public MemInst - { - typename DestDataType::OperandType::DestOperand dest_vect[4]; - uint16_t num_dest_operands; - void generateDisassembly() override; - - public: - LdInst(const Brig::BrigInstBase *ib, const BrigObject *obj, - const char *_opcode) - : LdInstBase(ib, obj, _opcode), - MemInst(MemDataType::memType) - { - init_addr(&this->addr); - - unsigned op_offs = obj->getOperandPtr(ib->operands,0); - const Brig::BrigOperand *brigOp = obj->getOperand(op_offs); - - if (brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) { - const Brig::BrigOperandOperandList *brigRegVecOp = - (const Brig::BrigOperandOperandList*)brigOp; - - num_dest_operands = - *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4; - - assert(num_dest_operands <= 4); - } else { - num_dest_operands = 1; - } - - if (num_dest_operands > 1) { - assert(brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST); - - for (int i = 0; i < num_dest_operands; ++i) { - dest_vect[i].init_from_vect(op_offs, obj, i); - } - } - } - - void - initiateAcc(GPUDynInstPtr gpuDynInst) override - { - typedef typename MemDataType::CType c0; - - gpuDynInst->statusBitVector = gpuDynInst->exec_mask; - - if (num_dest_operands > 1) { - for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) - if (gpuDynInst->exec_mask[i]) - gpuDynInst->statusVector.push_back(num_dest_operands); - else - gpuDynInst->statusVector.push_back(0); - } - - for (int k = 0; k < num_dest_operands; ++k) { - - c0 *d = &((c0*)gpuDynInst->d_data) - [k * gpuDynInst->computeUnit()->wfSize()]; - - for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) { - if (gpuDynInst->exec_mask[i]) { - Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0); - - if (this->isLocalMem()) { - // load from shared memory - *d = gpuDynInst->wavefront()->ldsChunk-> - read(vaddr); - } else { - RequestPtr req = std::make_shared( - vaddr, sizeof(c0), 0, - gpuDynInst->computeUnit()->masterId(), - 0, gpuDynInst->wfDynId); - - gpuDynInst->setRequestFlags(req); - PacketPtr pkt = new Packet(req, MemCmd::ReadReq); - pkt->dataStatic(d); - - if (gpuDynInst->computeUnit()->shader-> - separate_acquire_release && - gpuDynInst->isAcquire()) { - // if this load has acquire semantics, - // set the response continuation function - // to perform an Acquire request - gpuDynInst->execContinuation = - &GPUStaticInst::execLdAcq; - - gpuDynInst->useContinuation = true; - } else { - // the request will be finished when - // the load completes - gpuDynInst->useContinuation = false; - } - // translation is performed in sendRequest() - gpuDynInst->computeUnit()->sendRequest(gpuDynInst, - i, pkt); - } - } - ++d; - } - } - - gpuDynInst->updateStats(); - } - - void - completeAcc(GPUDynInstPtr gpuDynInst) override - { - typedef typename MemDataType::CType c1; - - constexpr bool is_vt_32 = DestDataType::vgprType == VT_32; - - /** - * this code essentially replaces the long if-else chain - * that was in used GlobalMemPipeline::exec() to infer the - * size (single/double) and type (floating point/integer) of - * the destination register. this is needed for load - * instructions because the loaded value and the - * destination type can be of different sizes, and we also - * need to know if the value we're writing back is floating - * point and signed/unsigned, so we can properly cast the - * writeback value - */ - typedef typename std::conditional::value, - float, typename std::conditional::value, - int32_t, uint32_t>::type>::type, - typename std::conditional::value, - double, typename std::conditional::value, - int64_t, uint64_t>::type>::type>::type c0; - - - Wavefront *w = gpuDynInst->wavefront(); - - std::vector regVec; - // iterate over number of destination register operands since - // this is a load - for (int k = 0; k < num_dest_operands; ++k) { - assert((sizeof(c1) * num_dest_operands) - <= MAX_WIDTH_FOR_MEM_INST); - - int dst = this->dest.regIndex() + k; - if (num_dest_operands > MAX_REGS_FOR_NON_VEC_MEM_INST) - dst = dest_vect[k].regIndex(); - // virtual->physical VGPR mapping - int physVgpr = w->remap(dst, sizeof(c0), 1); - // save the physical VGPR index - regVec.push_back(physVgpr); - - c1 *p1 = - &((c1*)gpuDynInst->d_data)[k * w->computeUnit->wfSize()]; - - for (int i = 0; i < w->computeUnit->wfSize(); ++i) { - if (gpuDynInst->exec_mask[i]) { - DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: " - "$%s%d <- %d global ld done (src = wavefront " - "ld inst)\n", w->computeUnit->cu_id, w->simdId, - w->wfSlotId, i, sizeof(c0) == 4 ? "s" : "d", - dst, *p1); - // write the value into the physical VGPR. This is a - // purely functional operation. No timing is modeled. - w->computeUnit->vrf[w->simdId]->write(physVgpr, - *p1, i); - } - ++p1; - } - } - - // Schedule the write operation of the load data on the VRF. - // This simply models the timing aspect of the VRF write operation. - // It does not modify the physical VGPR. - int loadVrfBankConflictCycles = gpuDynInst->computeUnit()-> - vrf[w->simdId]->exec(gpuDynInst->seqNum(), w, regVec, - sizeof(c0), gpuDynInst->time); - - if (this->isGlobalMem()) { - gpuDynInst->computeUnit()->globalMemoryPipe - .incLoadVRFBankConflictCycles(loadVrfBankConflictCycles); - } else { - assert(this->isLocalMem()); - gpuDynInst->computeUnit()->localMemoryPipe - .incLoadVRFBankConflictCycles(loadVrfBankConflictCycles); - } - } - - private: - void - execLdAcq(GPUDynInstPtr gpuDynInst) override - { - // after the load has complete and if the load has acquire - // semantics, issue an acquire request. - if (!this->isLocalMem()) { - if (gpuDynInst->computeUnit()->shader->separate_acquire_release - && gpuDynInst->isAcquire()) { - gpuDynInst->statusBitVector = VectorMask(1); - gpuDynInst->useContinuation = false; - // create request - RequestPtr req = std::make_shared(0, 0, 0, - gpuDynInst->computeUnit()->masterId(), - 0, gpuDynInst->wfDynId); - req->setFlags(Request::ACQUIRE); - gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req); - } - } - } - - public: - bool isVectorRegister(int operandIndex) override - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if ((num_dest_operands != getNumOperands()) && - (operandIndex == (getNumOperands()-1))) - return(this->addr.isVectorRegister()); - if (num_dest_operands > 1) { - return dest_vect[operandIndex].isVectorRegister(); - } - else if (num_dest_operands == 1) { - return LdInstBase::dest.isVectorRegister(); - } - return false; - } - bool isCondRegister(int operandIndex) override - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if ((num_dest_operands != getNumOperands()) && - (operandIndex == (getNumOperands()-1))) - return(this->addr.isCondRegister()); - if (num_dest_operands > 1) - return dest_vect[operandIndex].isCondRegister(); - else if (num_dest_operands == 1) - return LdInstBase::dest.isCondRegister(); - return false; - } - bool isScalarRegister(int operandIndex) override - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if ((num_dest_operands != getNumOperands()) && - (operandIndex == (getNumOperands()-1))) - return(this->addr.isScalarRegister()); - if (num_dest_operands > 1) - return dest_vect[operandIndex].isScalarRegister(); - else if (num_dest_operands == 1) - return LdInstBase::dest.isScalarRegister(); - return false; - } - bool isSrcOperand(int operandIndex) override - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if ((num_dest_operands != getNumOperands()) && - (operandIndex == (getNumOperands()-1))) - return(this->addr.isVectorRegister()); - return false; - } - bool isDstOperand(int operandIndex) override - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if ((num_dest_operands != getNumOperands()) && - (operandIndex == (getNumOperands()-1))) - return false; - return true; - } - int getOperandSize(int operandIndex) override - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if ((num_dest_operands != getNumOperands()) && - (operandIndex == (getNumOperands()-1))) - return(this->addr.opSize()); - if (num_dest_operands > 1) - return(dest_vect[operandIndex].opSize()); - else if (num_dest_operands == 1) - return(LdInstBase::dest.opSize()); - return 0; - } - int - getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if ((num_dest_operands != getNumOperands()) && - (operandIndex == (getNumOperands()-1))) - return(this->addr.regIndex()); - if (num_dest_operands > 1) - return(dest_vect[operandIndex].regIndex()); - else if (num_dest_operands == 1) - return(LdInstBase::dest.regIndex()); - return -1; - } - int getNumOperands() override - { - if (this->addr.isVectorRegister() || this->addr.isScalarRegister()) - return(num_dest_operands+1); - else - return(num_dest_operands); - } - void execute(GPUDynInstPtr gpuDynInst) override; - }; - - template - GPUStaticInst* - decodeLd2(const Brig::BrigInstBase *ib, const BrigObject *obj) - { - unsigned op_offs = obj->getOperandPtr(ib->operands,1); - BrigRegOperandInfo tmp = findRegDataType(op_offs, obj); - - if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) { - return new LdInst(ib, obj, "ld"); - } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER || - tmp.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) { - switch (tmp.regKind) { - case Brig::BRIG_REGISTER_KIND_SINGLE: - return new LdInst(ib, obj, "ld"); - case Brig::BRIG_REGISTER_KIND_DOUBLE: - return new LdInst(ib, obj, "ld"); - default: - fatal("Bad ld register operand type %d\n", tmp.regKind); - } - } else { - fatal("Bad ld register operand kind %d\n", tmp.kind); - } - } - - template - GPUStaticInst* - decodeLd(const Brig::BrigInstBase *ib, const BrigObject *obj) - { - unsigned op_offs = obj->getOperandPtr(ib->operands,0); - BrigRegOperandInfo dest = findRegDataType(op_offs, obj); - - assert(dest.kind == Brig::BRIG_KIND_OPERAND_REGISTER || - dest.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST); - switch(dest.regKind) { - case Brig::BRIG_REGISTER_KIND_SINGLE: - switch (ib->type) { - case Brig::BRIG_TYPE_B8: - case Brig::BRIG_TYPE_B16: - case Brig::BRIG_TYPE_B32: - return decodeLd2(ib, obj); - case Brig::BRIG_TYPE_U8: - case Brig::BRIG_TYPE_U16: - case Brig::BRIG_TYPE_U32: - return decodeLd2(ib, obj); - case Brig::BRIG_TYPE_S8: - case Brig::BRIG_TYPE_S16: - case Brig::BRIG_TYPE_S32: - return decodeLd2(ib, obj); - case Brig::BRIG_TYPE_F16: - case Brig::BRIG_TYPE_F32: - return decodeLd2(ib, obj); - default: - fatal("Bad ld register operand type %d, %d\n", - dest.regKind, ib->type); - }; - case Brig::BRIG_REGISTER_KIND_DOUBLE: - switch (ib->type) { - case Brig::BRIG_TYPE_B64: - return decodeLd2(ib, obj); - case Brig::BRIG_TYPE_U64: - return decodeLd2(ib, obj); - case Brig::BRIG_TYPE_S64: - return decodeLd2(ib, obj); - case Brig::BRIG_TYPE_F64: - return decodeLd2(ib, obj); - default: - fatal("Bad ld register operand type %d, %d\n", - dest.regKind, ib->type); - }; - default: - fatal("Bad ld register operand type %d, %d\n", dest.regKind, - ib->type); - } - } - - template - class StInstBase : public HsailGPUStaticInst - { - public: - typename SrcOperandType::SrcOperand src; - AddrOperandType addr; - - Brig::BrigSegment segment; - Brig::BrigMemoryScope memoryScope; - Brig::BrigMemoryOrder memoryOrder; - unsigned int equivClass; - - StInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, - const char *_opcode) - : HsailGPUStaticInst(obj, _opcode) - { - using namespace Brig; - - setFlag(MemoryRef); - setFlag(Store); - - if (ib->opcode == BRIG_OPCODE_ST) { - const BrigInstMem *ldst = (const BrigInstMem*)ib; - - segment = (BrigSegment)ldst->segment; - memoryOrder = BRIG_MEMORY_ORDER_NONE; - memoryScope = BRIG_MEMORY_SCOPE_NONE; - equivClass = ldst->equivClass; - - unsigned op_offs = obj->getOperandPtr(ib->operands, 0); - const BrigOperand *baseOp = obj->getOperand(op_offs); - - if ((baseOp->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) || - (baseOp->kind == BRIG_KIND_OPERAND_REGISTER)) { - src.init(op_offs, obj); - } - - op_offs = obj->getOperandPtr(ib->operands, 1); - addr.init(op_offs, obj); - } else { - const BrigInstAtomic *at = (const BrigInstAtomic*)ib; - - segment = (BrigSegment)at->segment; - memoryScope = (BrigMemoryScope)at->memoryScope; - memoryOrder = (BrigMemoryOrder)at->memoryOrder; - equivClass = 0; - - unsigned op_offs = obj->getOperandPtr(ib->operands, 0); - addr.init(op_offs, obj); - - op_offs = obj->getOperandPtr(ib->operands, 1); - src.init(op_offs, obj); - } - - switch (memoryOrder) { - case BRIG_MEMORY_ORDER_NONE: - setFlag(NoOrder); - break; - case BRIG_MEMORY_ORDER_RELAXED: - setFlag(RelaxedOrder); - break; - case BRIG_MEMORY_ORDER_SC_ACQUIRE: - setFlag(Acquire); - break; - case BRIG_MEMORY_ORDER_SC_RELEASE: - setFlag(Release); - break; - case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE: - setFlag(AcquireRelease); - break; - default: - fatal("StInst has bad memory order type\n"); - } - - switch (memoryScope) { - case BRIG_MEMORY_SCOPE_NONE: - setFlag(NoScope); - break; - case BRIG_MEMORY_SCOPE_WORKITEM: - setFlag(WorkitemScope); - break; - case BRIG_MEMORY_SCOPE_WORKGROUP: - setFlag(WorkgroupScope); - break; - case BRIG_MEMORY_SCOPE_AGENT: - setFlag(DeviceScope); - break; - case BRIG_MEMORY_SCOPE_SYSTEM: - setFlag(SystemScope); - break; - default: - fatal("StInst has bad memory scope type\n"); - } - - switch (segment) { - case BRIG_SEGMENT_GLOBAL: - setFlag(GlobalSegment); - break; - case BRIG_SEGMENT_GROUP: - setFlag(GroupSegment); - break; - case BRIG_SEGMENT_PRIVATE: - setFlag(PrivateSegment); - break; - case BRIG_SEGMENT_READONLY: - setFlag(ReadOnlySegment); - break; - case BRIG_SEGMENT_SPILL: - setFlag(SpillSegment); - break; - case BRIG_SEGMENT_FLAT: - setFlag(Flat); - break; - case BRIG_SEGMENT_ARG: - setFlag(ArgSegment); - break; - default: - panic("St: segment %d not supported\n", segment); - } - } - - int numDstRegOperands() override { return 0; } - int numSrcRegOperands() override - { - return src.isVectorRegister() + this->addr.isVectorRegister(); - } - int getNumOperands() override - { - if (this->addr.isVectorRegister() || this->addr.isScalarRegister()) - return 2; - else - return 1; - } - bool isVectorRegister(int operandIndex) override - { - assert(operandIndex >= 0 && operandIndex < getNumOperands()); - return !operandIndex ? src.isVectorRegister() : - this->addr.isVectorRegister(); - } - bool isCondRegister(int operandIndex) override - { - assert(operandIndex >= 0 && operandIndex < getNumOperands()); - return !operandIndex ? src.isCondRegister() : - this->addr.isCondRegister(); - } - bool isScalarRegister(int operandIndex) override - { - assert(operandIndex >= 0 && operandIndex < getNumOperands()); - return !operandIndex ? src.isScalarRegister() : - this->addr.isScalarRegister(); - } - bool isSrcOperand(int operandIndex) override - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - return true; - } - bool isDstOperand(int operandIndex) override { return false; } - int getOperandSize(int operandIndex) override - { - assert(operandIndex >= 0 && operandIndex < getNumOperands()); - return !operandIndex ? src.opSize() : this->addr.opSize(); - } - int - getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override - { - assert(operandIndex >= 0 && operandIndex < getNumOperands()); - return !operandIndex ? src.regIndex() : this->addr.regIndex(); - } - }; - - - template - class StInst : - public StInstBase, - public MemInst - { - public: - typename SrcDataType::OperandType::SrcOperand src_vect[4]; - uint16_t num_src_operands; - void generateDisassembly() override; - - StInst(const Brig::BrigInstBase *ib, const BrigObject *obj, - const char *_opcode, int srcIdx) - : StInstBase(ib, obj, _opcode), - MemInst(SrcDataType::memType) - { - init_addr(&this->addr); - - BrigRegOperandInfo rinfo; - unsigned op_offs = obj->getOperandPtr(ib->operands,srcIdx); - const Brig::BrigOperand *baseOp = obj->getOperand(op_offs); - - if (baseOp->kind == Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES) { - const Brig::BrigOperandConstantBytes *op = - (Brig::BrigOperandConstantBytes*)baseOp; - - rinfo = BrigRegOperandInfo((Brig::BrigKind16_t)op->base.kind, - Brig::BRIG_TYPE_NONE); - } else { - rinfo = findRegDataType(op_offs, obj); - } - - if (baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) { - const Brig::BrigOperandOperandList *brigRegVecOp = - (const Brig::BrigOperandOperandList*)baseOp; - - num_src_operands = - *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4; - - assert(num_src_operands <= 4); - } else { - num_src_operands = 1; - } - - if (num_src_operands > 1) { - assert(baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST); - - for (int i = 0; i < num_src_operands; ++i) { - src_vect[i].init_from_vect(op_offs, obj, i); - } - } - } - - void - initiateAcc(GPUDynInstPtr gpuDynInst) override - { - // before performing a store, check if this store has - // release semantics, and if so issue a release first - if (!this->isLocalMem()) { - if (gpuDynInst->computeUnit()->shader->separate_acquire_release - && gpuDynInst->isRelease()) { - - gpuDynInst->statusBitVector = VectorMask(1); - gpuDynInst->execContinuation = &GPUStaticInst::execSt; - gpuDynInst->useContinuation = true; - // create request - RequestPtr req = std::make_shared(0, 0, 0, - gpuDynInst->computeUnit()->masterId(), - 0, gpuDynInst->wfDynId); - req->setFlags(Request::RELEASE); - gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req); - - return; - } - } - - // if there is no release semantic, perform stores immediately - execSt(gpuDynInst); - } - - // stores don't write anything back, so there is nothing - // to do here. we only override this method to avoid the - // fatal in the base class implementation - void completeAcc(GPUDynInstPtr gpuDynInst) override { } - - private: - // execSt may be called through a continuation - // if the store had release semantics. see comment for - // execSt in gpu_static_inst.hh - void - execSt(GPUDynInstPtr gpuDynInst) override - { - typedef typename MemDataType::CType c0; - - gpuDynInst->statusBitVector = gpuDynInst->exec_mask; - - if (num_src_operands > 1) { - for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) - if (gpuDynInst->exec_mask[i]) - gpuDynInst->statusVector.push_back(num_src_operands); - else - gpuDynInst->statusVector.push_back(0); - } - - for (int k = 0; k < num_src_operands; ++k) { - c0 *d = &((c0*)gpuDynInst->d_data) - [k * gpuDynInst->computeUnit()->wfSize()]; - - for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) { - if (gpuDynInst->exec_mask[i]) { - Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0); - - if (this->isLocalMem()) { - //store to shared memory - gpuDynInst->wavefront()->ldsChunk->write(vaddr, - *d); - } else { - RequestPtr req = std::make_shared( - vaddr, sizeof(c0), 0, - gpuDynInst->computeUnit()->masterId(), - 0, gpuDynInst->wfDynId); - - gpuDynInst->setRequestFlags(req); - PacketPtr pkt = new Packet(req, MemCmd::WriteReq); - pkt->dataStatic(d); - - // translation is performed in sendRequest() - // the request will be finished when the store completes - gpuDynInst->useContinuation = false; - gpuDynInst->computeUnit()->sendRequest(gpuDynInst, - i, pkt); - - } - } - ++d; - } - } - - gpuDynInst->updateStats(); - } - - public: - bool isVectorRegister(int operandIndex) override - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (operandIndex == num_src_operands) - return this->addr.isVectorRegister(); - if (num_src_operands > 1) - return src_vect[operandIndex].isVectorRegister(); - else if (num_src_operands == 1) - return StInstBase::src.isVectorRegister(); - return false; - } - bool isCondRegister(int operandIndex) override - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (operandIndex == num_src_operands) - return this->addr.isCondRegister(); - if (num_src_operands > 1) - return src_vect[operandIndex].isCondRegister(); - else if (num_src_operands == 1) - return StInstBase::src.isCondRegister(); - return false; - } - bool isScalarRegister(int operandIndex) override - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (operandIndex == num_src_operands) - return this->addr.isScalarRegister(); - if (num_src_operands > 1) - return src_vect[operandIndex].isScalarRegister(); - else if (num_src_operands == 1) - return StInstBase::src.isScalarRegister(); - return false; - } - bool isSrcOperand(int operandIndex) override - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - return true; - } - bool isDstOperand(int operandIndex) override { return false; } - int getOperandSize(int operandIndex) override - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (operandIndex == num_src_operands) - return this->addr.opSize(); - if (num_src_operands > 1) - return src_vect[operandIndex].opSize(); - else if (num_src_operands == 1) - return StInstBase::src.opSize(); - return 0; - } - int - getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (operandIndex == num_src_operands) - return this->addr.regIndex(); - if (num_src_operands > 1) - return src_vect[operandIndex].regIndex(); - else if (num_src_operands == 1) - return StInstBase::src.regIndex(); - return -1; - } - int getNumOperands() override - { - if (this->addr.isVectorRegister() || this->addr.isScalarRegister()) - return num_src_operands + 1; - else - return num_src_operands; - } - void execute(GPUDynInstPtr gpuDynInst) override; - }; - - template - GPUStaticInst* - decodeSt(const Brig::BrigInstBase *ib, const BrigObject *obj) - { - int srcIdx = 0; - int destIdx = 1; - if (ib->opcode == Brig::BRIG_OPCODE_ATOMIC || - ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET) { - srcIdx = 1; - destIdx = 0; - } - unsigned op_offs = obj->getOperandPtr(ib->operands,destIdx); - - BrigRegOperandInfo tmp = findRegDataType(op_offs, obj); - - if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) { - return new StInst(ib, obj, "st", srcIdx); - } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) { - // V2/V4 not allowed - switch (tmp.regKind) { - case Brig::BRIG_REGISTER_KIND_SINGLE: - return new StInst(ib, obj, "st", srcIdx); - case Brig::BRIG_REGISTER_KIND_DOUBLE: - return new StInst(ib, obj, "st", srcIdx); - default: - fatal("Bad st register operand type %d\n", tmp.type); - } - } else { - fatal("Bad st register operand kind %d\n", tmp.kind); - } - } - - template - class AtomicInstBase : public HsailGPUStaticInst - { - public: - typename OperandType::DestOperand dest; - typename OperandType::SrcOperand src[NumSrcOperands]; - AddrOperandType addr; - - Brig::BrigSegment segment; - Brig::BrigMemoryOrder memoryOrder; - Brig::BrigAtomicOperation atomicOperation; - Brig::BrigMemoryScope memoryScope; - Brig::BrigOpcode opcode; - - AtomicInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj, - const char *_opcode) - : HsailGPUStaticInst(obj, _opcode) - { - using namespace Brig; - - const BrigInstAtomic *at = (const BrigInstAtomic*)ib; - - segment = (BrigSegment)at->segment; - memoryScope = (BrigMemoryScope)at->memoryScope; - memoryOrder = (BrigMemoryOrder)at->memoryOrder; - atomicOperation = (BrigAtomicOperation)at->atomicOperation; - opcode = (BrigOpcode)ib->opcode; - - assert(opcode == Brig::BRIG_OPCODE_ATOMICNORET || - opcode == Brig::BRIG_OPCODE_ATOMIC); - - setFlag(MemoryRef); - - if (opcode == Brig::BRIG_OPCODE_ATOMIC) { - setFlag(AtomicReturn); - } else { - setFlag(AtomicNoReturn); - } - - switch (memoryOrder) { - case BRIG_MEMORY_ORDER_NONE: - setFlag(NoOrder); - break; - case BRIG_MEMORY_ORDER_RELAXED: - setFlag(RelaxedOrder); - break; - case BRIG_MEMORY_ORDER_SC_ACQUIRE: - setFlag(Acquire); - break; - case BRIG_MEMORY_ORDER_SC_RELEASE: - setFlag(Release); - break; - case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE: - setFlag(AcquireRelease); - break; - default: - fatal("AtomicInst has bad memory order type\n"); - } - - switch (memoryScope) { - case BRIG_MEMORY_SCOPE_NONE: - setFlag(NoScope); - break; - case BRIG_MEMORY_SCOPE_WORKITEM: - setFlag(WorkitemScope); - break; - case BRIG_MEMORY_SCOPE_WORKGROUP: - setFlag(WorkgroupScope); - break; - case BRIG_MEMORY_SCOPE_AGENT: - setFlag(DeviceScope); - break; - case BRIG_MEMORY_SCOPE_SYSTEM: - setFlag(SystemScope); - break; - default: - fatal("AtomicInst has bad memory scope type\n"); - } - - switch (atomicOperation) { - case Brig::BRIG_ATOMIC_AND: - setFlag(AtomicAnd); - break; - case Brig::BRIG_ATOMIC_OR: - setFlag(AtomicOr); - break; - case Brig::BRIG_ATOMIC_XOR: - setFlag(AtomicXor); - break; - case Brig::BRIG_ATOMIC_CAS: - setFlag(AtomicCAS); - break; - case Brig::BRIG_ATOMIC_EXCH: - setFlag(AtomicExch); - break; - case Brig::BRIG_ATOMIC_ADD: - setFlag(AtomicAdd); - break; - case Brig::BRIG_ATOMIC_WRAPINC: - setFlag(AtomicInc); - break; - case Brig::BRIG_ATOMIC_WRAPDEC: - setFlag(AtomicDec); - break; - case Brig::BRIG_ATOMIC_MIN: - setFlag(AtomicMin); - break; - case Brig::BRIG_ATOMIC_MAX: - setFlag(AtomicMax); - break; - case Brig::BRIG_ATOMIC_SUB: - setFlag(AtomicSub); - break; - default: - fatal("Bad BrigAtomicOperation code %d\n", atomicOperation); - } - - switch (segment) { - case BRIG_SEGMENT_GLOBAL: - setFlag(GlobalSegment); - break; - case BRIG_SEGMENT_GROUP: - setFlag(GroupSegment); - break; - case BRIG_SEGMENT_FLAT: - setFlag(Flat); - break; - default: - panic("Atomic: segment %d not supported\n", segment); - } - - if (HasDst) { - unsigned op_offs = obj->getOperandPtr(ib->operands, 0); - dest.init(op_offs, obj); - - op_offs = obj->getOperandPtr(ib->operands, 1); - addr.init(op_offs, obj); - - for (int i = 0; i < NumSrcOperands; ++i) { - op_offs = obj->getOperandPtr(ib->operands, i + 2); - src[i].init(op_offs, obj); - } - } else { - - unsigned op_offs = obj->getOperandPtr(ib->operands, 0); - addr.init(op_offs, obj); - - for (int i = 0; i < NumSrcOperands; ++i) { - op_offs = obj->getOperandPtr(ib->operands, i + 1); - src[i].init(op_offs, obj); - } - } - } - - int numSrcRegOperands() - { - int operands = 0; - for (int i = 0; i < NumSrcOperands; i++) { - if (src[i].isVectorRegister()) { - operands++; - } - } - if (addr.isVectorRegister()) - operands++; - return operands; - } - int numDstRegOperands() { return dest.isVectorRegister(); } - int getNumOperands() - { - if (addr.isVectorRegister()) - return(NumSrcOperands + 2); - return(NumSrcOperands + 1); - } - bool isVectorRegister(int operandIndex) - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (operandIndex < NumSrcOperands) - return src[operandIndex].isVectorRegister(); - else if (operandIndex == NumSrcOperands) - return(addr.isVectorRegister()); - else - return dest.isVectorRegister(); - } - bool isCondRegister(int operandIndex) - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (operandIndex < NumSrcOperands) - return src[operandIndex].isCondRegister(); - else if (operandIndex == NumSrcOperands) - return(addr.isCondRegister()); - else - return dest.isCondRegister(); - } - bool isScalarRegister(int operandIndex) - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (operandIndex < NumSrcOperands) - return src[operandIndex].isScalarRegister(); - else if (operandIndex == NumSrcOperands) - return(addr.isScalarRegister()); - else - return dest.isScalarRegister(); - } - bool isSrcOperand(int operandIndex) - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (operandIndex < NumSrcOperands) - return true; - else if (operandIndex == NumSrcOperands) - return(addr.isVectorRegister()); - else - return false; - } - bool isDstOperand(int operandIndex) - { - if (operandIndex <= NumSrcOperands) - return false; - else - return true; - } - int getOperandSize(int operandIndex) - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (operandIndex < NumSrcOperands) - return(src[operandIndex].opSize()); - else if (operandIndex == NumSrcOperands) - return(addr.opSize()); - else - return(dest.opSize()); - } - int - getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) - { - assert((operandIndex >= 0) && (operandIndex < getNumOperands())); - if (operandIndex < NumSrcOperands) - return(src[operandIndex].regIndex()); - else if (operandIndex == NumSrcOperands) - return(addr.regIndex()); - else - return(dest.regIndex()); - return -1; - } - }; - - template - class AtomicInst : - public AtomicInstBase, - public MemInst - { - public: - void generateDisassembly() override; - - AtomicInst(const Brig::BrigInstBase *ib, const BrigObject *obj, - const char *_opcode) - : AtomicInstBase - (ib, obj, _opcode), - MemInst(MemDataType::memType) - { - init_addr(&this->addr); - } - - void - initiateAcc(GPUDynInstPtr gpuDynInst) override - { - // before doing the RMW, check if this atomic has - // release semantics, and if so issue a release first - if (!this->isLocalMem()) { - if (gpuDynInst->computeUnit()->shader->separate_acquire_release - && (gpuDynInst->isRelease() - || gpuDynInst->isAcquireRelease())) { - - gpuDynInst->statusBitVector = VectorMask(1); - - gpuDynInst->execContinuation = &GPUStaticInst::execAtomic; - gpuDynInst->useContinuation = true; - - // create request - RequestPtr req = std::make_shared(0, 0, 0, - gpuDynInst->computeUnit()->masterId(), - 0, gpuDynInst->wfDynId); - req->setFlags(Request::RELEASE); - gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req); - - return; - } - } - - // if there is no release semantic, execute the RMW immediately - execAtomic(gpuDynInst); - - } - - void - completeAcc(GPUDynInstPtr gpuDynInst) override - { - // if this is not an atomic return op, then we - // have nothing more to do. - if (this->isAtomicRet()) { - // the size of the src operands and the - // memory being operated on must match - // for HSAIL atomics - this assumption may - // not apply to all ISAs - typedef typename MemDataType::CType CType; - - Wavefront *w = gpuDynInst->wavefront(); - int dst = this->dest.regIndex(); - std::vector regVec; - // virtual->physical VGPR mapping - int physVgpr = w->remap(dst, sizeof(CType), 1); - regVec.push_back(physVgpr); - CType *p1 = &((CType*)gpuDynInst->d_data)[0]; - - for (int i = 0; i < w->computeUnit->wfSize(); ++i) { - if (gpuDynInst->exec_mask[i]) { - DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: " - "$%s%d <- %d global ld done (src = wavefront " - "ld inst)\n", w->computeUnit->cu_id, w->simdId, - w->wfSlotId, i, sizeof(CType) == 4 ? "s" : "d", - dst, *p1); - // write the value into the physical VGPR. This is a - // purely functional operation. No timing is modeled. - w->computeUnit->vrf[w->simdId]->write(physVgpr, *p1, i); - } - ++p1; - } - - // Schedule the write operation of the load data on the VRF. - // This simply models the timing aspect of the VRF write operation. - // It does not modify the physical VGPR. - int loadVrfBankConflictCycles = gpuDynInst->computeUnit()-> - vrf[w->simdId]->exec(gpuDynInst->seqNum(), w, regVec, - sizeof(CType), gpuDynInst->time); - - if (this->isGlobalMem()) { - gpuDynInst->computeUnit()->globalMemoryPipe - .incLoadVRFBankConflictCycles(loadVrfBankConflictCycles); - } else { - assert(this->isLocalMem()); - gpuDynInst->computeUnit()->localMemoryPipe - .incLoadVRFBankConflictCycles(loadVrfBankConflictCycles); - } - } - } - - void execute(GPUDynInstPtr gpuDynInst) override; - - private: - // execAtomic may be called through a continuation - // if the RMW had release semantics. see comment for - // execContinuation in gpu_dyn_inst.hh - void - execAtomic(GPUDynInstPtr gpuDynInst) override - { - gpuDynInst->statusBitVector = gpuDynInst->exec_mask; - - typedef typename MemDataType::CType c0; - - c0 *d = &((c0*) gpuDynInst->d_data)[0]; - c0 *e = &((c0*) gpuDynInst->a_data)[0]; - c0 *f = &((c0*) gpuDynInst->x_data)[0]; - - for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) { - if (gpuDynInst->exec_mask[i]) { - Addr vaddr = gpuDynInst->addr[i]; - - if (this->isLocalMem()) { - Wavefront *wavefront = gpuDynInst->wavefront(); - *d = wavefront->ldsChunk->read(vaddr); - - if (this->isAtomicAdd()) { - wavefront->ldsChunk->write(vaddr, - wavefront->ldsChunk->read(vaddr) + (*e)); - } else if (this->isAtomicSub()) { - wavefront->ldsChunk->write(vaddr, - wavefront->ldsChunk->read(vaddr) - (*e)); - } else if (this->isAtomicMax()) { - wavefront->ldsChunk->write(vaddr, - std::max(wavefront->ldsChunk->read(vaddr), - (*e))); - } else if (this->isAtomicMin()) { - wavefront->ldsChunk->write(vaddr, - std::min(wavefront->ldsChunk->read(vaddr), - (*e))); - } else if (this->isAtomicAnd()) { - wavefront->ldsChunk->write(vaddr, - wavefront->ldsChunk->read(vaddr) & (*e)); - } else if (this->isAtomicOr()) { - wavefront->ldsChunk->write(vaddr, - wavefront->ldsChunk->read(vaddr) | (*e)); - } else if (this->isAtomicXor()) { - wavefront->ldsChunk->write(vaddr, - wavefront->ldsChunk->read(vaddr) ^ (*e)); - } else if (this->isAtomicInc()) { - wavefront->ldsChunk->write(vaddr, - wavefront->ldsChunk->read(vaddr) + 1); - } else if (this->isAtomicDec()) { - wavefront->ldsChunk->write(vaddr, - wavefront->ldsChunk->read(vaddr) - 1); - } else if (this->isAtomicExch()) { - wavefront->ldsChunk->write(vaddr, (*e)); - } else if (this->isAtomicCAS()) { - wavefront->ldsChunk->write(vaddr, - (wavefront->ldsChunk->read(vaddr) == (*e)) ? - (*f) : wavefront->ldsChunk->read(vaddr)); - } else { - fatal("Unrecognized or invalid HSAIL atomic op " - "type.\n"); - } - } else { - RequestPtr req = - std::make_shared(vaddr, sizeof(c0), 0, - gpuDynInst->computeUnit()->masterId(), - 0, gpuDynInst->wfDynId, - gpuDynInst->makeAtomicOpFunctor(e, - f)); - - gpuDynInst->setRequestFlags(req); - PacketPtr pkt = new Packet(req, MemCmd::SwapReq); - pkt->dataStatic(d); - - if (gpuDynInst->computeUnit()->shader-> - separate_acquire_release && - (gpuDynInst->isAcquire())) { - // if this atomic has acquire semantics, - // schedule the continuation to perform an - // acquire after the RMW completes - gpuDynInst->execContinuation = - &GPUStaticInst::execAtomicAcq; - - gpuDynInst->useContinuation = true; - } else { - // the request will be finished when the RMW completes - gpuDynInst->useContinuation = false; - } - // translation is performed in sendRequest() - gpuDynInst->computeUnit()->sendRequest(gpuDynInst, i, - pkt); - } - } - - ++d; - ++e; - ++f; - } - - gpuDynInst->updateStats(); - } - - // execAtomicACq will always be called through a continuation. - // see comment for execContinuation in gpu_dyn_inst.hh - void - execAtomicAcq(GPUDynInstPtr gpuDynInst) override - { - // after performing the RMW, check to see if this instruction - // has acquire semantics, and if so, issue an acquire - if (!this->isLocalMem()) { - if (gpuDynInst->computeUnit()->shader->separate_acquire_release - && gpuDynInst->isAcquire()) { - gpuDynInst->statusBitVector = VectorMask(1); - - // the request will be finished when - // the acquire completes - gpuDynInst->useContinuation = false; - // create request - RequestPtr req = std::make_shared(0, 0, 0, - gpuDynInst->computeUnit()->masterId(), - 0, gpuDynInst->wfDynId); - req->setFlags(Request::ACQUIRE); - gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req); - } - } - } - }; - - template - GPUStaticInst* - constructAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj) - { - const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib; - - if (at->atomicOperation == Brig::BRIG_ATOMIC_LD) { - return decodeLd(ib, obj); - } else if (at->atomicOperation == Brig::BRIG_ATOMIC_ST) { - switch (ib->type) { - case Brig::BRIG_TYPE_B8: - return decodeSt(ib, obj); - case Brig::BRIG_TYPE_B16: - return decodeSt(ib, obj); - case Brig::BRIG_TYPE_B32: - return decodeSt(ib, obj); - case Brig::BRIG_TYPE_B64: - return decodeSt(ib, obj); - default: fatal("AtomicSt: Operand type mismatch %d\n", ib->type); - } - } else { - if ((Brig::BrigOpcode)ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET) - return new AtomicInst(ib, obj, "atomicnoret"); - else - return new AtomicInst(ib, obj, "atomic"); - } - } - - template - GPUStaticInst* - decodeAtomicHelper(const Brig::BrigInstBase *ib, const BrigObject *obj) - { - unsigned addrIndex = (Brig::BrigOpcode)ib->opcode == - Brig::BRIG_OPCODE_ATOMICNORET ? 0 : 1; - - unsigned op_offs = obj->getOperandPtr(ib->operands,addrIndex); - - BrigRegOperandInfo tmp = findRegDataType(op_offs, obj); - - if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) { - return constructAtomic(ib, obj); - } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) { - // V2/V4 not allowed - switch (tmp.regKind) { - case Brig::BRIG_REGISTER_KIND_SINGLE: - return constructAtomic(ib, obj); - case Brig::BRIG_REGISTER_KIND_DOUBLE: - return constructAtomic(ib, obj); - default: - fatal("Bad atomic register operand type %d\n", tmp.type); - } - } else { - fatal("Bad atomic register operand kind %d\n", tmp.kind); - } - } - - - template - GPUStaticInst* - decodeAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj) - { - const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib; - - if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) { - return decodeAtomicHelper(ib, obj); - } else { - return decodeAtomicHelper(ib, obj); - } - } - - template - GPUStaticInst* - decodeAtomicNoRet(const Brig::BrigInstBase *ib, const BrigObject *obj) - { - const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib; - if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) { - return decodeAtomicHelper(ib, obj); - } else { - return decodeAtomicHelper(ib, obj); - } - } -} // namespace HsailISA - -#endif // __ARCH_HSAIL_INSTS_MEM_HH__ diff --git a/src/arch/hsail/insts/mem_impl.hh b/src/arch/hsail/insts/mem_impl.hh deleted file mode 100644 index dbda6643b..000000000 --- a/src/arch/hsail/insts/mem_impl.hh +++ /dev/null @@ -1,648 +0,0 @@ -/* - * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: Steve Reinhardt - */ - -#include "gpu-compute/hsail_code.hh" - -// defined in code.cc, but not worth sucking in all of code.h for this -// at this point -extern const char *segmentNames[]; - -namespace HsailISA -{ - template - void - LdaInst::generateDisassembly() - { - this->disassembly = csprintf("%s_%s %s,%s", this->opcode, - DestDataType::label, - this->dest.disassemble(), - this->addr.disassemble()); - } - - template - void - LdaInst::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *w = gpuDynInst->wavefront(); - - typedef typename DestDataType::CType CType M5_VAR_USED; - const VectorMask &mask = w->getPred(); - std::vector addr_vec; - addr_vec.resize(w->computeUnit->wfSize(), (Addr)0); - this->addr.calcVector(w, addr_vec); - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - this->dest.set(w, lane, addr_vec[lane]); - } - } - addr_vec.clear(); - } - - template - void - LdInst::generateDisassembly() - { - switch (num_dest_operands) { - case 1: - this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode, - segmentNames[this->segment], - MemDataType::label, - this->dest.disassemble(), - this->addr.disassemble()); - break; - case 2: - this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode, - segmentNames[this->segment], - MemDataType::label, - this->dest_vect[0].disassemble(), - this->dest_vect[1].disassemble(), - this->addr.disassemble()); - break; - case 3: - this->disassembly = csprintf("%s_%s_%s (%s,%s,%s), %s", this->opcode, - segmentNames[this->segment], - MemDataType::label, - this->dest_vect[0].disassemble(), - this->dest_vect[1].disassemble(), - this->dest_vect[2].disassemble(), - this->addr.disassemble()); - break; - case 4: - this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s", - this->opcode, - segmentNames[this->segment], - MemDataType::label, - this->dest_vect[0].disassemble(), - this->dest_vect[1].disassemble(), - this->dest_vect[2].disassemble(), - this->dest_vect[3].disassemble(), - this->addr.disassemble()); - break; - default: - fatal("Bad ld register dest operand, num vector operands: %d \n", - num_dest_operands); - break; - } - } - - static Addr - calcPrivAddr(Addr addr, Wavefront *w, int lane, GPUStaticInst *i) - { - // what is the size of the object we are accessing?? - // NOTE: the compiler doesn't generate enough information - // to do this yet..have to just line up all the private - // work-item spaces back to back for now - /* - StorageElement* se = - i->parent->findSymbol(Brig::BrigPrivateSpace, addr); - assert(se); - - return w->wfSlotId * w->privSizePerItem * w->computeUnit->wfSize() + - se->offset * w->computeUnit->wfSize() + - lane * se->size; - */ - - // addressing strategy: interleave the private spaces of - // work-items in a wave-front on 8 byte granularity. - // this won't be perfect coalescing like the spill space - // strategy, but it's better than nothing. The spill space - // strategy won't work with private because the same address - // may be accessed by different sized loads/stores. - - // Note: I'm assuming that the largest load/store to private - // is 8 bytes. If it is larger, the stride will have to increase - - Addr addr_div8 = addr / 8; - Addr addr_mod8 = addr % 8; - - Addr ret = addr_div8 * 8 * w->computeUnit->wfSize() + lane * 8 + - addr_mod8 + w->privBase; - - assert(ret < w->privBase + - (w->privSizePerItem * w->computeUnit->wfSize())); - - return ret; - } - - template - void - LdInst::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *w = gpuDynInst->wavefront(); - - typedef typename MemDataType::CType MemCType; - const VectorMask &mask = w->getPred(); - - // Kernarg references are handled uniquely for now (no Memory Request - // is used), so special-case them up front. Someday we should - // make this more realistic, at which we should get rid of this - // block and fold this case into the switch below. - if (this->segment == Brig::BRIG_SEGMENT_KERNARG) { - MemCType val; - - // I assume no vector ld for kernargs - assert(num_dest_operands == 1); - - // assuming for the moment that we'll never do register - // offsets into kernarg space... just to make life simpler - uint64_t address = this->addr.calcUniform(); - - val = *(MemCType*)&w->kernelArgs[address]; - - DPRINTF(HSAIL, "ld_kernarg [%d] -> %d\n", address, val); - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - this->dest.set(w, lane, val); - } - } - - return; - } else if (this->segment == Brig::BRIG_SEGMENT_ARG) { - uint64_t address = this->addr.calcUniform(); - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - MemCType val = w->readCallArgMem(lane, address); - - DPRINTF(HSAIL, "ld_arg [%d] -> %llu\n", address, - (unsigned long long)val); - - this->dest.set(w, lane, val); - } - } - - return; - } - - GPUDynInstPtr m = gpuDynInst; - - this->addr.calcVector(w, m->addr); - - m->m_type = MemDataType::memType; - m->v_type = DestDataType::vgprType; - - m->exec_mask = w->execMask(); - m->statusBitVector = 0; - m->equiv = this->equivClass; - - if (num_dest_operands == 1) { - m->dst_reg = this->dest.regIndex(); - m->n_reg = 1; - } else { - m->n_reg = num_dest_operands; - for (int i = 0; i < num_dest_operands; ++i) { - m->dst_reg_vec[i] = this->dest_vect[i].regIndex(); - } - } - - m->simdId = w->simdId; - m->wfSlotId = w->wfSlotId; - m->wfDynId = w->wfDynId; - m->kern_id = w->kernId; - m->cu_id = w->computeUnit->cu_id; - m->latency.init(&w->computeUnit->shader->tick_cnt); - - switch (this->segment) { - case Brig::BRIG_SEGMENT_GLOBAL: - m->pipeId = GLBMEM_PIPE; - m->latency.set(w->computeUnit->shader->ticks(1)); - - // this is a complete hack to get around a compiler bug - // (the compiler currently generates global access for private - // addresses (starting from 0). We need to add the private offset) - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (m->addr[lane] < w->privSizePerItem) { - if (mask[lane]) { - // what is the size of the object we are accessing? - // find base for for this wavefront - - // calcPrivAddr will fail if accesses are unaligned - assert(!((sizeof(MemCType) - 1) & m->addr[lane])); - - Addr privAddr = calcPrivAddr(m->addr[lane], w, lane, - this); - - m->addr[lane] = privAddr; - } - } - } - - w->computeUnit->globalMemoryPipe.issueRequest(m); - w->outstandingReqsRdGm++; - w->rdGmReqsInPipe--; - break; - - case Brig::BRIG_SEGMENT_SPILL: - assert(num_dest_operands == 1); - m->pipeId = GLBMEM_PIPE; - m->latency.set(w->computeUnit->shader->ticks(1)); - { - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - // note: this calculation will NOT WORK if the compiler - // ever generates loads/stores to the same address with - // different widths (e.g., a ld_u32 addr and a ld_u16 addr) - if (mask[lane]) { - assert(m->addr[lane] < w->spillSizePerItem); - - m->addr[lane] = m->addr[lane] * w->spillWidth + - lane * sizeof(MemCType) + w->spillBase; - - w->lastAddr[lane] = m->addr[lane]; - } - } - } - - w->computeUnit->globalMemoryPipe.issueRequest(m); - w->outstandingReqsRdGm++; - w->rdGmReqsInPipe--; - break; - - case Brig::BRIG_SEGMENT_GROUP: - m->pipeId = LDSMEM_PIPE; - m->latency.set(w->computeUnit->shader->ticks(24)); - w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m); - w->outstandingReqsRdLm++; - w->rdLmReqsInPipe--; - break; - - case Brig::BRIG_SEGMENT_READONLY: - m->pipeId = GLBMEM_PIPE; - m->latency.set(w->computeUnit->shader->ticks(1)); - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - assert(m->addr[lane] + sizeof(MemCType) <= w->roSize); - m->addr[lane] += w->roBase; - } - } - - w->computeUnit->globalMemoryPipe.issueRequest(m); - w->outstandingReqsRdGm++; - w->rdGmReqsInPipe--; - break; - - case Brig::BRIG_SEGMENT_PRIVATE: - m->pipeId = GLBMEM_PIPE; - m->latency.set(w->computeUnit->shader->ticks(1)); - { - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - assert(m->addr[lane] < w->privSizePerItem); - - m->addr[lane] = m->addr[lane] + - lane * sizeof(MemCType) + w->privBase; - } - } - } - w->computeUnit->globalMemoryPipe.issueRequest(m); - w->outstandingReqsRdGm++; - w->rdGmReqsInPipe--; - break; - - default: - fatal("Load to unsupported segment %d %llxe\n", this->segment, - m->addr[0]); - } - - w->outstandingReqs++; - w->memReqsInPipe--; - } - - template - void - StInst::execute(GPUDynInstPtr gpuDynInst) - { - Wavefront *w = gpuDynInst->wavefront(); - - typedef typename OperationType::CType CType; - - const VectorMask &mask = w->getPred(); - - // arg references are handled uniquely for now (no Memory Request - // is used), so special-case them up front. Someday we should - // make this more realistic, at which we should get rid of this - // block and fold this case into the switch below. - if (this->segment == Brig::BRIG_SEGMENT_ARG) { - uint64_t address = this->addr.calcUniform(); - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - CType data = this->src.template get(w, lane); - DPRINTF(HSAIL, "st_arg [%d] <- %d\n", address, data); - w->writeCallArgMem(lane, address, data); - } - } - - return; - } - - GPUDynInstPtr m = gpuDynInst; - - m->exec_mask = w->execMask(); - - this->addr.calcVector(w, m->addr); - - if (num_src_operands == 1) { - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - ((CType*)m->d_data)[lane] = - this->src.template get(w, lane); - } - } - } else { - for (int k= 0; k < num_src_operands; ++k) { - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - ((CType*)m->d_data)[k * w->computeUnit->wfSize() + lane] = - this->src_vect[k].template get(w, lane); - } - } - } - } - - m->m_type = OperationType::memType; - m->v_type = OperationType::vgprType; - - m->statusBitVector = 0; - m->equiv = this->equivClass; - - if (num_src_operands == 1) { - m->n_reg = 1; - } else { - m->n_reg = num_src_operands; - } - - m->simdId = w->simdId; - m->wfSlotId = w->wfSlotId; - m->wfDynId = w->wfDynId; - m->kern_id = w->kernId; - m->cu_id = w->computeUnit->cu_id; - m->latency.init(&w->computeUnit->shader->tick_cnt); - - switch (this->segment) { - case Brig::BRIG_SEGMENT_GLOBAL: - m->pipeId = GLBMEM_PIPE; - m->latency.set(w->computeUnit->shader->ticks(1)); - - // this is a complete hack to get around a compiler bug - // (the compiler currently generates global access for private - // addresses (starting from 0). We need to add the private offset) - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - if (m->addr[lane] < w->privSizePerItem) { - - // calcPrivAddr will fail if accesses are unaligned - assert(!((sizeof(CType)-1) & m->addr[lane])); - - Addr privAddr = calcPrivAddr(m->addr[lane], w, lane, - this); - - m->addr[lane] = privAddr; - } - } - } - - w->computeUnit->globalMemoryPipe.issueRequest(m); - w->outstandingReqsWrGm++; - w->wrGmReqsInPipe--; - break; - - case Brig::BRIG_SEGMENT_SPILL: - assert(num_src_operands == 1); - m->pipeId = GLBMEM_PIPE; - m->latency.set(w->computeUnit->shader->ticks(1)); - { - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - assert(m->addr[lane] < w->spillSizePerItem); - - m->addr[lane] = m->addr[lane] * w->spillWidth + - lane * sizeof(CType) + w->spillBase; - } - } - } - - w->computeUnit->globalMemoryPipe.issueRequest(m); - w->outstandingReqsWrGm++; - w->wrGmReqsInPipe--; - break; - - case Brig::BRIG_SEGMENT_GROUP: - m->pipeId = LDSMEM_PIPE; - m->latency.set(w->computeUnit->shader->ticks(24)); - w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m); - w->outstandingReqsWrLm++; - w->wrLmReqsInPipe--; - break; - - case Brig::BRIG_SEGMENT_PRIVATE: - m->pipeId = GLBMEM_PIPE; - m->latency.set(w->computeUnit->shader->ticks(1)); - { - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - assert(m->addr[lane] < w->privSizePerItem); - m->addr[lane] = m->addr[lane] + lane * - sizeof(CType)+w->privBase; - } - } - } - - w->computeUnit->globalMemoryPipe.issueRequest(m); - w->outstandingReqsWrGm++; - w->wrGmReqsInPipe--; - break; - - default: - fatal("Store to unsupported segment %d\n", this->segment); - } - - w->outstandingReqs++; - w->memReqsInPipe--; - } - - template - void - StInst::generateDisassembly() - { - switch (num_src_operands) { - case 1: - this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode, - segmentNames[this->segment], - OperationType::label, - this->src.disassemble(), - this->addr.disassemble()); - break; - case 2: - this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode, - segmentNames[this->segment], - OperationType::label, - this->src_vect[0].disassemble(), - this->src_vect[1].disassemble(), - this->addr.disassemble()); - break; - case 4: - this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s", - this->opcode, - segmentNames[this->segment], - OperationType::label, - this->src_vect[0].disassemble(), - this->src_vect[1].disassemble(), - this->src_vect[2].disassemble(), - this->src_vect[3].disassemble(), - this->addr.disassemble()); - break; - default: fatal("Bad ld register src operand, num vector operands: " - "%d \n", num_src_operands); - break; - } - } - - template - void - AtomicInst::execute(GPUDynInstPtr gpuDynInst) - { - typedef typename DataType::CType CType; - - Wavefront *w = gpuDynInst->wavefront(); - - GPUDynInstPtr m = gpuDynInst; - - this->addr.calcVector(w, m->addr); - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - ((CType *)m->a_data)[lane] = - this->src[0].template get(w, lane); - } - - // load second source operand for CAS - if (NumSrcOperands > 1) { - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - ((CType*)m->x_data)[lane] = - this->src[1].template get(w, lane); - } - } - - assert(NumSrcOperands <= 2); - - m->m_type = DataType::memType; - m->v_type = DataType::vgprType; - - m->exec_mask = w->execMask(); - m->statusBitVector = 0; - m->equiv = 0; // atomics don't have an equivalence class operand - m->n_reg = 1; - - if (HasDst) { - m->dst_reg = this->dest.regIndex(); - } - - m->simdId = w->simdId; - m->wfSlotId = w->wfSlotId; - m->wfDynId = w->wfDynId; - m->kern_id = w->kernId; - m->cu_id = w->computeUnit->cu_id; - m->latency.init(&w->computeUnit->shader->tick_cnt); - - switch (this->segment) { - case Brig::BRIG_SEGMENT_GLOBAL: - m->latency.set(w->computeUnit->shader->ticks(64)); - m->pipeId = GLBMEM_PIPE; - - w->computeUnit->globalMemoryPipe.issueRequest(m); - w->outstandingReqsWrGm++; - w->wrGmReqsInPipe--; - w->outstandingReqsRdGm++; - w->rdGmReqsInPipe--; - break; - - case Brig::BRIG_SEGMENT_GROUP: - m->pipeId = LDSMEM_PIPE; - m->latency.set(w->computeUnit->shader->ticks(24)); - w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m); - w->outstandingReqsWrLm++; - w->wrLmReqsInPipe--; - w->outstandingReqsRdLm++; - w->rdLmReqsInPipe--; - break; - - default: - fatal("Atomic op to unsupported segment %d\n", - this->segment); - } - - w->outstandingReqs++; - w->memReqsInPipe--; - } - - const char* atomicOpToString(Brig::BrigAtomicOperation atomicOp); - - template - void - AtomicInst::generateDisassembly() - { - if (HasDst) { - this->disassembly = - csprintf("%s_%s_%s_%s %s,%s", this->opcode, - atomicOpToString(this->atomicOperation), - segmentNames[this->segment], - DataType::label, this->dest.disassemble(), - this->addr.disassemble()); - } else { - this->disassembly = - csprintf("%s_%s_%s_%s %s", this->opcode, - atomicOpToString(this->atomicOperation), - segmentNames[this->segment], - DataType::label, this->addr.disassemble()); - } - - for (int i = 0; i < NumSrcOperands; ++i) { - this->disassembly += ","; - this->disassembly += this->src[i].disassemble(); - } - } -} // namespace HsailISA diff --git a/src/arch/hsail/insts/pseudo_inst.cc b/src/arch/hsail/insts/pseudo_inst.cc deleted file mode 100644 index 580328aed..000000000 --- a/src/arch/hsail/insts/pseudo_inst.cc +++ /dev/null @@ -1,791 +0,0 @@ -/* - * Copyright (c) 2013-2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: Marc Orr - */ - -#include - -#include "arch/hsail/insts/decl.hh" -#include "arch/hsail/insts/mem.hh" - -namespace HsailISA -{ - // Pseudo (or magic) instructions are overloaded on the hsail call - // instruction, because of its flexible parameter signature. - - // To add a new magic instruction: - // 1. Add an entry to the enum. - // 2. Implement it in the switch statement below (Call::exec). - // 3. Add a utility function to hsa/hsail-gpu-compute/util/magicinst.h, - // so its easy to call from an OpenCL kernel. - - // This enum should be identical to the enum in - // hsa/hsail-gpu-compute/util/magicinst.h - enum - { - MAGIC_PRINT_WF_32 = 0, - MAGIC_PRINT_WF_64, - MAGIC_PRINT_LANE, - MAGIC_PRINT_LANE_64, - MAGIC_PRINT_WF_FLOAT, - MAGIC_SIM_BREAK, - MAGIC_PREF_SUM, - MAGIC_REDUCTION, - MAGIC_MASKLANE_LOWER, - MAGIC_MASKLANE_UPPER, - MAGIC_JOIN_WF_BAR, - MAGIC_WAIT_WF_BAR, - MAGIC_PANIC, - MAGIC_ATOMIC_NR_ADD_GLOBAL_U32_REG, - MAGIC_ATOMIC_NR_ADD_GROUP_U32_REG, - MAGIC_LOAD_GLOBAL_U32_REG, - MAGIC_XACT_CAS_LD, - MAGIC_MOST_SIG_THD, - MAGIC_MOST_SIG_BROADCAST, - MAGIC_PRINT_WFID_32, - MAGIC_PRINT_WFID_64 - }; - - void - Call::execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst) - { - const VectorMask &mask = w->getPred(); - - int op = 0; - bool got_op = false; - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - int src_val0 = src1.get(w, lane, 0); - if (got_op) { - if (src_val0 != op) { - fatal("Multiple magic instructions per PC not " - "supported\n"); - } - } else { - op = src_val0; - got_op = true; - } - } - } - - switch(op) { - case MAGIC_PRINT_WF_32: - MagicPrintWF32(w); - break; - case MAGIC_PRINT_WF_64: - MagicPrintWF64(w); - break; - case MAGIC_PRINT_LANE: - MagicPrintLane(w); - break; - case MAGIC_PRINT_LANE_64: - MagicPrintLane64(w); - break; - case MAGIC_PRINT_WF_FLOAT: - MagicPrintWFFloat(w); - break; - case MAGIC_SIM_BREAK: - MagicSimBreak(w); - break; - case MAGIC_PREF_SUM: - MagicPrefixSum(w); - break; - case MAGIC_REDUCTION: - MagicReduction(w); - break; - case MAGIC_MASKLANE_LOWER: - MagicMaskLower(w); - break; - case MAGIC_MASKLANE_UPPER: - MagicMaskUpper(w); - break; - case MAGIC_JOIN_WF_BAR: - MagicJoinWFBar(w); - break; - case MAGIC_WAIT_WF_BAR: - MagicWaitWFBar(w); - break; - case MAGIC_PANIC: - MagicPanic(w); - break; - - // atomic instructions - case MAGIC_ATOMIC_NR_ADD_GLOBAL_U32_REG: - MagicAtomicNRAddGlobalU32Reg(w, gpuDynInst); - break; - - case MAGIC_ATOMIC_NR_ADD_GROUP_U32_REG: - MagicAtomicNRAddGroupU32Reg(w, gpuDynInst); - break; - - case MAGIC_LOAD_GLOBAL_U32_REG: - MagicLoadGlobalU32Reg(w, gpuDynInst); - break; - - case MAGIC_XACT_CAS_LD: - MagicXactCasLd(w); - break; - - case MAGIC_MOST_SIG_THD: - MagicMostSigThread(w); - break; - - case MAGIC_MOST_SIG_BROADCAST: - MagicMostSigBroadcast(w); - break; - - case MAGIC_PRINT_WFID_32: - MagicPrintWF32ID(w); - break; - - case MAGIC_PRINT_WFID_64: - MagicPrintWFID64(w); - break; - - default: fatal("unrecognized magic instruction: %d\n", op); - } - } - - void - Call::MagicPrintLane(Wavefront *w) - { - #if TRACING_ON - const VectorMask &mask = w->getPred(); - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - int src_val1 = src1.get(w, lane, 1); - int src_val2 = src1.get(w, lane, 2); - if (src_val2) { - DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: 0x%x\n", - disassemble(), w->computeUnit->cu_id, w->simdId, - w->wfSlotId, lane, src_val1); - } else { - DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: %d\n", - disassemble(), w->computeUnit->cu_id, w->simdId, - w->wfSlotId, lane, src_val1); - } - } - } - #endif - } - - void - Call::MagicPrintLane64(Wavefront *w) - { - #if TRACING_ON - const VectorMask &mask = w->getPred(); - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - int64_t src_val1 = src1.get(w, lane, 1); - int src_val2 = src1.get(w, lane, 2); - if (src_val2) { - DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: 0x%x\n", - disassemble(), w->computeUnit->cu_id, w->simdId, - w->wfSlotId, lane, src_val1); - } else { - DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: %d\n", - disassemble(), w->computeUnit->cu_id, w->simdId, - w->wfSlotId, lane, src_val1); - } - } - } - #endif - } - - void - Call::MagicPrintWF32(Wavefront *w) - { - #if TRACING_ON - const VectorMask &mask = w->getPred(); - std::string res_str; - res_str = csprintf("krl_prt (%s)\n", disassemble()); - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (!(lane & 7)) { - res_str += csprintf("DB%03d: ", (int)w->wfDynId); - } - - if (mask[lane]) { - int src_val1 = src1.get(w, lane, 1); - int src_val2 = src1.get(w, lane, 2); - - if (src_val2) { - res_str += csprintf("%08x", src_val1); - } else { - res_str += csprintf("%08d", src_val1); - } - } else { - res_str += csprintf("xxxxxxxx"); - } - - if ((lane & 7) == 7) { - res_str += csprintf("\n"); - } else { - res_str += csprintf(" "); - } - } - - res_str += "\n\n"; - DPRINTFN(res_str.c_str()); - #endif - } - - void - Call::MagicPrintWF32ID(Wavefront *w) - { - #if TRACING_ON - const VectorMask &mask = w->getPred(); - std::string res_str; - int src_val3 = -1; - res_str = csprintf("krl_prt (%s)\n", disassemble()); - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (!(lane & 7)) { - res_str += csprintf("DB%03d: ", (int)w->wfDynId); - } - - if (mask[lane]) { - int src_val1 = src1.get(w, lane, 1); - int src_val2 = src1.get(w, lane, 2); - src_val3 = src1.get(w, lane, 3); - - if (src_val2) { - res_str += csprintf("%08x", src_val1); - } else { - res_str += csprintf("%08d", src_val1); - } - } else { - res_str += csprintf("xxxxxxxx"); - } - - if ((lane & 7) == 7) { - res_str += csprintf("\n"); - } else { - res_str += csprintf(" "); - } - } - - res_str += "\n\n"; - if (w->wfDynId == src_val3) { - DPRINTFN(res_str.c_str()); - } - #endif - } - - void - Call::MagicPrintWF64(Wavefront *w) - { - #if TRACING_ON - const VectorMask &mask = w->getPred(); - std::string res_str; - res_str = csprintf("krl_prt (%s)\n", disassemble()); - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (!(lane & 3)) { - res_str += csprintf("DB%03d: ", (int)w->wfDynId); - } - - if (mask[lane]) { - int64_t src_val1 = src1.get(w, lane, 1); - int src_val2 = src1.get(w, lane, 2); - - if (src_val2) { - res_str += csprintf("%016x", src_val1); - } else { - res_str += csprintf("%016d", src_val1); - } - } else { - res_str += csprintf("xxxxxxxxxxxxxxxx"); - } - - if ((lane & 3) == 3) { - res_str += csprintf("\n"); - } else { - res_str += csprintf(" "); - } - } - - res_str += "\n\n"; - DPRINTFN(res_str.c_str()); - #endif - } - - void - Call::MagicPrintWFID64(Wavefront *w) - { - #if TRACING_ON - const VectorMask &mask = w->getPred(); - std::string res_str; - int src_val3 = -1; - res_str = csprintf("krl_prt (%s)\n", disassemble()); - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (!(lane & 3)) { - res_str += csprintf("DB%03d: ", (int)w->wfDynId); - } - - if (mask[lane]) { - int64_t src_val1 = src1.get(w, lane, 1); - int src_val2 = src1.get(w, lane, 2); - src_val3 = src1.get(w, lane, 3); - - if (src_val2) { - res_str += csprintf("%016x", src_val1); - } else { - res_str += csprintf("%016d", src_val1); - } - } else { - res_str += csprintf("xxxxxxxxxxxxxxxx"); - } - - if ((lane & 3) == 3) { - res_str += csprintf("\n"); - } else { - res_str += csprintf(" "); - } - } - - res_str += "\n\n"; - if (w->wfDynId == src_val3) { - DPRINTFN(res_str.c_str()); - } - #endif - } - - void - Call::MagicPrintWFFloat(Wavefront *w) - { - #if TRACING_ON - const VectorMask &mask = w->getPred(); - std::string res_str; - res_str = csprintf("krl_prt (%s)\n", disassemble()); - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (!(lane & 7)) { - res_str += csprintf("DB%03d: ", (int)w->wfDynId); - } - - if (mask[lane]) { - float src_val1 = src1.get(w, lane, 1); - res_str += csprintf("%08f", src_val1); - } else { - res_str += csprintf("xxxxxxxx"); - } - - if ((lane & 7) == 7) { - res_str += csprintf("\n"); - } else { - res_str += csprintf(" "); - } - } - - res_str += "\n\n"; - DPRINTFN(res_str.c_str()); - #endif - } - - // raises a signal that GDB will catch - // when done with the break, type "signal 0" in gdb to continue - void - Call::MagicSimBreak(Wavefront *w) - { - std::string res_str; - // print out state for this wavefront and then break - res_str = csprintf("Breakpoint encountered for wavefront %i\n", - w->wfSlotId); - - res_str += csprintf(" Kern ID: %i\n", w->kernId); - res_str += csprintf(" Phase ID: %i\n", w->simdId); - res_str += csprintf(" Executing on CU #%i\n", w->computeUnit->cu_id); - res_str += csprintf(" Exec mask: "); - - for (int i = w->computeUnit->wfSize() - 1; i >= 0; --i) { - if (w->execMask(i)) - res_str += "1"; - else - res_str += "0"; - - if ((i & 7) == 7) - res_str += " "; - } - - res_str += csprintf("(0x%016llx)\n", w->execMask().to_ullong()); - - res_str += "\nHelpful debugging hints:\n"; - res_str += " Check out w->s_reg / w->d_reg for register state\n"; - - res_str += "\n\n"; - DPRINTFN(res_str.c_str()); - fflush(stdout); - - raise(SIGTRAP); - } - - void - Call::MagicPrefixSum(Wavefront *w) - { - const VectorMask &mask = w->getPred(); - int res = 0; - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - int src_val1 = src1.get(w, lane, 1); - dest.set(w, lane, res); - res += src_val1; - } - } - } - - void - Call::MagicReduction(Wavefront *w) - { - // reduction magic instruction - // The reduction instruction takes up to 64 inputs (one from - // each thread in a WF) and sums them. It returns the sum to - // each thread in the WF. - const VectorMask &mask = w->getPred(); - int res = 0; - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - int src_val1 = src1.get(w, lane, 1); - res += src_val1; - } - } - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - dest.set(w, lane, res); - } - } - } - - void - Call::MagicMaskLower(Wavefront *w) - { - const VectorMask &mask = w->getPred(); - int res = 0; - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - int src_val1 = src1.get(w, lane, 1); - - if (src_val1) { - if (lane < (w->computeUnit->wfSize()/2)) { - res = res | ((uint32_t)(1) << lane); - } - } - } - } - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - dest.set(w, lane, res); - } - } - } - - void - Call::MagicMaskUpper(Wavefront *w) - { - const VectorMask &mask = w->getPred(); - int res = 0; - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - int src_val1 = src1.get(w, lane, 1); - - if (src_val1) { - if (lane >= (w->computeUnit->wfSize()/2)) { - res = res | ((uint32_t)(1) << - (lane - (w->computeUnit->wfSize()/2))); - } - } - } - } - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - dest.set(w, lane, res); - } - } - } - - void - Call::MagicJoinWFBar(Wavefront *w) - { - const VectorMask &mask = w->getPred(); - int max_cnt = 0; - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - w->barCnt[lane]++; - - if (w->barCnt[lane] > max_cnt) { - max_cnt = w->barCnt[lane]; - } - } - } - - if (max_cnt > w->maxBarCnt) { - w->maxBarCnt = max_cnt; - } - } - - void - Call::MagicWaitWFBar(Wavefront *w) - { - const VectorMask &mask = w->getPred(); - int max_cnt = 0; - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - w->barCnt[lane]--; - } - - if (w->barCnt[lane] > max_cnt) { - max_cnt = w->barCnt[lane]; - } - } - - if (max_cnt < w->maxBarCnt) { - w->maxBarCnt = max_cnt; - } - - w->instructionBuffer.erase(w->instructionBuffer.begin() + 1, - w->instructionBuffer.end()); - if (w->pendingFetch) - w->dropFetch = true; - } - - void - Call::MagicPanic(Wavefront *w) - { - const VectorMask &mask = w->getPred(); - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - int src_val1 = src1.get(w, lane, 1); - panic("OpenCL Code failed assertion #%d. Triggered by lane %s", - src_val1, lane); - } - } - } - - void - Call::calcAddr(Wavefront *w, GPUDynInstPtr m) - { - // the address is in src1 | src2 - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - int src_val1 = src1.get(w, lane, 1); - int src_val2 = src1.get(w, lane, 2); - Addr addr = (((Addr) src_val1) << 32) | ((Addr) src_val2); - - m->addr[lane] = addr; - } - - } - - void - Call::MagicAtomicNRAddGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst) - { - GPUDynInstPtr m = gpuDynInst; - - calcAddr(w, m); - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - ((int*)m->a_data)[lane] = src1.get(w, lane, 3); - } - - setFlag(AtomicNoReturn); - setFlag(AtomicAdd); - setFlag(NoScope); - setFlag(NoOrder); - setFlag(GlobalSegment); - - m->m_type = U32::memType; - m->v_type = U32::vgprType; - - m->exec_mask = w->execMask(); - m->statusBitVector = 0; - m->equiv = 0; // atomics don't have an equivalence class operand - m->n_reg = 1; - - m->simdId = w->simdId; - m->wfSlotId = w->wfSlotId; - m->wfDynId = w->wfDynId; - m->latency.init(&w->computeUnit->shader->tick_cnt); - - m->pipeId = GLBMEM_PIPE; - m->latency.set(w->computeUnit->shader->ticks(64)); - w->computeUnit->globalMemoryPipe.issueRequest(m); - w->outstandingReqsWrGm++; - w->wrGmReqsInPipe--; - w->outstandingReqsRdGm++; - w->rdGmReqsInPipe--; - w->outstandingReqs++; - w->memReqsInPipe--; - } - - void - Call::MagicAtomicNRAddGroupU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst) - { - GPUDynInstPtr m = gpuDynInst; - calcAddr(w, m); - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - ((int*)m->a_data)[lane] = src1.get(w, lane, 1); - } - - setFlag(AtomicNoReturn); - setFlag(AtomicAdd); - setFlag(NoScope); - setFlag(NoOrder); - setFlag(GlobalSegment); - - m->m_type = U32::memType; - m->v_type = U32::vgprType; - - m->exec_mask = w->execMask(); - m->statusBitVector = 0; - m->equiv = 0; // atomics don't have an equivalence class operand - m->n_reg = 1; - - m->simdId = w->simdId; - m->wfSlotId = w->wfSlotId; - m->wfDynId = w->wfDynId; - m->latency.init(&w->computeUnit->shader->tick_cnt); - - m->pipeId = GLBMEM_PIPE; - m->latency.set(w->computeUnit->shader->ticks(64)); - w->computeUnit->globalMemoryPipe.issueRequest(m); - w->outstandingReqsWrGm++; - w->wrGmReqsInPipe--; - w->outstandingReqsRdGm++; - w->rdGmReqsInPipe--; - w->outstandingReqs++; - w->memReqsInPipe--; - } - - void - Call::MagicLoadGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst) - { - GPUDynInstPtr m = gpuDynInst; - // calculate the address - calcAddr(w, m); - - setFlag(Load); - setFlag(NoScope); - setFlag(NoOrder); - setFlag(GlobalSegment); - - m->m_type = U32::memType; //MemDataType::memType; - m->v_type = U32::vgprType; //DestDataType::vgprType; - - m->exec_mask = w->execMask(); - m->statusBitVector = 0; - m->equiv = 0; - m->n_reg = 1; - - // FIXME - //m->dst_reg = this->dest.regIndex(); - - m->simdId = w->simdId; - m->wfSlotId = w->wfSlotId; - m->wfDynId = w->wfDynId; - m->latency.init(&w->computeUnit->shader->tick_cnt); - - m->pipeId = GLBMEM_PIPE; - m->latency.set(w->computeUnit->shader->ticks(1)); - w->computeUnit->globalMemoryPipe.issueRequest(m); - w->outstandingReqsRdGm++; - w->rdGmReqsInPipe--; - w->outstandingReqs++; - w->memReqsInPipe--; - } - - void - Call::MagicXactCasLd(Wavefront *w) - { - const VectorMask &mask = w->getPred(); - int src_val1 = 0; - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (mask[lane]) { - src_val1 = src1.get(w, lane, 1); - break; - } - } - - if (!w->computeUnit->xactCasLoadMap.count(src_val1)) { - w->computeUnit->xactCasLoadMap[src_val1] = ComputeUnit::waveQueue(); - w->computeUnit->xactCasLoadMap[src_val1].waveIDQueue.clear(); - } - - w->computeUnit->xactCasLoadMap[src_val1].waveIDQueue - .push_back(ComputeUnit::waveIdentifier(w->simdId, w->wfSlotId)); - } - - void - Call::MagicMostSigThread(Wavefront *w) - { - const VectorMask &mask = w->getPred(); - unsigned mst = true; - - for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) { - if (mask[lane]) { - dest.set(w, lane, mst); - mst = false; - } - } - } - - void - Call::MagicMostSigBroadcast(Wavefront *w) - { - const VectorMask &mask = w->getPred(); - int res = 0; - bool got_res = false; - - for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) { - if (mask[lane]) { - if (!got_res) { - res = src1.get(w, lane, 1); - got_res = true; - } - dest.set(w, lane, res); - } - } - } - -} // namespace HsailISA diff --git a/src/arch/hsail/operand.cc b/src/arch/hsail/operand.cc deleted file mode 100644 index 993d3521f..000000000 --- a/src/arch/hsail/operand.cc +++ /dev/null @@ -1,468 +0,0 @@ -/* - * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: Steve Reinhardt - */ - -#include "arch/hsail/operand.hh" - -using namespace Brig; - -bool -BaseRegOperand::init(unsigned opOffset, const BrigObject *obj, - unsigned &maxRegIdx, char _regFileChar) -{ - regFileChar = _regFileChar; - const BrigOperand *brigOp = obj->getOperand(opOffset); - - if (brigOp->kind != BRIG_KIND_OPERAND_REGISTER) - return false; - - const BrigOperandRegister *brigRegOp = (const BrigOperandRegister*)brigOp; - - regIdx = brigRegOp->regNum; - - DPRINTF(GPUReg, "Operand: regNum: %d, kind: %d\n", regIdx, - brigRegOp->regKind); - - maxRegIdx = std::max(maxRegIdx, regIdx); - - return true; -} - -void -ListOperand::init(unsigned opOffset, const BrigObject *obj) -{ - const BrigOperand *brigOp = (const BrigOperand*)obj->getOperand(opOffset); - - switch (brigOp->kind) { - case BRIG_KIND_OPERAND_CODE_LIST: - { - const BrigOperandCodeList *opList = - (const BrigOperandCodeList*)brigOp; - - const Brig::BrigData *oprnd_data = - obj->getBrigBaseData(opList->elements); - - // Note: for calls Dest list of operands could be size of 0. - elementCount = oprnd_data->byteCount / 4; - - DPRINTF(GPUReg, "Operand Code List: # elements: %d\n", - elementCount); - - for (int i = 0; i < elementCount; ++i) { - unsigned *data_offset = - (unsigned*)obj->getData(opList->elements + 4 * (i + 1)); - - const BrigDirectiveVariable *p = - (const BrigDirectiveVariable*)obj-> - getCodeSectionEntry(*data_offset); - - StorageElement *se = obj->currentCode->storageMap-> - findSymbol(BRIG_SEGMENT_ARG, p); - - assert(se); - callArgs.push_back(se); - } - } - break; - default: - fatal("ListOperand: bad operand kind %d\n", brigOp->kind); - } -} - -std::string -ListOperand::disassemble() -{ - std::string res_str(""); - - for (auto it : callArgs) { - res_str += csprintf("%s ", it->name.c_str()); - } - - return res_str; -} - -void -FunctionRefOperand::init(unsigned opOffset, const BrigObject *obj) -{ - const BrigOperand *baseOp = obj->getOperand(opOffset); - - if (baseOp->kind != BRIG_KIND_OPERAND_CODE_REF) { - fatal("FunctionRefOperand: bad operand kind %d\n", baseOp->kind); - } - - const BrigOperandCodeRef *brigOp = (const BrigOperandCodeRef*)baseOp; - - const BrigDirectiveExecutable *p = - (const BrigDirectiveExecutable*)obj->getCodeSectionEntry(brigOp->ref); - - func_name = obj->getString(p->name); -} - -std::string -FunctionRefOperand::disassemble() -{ - DPRINTF(GPUReg, "Operand Func-ref name: %s\n", func_name); - - return csprintf("%s", func_name); -} - -bool -BaseRegOperand::init_from_vect(unsigned opOffset, const BrigObject *obj, - int at, unsigned &maxRegIdx, char _regFileChar) -{ - regFileChar = _regFileChar; - const BrigOperand *brigOp = obj->getOperand(opOffset); - - if (brigOp->kind != BRIG_KIND_OPERAND_OPERAND_LIST) - return false; - - - const Brig::BrigOperandOperandList *brigRegVecOp = - (const Brig::BrigOperandOperandList*)brigOp; - - unsigned *data_offset = - (unsigned*)obj->getData(brigRegVecOp->elements + 4 * (at + 1)); - - const BrigOperand *p = - (const BrigOperand*)obj->getOperand(*data_offset); - if (p->kind != BRIG_KIND_OPERAND_REGISTER) { - return false; - } - - const BrigOperandRegister *brigRegOp =(const BrigOperandRegister*)p; - - regIdx = brigRegOp->regNum; - - DPRINTF(GPUReg, "Operand: regNum: %d, kind: %d \n", regIdx, - brigRegOp->regKind); - - maxRegIdx = std::max(maxRegIdx, regIdx); - - return true; -} - -void -BaseRegOperand::initWithStrOffset(unsigned strOffset, const BrigObject *obj, - unsigned &maxRegIdx, char _regFileChar) -{ - const char *name = obj->getString(strOffset); - char *endptr; - regIdx = strtoul(name + 2, &endptr, 10); - - if (name[0] != '$' || name[1] != _regFileChar) { - fatal("register operand parse error on \"%s\"\n", name); - } - - maxRegIdx = std::max(maxRegIdx, regIdx); -} - -unsigned SRegOperand::maxRegIdx; -unsigned DRegOperand::maxRegIdx; -unsigned CRegOperand::maxRegIdx; - -std::string -SRegOperand::disassemble() -{ - return csprintf("$s%d", regIdx); -} - -std::string -DRegOperand::disassemble() -{ - return csprintf("$d%d", regIdx); -} - -std::string -CRegOperand::disassemble() -{ - return csprintf("$c%d", regIdx); -} - -BrigRegOperandInfo -findRegDataType(unsigned opOffset, const BrigObject *obj) -{ - const BrigOperand *baseOp = obj->getOperand(opOffset); - - switch (baseOp->kind) { - case BRIG_KIND_OPERAND_REGISTER: - { - const BrigOperandRegister *op = (BrigOperandRegister*)baseOp; - - return BrigRegOperandInfo((BrigKind16_t)baseOp->kind, - (BrigRegisterKind)op->regKind); - } - break; - - case BRIG_KIND_OPERAND_WAVESIZE: - { - BrigRegisterKind reg_kind = BRIG_REGISTER_KIND_DOUBLE; - return BrigRegOperandInfo((BrigKind16_t)baseOp->kind, reg_kind); - } - - case BRIG_KIND_OPERAND_OPERAND_LIST: - { - const BrigOperandOperandList *op = - (BrigOperandOperandList*)baseOp; - const BrigData *data_p = (BrigData*)obj->getData(op->elements); - - - int num_operands = 0; - BrigRegisterKind reg_kind = (BrigRegisterKind)0; - for (int offset = 0; offset < data_p->byteCount; offset += 4) { - const BrigOperand *op_p = (const BrigOperand *) - obj->getOperand(((int *)data_p->bytes)[offset/4]); - - if (op_p->kind == BRIG_KIND_OPERAND_REGISTER) { - const BrigOperandRegister *brigRegOp = - (const BrigOperandRegister*)op_p; - reg_kind = (BrigRegisterKind)brigRegOp->regKind; - } else if (op_p->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) { - uint16_t num_bytes = - ((Brig::BrigOperandConstantBytes*)op_p)->base.byteCount - - sizeof(BrigBase); - if (num_bytes == sizeof(uint32_t)) { - reg_kind = BRIG_REGISTER_KIND_SINGLE; - } else if (num_bytes == sizeof(uint64_t)) { - reg_kind = BRIG_REGISTER_KIND_DOUBLE; - } else { - fatal("OperandList: bad operand size %d\n", num_bytes); - } - } else if (op_p->kind == BRIG_KIND_OPERAND_WAVESIZE) { - reg_kind = BRIG_REGISTER_KIND_DOUBLE; - } else { - fatal("OperandList: bad operand kind %d\n", op_p->kind); - } - - num_operands++; - } - assert(baseOp->kind == BRIG_KIND_OPERAND_OPERAND_LIST); - - return BrigRegOperandInfo((BrigKind16_t)baseOp->kind, reg_kind); - } - break; - - case BRIG_KIND_OPERAND_ADDRESS: - { - const BrigOperandAddress *op = (BrigOperandAddress*)baseOp; - - if (!op->reg) { - BrigType type = BRIG_TYPE_NONE; - - if (op->symbol) { - const BrigDirective *dir = (BrigDirective*) - obj->getCodeSectionEntry(op->symbol); - - assert(dir->kind == BRIG_KIND_DIRECTIVE_VARIABLE); - - const BrigDirectiveVariable *sym = - (const BrigDirectiveVariable*)dir; - - type = (BrigType)sym->type; - } - return BrigRegOperandInfo(BRIG_KIND_OPERAND_ADDRESS, - (BrigType)type); - } else { - const BrigOperandAddress *b = (const BrigOperandAddress*)baseOp; - const BrigOperand *reg = obj->getOperand(b->reg); - const BrigOperandRegister *rop = (BrigOperandRegister*)reg; - - return BrigRegOperandInfo(BRIG_KIND_OPERAND_REGISTER, - (BrigRegisterKind)rop->regKind); - } - } - break; - - default: - fatal("AddrOperand: bad operand kind %d\n", baseOp->kind); - break; - } -} - -void -AddrOperandBase::parseAddr(const BrigOperandAddress *op, const BrigObject *obj) -{ - assert(op->base.kind == BRIG_KIND_OPERAND_ADDRESS); - - const BrigDirective *d = - (BrigDirective*)obj->getCodeSectionEntry(op->symbol); - - /** - * HSAIL does not properly handle immediate offsets for instruction types - * that utilize them. It currently only supports instructions that use - * variables instead. Again, these pop up in code that is never executed - * (i.e. the HCC AMP codes) so we just hack it here to let us pass through - * the HSAIL object initialization. If such code is ever called, we would - * have to implement this properly. - */ - if (d->kind != BRIG_KIND_DIRECTIVE_VARIABLE) { - warn("HSAIL implementation does not support instructions with " - "address calculations where the operand is not a variable\n"); - } - - const BrigDirectiveVariable *sym = (BrigDirectiveVariable*)d; - name = obj->getString(sym->name); - - if (sym->segment != BRIG_SEGMENT_ARG) { - storageElement = - obj->currentCode->storageMap->findSymbol(sym->segment, name); - offset = 0; - } else { - // sym->name does not work for BRIG_SEGMENT_ARG for the following case: - // - // void foo(int a); - // void bar(double a); - // - // foo(...) --> arg_u32 %param_p0; - // st_arg_u32 $s0, [%param_p0]; - // call &foo (%param_p0); - // bar(...) --> arg_f64 %param_p0; - // st_arg_u64 $d0, [%param_p0]; - // call &foo (%param_p0); - // - // Both functions use the same variable name (param_p0)!!! - // - // Maybe this is a bug in the compiler (I don't know). - // - // Solution: - // Use directive pointer (BrigDirectiveVariable) to differentiate 2 - // versions of param_p0. - // - // Note this solution is kind of stupid, because we are pulling stuff - // out of the brig binary via the directive pointer and putting it into - // the symbol table, but now we are indexing the symbol table by the - // brig directive pointer! It makes the symbol table sort of pointless. - // But I don't want to mess with the rest of the infrastructure, so - // let's go with this for now. - // - // When we update the compiler again, we should see if this problem goes - // away. If so, we can fold some of this functionality into the code for - // kernel arguments. If not, maybe we can index the symbol name on a - // hash of the variable AND function name - storageElement = obj->currentCode-> - storageMap->findSymbol((Brig::BrigSegment)sym->segment, sym); - - assert(storageElement); - } -} - -uint64_t -AddrOperandBase::calcUniformBase() -{ - // start with offset, will be 0 if not specified - uint64_t address = offset; - - // add in symbol value if specified - if (storageElement) { - address += storageElement->offset; - } - - return address; -} - -std::string -AddrOperandBase::disassemble(std::string reg_disassembly) -{ - std::string disasm; - - if (offset || reg_disassembly != "") { - disasm += "["; - - if (reg_disassembly != "") { - disasm += reg_disassembly; - - if (offset > 0) { - disasm += "+"; - } - } - - if (offset) { - disasm += csprintf("%d", offset); - } - - disasm += "]"; - } else if (name) { - disasm += csprintf("[%s]", name); - } - - return disasm; -} - -void -NoRegAddrOperand::init(unsigned opOffset, const BrigObject *obj) -{ - const BrigOperand *baseOp = obj->getOperand(opOffset); - - if (baseOp->kind == BRIG_KIND_OPERAND_ADDRESS) { - BrigOperandAddress *addrOp = (BrigOperandAddress*)baseOp; - parseAddr(addrOp, obj); - offset = (uint64_t(addrOp->offset.hi) << 32) | - uint64_t(addrOp->offset.lo); - } else { - fatal("NoRegAddrOperand: bad operand kind %d\n", baseOp->kind); - } - -} - -std::string -NoRegAddrOperand::disassemble() -{ - return AddrOperandBase::disassemble(std::string("")); -} - -void -LabelOperand::init(unsigned opOffset, const BrigObject *obj) -{ - const BrigOperandCodeRef *op = - (const BrigOperandCodeRef*)obj->getOperand(opOffset); - - assert(op->base.kind == BRIG_KIND_OPERAND_CODE_REF); - - const BrigDirective *dir = - (const BrigDirective*)obj->getCodeSectionEntry(op->ref); - - assert(dir->kind == BRIG_KIND_DIRECTIVE_LABEL); - label = obj->currentCode->refLabel((BrigDirectiveLabel*)dir, obj); -} - -uint32_t -LabelOperand::getTarget(Wavefront *w, int lane) -{ - return label->get(); -} - -std::string -LabelOperand::disassemble() -{ - return label->name; -} diff --git a/src/arch/hsail/operand.hh b/src/arch/hsail/operand.hh deleted file mode 100644 index 3fbb0991a..000000000 --- a/src/arch/hsail/operand.hh +++ /dev/null @@ -1,796 +0,0 @@ -/* - * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: Steve Reinhardt - */ - -#ifndef __ARCH_HSAIL_OPERAND_HH__ -#define __ARCH_HSAIL_OPERAND_HH__ - -/** - * @file operand.hh - * - * Defines classes encapsulating HSAIL instruction operands. - */ - -#include -#include - -#include "arch/hsail/Brig.h" -#include "base/trace.hh" -#include "base/types.hh" -#include "debug/GPUReg.hh" -#include "enums/RegisterType.hh" -#include "gpu-compute/brig_object.hh" -#include "gpu-compute/compute_unit.hh" -#include "gpu-compute/hsail_code.hh" -#include "gpu-compute/shader.hh" -#include "gpu-compute/vector_register_file.hh" -#include "gpu-compute/wavefront.hh" - -class Label; -class StorageElement; - -class BaseOperand -{ - public: - Enums::RegisterType registerType; - uint32_t regOperandSize; - BaseOperand() { registerType = Enums::RT_NONE; regOperandSize = 0; } - bool isVectorRegister() { return registerType == Enums::RT_VECTOR; } - bool isScalarRegister() { return registerType == Enums::RT_SCALAR; } - bool isCondRegister() { return registerType == Enums::RT_CONDITION; } - unsigned int regIndex() { return 0; } - uint32_t opSize() { return regOperandSize; } - virtual ~BaseOperand() { } -}; - -class BrigRegOperandInfo -{ - public: - Brig::BrigKind16_t kind; - Brig::BrigType type; - Brig::BrigRegisterKind regKind; - - BrigRegOperandInfo(Brig::BrigKind16_t _kind, - Brig::BrigRegisterKind _regKind) - : kind(_kind), regKind(_regKind) - { - } - - BrigRegOperandInfo(Brig::BrigKind16_t _kind, Brig::BrigType _type) - : kind(_kind), type(_type) - { - } - - BrigRegOperandInfo() : kind(Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES), - type(Brig::BRIG_TYPE_NONE) - { - } -}; - -BrigRegOperandInfo findRegDataType(unsigned opOffset, const BrigObject *obj); - -class BaseRegOperand : public BaseOperand -{ - public: - unsigned regIdx; - char regFileChar; - - bool init(unsigned opOffset, const BrigObject *obj, - unsigned &maxRegIdx, char _regFileChar); - - bool init_from_vect(unsigned opOffset, const BrigObject *obj, int at, - unsigned &maxRegIdx, char _regFileChar); - - void initWithStrOffset(unsigned strOffset, const BrigObject *obj, - unsigned &maxRegIdx, char _regFileChar); - unsigned int regIndex() { return regIdx; } -}; - -class SRegOperand : public BaseRegOperand -{ - public: - static unsigned maxRegIdx; - - bool - init(unsigned opOffset, const BrigObject *obj) - { - regOperandSize = sizeof(uint32_t); - registerType = Enums::RT_VECTOR; - - return BaseRegOperand::init(opOffset, obj, maxRegIdx, 's'); - } - - bool - init_from_vect(unsigned opOffset, const BrigObject *obj, int at) - { - regOperandSize = sizeof(uint32_t); - registerType = Enums::RT_VECTOR; - - return BaseRegOperand::init_from_vect(opOffset, obj, at, maxRegIdx, - 's'); - } - - void - initWithStrOffset(unsigned strOffset, const BrigObject *obj) - { - regOperandSize = sizeof(uint32_t); - registerType = Enums::RT_VECTOR; - - return BaseRegOperand::initWithStrOffset(strOffset, obj, maxRegIdx, - 's'); - } - - template - OperandType - get(Wavefront *w, int lane) - { - assert(sizeof(OperandType) <= sizeof(uint32_t)); - assert(regIdx < w->maxSpVgprs); - // if OperandType is smaller than 32-bit, we truncate the value - OperandType ret; - uint32_t vgprIdx; - - switch (sizeof(OperandType)) { - case 1: // 1 byte operand - vgprIdx = w->remap(regIdx, 1, 1); - ret = (w->computeUnit->vrf[w->simdId]-> - read(vgprIdx, lane)) & 0xff; - break; - case 2: // 2 byte operand - vgprIdx = w->remap(regIdx, 2, 1); - ret = (w->computeUnit->vrf[w->simdId]-> - read(vgprIdx, lane)) & 0xffff; - break; - case 4: // 4 byte operand - vgprIdx = w->remap(regIdx,sizeof(OperandType), 1); - ret = w->computeUnit->vrf[w->simdId]-> - read(vgprIdx, lane); - break; - default: - panic("Bad OperandType\n"); - break; - } - - return (OperandType)ret; - } - - // special get method for compatibility with LabelOperand - uint32_t - getTarget(Wavefront *w, int lane) - { - return get(w, lane); - } - - template - void set(Wavefront *w, int lane, OperandType &val); - std::string disassemble(); -}; - -template -void -SRegOperand::set(Wavefront *w, int lane, OperandType &val) -{ - DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: $s%d <- %d\n", - w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane, regIdx, val); - - assert(sizeof(OperandType) == sizeof(uint32_t)); - assert(regIdx < w->maxSpVgprs); - uint32_t vgprIdx = w->remap(regIdx, sizeof(OperandType), 1); - w->computeUnit->vrf[w->simdId]->write(vgprIdx,val,lane); -} - -template<> -inline void -SRegOperand::set(Wavefront *w, int lane, uint64_t &val) -{ - DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: $s%d <- %d\n", - w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane, regIdx, val); - - assert(regIdx < w->maxSpVgprs); - uint32_t vgprIdx = w->remap(regIdx, sizeof(uint32_t), 1); - w->computeUnit->vrf[w->simdId]->write(vgprIdx, val, lane); -} - -class DRegOperand : public BaseRegOperand -{ - public: - static unsigned maxRegIdx; - - bool - init(unsigned opOffset, const BrigObject *obj) - { - regOperandSize = sizeof(uint64_t); - registerType = Enums::RT_VECTOR; - - return BaseRegOperand::init(opOffset, obj, maxRegIdx, 'd'); - } - - bool - init_from_vect(unsigned opOffset, const BrigObject *obj, int at) - { - regOperandSize = sizeof(uint64_t); - registerType = Enums::RT_VECTOR; - - return BaseRegOperand::init_from_vect(opOffset, obj, at, maxRegIdx, - 'd'); - } - - void - initWithStrOffset(unsigned strOffset, const BrigObject *obj) - { - regOperandSize = sizeof(uint64_t); - registerType = Enums::RT_VECTOR; - - return BaseRegOperand::initWithStrOffset(strOffset, obj, maxRegIdx, - 'd'); - } - - template - OperandType - get(Wavefront *w, int lane) - { - assert(sizeof(OperandType) <= sizeof(uint64_t)); - // TODO: this check is valid only for HSAIL - assert(regIdx < w->maxDpVgprs); - uint32_t vgprIdx = w->remap(regIdx, sizeof(OperandType), 1); - - return w->computeUnit->vrf[w->simdId]->read(vgprIdx,lane); - } - - template - void - set(Wavefront *w, int lane, OperandType &val) - { - DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: $d%d <- %d\n", - w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane, regIdx, - val); - - assert(sizeof(OperandType) <= sizeof(uint64_t)); - // TODO: this check is valid only for HSAIL - assert(regIdx < w->maxDpVgprs); - uint32_t vgprIdx = w->remap(regIdx, sizeof(OperandType), 1); - w->computeUnit->vrf[w->simdId]->write(vgprIdx,val,lane); - } - - std::string disassemble(); -}; - -class CRegOperand : public BaseRegOperand -{ - public: - static unsigned maxRegIdx; - - bool - init(unsigned opOffset, const BrigObject *obj) - { - regOperandSize = sizeof(uint8_t); - registerType = Enums::RT_CONDITION; - - return BaseRegOperand::init(opOffset, obj, maxRegIdx, 'c'); - } - - bool - init_from_vect(unsigned opOffset, const BrigObject *obj, int at) - { - regOperandSize = sizeof(uint8_t); - registerType = Enums::RT_CONDITION; - - return BaseRegOperand::init_from_vect(opOffset, obj, at, maxRegIdx, - 'c'); - } - - void - initWithStrOffset(unsigned strOffset, const BrigObject *obj) - { - regOperandSize = sizeof(uint8_t); - registerType = Enums::RT_CONDITION; - - return BaseRegOperand::initWithStrOffset(strOffset, obj, maxRegIdx, - 'c'); - } - - template - OperandType - get(Wavefront *w, int lane) - { - assert(regIdx < w->condRegState->numRegs()); - - return w->condRegState->read((int)regIdx, lane); - } - - template - void - set(Wavefront *w, int lane, OperandType &val) - { - DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: $c%d <- %d\n", - w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane, regIdx, - val); - - assert(regIdx < w->condRegState->numRegs()); - w->condRegState->write(regIdx,lane,val); - } - - std::string disassemble(); -}; - -template -class ImmOperand : public BaseOperand -{ - private: - uint16_t kind; - public: - T bits; - - bool init(unsigned opOffset, const BrigObject *obj); - bool init_from_vect(unsigned opOffset, const BrigObject *obj, int at); - std::string disassemble(); - - template - OperandType - get(Wavefront *w) - { - assert(sizeof(OperandType) <= sizeof(T)); - panic_if(w == nullptr, "WF pointer needs to be set"); - - switch (kind) { - // immediate operand is WF size - case Brig::BRIG_KIND_OPERAND_WAVESIZE: - return (OperandType)w->computeUnit->wfSize(); - break; - - default: - return *(OperandType*)&bits; - break; - } - } - - // This version of get() takes a WF* and a lane id for - // compatibility with the register-based get() methods. - template - OperandType - get(Wavefront *w, int lane) - { - return get(w); - } -}; - -template -bool -ImmOperand::init(unsigned opOffset, const BrigObject *obj) -{ - const Brig::BrigOperand *brigOp = obj->getOperand(opOffset); - - switch (brigOp->kind) { - // this is immediate operand - case Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES: - { - DPRINTF(GPUReg, "sizeof(T): %lu, byteCount: %d\n", sizeof(T), - brigOp->byteCount); - - auto cbptr = (Brig::BrigOperandConstantBytes*)brigOp; - - bits = *((T*)(obj->getData(cbptr->bytes + 4))); - kind = brigOp->kind; - return true; - } - break; - - case Brig::BRIG_KIND_OPERAND_WAVESIZE: - kind = brigOp->kind; - bits = std::numeric_limits::digits; - return true; - - default: - kind = Brig::BRIG_KIND_NONE; - return false; - } -} - -template -bool -ImmOperand::init_from_vect(unsigned opOffset, const BrigObject *obj, int at) -{ - const Brig::BrigOperand *brigOp = obj->getOperand(opOffset); - - if (brigOp->kind != Brig::BRIG_KIND_OPERAND_OPERAND_LIST) { - kind = Brig::BRIG_KIND_NONE; - return false; - } - - - const Brig::BrigOperandOperandList *brigVecOp = - (const Brig::BrigOperandOperandList *)brigOp; - - unsigned *data_offset = - (unsigned *)obj->getData(brigVecOp->elements + 4 * (at + 1)); - - const Brig::BrigOperand *p = - (const Brig::BrigOperand *)obj->getOperand(*data_offset); - - if (p->kind != Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES) { - kind = Brig::BRIG_KIND_NONE; - return false; - } - - return init(*data_offset, obj); -} -template -std::string -ImmOperand::disassemble() -{ - return csprintf("0x%08x", bits); -} - -template -class RegOrImmOperand : public BaseOperand -{ - private: - bool is_imm; - - public: - void setImm(const bool value) { is_imm = value; } - - ImmOperand imm_op; - RegOperand reg_op; - - RegOrImmOperand() { is_imm = false; } - void init(unsigned opOffset, const BrigObject *obj); - void init_from_vect(unsigned opOffset, const BrigObject *obj, int at); - std::string disassemble(); - - template - OperandType - get(Wavefront *w, int lane) - { - return is_imm ? imm_op.template get(w) : - reg_op.template get(w, lane); - } - - uint32_t - opSize() - { - if (!is_imm) { - return reg_op.opSize(); - } - - return 0; - } - - bool - isVectorRegister() - { - if (!is_imm) { - return reg_op.registerType == Enums::RT_VECTOR; - } - return false; - } - - bool - isCondRegister() - { - if (!is_imm) { - return reg_op.registerType == Enums::RT_CONDITION; - } - - return false; - } - - bool - isScalarRegister() - { - if (!is_imm) { - return reg_op.registerType == Enums::RT_SCALAR; - } - - return false; - } - - unsigned int - regIndex() - { - if (!is_imm) { - return reg_op.regIndex(); - } - return 0; - } -}; - -template -void -RegOrImmOperand::init(unsigned opOffset, const BrigObject *obj) -{ - is_imm = false; - - if (reg_op.init(opOffset, obj)) { - return; - } - - if (imm_op.init(opOffset, obj)) { - is_imm = true; - return; - } - - fatal("RegOrImmOperand::init(): bad operand kind %d\n", - obj->getOperand(opOffset)->kind); -} - -template -void -RegOrImmOperand::init_from_vect(unsigned opOffset, - const BrigObject *obj, int at) -{ - if (reg_op.init_from_vect(opOffset, obj, at)) { - is_imm = false; - - return; - } - - if (imm_op.init_from_vect(opOffset, obj, at)) { - is_imm = true; - - return; - } - - fatal("RegOrImmOperand::init(): bad operand kind %d\n", - obj->getOperand(opOffset)->kind); -} - -template -std::string -RegOrImmOperand::disassemble() -{ - return is_imm ? imm_op.disassemble() : reg_op.disassemble(); -} - -typedef RegOrImmOperand SRegOrImmOperand; -typedef RegOrImmOperand DRegOrImmOperand; -typedef RegOrImmOperand CRegOrImmOperand; - -class AddrOperandBase : public BaseOperand -{ - protected: - // helper function for init() - void parseAddr(const Brig::BrigOperandAddress *op, const BrigObject *obj); - - // helper function for disassemble() - std::string disassemble(std::string reg_disassembly); - uint64_t calcUniformBase(); - - public: - virtual void calcVector(Wavefront *w, std::vector &addrVec) = 0; - virtual uint64_t calcLane(Wavefront *w, int lane=0) = 0; - - int64_t offset; - const char *name = nullptr; - StorageElement *storageElement; -}; - -template -class RegAddrOperand : public AddrOperandBase -{ - public: - RegOperandType reg; - void init(unsigned opOffset, const BrigObject *obj); - uint64_t calcUniform(); - void calcVector(Wavefront *w, std::vector &addrVec); - uint64_t calcLane(Wavefront *w, int lane=0); - uint32_t opSize() { return reg.opSize(); } - bool isVectorRegister() { return reg.registerType == Enums::RT_VECTOR; } - bool isCondRegister() { return reg.registerType == Enums::RT_CONDITION; } - bool isScalarRegister() { return reg.registerType == Enums::RT_SCALAR; } - unsigned int regIndex() { return reg.regIndex(); } - std::string disassemble(); -}; - -template -void -RegAddrOperand::init(unsigned opOffset, const BrigObject *obj) -{ - using namespace Brig; - - const BrigOperand *baseOp = obj->getOperand(opOffset); - - switch (baseOp->kind) { - case BRIG_KIND_OPERAND_ADDRESS: - { - const BrigOperandAddress *op = (BrigOperandAddress*)baseOp; - storageElement = nullptr; - - reg.init(op->reg, obj); - - if (reg.regFileChar == 's') { - // if the address expression is 32b, then the hi - // bits of the offset must be set to 0 in the BRIG - assert(!op->offset.hi); - /** - * the offset field of an HSAIL instruction may be negative - * so here we cast the raw bits we get from the BRIG file to - * a signed type to avoid address calculation errors - */ - offset = (int32_t)(op->offset.lo); - reg.regOperandSize = sizeof(uint32_t); - registerType = Enums::RT_VECTOR; - } - else if (reg.regFileChar == 'd') { - offset = (int64_t)(((uint64_t)(op->offset.hi) << 32) - | (uint64_t)(op->offset.lo)); - reg.regOperandSize = sizeof(uint64_t); - registerType = Enums::RT_VECTOR; - } - } - break; - - default: - fatal("RegAddrOperand: bad operand kind %d\n", baseOp->kind); - break; - } -} - -template -uint64_t -RegAddrOperand::calcUniform() -{ - fatal("can't do calcUniform() on register-based address\n"); - - return 0; -} - -template -void -RegAddrOperand::calcVector(Wavefront *w, - std::vector &addrVec) -{ - Addr address = calcUniformBase(); - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { - if (w->execMask(lane)) { - if (reg.regFileChar == 's') { - addrVec[lane] = address + reg.template get(w, lane); - } else { - addrVec[lane] = address + reg.template get(w, lane); - } - } - } -} - -template -uint64_t -RegAddrOperand::calcLane(Wavefront *w, int lane) -{ - Addr address = calcUniformBase(); - - return address + reg.template get(w, lane); -} - -template -std::string -RegAddrOperand::disassemble() -{ - return AddrOperandBase::disassemble(reg.disassemble()); -} - -typedef RegAddrOperand SRegAddrOperand; -typedef RegAddrOperand DRegAddrOperand; - -class NoRegAddrOperand : public AddrOperandBase -{ - public: - void init(unsigned opOffset, const BrigObject *obj); - uint64_t calcUniform(); - void calcVector(Wavefront *w, std::vector &addrVec); - uint64_t calcLane(Wavefront *w, int lane=0); - std::string disassemble(); -}; - -inline uint64_t -NoRegAddrOperand::calcUniform() -{ - return AddrOperandBase::calcUniformBase(); -} - -inline uint64_t -NoRegAddrOperand::calcLane(Wavefront *w, int lane) -{ - return calcUniform(); -} - -inline void -NoRegAddrOperand::calcVector(Wavefront *w, std::vector &addrVec) -{ - uint64_t address = calcUniformBase(); - - for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) - addrVec[lane] = address; -} - -class LabelOperand : public BaseOperand -{ - public: - Label *label; - - void init(unsigned opOffset, const BrigObject *obj); - std::string disassemble(); - - // special get method for compatibility with SRegOperand - uint32_t getTarget(Wavefront *w, int lane); - -}; - -class ListOperand : public BaseOperand -{ - public: - int elementCount; - std::vector callArgs; - - int - getSrcOperand(int idx) - { - DPRINTF(GPUReg, "getSrcOperand, idx: %d, sz_args: %d\n", idx, - callArgs.size()); - - return callArgs.at(idx)->offset; - } - - void init(unsigned opOffset, const BrigObject *obj); - - std::string disassemble(); - - template - OperandType - get(Wavefront *w, int lane, int arg_idx) - { - return w->readCallArgMem(lane, getSrcOperand(arg_idx)); - } - - template - void - set(Wavefront *w, int lane, OperandType val) - { - w->writeCallArgMem(lane, getSrcOperand(0), val); - DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: arg[%d] <- %d\n", - w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane, - getSrcOperand(0), val); - } -}; - -class FunctionRefOperand : public BaseOperand -{ - public: - const char *func_name; - - void init(unsigned opOffset, const BrigObject *obj); - std::string disassemble(); -}; - -#endif // __ARCH_HSAIL_OPERAND_HH__ diff --git a/src/gpu-compute/brig_object.cc b/src/gpu-compute/brig_object.cc deleted file mode 100644 index 6211598d4..000000000 --- a/src/gpu-compute/brig_object.cc +++ /dev/null @@ -1,476 +0,0 @@ -/* - * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: Steve Reinhardt, Anthony Gutierrez - */ - -#include "gpu-compute/brig_object.hh" - -#include -#include -#include -#include - -#include -#include -#include - -#include "arch/hsail/Brig.h" -#include "base/logging.hh" -#include "base/trace.hh" -#include "debug/BRIG.hh" -#include "debug/HSAILObject.hh" -#include "debug/HSALoader.hh" - -using namespace Brig; - -std::vector> - HsaObject::tryFileFuncs = { BrigObject::tryFile }; - -extern int getBrigDataTypeBytes(BrigType16_t t); - -const char *BrigObject::sectionNames[] = -{ - "hsa_data", - "hsa_code", - "hsa_operand", - ".shstrtab" -}; - -const char *segmentNames[] = -{ - "none", - "flat", - "global", - "readonly", - "kernarg", - "group", - "private", - "spill", - "args" -}; - -const uint8_t* -BrigObject::getSectionOffset(enum SectionIndex sec, int offs) const -{ - // allow offs == size for dummy end pointers - assert(offs <= sectionInfo[sec].size); - - return sectionInfo[sec].ptr + offs; -} - -const char* -BrigObject::getString(int offs) const -{ - return (const char*)(getSectionOffset(DataSectionIndex, offs) + 4); -} - -const BrigBase* -BrigObject::getCodeSectionEntry(int offs) const -{ - return (const BrigBase*)getSectionOffset(CodeSectionIndex, offs); -} - -const BrigData* -BrigObject::getBrigBaseData(int offs) const -{ - return (Brig::BrigData*)(getSectionOffset(DataSectionIndex, offs)); -} - -const uint8_t* -BrigObject::getData(int offs) const -{ - return getSectionOffset(DataSectionIndex, offs); -} - -const BrigOperand* -BrigObject::getOperand(int offs) const -{ - return (const BrigOperand*)getSectionOffset(OperandsSectionIndex, offs); -} - -unsigned -BrigObject::getOperandPtr(int offs, int index) const -{ - unsigned *op_offs = (unsigned*)(getData(offs + 4 * (index + 1))); - - return *op_offs; -} - -const BrigInstBase* -BrigObject::getInst(int offs) const -{ - return (const BrigInstBase*)getSectionOffset(CodeSectionIndex, offs); -} - -HsaCode* -BrigObject::getKernel(const std::string &name) const -{ - return nullptr; -} - -HsaCode* -BrigObject::getFunction(const std::string &name) const -{ - for (int i = 0; i < functions.size(); ++i) { - if (functions[i]->name() == name) { - return functions[i]; - } - } - - return nullptr; -} - -void -BrigObject::processDirectives(const BrigBase *dirPtr, const BrigBase *endPtr, - StorageMap *storageMap) -{ - while (dirPtr < endPtr) { - if (!dirPtr->byteCount) { - fatal("Bad directive size 0\n"); - } - - // calculate next pointer now so we can override it if needed - const BrigBase *nextDirPtr = brigNext(dirPtr); - - DPRINTF(HSAILObject, "Code section entry kind: #%x, byte count: %d\n", - dirPtr->kind, dirPtr->byteCount); - - switch (dirPtr->kind) { - case BRIG_KIND_DIRECTIVE_FUNCTION: - { - const BrigDirectiveExecutable *p M5_VAR_USED = - reinterpret_cast(dirPtr); - - DPRINTF(HSAILObject,"DIRECTIVE_FUNCTION: %s offset: " - "%d next: %d\n", getString(p->name), - p->firstCodeBlockEntry, p->nextModuleEntry); - - if (p->firstCodeBlockEntry != p->nextModuleEntry) { - // Function calls are not supported. We allow the BRIG - // object file to create stubs, but the function calls will - // not work properly if the application makes use of them. - warn("HSA function invocations are unsupported.\n"); - - const char *name = getString(p->name); - - HsailCode *code_obj = nullptr; - - for (int i = 0; i < functions.size(); ++i) { - if (functions[i]->name() == name) { - code_obj = functions[i]; - break; - } - } - - if (!code_obj) { - // create new local storage map for kernel-local symbols - code_obj = new HsailCode(name, p, this, - new StorageMap(storageMap)); - functions.push_back(code_obj); - } else { - panic("Multiple definition of Function!!: %s\n", - getString(p->name)); - } - } - - nextDirPtr = getCodeSectionEntry(p->nextModuleEntry); - } - break; - - case BRIG_KIND_DIRECTIVE_KERNEL: - { - const BrigDirectiveExecutable *p = - reinterpret_cast(dirPtr); - - DPRINTF(HSAILObject,"DIRECTIVE_KERNEL: %s offset: %d count: " - "next: %d\n", getString(p->name), - p->firstCodeBlockEntry, p->nextModuleEntry); - - const char *name = getString(p->name); - - if (name[0] == '&') - name++; - - std::string str = name; - char *temp; - int len = str.length(); - - if (str[len - 1] >= 'a' && str[len - 1] <= 'z') { - temp = new char[str.size() + 1]; - std::copy(str.begin(), str.end() , temp); - temp[str.size()] = '\0'; - } else { - temp = new char[str.size()]; - std::copy(str.begin(), str.end() - 1 , temp); - temp[str.size() - 1 ] = '\0'; - } - - std::string kernel_name = temp; - delete[] temp; - - HsailCode *code_obj = nullptr; - - for (const auto &kernel : kernels) { - if (kernel->name() == kernel_name) { - code_obj = kernel; - break; - } - } - - if (!code_obj) { - // create new local storage map for kernel-local symbols - code_obj = new HsailCode(kernel_name, p, this, - new StorageMap(storageMap)); - - kernels.push_back(code_obj); - } - - nextDirPtr = getCodeSectionEntry(p->nextModuleEntry); - } - break; - - case BRIG_KIND_DIRECTIVE_VARIABLE: - { - const BrigDirectiveVariable *p = - reinterpret_cast(dirPtr); - - uint64_t readonlySize_old = - storageMap->getSize(BRIG_SEGMENT_READONLY); - - StorageElement* se = storageMap->addSymbol(p, this); - - DPRINTF(HSAILObject, "DIRECTIVE_VARIABLE, symbol %s\n", - getString(p->name)); - - if (p->segment == BRIG_SEGMENT_READONLY) { - // readonly memory has initialization data - uint8_t* readonlyData_old = readonlyData; - - readonlyData = - new uint8_t[storageMap->getSize(BRIG_SEGMENT_READONLY)]; - - if (p->init) { - if ((p->type == BRIG_TYPE_ROIMG) || - (p->type == BRIG_TYPE_WOIMG) || - (p->type == BRIG_TYPE_SAMP) || - (p->type == BRIG_TYPE_SIG32) || - (p->type == BRIG_TYPE_SIG64)) { - panic("Read only data type not supported: %s\n", - getString(p->name)); - } - - const BrigOperand *brigOp = getOperand(p->init); - assert(brigOp->kind == - BRIG_KIND_OPERAND_CONSTANT_BYTES); - - const Brig::BrigData *operand_data M5_VAR_USED = - getBrigBaseData(((BrigOperandConstantBytes*) - brigOp)->bytes); - - assert((operand_data->byteCount / 4) > 0); - - uint8_t *symbol_data = - (uint8_t*)getData(((BrigOperandConstantBytes*) - brigOp)->bytes + 4); - - // copy the old data and add the new data - if (readonlySize_old > 0) { - memcpy(readonlyData, readonlyData_old, - readonlySize_old); - } - - memcpy(readonlyData + se->offset, symbol_data, - se->size); - - delete[] readonlyData_old; - } - } - } - break; - - case BRIG_KIND_DIRECTIVE_LABEL: - { - const BrigDirectiveLabel M5_VAR_USED *p = - reinterpret_cast(dirPtr); - - panic("Label directives cannot be at the module level: %s\n", - getString(p->name)); - - } - break; - - case BRIG_KIND_DIRECTIVE_COMMENT: - { - const BrigDirectiveComment M5_VAR_USED *p = - reinterpret_cast(dirPtr); - - DPRINTF(HSAILObject, "DIRECTIVE_COMMENT: %s\n", - getString(p->name)); - } - break; - - case BRIG_KIND_DIRECTIVE_LOC: - { - DPRINTF(HSAILObject, "BRIG_DIRECTIVE_LOC\n"); - } - break; - - case BRIG_KIND_DIRECTIVE_MODULE: - { - const BrigDirectiveModule M5_VAR_USED *p = - reinterpret_cast(dirPtr); - - DPRINTF(HSAILObject, "BRIG_DIRECTIVE_MODULE: %s\n", - getString(p->name)); - } - break; - - case BRIG_KIND_DIRECTIVE_CONTROL: - { - DPRINTF(HSAILObject, "DIRECTIVE_CONTROL\n"); - } - break; - - case BRIG_KIND_DIRECTIVE_PRAGMA: - { - DPRINTF(HSAILObject, "DIRECTIVE_PRAGMA\n"); - } - break; - - case BRIG_KIND_DIRECTIVE_EXTENSION: - { - DPRINTF(HSAILObject, "DIRECTIVE_EXTENSION\n"); - } - break; - - case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START: - { - DPRINTF(HSAILObject, "DIRECTIVE_ARG_BLOCK_START\n"); - } - break; - - case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END: - { - DPRINTF(HSAILObject, "DIRECTIVE_ARG_BLOCK_END\n"); - } - break; - default: - if (dirPtr->kind >= BRIG_KIND_INST_BEGIN && - dirPtr->kind <= BRIG_KIND_INST_END) - break; - - if (dirPtr->kind >= BRIG_KIND_OPERAND_BEGIN && - dirPtr->kind <= BRIG_KIND_OPERAND_END) - break; - - warn("Unknown Brig directive kind: %d\n", dirPtr->kind); - break; - } - - dirPtr = nextDirPtr; - } -} - -HsaObject* -BrigObject::tryFile(const std::string &fname, int len, uint8_t *fileData) -{ - const char *brig_ident = "HSA BRIG"; - - if (memcmp(brig_ident, fileData, MODULE_IDENTIFICATION_LENGTH)) - return nullptr; - - return new BrigObject(fname, len, fileData); -} - -BrigObject::BrigObject(const std::string &fname, int len, uint8_t *fileData) - : HsaObject(fname), storageMap(new StorageMap()) -{ - const char *brig_ident = "HSA BRIG"; - BrigModuleHeader *mod_hdr = (BrigModuleHeader*)fileData; - - fatal_if(memcmp(brig_ident, mod_hdr, MODULE_IDENTIFICATION_LENGTH), - "%s is not a BRIG file\n", fname); - - if (mod_hdr->brigMajor != BRIG_VERSION_BRIG_MAJOR || - mod_hdr->brigMinor != BRIG_VERSION_BRIG_MINOR) { - fatal("%s: BRIG version mismatch, %d.%d != %d.%d\n", - fname, mod_hdr->brigMajor, mod_hdr->brigMinor, - BRIG_VERSION_BRIG_MAJOR, BRIG_VERSION_BRIG_MINOR); - } - - fatal_if(mod_hdr->sectionCount != NumSectionIndices, "%s: BRIG section " - "count (%d) != expected value (%d)\n", fname, - mod_hdr->sectionCount, NumSectionIndices); - - for (int i = 0; i < NumSectionIndices; ++i) { - sectionInfo[i].ptr = nullptr; - } - - uint64_t *sec_idx_table = (uint64_t*)(fileData + mod_hdr->sectionIndex); - for (int sec_idx = 0; sec_idx < mod_hdr->sectionCount; ++sec_idx) { - uint8_t *sec_hdr_byte_ptr = fileData + sec_idx_table[sec_idx]; - BrigSectionHeader *sec_hdr = (BrigSectionHeader*)sec_hdr_byte_ptr; - - // It doesn't look like cprintf supports string precision values, - // but if this breaks, the right answer is to fix that - DPRINTF(HSAILObject, "found section %.*s\n", sec_hdr->nameLength, - sec_hdr->name); - - sectionInfo[sec_idx].ptr = new uint8_t[sec_hdr->byteCount]; - memcpy(sectionInfo[sec_idx].ptr, sec_hdr_byte_ptr, sec_hdr->byteCount); - sectionInfo[sec_idx].size = sec_hdr->byteCount; - } - - BrigSectionHeader *code_hdr = - (BrigSectionHeader*)sectionInfo[CodeSectionIndex].ptr; - - DPRINTF(HSAILObject, "Code section hdr, count: %d, hdr count: %d, " - "name len: %d\n", code_hdr->byteCount, code_hdr->headerByteCount, - code_hdr->nameLength); - - // start at offset 4 to skip initial null entry (see Brig spec) - processDirectives(getCodeSectionEntry(code_hdr->headerByteCount), - getCodeSectionEntry(sectionInfo[CodeSectionIndex].size), - storageMap); - - delete[] fileData; - - DPRINTF(HSALoader, "BRIG object %s loaded.\n", fname); -} - -BrigObject::~BrigObject() -{ - for (int i = 0; i < NumSectionIndices; ++i) - if (sectionInfo[i].ptr) - delete[] sectionInfo[i].ptr; -} diff --git a/src/gpu-compute/brig_object.hh b/src/gpu-compute/brig_object.hh deleted file mode 100644 index 59a585914..000000000 --- a/src/gpu-compute/brig_object.hh +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: Steve Reinhardt, Anthony Gutierrez - */ - -#ifndef __BRIG_OBJECT_HH__ -#define __BRIG_OBJECT_HH__ - -#include -#include -#include -#include - -#include "arch/hsail/Brig.h" -#include "gpu-compute/hsa_object.hh" -#include "gpu-compute/hsail_code.hh" - -class LabelMap; -class StorageMap; - -/* @class BrigObject - * this class implements the BRIG loader object, and - * is used when the simulator directly executes HSAIL. - * this class is responsible for extracting all - * information about kernels contained in BRIG format - * and converts them to HsailCode objects that are - * usable by the simulator and emulated runtime. - */ - -class BrigObject final : public HsaObject -{ - public: - enum SectionIndex - { - DataSectionIndex, - CodeSectionIndex, - OperandsSectionIndex, - NumSectionIndices - }; - - static const char *sectionNames[]; - - struct SectionInfo - { - uint8_t *ptr; - int size; - }; - - static HsaObject* tryFile(const std::string &fname, int len, - uint8_t *fileData); - - SectionInfo sectionInfo[NumSectionIndices]; - const uint8_t *getSectionOffset(enum SectionIndex sec, int offs) const; - - std::vector kernels; - std::vector functions; - std::string kern_block_name; - - void processDirectives(const Brig::BrigBase *dirPtr, - const Brig::BrigBase *endPtr, - StorageMap *storageMap); - - BrigObject(const std::string &fname, int len, uint8_t *fileData); - ~BrigObject(); - - // eventually these will need to be per-kernel not per-object-file - StorageMap *storageMap; - LabelMap *labelMap; - - const char* getString(int offs) const; - const Brig::BrigData* getBrigBaseData(int offs) const; - const uint8_t* getData(int offs) const; - const Brig::BrigBase* getCodeSectionEntry(int offs) const; - const Brig::BrigOperand* getOperand(int offs) const; - unsigned getOperandPtr(int offs, int index) const; - const Brig::BrigInstBase* getInst(int offs) const; - - HsaCode* getKernel(const std::string &name) const override; - HsaCode* getFunction(const std::string &name) const override; - - int numKernels() const override { return kernels.size(); } - - HsaCode* getKernel(int i) const override { return kernels[i]; } - - // pointer to the current kernel/function we're processing, so elements - // under construction can reference it. kinda ugly, but easier - // than passing it all over for the few places it's needed. - mutable HsailCode *currentCode; -}; - -// Utility function to bump Brig item pointer to next element given -// item size in bytes. Really just an add but with lots of casting. -template -T* -brigNext(T *ptr) -{ - Brig::BrigBase *base_ptr = (Brig::BrigBase*)ptr; - int size = base_ptr->byteCount; - assert(size); - - return (T*)((uint8_t*)ptr + size); -} - -#endif // __BRIG_OBJECT_HH__ diff --git a/src/gpu-compute/cl_driver.cc b/src/gpu-compute/cl_driver.cc deleted file mode 100644 index ee86017ec..000000000 --- a/src/gpu-compute/cl_driver.cc +++ /dev/null @@ -1,279 +0,0 @@ -/* - * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: Anthony Gutierrez - */ - -#include "gpu-compute/cl_driver.hh" - -#include - -#include "base/intmath.hh" -#include "cpu/thread_context.hh" -#include "gpu-compute/dispatcher.hh" -#include "gpu-compute/hsa_code.hh" -#include "gpu-compute/hsa_kernel_info.hh" -#include "gpu-compute/hsa_object.hh" -#include "params/ClDriver.hh" -#include "sim/process.hh" -#include "sim/syscall_emul_buf.hh" - -ClDriver::ClDriver(ClDriverParams *p) - : EmulatedDriver(p), hsaCode(0) -{ - for (const auto &codeFile : p->codefile) - codeFiles.push_back(&codeFile); - - maxFuncArgsSize = 0; - - for (int i = 0; i < codeFiles.size(); ++i) { - HsaObject *obj = HsaObject::createHsaObject(*codeFiles[i]); - - for (int k = 0; k < obj->numKernels(); ++k) { - assert(obj->getKernel(k)); - kernels.push_back(obj->getKernel(k)); - kernels.back()->setReadonlyData((uint8_t*)obj->readonlyData); - int kern_funcargs_size = kernels.back()->funcarg_size; - maxFuncArgsSize = maxFuncArgsSize < kern_funcargs_size ? - kern_funcargs_size : maxFuncArgsSize; - } - } - - int name_offs = 0; - int code_offs = 0; - - for (int i = 0; i < kernels.size(); ++i) { - kernelInfo.push_back(HsaKernelInfo()); - HsaCode *k = kernels[i]; - - k->generateHsaKernelInfo(&kernelInfo[i]); - - kernelInfo[i].name_offs = name_offs; - kernelInfo[i].code_offs = code_offs; - - name_offs += k->name().size() + 1; - code_offs += k->numInsts() * sizeof(TheGpuISA::RawMachInst); - } -} - -void -ClDriver::handshake(GpuDispatcher *_dispatcher) -{ - dispatcher = _dispatcher; - dispatcher->setFuncargsSize(maxFuncArgsSize); -} - -int -ClDriver::open(ThreadContext *tc, int mode, int flags) -{ - auto p = tc->getProcessPtr(); - std::shared_ptr fdp; - fdp = std::make_shared(this, filename); - int tgt_fd = p->fds->allocFD(fdp); - return tgt_fd; -} - -int -ClDriver::ioctl(ThreadContext *tc, unsigned req, Addr buf_addr) -{ - switch (req) { - case HSA_GET_SIZES: - { - TypedBufferArg sizes(buf_addr); - sizes->num_kernels = kernels.size(); - sizes->string_table_size = 0; - sizes->code_size = 0; - sizes->readonly_size = 0; - - if (kernels.size() > 0) { - // all kernels will share the same read-only memory - sizes->readonly_size = - kernels[0]->getSize(HsaCode::MemorySegment::READONLY); - // check our assumption - for (int i = 1; ireadonly_size == - kernels[i]->getSize(HsaCode::MemorySegment::READONLY)); - } - } - - for (int i = 0; i < kernels.size(); ++i) { - HsaCode *k = kernels[i]; - // add one for terminating '\0' - sizes->string_table_size += k->name().size() + 1; - sizes->code_size += - k->numInsts() * sizeof(TheGpuISA::RawMachInst); - } - - sizes.copyOut(tc->getVirtProxy()); - } - break; - - case HSA_GET_KINFO: - { - TypedBufferArg - kinfo(buf_addr, sizeof(HsaKernelInfo) * kernels.size()); - - for (int i = 0; i < kernels.size(); ++i) { - HsaKernelInfo *ki = &kinfo[i]; - ki->name_offs = kernelInfo[i].name_offs; - ki->code_offs = kernelInfo[i].code_offs; - ki->sRegCount = kernelInfo[i].sRegCount; - ki->dRegCount = kernelInfo[i].dRegCount; - ki->cRegCount = kernelInfo[i].cRegCount; - ki->static_lds_size = kernelInfo[i].static_lds_size; - ki->private_mem_size = kernelInfo[i].private_mem_size; - ki->spill_mem_size = kernelInfo[i].spill_mem_size; - } - - kinfo.copyOut(tc->getVirtProxy()); - } - break; - - case HSA_GET_STRINGS: - { - int string_table_size = 0; - for (int i = 0; i < kernels.size(); ++i) { - HsaCode *k = kernels[i]; - string_table_size += k->name().size() + 1; - } - - BufferArg buf(buf_addr, string_table_size); - char *bufp = (char*)buf.bufferPtr(); - - for (int i = 0; i < kernels.size(); ++i) { - HsaCode *k = kernels[i]; - const char *n = k->name().c_str(); - - // idiomatic string copy - while ((*bufp++ = *n++)); - } - - assert(bufp - (char *)buf.bufferPtr() == string_table_size); - - buf.copyOut(tc->getVirtProxy()); - } - break; - - case HSA_GET_READONLY_DATA: - { - // we can pick any kernel --- they share the same - // readonly segment (this assumption is checked in GET_SIZES) - uint64_t size = - kernels.back()->getSize(HsaCode::MemorySegment::READONLY); - BufferArg data(buf_addr, size); - char *datap = (char *)data.bufferPtr(); - memcpy(datap, - kernels.back()->readonly_data, - size); - data.copyOut(tc->getVirtProxy()); - } - break; - - case HSA_GET_CODE: - { - // set hsaCode pointer - hsaCode = buf_addr; - int code_size = 0; - - for (int i = 0; i < kernels.size(); ++i) { - HsaCode *k = kernels[i]; - code_size += k->numInsts() * sizeof(TheGpuISA::RawMachInst); - } - - TypedBufferArg buf(buf_addr, code_size); - TheGpuISA::RawMachInst *bufp = buf; - - int buf_idx = 0; - - for (int i = 0; i < kernels.size(); ++i) { - HsaCode *k = kernels[i]; - - for (int j = 0; j < k->numInsts(); ++j) { - bufp[buf_idx] = k->insts()->at(j); - ++buf_idx; - } - } - - buf.copyOut(tc->getVirtProxy()); - } - break; - - case HSA_GET_CU_CNT: - { - BufferArg buf(buf_addr, sizeof(uint32_t)); - *((uint32_t*)buf.bufferPtr()) = dispatcher->getNumCUs(); - buf.copyOut(tc->getVirtProxy()); - } - break; - - case HSA_GET_VSZ: - { - BufferArg buf(buf_addr, sizeof(uint32_t)); - *((uint32_t*)buf.bufferPtr()) = dispatcher->wfSize(); - buf.copyOut(tc->getVirtProxy()); - } - break; - case HSA_GET_HW_STATIC_CONTEXT_SIZE: - { - BufferArg buf(buf_addr, sizeof(uint32_t)); - *((uint32_t*)buf.bufferPtr()) = dispatcher->getStaticContextSize(); - buf.copyOut(tc->getVirtProxy()); - } - break; - - default: - fatal("ClDriver: bad ioctl %d\n", req); - } - - return 0; -} - -const char* -ClDriver::codeOffToKernelName(uint64_t code_ptr) -{ - assert(hsaCode); - uint32_t code_offs = code_ptr - hsaCode; - - for (int i = 0; i < kernels.size(); ++i) { - if (code_offs == kernelInfo[i].code_offs) { - return kernels[i]->name().c_str(); - } - } - - return nullptr; -} - -ClDriver* -ClDriverParams::create() -{ - return new ClDriver(this); -} diff --git a/src/gpu-compute/cl_driver.hh b/src/gpu-compute/cl_driver.hh deleted file mode 100644 index bc7b749e8..000000000 --- a/src/gpu-compute/cl_driver.hh +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: Anthony Gutierrez - */ - -#ifndef __CL_DRIVER_HH__ -#define __CL_DRIVER_HH__ - -#include - -#include "gpu-compute/hsa_kernel_info.hh" -#include "sim/emul_driver.hh" - -class GpuDispatcher; -class HsaCode; -class Process; -class ThreadContext; - -struct ClDriverParams; - -class ClDriver final : public EmulatedDriver -{ - public: - ClDriver(ClDriverParams *p); - void handshake(GpuDispatcher *_dispatcher); - int open(ThreadContext *tc, int mode, int flags); - int ioctl(ThreadContext *tc, unsigned req, Addr buf); - const char* codeOffToKernelName(uint64_t code_ptr); - - private: - GpuDispatcher *dispatcher; - - std::vector codeFiles; - - // All the kernels we know about - std::vector kernels; - std::vector functions; - - std::vector kernelInfo; - - // maximum size necessary for function arguments - int maxFuncArgsSize; - // The host virtual address for the kernel code - uint64_t hsaCode; -}; - -#endif // __CL_DRIVER_HH__ diff --git a/src/gpu-compute/cl_event.hh b/src/gpu-compute/cl_event.hh deleted file mode 100644 index 9722600d2..000000000 --- a/src/gpu-compute/cl_event.hh +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __GPU_CL_EVENT_HH__ -#define __GPU_CL_EVENT_HH__ - -struct HsaQueueEntry; - -class _cl_event { - public: - _cl_event() : done(false), hsaTaskPtr(nullptr), start(0), end(0) { } - - volatile bool done; - HsaQueueEntry *hsaTaskPtr; - uint64_t start; - uint64_t end; -}; - -#endif // __GPU_CL_EVENT_HH__ diff --git a/src/gpu-compute/condition_register_state.cc b/src/gpu-compute/condition_register_state.cc deleted file mode 100644 index 08555bb7c..000000000 --- a/src/gpu-compute/condition_register_state.cc +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: John Kalamatianos - */ - -#include "gpu-compute/condition_register_state.hh" - -#include "gpu-compute/compute_unit.hh" -#include "gpu-compute/gpu_static_inst.hh" -#include "gpu-compute/shader.hh" -#include "gpu-compute/wavefront.hh" - -ConditionRegisterState::ConditionRegisterState() -{ - computeUnit = nullptr; - c_reg.clear(); - busy.clear(); -} - -void -ConditionRegisterState::setParent(ComputeUnit *_computeUnit) -{ - computeUnit = _computeUnit; - _name = computeUnit->name() + ".CondRegState"; -} - -void -ConditionRegisterState::init(uint32_t _size) -{ - c_reg.resize(_size); - busy.resize(_size, 0); -} - -void -ConditionRegisterState::exec(GPUDynInstPtr ii, Wavefront *w) -{ - // iterate over all operands - for (auto i = 0; i < ii->getNumOperands(); ++i) { - // is this a condition register destination operand? - if (ii->isCondRegister(i) && ii->isDstOperand(i)) { - // mark the register as busy - markReg(ii->getRegisterIndex(i, ii), 1); - uint32_t pipeLen = w->computeUnit->spBypassLength(); - - // schedule an event for marking the register as ready - w->computeUnit-> - registerEvent(w->simdId, ii->getRegisterIndex(i, ii), - ii->getOperandSize(i), - w->computeUnit->shader->tick_cnt + - w->computeUnit->shader->ticks(pipeLen), 0); - } - } -} diff --git a/src/gpu-compute/condition_register_state.hh b/src/gpu-compute/condition_register_state.hh deleted file mode 100644 index 2d3f5e160..000000000 --- a/src/gpu-compute/condition_register_state.hh +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: John Kalamatianos - */ - -#ifndef __CONDITION_REGISTER_STATE_HH__ -#define __CONDITION_REGISTER_STATE_HH__ - -#include -#include - -#include "gpu-compute/misc.hh" - -class ComputeUnit; -class GPUStaticInst; -class Shader; -class Wavefront; - -// Condition Register State (used only when executing HSAIL) -class ConditionRegisterState -{ - public: - ConditionRegisterState(); - void init(uint32_t _size); - const std::string name() const { return _name; } - void setParent(ComputeUnit *_computeUnit); - void regStats() { } - - template - T - read(int regIdx, int threadId) - { - bool tmp = c_reg[regIdx][threadId]; - T *p0 = (T*)(&tmp); - - return *p0; - } - - template - void - write(int regIdx, int threadId, T value) - { - c_reg[regIdx][threadId] = (bool)(value & 0x01); - } - - void - markReg(int regIdx, uint8_t value) - { - busy.at(regIdx) = value; - } - - uint8_t - regBusy(int idx) - { - uint8_t status = busy.at(idx); - return status; - } - - int numRegs() { return c_reg.size(); } - void exec(GPUDynInstPtr ii, Wavefront *w); - - private: - ComputeUnit* computeUnit; - std::string _name; - // Condition Register state - std::vector c_reg; - // flag indicating if a register is busy - std::vector busy; -}; - -#endif diff --git a/src/gpu-compute/hsa_code.hh b/src/gpu-compute/hsa_code.hh deleted file mode 100644 index 9f358e23c..000000000 --- a/src/gpu-compute/hsa_code.hh +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: Anthony Gutierrez - */ - -#ifndef __HSA_CODE_HH__ -#define __HSA_CODE_HH__ - -#include -#include - -#include "arch/gpu_types.hh" -#include "config/the_gpu_isa.hh" - -class HsaKernelInfo; - -/* @class HsaCode - * base code object for the set of HSA kernels associated - * with a single application. this class provides the common - * methods for creating, accessing, and storing information - * about kernel and variable symbols, symbol name, memory - * segment sizes, and instruction count, etc. - */ - -class HsaCode -{ - public: - HsaCode(const std::string &name) : readonly_data(nullptr), funcarg_size(0), - _name(name) - { - } - - enum class MemorySegment { - NONE, - FLAT, - GLOBAL, - READONLY, - KERNARG, - GROUP, - PRIVATE, - SPILL, - ARG, - EXTSPACE0 - }; - - const std::string& name() const { return _name; } - int numInsts() const { return _insts.size(); } - std::vector* insts() { return &_insts; } - - void - setReadonlyData(uint8_t *_readonly_data) - { - readonly_data = _readonly_data; - } - - virtual int getSize(MemorySegment segment) const = 0; - virtual void generateHsaKernelInfo(HsaKernelInfo *hsaKernelInfo) const = 0; - - uint8_t *readonly_data; - int funcarg_size; - - protected: - // An array that stores instruction indices (0 through kernel size) - // for a kernel passed to code object constructor as an argument. - std::vector _insts; - - private: - const std::string _name; -}; - -#endif // __HSA_CODE_HH__ diff --git a/src/gpu-compute/hsa_kernel_info.hh b/src/gpu-compute/hsa_kernel_info.hh deleted file mode 100644 index 4151695eb..000000000 --- a/src/gpu-compute/hsa_kernel_info.hh +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: Steve Reinhardt - */ - -#ifndef __HSA_KERNEL_INFO_HH__ -#define __HSA_KERNEL_INFO_HH__ - -// This file defines the public interface between the HSA emulated -// driver and application programs. - -#include - -static const int HSA_GET_SIZES = 0x4801; -static const int HSA_GET_KINFO = 0x4802; -static const int HSA_GET_STRINGS = 0x4803; -static const int HSA_GET_CODE = 0x4804; -static const int HSA_GET_READONLY_DATA = 0x4805; -static const int HSA_GET_CU_CNT = 0x4806; -static const int HSA_GET_VSZ = 0x4807; -static const int HSA_GET_HW_STATIC_CONTEXT_SIZE = 0x4808; - -// Return value (via buffer ptr) for HSA_GET_SIZES -struct HsaDriverSizes -{ - uint32_t num_kernels; - uint32_t string_table_size; - uint32_t code_size; - uint32_t readonly_size; -}; - -// HSA_GET_KINFO returns an array of num_kernels of these structs -struct HsaKernelInfo -{ - // byte offset into string table - uint32_t name_offs; - // byte offset into code array - uint32_t code_offs; - uint32_t static_lds_size; - uint32_t private_mem_size; - uint32_t spill_mem_size; - // Number of s registers - uint32_t sRegCount; - // Number of d registers - uint32_t dRegCount; - // Number of c registers - uint32_t cRegCount; -}; - -#endif // __HSA_KERNEL_INFO_HH__ diff --git a/src/gpu-compute/hsa_object.cc b/src/gpu-compute/hsa_object.cc deleted file mode 100644 index ac734a437..000000000 --- a/src/gpu-compute/hsa_object.cc +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2013-2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: Anthony Gutierrez - */ - -#include "gpu-compute/hsa_object.hh" - -#include -#include - -#include "base/logging.hh" - -HsaObject::HsaObject(const std::string &fname) - : readonlyData(nullptr), filename(fname) -{ -} - -HsaObject* -HsaObject::createHsaObject(const std::string &fname) -{ - HsaObject *hsaObj = nullptr; - uint8_t *file_data = nullptr; - int file_length = 0; - - std::ifstream code_file(fname, std::ifstream::ate | std::ifstream::in | - std::ifstream::binary); - - assert(code_file.is_open()); - assert(code_file.good()); - - file_length = code_file.tellg(); - code_file.seekg(0, code_file.beg); - file_data = new uint8_t[file_length]; - code_file.read((char*)file_data, file_length); - code_file.close(); - - for (const auto &tryFile : tryFileFuncs) { - if ((hsaObj = tryFile(fname, file_length, file_data))) { - return hsaObj; - } - } - - delete[] file_data; - fatal("Unknown HSA object type for file: %s.\n", fname); - - return nullptr; -} diff --git a/src/gpu-compute/hsa_object.hh b/src/gpu-compute/hsa_object.hh deleted file mode 100644 index 1f08f5d80..000000000 --- a/src/gpu-compute/hsa_object.hh +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2013-2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: Anthony Gutierrez - */ - -#ifndef __HSA_OBJECT_HH__ -#define __HSA_OBJECT_HH__ - -#include -#include -#include - -class HsaCode; - -/* @class HsaObject - * base loader object for HSA kernels. this class provides - * the base method definitions for loading, storing, and - * accessing HSA kernel objects into the simulator. - */ - -class HsaObject -{ - public: - HsaObject(const std::string &fileName); - - static HsaObject* createHsaObject(const std::string &fname); - static std::vector> tryFileFuncs; - - virtual HsaCode* getKernel(const std::string &name) const = 0; - virtual HsaCode* getKernel(int i) const = 0; - virtual HsaCode* getFunction(const std::string &name) const = 0; - virtual int numKernels() const = 0; - - const std::string& name() const { return filename; } - - uint8_t *readonlyData; - - - protected: - const std::string filename; -}; - -#endif // __HSA_OBJECT_HH__ diff --git a/src/gpu-compute/hsail_code.cc b/src/gpu-compute/hsail_code.cc deleted file mode 100644 index a5b47b1b8..000000000 --- a/src/gpu-compute/hsail_code.cc +++ /dev/null @@ -1,460 +0,0 @@ -/* - * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: Steve Reinhardt - */ - -#include "gpu-compute/hsail_code.hh" - -#include "arch/gpu_types.hh" -#include "arch/hsail/Brig.h" -#include "arch/hsail/operand.hh" -#include "config/the_gpu_isa.hh" -#include "debug/BRIG.hh" -#include "debug/HSAILObject.hh" -#include "gpu-compute/brig_object.hh" -#include "gpu-compute/gpu_static_inst.hh" -#include "gpu-compute/kernel_cfg.hh" - -using namespace Brig; - -int getBrigDataTypeBytes(BrigType16_t t); - -HsailCode::HsailCode(const std::string &name_str) - : HsaCode(name_str), private_size(-1), readonly_size(-1) -{ -} - -void -HsailCode::init(const BrigDirectiveExecutable *code_dir, const BrigObject *obj, - StorageMap *objStorageMap) -{ - storageMap = objStorageMap; - - // set pointer so that decoding process can find this kernel context when - // needed - obj->currentCode = this; - - if (code_dir->base.kind != BRIG_KIND_DIRECTIVE_FUNCTION && - code_dir->base.kind != BRIG_KIND_DIRECTIVE_KERNEL) { - fatal("unexpected directive kind %d inside kernel/function init\n", - code_dir->base.kind); - } - - DPRINTF(HSAILObject, "Initializing code, first code block entry is: %d\n", - code_dir->firstCodeBlockEntry); - - // clear these static vars so we can properly track the max index - // for this kernel - SRegOperand::maxRegIdx = 0; - DRegOperand::maxRegIdx = 0; - CRegOperand::maxRegIdx = 0; - setPrivateSize(0); - - const BrigBase *entryPtr = brigNext((BrigBase*)code_dir); - const BrigBase *endPtr = - obj->getCodeSectionEntry(code_dir->nextModuleEntry); - - // the instruction's byte address (relative to the base addr - // of the code section) - int inst_addr = 0; - // the index that points to the instruction in the instruction - // array - int inst_idx = 0; - std::vector instructions; - int funcarg_size_scope = 0; - - // walk through instructions in code section and directives in - // directive section in parallel, processing directives that apply - // when we reach the relevant code point. - while (entryPtr < endPtr) { - switch (entryPtr->kind) { - case BRIG_KIND_DIRECTIVE_VARIABLE: - { - const BrigDirectiveVariable *sym = - (const BrigDirectiveVariable*)entryPtr; - - DPRINTF(HSAILObject,"Initializing code, directive is " - "kind_variable, symbol is: %s\n", - obj->getString(sym->name)); - - StorageElement *se = storageMap->addSymbol(sym, obj); - - if (sym->segment == BRIG_SEGMENT_PRIVATE) { - setPrivateSize(se->size); - } else { // spill - funcarg_size_scope += se->size; - } - } - break; - - case BRIG_KIND_DIRECTIVE_LABEL: - { - const BrigDirectiveLabel *lbl = - (const BrigDirectiveLabel*)entryPtr; - - DPRINTF(HSAILObject,"Initializing code, directive is " - "kind_label, label is: %s \n", - obj->getString(lbl->name)); - - labelMap.addLabel(lbl, inst_addr, obj); - } - break; - - case BRIG_KIND_DIRECTIVE_PRAGMA: - { - DPRINTF(HSAILObject, "Initializing code, directive " - "is kind_pragma\n"); - } - break; - - case BRIG_KIND_DIRECTIVE_COMMENT: - { - DPRINTF(HSAILObject, "Initializing code, directive is " - "kind_comment\n"); - } - break; - - case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START: - { - DPRINTF(HSAILObject, "Initializing code, directive is " - "kind_arg_block_start\n"); - - storageMap->resetOffset(BRIG_SEGMENT_ARG); - funcarg_size_scope = 0; - } - break; - - case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END: - { - DPRINTF(HSAILObject, "Initializing code, directive is " - "kind_arg_block_end\n"); - - funcarg_size = funcarg_size < funcarg_size_scope ? - funcarg_size_scope : funcarg_size; - } - break; - - case BRIG_KIND_DIRECTIVE_END: - DPRINTF(HSAILObject, "Initializing code, dircetive is " - "kind_end\n"); - - break; - - default: - if (entryPtr->kind >= BRIG_KIND_INST_BEGIN && - entryPtr->kind <= BRIG_KIND_INST_END) { - - BrigInstBase *instPtr = (BrigInstBase*)entryPtr; - TheGpuISA::MachInst machInst = { instPtr, obj }; - GPUStaticInst *iptr = decoder.decode(machInst); - - if (iptr) { - DPRINTF(HSAILObject, "Initializing code, processing inst " - "byte addr #%d idx %d: OPCODE=%d\n", inst_addr, - inst_idx, instPtr->opcode); - - TheGpuISA::RawMachInst raw_inst = decoder.saveInst(iptr); - iptr->instNum(inst_idx); - iptr->instAddr(inst_addr); - _insts.push_back(raw_inst); - instructions.push_back(iptr); - } - inst_addr += sizeof(TheGpuISA::RawMachInst); - ++inst_idx; - } else if (entryPtr->kind >= BRIG_KIND_OPERAND_BEGIN && - entryPtr->kind < BRIG_KIND_OPERAND_END) { - warn("unexpected operand entry in code segment\n"); - } else { - // there are surely some more cases we will need to handle, - // but we'll deal with them as we find them. - fatal("unexpected directive kind %d inside kernel scope\n", - entryPtr->kind); - } - } - - entryPtr = brigNext(entryPtr); - } - - // compute Control Flow Graph for current kernel - ControlFlowInfo::assignImmediatePostDominators(instructions); - - max_sreg = SRegOperand::maxRegIdx; - max_dreg = DRegOperand::maxRegIdx; - max_creg = CRegOperand::maxRegIdx; - - obj->currentCode = nullptr; -} - -HsailCode::HsailCode(const std::string &name_str, - const BrigDirectiveExecutable *code_dir, - const BrigObject *obj, StorageMap *objStorageMap) - : HsaCode(name_str), private_size(-1), readonly_size(-1) -{ - init(code_dir, obj, objStorageMap); -} - -void -LabelMap::addLabel(const Brig::BrigDirectiveLabel *lblDir, int inst_index, - const BrigObject *obj) -{ - std::string lbl_name = obj->getString(lblDir->name); - Label &lbl = map[lbl_name]; - - if (lbl.defined()) { - fatal("Attempt to redefine existing label %s\n", lbl_name); - } - - lbl.define(lbl_name, inst_index); - DPRINTF(HSAILObject, "label %s = %d\n", lbl_name, inst_index); -} - -Label* -LabelMap::refLabel(const Brig::BrigDirectiveLabel *lblDir, - const BrigObject *obj) -{ - std::string name = obj->getString(lblDir->name); - Label &lbl = map[name]; - lbl.checkName(name); - - return &lbl; -} - -int -getBrigDataTypeBytes(BrigType16_t t) -{ - switch (t) { - case BRIG_TYPE_S8: - case BRIG_TYPE_U8: - case BRIG_TYPE_B8: - return 1; - - case BRIG_TYPE_S16: - case BRIG_TYPE_U16: - case BRIG_TYPE_B16: - case BRIG_TYPE_F16: - return 2; - - case BRIG_TYPE_S32: - case BRIG_TYPE_U32: - case BRIG_TYPE_B32: - case BRIG_TYPE_F32: - return 4; - - case BRIG_TYPE_S64: - case BRIG_TYPE_U64: - case BRIG_TYPE_B64: - case BRIG_TYPE_F64: - return 8; - - case BRIG_TYPE_B1: - - default: - fatal("unhandled symbol data type %d", t); - return 0; - } -} - -StorageElement* -StorageSpace::addSymbol(const BrigDirectiveVariable *sym, - const BrigObject *obj) -{ - const char *sym_name = obj->getString(sym->name); - uint64_t size = 0; - uint64_t offset = 0; - - if (sym->type & BRIG_TYPE_ARRAY) { - size = getBrigDataTypeBytes(sym->type & ~BRIG_TYPE_ARRAY); - size *= (((uint64_t)sym->dim.hi) << 32 | (uint64_t)sym->dim.lo); - - offset = roundUp(nextOffset, getBrigDataTypeBytes(sym->type & - ~BRIG_TYPE_ARRAY)); - } else { - size = getBrigDataTypeBytes(sym->type); - offset = roundUp(nextOffset, getBrigDataTypeBytes(sym->type)); - } - - nextOffset = offset + size; - - DPRINTF(HSAILObject, "Adding SYMBOL %s size %d offset %#x, init: %d\n", - sym_name, size, offset, sym->init); - - StorageElement* se = new StorageElement(sym_name, offset, size, sym); - elements.push_back(se); - elements_by_addr.insert(AddrRange(offset, offset + size - 1), se); - elements_by_brigptr[sym] = se; - - return se; -} - -StorageElement* -StorageSpace::findSymbol(std::string name) -{ - for (auto it : elements) { - if (it->name == name) { - return it; - } - } - - return nullptr; -} - -StorageElement* -StorageSpace::findSymbol(uint64_t addr) -{ - assert(elements_by_addr.size() > 0); - - auto se = elements_by_addr.contains(addr); - - if (se == elements_by_addr.end()) { - return nullptr; - } else { - return se->second; - } -} - -StorageElement* -StorageSpace::findSymbol(const BrigDirectiveVariable *brigptr) -{ - assert(elements_by_brigptr.size() > 0); - - auto se = elements_by_brigptr.find(brigptr); - - if (se == elements_by_brigptr.end()) { - return nullptr; - } else { - return se->second; - } -} - -StorageMap::StorageMap(StorageMap *outerScope) - : outerScopeMap(outerScope) -{ - for (int i = 0; i < NumSegments; ++i) - space[i] = new StorageSpace((BrigSegment)i); -} - -StorageElement* -StorageMap::addSymbol(const BrigDirectiveVariable *sym, const BrigObject *obj) -{ - BrigSegment8_t segment = sym->segment; - - assert(segment >= Brig::BRIG_SEGMENT_FLAT); - assert(segment < NumSegments); - - return space[segment]->addSymbol(sym, obj); -} - -int -StorageMap::getSize(Brig::BrigSegment segment) -{ - assert(segment > Brig::BRIG_SEGMENT_GLOBAL); - assert(segment < NumSegments); - - if (segment != Brig::BRIG_SEGMENT_GROUP && - segment != Brig::BRIG_SEGMENT_READONLY) { - return space[segment]->getSize(); - } else { - int ret = space[segment]->getSize(); - - if (outerScopeMap) { - ret += outerScopeMap->getSize(segment); - } - - return ret; - } -} - -void -StorageMap::resetOffset(Brig::BrigSegment segment) -{ - space[segment]->resetOffset(); -} - -StorageElement* -StorageMap::findSymbol(BrigSegment segment, std::string name) -{ - StorageElement *se = space[segment]->findSymbol(name); - - if (se) - return se; - - if (outerScopeMap) - return outerScopeMap->findSymbol(segment, name); - - return nullptr; -} - -StorageElement* -StorageMap::findSymbol(Brig::BrigSegment segment, uint64_t addr) -{ - StorageSpace *sp = space[segment]; - - if (!sp) { - // there is no memory in segment? - return nullptr; - } - - StorageElement *se = sp->findSymbol(addr); - - if (se) - return se; - - if (outerScopeMap) - return outerScopeMap->findSymbol(segment, addr); - - return nullptr; - -} - -StorageElement* -StorageMap::findSymbol(Brig::BrigSegment segment, - const BrigDirectiveVariable *brigptr) -{ - StorageSpace *sp = space[segment]; - - if (!sp) { - // there is no memory in segment? - return nullptr; - } - - StorageElement *se = sp->findSymbol(brigptr); - - if (se) - return se; - - if (outerScopeMap) - return outerScopeMap->findSymbol(segment, brigptr); - - return nullptr; - -} diff --git a/src/gpu-compute/hsail_code.hh b/src/gpu-compute/hsail_code.hh deleted file mode 100644 index 991946197..000000000 --- a/src/gpu-compute/hsail_code.hh +++ /dev/null @@ -1,445 +0,0 @@ -/* - * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: Steve Reinhardt - */ - -#ifndef __HSAIL_CODE_HH__ -#define __HSAIL_CODE_HH__ - -#include -#include -#include -#include -#include - -#include "arch/gpu_decoder.hh" -#include "arch/hsail/Brig.h" -#include "base/addr_range_map.hh" -#include "base/intmath.hh" -#include "config/the_gpu_isa.hh" -#include "gpu-compute/hsa_code.hh" -#include "gpu-compute/hsa_kernel_info.hh" -#include "gpu-compute/misc.hh" - -class BrigObject; -class GPUStaticInst; - -inline int -popcount(uint64_t src, int sz) -{ - int cnt = 0; - - for (int i = 0; i < sz; ++i) { - if (src & 1) - ++cnt; - src >>= 1; - } - - return cnt; -} - -inline int -firstbit(uint64_t src, int sz) -{ - int i; - - for (i = 0; i < sz; ++i) { - if (src & 1) - break; - src >>= 1; - } - - return i; -} - -inline int -lastbit(uint64_t src, int sz) -{ - int i0 = -1; - - for (int i = 0; i < sz; ++i) { - if (src & 1) - i0 = i; - src >>= 1; - } - - return i0; -} - -inline int -signbit(uint64_t src, int sz) -{ - int i0 = -1; - - if (src & (1 << (sz - 1))) { - for (int i = 0; i < sz - 1; ++i) { - if (!(src & 1)) - i0 = i; - src >>= 1; - } - } else { - for (int i = 0; i < sz - 1; ++i) { - if (src & 1) - i0 = i; - src >>= 1; - } - } - - return i0; -} - -inline uint64_t -bitrev(uint64_t src, int sz) -{ - uint64_t r = 0; - - for (int i = 0; i < sz; ++i) { - r <<= 1; - if (src & 1) - r |= 1; - src >>= 1; - } - - return r; -} - -inline uint64_t -mul_hi(uint32_t a, uint32_t b) -{ - return ((uint64_t)a * (uint64_t)b) >> 32; -} - -inline uint64_t -mul_hi(int32_t a, int32_t b) -{ - return ((int64_t)a * (int64_t)b) >> 32; -} - -inline uint64_t -mul_hi(uint64_t a, uint64_t b) -{ - return ((uint64_t)a * (uint64_t)b) >> 32; -} - -inline uint64_t -mul_hi(int64_t a, int64_t b) -{ - return ((int64_t)a * (int64_t)b) >> 32; -} - -inline uint64_t -mul_hi(double a, double b) -{ - return 0; -} - -class Label -{ - public: - std::string name; - int value; - - Label() : value(-1) - { - } - - bool defined() { return value != -1; } - - void - checkName(std::string &_name) - { - if (name.empty()) { - name = _name; - } else { - assert(name == _name); - } - } - - void - define(std::string &_name, int _value) - { - assert(!defined()); - assert(_value != -1); - value = _value; - checkName(_name); - } - - int - get() - { - assert(defined()); - return value; - } -}; - -class LabelMap -{ - std::map map; - - public: - LabelMap() { } - - void addLabel(const Brig::BrigDirectiveLabel *lbl, int inst_index, - const BrigObject *obj); - - Label *refLabel(const Brig::BrigDirectiveLabel *lbl, - const BrigObject *obj); -}; - -const int NumSegments = Brig::BRIG_SEGMENT_AMD_GCN; - -extern const char *segmentNames[]; - -class StorageElement -{ - public: - std::string name; - uint64_t offset; - - uint64_t size; - const Brig::BrigDirectiveVariable *brigSymbol; - StorageElement(const char *_name, uint64_t _offset, int _size, - const Brig::BrigDirectiveVariable *sym) - : name(_name), offset(_offset), size(_size), brigSymbol(sym) - { - } -}; - -class StorageSpace -{ - typedef std::map - DirVarToSE_map; - - std::list elements; - AddrRangeMap elements_by_addr; - DirVarToSE_map elements_by_brigptr; - - uint64_t nextOffset; - - public: - StorageSpace(Brig::BrigSegment _class) : nextOffset(0) - { - } - - StorageElement *addSymbol(const Brig::BrigDirectiveVariable *sym, - const BrigObject *obj); - - StorageElement* findSymbol(std::string name); - StorageElement* findSymbol(uint64_t addr); - StorageElement* findSymbol(const Brig::BrigDirectiveVariable *brigptr); - - int getSize() { return nextOffset; } - void resetOffset() { nextOffset = 0; } -}; - -class StorageMap -{ - StorageMap *outerScopeMap; - StorageSpace *space[NumSegments]; - - public: - StorageMap(StorageMap *outerScope = nullptr); - - StorageElement *addSymbol(const Brig::BrigDirectiveVariable *sym, - const BrigObject *obj); - - StorageElement* findSymbol(Brig::BrigSegment segment, std::string name); - StorageElement* findSymbol(Brig::BrigSegment segment, uint64_t addr); - - StorageElement* findSymbol(Brig::BrigSegment segment, - const Brig::BrigDirectiveVariable *brigptr); - - // overloaded version to avoid casting - StorageElement* - findSymbol(Brig::BrigSegment8_t segment, std::string name) - { - return findSymbol((Brig::BrigSegment)segment, name); - } - - int getSize(Brig::BrigSegment segment); - void resetOffset(Brig::BrigSegment segment); -}; - -typedef enum -{ - BT_DEFAULT, - BT_B8, - BT_U8, - BT_U16, - BT_U32, - BT_U64, - BT_S8, - BT_S16, - BT_S32, - BT_S64, - BT_F16, - BT_F32, - BT_F64, - BT_NULL -} base_type_e; - -/* @class HsailCode - * the HsailCode class is used to store information - * about HSA kernels stored in the BRIG format. it holds - * all information about a kernel, function, or variable - * symbol and provides methods for accessing that - * information. - */ - -class HsailCode final : public HsaCode -{ - public: - TheGpuISA::Decoder decoder; - - StorageMap *storageMap; - LabelMap labelMap; - uint32_t kernarg_start; - uint32_t kernarg_end; - int32_t private_size; - - int32_t readonly_size; - - // We track the maximum register index used for each register - // class when we load the code so we can size the register files - // appropriately (i.e., one more than the max index). - uint32_t max_creg; // maximum c-register index - uint32_t max_sreg; // maximum s-register index - uint32_t max_dreg; // maximum d-register index - - HsailCode(const std::string &name_str, - const Brig::BrigDirectiveExecutable *code_dir, - const BrigObject *obj, - StorageMap *objStorageMap); - - // this version is used to create a placeholder when - // we encounter a kernel-related directive before the - // kernel itself - HsailCode(const std::string &name_str); - - void init(const Brig::BrigDirectiveExecutable *code_dir, - const BrigObject *obj, StorageMap *objStorageMap); - - void - generateHsaKernelInfo(HsaKernelInfo *hsaKernelInfo) const - { - hsaKernelInfo->sRegCount = max_sreg + 1; - hsaKernelInfo->dRegCount = max_dreg + 1; - hsaKernelInfo->cRegCount = max_creg + 1; - - hsaKernelInfo->static_lds_size = getSize(Brig::BRIG_SEGMENT_GROUP); - - hsaKernelInfo->private_mem_size = - roundUp(getSize(Brig::BRIG_SEGMENT_PRIVATE), 8); - - hsaKernelInfo->spill_mem_size = - roundUp(getSize(Brig::BRIG_SEGMENT_SPILL), 8); - } - - int - getSize(MemorySegment segment) const - { - Brig::BrigSegment brigSeg; - - switch (segment) { - case MemorySegment::NONE: - brigSeg = Brig::BRIG_SEGMENT_NONE; - break; - case MemorySegment::FLAT: - brigSeg = Brig::BRIG_SEGMENT_FLAT; - break; - case MemorySegment::GLOBAL: - brigSeg = Brig::BRIG_SEGMENT_GLOBAL; - break; - case MemorySegment::READONLY: - brigSeg = Brig::BRIG_SEGMENT_READONLY; - break; - case MemorySegment::KERNARG: - brigSeg = Brig::BRIG_SEGMENT_KERNARG; - break; - case MemorySegment::GROUP: - brigSeg = Brig::BRIG_SEGMENT_GROUP; - break; - case MemorySegment::PRIVATE: - brigSeg = Brig::BRIG_SEGMENT_PRIVATE; - break; - case MemorySegment::SPILL: - brigSeg = Brig::BRIG_SEGMENT_SPILL; - break; - case MemorySegment::ARG: - brigSeg = Brig::BRIG_SEGMENT_ARG; - break; - case MemorySegment::EXTSPACE0: - brigSeg = Brig::BRIG_SEGMENT_AMD_GCN; - break; - default: - fatal("Unknown BrigSegment type.\n"); - } - - return getSize(brigSeg); - } - - private: - int - getSize(Brig::BrigSegment segment) const - { - if (segment == Brig::BRIG_SEGMENT_PRIVATE) { - // with the code generated by new HSA compiler the assertion - // does not hold anymore.. - //assert(private_size != -1); - return private_size; - } else { - return storageMap->getSize(segment); - } - } - - public: - StorageElement* - findSymbol(Brig::BrigSegment segment, uint64_t addr) - { - return storageMap->findSymbol(segment, addr); - } - - void - setPrivateSize(int32_t _private_size) - { - private_size = _private_size; - } - - Label* - refLabel(const Brig::BrigDirectiveLabel *lbl, const BrigObject *obj) - { - return labelMap.refLabel(lbl, obj); - } -}; - -#endif // __HSAIL_CODE_HH__ diff --git a/src/gpu-compute/kernel_cfg.cc b/src/gpu-compute/kernel_cfg.cc deleted file mode 100644 index de518ec84..000000000 --- a/src/gpu-compute/kernel_cfg.cc +++ /dev/null @@ -1,295 +0,0 @@ -/* - * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: Steve Reinhardt - */ - -#include "gpu-compute/kernel_cfg.hh" - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "gpu-compute/gpu_static_inst.hh" - -void -ControlFlowInfo::assignImmediatePostDominators( - const std::vector& instructions) -{ - ControlFlowInfo cfg(instructions); - cfg.findImmediatePostDominators(); -} - - -ControlFlowInfo::ControlFlowInfo(const std::vector& insts) : - instructions(insts) -{ - createBasicBlocks(); - connectBasicBlocks(); -} - -BasicBlock* -ControlFlowInfo::basicBlock(int inst_addr) const { - for (auto& block: basicBlocks) { - int first_block_addr = block->firstInstruction->instAddr(); - if (inst_addr >= first_block_addr && inst_addr < - first_block_addr + block->size * sizeof(TheGpuISA::RawMachInst)) { - return block.get(); - } - } - return nullptr; -} - - -GPUStaticInst* -ControlFlowInfo::lastInstruction(const BasicBlock* block) const -{ - if (block->isExit()) { - return nullptr; - } - - return instructions.at(block->firstInstruction->instNum() + - block->size - 1); -} - -BasicBlock* -ControlFlowInfo::postDominator(const BasicBlock* block) const -{ - if (block->isExit()) { - return nullptr; - } - return basicBlock(lastInstruction(block)->ipdInstNum()); -} - -void -ControlFlowInfo::createBasicBlocks() -{ - assert(!instructions.empty()); - std::set leaders; - // first instruction is a leader - leaders.insert(0); - for (const auto &instruction : instructions) { - if (instruction->isBranch()) { - const int target_pc = instruction->getTargetPc(); - leaders.insert(target_pc); - leaders.insert(instruction->nextInstAddr()); - } - } - - size_t block_size = 0; - for (const auto &instruction : instructions) { - if (leaders.find(instruction->instAddr()) != leaders.end()) { - uint32_t id = basicBlocks.size(); - if (id > 0) { - basicBlocks.back()->size = block_size; - } - block_size = 0; - basicBlocks.emplace_back(new BasicBlock(id, instruction)); - } - block_size++; - } - basicBlocks.back()->size = block_size; - // exit basic block - basicBlocks.emplace_back(new BasicBlock(basicBlocks.size(), nullptr)); -} - -void -ControlFlowInfo::connectBasicBlocks() -{ - BasicBlock* exit_bb = basicBlocks.back().get(); - for (auto& bb : basicBlocks) { - if (bb->isExit()) { - break; - } - GPUStaticInst* last = lastInstruction(bb.get()); - if (last->isReturn()) { - bb->successorIds.insert(exit_bb->id); - continue; - } - if (last->isBranch()) { - const uint32_t target_pc = last->getTargetPc(); - BasicBlock* target_bb = basicBlock(target_pc); - bb->successorIds.insert(target_bb->id); - } - - // Unconditional jump instructions have a unique successor - if (!last->isUnconditionalJump()) { - BasicBlock* next_bb = basicBlock(last->nextInstAddr()); - bb->successorIds.insert(next_bb->id); - } - } -} - - -// In-place set intersection -static void -intersect(std::set& a, const std::set& b) -{ - std::set::iterator it = a.begin(); - while (it != a.end()) { - it = b.find(*it) != b.end() ? ++it : a.erase(it); - } -} - - -void -ControlFlowInfo::findPostDominators() -{ - // the only postdominator of the exit block is itself - basicBlocks.back()->postDominatorIds.insert(basicBlocks.back()->id); - //copy all basic blocks to all postdominator lists except for exit block - for (auto& block : basicBlocks) { - if (!block->isExit()) { - for (uint32_t i = 0; i < basicBlocks.size(); i++) { - block->postDominatorIds.insert(i); - } - } - } - - bool change = true; - while (change) { - change = false; - for (int h = basicBlocks.size() - 2; h >= 0; --h) { - size_t num_postdominators = - basicBlocks[h]->postDominatorIds.size(); - for (int s : basicBlocks[h]->successorIds) { - intersect(basicBlocks[h]->postDominatorIds, - basicBlocks[s]->postDominatorIds); - } - basicBlocks[h]->postDominatorIds.insert(h); - change |= (num_postdominators - != basicBlocks[h]->postDominatorIds.size()); - } - } -} - - -// In-place set difference -static void -setDifference(std::set&a, - const std::set& b, uint32_t exception) -{ - for (uint32_t b_elem : b) { - if (b_elem != exception) { - a.erase(b_elem); - } - } -} - -void -ControlFlowInfo::findImmediatePostDominators() -{ - assert(basicBlocks.size() > 1); // Entry and exit blocks must be present - - findPostDominators(); - - for (auto& basicBlock : basicBlocks) { - if (basicBlock->isExit()) { - continue; - } - std::set candidates = basicBlock->postDominatorIds; - candidates.erase(basicBlock->id); - for (uint32_t postDominatorId : basicBlock->postDominatorIds) { - if (postDominatorId != basicBlock->id) { - setDifference(candidates, - basicBlocks[postDominatorId]->postDominatorIds, - postDominatorId); - } - } - assert(candidates.size() == 1); - GPUStaticInst* last_instruction = lastInstruction(basicBlock.get()); - BasicBlock* ipd_block = basicBlocks[*(candidates.begin())].get(); - if (!ipd_block->isExit()) { - GPUStaticInst* ipd_first_inst = ipd_block->firstInstruction; - last_instruction->ipdInstNum(ipd_first_inst->instAddr()); - } else { - last_instruction->ipdInstNum(last_instruction->nextInstAddr()); - } - } -} - -void -ControlFlowInfo::printPostDominators() const -{ - for (auto& block : basicBlocks) { - std::cout << "PD(" << block->id << ") = {"; - std::copy(block->postDominatorIds.begin(), - block->postDominatorIds.end(), - std::ostream_iterator(std::cout, ", ")); - std::cout << "}" << std::endl; - } -} - -void -ControlFlowInfo::printImmediatePostDominators() const -{ - for (const auto& block : basicBlocks) { - if (block->isExit()) { - continue; - } - std::cout << "IPD(" << block->id << ") = "; - std::cout << postDominator(block.get())->id << ", "; - } - std::cout << std::endl; -} -void -ControlFlowInfo::printBasicBlocks() const -{ - for (GPUStaticInst* inst : instructions) { - int inst_addr = inst->instAddr(); - std::cout << inst_addr << " [" << basicBlock(inst_addr)->id - << "]: " << inst->disassemble(); - if (inst->isBranch()) { - std::cout << ", PC = " << inst->getTargetPc(); - } - std::cout << std::endl; - } -} - -void -ControlFlowInfo::printBasicBlockDot() const -{ - printf("digraph {\n"); - for (const auto& basic_block : basicBlocks) { - printf("\t"); - for (uint32_t successorId : basic_block->successorIds) { - printf("%d -> %d; ", basic_block->id, successorId); - } - printf("\n"); - } - printf("}\n"); -} diff --git a/src/gpu-compute/kernel_cfg.hh b/src/gpu-compute/kernel_cfg.hh deleted file mode 100644 index d4959c857..000000000 --- a/src/gpu-compute/kernel_cfg.hh +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: Steve Reinhardt - */ - -#ifndef __KERNEL_CFG_HH__ -#define __KERNEL_CFG_HH__ - -#include -#include -#include -#include -#include - - -class GPUStaticInst; -class HsailCode; - -struct BasicBlock -{ - BasicBlock(uint32_t num, GPUStaticInst* begin) : - id(num), size(0), firstInstruction(begin) - { - } - - bool - isEntry() const - { - return !id; - } - - bool - isExit() const - { - return !size; - } - - /** - * Unique identifier for the block within a given kernel. - */ - const uint32_t id; - - /** - * Number of instructions contained in the block - */ - size_t size; - - /** - * Pointer to first instruction of the block. - */ - GPUStaticInst* firstInstruction; - - /** - * Identifiers of the blocks that follow (are reachable from) this block. - */ - std::set successorIds; - - /** - * Identifiers of the blocks that will be visited from this block. - */ - std::set postDominatorIds; -}; - -class ControlFlowInfo -{ -public: - - /** - * Compute immediate post-dominator instruction for kernel instructions. - */ - static void assignImmediatePostDominators( - const std::vector& instructions); - -private: - ControlFlowInfo(const std::vector& instructions); - - GPUStaticInst* lastInstruction(const BasicBlock* block) const; - - BasicBlock* basicBlock(int inst_addr) const; - - BasicBlock* postDominator(const BasicBlock* block) const; - - void createBasicBlocks(); - - void connectBasicBlocks(); - - void findPostDominators(); - - void findImmediatePostDominators(); - - void printBasicBlocks() const; - - void printBasicBlockDot() const; - - void printPostDominators() const; - - void printImmediatePostDominators() const; - - std::vector> basicBlocks; - std::vector instructions; -}; - -#endif // __KERNEL_CFG_HH__ diff --git a/src/gpu-compute/ndrange.hh b/src/gpu-compute/ndrange.hh deleted file mode 100644 index db6dc455f..000000000 --- a/src/gpu-compute/ndrange.hh +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: Steve Reinhardt - */ - -#ifndef __NDRANGE_HH__ -#define __NDRANGE_HH__ - -#include "base/types.hh" -#include "gpu-compute/qstruct.hh" - -struct NDRange -{ - // copy of the queue entry provided at dispatch - HsaQueueEntry q; - - // The current workgroup id (3 dimensions) - int wgId[3]; - // The number of workgroups in each dimension - int numWg[3]; - // The total number of workgroups - int numWgTotal; - - // The number of completed work groups - int numWgCompleted; - // The global workgroup ID - uint32_t globalWgId; - - // flag indicating whether all work groups have been launched - bool wg_disp_rem; - // kernel complete - bool execDone; - bool userDoorBellSet; - volatile bool *addrToNotify; - volatile uint32_t *numDispLeft; - int dispatchId; - int curCid; // Current context id -}; - -#endif // __NDRANGE_HH__ diff --git a/src/gpu-compute/qstruct.hh b/src/gpu-compute/qstruct.hh deleted file mode 100644 index b400dc0ee..000000000 --- a/src/gpu-compute/qstruct.hh +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: Brad Beckmann, Marc Orr - */ - -#ifndef __Q_STRUCT_HH__ -#define __Q_STRUCT_HH__ - -#include -#include - -// Maximum number of arguments -static const int KER_NUM_ARGS = 32; -// Kernel argument buffer size -static const int KER_ARGS_LENGTH = 512; - -class LdsChunk; -struct NDRange; - -// Be very careful of alignment in this structure. The structure -// must compile to the same layout in both 32-bit and 64-bit mode. -struct HsaQueueEntry -{ - // Base pointer for array of instruction pointers - uint64_t code_ptr; - // Grid Size (3 dimensions) - uint32_t gdSize[3]; - // Workgroup Size (3 dimensions) - uint32_t wgSize[3]; - uint16_t sRegCount; - uint16_t dRegCount; - uint16_t cRegCount; - uint64_t privMemStart; - uint32_t privMemPerItem; - uint32_t privMemTotal; - uint64_t spillMemStart; - uint32_t spillMemPerItem; - uint32_t spillMemTotal; - uint64_t roMemStart; - uint32_t roMemTotal; - // Size (in bytes) of LDS - uint32_t ldsSize; - // Virtual Memory Id (unused right now) - uint32_t vmId; - - // Pointer to dependency chain (unused now) - uint64_t depends; - - // pointer to bool - uint64_t addrToNotify; - // pointer to uint32_t - uint64_t numDispLeft; - - // variables to pass arguments when running in standalone mode, - // will be removed when run.py and sh.cpp have been updated to - // use args and offset arrays - uint64_t arg1; - uint64_t arg2; - uint64_t arg3; - uint64_t arg4; - - // variables to pass arguments when running in cpu+gpu mode - uint8_t args[KER_ARGS_LENGTH]; - uint16_t offsets[KER_NUM_ARGS]; - uint16_t num_args; -}; - -// State that needs to be passed between the simulation and simulated app, a -// pointer to this struct can be passed through the depends field in the -// HsaQueueEntry struct -struct HostState -{ - // cl_event* has original HsaQueueEntry for init - uint64_t event; -}; - -// Total number of HSA queues -static const int HSAQ_NQUEUES = 8; - -// These values will eventually live in memory mapped registers -// and be settable by the kernel mode driver. - -// Number of entries in each HSA queue -static const int HSAQ_SIZE = 64; -// Address of first HSA queue index -static const int HSAQ_INDX_BASE = 0x10000ll; -// Address of first HSA queue -static const int HSAQ_BASE = 0x11000ll; -// Suggested start of HSA code -static const int HSA_CODE_BASE = 0x18000ll; - -// These are shortcuts for deriving the address of a specific -// HSA queue or queue index -#define HSAQ(n) (HSAQ_BASE + HSAQ_SIZE * sizeof(struct fsaQueue) * n) -#define HSAQE(n,i) (HSAQ_BASE + (HSAQ_SIZE * n + i) * sizeof(struct fsaQueue)) -#define HSAQ_RI(n) (HSAQ_INDX_BASE + sizeof(int) * (n * 3 + 0)) -#define HSAQ_WI(n) (HSAQ_INDX_BASE + sizeof(int) * (n * 3 + 1)) -#define HSAQ_CI(n) (HSAQ_INDX_BASE + sizeof(int) * (n * 3 + 2)) - -/* - * Example code for writing to a queue - * - * void - * ToQueue(int n,struct fsaQueue *val) - * { - * int wi = *(int*)HSAQ_WI(n); - * int ri = *(int*)HSAQ_RI(n); - * int ci = *(int*)HSAQ_CI(n); - * - * if (ci - ri < HSAQ_SIZE) { - * (*(int*)HSAQ_CI(n))++; - * *(HsaQueueEntry*)(HSAQE(n, (wi % HSAQ_SIZE))) = *val; - * (*(int*)HSAQ_WI(n))++; - * } - * } - */ - -#endif // __Q_STRUCT_HH__ diff --git a/src/gpu-compute/vector_register_state.cc b/src/gpu-compute/vector_register_state.cc deleted file mode 100644 index e177d3b64..000000000 --- a/src/gpu-compute/vector_register_state.cc +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: John Kalamatianos - */ - -#include "gpu-compute/vector_register_state.hh" - -#include - -#include "gpu-compute/compute_unit.hh" - -VecRegisterState::VecRegisterState() : computeUnit(nullptr) -{ - s_reg.clear(); - d_reg.clear(); -} - -void -VecRegisterState::setParent(ComputeUnit *_computeUnit) -{ - computeUnit = _computeUnit; - _name = computeUnit->name() + ".VecRegState"; -} - -void -VecRegisterState::init(uint32_t _size, uint32_t wf_size) -{ - s_reg.resize(_size); - fatal_if(wf_size > std::numeric_limits::digits || - wf_size <= 0, - "WF size is larger than the host can support or is zero"); - fatal_if((wf_size & (wf_size - 1)) != 0, - "Wavefront size should be a power of 2"); - for (int i = 0; i < s_reg.size(); ++i) { - s_reg[i].resize(wf_size, 0); - } - d_reg.resize(_size); - for (int i = 0; i < d_reg.size(); ++i) { - d_reg[i].resize(wf_size, 0); - } -} diff --git a/src/gpu-compute/vector_register_state.hh b/src/gpu-compute/vector_register_state.hh deleted file mode 100644 index 97a0d8e25..000000000 --- a/src/gpu-compute/vector_register_state.hh +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2015 Advanced Micro Devices, Inc. - * All rights reserved. - * - * For use for simulation and test purposes only - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * Author: John Kalamatianos - */ - -#ifndef __VECTOR_REGISTER_STATE_HH__ -#define __VECTOR_REGISTER_STATE_HH__ - -#include -#include -#include -#include - -#include "gpu-compute/misc.hh" - -class ComputeUnit; - -// Vector Register State per SIMD unit (contents of the vector -// registers in the VRF of the SIMD) -class VecRegisterState -{ - public: - VecRegisterState(); - void init(uint32_t _size, uint32_t wf_size); - - const std::string& name() const { return _name; } - void setParent(ComputeUnit *_computeUnit); - void regStats() { } - - // Access methods - template - T - read(int regIdx, int threadId=0) { - T *p0; - assert(sizeof(T) == 4 || sizeof(T) == 8); - if (sizeof(T) == 4) { - p0 = (T*)(&s_reg[regIdx][threadId]); - } else { - p0 = (T*)(&d_reg[regIdx][threadId]); - } - - return *p0; - } - - template - void - write(unsigned int regIdx, T value, int threadId=0) { - T *p0; - assert(sizeof(T) == 4 || sizeof(T) == 8); - if (sizeof(T) == 4) { - p0 = (T*)(&s_reg[regIdx][threadId]); - } else { - p0 = (T*)(&d_reg[regIdx][threadId]); - } - - *p0 = value; - } - - // (Single Precision) Vector Register File size. - int regSize() { return s_reg.size(); } - - private: - ComputeUnit *computeUnit; - std::string _name; - // 32-bit Single Precision Vector Register State - std::vector> s_reg; - // 64-bit Double Precision Vector Register State - std::vector> d_reg; -}; - -#endif // __VECTOR_REGISTER_STATE_HH__ diff --git a/util/git-commit-msg.py b/util/git-commit-msg.py index 500c5c71d..d33b5b014 100755 --- a/util/git-commit-msg.py +++ b/util/git-commit-msg.py @@ -88,7 +88,7 @@ def _validateTags(commit_header): # @todo this is error prone, and should be extracted automatically from # a file - valid_tags = ["arch", "arch-arm", "arch-gcn3", "arch-hsail", + valid_tags = ["arch", "arch-arm", "arch-gcn3", "arch-mips", "arch-power", "arch-riscv", "arch-sparc", "arch-x86", "base", "configs", "cpu", "cpu-kvm", "cpu-minor", "cpu-o3", "cpu-simple", "dev", "dev-arm", "dev-virtio", "ext", "fastmodel", diff --git a/util/regress b/util/regress index 1f1404c94..e7187757c 100755 --- a/util/regress +++ b/util/regress @@ -49,7 +49,7 @@ add_option('--builds', 'SPARC,' \ 'X86,X86_MESI_Two_Level,' \ 'RISCV,' \ - 'HSAIL_X86', + 'GCN3_X86', help="comma-separated build targets to test (default: '%default')") add_option('--modes', default='se,fs', -- 2.30.2