Andreas Sandberg <andreas.sandberg@arm.com>
Giacomo Travaglini <giacomo.travaglini@arm.com>
arch-gcn3:
-arch-hsail:
Tony Gutierrez <anthony.gutierrez@amd.com>
arch-mips:
arch-power:
sticky_vars.AddVariables(
EnumVariable('TARGET_ISA', 'Target ISA', 'null', all_isa_list),
- EnumVariable('TARGET_GPU_ISA', 'Target GPU ISA', 'hsail', all_gpu_isa_list),
+ EnumVariable('TARGET_GPU_ISA', 'Target GPU ISA', 'gcn3', all_gpu_isa_list),
ListVariable('CPU_MODELS', 'CPU models',
sorted(n for n,m in CpuModel.dict.items() if m.default),
sorted(CpuModel.dict.keys())),
+++ /dev/null
-PROTOCOL = 'GPU_RfO'
-TARGET_ISA = 'x86'
-TARGET_GPU_ISA = 'hsail'
-BUILD_GPU = True
-CPU_MODELS = 'AtomicSimpleCPU,O3CPU,TimingSimpleCPU'
Import('*')
+if not env['BUILD_GPU']:
+ Return()
+
if env['TARGET_GPU_ISA'] == 'gcn3':
Source('decoder.cc')
Source('insts/gpu_static_inst.cc')
+++ /dev/null
-// University of Illinois/NCSA
-// Open Source License
-//
-// Copyright (c) 2013, Advanced Micro Devices, Inc.
-// All rights reserved.
-//
-// Developed by:
-//
-// HSA Team
-//
-// Advanced Micro Devices, Inc
-//
-// www.amd.com
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy of
-// this software and associated documentation files (the "Software"), to deal with
-// the Software without restriction, including without limitation the rights to
-// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-// of the Software, and to permit persons to whom the Software is furnished to do
-// so, subject to the following conditions:
-//
-// * Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimers.
-//
-// * Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimers in the
-// documentation and/or other materials provided with the distribution.
-//
-// * Neither the names of the LLVM Team, University of Illinois at
-// Urbana-Champaign, nor the names of its contributors may be used to
-// endorse or promote products derived from this Software without specific
-// prior written permission.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
-// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
-// SOFTWARE.
-#ifndef INTERNAL_BRIG_H
-#define INTERNAL_BRIG_H
-
-#include <stdint.h>
-
-namespace Brig {
-#include "Brig_new.hpp"
-
-// These typedefs provide some backward compatibility with earlier versions
-// of Brig.h, reducing the number of code changes. The distinct names also
-// increase legibility by showing the code's intent.
-typedef BrigBase BrigDirective;
-typedef BrigBase BrigOperand;
-
-enum BrigMemoryFenceSegments { // for internal use only
- //.mnemo={ s/^BRIG_MEMORY_FENCE_SEGMENT_//;lc }
- //.mnemo_token=_EMMemoryFenceSegments
- //.mnemo_context=EInstModifierInstFenceContext
- BRIG_MEMORY_FENCE_SEGMENT_GLOBAL = 0,
- BRIG_MEMORY_FENCE_SEGMENT_GROUP = 1,
- BRIG_MEMORY_FENCE_SEGMENT_IMAGE = 2,
- BRIG_MEMORY_FENCE_SEGMENT_LAST = 3 //.skip
-};
-
-}
-
-#endif // defined(INTERNAL_BRIG_H)
+++ /dev/null
-// University of Illinois/NCSA
-// Open Source License
-//
-// Copyright (c) 2013-2015, Advanced Micro Devices, Inc.
-// All rights reserved.
-//
-// Developed by:
-//
-// HSA Team
-//
-// Advanced Micro Devices, Inc
-//
-// www.amd.com
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy of
-// this software and associated documentation files (the "Software"), to deal with
-// the Software without restriction, including without limitation the rights to
-// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-// of the Software, and to permit persons to whom the Software is furnished to do
-// so, subject to the following conditions:
-//
-// * Redistributions of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimers.
-//
-// * Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimers in the
-// documentation and/or other materials provided with the distribution.
-//
-// * Neither the names of the LLVM Team, University of Illinois at
-// Urbana-Champaign, nor the names of its contributors may be used to
-// endorse or promote products derived from this Software without specific
-// prior written permission.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
-// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
-// SOFTWARE.
-
-//.ignore{
-
-#ifndef INCLUDED_BRIG_H
-#define INCLUDED_BRIG_H
-
-#include <stdint.h>
-
-enum BrigAuxDefs {
- MAX_OPERANDS_NUM = 6
-};
-
-//}
-
-typedef uint32_t BrigVersion32_t;
-
-enum BrigVersion {
-
- //.nowrap
- //.nodump
- //.nollvm
-
- BRIG_VERSION_HSAIL_MAJOR = 1,
- BRIG_VERSION_HSAIL_MINOR = 0,
- BRIG_VERSION_BRIG_MAJOR = 1,
- BRIG_VERSION_BRIG_MINOR = 0
-};
-
-typedef uint8_t BrigAlignment8_t; //.defValue=BRIG_ALIGNMENT_NONE
-
-typedef uint8_t BrigAllocation8_t; //.defValue=BRIG_ALLOCATION_NONE
-
-typedef uint8_t BrigAluModifier8_t;
-
-typedef uint8_t BrigAtomicOperation8_t;
-
-typedef uint32_t BrigCodeOffset32_t; //.defValue=0 //.wtype=ItemRef<Code>
-
-typedef uint8_t BrigCompareOperation8_t;
-
-typedef uint16_t BrigControlDirective16_t;
-
-typedef uint32_t BrigDataOffset32_t;
-
-typedef BrigDataOffset32_t BrigDataOffsetCodeList32_t; //.wtype=ListRef<Code> //.defValue=0
-
-typedef BrigDataOffset32_t BrigDataOffsetOperandList32_t; //.wtype=ListRef<Operand> //.defValue=0
-
-typedef BrigDataOffset32_t BrigDataOffsetString32_t; //.wtype=StrRef //.defValue=0
-
-typedef uint8_t BrigExecutableModifier8_t;
-
-typedef uint8_t BrigImageChannelOrder8_t; //.defValue=BRIG_CHANNEL_ORDER_UNKNOWN
-
-typedef uint8_t BrigImageChannelType8_t; //.defValue=BRIG_CHANNEL_TYPE_UNKNOWN
-
-typedef uint8_t BrigImageGeometry8_t; //.defValue=BRIG_GEOMETRY_UNKNOWN
-
-typedef uint8_t BrigImageQuery8_t;
-
-typedef uint16_t BrigKind16_t;
-
-typedef uint8_t BrigLinkage8_t; //.defValue=BRIG_LINKAGE_NONE
-
-typedef uint8_t BrigMachineModel8_t; //.defValue=BRIG_MACHINE_LARGE
-
-typedef uint8_t BrigMemoryModifier8_t;
-
-typedef uint8_t BrigMemoryOrder8_t; //.defValue=BRIG_MEMORY_ORDER_RELAXED
-
-typedef uint8_t BrigMemoryScope8_t; //.defValue=BRIG_MEMORY_SCOPE_SYSTEM
-
-typedef uint16_t BrigOpcode16_t;
-
-typedef uint32_t BrigOperandOffset32_t; //.defValue=0 //.wtype=ItemRef<Operand>
-
-typedef uint8_t BrigPack8_t; //.defValue=BRIG_PACK_NONE
-
-typedef uint8_t BrigProfile8_t; //.defValue=BRIG_PROFILE_FULL
-
-typedef uint16_t BrigRegisterKind16_t;
-
-typedef uint8_t BrigRound8_t; //.defValue=BRIG_ROUND_NONE
-
-typedef uint8_t BrigSamplerAddressing8_t; //.defValue=BRIG_ADDRESSING_CLAMP_TO_EDGE
-
-typedef uint8_t BrigSamplerCoordNormalization8_t;
-
-typedef uint8_t BrigSamplerFilter8_t;
-
-typedef uint8_t BrigSamplerQuery8_t;
-
-typedef uint32_t BrigSectionIndex32_t;
-
-typedef uint8_t BrigSegCvtModifier8_t;
-
-typedef uint8_t BrigSegment8_t; //.defValue=BRIG_SEGMENT_NONE
-
-typedef uint32_t BrigStringOffset32_t; //.defValue=0 //.wtype=StrRef
-
-typedef uint16_t BrigType16_t;
-
-typedef uint8_t BrigVariableModifier8_t;
-
-typedef uint8_t BrigWidth8_t;
-
-typedef uint32_t BrigExceptions32_t;
-
-enum BrigKind {
-
- //.nollvm
- //
- //.wname={ s/^BRIG_KIND//; MACRO2Name($_) }
- //.mnemo=$wname{ $wname }
- //
- //.sizeof=$wname{ "sizeof(".$structs->{"Brig".$wname}->{rawbrig}.")" }
- //.sizeof_switch //.sizeof_proto="int size_of_brig_record(unsigned arg)" //.sizeof_default="return -1"
- //
- //.isBodyOnly={ "false" }
- //.isBodyOnly_switch //.isBodyOnly_proto="bool isBodyOnly(Directive d)" //.isBodyOnly_arg="d.kind()"
- //.isBodyOnly_default="assert(false); return false"
- //
- //.isToplevelOnly={ "false" }
- //.isToplevelOnly_switch //.isToplevelOnly_proto="bool isToplevelOnly(Directive d)" //.isToplevelOnly_arg="d.kind()"
- //.isToplevelOnly_default="assert(false); return false"
-
- BRIG_KIND_NONE = 0x0000, //.skip
-
- BRIG_KIND_DIRECTIVE_BEGIN = 0x1000, //.skip
- BRIG_KIND_DIRECTIVE_ARG_BLOCK_END = 0x1000, //.isBodyOnly=true
- BRIG_KIND_DIRECTIVE_ARG_BLOCK_START = 0x1001, //.isBodyOnly=true
- BRIG_KIND_DIRECTIVE_COMMENT = 0x1002,
- BRIG_KIND_DIRECTIVE_CONTROL = 0x1003, //.isBodyOnly=true
- BRIG_KIND_DIRECTIVE_EXTENSION = 0x1004, //.isToplevelOnly=true
- BRIG_KIND_DIRECTIVE_FBARRIER = 0x1005,
- BRIG_KIND_DIRECTIVE_FUNCTION = 0x1006, //.isToplevelOnly=true
- BRIG_KIND_DIRECTIVE_INDIRECT_FUNCTION = 0x1007, //.isToplevelOnly=true
- BRIG_KIND_DIRECTIVE_KERNEL = 0x1008, //.isToplevelOnly=true
- BRIG_KIND_DIRECTIVE_LABEL = 0x1009, //.isBodyOnly=true
- BRIG_KIND_DIRECTIVE_LOC = 0x100a,
- BRIG_KIND_DIRECTIVE_MODULE = 0x100b, //.isToplevelOnly=true
- BRIG_KIND_DIRECTIVE_PRAGMA = 0x100c,
- BRIG_KIND_DIRECTIVE_SIGNATURE = 0x100d, //.isToplevelOnly=true
- BRIG_KIND_DIRECTIVE_VARIABLE = 0x100e,
- BRIG_KIND_DIRECTIVE_END = 0x100f, //.skip
-
- BRIG_KIND_INST_BEGIN = 0x2000, //.skip
- BRIG_KIND_INST_ADDR = 0x2000,
- BRIG_KIND_INST_ATOMIC = 0x2001,
- BRIG_KIND_INST_BASIC = 0x2002,
- BRIG_KIND_INST_BR = 0x2003,
- BRIG_KIND_INST_CMP = 0x2004,
- BRIG_KIND_INST_CVT = 0x2005,
- BRIG_KIND_INST_IMAGE = 0x2006,
- BRIG_KIND_INST_LANE = 0x2007,
- BRIG_KIND_INST_MEM = 0x2008,
- BRIG_KIND_INST_MEM_FENCE = 0x2009,
- BRIG_KIND_INST_MOD = 0x200a,
- BRIG_KIND_INST_QUERY_IMAGE = 0x200b,
- BRIG_KIND_INST_QUERY_SAMPLER = 0x200c,
- BRIG_KIND_INST_QUEUE = 0x200d,
- BRIG_KIND_INST_SEG = 0x200e,
- BRIG_KIND_INST_SEG_CVT = 0x200f,
- BRIG_KIND_INST_SIGNAL = 0x2010,
- BRIG_KIND_INST_SOURCE_TYPE = 0x2011,
- BRIG_KIND_INST_END = 0x2012, //.skip
-
- BRIG_KIND_OPERAND_BEGIN = 0x3000, //.skip
- BRIG_KIND_OPERAND_ADDRESS = 0x3000,
- BRIG_KIND_OPERAND_ALIGN = 0x3001,
- BRIG_KIND_OPERAND_CODE_LIST = 0x3002,
- BRIG_KIND_OPERAND_CODE_REF = 0x3003,
- BRIG_KIND_OPERAND_CONSTANT_BYTES = 0x3004,
- BRIG_KIND_OPERAND_RESERVED = 0x3005, //.skip
- BRIG_KIND_OPERAND_CONSTANT_IMAGE = 0x3006,
- BRIG_KIND_OPERAND_CONSTANT_OPERAND_LIST = 0x3007,
- BRIG_KIND_OPERAND_CONSTANT_SAMPLER = 0x3008,
- BRIG_KIND_OPERAND_OPERAND_LIST = 0x3009,
- BRIG_KIND_OPERAND_REGISTER = 0x300a,
- BRIG_KIND_OPERAND_STRING = 0x300b,
- BRIG_KIND_OPERAND_WAVESIZE = 0x300c,
- BRIG_KIND_OPERAND_END = 0x300d //.skip
-};
-
-enum BrigAlignment {
-
- //.mnemo={ s/^BRIG_ALIGNMENT_//; lc }
- //.mnemo_proto="const char* align2str(unsigned arg)"
- //
- //.bytes={ /(\d+)/ ? $1 : undef }
- //.bytes_switch //.bytes_proto="unsigned align2num(unsigned arg)" //.bytes_default="assert(false); return -1"
- //
- //.rbytes=$bytes{ $bytes }
- //.rbytes_switch //.rbytes_reverse //.rbytes_proto="BrigAlignment num2align(uint64_t arg)"
- //.rbytes_default="return BRIG_ALIGNMENT_LAST"
- //
- //.print=$bytes{ $bytes>1 ? "_align($bytes)" : "" }
-
- BRIG_ALIGNMENT_NONE = 0, //.no_mnemo
- BRIG_ALIGNMENT_1 = 1, //.mnemo=""
- BRIG_ALIGNMENT_2 = 2,
- BRIG_ALIGNMENT_4 = 3,
- BRIG_ALIGNMENT_8 = 4,
- BRIG_ALIGNMENT_16 = 5,
- BRIG_ALIGNMENT_32 = 6,
- BRIG_ALIGNMENT_64 = 7,
- BRIG_ALIGNMENT_128 = 8,
- BRIG_ALIGNMENT_256 = 9,
-
- BRIG_ALIGNMENT_LAST, //.skip
- BRIG_ALIGNMENT_MAX = BRIG_ALIGNMENT_LAST - 1 //.skip
-};
-
-enum BrigAllocation {
-
- //.mnemo={ s/^BRIG_ALLOCATION_//;lc }
- //.mnemo_token=EAllocKind
-
- BRIG_ALLOCATION_NONE = 0, //.mnemo=""
- BRIG_ALLOCATION_PROGRAM = 1,
- BRIG_ALLOCATION_AGENT = 2,
- BRIG_ALLOCATION_AUTOMATIC = 3
-};
-
-enum BrigAluModifierMask {
- BRIG_ALU_FTZ = 1
-};
-
-enum BrigAtomicOperation {
-
- //.tdcaption="Atomic Operations"
- //
- //.mnemo={ s/^BRIG_ATOMIC_//;lc }
- //.mnemo_token=_EMAtomicOp
- //.mnemo_context=EInstModifierInstAtomicContext
- //
- //.print=$mnemo{ "_$mnemo" }
-
- BRIG_ATOMIC_ADD = 0,
- BRIG_ATOMIC_AND = 1,
- BRIG_ATOMIC_CAS = 2,
- BRIG_ATOMIC_EXCH = 3,
- BRIG_ATOMIC_LD = 4,
- BRIG_ATOMIC_MAX = 5,
- BRIG_ATOMIC_MIN = 6,
- BRIG_ATOMIC_OR = 7,
- BRIG_ATOMIC_ST = 8,
- BRIG_ATOMIC_SUB = 9,
- BRIG_ATOMIC_WRAPDEC = 10,
- BRIG_ATOMIC_WRAPINC = 11,
- BRIG_ATOMIC_XOR = 12,
- BRIG_ATOMIC_WAIT_EQ = 13,
- BRIG_ATOMIC_WAIT_NE = 14,
- BRIG_ATOMIC_WAIT_LT = 15,
- BRIG_ATOMIC_WAIT_GTE = 16,
- BRIG_ATOMIC_WAITTIMEOUT_EQ = 17,
- BRIG_ATOMIC_WAITTIMEOUT_NE = 18,
- BRIG_ATOMIC_WAITTIMEOUT_LT = 19,
- BRIG_ATOMIC_WAITTIMEOUT_GTE = 20
-};
-
-enum BrigCompareOperation {
-
- //.tdcaption="Comparison Operators"
- //
- //.mnemo={ s/^BRIG_COMPARE_//;lc }
- //.mnemo_token=_EMCompare
- //
- //.print=$mnemo{ "_$mnemo" }
-
- BRIG_COMPARE_EQ = 0,
- BRIG_COMPARE_NE = 1,
- BRIG_COMPARE_LT = 2,
- BRIG_COMPARE_LE = 3,
- BRIG_COMPARE_GT = 4,
- BRIG_COMPARE_GE = 5,
- BRIG_COMPARE_EQU = 6,
- BRIG_COMPARE_NEU = 7,
- BRIG_COMPARE_LTU = 8,
- BRIG_COMPARE_LEU = 9,
- BRIG_COMPARE_GTU = 10,
- BRIG_COMPARE_GEU = 11,
- BRIG_COMPARE_NUM = 12,
- BRIG_COMPARE_NAN = 13,
- BRIG_COMPARE_SEQ = 14,
- BRIG_COMPARE_SNE = 15,
- BRIG_COMPARE_SLT = 16,
- BRIG_COMPARE_SLE = 17,
- BRIG_COMPARE_SGT = 18,
- BRIG_COMPARE_SGE = 19,
- BRIG_COMPARE_SGEU = 20,
- BRIG_COMPARE_SEQU = 21,
- BRIG_COMPARE_SNEU = 22,
- BRIG_COMPARE_SLTU = 23,
- BRIG_COMPARE_SLEU = 24,
- BRIG_COMPARE_SNUM = 25,
- BRIG_COMPARE_SNAN = 26,
- BRIG_COMPARE_SGTU = 27
-};
-
-enum BrigControlDirective {
-
- //.mnemo={ s/^BRIG_CONTROL_//;lc }
- //.mnemo_token=EControl
- //
- //.print=$mnemo{ $mnemo }
-
- BRIG_CONTROL_NONE = 0, //.skip
- BRIG_CONTROL_ENABLEBREAKEXCEPTIONS = 1,
- BRIG_CONTROL_ENABLEDETECTEXCEPTIONS = 2,
- BRIG_CONTROL_MAXDYNAMICGROUPSIZE = 3,
- BRIG_CONTROL_MAXFLATGRIDSIZE = 4,
- BRIG_CONTROL_MAXFLATWORKGROUPSIZE = 5,
- BRIG_CONTROL_REQUIREDDIM = 6,
- BRIG_CONTROL_REQUIREDGRIDSIZE = 7,
- BRIG_CONTROL_REQUIREDWORKGROUPSIZE = 8,
- BRIG_CONTROL_REQUIRENOPARTIALWORKGROUPS = 9
-};
-
-enum BrigExecutableModifierMask {
- //.nodump
- BRIG_EXECUTABLE_DEFINITION = 1
-};
-
-enum BrigImageChannelOrder {
-
- //.mnemo={ s/^BRIG_CHANNEL_ORDER_?//;lc }
- //.mnemo_token=EImageOrder
- //.mnemo_context=EImageOrderContext
- //
- //.print=$mnemo{ $mnemo }
-
- BRIG_CHANNEL_ORDER_A = 0,
- BRIG_CHANNEL_ORDER_R = 1,
- BRIG_CHANNEL_ORDER_RX = 2,
- BRIG_CHANNEL_ORDER_RG = 3,
- BRIG_CHANNEL_ORDER_RGX = 4,
- BRIG_CHANNEL_ORDER_RA = 5,
- BRIG_CHANNEL_ORDER_RGB = 6,
- BRIG_CHANNEL_ORDER_RGBX = 7,
- BRIG_CHANNEL_ORDER_RGBA = 8,
- BRIG_CHANNEL_ORDER_BGRA = 9,
- BRIG_CHANNEL_ORDER_ARGB = 10,
- BRIG_CHANNEL_ORDER_ABGR = 11,
- BRIG_CHANNEL_ORDER_SRGB = 12,
- BRIG_CHANNEL_ORDER_SRGBX = 13,
- BRIG_CHANNEL_ORDER_SRGBA = 14,
- BRIG_CHANNEL_ORDER_SBGRA = 15,
- BRIG_CHANNEL_ORDER_INTENSITY = 16,
- BRIG_CHANNEL_ORDER_LUMINANCE = 17,
- BRIG_CHANNEL_ORDER_DEPTH = 18,
- BRIG_CHANNEL_ORDER_DEPTH_STENCIL = 19,
-
- // used internally
- BRIG_CHANNEL_ORDER_UNKNOWN, //.mnemo="" // used when no order is specified
-
- BRIG_CHANNEL_ORDER_FIRST_USER_DEFINED = 128 //.skip
-
-};
-
-enum BrigImageChannelType {
-
- //.mnemo={ s/^BRIG_CHANNEL_TYPE_//;lc }
- //.mnemo_token=EImageFormat
- //
- //.print=$mnemo{ $mnemo }
-
- BRIG_CHANNEL_TYPE_SNORM_INT8 = 0,
- BRIG_CHANNEL_TYPE_SNORM_INT16 = 1,
- BRIG_CHANNEL_TYPE_UNORM_INT8 = 2,
- BRIG_CHANNEL_TYPE_UNORM_INT16 = 3,
- BRIG_CHANNEL_TYPE_UNORM_INT24 = 4,
- BRIG_CHANNEL_TYPE_UNORM_SHORT_555 = 5,
- BRIG_CHANNEL_TYPE_UNORM_SHORT_565 = 6,
- BRIG_CHANNEL_TYPE_UNORM_INT_101010 = 7,
- BRIG_CHANNEL_TYPE_SIGNED_INT8 = 8,
- BRIG_CHANNEL_TYPE_SIGNED_INT16 = 9,
- BRIG_CHANNEL_TYPE_SIGNED_INT32 = 10,
- BRIG_CHANNEL_TYPE_UNSIGNED_INT8 = 11,
- BRIG_CHANNEL_TYPE_UNSIGNED_INT16 = 12,
- BRIG_CHANNEL_TYPE_UNSIGNED_INT32 = 13,
- BRIG_CHANNEL_TYPE_HALF_FLOAT = 14,
- BRIG_CHANNEL_TYPE_FLOAT = 15,
-
- // used internally
- BRIG_CHANNEL_TYPE_UNKNOWN, //.mnemo=""
-
- BRIG_CHANNEL_TYPE_FIRST_USER_DEFINED = 128 //.skip
-};
-
-enum BrigImageGeometry {
-
- //.tdcaption="Geometry"
- //
- //.mnemo={ s/^BRIG_GEOMETRY_//;lc }
- //.mnemo_token=EImageGeometry
- //
- //.dim={/_([0-9]+D)(A)?/ ? $1+(defined $2?1:0) : undef}
- //.dim_switch //.dim_proto="unsigned getBrigGeometryDim(unsigned geo)" //.dim_arg="geo"
- //.dim_default="assert(0); return 0"
- //
- //.depth={/DEPTH$/?"true":"false"}
- //.depth_switch //.depth_proto="bool isBrigGeometryDepth(unsigned geo)" //.depth_arg="geo"
- //.depth_default="return false"
-
- BRIG_GEOMETRY_1D = 0,
- BRIG_GEOMETRY_2D = 1,
- BRIG_GEOMETRY_3D = 2,
- BRIG_GEOMETRY_1DA = 3,
- BRIG_GEOMETRY_2DA = 4,
- BRIG_GEOMETRY_1DB = 5,
- BRIG_GEOMETRY_2DDEPTH = 6,
- BRIG_GEOMETRY_2DADEPTH = 7,
-
- // used internally
- BRIG_GEOMETRY_UNKNOWN, //.mnemo=""
-
- BRIG_GEOMETRY_FIRST_USER_DEFINED = 128 //.skip
-};
-
-enum BrigImageQuery {
-
- //.mnemo={ s/^BRIG_IMAGE_QUERY_//;lc }
- //
- //.print=$mnemo{ $mnemo }
-
- BRIG_IMAGE_QUERY_WIDTH = 0,
- BRIG_IMAGE_QUERY_HEIGHT = 1,
- BRIG_IMAGE_QUERY_DEPTH = 2,
- BRIG_IMAGE_QUERY_ARRAY = 3,
- BRIG_IMAGE_QUERY_CHANNELORDER = 4,
- BRIG_IMAGE_QUERY_CHANNELTYPE = 5,
- BRIG_IMAGE_QUERY_NUMMIPLEVELS = 6
-};
-
-enum BrigLinkage {
-
- //.mnemo={ s/^BRIG_LINKAGE_//;s/NONE//;lc }
-
- BRIG_LINKAGE_NONE = 0,
- BRIG_LINKAGE_PROGRAM = 1,
- BRIG_LINKAGE_MODULE = 2,
- BRIG_LINKAGE_FUNCTION = 3,
- BRIG_LINKAGE_ARG = 4
-};
-
-enum BrigMachineModel {
-
- //.mnemo={ s/^BRIG_MACHINE_//; '$'.lc }
- //.mnemo_token=ETargetMachine
- //
- //.print=$mnemo{ $mnemo }
-
- BRIG_MACHINE_SMALL = 0,
- BRIG_MACHINE_LARGE = 1,
-
- BRIG_MACHINE_UNDEF = 2 //.skip
-};
-
-enum BrigMemoryModifierMask { //.tddef=0
- BRIG_MEMORY_CONST = 1
-};
-
-enum BrigMemoryOrder {
-
- //.mnemo={ s/^BRIG_MEMORY_ORDER_//; lc }
- //.mnemo_token=_EMMemoryOrder
- //
- //.print=$mnemo{ "_$mnemo" }
-
- BRIG_MEMORY_ORDER_NONE = 0, //.mnemo=""
- BRIG_MEMORY_ORDER_RELAXED = 1, //.mnemo=rlx
- BRIG_MEMORY_ORDER_SC_ACQUIRE = 2, //.mnemo=scacq
- BRIG_MEMORY_ORDER_SC_RELEASE = 3, //.mnemo=screl
- BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE = 4, //.mnemo=scar
-
- BRIG_MEMORY_ORDER_LAST = 5 //.skip
-};
-
-enum BrigMemoryScope {
-
- //.mnemo={ s/^BRIG_MEMORY_SCOPE_//; lc }
- //.mnemo_token=_EMMemoryScope
- //
- //.print=$mnemo{ $mnemo }
-
- BRIG_MEMORY_SCOPE_NONE = 0, //.mnemo=""
- BRIG_MEMORY_SCOPE_WORKITEM = 1, //.mnemo=""
- BRIG_MEMORY_SCOPE_WAVEFRONT = 2, //.mnemo=wave
- BRIG_MEMORY_SCOPE_WORKGROUP = 3, //.mnemo=wg
- BRIG_MEMORY_SCOPE_AGENT = 4, //.mnemo=agent
- BRIG_MEMORY_SCOPE_SYSTEM = 5, //.mnemo=system
-
- BRIG_MEMORY_SCOPE_LAST = 6 //.skip
-};
-
-enum BrigOpcode {
-
- //.tdcaption="Instruction Opcodes"
- //
- //.k={ "BASIC" }
- //.pscode=$k{ MACRO2Name("_".$k) }
- //.opcodeparser=$pscode{ return $pscode && "parseMnemo$pscode" }
- //.opcodeparser_incfile=ParserUtilities
- //.opcodeparser_switch //.opcodeparser_proto="OpcodeParser getOpcodeParser(BrigOpcode16_t arg)" //.opcodeparser_default="return parseMnemoBasic"
- //
- //.psopnd={undef}
- //.opndparser=$psopnd{ return $psopnd && "&Parser::parse$psopnd" }
- //.opndparser_incfile=ParserUtilities
- //.opndparser_switch //.opndparser_proto="Parser::OperandParser Parser::getOperandParser(BrigOpcode16_t arg)" //.opndparser_default="return &Parser::parseOperands"
- //
- //.mnemo={ s/^BRIG_OPCODE_//; s/GCN([^_])/GCN_$1/; lc }
- //.mnemo_scanner=Instructions //.mnemo_token=EInstruction
- //.mnemo_context=EDefaultContext
- //
- //.has_memory_order={undef}
- //.semsupport=$has_memory_order{ return $has_memory_order && "true" }
- //
- //.hasType=$k{ return ($k and $k eq "BASIC_NO_TYPE") ? "false" : undef; }
- //.hasType_switch //.hasType_proto="bool instHasType(BrigOpcode16_t arg)" //.hasType_default="return true"
- //
- //.opcodevis=$pscode{ s/^BRIG_OPCODE_//; sprintf("%-47s(","vis.visitOpcode_".$_) . ($pscode =~m/^(BasicOrMod|Nop)$/? "inst" : "HSAIL_ASM::Inst". ($pscode=~m/BasicNoType/? "Basic":$pscode) ."(inst)").")" }
- //.opcodevis_switch //.opcodevis_proto="template <typename RetType, typename Visitor> RetType visitOpcode_gen(HSAIL_ASM::Inst inst, Visitor& vis)"
- //.opcodevis_arg="inst.opcode()" //.opcodevis_default="return RetType()"
- //.opcodevis_incfile=ItemUtils
- //
- //.ftz=$k{ return ($k eq "BASIC_OR_MOD" or $k eq "CMP" or $k eq "CVT") ? "true" : undef }
- //.ftz_incfile=ItemUtils //.ftz_switch //.ftz_proto="inline bool instSupportsFtz(BrigOpcode16_t arg)" //.ftz_default="return false"
- //
- //.vecOpndIndex={undef}
- //.vecOpndIndex_switch //.vecOpndIndex_proto="int vecOpndIndex(BrigOpcode16_t arg)" //.vecOpndIndex_default="return -1"
- //.vecOpndIndex_incfile=ParserUtilities
- //
- //.numdst={undef}
- //.numdst_switch //.numdst_proto="int instNumDstOperands(BrigOpcode16_t arg)" //.numdst_default="return 1"
- //
- //.print=$mnemo{ $mnemo }
-
- BRIG_OPCODE_NOP = 0, //.k=NOP //.hasType=false
- BRIG_OPCODE_ABS = 1, //.k=BASIC_OR_MOD
- BRIG_OPCODE_ADD = 2, //.k=BASIC_OR_MOD
- BRIG_OPCODE_BORROW = 3,
- BRIG_OPCODE_CARRY = 4,
- BRIG_OPCODE_CEIL = 5, //.k=BASIC_OR_MOD
- BRIG_OPCODE_COPYSIGN = 6, //.k=BASIC_OR_MOD
- BRIG_OPCODE_DIV = 7, //.k=BASIC_OR_MOD
- BRIG_OPCODE_FLOOR = 8, //.k=BASIC_OR_MOD
- BRIG_OPCODE_FMA = 9, //.k=BASIC_OR_MOD
- BRIG_OPCODE_FRACT = 10, //.k=BASIC_OR_MOD
- BRIG_OPCODE_MAD = 11, //.k=BASIC_OR_MOD
- BRIG_OPCODE_MAX = 12, //.k=BASIC_OR_MOD
- BRIG_OPCODE_MIN = 13, //.k=BASIC_OR_MOD
- BRIG_OPCODE_MUL = 14, //.k=BASIC_OR_MOD
- BRIG_OPCODE_MULHI = 15, //.k=BASIC_OR_MOD
- BRIG_OPCODE_NEG = 16, //.k=BASIC_OR_MOD
- BRIG_OPCODE_REM = 17,
- BRIG_OPCODE_RINT = 18, //.k=BASIC_OR_MOD
- BRIG_OPCODE_SQRT = 19, //.k=BASIC_OR_MOD
- BRIG_OPCODE_SUB = 20, //.k=BASIC_OR_MOD
- BRIG_OPCODE_TRUNC = 21, //.k=BASIC_OR_MOD
- BRIG_OPCODE_MAD24 = 22,
- BRIG_OPCODE_MAD24HI = 23,
- BRIG_OPCODE_MUL24 = 24,
- BRIG_OPCODE_MUL24HI = 25,
- BRIG_OPCODE_SHL = 26,
- BRIG_OPCODE_SHR = 27,
- BRIG_OPCODE_AND = 28,
- BRIG_OPCODE_NOT = 29,
- BRIG_OPCODE_OR = 30,
- BRIG_OPCODE_POPCOUNT = 31, //.k=SOURCE_TYPE
- BRIG_OPCODE_XOR = 32,
- BRIG_OPCODE_BITEXTRACT = 33,
- BRIG_OPCODE_BITINSERT = 34,
- BRIG_OPCODE_BITMASK = 35,
- BRIG_OPCODE_BITREV = 36,
- BRIG_OPCODE_BITSELECT = 37,
- BRIG_OPCODE_FIRSTBIT = 38, //.k=SOURCE_TYPE
- BRIG_OPCODE_LASTBIT = 39, //.k=SOURCE_TYPE
- BRIG_OPCODE_COMBINE = 40, //.k=SOURCE_TYPE //.vecOpndIndex=1
- BRIG_OPCODE_EXPAND = 41, //.k=SOURCE_TYPE //.vecOpndIndex=0
- BRIG_OPCODE_LDA = 42, //.k=ADDR
- BRIG_OPCODE_MOV = 43,
- BRIG_OPCODE_SHUFFLE = 44,
- BRIG_OPCODE_UNPACKHI = 45,
- BRIG_OPCODE_UNPACKLO = 46,
- BRIG_OPCODE_PACK = 47, //.k=SOURCE_TYPE
- BRIG_OPCODE_UNPACK = 48, //.k=SOURCE_TYPE
- BRIG_OPCODE_CMOV = 49,
- BRIG_OPCODE_CLASS = 50, //.k=SOURCE_TYPE
- BRIG_OPCODE_NCOS = 51,
- BRIG_OPCODE_NEXP2 = 52,
- BRIG_OPCODE_NFMA = 53,
- BRIG_OPCODE_NLOG2 = 54,
- BRIG_OPCODE_NRCP = 55,
- BRIG_OPCODE_NRSQRT = 56,
- BRIG_OPCODE_NSIN = 57,
- BRIG_OPCODE_NSQRT = 58,
- BRIG_OPCODE_BITALIGN = 59,
- BRIG_OPCODE_BYTEALIGN = 60,
- BRIG_OPCODE_PACKCVT = 61, //.k=SOURCE_TYPE
- BRIG_OPCODE_UNPACKCVT = 62, //.k=SOURCE_TYPE
- BRIG_OPCODE_LERP = 63,
- BRIG_OPCODE_SAD = 64, //.k=SOURCE_TYPE
- BRIG_OPCODE_SADHI = 65, //.k=SOURCE_TYPE
- BRIG_OPCODE_SEGMENTP = 66, //.k=SEG_CVT
- BRIG_OPCODE_FTOS = 67, //.k=SEG_CVT
- BRIG_OPCODE_STOF = 68, //.k=SEG_CVT
- BRIG_OPCODE_CMP = 69, //.k=CMP
- BRIG_OPCODE_CVT = 70, //.k=CVT
- BRIG_OPCODE_LD = 71, //.k=MEM //.has_memory_order //.vecOpndIndex=0
- BRIG_OPCODE_ST = 72, //.k=MEM //.has_memory_order //.vecOpndIndex=0 //.numdst=0
- BRIG_OPCODE_ATOMIC = 73, //.k=ATOMIC
- BRIG_OPCODE_ATOMICNORET = 74, //.k=ATOMIC //.numdst=0
- BRIG_OPCODE_SIGNAL = 75, //.k=SIGNAL
- BRIG_OPCODE_SIGNALNORET = 76, //.k=SIGNAL //.numdst=0
- BRIG_OPCODE_MEMFENCE = 77, //.k=MEM_FENCE //.numdst=0
- BRIG_OPCODE_RDIMAGE = 78, //.k=IMAGE //.vecOpndIndex=0
- BRIG_OPCODE_LDIMAGE = 79, //.k=IMAGE //.vecOpndIndex=0
- BRIG_OPCODE_STIMAGE = 80, //.k=IMAGE //.vecOpndIndex=0 //.numdst=0
- BRIG_OPCODE_IMAGEFENCE = 81, //.k=BASIC_NO_TYPE
- BRIG_OPCODE_QUERYIMAGE = 82, //.k=QUERY_IMAGE
- BRIG_OPCODE_QUERYSAMPLER = 83, //.k=QUERY_SAMPLER
- BRIG_OPCODE_CBR = 84, //.k=BR //.numdst=0
- BRIG_OPCODE_BR = 85, //.k=BR //.numdst=0 //.hasType=false
- BRIG_OPCODE_SBR = 86, //.k=BR //.numdst=0 //.psopnd=SbrOperands
- BRIG_OPCODE_BARRIER = 87, //.k=BR //.numdst=0 //.hasType=false
- BRIG_OPCODE_WAVEBARRIER = 88, //.k=BR //.numdst=0 //.hasType=false
- BRIG_OPCODE_ARRIVEFBAR = 89, //.k=BR //.numdst=0 //.hasType=false
- BRIG_OPCODE_INITFBAR = 90, //.k=BASIC_NO_TYPE //.numdst=0 //.hasType=false
- BRIG_OPCODE_JOINFBAR = 91, //.k=BR //.numdst=0 //.hasType=false
- BRIG_OPCODE_LEAVEFBAR = 92, //.k=BR //.numdst=0 //.hasType=false
- BRIG_OPCODE_RELEASEFBAR = 93, //.k=BASIC_NO_TYPE //.numdst=0
- BRIG_OPCODE_WAITFBAR = 94, //.k=BR //.numdst=0 //.hasType=false
- BRIG_OPCODE_LDF = 95,
- BRIG_OPCODE_ACTIVELANECOUNT = 96, //.k=LANE
- BRIG_OPCODE_ACTIVELANEID = 97, //.k=LANE
- BRIG_OPCODE_ACTIVELANEMASK = 98, //.k=LANE //.vecOpndIndex=0
- BRIG_OPCODE_ACTIVELANEPERMUTE = 99, //.k=LANE
- BRIG_OPCODE_CALL = 100, //.k=BR //.psopnd=CallOperands //.numdst=0 //.hasType=false
- BRIG_OPCODE_SCALL = 101, //.k=BR //.psopnd=CallOperands //.numdst=0
- BRIG_OPCODE_ICALL = 102, //.k=BR //.psopnd=CallOperands //.numdst=0
- BRIG_OPCODE_RET = 103, //.k=BASIC_NO_TYPE
- BRIG_OPCODE_ALLOCA = 104, //.k=MEM
- BRIG_OPCODE_CURRENTWORKGROUPSIZE = 105,
- BRIG_OPCODE_CURRENTWORKITEMFLATID = 106,
- BRIG_OPCODE_DIM = 107,
- BRIG_OPCODE_GRIDGROUPS = 108,
- BRIG_OPCODE_GRIDSIZE = 109,
- BRIG_OPCODE_PACKETCOMPLETIONSIG = 110,
- BRIG_OPCODE_PACKETID = 111,
- BRIG_OPCODE_WORKGROUPID = 112,
- BRIG_OPCODE_WORKGROUPSIZE = 113,
- BRIG_OPCODE_WORKITEMABSID = 114,
- BRIG_OPCODE_WORKITEMFLATABSID = 115,
- BRIG_OPCODE_WORKITEMFLATID = 116,
- BRIG_OPCODE_WORKITEMID = 117,
- BRIG_OPCODE_CLEARDETECTEXCEPT = 118, //.numdst=0
- BRIG_OPCODE_GETDETECTEXCEPT = 119,
- BRIG_OPCODE_SETDETECTEXCEPT = 120, //.numdst=0
- BRIG_OPCODE_ADDQUEUEWRITEINDEX = 121, //.k=QUEUE
- BRIG_OPCODE_CASQUEUEWRITEINDEX = 122, //.k=QUEUE
- BRIG_OPCODE_LDQUEUEREADINDEX = 123, //.k=QUEUE
- BRIG_OPCODE_LDQUEUEWRITEINDEX = 124, //.k=QUEUE
- BRIG_OPCODE_STQUEUEREADINDEX = 125, //.k=QUEUE //.numdst=0
- BRIG_OPCODE_STQUEUEWRITEINDEX = 126, //.k=QUEUE //.numdst=0
- BRIG_OPCODE_CLOCK = 127,
- BRIG_OPCODE_CUID = 128,
- BRIG_OPCODE_DEBUGTRAP = 129, //.numdst=0
- BRIG_OPCODE_GROUPBASEPTR = 130,
- BRIG_OPCODE_KERNARGBASEPTR = 131,
- BRIG_OPCODE_LANEID = 132,
- BRIG_OPCODE_MAXCUID = 133,
- BRIG_OPCODE_MAXWAVEID = 134,
- BRIG_OPCODE_NULLPTR = 135, //.k=SEG
- BRIG_OPCODE_WAVEID = 136,
- BRIG_OPCODE_FIRST_USER_DEFINED = 32768, //.skip
-
- BRIG_OPCODE_GCNMADU = (1u << 15) | 0, //.k=BASIC_NO_TYPE
- BRIG_OPCODE_GCNMADS = (1u << 15) | 1, //.k=BASIC_NO_TYPE
- BRIG_OPCODE_GCNMAX3 = (1u << 15) | 2,
- BRIG_OPCODE_GCNMIN3 = (1u << 15) | 3,
- BRIG_OPCODE_GCNMED3 = (1u << 15) | 4,
- BRIG_OPCODE_GCNFLDEXP = (1u << 15) | 5, //.k=BASIC_OR_MOD
- BRIG_OPCODE_GCNFREXP_EXP = (1u << 15) | 6, //.k=BASIC_OR_MOD
- BRIG_OPCODE_GCNFREXP_MANT = (1u << 15) | 7, //.k=BASIC_OR_MOD
- BRIG_OPCODE_GCNTRIG_PREOP = (1u << 15) | 8, //.k=BASIC_OR_MOD
- BRIG_OPCODE_GCNBFM = (1u << 15) | 9,
- BRIG_OPCODE_GCNLD = (1u << 15) | 10, //.k=MEM //.has_memory_order //.vecOpndIndex=0
- BRIG_OPCODE_GCNST = (1u << 15) | 11, //.k=MEM //.has_memory_order //.vecOpndIndex=0
- BRIG_OPCODE_GCNATOMIC = (1u << 15) | 12, //.k=ATOMIC
- BRIG_OPCODE_GCNATOMICNORET = (1u << 15) | 13, //.k=ATOMIC //.mnemo=gcn_atomicNoRet
- BRIG_OPCODE_GCNSLEEP = (1u << 15) | 14,
- BRIG_OPCODE_GCNPRIORITY = (1u << 15) | 15,
- BRIG_OPCODE_GCNREGIONALLOC = (1u << 15) | 16, //.k=BASIC_NO_TYPE //.mnemo=gcn_region_alloc
- BRIG_OPCODE_GCNMSAD = (1u << 15) | 17,
- BRIG_OPCODE_GCNQSAD = (1u << 15) | 18,
- BRIG_OPCODE_GCNMQSAD = (1u << 15) | 19,
- BRIG_OPCODE_GCNMQSAD4 = (1u << 15) | 20, //.k=BASIC_NO_TYPE
- BRIG_OPCODE_GCNSADW = (1u << 15) | 21,
- BRIG_OPCODE_GCNSADD = (1u << 15) | 22,
- BRIG_OPCODE_GCNCONSUME = (1u << 15) | 23, //.k=ADDR //.mnemo=gcn_atomic_consume
- BRIG_OPCODE_GCNAPPEND = (1u << 15) | 24, //.k=ADDR //.mnemo=gcn_atomic_append
- BRIG_OPCODE_GCNB4XCHG = (1u << 15) | 25, //.mnemo=gcn_b4xchg
- BRIG_OPCODE_GCNB32XCHG = (1u << 15) | 26, //.mnemo=gcn_b32xchg
- BRIG_OPCODE_GCNMAX = (1u << 15) | 27,
- BRIG_OPCODE_GCNMIN = (1u << 15) | 28,
- BRIG_OPCODE_GCNDIVRELAXED = (1u << 15) | 29, //.k=BASIC_OR_MOD
- BRIG_OPCODE_GCNDIVRELAXEDNARROW = (1u << 15) | 30,
-
- BRIG_OPCODE_AMDRDIMAGELOD = (1u << 15) | 31, //.k=IMAGE //.mnemo=amd_rdimagelod //.vecOpndIndex=0
- BRIG_OPCODE_AMDRDIMAGEGRAD = (1u << 15) | 32, //.k=IMAGE //.mnemo=amd_rdimagegrad //.vecOpndIndex=0
- BRIG_OPCODE_AMDLDIMAGEMIP = (1u << 15) | 33, //.k=IMAGE //.mnemo=amd_ldimagemip //.vecOpndIndex=0
- BRIG_OPCODE_AMDSTIMAGEMIP = (1u << 15) | 34, //.k=IMAGE //.mnemo=amd_stimagemip //.vecOpndIndex=0 //.numdst=0
- BRIG_OPCODE_AMDQUERYIMAGE = (1u << 15) | 35 //.k=QUERY_IMAGE //.mnemo=amd_queryimage
-};
-
-enum BrigPack {
-
- //.tdcaption="Packing"
- //
- //.mnemo={ s/^BRIG_PACK_//;s/SAT$/_sat/;lc }
- //.mnemo_token=_EMPacking
- //
- //.print=$mnemo{ "_$mnemo" }
-
- BRIG_PACK_NONE = 0, //.mnemo=""
- BRIG_PACK_PP = 1,
- BRIG_PACK_PS = 2,
- BRIG_PACK_SP = 3,
- BRIG_PACK_SS = 4,
- BRIG_PACK_S = 5,
- BRIG_PACK_P = 6,
- BRIG_PACK_PPSAT = 7,
- BRIG_PACK_PSSAT = 8,
- BRIG_PACK_SPSAT = 9,
- BRIG_PACK_SSSAT = 10,
- BRIG_PACK_SSAT = 11,
- BRIG_PACK_PSAT = 12
-};
-
-enum BrigProfile {
-
- //.mnemo={ s/^BRIG_PROFILE_//;'$'.lc }
- //.mnemo_token=ETargetProfile
- //
- //.print=$mnemo{ $mnemo }
-
- BRIG_PROFILE_BASE = 0,
- BRIG_PROFILE_FULL = 1,
-
- BRIG_PROFILE_UNDEF = 2 //.skip
-};
-
-enum BrigRegisterKind {
-
- //.mnemo={ s/^BRIG_REGISTER_KIND_//;'$'.lc(substr($_,0,1)) }
- //
- //.bits={ }
- //.bits_switch //.bits_proto="unsigned getRegBits(BrigRegisterKind16_t arg)" //.bits_default="return (unsigned)-1"
- //
- //.nollvm
-
- BRIG_REGISTER_KIND_CONTROL = 0, //.bits=1
- BRIG_REGISTER_KIND_SINGLE = 1, //.bits=32
- BRIG_REGISTER_KIND_DOUBLE = 2, //.bits=64
- BRIG_REGISTER_KIND_QUAD = 3 //.bits=128
-};
-
-enum BrigRound {
-
- //.mnemo={}
- //.mnemo_fn=round2str //.mnemo_token=_EMRound
- //
- //.sat={/_SAT$/? "true" : "false"}
- //.sat_switch //.sat_proto="bool isSatRounding(unsigned rounding)" //.sat_arg="rounding"
- //.sat_default="return false"
- //
- //.sig={/_SIGNALING_/? "true" : "false"}
- //.sig_switch //.sig_proto="bool isSignalingRounding(unsigned rounding)" //.sig_arg="rounding"
- //.sig_default="return false"
- //
- //.int={/_INTEGER_/? "true" : "false"}
- //.int_switch //.int_proto="bool isIntRounding(unsigned rounding)" //.int_arg="rounding"
- //.int_default="return false"
- //
- //.flt={/_FLOAT_/? "true" : "false"}
- //.flt_switch //.flt_proto="bool isFloatRounding(unsigned rounding)" //.flt_arg="rounding"
- //.flt_default="return false"
- //
- //.print=$mnemo{ "_$mnemo" }
-
- BRIG_ROUND_NONE = 0, //.no_mnemo
- BRIG_ROUND_FLOAT_DEFAULT = 1, //.no_mnemo
- BRIG_ROUND_FLOAT_NEAR_EVEN = 2, //.mnemo=near
- BRIG_ROUND_FLOAT_ZERO = 3, //.mnemo=zero
- BRIG_ROUND_FLOAT_PLUS_INFINITY = 4, //.mnemo=up
- BRIG_ROUND_FLOAT_MINUS_INFINITY = 5, //.mnemo=down
- BRIG_ROUND_INTEGER_NEAR_EVEN = 6, //.mnemo=neari
- BRIG_ROUND_INTEGER_ZERO = 7, //.mnemo=zeroi
- BRIG_ROUND_INTEGER_PLUS_INFINITY = 8, //.mnemo=upi
- BRIG_ROUND_INTEGER_MINUS_INFINITY = 9, //.mnemo=downi
- BRIG_ROUND_INTEGER_NEAR_EVEN_SAT = 10, //.mnemo=neari_sat
- BRIG_ROUND_INTEGER_ZERO_SAT = 11, //.mnemo=zeroi_sat
- BRIG_ROUND_INTEGER_PLUS_INFINITY_SAT = 12, //.mnemo=upi_sat
- BRIG_ROUND_INTEGER_MINUS_INFINITY_SAT = 13, //.mnemo=downi_sat
- BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN = 14, //.mnemo=sneari
- BRIG_ROUND_INTEGER_SIGNALING_ZERO = 15, //.mnemo=szeroi
- BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY = 16, //.mnemo=supi
- BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY = 17, //.mnemo=sdowni
- BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN_SAT = 18, //.mnemo=sneari_sat
- BRIG_ROUND_INTEGER_SIGNALING_ZERO_SAT = 19, //.mnemo=szeroi_sat
- BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY_SAT = 20, //.mnemo=supi_sat
- BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY_SAT = 21 //.mnemo=sdowni_sat
-};
-
-enum BrigSamplerAddressing {
-
- //.mnemo={ s/^BRIG_ADDRESSING_//;lc }
- //.mnemo_token=ESamplerAddressingMode
-
- BRIG_ADDRESSING_UNDEFINED = 0,
- BRIG_ADDRESSING_CLAMP_TO_EDGE = 1,
- BRIG_ADDRESSING_CLAMP_TO_BORDER = 2,
- BRIG_ADDRESSING_REPEAT = 3,
- BRIG_ADDRESSING_MIRRORED_REPEAT = 4,
-
- BRIG_ADDRESSING_FIRST_USER_DEFINED = 128 //.skip
-};
-
-enum BrigSamplerCoordNormalization {
-
- //.mnemo={ s/^BRIG_COORD_//;lc }
- //.mnemo_token=ESamplerCoord
- //
- //.print=$mnemo{ $mnemo }
-
- BRIG_COORD_UNNORMALIZED = 0,
- BRIG_COORD_NORMALIZED = 1
-};
-
-enum BrigSamplerFilter {
-
- //.mnemo={ s/^BRIG_FILTER_//;lc }
- //
- //.print=$mnemo{ $mnemo }
-
- BRIG_FILTER_NEAREST = 0,
- BRIG_FILTER_LINEAR = 1,
-
- BRIG_FILTER_FIRST_USER_DEFINED = 128 //.skip
-};
-
-enum BrigSamplerQuery {
-
- //.mnemo={ s/^BRIG_SAMPLER_QUERY_//;lc }
- //.mnemo_token=_EMSamplerQuery
- //
- //.print=$mnemo{ $mnemo }
-
- BRIG_SAMPLER_QUERY_ADDRESSING = 0,
- BRIG_SAMPLER_QUERY_COORD = 1,
- BRIG_SAMPLER_QUERY_FILTER = 2
-};
-
-enum BrigSectionIndex {
-
- //.nollvm
- //
- //.mnemo={ s/^BRIG_SECTION_INDEX_/HSA_/;lc }
-
- BRIG_SECTION_INDEX_DATA = 0,
- BRIG_SECTION_INDEX_CODE = 1,
- BRIG_SECTION_INDEX_OPERAND = 2,
- BRIG_SECTION_INDEX_BEGIN_IMPLEMENTATION_DEFINED = 3,
-
- // used internally
- BRIG_SECTION_INDEX_IMPLEMENTATION_DEFINED = BRIG_SECTION_INDEX_BEGIN_IMPLEMENTATION_DEFINED //.skip
-};
-
-enum BrigSegCvtModifierMask {
- BRIG_SEG_CVT_NONULL = 1 //.mnemo="nonull" //.print="_nonull"
-};
-
-enum BrigSegment {
-
- //.mnemo={ s/^BRIG_SEGMENT_//;lc}
- //.mnemo_token=_EMSegment
- //.mnemo_context=EInstModifierContext
- //
- //.print=$mnemo{ $mnemo ? "_$mnemo" : "" }
-
- BRIG_SEGMENT_NONE = 0, //.mnemo=""
- BRIG_SEGMENT_FLAT = 1, //.mnemo=""
- BRIG_SEGMENT_GLOBAL = 2,
- BRIG_SEGMENT_READONLY = 3,
- BRIG_SEGMENT_KERNARG = 4,
- BRIG_SEGMENT_GROUP = 5,
- BRIG_SEGMENT_PRIVATE = 6,
- BRIG_SEGMENT_SPILL = 7,
- BRIG_SEGMENT_ARG = 8,
-
- BRIG_SEGMENT_FIRST_USER_DEFINED = 128, //.skip
-
- BRIG_SEGMENT_AMD_GCN = 9, //.mnemo="region"
-};
-
-enum BrigPackedTypeBits {
-
- //.nodump
- //
- //.nollvm
-
- BRIG_TYPE_BASE_SIZE = 5,
- BRIG_TYPE_PACK_SIZE = 2,
- BRIG_TYPE_ARRAY_SIZE = 1,
-
- BRIG_TYPE_BASE_SHIFT = 0,
- BRIG_TYPE_PACK_SHIFT = BRIG_TYPE_BASE_SHIFT + BRIG_TYPE_BASE_SIZE,
- BRIG_TYPE_ARRAY_SHIFT = BRIG_TYPE_PACK_SHIFT + BRIG_TYPE_PACK_SIZE,
-
- BRIG_TYPE_BASE_MASK = ((1 << BRIG_TYPE_BASE_SIZE) - 1) << BRIG_TYPE_BASE_SHIFT,
- BRIG_TYPE_PACK_MASK = ((1 << BRIG_TYPE_PACK_SIZE) - 1) << BRIG_TYPE_PACK_SHIFT,
- BRIG_TYPE_ARRAY_MASK = ((1 << BRIG_TYPE_ARRAY_SIZE) - 1) << BRIG_TYPE_ARRAY_SHIFT,
-
- BRIG_TYPE_PACK_NONE = 0 << BRIG_TYPE_PACK_SHIFT,
- BRIG_TYPE_PACK_32 = 1 << BRIG_TYPE_PACK_SHIFT,
- BRIG_TYPE_PACK_64 = 2 << BRIG_TYPE_PACK_SHIFT,
- BRIG_TYPE_PACK_128 = 3 << BRIG_TYPE_PACK_SHIFT,
-
- BRIG_TYPE_ARRAY = 1 << BRIG_TYPE_ARRAY_SHIFT
-};
-
-enum BrigType {
-
- //.numBits={ /ARRAY$/ ? undef : /([0-9]+)X([0-9]+)/ ? $1*$2 : /([0-9]+)/ ? $1 : undef }
- //.numBits_switch //.numBits_proto="unsigned getBrigTypeNumBits(unsigned arg)" //.numBits_default="assert(0); return 0"
- //.numBytes=$numBits{ $numBits > 1 ? $numBits/8 : undef }
- //.numBytes_switch //.numBytes_proto="unsigned getBrigTypeNumBytes(unsigned arg)" //.numBytes_default="assert(0); return 0"
- //
- //.mnemo={ s/^BRIG_TYPE_//;lc }
- //.mnemo_token=_EMType
- //
- //.array={/ARRAY$/?"true":"false"}
- //.array_switch //.array_proto="bool isArrayType(unsigned type)" //.array_arg="type"
- //.array_default="return false"
- //
- //.a2e={/(.*)_ARRAY$/? $1 : "BRIG_TYPE_NONE"}
- //.a2e_switch //.a2e_proto="unsigned arrayType2elementType(unsigned type)" //.a2e_arg="type"
- //.a2e_default="return BRIG_TYPE_NONE"
- //
- //.e2a={/_ARRAY$/? "BRIG_TYPE_NONE" : /_NONE$/ ? "BRIG_TYPE_NONE" : /_B1$/ ? "BRIG_TYPE_NONE" : $_ . "_ARRAY"}
- //.e2a_switch //.e2a_proto="unsigned elementType2arrayType(unsigned type)" //.e2a_arg="type"
- //.e2a_default="return BRIG_TYPE_NONE"
- //
- //.t2s={s/^BRIG_TYPE_//;lc s/_ARRAY$/[]/;lc}
- //.t2s_switch //.t2s_proto="const char* type2name(unsigned type)" //.t2s_arg="type"
- //.t2s_default="return NULL"
- //
- //.dispatch_switch //.dispatch_incfile=TemplateUtilities
- //.dispatch_proto="template<typename RetType, typename Visitor>\nRetType dispatchByType_gen(unsigned type, Visitor& v)"
- //.dispatch={ /ARRAY$/ ? "v.visitNone(type)" : /^BRIG_TYPE_([BUSF]|SIG)[0-9]+/ ? "v.template visit< BrigTypeTraits<$_> >()" : "v.visitNone(type)" }
- //.dispatch_arg="type" //.dispatch_default="return v.visitNone(type)"
- //
- //- .tdname=BrigType
- //
- //.print=$mnemo{ "_$mnemo" }
-
- BRIG_TYPE_NONE = 0, //.mnemo="" //.print=""
- BRIG_TYPE_U8 = 1, //.ctype=uint8_t
- BRIG_TYPE_U16 = 2, //.ctype=uint16_t
- BRIG_TYPE_U32 = 3, //.ctype=uint32_t
- BRIG_TYPE_U64 = 4, //.ctype=uint64_t
- BRIG_TYPE_S8 = 5, //.ctype=int8_t
- BRIG_TYPE_S16 = 6, //.ctype=int16_t
- BRIG_TYPE_S32 = 7, //.ctype=int32_t
- BRIG_TYPE_S64 = 8, //.ctype=int64_t
- BRIG_TYPE_F16 = 9, //.ctype=f16_t
- BRIG_TYPE_F32 = 10, //.ctype=float
- BRIG_TYPE_F64 = 11, //.ctype=double
- BRIG_TYPE_B1 = 12, //.ctype=bool //.numBytes=1
- BRIG_TYPE_B8 = 13, //.ctype=uint8_t
- BRIG_TYPE_B16 = 14, //.ctype=uint16_t
- BRIG_TYPE_B32 = 15, //.ctype=uint32_t
- BRIG_TYPE_B64 = 16, //.ctype=uint64_t
- BRIG_TYPE_B128 = 17, //.ctype=b128_t
- BRIG_TYPE_SAMP = 18, //.mnemo=samp //.numBits=64
- BRIG_TYPE_ROIMG = 19, //.mnemo=roimg //.numBits=64
- BRIG_TYPE_WOIMG = 20, //.mnemo=woimg //.numBits=64
- BRIG_TYPE_RWIMG = 21, //.mnemo=rwimg //.numBits=64
- BRIG_TYPE_SIG32 = 22, //.mnemo=sig32 //.numBits=64
- BRIG_TYPE_SIG64 = 23, //.mnemo=sig64 //.numBits=64
-
- BRIG_TYPE_U8X4 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_32, //.ctype=uint8_t
- BRIG_TYPE_U8X8 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_64, //.ctype=uint8_t
- BRIG_TYPE_U8X16 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_128, //.ctype=uint8_t
- BRIG_TYPE_U16X2 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_32, //.ctype=uint16_t
- BRIG_TYPE_U16X4 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_64, //.ctype=uint16_t
- BRIG_TYPE_U16X8 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_128, //.ctype=uint16_t
- BRIG_TYPE_U32X2 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_64, //.ctype=uint32_t
- BRIG_TYPE_U32X4 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_128, //.ctype=uint32_t
- BRIG_TYPE_U64X2 = BRIG_TYPE_U64 | BRIG_TYPE_PACK_128, //.ctype=uint64_t
- BRIG_TYPE_S8X4 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_32, //.ctype=int8_t
- BRIG_TYPE_S8X8 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_64, //.ctype=int8_t
- BRIG_TYPE_S8X16 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_128, //.ctype=int8_t
- BRIG_TYPE_S16X2 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_32, //.ctype=int16_t
- BRIG_TYPE_S16X4 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_64, //.ctype=int16_t
- BRIG_TYPE_S16X8 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_128, //.ctype=int16_t
- BRIG_TYPE_S32X2 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_64, //.ctype=int32_t
- BRIG_TYPE_S32X4 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_128, //.ctype=int32_t
- BRIG_TYPE_S64X2 = BRIG_TYPE_S64 | BRIG_TYPE_PACK_128, //.ctype=int64_t
- BRIG_TYPE_F16X2 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_32, //.ctype=f16_t
- BRIG_TYPE_F16X4 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_64, //.ctype=f16_t
- BRIG_TYPE_F16X8 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_128, //.ctype=f16_t
- BRIG_TYPE_F32X2 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_64, //.ctype=float
- BRIG_TYPE_F32X4 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_128, //.ctype=float
- BRIG_TYPE_F64X2 = BRIG_TYPE_F64 | BRIG_TYPE_PACK_128, //.ctype=double
-
- BRIG_TYPE_U8_ARRAY = BRIG_TYPE_U8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_U16_ARRAY = BRIG_TYPE_U16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_U32_ARRAY = BRIG_TYPE_U32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_U64_ARRAY = BRIG_TYPE_U64 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_S8_ARRAY = BRIG_TYPE_S8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_S16_ARRAY = BRIG_TYPE_S16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_S32_ARRAY = BRIG_TYPE_S32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_S64_ARRAY = BRIG_TYPE_S64 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_F16_ARRAY = BRIG_TYPE_F16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_F32_ARRAY = BRIG_TYPE_F32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_F64_ARRAY = BRIG_TYPE_F64 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_B8_ARRAY = BRIG_TYPE_B8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_B16_ARRAY = BRIG_TYPE_B16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_B32_ARRAY = BRIG_TYPE_B32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_B64_ARRAY = BRIG_TYPE_B64 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_B128_ARRAY = BRIG_TYPE_B128 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_SAMP_ARRAY = BRIG_TYPE_SAMP | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_ROIMG_ARRAY = BRIG_TYPE_ROIMG | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_WOIMG_ARRAY = BRIG_TYPE_WOIMG | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_RWIMG_ARRAY = BRIG_TYPE_RWIMG | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_SIG32_ARRAY = BRIG_TYPE_SIG32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_SIG64_ARRAY = BRIG_TYPE_SIG64 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_U8X4_ARRAY = BRIG_TYPE_U8X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_U8X8_ARRAY = BRIG_TYPE_U8X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_U8X16_ARRAY = BRIG_TYPE_U8X16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_U16X2_ARRAY = BRIG_TYPE_U16X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_U16X4_ARRAY = BRIG_TYPE_U16X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_U16X8_ARRAY = BRIG_TYPE_U16X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_U32X2_ARRAY = BRIG_TYPE_U32X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_U32X4_ARRAY = BRIG_TYPE_U32X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_U64X2_ARRAY = BRIG_TYPE_U64X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_S8X4_ARRAY = BRIG_TYPE_S8X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_S8X8_ARRAY = BRIG_TYPE_S8X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_S8X16_ARRAY = BRIG_TYPE_S8X16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_S16X2_ARRAY = BRIG_TYPE_S16X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_S16X4_ARRAY = BRIG_TYPE_S16X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_S16X8_ARRAY = BRIG_TYPE_S16X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_S32X2_ARRAY = BRIG_TYPE_S32X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_S32X4_ARRAY = BRIG_TYPE_S32X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_S64X2_ARRAY = BRIG_TYPE_S64X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_F16X2_ARRAY = BRIG_TYPE_F16X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_F16X4_ARRAY = BRIG_TYPE_F16X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_F16X8_ARRAY = BRIG_TYPE_F16X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_F32X2_ARRAY = BRIG_TYPE_F32X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_F32X4_ARRAY = BRIG_TYPE_F32X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
- BRIG_TYPE_F64X2_ARRAY = BRIG_TYPE_F64X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print=""
-
- // Used internally
- BRIG_TYPE_INVALID = (unsigned) -1 //.skip
-};
-
-enum BrigVariableModifierMask {
-
- //.nodump
-
- BRIG_VARIABLE_DEFINITION = 1,
- BRIG_VARIABLE_CONST = 2
-};
-
-enum BrigWidth {
-
- //.tddef=1
- //
- //.print={ s/^BRIG_WIDTH_//; "_width($_)" }
-
- BRIG_WIDTH_NONE = 0,
- BRIG_WIDTH_1 = 1,
- BRIG_WIDTH_2 = 2,
- BRIG_WIDTH_4 = 3,
- BRIG_WIDTH_8 = 4,
- BRIG_WIDTH_16 = 5,
- BRIG_WIDTH_32 = 6,
- BRIG_WIDTH_64 = 7,
- BRIG_WIDTH_128 = 8,
- BRIG_WIDTH_256 = 9,
- BRIG_WIDTH_512 = 10,
- BRIG_WIDTH_1024 = 11,
- BRIG_WIDTH_2048 = 12,
- BRIG_WIDTH_4096 = 13,
- BRIG_WIDTH_8192 = 14,
- BRIG_WIDTH_16384 = 15,
- BRIG_WIDTH_32768 = 16,
- BRIG_WIDTH_65536 = 17,
- BRIG_WIDTH_131072 = 18,
- BRIG_WIDTH_262144 = 19,
- BRIG_WIDTH_524288 = 20,
- BRIG_WIDTH_1048576 = 21,
- BRIG_WIDTH_2097152 = 22,
- BRIG_WIDTH_4194304 = 23,
- BRIG_WIDTH_8388608 = 24,
- BRIG_WIDTH_16777216 = 25,
- BRIG_WIDTH_33554432 = 26,
- BRIG_WIDTH_67108864 = 27,
- BRIG_WIDTH_134217728 = 28,
- BRIG_WIDTH_268435456 = 29,
- BRIG_WIDTH_536870912 = 30,
- BRIG_WIDTH_1073741824 = 31,
- BRIG_WIDTH_2147483648 = 32,
- BRIG_WIDTH_WAVESIZE = 33,
- BRIG_WIDTH_ALL = 34,
-
- BRIG_WIDTH_LAST //.skip
-};
-
-struct BrigUInt64 { //.isroot //.standalone
- uint32_t lo; //.defValue=0
- uint32_t hi; //.defValue=0
-
- //+hcode KLASS& operator=(uint64_t rhs);
- //+hcode operator uint64_t();
- //+implcode inline KLASS& KLASS::operator=(uint64_t rhs) { lo() = (uint32_t)rhs; hi() = (uint32_t)(rhs >> 32); return *this; }
- //+implcode inline KLASS::operator uint64_t() { return ((uint64_t)hi()) << 32 | lo(); }
-};
-
-struct BrigAluModifier { //.isroot //.standalone
- BrigAluModifier8_t allBits; //.defValue=0
- //^^ bool ftz; //.wtype=BitValRef<0>
-};
-
-struct BrigBase { //.nowrap
- uint16_t byteCount;
- BrigKind16_t kind;
-};
-
-//.alias Code:Base { //.generic //.isroot //.section=BRIG_SECTION_INDEX_CODE };
-//.alias Directive:Code { //.generic };
-//.alias Operand:Base { //.generic //.isroot //.section=BRIG_SECTION_INDEX_OPERAND };
-
-struct BrigData {
- //.nowrap
- uint32_t byteCount;
- uint8_t bytes[1];
-};
-
-struct BrigExecutableModifier { //.isroot //.standalone
- BrigExecutableModifier8_t allBits; //.defValue=0
- //^^ bool isDefinition; //.wtype=BitValRef<0>
-};
-
-struct BrigMemoryModifier { //.isroot //.standalone
- BrigMemoryModifier8_t allBits; //.defValue=0
- //^^ bool isConst; //.wtype=BitValRef<0>
-};
-
-struct BrigSegCvtModifier { //.isroot //.standalone
- BrigSegCvtModifier8_t allBits; //.defValue=0
- //^^ bool isNoNull; //.wtype=BitValRef<0>
-};
-
-struct BrigVariableModifier { //.isroot //.standalone
- BrigVariableModifier8_t allBits; //.defValue=0
-
- //^^ bool isDefinition; //.wtype=BitValRef<0>
- //^^ bool isConst; //.wtype=BitValRef<1>
-};
-
-struct BrigDirectiveArgBlockEnd {
- BrigBase base;
-};
-
-struct BrigDirectiveArgBlockStart {
- BrigBase base;
-};
-
-struct BrigDirectiveComment {
- BrigBase base;
- BrigDataOffsetString32_t name;
-};
-
-struct BrigDirectiveControl {
- BrigBase base;
- BrigControlDirective16_t control;
- uint16_t reserved; //.defValue=0
- BrigDataOffsetOperandList32_t operands;
-};
-
-struct BrigDirectiveExecutable { //.generic
- BrigBase base;
- BrigDataOffsetString32_t name;
- uint16_t outArgCount; //.defValue=0
- uint16_t inArgCount; //.defValue=0
- BrigCodeOffset32_t firstInArg;
- BrigCodeOffset32_t firstCodeBlockEntry;
- BrigCodeOffset32_t nextModuleEntry;
- BrigExecutableModifier modifier; //.acc=subItem<ExecutableModifier> //.wtype=ExecutableModifier
- BrigLinkage8_t linkage;
- uint16_t reserved; //.defValue=0
-};
-
-//.alias DirectiveKernel:DirectiveExecutable { };
-//.alias DirectiveFunction:DirectiveExecutable { };
-//.alias DirectiveSignature:DirectiveExecutable { };
-//.alias DirectiveIndirectFunction:DirectiveExecutable { };
-
-struct BrigDirectiveExtension {
- BrigBase base;
- BrigDataOffsetString32_t name;
-};
-
-struct BrigDirectiveFbarrier {
- BrigBase base;
- BrigDataOffsetString32_t name;
- BrigVariableModifier modifier; //.acc=subItem<VariableModifier> //.wtype=VariableModifier
- BrigLinkage8_t linkage;
- uint16_t reserved; //.defValue=0
-};
-
-struct BrigDirectiveLabel {
- BrigBase base;
- BrigDataOffsetString32_t name;
-};
-
-struct BrigDirectiveLoc {
- BrigBase base;
- BrigDataOffsetString32_t filename;
- uint32_t line;
- uint32_t column; //.defValue=1
-};
-
-struct BrigDirectiveNone { //.enum=BRIG_KIND_NONE
- BrigBase base;
-};
-
-struct BrigDirectivePragma {
- BrigBase base;
- BrigDataOffsetOperandList32_t operands;
-};
-
-struct BrigDirectiveVariable {
- BrigBase base;
- BrigDataOffsetString32_t name;
- BrigOperandOffset32_t init;
- BrigType16_t type;
-
- //+hcode bool isArray();
- //+implcode inline bool KLASS::isArray() { return isArrayType(type()); }
-
- //+hcode unsigned elementType();
- //+implcode inline unsigned KLASS::elementType() { return isArray()? arrayType2elementType(type()) : type(); }
-
- BrigSegment8_t segment;
- BrigAlignment8_t align;
- BrigUInt64 dim; //.acc=subItem<UInt64> //.wtype=UInt64
- BrigVariableModifier modifier; //.acc=subItem<VariableModifier> //.wtype=VariableModifier
- BrigLinkage8_t linkage;
- BrigAllocation8_t allocation;
- uint8_t reserved; //.defValue=0
-};
-
-struct BrigDirectiveModule {
- BrigBase base;
- BrigDataOffsetString32_t name;
- BrigVersion32_t hsailMajor; //.wtype=ValRef<uint32_t>
- BrigVersion32_t hsailMinor; //.wtype=ValRef<uint32_t>
- BrigProfile8_t profile;
- BrigMachineModel8_t machineModel;
- BrigRound8_t defaultFloatRound;
- uint8_t reserved; //.defValue=0
-};
-
-struct BrigInstBase { //.wname=Inst //.generic //.parent=BrigCode
- BrigBase base;
- BrigOpcode16_t opcode;
- BrigType16_t type;
- BrigDataOffsetOperandList32_t operands;
-
- //+hcode Operand operand(int index);
- //+implcode inline Operand KLASS::operand(int index) { return operands()[index]; }
-};
-
-struct BrigInstAddr {
- BrigInstBase base;
- BrigSegment8_t segment;
- uint8_t reserved[3]; //.defValue=0
-};
-
-struct BrigInstAtomic {
- BrigInstBase base;
- BrigSegment8_t segment;
- BrigMemoryOrder8_t memoryOrder;
- BrigMemoryScope8_t memoryScope;
- BrigAtomicOperation8_t atomicOperation;
- uint8_t equivClass;
- uint8_t reserved[3]; //.defValue=0
-};
-
-struct BrigInstBasic {
- BrigInstBase base;
-};
-
-struct BrigInstBr {
- BrigInstBase base;
- BrigWidth8_t width;
- uint8_t reserved[3]; //.defValue=0
-};
-
-struct BrigInstCmp {
- BrigInstBase base;
- BrigType16_t sourceType;
- BrigAluModifier modifier; //.acc=subItem<AluModifier> //.wtype=AluModifier
- BrigCompareOperation8_t compare;
- BrigPack8_t pack;
- uint8_t reserved[3]; //.defValue=0
-};
-
-struct BrigInstCvt {
- BrigInstBase base;
- BrigType16_t sourceType;
- BrigAluModifier modifier; //.acc=subItem<AluModifier> //.wtype=AluModifier
- BrigRound8_t round;
-};
-
-struct BrigInstImage {
- BrigInstBase base;
- BrigType16_t imageType;
- BrigType16_t coordType;
- BrigImageGeometry8_t geometry;
- uint8_t equivClass;
- uint16_t reserved; //.defValue=0
-};
-
-struct BrigInstLane {
- BrigInstBase base;
- BrigType16_t sourceType;
- BrigWidth8_t width;
- uint8_t reserved; //.defValue=0
-};
-
-struct BrigInstMem {
- BrigInstBase base;
- BrigSegment8_t segment;
- BrigAlignment8_t align;
- uint8_t equivClass;
- BrigWidth8_t width;
- BrigMemoryModifier modifier; //.acc=subItem<MemoryModifier> //.wtype=MemoryModifier
- uint8_t reserved[3]; //.defValue=0
-};
-
-struct BrigInstMemFence {
- BrigInstBase base;
- BrigMemoryOrder8_t memoryOrder;
- BrigMemoryScope8_t globalSegmentMemoryScope;
- BrigMemoryScope8_t groupSegmentMemoryScope;
- BrigMemoryScope8_t imageSegmentMemoryScope;
-};
-
-struct BrigInstMod {
- BrigInstBase base;
- BrigAluModifier modifier; //.acc=subItem<AluModifier> //.wtype=AluModifier
- BrigRound8_t round;
- BrigPack8_t pack;
- uint8_t reserved; //.defValue=0
-};
-
-struct BrigInstQueryImage {
- BrigInstBase base;
- BrigType16_t imageType;
- BrigImageGeometry8_t geometry;
- BrigImageQuery8_t imageQuery;
-};
-
-struct BrigInstQuerySampler {
- BrigInstBase base;
- BrigSamplerQuery8_t samplerQuery;
- uint8_t reserved[3]; //.defValue=0
-};
-
-struct BrigInstQueue {
- BrigInstBase base;
- BrigSegment8_t segment;
- BrigMemoryOrder8_t memoryOrder;
- uint16_t reserved; //.defValue=0
-};
-
-struct BrigInstSeg {
- BrigInstBase base;
- BrigSegment8_t segment;
- uint8_t reserved[3]; //.defValue=0
-};
-
-struct BrigInstSegCvt {
- BrigInstBase base;
- BrigType16_t sourceType;
- BrigSegment8_t segment;
- BrigSegCvtModifier modifier; //.acc=subItem<SegCvtModifier> //.wtype=SegCvtModifier
-};
-
-struct BrigInstSignal {
- BrigInstBase base;
- BrigType16_t signalType;
- BrigMemoryOrder8_t memoryOrder;
- BrigAtomicOperation8_t signalOperation;
-};
-
-struct BrigInstSourceType {
- BrigInstBase base;
- BrigType16_t sourceType;
- uint16_t reserved; //.defValue=0
-};
-
-typedef BrigInstSourceType BrigInstPopcount;
-
-struct BrigOperandAddress {
- BrigBase base;
- BrigCodeOffset32_t symbol; //.wtype=ItemRef<DirectiveVariable>
- BrigOperandOffset32_t reg; //.wtype=ItemRef<OperandRegister>
- BrigUInt64 offset; //.acc=subItem<UInt64> //.wtype=UInt64
-};
-
-struct BrigOperandAlign {
- BrigBase base;
- BrigAlignment8_t align;
- uint8_t reserved[3]; //.defValue=0
-};
-
-struct BrigOperandCodeList {
- BrigBase base;
- BrigDataOffsetCodeList32_t elements;
-
- //+hcode unsigned elementCount();
- //+implcode inline unsigned KLASS::elementCount() { return elements().size(); }
- //+hcode Code elements(int index);
- //+implcode inline Code KLASS::elements(int index) { return elements()[index]; }
-};
-
-struct BrigOperandCodeRef {
- BrigBase base;
- BrigCodeOffset32_t ref;
-};
-
-struct BrigOperandConstantBytes {
- BrigBase base;
- BrigType16_t type; //.defValue=0
- uint16_t reserved; //.defValue=0
- BrigDataOffsetString32_t bytes;
-};
-
-struct BrigOperandConstantOperandList {
- BrigBase base;
- BrigType16_t type;
- uint16_t reserved; //.defValue=0
- BrigDataOffsetOperandList32_t elements;
-
- //+hcode unsigned elementCount();
- //+implcode inline unsigned KLASS::elementCount() { return elements().size(); }
- //+hcode Operand elements(int index);
- //+implcode inline Operand KLASS::elements(int index) { return elements()[index]; }
-};
-
-struct BrigOperandConstantImage {
- BrigBase base;
- BrigType16_t type;
- BrigImageGeometry8_t geometry;
- BrigImageChannelOrder8_t channelOrder;
- BrigImageChannelType8_t channelType;
- uint8_t reserved[3]; //.defValue=0
- BrigUInt64 width; //.acc=subItem<UInt64> //.wtype=UInt64
- BrigUInt64 height; //.acc=subItem<UInt64> //.wtype=UInt64
- BrigUInt64 depth; //.acc=subItem<UInt64> //.wtype=UInt64
- BrigUInt64 array; //.acc=subItem<UInt64> //.wtype=UInt64
-};
-
-struct BrigOperandOperandList {
- BrigBase base;
- BrigDataOffsetOperandList32_t elements;
-
- //+hcode unsigned elementCount();
- //+implcode inline unsigned KLASS::elementCount() { return elements().size(); }
- //+hcode Operand elements(int index);
- //+implcode inline Operand KLASS::elements(int index) { return elements()[index]; }
-};
-
-struct BrigOperandRegister {
- BrigBase base;
- BrigRegisterKind16_t regKind;
- uint16_t regNum;
-};
-
-struct BrigOperandConstantSampler {
- BrigBase base;
- BrigType16_t type;
- BrigSamplerCoordNormalization8_t coord;
- BrigSamplerFilter8_t filter;
- BrigSamplerAddressing8_t addressing;
- uint8_t reserved[3]; //.defValue=0
-};
-
-struct BrigOperandString {
- BrigBase base;
- BrigDataOffsetString32_t string;
-};
-
-struct BrigOperandWavesize {
- BrigBase base;
-};
-
-//.ignore{
-
-enum BrigExceptionsMask {
- BRIG_EXCEPTIONS_INVALID_OPERATION = 1 << 0,
- BRIG_EXCEPTIONS_DIVIDE_BY_ZERO = 1 << 1,
- BRIG_EXCEPTIONS_OVERFLOW = 1 << 2,
- BRIG_EXCEPTIONS_UNDERFLOW = 1 << 3,
- BRIG_EXCEPTIONS_INEXACT = 1 << 4,
-
- BRIG_EXCEPTIONS_FIRST_USER_DEFINED = 1 << 16
-};
-
-struct BrigSectionHeader {
- uint64_t byteCount;
- uint32_t headerByteCount;
- uint32_t nameLength;
- uint8_t name[1];
-};
-
-#define MODULE_IDENTIFICATION_LENGTH (8)
-
-struct BrigModuleHeader {
- char identification[MODULE_IDENTIFICATION_LENGTH];
- BrigVersion32_t brigMajor;
- BrigVersion32_t brigMinor;
- uint64_t byteCount;
- uint8_t hash[64];
- uint32_t reserved;
- uint32_t sectionCount;
- uint64_t sectionIndex;
-};
-
-typedef BrigModuleHeader* BrigModule_t;
-
-#endif // defined(INCLUDED_BRIG_H)
-//}
+++ /dev/null
-# -*- mode:python -*-
-
-# Copyright (c) 2015 Advanced Micro Devices, Inc.
-# All rights reserved.
-#
-# For use for simulation and test purposes only
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# 1. Redistributions of source code must retain the above copyright notice,
-# this list of conditions and the following disclaimer.
-#
-# 2. Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-#
-# 3. Neither the name of the copyright holder nor the names of its contributors
-# may be used to endorse or promote products derived from this software
-# without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-#
-# Author: Anthony Gutierrez
-#
-
-Import('*')
-
-if not env['BUILD_GPU']:
- Return()
-
-if env['TARGET_GPU_ISA'] == 'hsail':
- env.Command(['insts/gen_decl.hh', 'gpu_decoder.cc', 'insts/gen_exec.cc'],
- 'gen.py', '$SOURCE $TARGETS')
-
- Source('gpu_decoder.cc')
- Source('insts/branch.cc')
- Source('insts/gen_exec.cc')
- Source('insts/gpu_static_inst.cc')
- Source('insts/main.cc')
- Source('insts/pseudo_inst.cc')
- Source('insts/mem.cc')
- Source('operand.cc')
+++ /dev/null
-# -*- mode:python -*-
-
-#
-# Copyright (c) 2015 Advanced Micro Devices, Inc.
-# All rights reserved.
-#
-# For use for simulation and test purposes only
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# 1. Redistributions of source code must retain the above copyright notice,
-# this list of conditions and the following disclaimer.
-#
-# 2. Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-#
-# 3. Neither the name of the copyright holder nor the names of its contributors
-# may be used to endorse or promote products derived from this software
-# without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-#
-# Author: Anthony Gutierrez
-#
-
-Import('*')
-
-all_gpu_isa_list.append('hsail')
+++ /dev/null
-#!/usr/bin/env python
-# Copyright (c) 2015 Advanced Micro Devices, Inc.
-# All rights reserved.
-#
-# For use for simulation and test purposes only
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# 1. Redistributions of source code must retain the above copyright notice,
-# this list of conditions and the following disclaimer.
-#
-# 2. Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-#
-# 3. Neither the name of the copyright holder nor the names of its contributors
-# may be used to endorse or promote products derived from this software
-# without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-#
-# Author: Steve Reinhardt
-#
-
-from __future__ import print_function
-
-import sys, re
-
-from m5.util import code_formatter
-
-if len(sys.argv) != 4:
- print("Error: need 3 args (file names)")
- sys.exit(0)
-
-header_code = code_formatter()
-decoder_code = code_formatter()
-exec_code = code_formatter()
-
-###############
-#
-# Generate file prologs (includes etc.)
-#
-###############
-
-header_code('''
-#include "arch/hsail/insts/decl.hh"
-#include "base/bitfield.hh"
-#include "gpu-compute/hsail_code.hh"
-#include "gpu-compute/wavefront.hh"
-
-namespace HsailISA
-{
-''')
-header_code.indent()
-
-decoder_code('''
-#include "arch/hsail/gpu_decoder.hh"
-#include "arch/hsail/insts/branch.hh"
-#include "arch/hsail/insts/decl.hh"
-#include "arch/hsail/insts/gen_decl.hh"
-#include "arch/hsail/insts/mem.hh"
-#include "arch/hsail/insts/mem_impl.hh"
-#include "gpu-compute/brig_object.hh"
-
-namespace HsailISA
-{
- std::vector<GPUStaticInst*> Decoder::decodedInsts;
-
- GPUStaticInst*
- Decoder::decode(MachInst machInst)
- {
- using namespace Brig;
-
- const BrigInstBase *ib = machInst.brigInstBase;
- const BrigObject *obj = machInst.brigObj;
-
- switch(ib->opcode) {
-''')
-decoder_code.indent()
-decoder_code.indent()
-
-exec_code('''
-#include "arch/hsail/insts/gen_decl.hh"
-#include "base/intmath.hh"
-
-namespace HsailISA
-{
-''')
-exec_code.indent()
-
-###############
-#
-# Define code templates for class declarations (for header file)
-#
-###############
-
-# Basic header template for an instruction stub.
-header_template_stub = '''
-class $class_name : public $base_class
-{
- public:
- typedef $base_class Base;
-
- $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
- : Base(ib, obj, "$opcode")
- {
- }
-
- void execute(GPUDynInstPtr gpuDynInst);
-};
-
-'''
-
-# Basic header template for an instruction with no template parameters.
-header_template_nodt = '''
-class $class_name : public $base_class
-{
- public:
- typedef $base_class Base;
-
- $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
- : Base(ib, obj, "$opcode")
- {
- }
-
- void execute(GPUDynInstPtr gpuDynInst);
-};
-
-'''
-
-# Basic header template for an instruction with a single DataType
-# template parameter.
-header_template_1dt = '''
-template<typename DataType>
-class $class_name : public $base_class<DataType>
-{
- public:
- typedef $base_class<DataType> Base;
- typedef typename DataType::CType CType;
-
- $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
- : Base(ib, obj, "$opcode")
- {
- }
-
- void execute(GPUDynInstPtr gpuDynInst);
-};
-
-'''
-
-header_template_1dt_noexec = '''
-template<typename DataType>
-class $class_name : public $base_class<DataType>
-{
- public:
- typedef $base_class<DataType> Base;
- typedef typename DataType::CType CType;
-
- $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
- : Base(ib, obj, "$opcode")
- {
- }
-};
-
-'''
-
-# Same as header_template_1dt, except the base class has a second
-# template parameter NumSrcOperands to allow a variable number of
-# source operands. Note that since this is implemented with an array,
-# it only works for instructions where all sources are of the same
-# type (like most arithmetics).
-header_template_1dt_varsrcs = '''
-template<typename DataType>
-class $class_name : public $base_class<DataType, $num_srcs>
-{
- public:
- typedef $base_class<DataType, $num_srcs> Base;
- typedef typename DataType::CType CType;
-
- $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
- : Base(ib, obj, "$opcode")
- {
- }
-
- void execute(GPUDynInstPtr gpuDynInst);
-};
-
-'''
-
-# Header template for instruction with two DataType template
-# parameters, one for the dest and one for the source. This is used
-# by compare and convert.
-header_template_2dt = '''
-template<typename DestDataType, class SrcDataType>
-class $class_name : public $base_class<DestDataType, SrcDataType>
-{
- public:
- typedef $base_class<DestDataType, SrcDataType> Base;
- typedef typename DestDataType::CType DestCType;
- typedef typename SrcDataType::CType SrcCType;
-
- $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
- : Base(ib, obj, "$opcode")
- {
- }
-
- void execute(GPUDynInstPtr gpuDynInst);
-};
-
-'''
-
-# Class-declaration template for each instruction base class.  gen()
-# indexes this table with the base class name after stripping any
-# trailing template arguments, and passes the selected template to
-# header_code.  LdInst, StInst and SpecialInstNoSrcNoDest map to the
-# empty string (presumably so that no declaration is emitted for
-# them -- confirm against the code formatter).
-header_templates = {
- 'ArithInst': header_template_1dt_varsrcs,
- 'CmovInst': header_template_1dt,
- 'ClassInst': header_template_1dt,
- 'ShiftInst': header_template_1dt,
- 'ExtractInsertInst': header_template_1dt,
- 'CmpInst': header_template_2dt,
- 'CvtInst': header_template_2dt,
- 'PopcountInst': header_template_2dt,
- 'LdInst': '',
- 'StInst': '',
- 'SpecialInstNoSrc': header_template_nodt,
- 'SpecialInst1Src': header_template_nodt,
- 'SpecialInstNoSrcNoDest': '',
- 'Stub': header_template_stub,
-}
-
-###############
-#
-# Define code templates for exec functions
-#
-###############
-
-# exec function body
-exec_template_stub = '''
-void
-$class_name::execute(GPUDynInstPtr gpuDynInst)
-{
- fatal("instruction unimplemented %s\\n", gpuDynInst->disassemble());
-}
-
-'''
-exec_template_nodt_nosrc = '''
-void
-$class_name::execute(GPUDynInstPtr gpuDynInst)
-{
- Wavefront *w = gpuDynInst->wavefront();
-
- typedef Base::DestCType DestCType;
-
- const VectorMask &mask = w->getPred();
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- DestCType dest_val = $expr;
- this->dest.set(w, lane, dest_val);
- }
- }
-}
-
-'''
-
-exec_template_nodt_1src = '''
-void
-$class_name::execute(GPUDynInstPtr gpuDynInst)
-{
- Wavefront *w = gpuDynInst->wavefront();
-
- typedef Base::DestCType DestCType;
- typedef Base::SrcCType SrcCType;
-
- const VectorMask &mask = w->getPred();
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- SrcCType src_val0 = this->src0.get<SrcCType>(w, lane);
- DestCType dest_val = $expr;
-
- this->dest.set(w, lane, dest_val);
- }
- }
-}
-
-'''
-
-exec_template_1dt_varsrcs = '''
-template<typename DataType>
-void
-$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
-{
- Wavefront *w = gpuDynInst->wavefront();
-
- const VectorMask &mask = w->getPred();
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- CType dest_val;
- if ($dest_is_src_flag) {
- dest_val = this->dest.template get<CType>(w, lane);
- }
-
- CType src_val[$num_srcs];
-
- for (int i = 0; i < $num_srcs; ++i) {
- src_val[i] = this->src[i].template get<CType>(w, lane);
- }
-
- dest_val = (CType)($expr);
-
- this->dest.set(w, lane, dest_val);
- }
- }
-}
-
-'''
-
-exec_template_1dt_3srcs = '''
-template<typename DataType>
-void
-$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
-{
- Wavefront *w = gpuDynInst->wavefront();
-
- typedef typename Base::Src0CType Src0T;
- typedef typename Base::Src1CType Src1T;
- typedef typename Base::Src2CType Src2T;
-
- const VectorMask &mask = w->getPred();
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- CType dest_val;
-
- if ($dest_is_src_flag) {
- dest_val = this->dest.template get<CType>(w, lane);
- }
-
- Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
- Src1T src_val1 = this->src1.template get<Src1T>(w, lane);
- Src2T src_val2 = this->src2.template get<Src2T>(w, lane);
-
- dest_val = $expr;
-
- this->dest.set(w, lane, dest_val);
- }
- }
-}
-
-'''
-
-exec_template_1dt_2src_1dest = '''
-template<typename DataType>
-void
-$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
-{
- Wavefront *w = gpuDynInst->wavefront();
-
- typedef typename Base::DestCType DestT;
- typedef CType Src0T;
- typedef typename Base::Src1CType Src1T;
-
- const VectorMask &mask = w->getPred();
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- DestT dest_val;
- if ($dest_is_src_flag) {
- dest_val = this->dest.template get<DestT>(w, lane);
- }
- Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
- Src1T src_val1 = this->src1.template get<Src1T>(w, lane);
-
- dest_val = $expr;
-
- this->dest.set(w, lane, dest_val);
- }
- }
-}
-
-'''
-
-exec_template_shift = '''
-template<typename DataType>
-void
-$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
-{
- Wavefront *w = gpuDynInst->wavefront();
-
- const VectorMask &mask = w->getPred();
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- CType dest_val;
-
- if ($dest_is_src_flag) {
- dest_val = this->dest.template get<CType>(w, lane);
- }
-
- CType src_val0 = this->src0.template get<CType>(w, lane);
- uint32_t src_val1 = this->src1.template get<uint32_t>(w, lane);
-
- dest_val = $expr;
-
- this->dest.set(w, lane, dest_val);
- }
- }
-}
-
-'''
-
-exec_template_2dt = '''
-template<typename DestDataType, class SrcDataType>
-void
-$class_name<DestDataType, SrcDataType>::execute(GPUDynInstPtr gpuDynInst)
-{
- Wavefront *w = gpuDynInst->wavefront();
-
- const VectorMask &mask = w->getPred();
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- DestCType dest_val;
- SrcCType src_val[$num_srcs];
-
- for (int i = 0; i < $num_srcs; ++i) {
- src_val[i] = this->src[i].template get<SrcCType>(w, lane);
- }
-
- dest_val = $expr;
-
- this->dest.set(w, lane, dest_val);
- }
- }
-}
-
-'''
-
-# execute() template for each instruction base class, keyed the same
-# way as header_templates (base class name without template
-# arguments).  There is deliberately no 'ShiftInst' key: gen() emits
-# exec_template_shift for that base class without consulting this
-# table.  LdInst, StInst and SpecialInstNoSrcNoDest map to the empty
-# string.
-exec_templates = {
- 'ArithInst': exec_template_1dt_varsrcs,
- 'CmovInst': exec_template_1dt_3srcs,
- 'ExtractInsertInst': exec_template_1dt_3srcs,
- 'ClassInst': exec_template_1dt_2src_1dest,
- 'CmpInst': exec_template_2dt,
- 'CvtInst': exec_template_2dt,
- 'PopcountInst': exec_template_2dt,
- 'LdInst': '',
- 'StInst': '',
- 'SpecialInstNoSrc': exec_template_nodt_nosrc,
- 'SpecialInst1Src': exec_template_nodt_1src,
- 'SpecialInstNoSrcNoDest': '',
- 'Stub': exec_template_stub,
-}
-
-###############
-#
-# Define code templates for the decoder cases
-#
-###############
-
-# decode template for nodt-opcode case
-decode_nodt_template = '''
- case BRIG_OPCODE_$brig_opcode_upper: return $constructor(ib, obj);'''
-
-decode_case_prolog_class_inst = '''
- case BRIG_OPCODE_$brig_opcode_upper:
- {
- //const BrigOperandBase *baseOp = obj->getOperand(ib->operands[1]);
- BrigType16_t type = ((BrigInstSourceType*)ib)->sourceType;
- //switch (baseOp->kind) {
- // case BRIG_OPERAND_REG:
- // type = ((const BrigOperandReg*)baseOp)->type;
- // break;
- // case BRIG_OPERAND_IMMED:
- // type = ((const BrigOperandImmed*)baseOp)->type;
- // break;
- // default:
- // fatal("CLASS unrecognized kind of operand %d\\n",
- // baseOp->kind);
- //}
- switch (type) {'''
-
-# common prolog for 1dt- or 2dt-opcode case: switch on data type
-decode_case_prolog = '''
- case BRIG_OPCODE_$brig_opcode_upper:
- {
- switch (ib->type) {'''
-
-# single-level decode case entry (for 1dt opcodes)
-decode_case_entry = \
-' case BRIG_TYPE_$type_name: return $constructor(ib, obj);'
-
-decode_store_prolog = \
-' case BRIG_TYPE_$type_name: {'
-
-decode_store_case_epilog = '''
- }'''
-
-decode_store_case_entry = \
-' return $constructor(ib, obj);'
-
-# common epilog for type switch
-decode_case_epilog = '''
- default: fatal("$brig_opcode_upper: unrecognized type %d\\n",
- ib->type);
- }
- }
- break;'''
-
-# Additional templates for nested decode on a second type field (for
-# compare and convert). These are used in place of the
-# decode_case_entry template to create a second-level switch on the
-# second type field inside each case of the first-level type switch.
-# Because the name and location of the second type can vary, the Brig
-# instruction type must be provided in $brig_type, and the name of the
-# second type field must be provided in $type2_field.
-decode_case2_prolog = '''
- case BRIG_TYPE_$type_name:
- switch (((Brig$brig_type*)ib)->$type2_field) {'''
-
-decode_case2_entry = \
-' case BRIG_TYPE_$type2_name: return $constructor(ib, obj);'
-
-decode_case2_epilog = '''
- default: fatal("$brig_opcode_upper: unrecognized $type2_field %d\\n",
- ((Brig$brig_type*)ib)->$type2_field);
- }
- break;'''
-
-# Figure out how many source operands an expr needs by looking for the
-# highest-numbered srcN value referenced. Since sources are numbered
-# starting at 0, the return value is N+1.
-def num_src_operands(expr):
- if expr.find('src2') != -1:
- return 3
- elif expr.find('src1') != -1:
- return 2
- elif expr.find('src0') != -1:
- return 1
- else:
- return 0
-
-###############
-#
-# Define final code generation methods
-#
-# The gen_nodt, and gen_1dt, and gen_2dt methods are the interface for
-# generating actual instructions.
-#
-###############
-
-# Generate class declaration, exec function, and decode switch case
-# for a brig_opcode.  The 'types' parameter is a list or tuple of
-# types for which the instruction should be instantiated; when it is
-# empty or a plain string, a single untyped decode case is emitted
-# instead of a type switch.
-#
-# Parameters:
-#   brig_opcode        - opcode name; also used as the C++ class name,
-#                        upper-cased for the BRIG_OPCODE_ case label and
-#                        lower-cased for the "$opcode" string.
-#   types              - type names for the first-level type switch.
-#   expr               - C++ expression for the result, written in terms
-#                        of srcN and dest; required for 'ArithInst'.
-#   base_class         - base class name, optionally with template
-#                        arguments; selects the header/exec templates.
-#   type2_info         - (field name, type names) for a nested switch on
-#                        a second type field, or None for one level.
-#   constructor_prefix - text placed in front of the class name in each
-#                        decode case ('new ' by default; callers in
-#                        this file also pass 'decode').
-#   is_store           - use the store-specific decode case templates.
-#
-# NOTE(review): the templates refer to $class_name, $opcode,
-# $constructor, $num_srcs, $type_name and so on, and none of those are
-# passed to header_code/exec_code/decoder_code explicitly.  Presumably
-# the code formatter resolves them from this function's local
-# variables, so a local that looks unused here may still be read by a
-# template -- confirm before renaming or removing any of them.
-def gen(brig_opcode, types=None, expr=None, base_class='ArithInst',
- type2_info=None, constructor_prefix='new ', is_store=False):
- brig_opcode_upper = brig_opcode.upper()
- class_name = brig_opcode
- opcode = class_name.lower()
-
- if base_class == 'ArithInst':
- # note that expr must be provided with ArithInst so we can
- # derive num_srcs for the template
- assert expr
-
- if expr:
- # Derive several bits of info from expr. If expr is not used,
- # this info will be irrelevant.
- num_srcs = num_src_operands(expr)
- # if the RHS expression includes 'dest', then we're doing an RMW
- # on the reg and we need to treat it like a source
- dest_is_src = expr.find('dest') != -1
- dest_is_src_flag = str(dest_is_src).lower() # for C++
- # Rewrite srcN into the variable names the exec templates declare:
- # an array element src_val[N] for the base classes listed in the
- # elif, and a scalar src_valN for everything else.
- if base_class in ['ShiftInst']:
- expr = re.sub(r'\bsrc(\d)\b', r'src_val\1', expr)
- elif base_class in ['ArithInst', 'CmpInst', 'CvtInst', 'PopcountInst']:
- expr = re.sub(r'\bsrc(\d)\b', r'src_val[\1]', expr)
- else:
- expr = re.sub(r'\bsrc(\d)\b', r'src_val\1', expr)
- expr = re.sub(r'\bdest\b', r'dest_val', expr)
-
- # Strip template arguments off of base class before looking up
- # appropriate templates
- base_class_base = re.sub(r'<.*>$', '', base_class)
- header_code(header_templates[base_class_base])
-
- # The execute() body goes to exec_code only for the SpecialInst and
- # Stub base classes; for every other base class it is emitted through
- # header_code.  ShiftInst uses exec_template_shift directly because
- # exec_templates has no entry for it.
- if base_class.startswith('SpecialInst') or base_class.startswith('Stub'):
- exec_code(exec_templates[base_class_base])
- elif base_class.startswith('ShiftInst'):
- header_code(exec_template_shift)
- else:
- header_code(exec_templates[base_class_base])
-
- if not types or isinstance(types, str):
- # Just a single type
- constructor = constructor_prefix + class_name
- decoder_code(decode_nodt_template)
- else:
- # multiple types, need at least one level of decode
- if brig_opcode == 'Class':
- decoder_code(decode_case_prolog_class_inst)
- else:
- decoder_code(decode_case_prolog)
- if not type2_info:
- if not is_store:
- # single list of types, do basic one-level decode
- for type_name in types:
- full_class_name = '%s<%s>' % (class_name, type_name.upper())
- constructor = constructor_prefix + full_class_name
- decoder_code(decode_case_entry)
- else:
- # single list of types, do basic one-level decode; the store
- # class takes a second template argument rebuilt from the
- # first letter of the type name and its numeric size
- for type_name in types:
- decoder_code(decode_store_prolog)
- type_size = int(re.findall(r'[0-9]+', type_name)[0])
- src_size = 32
- type_type = type_name[0]
- full_class_name = '%s<%s,%s>' % (class_name, \
- type_name.upper(), \
- '%s%d' % \
- (type_type.upper(), \
- type_size))
- constructor = constructor_prefix + full_class_name
- decoder_code(decode_store_case_entry)
- decoder_code(decode_store_case_epilog)
- else:
- # need secondary type switch (convert, compare)
- # unpack extra info on second switch
- (type2_field, types2) = type2_info
- brig_type = 'Inst%s' % brig_opcode
- for type_name in types:
- decoder_code(decode_case2_prolog)
- # fmt keeps one %s open for the second type name
- fmt = '%s<%s,%%s>' % (class_name, type_name.upper())
- for type2_name in types2:
- full_class_name = fmt % type2_name.upper()
- constructor = constructor_prefix + full_class_name
- decoder_code(decode_case2_entry)
-
- decoder_code(decode_case2_epilog)
-
- decoder_code(decode_case_epilog)
-
-###############
-#
-# Generate instructions
-#
-###############
-
-# handy abbreviations for common sets of types
-
-# arithmetic ops are typically defined only on 32- and 64-bit sizes
-arith_int_types = ('S32', 'U32', 'S64', 'U64')
-arith_float_types = ('F32', 'F64')
-arith_types = arith_int_types + arith_float_types
-
-bit_types = ('B1', 'B32', 'B64')
-
-all_int_types = ('S8', 'U8', 'S16', 'U16') + arith_int_types
-
-# I think you might be able to do 'f16' memory ops too, but we'll
-# ignore them for now.
-mem_types = all_int_types + arith_float_types
-mem_atom_types = all_int_types + ('B32', 'B64')
-
-##### Arithmetic & logical operations
-gen('Add', arith_types, 'src0 + src1')
-gen('Sub', arith_types, 'src0 - src1')
-gen('Mul', arith_types, 'src0 * src1')
-gen('Div', arith_types, 'src0 / src1')
-gen('Min', arith_types, 'std::min(src0, src1)')
-gen('Max', arith_types, 'std::max(src0, src1)')
-gen('Gcnmin', arith_types, 'std::min(src0, src1)')
-
-gen('CopySign', arith_float_types,
- 'src1 < 0 ? -std::abs(src0) : std::abs(src0)')
-gen('Sqrt', arith_float_types, 'sqrt(src0)')
-gen('Floor', arith_float_types, 'floor(src0)')
-
-# "fast" sqrt... same as slow for us
-gen('Nsqrt', arith_float_types, 'sqrt(src0)')
-gen('Nrsqrt', arith_float_types, '1.0/sqrt(src0)')
-gen('Nrcp', arith_float_types, '1.0/src0')
-gen('Fract', arith_float_types,
- '(src0 >= 0.0)?(src0-floor(src0)):(floor(src0)-src0)')
-
-gen('Ncos', arith_float_types, 'cos(src0)');
-gen('Nsin', arith_float_types, 'sin(src0)');
-
-gen('And', bit_types, 'src0 & src1')
-gen('Or', bit_types, 'src0 | src1')
-gen('Xor', bit_types, 'src0 ^ src1')
-
-gen('Bitselect', bit_types, '(src1 & src0) | (src2 & ~(uint64_t)src0)')
-gen('Popcount', ('U32',), '__builtin_popcount(src0)', 'PopcountInst', \
- ('sourceType', ('B32', 'B64')))
-
-gen('Shl', arith_int_types, 'src0 << (unsigned)src1', 'ShiftInst')
-gen('Shr', arith_int_types, 'src0 >> (unsigned)src1', 'ShiftInst')
-
-# gen('Mul_hi', types=('s32','u32', '??'))
-# gen('Mul24', types=('s32','u32', '??'))
-gen('Rem', arith_int_types, 'src0 - ((src0 / src1) * src1)')
-
-gen('Abs', arith_types, 'std::abs(src0)')
-gen('Neg', arith_types, '-src0')
-
-gen('Mov', bit_types + arith_types, 'src0')
-gen('Not', bit_types, 'heynot(src0)')
-
-# mad and fma differ only in rounding behavior, which we don't emulate
-# also there's an integer form of mad, but not of fma
-gen('Mad', arith_types, 'src0 * src1 + src2')
-gen('Fma', arith_float_types, 'src0 * src1 + src2')
-
-#native floating point operations
-gen('Nfma', arith_float_types, 'src0 * src1 + src2')
-
-gen('Cmov', bit_types, 'src0 ? src1 : src2', 'CmovInst')
-gen('BitAlign', bit_types, '(src0 << src2)|(src1 >> (32 - src2))')
-gen('ByteAlign', bit_types, '(src0 << 8 * src2)|(src1 >> (32 - 8 * src2))')
-
-# see base/bitfield.hh
-gen('BitExtract', arith_int_types, 'bits(src0, src1, src1 + src2 - 1)',
- 'ExtractInsertInst')
-
-gen('BitInsert', arith_int_types, 'insertBits(dest, src1, src2, src0)',
- 'ExtractInsertInst')
-
-##### Compare
-gen('Cmp', ('B1', 'S32', 'U32', 'F32'), 'compare(src0, src1, this->cmpOp)',
- 'CmpInst', ('sourceType', arith_types + bit_types))
-gen('Class', arith_float_types, 'fpclassify(src0,src1)','ClassInst')
-
-##### Conversion
-
-# Conversion operations are only defined on B1, not B32 or B64
-cvt_types = ('B1',) + mem_types
-
-gen('Cvt', cvt_types, 'src0', 'CvtInst', ('sourceType', cvt_types))
-
-
-##### Load & Store
-gen('Lda', mem_types, base_class = 'LdInst', constructor_prefix='decode')
-gen('Ld', mem_types, base_class = 'LdInst', constructor_prefix='decode')
-gen('St', mem_types, base_class = 'StInst', constructor_prefix='decode',
- is_store=True)
-gen('Atomic', mem_atom_types, base_class='StInst', constructor_prefix='decode')
-gen('AtomicNoRet', mem_atom_types, base_class='StInst',
- constructor_prefix='decode')
-
-gen('Cbr', base_class = 'LdInst', constructor_prefix='decode')
-gen('Br', base_class = 'LdInst', constructor_prefix='decode')
-
-##### Special operations
-def gen_special(brig_opcode, expr, dest_type='U32'):
- num_srcs = num_src_operands(expr)
- if num_srcs == 0:
- base_class = 'SpecialInstNoSrc<%s>' % dest_type
- elif num_srcs == 1:
- base_class = 'SpecialInst1Src<%s>' % dest_type
- else:
- assert false
-
- gen(brig_opcode, None, expr, base_class)
-
-gen_special('WorkItemId', 'w->workItemId[src0][lane]')
-gen_special('WorkItemAbsId',
- 'w->workItemId[src0][lane] + (w->workGroupId[src0] * w->workGroupSz[src0])')
-gen_special('WorkGroupId', 'w->workGroupId[src0]')
-gen_special('WorkGroupSize', 'w->workGroupSz[src0]')
-gen_special('CurrentWorkGroupSize', 'w->workGroupSz[src0]')
-gen_special('GridSize', 'w->gridSz[src0]')
-gen_special('GridGroups',
- 'divCeil(w->gridSz[src0],w->workGroupSz[src0])')
-gen_special('LaneId', 'lane')
-gen_special('WaveId', 'w->wfId')
-gen_special('Clock', 'w->computeUnit->shader->tick_cnt', 'U64')
-
-# gen_special('CU'', ')
-
-gen('Ret', base_class='SpecialInstNoSrcNoDest')
-gen('Barrier', base_class='SpecialInstNoSrcNoDest')
-gen('MemFence', base_class='SpecialInstNoSrcNoDest')
-
-# Map magic instructions to the BrigSyscall opcode
-# Magic instructions are defined in magic.hh
-#
-# In the future, real HSA kernel system calls can be implemented and coexist
-# with magic instructions.
-gen('Call', base_class='SpecialInstNoSrcNoDest')
-
-# Stubs for unimplemented instructions:
-# These may need to be implemented at some point in the future, but
-# for now we just match the instructions with their operands.
-#
-# By defining stubs for these instructions, we can work with
-# applications that have them in dead/unused code paths.
-#
-# Needed for rocm-hcc compilations for HSA backends since
-# builtins-hsail library is `cat`d onto the generated kernels.
-# The builtins-hsail library consists of handcoded hsail functions
-# that __might__ be needed by the rocm-hcc compiler in certain binaries.
-gen('Bitmask', base_class='Stub')
-gen('Bitrev', base_class='Stub')
-gen('Firstbit', base_class='Stub')
-gen('Lastbit', base_class='Stub')
-gen('Unpacklo', base_class='Stub')
-gen('Unpackhi', base_class='Stub')
-gen('Pack', base_class='Stub')
-gen('Unpack', base_class='Stub')
-gen('Lerp', base_class='Stub')
-gen('Packcvt', base_class='Stub')
-gen('Unpackcvt', base_class='Stub')
-gen('Sad', base_class='Stub')
-gen('Sadhi', base_class='Stub')
-gen('Activelanecount', base_class='Stub')
-gen('Activelaneid', base_class='Stub')
-gen('Activelanemask', base_class='Stub')
-gen('Activelanepermute', base_class='Stub')
-gen('Groupbaseptr', base_class='Stub')
-gen('Signalnoret', base_class='Stub')
-
-###############
-#
-# Generate file epilogs
-#
-###############
-header_code('''
-template<>
-inline void
-Abs<U32>::execute(GPUDynInstPtr gpuDynInst)
-{
- Wavefront *w = gpuDynInst->wavefront();
-
- const VectorMask &mask = w->getPred();
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- CType dest_val;
- CType src_val;
-
- src_val = this->src[0].template get<CType>(w, lane);
-
- dest_val = (CType)(src_val);
-
- this->dest.set(w, lane, dest_val);
- }
- }
-}
-
-template<>
-inline void
-Abs<U64>::execute(GPUDynInstPtr gpuDynInst)
-{
- Wavefront *w = gpuDynInst->wavefront();
-
- const VectorMask &mask = w->getPred();
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- CType dest_val;
- CType src_val;
-
- src_val = this->src[0].template get<CType>(w, lane);
-
- dest_val = (CType)(src_val);
-
- this->dest.set(w, lane, dest_val);
- }
- }
-}
-''')
-
-header_code.dedent()
-header_code('''
-} // namespace HsailISA
-''')
-
-# close off main decode switch
-decoder_code.dedent()
-decoder_code.dedent()
-decoder_code('''
- default: fatal("unrecognized Brig opcode %d\\n", ib->opcode);
- } // end switch(ib->opcode)
- } // end decode()
-} // namespace HsailISA
-''')
-
-exec_code.dedent()
-exec_code('''
-} // namespace HsailISA
-''')
-
-###############
-#
-# Output accumulated code to files
-#
-###############
-header_code.write(sys.argv[1])
-decoder_code.write(sys.argv[2])
-exec_code.write(sys.argv[3])
+++ /dev/null
-/*
- * Copyright (c) 2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Anthony Gutierrez
- */
-
-#ifndef __ARCH_HSAIL_GPU_DECODER_HH__
-#define __ARCH_HSAIL_GPU_DECODER_HH__
-
-#include <vector>
-
-#include "arch/hsail/gpu_types.hh"
-
-class BrigObject;
-class GPUStaticInst;
-
-namespace Brig
-{
- class BrigInstBase;
-}
-
-namespace HsailISA
-{
-    /**
-     * HSAIL instruction decoder. Decoded static instructions can be
-     * stored in a table that is a static member, and later fetched
-     * again by their index in that table.
-     */
-    class Decoder
-    {
-      public:
-        /** Decode a machine instruction; defined out of line. */
-        GPUStaticInst* decode(MachInst machInst);
-
-        /**
-         * Fetch a previously saved instruction.
-         *
-         * @param inst index handed out by saveInst()
-         * @return the saved instruction, or nullptr when inst is not
-         *         a valid index into the table
-         */
-        GPUStaticInst*
-        decode(RawMachInst inst)
-        {
-            if (inst >= decodedInsts.size()) {
-                return nullptr;
-            }
-
-            return decodedInsts.at(inst);
-        }
-
-        /**
-         * Append an instruction to the table.
-         *
-         * @param decodedInst instruction pointer to store
-         * @return index of the new entry
-         */
-        RawMachInst
-        saveInst(GPUStaticInst *decodedInst)
-        {
-            // the new entry lands at the current end of the table
-            const RawMachInst slot = decodedInsts.size();
-            decodedInsts.push_back(decodedInst);
-
-            return slot;
-        }
-
-      private:
-        static std::vector<GPUStaticInst*> decodedInsts;
-    };
-} // namespace HsailISA
-
-#endif // __ARCH_HSAIL_GPU_DECODER_HH__
+++ /dev/null
-/*
- * Copyright (c) 2016 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __ARCH_HSAIL_GPU_ISA_HH__
-#define __ARCH_HSAIL_GPU_ISA_HH__
-
-#include <cstdint>
-
-#include "arch/hsail/gpu_types.hh"
-#include "base/logging.hh"
-#include "base/types.hh"
-#include "gpu-compute/misc.hh"
-
-namespace HsailISA
-{
-    /**
-     * ISA-level hooks for HSAIL. The class has no data members; misc
-     * register accesses are rejected and the PC advances by a fixed
-     * step.
-     */
-    class GPUISA
-    {
-      public:
-        GPUISA() {}
-
-        /** Misc registers are not implemented: reports via fatal(). */
-        void
-        writeMiscReg(int opIdx, RegVal operandVal)
-        {
-            fatal("HSAIL does not implement misc registers yet\n");
-        }
-
-        /** Misc registers are not implemented: reports via fatal(). */
-        RegVal
-        readMiscReg(int opIdx) const
-        {
-            fatal("HSAIL does not implement misc registers yet\n");
-        }
-
-        /** @return always false */
-        bool
-        hasScalarUnit() const
-        {
-            return false;
-        }
-
-        /**
-         * @param old_pc current program counter
-         * @param gpuDynInst not used by this implementation
-         * @return old_pc moved forward by the size of a RawMachInst
-         */
-        uint32_t
-        advancePC(uint32_t old_pc, GPUDynInstPtr gpuDynInst)
-        {
-            const uint32_t inst_bytes = sizeof(RawMachInst);
-
-            return old_pc + inst_bytes;
-        }
-    };
-}
-
-#endif // __ARCH_HSAIL_GPU_ISA_HH__
+++ /dev/null
-/*
- * Copyright (c) 2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Anthony Gutierrez
- */
-
-#ifndef __ARCH_HSAIL_GPU_TYPES_HH__
-#define __ARCH_HSAIL_GPU_TYPES_HH__
-
-#include <cstdint>
-
-namespace Brig
-{
- class BrigInstBase;
-}
-
-class BrigObject;
-
-namespace HsailISA
-{
-    // Raw machine instruction: the raw bits our model uses to
-    // represent an actual instruction. For HSAIL this is simply an
-    // index into a list of instruction objects.
-    using RawMachInst = uint32_t;
-
-    // Machine instruction: carries more information than the machine
-    // code alone. For HSAIL the machine code itself is a BrigInstBase,
-    // and the BrigObject holds further pertinent information about
-    // operands, etc.
-    struct MachInst
-    {
-        const Brig::BrigInstBase *brigInstBase;
-        const BrigObject *brigObj;
-    };
-}
-
-#endif // __ARCH_HSAIL_GPU_TYPES_HH__
+++ /dev/null
-/*
- * Copyright (c) 2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Anthony Gutierrez
- */
-
-#include "arch/hsail/insts/branch.hh"
-
-#include "gpu-compute/hsail_code.hh"
-
-namespace HsailISA
-{
-    // Each decoder below inspects one operand of the instruction: when
-    // that operand is a register the indirect variant of the
-    // instruction is built, otherwise the direct variant.
-
-    // brn: the operand examined is the one at index 0.
-    GPUStaticInst*
-    decodeBrn(const Brig::BrigInstBase *ib, const BrigObject *obj)
-    {
-        const unsigned target_offs = obj->getOperandPtr(ib->operands, 0);
-        const Brig::BrigOperand *target = obj->getOperand(target_offs);
-
-        if (target->kind != Brig::BRIG_KIND_OPERAND_REGISTER) {
-            return new BrnDirectInst(ib, obj);
-        }
-
-        return new BrnIndirectInst(ib, obj);
-    }
-
-    // cbr: the operand examined is the one at index 1.
-    GPUStaticInst*
-    decodeCbr(const Brig::BrigInstBase *ib, const BrigObject *obj)
-    {
-        const unsigned target_offs = obj->getOperandPtr(ib->operands, 1);
-        const Brig::BrigOperand *target = obj->getOperand(target_offs);
-
-        if (target->kind != Brig::BRIG_KIND_OPERAND_REGISTER) {
-            return new CbrDirectInst(ib, obj);
-        }
-
-        return new CbrIndirectInst(ib, obj);
-    }
-
-    // br: the operand examined is the one at index 1.
-    GPUStaticInst*
-    decodeBr(const Brig::BrigInstBase *ib, const BrigObject *obj)
-    {
-        const unsigned target_offs = obj->getOperandPtr(ib->operands, 1);
-        const Brig::BrigOperand *target = obj->getOperand(target_offs);
-
-        if (target->kind != Brig::BRIG_KIND_OPERAND_REGISTER) {
-            return new BrDirectInst(ib, obj);
-        }
-
-        return new BrIndirectInst(ib, obj);
-    }
-} // namespace HsailISA
+++ /dev/null
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Steve Reinhardt
- */
-
-#ifndef __ARCH_HSAIL_INSTS_BRANCH_HH__
-#define __ARCH_HSAIL_INSTS_BRANCH_HH__
-
-#include "arch/hsail/insts/gpu_static_inst.hh"
-#include "arch/hsail/operand.hh"
-#include "gpu-compute/gpu_dyn_inst.hh"
-#include "gpu-compute/wavefront.hh"
-
-namespace HsailISA
-{
-
- // The main difference between a direct branch and an indirect branch
- // is whether the target is a register or a label, so we can share a
- // lot of code if we template the base implementation on that type.
- template<typename TargetType>
- class BrnInstBase : public HsailGPUStaticInst
- {
- public:
- void generateDisassembly() override;
-
- Brig::BrigWidth8_t width;
- TargetType target;
-
- BrnInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
- : HsailGPUStaticInst(obj, "brn")
- {
- setFlag(Branch);
- setFlag(UnconditionalJump);
- width = ((Brig::BrigInstBr*)ib)->width;
- unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
- target.init(op_offs, obj);
- }
-
- uint32_t getTargetPc() override { return target.getTarget(0, 0); }
-
- bool isVectorRegister(int operandIndex) override {
- assert(operandIndex >= 0 && operandIndex < getNumOperands());
- return target.isVectorRegister();
- }
- bool isCondRegister(int operandIndex) override {
- assert(operandIndex >= 0 && operandIndex < getNumOperands());
- return target.isCondRegister();
- }
- bool isScalarRegister(int operandIndex) override {
- assert(operandIndex >= 0 && operandIndex < getNumOperands());
- return target.isScalarRegister();
- }
-
- bool isSrcOperand(int operandIndex) override {
- assert(operandIndex >= 0 && operandIndex < getNumOperands());
- return true;
- }
-
- bool isDstOperand(int operandIndex) override {
- return false;
- }
-
- int getOperandSize(int operandIndex) override {
- assert(operandIndex >= 0 && operandIndex < getNumOperands());
- return target.opSize();
- }
-
- int
- getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
- {
- assert(operandIndex >= 0 && operandIndex < getNumOperands());
- return target.regIndex();
- }
-
- int getNumOperands() override {
- return 1;
- }
-
- void execute(GPUDynInstPtr gpuDynInst) override;
- };
-
- template<typename TargetType>
- void
- BrnInstBase<TargetType>::generateDisassembly()
- {
- std::string widthClause;
-
- if (width != 1) {
- widthClause = csprintf("_width(%d)", width);
- }
-
- disassembly = csprintf("%s%s %s", opcode, widthClause,
- target.disassemble());
- }
-
- template<typename TargetType>
- void
- BrnInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
- {
- Wavefront *w = gpuDynInst->wavefront();
-
- if (getTargetPc() == w->rpc()) {
- w->popFromReconvergenceStack();
- } else {
- // Rpc and execution mask remain the same
- w->pc(getTargetPc());
- }
- }
-
- class BrnDirectInst : public BrnInstBase<LabelOperand>
- {
- public:
- BrnDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
- : BrnInstBase<LabelOperand>(ib, obj)
- {
- }
- int numSrcRegOperands() { return 0; }
- int numDstRegOperands() { return 0; }
- };
-
- class BrnIndirectInst : public BrnInstBase<SRegOperand>
- {
- public:
- BrnIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
- : BrnInstBase<SRegOperand>(ib, obj)
- {
- }
- int numSrcRegOperands() { return target.isVectorRegister(); }
- int numDstRegOperands() { return 0; }
- };
-
- GPUStaticInst* decodeBrn(const Brig::BrigInstBase *ib,
- const BrigObject *obj);
-
- template<typename TargetType>
- class CbrInstBase : public HsailGPUStaticInst
- {
- public:
- void generateDisassembly() override;
-
- Brig::BrigWidth8_t width;
- CRegOperand cond;
- TargetType target;
-
- CbrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
- : HsailGPUStaticInst(obj, "cbr")
- {
- setFlag(Branch);
- width = ((Brig::BrigInstBr *)ib)->width;
- unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
- cond.init(op_offs, obj);
- op_offs = obj->getOperandPtr(ib->operands, 1);
- target.init(op_offs, obj);
- }
-
- uint32_t getTargetPc() override { return target.getTarget(0, 0); }
-
- void execute(GPUDynInstPtr gpuDynInst) override;
- // Assumption: Target is operand 0, Condition Register is operand 1
- bool isVectorRegister(int operandIndex) override {
- assert(operandIndex >= 0 && operandIndex < getNumOperands());
- if (!operandIndex)
- return target.isVectorRegister();
- else
- return false;
- }
- bool isCondRegister(int operandIndex) override {
- assert(operandIndex >= 0 && operandIndex < getNumOperands());
- if (!operandIndex)
- return target.isCondRegister();
- else
- return true;
- }
- bool isScalarRegister(int operandIndex) override {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (!operandIndex)
- return target.isScalarRegister();
- else
- return false;
- }
- bool isSrcOperand(int operandIndex) override {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (operandIndex == 0)
- return true;
- return false;
- }
- // both Condition Register and Target are source operands
- bool isDstOperand(int operandIndex) override {
- return false;
- }
- int getOperandSize(int operandIndex) override {
- assert(operandIndex >= 0 && operandIndex < getNumOperands());
- if (!operandIndex)
- return target.opSize();
- else
- return 1;
- }
- int
- getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
- {
- assert(operandIndex >= 0 && operandIndex < getNumOperands());
- if (!operandIndex)
- return target.regIndex();
- else
- return -1;
- }
-
- // Operands = Target, Condition Register
- int getNumOperands() override {
- return 2;
- }
- };
-
- template<typename TargetType>
- void
- CbrInstBase<TargetType>::generateDisassembly()
- {
- std::string widthClause;
-
- if (width != 1) {
- widthClause = csprintf("_width(%d)", width);
- }
-
- disassembly = csprintf("%s%s %s,%s", opcode, widthClause,
- cond.disassemble(), target.disassemble());
- }
-
- template<typename TargetType>
- void
- CbrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
- {
- Wavefront *w = gpuDynInst->wavefront();
-
- const uint32_t curr_pc M5_VAR_USED = w->pc();
- const uint32_t curr_rpc = w->rpc();
- const VectorMask curr_mask = w->execMask();
-
- /**
- * TODO: can we move this pop outside the instruction, and
- * into the wavefront?
- */
- w->popFromReconvergenceStack();
-
- // immediate post-dominator instruction
- const uint32_t rpc = static_cast<uint32_t>(ipdInstNum());
- if (curr_rpc != rpc) {
- w->pushToReconvergenceStack(rpc, curr_rpc, curr_mask);
- }
-
- // taken branch
- const uint32_t true_pc = getTargetPc();
- VectorMask true_mask;
- for (unsigned int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- true_mask[lane] = cond.get<bool>(w, lane) & curr_mask[lane];
- }
-
- // not taken branch
- const uint32_t false_pc = nextInstAddr();
- assert(true_pc != false_pc);
- if (false_pc != rpc && true_mask.count() < curr_mask.count()) {
- VectorMask false_mask = curr_mask & ~true_mask;
- w->pushToReconvergenceStack(false_pc, rpc, false_mask);
- }
-
- if (true_pc != rpc && true_mask.count()) {
- w->pushToReconvergenceStack(true_pc, rpc, true_mask);
- }
- assert(w->pc() != curr_pc);
- }
-
-
- class CbrDirectInst : public CbrInstBase<LabelOperand>
- {
- public:
- CbrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
- : CbrInstBase<LabelOperand>(ib, obj)
- {
- }
- // the source operand of a conditional branch is a Condition
- // Register which is not stored in the VRF
- // so we do not count it as a source-register operand
- // even though, formally, it is one.
- int numSrcRegOperands() { return 0; }
- int numDstRegOperands() { return 0; }
- };
-
- class CbrIndirectInst : public CbrInstBase<SRegOperand>
- {
- public:
- CbrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
- : CbrInstBase<SRegOperand>(ib, obj)
- {
- }
- // one source operand of the conditional indirect branch is a Condition
- // register which is not stored in the VRF so we do not count it
- // as a source-register operand even though, formally, it is one.
- int numSrcRegOperands() { return target.isVectorRegister(); }
- int numDstRegOperands() { return 0; }
- };
-
- GPUStaticInst* decodeCbr(const Brig::BrigInstBase *ib,
- const BrigObject *obj);
-
- template<typename TargetType>
- class BrInstBase : public HsailGPUStaticInst
- {
- public:
- void generateDisassembly() override;
-
- ImmOperand<uint32_t> width;
- TargetType target;
-
- BrInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj)
- : HsailGPUStaticInst(obj, "br")
- {
- setFlag(Branch);
- setFlag(UnconditionalJump);
- width.init(((Brig::BrigInstBr *)ib)->width, obj);
- unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
- target.init(op_offs, obj);
- }
-
- uint32_t getTargetPc() override { return target.getTarget(0, 0); }
-
- void execute(GPUDynInstPtr gpuDynInst) override;
- bool isVectorRegister(int operandIndex) override {
- assert(operandIndex >= 0 && operandIndex < getNumOperands());
- return target.isVectorRegister();
- }
- bool isCondRegister(int operandIndex) override {
- assert(operandIndex >= 0 && operandIndex < getNumOperands());
- return target.isCondRegister();
- }
- bool isScalarRegister(int operandIndex) override {
- assert(operandIndex >= 0 && operandIndex < getNumOperands());
- return target.isScalarRegister();
- }
- bool isSrcOperand(int operandIndex) override {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- return true;
- }
- bool isDstOperand(int operandIndex) override { return false; }
- int getOperandSize(int operandIndex) override {
- assert(operandIndex >= 0 && operandIndex < getNumOperands());
- return target.opSize();
- }
- int
- getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
- {
- assert(operandIndex >= 0 && operandIndex < getNumOperands());
- return target.regIndex();
- }
- int getNumOperands() override { return 1; }
- };
-
- template<typename TargetType>
- void
- BrInstBase<TargetType>::generateDisassembly()
- {
- std::string widthClause;
-
- if (width.bits != 1) {
- widthClause = csprintf("_width(%d)", width.bits);
- }
-
- disassembly = csprintf("%s%s %s", opcode, widthClause,
- target.disassemble());
- }
-
- template<typename TargetType>
- void
- BrInstBase<TargetType>::execute(GPUDynInstPtr gpuDynInst)
- {
- Wavefront *w = gpuDynInst->wavefront();
-
- if (getTargetPc() == w->rpc()) {
- w->popFromReconvergenceStack();
- } else {
- // Rpc and execution mask remain the same
- w->pc(getTargetPc());
- }
- }
-
- class BrDirectInst : public BrInstBase<LabelOperand>
- {
- public:
- BrDirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
- : BrInstBase<LabelOperand>(ib, obj)
- {
- }
-
- int numSrcRegOperands() { return 0; }
- int numDstRegOperands() { return 0; }
- };
-
- class BrIndirectInst : public BrInstBase<SRegOperand>
- {
- public:
- BrIndirectInst(const Brig::BrigInstBase *ib, const BrigObject *obj)
- : BrInstBase<SRegOperand>(ib, obj)
- {
- }
- int numSrcRegOperands() { return target.isVectorRegister(); }
- int numDstRegOperands() { return 0; }
- };
-
- GPUStaticInst* decodeBr(const Brig::BrigInstBase *ib,
- const BrigObject *obj);
-} // namespace HsailISA
-
-#endif // __ARCH_HSAIL_INSTS_BRANCH_HH__
+++ /dev/null
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Steve Reinhardt
- */
-
-#ifndef __ARCH_HSAIL_INSTS_DECL_HH__
-#define __ARCH_HSAIL_INSTS_DECL_HH__
-
-#include <cmath>
-
-#include "arch/hsail/insts/gpu_static_inst.hh"
-#include "arch/hsail/operand.hh"
-#include "debug/HSAIL.hh"
-#include "gpu-compute/gpu_dyn_inst.hh"
-#include "gpu-compute/shader.hh"
-
-namespace HsailISA
-{
- template<typename _DestOperand, typename _SrcOperand>
- class HsailOperandType
- {
- public:
- typedef _DestOperand DestOperand;
- typedef _SrcOperand SrcOperand;
- };
-
- typedef HsailOperandType<CRegOperand, CRegOrImmOperand> CRegOperandType;
- typedef HsailOperandType<SRegOperand, SRegOrImmOperand> SRegOperandType;
- typedef HsailOperandType<DRegOperand, DRegOrImmOperand> DRegOperandType;
-
- // The IsBits parameter serves only to disambiguate tbhe B* types from
- // the U* types, which otherwise would be identical (and
- // indistinguishable).
- template<typename _OperandType, typename _CType, Enums::MemType _memType,
- vgpr_type _vgprType, int IsBits=0>
- class HsailDataType
- {
- public:
- typedef _OperandType OperandType;
- typedef _CType CType;
- static const Enums::MemType memType = _memType;
- static const vgpr_type vgprType = _vgprType;
- static const char *label;
- };
-
- typedef HsailDataType<CRegOperandType, bool, Enums::M_U8, VT_32, 1> B1;
- typedef HsailDataType<SRegOperandType, uint8_t, Enums::M_U8, VT_32, 1> B8;
-
- typedef HsailDataType<SRegOperandType, uint16_t,
- Enums::M_U16, VT_32, 1> B16;
-
- typedef HsailDataType<SRegOperandType, uint32_t,
- Enums::M_U32, VT_32, 1> B32;
-
- typedef HsailDataType<DRegOperandType, uint64_t,
- Enums::M_U64, VT_64, 1> B64;
-
- typedef HsailDataType<SRegOperandType, int8_t, Enums::M_S8, VT_32> S8;
- typedef HsailDataType<SRegOperandType, int16_t, Enums::M_S16, VT_32> S16;
- typedef HsailDataType<SRegOperandType, int32_t, Enums::M_S32, VT_32> S32;
- typedef HsailDataType<DRegOperandType, int64_t, Enums::M_S64, VT_64> S64;
-
- typedef HsailDataType<SRegOperandType, uint8_t, Enums::M_U8, VT_32> U8;
- typedef HsailDataType<SRegOperandType, uint16_t, Enums::M_U16, VT_32> U16;
- typedef HsailDataType<SRegOperandType, uint32_t, Enums::M_U32, VT_32> U32;
- typedef HsailDataType<DRegOperandType, uint64_t, Enums::M_U64, VT_64> U64;
-
- typedef HsailDataType<SRegOperandType, float, Enums::M_F32, VT_32> F32;
- typedef HsailDataType<DRegOperandType, double, Enums::M_F64, VT_64> F64;
-
- template<typename DestOperandType, typename SrcOperandType,
- int NumSrcOperands>
- class CommonInstBase : public HsailGPUStaticInst
- {
- protected:
- typename DestOperandType::DestOperand dest;
- typename SrcOperandType::SrcOperand src[NumSrcOperands];
-
- void
- generateDisassembly()
- {
- disassembly = csprintf("%s%s %s", opcode, opcode_suffix(),
- dest.disassemble());
-
- for (int i = 0; i < NumSrcOperands; ++i) {
- disassembly += ",";
- disassembly += src[i].disassemble();
- }
- }
-
- virtual std::string opcode_suffix() = 0;
-
- public:
- CommonInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
- const char *opcode)
- : HsailGPUStaticInst(obj, opcode)
- {
- setFlag(ALU);
-
- unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
-
- dest.init(op_offs, obj);
-
- for (int i = 0; i < NumSrcOperands; ++i) {
- op_offs = obj->getOperandPtr(ib->operands, i + 1);
- src[i].init(op_offs, obj);
- }
- }
-
- bool isVectorRegister(int operandIndex) {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (operandIndex < NumSrcOperands)
- return src[operandIndex].isVectorRegister();
- else
- return dest.isVectorRegister();
- }
- bool isCondRegister(int operandIndex) {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (operandIndex < NumSrcOperands)
- return src[operandIndex].isCondRegister();
- else
- return dest.isCondRegister();
- }
- bool isScalarRegister(int operandIndex) {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (operandIndex < NumSrcOperands)
- return src[operandIndex].isScalarRegister();
- else
- return dest.isScalarRegister();
- }
- bool isSrcOperand(int operandIndex) {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (operandIndex < NumSrcOperands)
- return true;
- return false;
- }
-
- bool isDstOperand(int operandIndex) {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (operandIndex >= NumSrcOperands)
- return true;
- return false;
- }
- int getOperandSize(int operandIndex) {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (operandIndex < NumSrcOperands)
- return src[operandIndex].opSize();
- else
- return dest.opSize();
- }
- int
- getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
- {
- assert(operandIndex >= 0 && operandIndex < getNumOperands());
-
- if (operandIndex < NumSrcOperands)
- return src[operandIndex].regIndex();
- else
- return dest.regIndex();
- }
- int numSrcRegOperands() {
- int operands = 0;
- for (int i = 0; i < NumSrcOperands; i++) {
- if (src[i].isVectorRegister()) {
- operands++;
- }
- }
- return operands;
- }
- int numDstRegOperands() { return dest.isVectorRegister(); }
- int getNumOperands() { return NumSrcOperands + 1; }
- };
-
- template<typename DataType, int NumSrcOperands>
- class ArithInst : public CommonInstBase<typename DataType::OperandType,
- typename DataType::OperandType,
- NumSrcOperands>
- {
- public:
- std::string opcode_suffix() { return csprintf("_%s", DataType::label); }
-
- ArithInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
- const char *opcode)
- : CommonInstBase<typename DataType::OperandType,
- typename DataType::OperandType,
- NumSrcOperands>(ib, obj, opcode)
- {
- }
- };
-
- template<typename DestOperandType, typename Src0OperandType,
- typename Src1OperandType, typename Src2OperandType>
- class ThreeNonUniformSourceInstBase : public HsailGPUStaticInst
- {
- protected:
- typename DestOperandType::DestOperand dest;
- typename Src0OperandType::SrcOperand src0;
- typename Src1OperandType::SrcOperand src1;
- typename Src2OperandType::SrcOperand src2;
-
- void
- generateDisassembly()
- {
- disassembly = csprintf("%s %s,%s,%s,%s", opcode, dest.disassemble(),
- src0.disassemble(), src1.disassemble(),
- src2.disassemble());
- }
-
- public:
- ThreeNonUniformSourceInstBase(const Brig::BrigInstBase *ib,
- const BrigObject *obj,
- const char *opcode)
- : HsailGPUStaticInst(obj, opcode)
- {
- setFlag(ALU);
-
- unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
- dest.init(op_offs, obj);
-
- op_offs = obj->getOperandPtr(ib->operands, 1);
- src0.init(op_offs, obj);
-
- op_offs = obj->getOperandPtr(ib->operands, 2);
- src1.init(op_offs, obj);
-
- op_offs = obj->getOperandPtr(ib->operands, 3);
- src2.init(op_offs, obj);
- }
-
- bool isVectorRegister(int operandIndex) {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (!operandIndex)
- return src0.isVectorRegister();
- else if (operandIndex == 1)
- return src1.isVectorRegister();
- else if (operandIndex == 2)
- return src2.isVectorRegister();
- else
- return dest.isVectorRegister();
- }
- bool isCondRegister(int operandIndex) {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (!operandIndex)
- return src0.isCondRegister();
- else if (operandIndex == 1)
- return src1.isCondRegister();
- else if (operandIndex == 2)
- return src2.isCondRegister();
- else
- return dest.isCondRegister();
- }
- bool isScalarRegister(int operandIndex) {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (!operandIndex)
- return src0.isScalarRegister();
- else if (operandIndex == 1)
- return src1.isScalarRegister();
- else if (operandIndex == 2)
- return src2.isScalarRegister();
- else
- return dest.isScalarRegister();
- }
- bool isSrcOperand(int operandIndex) {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (operandIndex < 3)
- return true;
- else
- return false;
- }
- bool isDstOperand(int operandIndex) {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (operandIndex >= 3)
- return true;
- else
- return false;
- }
- int getOperandSize(int operandIndex) {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (!operandIndex)
- return src0.opSize();
- else if (operandIndex == 1)
- return src1.opSize();
- else if (operandIndex == 2)
- return src2.opSize();
- else
- return dest.opSize();
- }
-
- int
- getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (!operandIndex)
- return src0.regIndex();
- else if (operandIndex == 1)
- return src1.regIndex();
- else if (operandIndex == 2)
- return src2.regIndex();
- else
- return dest.regIndex();
- }
-
- int numSrcRegOperands() {
- int operands = 0;
- if (src0.isVectorRegister()) {
- operands++;
- }
- if (src1.isVectorRegister()) {
- operands++;
- }
- if (src2.isVectorRegister()) {
- operands++;
- }
- return operands;
- }
- int numDstRegOperands() { return dest.isVectorRegister(); }
- int getNumOperands() { return 4; }
- };
-
- template<typename DestDataType, typename Src0DataType,
- typename Src1DataType, typename Src2DataType>
- class ThreeNonUniformSourceInst :
- public ThreeNonUniformSourceInstBase<typename DestDataType::OperandType,
- typename Src0DataType::OperandType,
- typename Src1DataType::OperandType,
- typename Src2DataType::OperandType>
- {
- public:
- typedef typename DestDataType::CType DestCType;
- typedef typename Src0DataType::CType Src0CType;
- typedef typename Src1DataType::CType Src1CType;
- typedef typename Src2DataType::CType Src2CType;
-
- ThreeNonUniformSourceInst(const Brig::BrigInstBase *ib,
- const BrigObject *obj, const char *opcode)
- : ThreeNonUniformSourceInstBase<typename DestDataType::OperandType,
- typename Src0DataType::OperandType,
- typename Src1DataType::OperandType,
- typename Src2DataType::OperandType>(ib,
- obj, opcode)
- {
- }
- };
-
- template<typename DataType>
- class CmovInst : public ThreeNonUniformSourceInst<DataType, B1,
- DataType, DataType>
- {
- public:
- CmovInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
- const char *opcode)
- : ThreeNonUniformSourceInst<DataType, B1, DataType,
- DataType>(ib, obj, opcode)
- {
- }
- };
-
- template<typename DataType>
- class ExtractInsertInst : public ThreeNonUniformSourceInst<DataType,
- DataType, U32,
- U32>
- {
- public:
- ExtractInsertInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
- const char *opcode)
- : ThreeNonUniformSourceInst<DataType, DataType, U32,
- U32>(ib, obj, opcode)
- {
- }
- };
-
- template<typename DestOperandType, typename Src0OperandType,
- typename Src1OperandType>
- class TwoNonUniformSourceInstBase : public HsailGPUStaticInst
- {
- protected:
- typename DestOperandType::DestOperand dest;
- typename Src0OperandType::SrcOperand src0;
- typename Src1OperandType::SrcOperand src1;
-
- void
- generateDisassembly()
- {
- disassembly = csprintf("%s %s,%s,%s", opcode, dest.disassemble(),
- src0.disassemble(), src1.disassemble());
- }
-
-
- public:
- TwoNonUniformSourceInstBase(const Brig::BrigInstBase *ib,
- const BrigObject *obj, const char *opcode)
- : HsailGPUStaticInst(obj, opcode)
- {
- setFlag(ALU);
-
- unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
- dest.init(op_offs, obj);
-
- op_offs = obj->getOperandPtr(ib->operands, 1);
- src0.init(op_offs, obj);
-
- op_offs = obj->getOperandPtr(ib->operands, 2);
- src1.init(op_offs, obj);
- }
- bool isVectorRegister(int operandIndex) {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (!operandIndex)
- return src0.isVectorRegister();
- else if (operandIndex == 1)
- return src1.isVectorRegister();
- else
- return dest.isVectorRegister();
- }
- bool isCondRegister(int operandIndex) {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (!operandIndex)
- return src0.isCondRegister();
- else if (operandIndex == 1)
- return src1.isCondRegister();
- else
- return dest.isCondRegister();
- }
- bool isScalarRegister(int operandIndex) {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (!operandIndex)
- return src0.isScalarRegister();
- else if (operandIndex == 1)
- return src1.isScalarRegister();
- else
- return dest.isScalarRegister();
- }
- bool isSrcOperand(int operandIndex) {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (operandIndex < 2)
- return true;
- else
- return false;
- }
- bool isDstOperand(int operandIndex) {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (operandIndex >= 2)
- return true;
- else
- return false;
- }
- int getOperandSize(int operandIndex) {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (!operandIndex)
- return src0.opSize();
- else if (operandIndex == 1)
- return src1.opSize();
- else
- return dest.opSize();
- }
-
- int
- getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (!operandIndex)
- return src0.regIndex();
- else if (operandIndex == 1)
- return src1.regIndex();
- else
- return dest.regIndex();
- }
-
- int numSrcRegOperands() {
- int operands = 0;
- if (src0.isVectorRegister()) {
- operands++;
- }
- if (src1.isVectorRegister()) {
- operands++;
- }
- return operands;
- }
- int numDstRegOperands() { return dest.isVectorRegister(); }
- int getNumOperands() { return 3; }
- };
-
- template<typename DestDataType, typename Src0DataType,
- typename Src1DataType>
- class TwoNonUniformSourceInst :
- public TwoNonUniformSourceInstBase<typename DestDataType::OperandType,
- typename Src0DataType::OperandType,
- typename Src1DataType::OperandType>
- {
- public:
- typedef typename DestDataType::CType DestCType;
- typedef typename Src0DataType::CType Src0CType;
- typedef typename Src1DataType::CType Src1CType;
-
- TwoNonUniformSourceInst(const Brig::BrigInstBase *ib,
- const BrigObject *obj, const char *opcode)
- : TwoNonUniformSourceInstBase<typename DestDataType::OperandType,
- typename Src0DataType::OperandType,
- typename Src1DataType::OperandType>(ib,
- obj, opcode)
- {
- }
- };
-
- // helper function for ClassInst
- template<typename T>
- bool
- fpclassify(T src0, uint32_t src1)
- {
- int fpclass = std::fpclassify(src0);
-
- if ((src1 & 0x3) && (fpclass == FP_NAN)) {
- return true;
- }
-
- if (src0 <= -0.0) {
- if ((src1 & 0x4) && fpclass == FP_INFINITE)
- return true;
- if ((src1 & 0x8) && fpclass == FP_NORMAL)
- return true;
- if ((src1 & 0x10) && fpclass == FP_SUBNORMAL)
- return true;
- if ((src1 & 0x20) && fpclass == FP_ZERO)
- return true;
- } else {
- if ((src1 & 0x40) && fpclass == FP_ZERO)
- return true;
- if ((src1 & 0x80) && fpclass == FP_SUBNORMAL)
- return true;
- if ((src1 & 0x100) && fpclass == FP_NORMAL)
- return true;
- if ((src1 & 0x200) && fpclass == FP_INFINITE)
- return true;
- }
- return false;
- }
-
- template<typename DataType>
- class ClassInst : public TwoNonUniformSourceInst<B1, DataType, U32>
- {
- public:
- ClassInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
- const char *opcode)
- : TwoNonUniformSourceInst<B1, DataType, U32>(ib, obj, opcode)
- {
- }
- };
-
- template<typename DataType>
- class ShiftInst : public TwoNonUniformSourceInst<DataType, DataType, U32>
- {
- public:
- ShiftInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
- const char *opcode)
- : TwoNonUniformSourceInst<DataType, DataType, U32>(ib, obj, opcode)
- {
- }
- };
-
- // helper function for CmpInst
- template<typename T>
- bool
- compare(T src0, T src1, Brig::BrigCompareOperation cmpOp)
- {
- using namespace Brig;
-
- switch (cmpOp) {
- case BRIG_COMPARE_EQ:
- case BRIG_COMPARE_EQU:
- case BRIG_COMPARE_SEQ:
- case BRIG_COMPARE_SEQU:
- return (src0 == src1);
-
- case BRIG_COMPARE_NE:
- case BRIG_COMPARE_NEU:
- case BRIG_COMPARE_SNE:
- case BRIG_COMPARE_SNEU:
- return (src0 != src1);
-
- case BRIG_COMPARE_LT:
- case BRIG_COMPARE_LTU:
- case BRIG_COMPARE_SLT:
- case BRIG_COMPARE_SLTU:
- return (src0 < src1);
-
- case BRIG_COMPARE_LE:
- case BRIG_COMPARE_LEU:
- case BRIG_COMPARE_SLE:
- case BRIG_COMPARE_SLEU:
- return (src0 <= src1);
-
- case BRIG_COMPARE_GT:
- case BRIG_COMPARE_GTU:
- case BRIG_COMPARE_SGT:
- case BRIG_COMPARE_SGTU:
- return (src0 > src1);
-
- case BRIG_COMPARE_GE:
- case BRIG_COMPARE_GEU:
- case BRIG_COMPARE_SGE:
- case BRIG_COMPARE_SGEU:
- return (src0 >= src1);
-
- case BRIG_COMPARE_NUM:
- case BRIG_COMPARE_SNUM:
- return (src0 == src0) || (src1 == src1);
-
- case BRIG_COMPARE_NAN:
- case BRIG_COMPARE_SNAN:
- return (src0 != src0) || (src1 != src1);
-
- default:
- fatal("Bad cmpOp value %d\n", (int)cmpOp);
- }
- }
-
- template<typename T>
- int32_t
- firstbit(T src0)
- {
- if (!src0)
- return -1;
-
- //handle positive and negative numbers
- T tmp = ((int64_t)src0 < 0) ? (~src0) : (src0);
-
- //the starting pos is MSB
- int pos = 8 * sizeof(T) - 1;
- int cnt = 0;
-
- //search the first bit set to 1
- while (!(tmp & (1 << pos))) {
- ++cnt;
- --pos;
- }
- return cnt;
- }
-
- const char* cmpOpToString(Brig::BrigCompareOperation cmpOp);
-
- template<typename DestOperandType, typename SrcOperandType>
- class CmpInstBase : public CommonInstBase<DestOperandType, SrcOperandType,
- 2>
- {
- protected:
- Brig::BrigCompareOperation cmpOp;
-
- public:
- CmpInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
- const char *_opcode)
- : CommonInstBase<DestOperandType, SrcOperandType, 2>(ib, obj,
- _opcode)
- {
- assert(ib->base.kind == Brig::BRIG_KIND_INST_CMP);
- Brig::BrigInstCmp *i = (Brig::BrigInstCmp*)ib;
- cmpOp = (Brig::BrigCompareOperation)i->compare;
- }
- };
-
- template<typename DestDataType, typename SrcDataType>
- class CmpInst : public CmpInstBase<typename DestDataType::OperandType,
- typename SrcDataType::OperandType>
- {
- public:
- std::string
- opcode_suffix()
- {
- return csprintf("_%s_%s_%s", cmpOpToString(this->cmpOp),
- DestDataType::label, SrcDataType::label);
- }
-
- CmpInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
- const char *_opcode)
- : CmpInstBase<typename DestDataType::OperandType,
- typename SrcDataType::OperandType>(ib, obj, _opcode)
- {
- }
- };
-
- template<typename DestDataType, typename SrcDataType>
- class CvtInst : public CommonInstBase<typename DestDataType::OperandType,
- typename SrcDataType::OperandType, 1>
- {
- public:
- std::string opcode_suffix()
- {
- return csprintf("_%s_%s", DestDataType::label, SrcDataType::label);
- }
-
- CvtInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
- const char *_opcode)
- : CommonInstBase<typename DestDataType::OperandType,
- typename SrcDataType::OperandType,
- 1>(ib, obj, _opcode)
- {
- }
- };
-
- template<typename DestDataType, typename SrcDataType>
- class PopcountInst :
- public CommonInstBase<typename DestDataType::OperandType,
- typename SrcDataType::OperandType, 1>
- {
- public:
- std::string opcode_suffix()
- {
- return csprintf("_%s_%s", DestDataType::label, SrcDataType::label);
- }
-
- PopcountInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
- const char *_opcode)
- : CommonInstBase<typename DestDataType::OperandType,
- typename SrcDataType::OperandType,
- 1>(ib, obj, _opcode)
- {
- }
- };
-
- class Stub : public HsailGPUStaticInst
- {
- public:
- Stub(const Brig::BrigInstBase *ib, const BrigObject *obj,
- const char *_opcode)
- : HsailGPUStaticInst(obj, _opcode)
- {
- }
-
- void generateDisassembly() override
- {
- disassembly = csprintf("%s", opcode);
- }
-
- bool isVectorRegister(int operandIndex) override { return false; }
- bool isCondRegister(int operandIndex) override { return false; }
- bool isScalarRegister(int operandIndex) override { return false; }
- bool isSrcOperand(int operandIndex) override { return false; }
- bool isDstOperand(int operandIndex) override { return false; }
- int getOperandSize(int operandIndex) override { return 0; }
-
- int
- getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
- {
- return -1;
- }
-
- int numSrcRegOperands() override { return 0; }
- int numDstRegOperands() override { return 0; }
- int getNumOperands() override { return 0; }
- };
-
- class SpecialInstNoSrcNoDest : public HsailGPUStaticInst
- {
- public:
- SpecialInstNoSrcNoDest(const Brig::BrigInstBase *ib,
- const BrigObject *obj, const char *_opcode)
- : HsailGPUStaticInst(obj, _opcode)
- {
- }
-
- bool isVectorRegister(int operandIndex) override { return false; }
- bool isCondRegister(int operandIndex) override { return false; }
- bool isScalarRegister(int operandIndex) override { return false; }
- bool isSrcOperand(int operandIndex) override { return false; }
- bool isDstOperand(int operandIndex) override { return false; }
- int getOperandSize(int operandIndex) override { return 0; }
-
- int
- getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
- {
- return -1;
- }
-
- int numSrcRegOperands() override { return 0; }
- int numDstRegOperands() override { return 0; }
- int getNumOperands() override { return 0; }
- };
-
- template<typename DestOperandType>
- class SpecialInstNoSrcBase : public HsailGPUStaticInst
- {
- protected:
- typename DestOperandType::DestOperand dest;
-
- void generateDisassembly()
- {
- disassembly = csprintf("%s %s", opcode, dest.disassemble());
- }
-
- public:
- SpecialInstNoSrcBase(const Brig::BrigInstBase *ib,
- const BrigObject *obj, const char *_opcode)
- : HsailGPUStaticInst(obj, _opcode)
- {
- unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
- dest.init(op_offs, obj);
- }
-
- bool isVectorRegister(int operandIndex) {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- return dest.isVectorRegister();
- }
- bool isCondRegister(int operandIndex) {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- return dest.isCondRegister();
- }
- bool isScalarRegister(int operandIndex) {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- return dest.isScalarRegister();
- }
- bool isSrcOperand(int operandIndex) { return false; }
- bool isDstOperand(int operandIndex) { return true; }
- int getOperandSize(int operandIndex) {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- return dest.opSize();
- }
-
- int
- getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- return dest.regIndex();
- }
-
- int numSrcRegOperands() { return 0; }
- int numDstRegOperands() { return dest.isVectorRegister(); }
- int getNumOperands() { return 1; }
- };
-
- template<typename DestDataType>
- class SpecialInstNoSrc :
- public SpecialInstNoSrcBase<typename DestDataType::OperandType>
- {
- public:
- typedef typename DestDataType::CType DestCType;
-
- SpecialInstNoSrc(const Brig::BrigInstBase *ib, const BrigObject *obj,
- const char *_opcode)
- : SpecialInstNoSrcBase<typename DestDataType::OperandType>(ib, obj,
- _opcode)
- {
- }
- };
-
- template<typename DestOperandType>
- class SpecialInst1SrcBase : public HsailGPUStaticInst
- {
- protected:
- typedef int SrcCType; // used in execute() template
-
- typename DestOperandType::DestOperand dest;
- ImmOperand<SrcCType> src0;
-
- void
- generateDisassembly()
- {
- disassembly = csprintf("%s %s,%s", opcode, dest.disassemble(),
- src0.disassemble());
- }
-
- public:
- SpecialInst1SrcBase(const Brig::BrigInstBase *ib,
- const BrigObject *obj, const char *_opcode)
- : HsailGPUStaticInst(obj, _opcode)
- {
- setFlag(ALU);
-
- unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
- dest.init(op_offs, obj);
-
- op_offs = obj->getOperandPtr(ib->operands, 1);
- src0.init(op_offs, obj);
- }
- bool isVectorRegister(int operandIndex) {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- return dest.isVectorRegister();
- }
- bool isCondRegister(int operandIndex) {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- return dest.isCondRegister();
- }
- bool isScalarRegister(int operandIndex) {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- return dest.isScalarRegister();
- }
- bool isSrcOperand(int operandIndex) { return false; }
- bool isDstOperand(int operandIndex) { return true; }
- int getOperandSize(int operandIndex) {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- return dest.opSize();
- }
-
- int
- getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- return dest.regIndex();
- }
-
- int numSrcRegOperands() { return 0; }
- int numDstRegOperands() { return dest.isVectorRegister(); }
- int getNumOperands() { return 1; }
- };
-
- template<typename DestDataType>
- class SpecialInst1Src :
- public SpecialInst1SrcBase<typename DestDataType::OperandType>
- {
- public:
- typedef typename DestDataType::CType DestCType;
-
- SpecialInst1Src(const Brig::BrigInstBase *ib, const BrigObject *obj,
- const char *_opcode)
- : SpecialInst1SrcBase<typename DestDataType::OperandType>(ib, obj,
- _opcode)
- {
- }
- };
-
- class Ret : public SpecialInstNoSrcNoDest
- {
- public:
- typedef SpecialInstNoSrcNoDest Base;
-
- Ret(const Brig::BrigInstBase *ib, const BrigObject *obj)
- : Base(ib, obj, "ret")
- {
- setFlag(GPUStaticInst::Return);
- }
-
- void execute(GPUDynInstPtr gpuDynInst);
- };
-
- class Barrier : public SpecialInstNoSrcNoDest
- {
- public:
- typedef SpecialInstNoSrcNoDest Base;
- uint8_t width;
-
- Barrier(const Brig::BrigInstBase *ib, const BrigObject *obj)
- : Base(ib, obj, "barrier")
- {
- setFlag(GPUStaticInst::MemBarrier);
- assert(ib->base.kind == Brig::BRIG_KIND_INST_BR);
- width = (uint8_t)((Brig::BrigInstBr*)ib)->width;
- }
-
- void execute(GPUDynInstPtr gpuDynInst);
- };
-
- class MemFence : public SpecialInstNoSrcNoDest
- {
- public:
- typedef SpecialInstNoSrcNoDest Base;
-
- Brig::BrigMemoryOrder memFenceMemOrder;
- Brig::BrigMemoryScope memFenceScopeSegGroup;
- Brig::BrigMemoryScope memFenceScopeSegGlobal;
- Brig::BrigMemoryScope memFenceScopeSegImage;
-
- MemFence(const Brig::BrigInstBase *ib, const BrigObject *obj)
- : Base(ib, obj, "memfence")
- {
- assert(ib->base.kind == Brig::BRIG_KIND_INST_MEM_FENCE);
-
- memFenceScopeSegGlobal = (Brig::BrigMemoryScope)
- ((Brig::BrigInstMemFence*)ib)->globalSegmentMemoryScope;
-
- memFenceScopeSegGroup = (Brig::BrigMemoryScope)
- ((Brig::BrigInstMemFence*)ib)->groupSegmentMemoryScope;
-
- memFenceScopeSegImage = (Brig::BrigMemoryScope)
- ((Brig::BrigInstMemFence*)ib)->imageSegmentMemoryScope;
-
- memFenceMemOrder = (Brig::BrigMemoryOrder)
- ((Brig::BrigInstMemFence*)ib)->memoryOrder;
-
- setFlag(MemoryRef);
- setFlag(GPUStaticInst::MemFence);
-
- switch (memFenceMemOrder) {
- case Brig::BRIG_MEMORY_ORDER_NONE:
- setFlag(NoOrder);
- break;
- case Brig::BRIG_MEMORY_ORDER_RELAXED:
- setFlag(RelaxedOrder);
- break;
- case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE:
- setFlag(Acquire);
- break;
- case Brig::BRIG_MEMORY_ORDER_SC_RELEASE:
- setFlag(Release);
- break;
- case Brig::BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
- setFlag(AcquireRelease);
- break;
- default:
- fatal("MemInst has bad BrigMemoryOrder\n");
- }
-
- // set inst flags based on scopes
- if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE &&
- memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) {
- setFlag(GPUStaticInst::GlobalSegment);
-
- /**
- * A memory fence that has scope for
- * both segments will use the global
- * segment, and be executed in the
- * global memory pipeline, therefore,
- * we set the segment to match the
- * global scope only
- */
- switch (memFenceScopeSegGlobal) {
- case Brig::BRIG_MEMORY_SCOPE_NONE:
- setFlag(NoScope);
- break;
- case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
- setFlag(WorkitemScope);
- break;
- case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
- setFlag(WorkgroupScope);
- break;
- case Brig::BRIG_MEMORY_SCOPE_AGENT:
- setFlag(DeviceScope);
- break;
- case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
- setFlag(SystemScope);
- break;
- default:
- fatal("MemFence has bad global scope type\n");
- }
- } else if (memFenceScopeSegGlobal != Brig::BRIG_MEMORY_SCOPE_NONE) {
- setFlag(GPUStaticInst::GlobalSegment);
-
- switch (memFenceScopeSegGlobal) {
- case Brig::BRIG_MEMORY_SCOPE_NONE:
- setFlag(NoScope);
- break;
- case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
- setFlag(WorkitemScope);
- break;
- case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
- setFlag(WorkgroupScope);
- break;
- case Brig::BRIG_MEMORY_SCOPE_AGENT:
- setFlag(DeviceScope);
- break;
- case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
- setFlag(SystemScope);
- break;
- default:
- fatal("MemFence has bad global scope type\n");
- }
- } else if (memFenceScopeSegGroup != Brig::BRIG_MEMORY_SCOPE_NONE) {
- setFlag(GPUStaticInst::GroupSegment);
-
- switch (memFenceScopeSegGroup) {
- case Brig::BRIG_MEMORY_SCOPE_NONE:
- setFlag(NoScope);
- break;
- case Brig::BRIG_MEMORY_SCOPE_WORKITEM:
- setFlag(WorkitemScope);
- break;
- case Brig::BRIG_MEMORY_SCOPE_WORKGROUP:
- setFlag(WorkgroupScope);
- break;
- case Brig::BRIG_MEMORY_SCOPE_AGENT:
- setFlag(DeviceScope);
- break;
- case Brig::BRIG_MEMORY_SCOPE_SYSTEM:
- setFlag(SystemScope);
- break;
- default:
- fatal("MemFence has bad group scope type\n");
- }
- } else {
- fatal("MemFence constructor: bad scope specifiers\n");
- }
- }
-
- void
- initiateAcc(GPUDynInstPtr gpuDynInst)
- {
- Wavefront *wave = gpuDynInst->wavefront();
- wave->computeUnit->injectGlobalMemFence(gpuDynInst);
- }
-
- void
- execute(GPUDynInstPtr gpuDynInst)
- {
- Wavefront *w = gpuDynInst->wavefront();
- // 2 cases:
- // * memfence to a sequentially consistent memory (e.g., LDS).
- // These can be handled as no-ops.
- // * memfence to a relaxed consistency cache (e.g., Hermes, Viper,
- // etc.). We send a packet, tagged with the memory order and
- // scope, and let the GPU coalescer handle it.
-
- if (isGlobalSeg()) {
- gpuDynInst->simdId = w->simdId;
- gpuDynInst->wfSlotId = w->wfSlotId;
- gpuDynInst->wfDynId = w->wfDynId;
- gpuDynInst->kern_id = w->kernId;
- gpuDynInst->cu_id = w->computeUnit->cu_id;
-
- gpuDynInst->useContinuation = false;
- GlobalMemPipeline* gmp = &(w->computeUnit->globalMemoryPipe);
- gmp->issueRequest(gpuDynInst);
-
- w->wrGmReqsInPipe--;
- w->rdGmReqsInPipe--;
- w->memReqsInPipe--;
- w->outstandingReqs++;
- } else if (isGroupSeg()) {
- // no-op
- } else {
- fatal("MemFence execute: bad op type\n");
- }
- }
- };
-
- class Call : public HsailGPUStaticInst
- {
- public:
- // private helper functions
- void calcAddr(Wavefront* w, GPUDynInstPtr m);
-
- void
- generateDisassembly()
- {
- if (dest.disassemble() == "") {
- disassembly = csprintf("%s %s (%s)", opcode, src0.disassemble(),
- src1.disassemble());
- } else {
- disassembly = csprintf("%s %s (%s) (%s)", opcode,
- src0.disassemble(), dest.disassemble(),
- src1.disassemble());
- }
- }
-
- bool
- isPseudoOp()
- {
- std::string func_name = src0.disassemble();
- if (func_name.find("__gem5_hsail_op") != std::string::npos) {
- return true;
- }
- return false;
- }
-
- // member variables
- ListOperand dest;
- FunctionRefOperand src0;
- ListOperand src1;
- HsailCode *func_ptr;
-
- // exec function for pseudo instructions mapped on top of call opcode
- void execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst);
-
- // user-defined pseudo instructions
- void MagicPrintLane(Wavefront *w);
- void MagicPrintLane64(Wavefront *w);
- void MagicPrintWF32(Wavefront *w);
- void MagicPrintWF64(Wavefront *w);
- void MagicPrintWFFloat(Wavefront *w);
- void MagicSimBreak(Wavefront *w);
- void MagicPrefixSum(Wavefront *w);
- void MagicReduction(Wavefront *w);
- void MagicMaskLower(Wavefront *w);
- void MagicMaskUpper(Wavefront *w);
- void MagicJoinWFBar(Wavefront *w);
- void MagicWaitWFBar(Wavefront *w);
- void MagicPanic(Wavefront *w);
-
- void MagicAtomicNRAddGlobalU32Reg(Wavefront *w,
- GPUDynInstPtr gpuDynInst);
-
- void MagicAtomicNRAddGroupU32Reg(Wavefront *w,
- GPUDynInstPtr gpuDynInst);
-
- void MagicLoadGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst);
-
- void MagicXactCasLd(Wavefront *w);
- void MagicMostSigThread(Wavefront *w);
- void MagicMostSigBroadcast(Wavefront *w);
-
- void MagicPrintWF32ID(Wavefront *w);
- void MagicPrintWFID64(Wavefront *w);
-
- Call(const Brig::BrigInstBase *ib, const BrigObject *obj)
- : HsailGPUStaticInst(obj, "call")
- {
- setFlag(ALU);
- unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
- dest.init(op_offs, obj);
- op_offs = obj->getOperandPtr(ib->operands, 1);
- src0.init(op_offs, obj);
-
- func_ptr = nullptr;
- std::string func_name = src0.disassemble();
- if (!isPseudoOp()) {
- func_ptr = dynamic_cast<HsailCode*>(obj->
- getFunction(func_name));
-
- if (!func_ptr)
- fatal("call::exec cannot find function: %s\n", func_name);
- }
-
- op_offs = obj->getOperandPtr(ib->operands, 2);
- src1.init(op_offs, obj);
- }
-
- bool isVectorRegister(int operandIndex) { return false; }
- bool isCondRegister(int operandIndex) { return false; }
- bool isScalarRegister(int operandIndex) { return false; }
- bool isSrcOperand(int operandIndex) { return false; }
- bool isDstOperand(int operandIndex) { return false; }
- int getOperandSize(int operandIndex) { return 0; }
-
- int
- getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
- {
- return -1;
- }
-
- void
- execute(GPUDynInstPtr gpuDynInst)
- {
- Wavefront *w = gpuDynInst->wavefront();
-
- std::string func_name = src0.disassemble();
- if (isPseudoOp()) {
- execPseudoInst(w, gpuDynInst);
- } else {
- fatal("Native HSAIL functions are not yet implemented: %s\n",
- func_name);
- }
- }
- int numSrcRegOperands() { return 0; }
- int numDstRegOperands() { return 0; }
- int getNumOperands() { return 2; }
- };
-
- template<typename T> T heynot(T arg) { return ~arg; }
- template<> inline bool heynot<bool>(bool arg) { return !arg; }
-
-
- /* Explicitly declare template static member variables to avoid
- * warnings in some clang versions
- */
- template<> const char *B1::label;
- template<> const char *B8::label;
- template<> const char *B16::label;
- template<> const char *B32::label;
- template<> const char *B64::label;
- template<> const char *S8::label;
- template<> const char *S16::label;
- template<> const char *S32::label;
- template<> const char *S64::label;
- template<> const char *U8::label;
- template<> const char *U16::label;
- template<> const char *U32::label;
- template<> const char *U64::label;
- template<> const char *F32::label;
- template<> const char *F64::label;
-
-} // namespace HsailISA
-
-#endif // __ARCH_HSAIL_INSTS_DECL_HH__
+++ /dev/null
-/*
- * Copyright (c) 2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Anthony Gutierrez
- */
-
-#include "arch/hsail/insts/gpu_static_inst.hh"
-
-#include "gpu-compute/brig_object.hh"
-
-namespace HsailISA
-{
- HsailGPUStaticInst::HsailGPUStaticInst(const BrigObject *obj,
- const std::string &opcode)
- : GPUStaticInst(opcode), hsailCode(obj->currentCode)
- {
- }
-
- void
- HsailGPUStaticInst::generateDisassembly()
- {
- disassembly = opcode;
- }
-} // namespace HsailISA
+++ /dev/null
-/*
- * Copyright (c) 2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Anthony Gutierrez
- */
-
-#ifndef __ARCH_HSAIL_INSTS_GPU_STATIC_INST_HH__
-#define __ARCH_HSAIL_INSTS_GPU_STATIC_INST_HH__
-
-/*
- * @file gpu_static_inst.hh
- *
- * Defines the base class representing HSAIL GPU static instructions.
- */
-
-#include "arch/hsail/gpu_types.hh"
-#include "gpu-compute/gpu_static_inst.hh"
-
-class BrigObject;
-class HsailCode;
-
-namespace HsailISA
-{
- class HsailGPUStaticInst : public GPUStaticInst
- {
- public:
- HsailGPUStaticInst(const BrigObject *obj, const std::string &opcode);
- void generateDisassembly() override;
- int instSize() const override { return sizeof(RawMachInst); }
- bool isValid() const override { return true; }
-
- protected:
- HsailCode *hsailCode;
- };
-} // namespace HsailISA
-
-#endif // __ARCH_HSAIL_INSTS_GPU_STATIC_INST_HH__
+++ /dev/null
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Steve Reinhardt
- */
-
-#include "arch/hsail/insts/decl.hh"
-#include "debug/GPUExec.hh"
-#include "gpu-compute/dispatcher.hh"
-#include "gpu-compute/simple_pool_manager.hh"
-
-namespace HsailISA
-{
- template<> const char *B1::label = "b1";
- template<> const char *B8::label = "b8";
- template<> const char *B16::label = "b16";
- template<> const char *B32::label = "b32";
- template<> const char *B64::label = "b64";
-
- template<> const char *S8::label = "s8";
- template<> const char *S16::label = "s16";
- template<> const char *S32::label = "s32";
- template<> const char *S64::label = "s64";
-
- template<> const char *U8::label = "u8";
- template<> const char *U16::label = "u16";
- template<> const char *U32::label = "u32";
- template<> const char *U64::label = "u64";
-
- template<> const char *F32::label = "f32";
- template<> const char *F64::label = "f64";
-
- const char*
- cmpOpToString(Brig::BrigCompareOperation cmpOp)
- {
- using namespace Brig;
-
- switch (cmpOp) {
- case BRIG_COMPARE_EQ:
- return "eq";
- case BRIG_COMPARE_NE:
- return "ne";
- case BRIG_COMPARE_LT:
- return "lt";
- case BRIG_COMPARE_LE:
- return "le";
- case BRIG_COMPARE_GT:
- return "gt";
- case BRIG_COMPARE_GE:
- return "ge";
- case BRIG_COMPARE_EQU:
- return "equ";
- case BRIG_COMPARE_NEU:
- return "neu";
- case BRIG_COMPARE_LTU:
- return "ltu";
- case BRIG_COMPARE_LEU:
- return "leu";
- case BRIG_COMPARE_GTU:
- return "gtu";
- case BRIG_COMPARE_GEU:
- return "geu";
- case BRIG_COMPARE_NUM:
- return "num";
- case BRIG_COMPARE_NAN:
- return "nan";
- case BRIG_COMPARE_SEQ:
- return "seq";
- case BRIG_COMPARE_SNE:
- return "sne";
- case BRIG_COMPARE_SLT:
- return "slt";
- case BRIG_COMPARE_SLE:
- return "sle";
- case BRIG_COMPARE_SGT:
- return "sgt";
- case BRIG_COMPARE_SGE:
- return "sge";
- case BRIG_COMPARE_SGEU:
- return "sgeu";
- case BRIG_COMPARE_SEQU:
- return "sequ";
- case BRIG_COMPARE_SNEU:
- return "sneu";
- case BRIG_COMPARE_SLTU:
- return "sltu";
- case BRIG_COMPARE_SLEU:
- return "sleu";
- case BRIG_COMPARE_SNUM:
- return "snum";
- case BRIG_COMPARE_SNAN:
- return "snan";
- case BRIG_COMPARE_SGTU:
- return "sgtu";
- default:
- return "unknown";
- }
- }
-
- void
- Ret::execute(GPUDynInstPtr gpuDynInst)
- {
- Wavefront *w = gpuDynInst->wavefront();
-
- const VectorMask &mask = w->getPred();
-
- // mask off completed work-items
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- w->initMask[lane] = 0;
- }
-
- }
-
- // delete extra instructions fetched for completed work-items
- w->instructionBuffer.erase(w->instructionBuffer.begin() + 1,
- w->instructionBuffer.end());
- if (w->pendingFetch) {
- w->dropFetch = true;
- }
-
- // if all work-items have completed, then wave-front is done
- if (w->initMask.none()) {
- w->status = Wavefront::S_STOPPED;
-
- int32_t refCount = w->computeUnit->getLds().
- decreaseRefCounter(w->dispatchId, w->wgId);
-
- DPRINTF(GPUExec, "CU%d: decrease ref ctr WG[%d] to [%d]\n",
- w->computeUnit->cu_id, w->wgId, refCount);
-
- // free the vector registers of the completed wavefront
- w->computeUnit->vectorRegsReserved[w->simdId] -=
- w->reservedVectorRegs;
-
- assert(w->computeUnit->vectorRegsReserved[w->simdId] >= 0);
-
- uint32_t endIndex = (w->startVgprIndex +
- w->reservedVectorRegs - 1) %
- w->computeUnit->vrf[w->simdId]->numRegs();
-
- w->computeUnit->vrf[w->simdId]->manager->
- freeRegion(w->startVgprIndex, endIndex);
-
- w->reservedVectorRegs = 0;
- w->startVgprIndex = 0;
- w->computeUnit->completedWfs++;
-
- DPRINTF(GPUExec, "Doing return for CU%d: WF[%d][%d][%d]\n",
- w->computeUnit->cu_id, w->simdId, w->wfSlotId, w->wfDynId);
-
- if (!refCount) {
- setFlag(SystemScope);
- setFlag(Release);
- setFlag(GlobalSegment);
- // Notify Memory System of Kernel Completion
- // Kernel End = isKernel + isRelease
- w->status = Wavefront::S_RETURNING;
- GPUDynInstPtr local_mempacket = gpuDynInst;
- local_mempacket->useContinuation = false;
- local_mempacket->simdId = w->simdId;
- local_mempacket->wfSlotId = w->wfSlotId;
- local_mempacket->wfDynId = w->wfDynId;
- w->computeUnit->injectGlobalMemFence(local_mempacket, true);
- } else {
- w->computeUnit->shader->dispatcher->scheduleDispatch();
- }
- }
- }
-
- void
- Barrier::execute(GPUDynInstPtr gpuDynInst)
- {
- Wavefront *w = gpuDynInst->wavefront();
-
- assert(w->barrierCnt == w->oldBarrierCnt);
- w->barrierCnt = w->oldBarrierCnt + 1;
- w->stalledAtBarrier = true;
- }
-} // namespace HsailISA
+++ /dev/null
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Steve Reinhardt
- */
-
-#include "arch/hsail/insts/mem.hh"
-
-#include "arch/hsail/Brig.h"
-
-using namespace Brig;
-
-namespace HsailISA
-{
- const char* atomicOpToString(BrigAtomicOperation brigOp);
-
- const char*
- atomicOpToString(BrigAtomicOperation brigOp)
- {
- switch (brigOp) {
- case BRIG_ATOMIC_AND:
- return "and";
- case BRIG_ATOMIC_OR:
- return "or";
- case BRIG_ATOMIC_XOR:
- return "xor";
- case BRIG_ATOMIC_CAS:
- return "cas";
- case BRIG_ATOMIC_EXCH:
- return "exch";
- case BRIG_ATOMIC_ADD:
- return "add";
- case BRIG_ATOMIC_WRAPINC:
- return "inc";
- case BRIG_ATOMIC_WRAPDEC:
- return "dec";
- case BRIG_ATOMIC_MIN:
- return "min";
- case BRIG_ATOMIC_MAX:
- return "max";
- case BRIG_ATOMIC_SUB:
- return "sub";
- default:
- return "unknown";
- }
- }
-} // namespace HsailISA
+++ /dev/null
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Steve Reinhardt
- */
-
-#ifndef __ARCH_HSAIL_INSTS_MEM_HH__
-#define __ARCH_HSAIL_INSTS_MEM_HH__
-
-#include <type_traits>
-
-#include "arch/hsail/insts/decl.hh"
-#include "arch/hsail/insts/gpu_static_inst.hh"
-#include "arch/hsail/operand.hh"
-#include "gpu-compute/compute_unit.hh"
-
-namespace HsailISA
-{
- class MemInst
- {
- public:
- MemInst() : size(0), addr_operand(nullptr) { }
-
- MemInst(Enums::MemType m_type)
- {
- if (m_type == Enums::M_U64 ||
- m_type == Enums::M_S64 ||
- m_type == Enums::M_F64) {
- size = 8;
- } else if (m_type == Enums::M_U32 ||
- m_type == Enums::M_S32 ||
- m_type == Enums::M_F32) {
- size = 4;
- } else if (m_type == Enums::M_U16 ||
- m_type == Enums::M_S16 ||
- m_type == Enums::M_F16) {
- size = 2;
- } else {
- size = 1;
- }
-
- addr_operand = nullptr;
- }
-
- void
- init_addr(AddrOperandBase *_addr_operand)
- {
- addr_operand = _addr_operand;
- }
-
- private:
- int size;
- AddrOperandBase *addr_operand;
-
- public:
- int getMemOperandSize() { return size; }
- AddrOperandBase *getAddressOperand() { return addr_operand; }
- };
-
- template<typename DestOperandType, typename AddrOperandType>
- class LdaInstBase : public HsailGPUStaticInst
- {
- public:
- typename DestOperandType::DestOperand dest;
- AddrOperandType addr;
-
- LdaInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
- const char *_opcode)
- : HsailGPUStaticInst(obj, _opcode)
- {
- using namespace Brig;
-
- setFlag(ALU);
-
- unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
- dest.init(op_offs, obj);
- op_offs = obj->getOperandPtr(ib->operands, 1);
- addr.init(op_offs, obj);
- }
-
- int numSrcRegOperands() override
- { return(this->addr.isVectorRegister()); }
- int numDstRegOperands() override
- { return dest.isVectorRegister(); }
- bool isVectorRegister(int operandIndex) override
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- return((operandIndex == 0) ? dest.isVectorRegister() :
- this->addr.isVectorRegister());
- }
- bool isCondRegister(int operandIndex) override
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- return((operandIndex == 0) ? dest.isCondRegister() :
- this->addr.isCondRegister());
- }
- bool isScalarRegister(int operandIndex) override
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- return((operandIndex == 0) ? dest.isScalarRegister() :
- this->addr.isScalarRegister());
- }
- bool isSrcOperand(int operandIndex) override
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (operandIndex > 0)
- return(this->addr.isVectorRegister());
- return false;
- }
- bool isDstOperand(int operandIndex) override {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- return(operandIndex == 0);
- }
- int getOperandSize(int operandIndex) override
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- return((operandIndex == 0) ? dest.opSize() :
- this->addr.opSize());
- }
- int
- getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- return((operandIndex == 0) ? dest.regIndex() :
- this->addr.regIndex());
- }
- int getNumOperands() override
- {
- if (this->addr.isVectorRegister())
- return 2;
- return 1;
- }
- };
-
- template<typename DestDataType, typename AddrOperandType>
- class LdaInst :
- public LdaInstBase<typename DestDataType::OperandType, AddrOperandType>,
- public MemInst
- {
- public:
- void generateDisassembly();
-
- LdaInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
- const char *_opcode)
- : LdaInstBase<typename DestDataType::OperandType,
- AddrOperandType>(ib, obj, _opcode)
- {
- init_addr(&this->addr);
- }
-
- void execute(GPUDynInstPtr gpuDynInst);
- };
-
- template<typename DataType>
- GPUStaticInst*
- decodeLda(const Brig::BrigInstBase *ib, const BrigObject *obj)
- {
- unsigned op_offs = obj->getOperandPtr(ib->operands, 1);
- BrigRegOperandInfo regDataType = findRegDataType(op_offs, obj);
-
- if (regDataType.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
- return new LdaInst<DataType, NoRegAddrOperand>(ib, obj, "ldas");
- } else if (regDataType.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
- // V2/V4 not allowed
- switch (regDataType.regKind) {
- case Brig::BRIG_REGISTER_KIND_SINGLE:
- return new LdaInst<DataType, SRegAddrOperand>(ib, obj, "ldas");
- case Brig::BRIG_REGISTER_KIND_DOUBLE:
- return new LdaInst<DataType, DRegAddrOperand>(ib, obj, "ldas");
- default:
- fatal("Bad ldas register operand type %d\n", regDataType.type);
- }
- } else {
- fatal("Bad ldas register operand kind %d\n", regDataType.kind);
- }
- }
-
- template<typename MemOperandType, typename DestOperandType,
- typename AddrOperandType>
- class LdInstBase : public HsailGPUStaticInst
- {
- public:
- Brig::BrigWidth8_t width;
- typename DestOperandType::DestOperand dest;
- AddrOperandType addr;
-
- Brig::BrigSegment segment;
- Brig::BrigMemoryOrder memoryOrder;
- Brig::BrigMemoryScope memoryScope;
- unsigned int equivClass;
-
- LdInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
- const char *_opcode)
- : HsailGPUStaticInst(obj, _opcode)
- {
- using namespace Brig;
-
- setFlag(MemoryRef);
- setFlag(Load);
-
- if (ib->opcode == BRIG_OPCODE_LD) {
- const BrigInstMem *ldst = (const BrigInstMem*)ib;
-
- segment = (BrigSegment)ldst->segment;
- memoryOrder = BRIG_MEMORY_ORDER_NONE;
- memoryScope = BRIG_MEMORY_SCOPE_NONE;
- equivClass = ldst->equivClass;
-
- width = ldst->width;
- unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
- const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
- if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
- dest.init(op_offs, obj);
-
- op_offs = obj->getOperandPtr(ib->operands, 1);
- addr.init(op_offs, obj);
- } else {
- const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
-
- segment = (BrigSegment)at->segment;
- memoryOrder = (BrigMemoryOrder)at->memoryOrder;
- memoryScope = (BrigMemoryScope)at->memoryScope;
- equivClass = 0;
-
- width = BRIG_WIDTH_1;
- unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
- const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
-
- if (brigOp->kind == BRIG_KIND_OPERAND_REGISTER)
- dest.init(op_offs, obj);
-
- op_offs = obj->getOperandPtr(ib->operands,1);
- addr.init(op_offs, obj);
- }
-
- switch (memoryOrder) {
- case BRIG_MEMORY_ORDER_NONE:
- setFlag(NoOrder);
- break;
- case BRIG_MEMORY_ORDER_RELAXED:
- setFlag(RelaxedOrder);
- break;
- case BRIG_MEMORY_ORDER_SC_ACQUIRE:
- setFlag(Acquire);
- break;
- case BRIG_MEMORY_ORDER_SC_RELEASE:
- setFlag(Release);
- break;
- case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
- setFlag(AcquireRelease);
- break;
- default:
- fatal("LdInst has bad memory order type\n");
- }
-
- switch (memoryScope) {
- case BRIG_MEMORY_SCOPE_NONE:
- setFlag(NoScope);
- break;
- case BRIG_MEMORY_SCOPE_WORKITEM:
- setFlag(WorkitemScope);
- break;
- case BRIG_MEMORY_SCOPE_WORKGROUP:
- setFlag(WorkgroupScope);
- break;
- case BRIG_MEMORY_SCOPE_AGENT:
- setFlag(DeviceScope);
- break;
- case BRIG_MEMORY_SCOPE_SYSTEM:
- setFlag(SystemScope);
- break;
- default:
- fatal("LdInst has bad memory scope type\n");
- }
-
- switch (segment) {
- case BRIG_SEGMENT_GLOBAL:
- setFlag(GlobalSegment);
- break;
- case BRIG_SEGMENT_GROUP:
- setFlag(GroupSegment);
- break;
- case BRIG_SEGMENT_PRIVATE:
- setFlag(PrivateSegment);
- break;
- case BRIG_SEGMENT_READONLY:
- setFlag(ReadOnlySegment);
- break;
- case BRIG_SEGMENT_SPILL:
- setFlag(SpillSegment);
- break;
- case BRIG_SEGMENT_FLAT:
- setFlag(Flat);
- break;
- case BRIG_SEGMENT_KERNARG:
- setFlag(KernArgSegment);
- break;
- case BRIG_SEGMENT_ARG:
- setFlag(ArgSegment);
- break;
- default:
- panic("Ld: segment %d not supported\n", segment);
- }
- }
-
- int numSrcRegOperands() override
- { return(this->addr.isVectorRegister()); }
- int numDstRegOperands() override { return dest.isVectorRegister(); }
- int getNumOperands() override
- {
- if (this->addr.isVectorRegister())
- return 2;
- else
- return 1;
- }
- bool isVectorRegister(int operandIndex) override
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- return((operandIndex == 0) ? dest.isVectorRegister() :
- this->addr.isVectorRegister());
- }
- bool isCondRegister(int operandIndex) override
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- return((operandIndex == 0) ? dest.isCondRegister() :
- this->addr.isCondRegister());
- }
- bool isScalarRegister(int operandIndex) override
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- return((operandIndex == 0) ? dest.isScalarRegister() :
- this->addr.isScalarRegister());
- }
- bool isSrcOperand(int operandIndex) override
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (operandIndex > 0)
- return(this->addr.isVectorRegister());
- return false;
- }
- bool isDstOperand(int operandIndex) override
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- return(operandIndex == 0);
- }
- int getOperandSize(int operandIndex) override
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- return((operandIndex == 0) ? dest.opSize() :
- this->addr.opSize());
- }
- int
- getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- return((operandIndex == 0) ? dest.regIndex() :
- this->addr.regIndex());
- }
- };
-
- template<typename MemDataType, typename DestDataType,
- typename AddrOperandType>
- class LdInst :
- public LdInstBase<typename MemDataType::CType,
- typename DestDataType::OperandType, AddrOperandType>,
- public MemInst
- {
- typename DestDataType::OperandType::DestOperand dest_vect[4];
- uint16_t num_dest_operands;
- void generateDisassembly() override;
-
- public:
- LdInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
- const char *_opcode)
- : LdInstBase<typename MemDataType::CType,
- typename DestDataType::OperandType,
- AddrOperandType>(ib, obj, _opcode),
- MemInst(MemDataType::memType)
- {
- init_addr(&this->addr);
-
- unsigned op_offs = obj->getOperandPtr(ib->operands,0);
- const Brig::BrigOperand *brigOp = obj->getOperand(op_offs);
-
- if (brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
- const Brig::BrigOperandOperandList *brigRegVecOp =
- (const Brig::BrigOperandOperandList*)brigOp;
-
- num_dest_operands =
- *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;
-
- assert(num_dest_operands <= 4);
- } else {
- num_dest_operands = 1;
- }
-
- if (num_dest_operands > 1) {
- assert(brigOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
-
- for (int i = 0; i < num_dest_operands; ++i) {
- dest_vect[i].init_from_vect(op_offs, obj, i);
- }
- }
- }
-
- void
- initiateAcc(GPUDynInstPtr gpuDynInst) override
- {
- typedef typename MemDataType::CType c0;
-
- gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
-
- if (num_dest_operands > 1) {
- for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i)
- if (gpuDynInst->exec_mask[i])
- gpuDynInst->statusVector.push_back(num_dest_operands);
- else
- gpuDynInst->statusVector.push_back(0);
- }
-
- for (int k = 0; k < num_dest_operands; ++k) {
-
- c0 *d = &((c0*)gpuDynInst->d_data)
- [k * gpuDynInst->computeUnit()->wfSize()];
-
- for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
- if (gpuDynInst->exec_mask[i]) {
- Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);
-
- if (this->isLocalMem()) {
- // load from shared memory
- *d = gpuDynInst->wavefront()->ldsChunk->
- read<c0>(vaddr);
- } else {
- RequestPtr req = std::make_shared<Request>(
- vaddr, sizeof(c0), 0,
- gpuDynInst->computeUnit()->masterId(),
- 0, gpuDynInst->wfDynId);
-
- gpuDynInst->setRequestFlags(req);
- PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
- pkt->dataStatic(d);
-
- if (gpuDynInst->computeUnit()->shader->
- separate_acquire_release &&
- gpuDynInst->isAcquire()) {
- // if this load has acquire semantics,
- // set the response continuation function
- // to perform an Acquire request
- gpuDynInst->execContinuation =
- &GPUStaticInst::execLdAcq;
-
- gpuDynInst->useContinuation = true;
- } else {
- // the request will be finished when
- // the load completes
- gpuDynInst->useContinuation = false;
- }
- // translation is performed in sendRequest()
- gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
- i, pkt);
- }
- }
- ++d;
- }
- }
-
- gpuDynInst->updateStats();
- }
-
- void
- completeAcc(GPUDynInstPtr gpuDynInst) override
- {
- typedef typename MemDataType::CType c1;
-
- constexpr bool is_vt_32 = DestDataType::vgprType == VT_32;
-
- /**
- * this code essentially replaces the long if-else chain
- * that was in used GlobalMemPipeline::exec() to infer the
- * size (single/double) and type (floating point/integer) of
- * the destination register. this is needed for load
- * instructions because the loaded value and the
- * destination type can be of different sizes, and we also
- * need to know if the value we're writing back is floating
- * point and signed/unsigned, so we can properly cast the
- * writeback value
- */
- typedef typename std::conditional<is_vt_32,
- typename std::conditional<std::is_floating_point<c1>::value,
- float, typename std::conditional<std::is_signed<c1>::value,
- int32_t, uint32_t>::type>::type,
- typename std::conditional<std::is_floating_point<c1>::value,
- double, typename std::conditional<std::is_signed<c1>::value,
- int64_t, uint64_t>::type>::type>::type c0;
-
-
- Wavefront *w = gpuDynInst->wavefront();
-
- std::vector<uint32_t> regVec;
- // iterate over number of destination register operands since
- // this is a load
- for (int k = 0; k < num_dest_operands; ++k) {
- assert((sizeof(c1) * num_dest_operands)
- <= MAX_WIDTH_FOR_MEM_INST);
-
- int dst = this->dest.regIndex() + k;
- if (num_dest_operands > MAX_REGS_FOR_NON_VEC_MEM_INST)
- dst = dest_vect[k].regIndex();
- // virtual->physical VGPR mapping
- int physVgpr = w->remap(dst, sizeof(c0), 1);
- // save the physical VGPR index
- regVec.push_back(physVgpr);
-
- c1 *p1 =
- &((c1*)gpuDynInst->d_data)[k * w->computeUnit->wfSize()];
-
- for (int i = 0; i < w->computeUnit->wfSize(); ++i) {
- if (gpuDynInst->exec_mask[i]) {
- DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: "
- "$%s%d <- %d global ld done (src = wavefront "
- "ld inst)\n", w->computeUnit->cu_id, w->simdId,
- w->wfSlotId, i, sizeof(c0) == 4 ? "s" : "d",
- dst, *p1);
- // write the value into the physical VGPR. This is a
- // purely functional operation. No timing is modeled.
- w->computeUnit->vrf[w->simdId]->write<c0>(physVgpr,
- *p1, i);
- }
- ++p1;
- }
- }
-
- // Schedule the write operation of the load data on the VRF.
- // This simply models the timing aspect of the VRF write operation.
- // It does not modify the physical VGPR.
- int loadVrfBankConflictCycles = gpuDynInst->computeUnit()->
- vrf[w->simdId]->exec(gpuDynInst->seqNum(), w, regVec,
- sizeof(c0), gpuDynInst->time);
-
- if (this->isGlobalMem()) {
- gpuDynInst->computeUnit()->globalMemoryPipe
- .incLoadVRFBankConflictCycles(loadVrfBankConflictCycles);
- } else {
- assert(this->isLocalMem());
- gpuDynInst->computeUnit()->localMemoryPipe
- .incLoadVRFBankConflictCycles(loadVrfBankConflictCycles);
- }
- }
-
- private:
- void
- execLdAcq(GPUDynInstPtr gpuDynInst) override
- {
- // after the load has complete and if the load has acquire
- // semantics, issue an acquire request.
- if (!this->isLocalMem()) {
- if (gpuDynInst->computeUnit()->shader->separate_acquire_release
- && gpuDynInst->isAcquire()) {
- gpuDynInst->statusBitVector = VectorMask(1);
- gpuDynInst->useContinuation = false;
- // create request
- RequestPtr req = std::make_shared<Request>(0, 0, 0,
- gpuDynInst->computeUnit()->masterId(),
- 0, gpuDynInst->wfDynId);
- req->setFlags(Request::ACQUIRE);
- gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
- }
- }
- }
-
- public:
- bool isVectorRegister(int operandIndex) override
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if ((num_dest_operands != getNumOperands()) &&
- (operandIndex == (getNumOperands()-1)))
- return(this->addr.isVectorRegister());
- if (num_dest_operands > 1) {
- return dest_vect[operandIndex].isVectorRegister();
- }
- else if (num_dest_operands == 1) {
- return LdInstBase<typename MemDataType::CType,
- typename DestDataType::OperandType,
- AddrOperandType>::dest.isVectorRegister();
- }
- return false;
- }
- bool isCondRegister(int operandIndex) override
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if ((num_dest_operands != getNumOperands()) &&
- (operandIndex == (getNumOperands()-1)))
- return(this->addr.isCondRegister());
- if (num_dest_operands > 1)
- return dest_vect[operandIndex].isCondRegister();
- else if (num_dest_operands == 1)
- return LdInstBase<typename MemDataType::CType,
- typename DestDataType::OperandType,
- AddrOperandType>::dest.isCondRegister();
- return false;
- }
- bool isScalarRegister(int operandIndex) override
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if ((num_dest_operands != getNumOperands()) &&
- (operandIndex == (getNumOperands()-1)))
- return(this->addr.isScalarRegister());
- if (num_dest_operands > 1)
- return dest_vect[operandIndex].isScalarRegister();
- else if (num_dest_operands == 1)
- return LdInstBase<typename MemDataType::CType,
- typename DestDataType::OperandType,
- AddrOperandType>::dest.isScalarRegister();
- return false;
- }
- bool isSrcOperand(int operandIndex) override
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if ((num_dest_operands != getNumOperands()) &&
- (operandIndex == (getNumOperands()-1)))
- return(this->addr.isVectorRegister());
- return false;
- }
- bool isDstOperand(int operandIndex) override
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if ((num_dest_operands != getNumOperands()) &&
- (operandIndex == (getNumOperands()-1)))
- return false;
- return true;
- }
- int getOperandSize(int operandIndex) override
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if ((num_dest_operands != getNumOperands()) &&
- (operandIndex == (getNumOperands()-1)))
- return(this->addr.opSize());
- if (num_dest_operands > 1)
- return(dest_vect[operandIndex].opSize());
- else if (num_dest_operands == 1)
- return(LdInstBase<typename MemDataType::CType,
- typename DestDataType::OperandType,
- AddrOperandType>::dest.opSize());
- return 0;
- }
- int
- getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if ((num_dest_operands != getNumOperands()) &&
- (operandIndex == (getNumOperands()-1)))
- return(this->addr.regIndex());
- if (num_dest_operands > 1)
- return(dest_vect[operandIndex].regIndex());
- else if (num_dest_operands == 1)
- return(LdInstBase<typename MemDataType::CType,
- typename DestDataType::OperandType,
- AddrOperandType>::dest.regIndex());
- return -1;
- }
- int getNumOperands() override
- {
- if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
- return(num_dest_operands+1);
- else
- return(num_dest_operands);
- }
- void execute(GPUDynInstPtr gpuDynInst) override;
- };
-
- template<typename MemDT, typename DestDT>
- GPUStaticInst*
- decodeLd2(const Brig::BrigInstBase *ib, const BrigObject *obj)
- {
- unsigned op_offs = obj->getOperandPtr(ib->operands,1);
- BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);
-
- if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
- return new LdInst<MemDT, DestDT, NoRegAddrOperand>(ib, obj, "ld");
- } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
- tmp.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
- switch (tmp.regKind) {
- case Brig::BRIG_REGISTER_KIND_SINGLE:
- return new LdInst<MemDT, DestDT,
- SRegAddrOperand>(ib, obj, "ld");
- case Brig::BRIG_REGISTER_KIND_DOUBLE:
- return new LdInst<MemDT, DestDT,
- DRegAddrOperand>(ib, obj, "ld");
- default:
- fatal("Bad ld register operand type %d\n", tmp.regKind);
- }
- } else {
- fatal("Bad ld register operand kind %d\n", tmp.kind);
- }
- }
-
- template<typename MemDT>
- GPUStaticInst*
- decodeLd(const Brig::BrigInstBase *ib, const BrigObject *obj)
- {
- unsigned op_offs = obj->getOperandPtr(ib->operands,0);
- BrigRegOperandInfo dest = findRegDataType(op_offs, obj);
-
- assert(dest.kind == Brig::BRIG_KIND_OPERAND_REGISTER ||
- dest.kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
- switch(dest.regKind) {
- case Brig::BRIG_REGISTER_KIND_SINGLE:
- switch (ib->type) {
- case Brig::BRIG_TYPE_B8:
- case Brig::BRIG_TYPE_B16:
- case Brig::BRIG_TYPE_B32:
- return decodeLd2<MemDT, B32>(ib, obj);
- case Brig::BRIG_TYPE_U8:
- case Brig::BRIG_TYPE_U16:
- case Brig::BRIG_TYPE_U32:
- return decodeLd2<MemDT, U32>(ib, obj);
- case Brig::BRIG_TYPE_S8:
- case Brig::BRIG_TYPE_S16:
- case Brig::BRIG_TYPE_S32:
- return decodeLd2<MemDT, S32>(ib, obj);
- case Brig::BRIG_TYPE_F16:
- case Brig::BRIG_TYPE_F32:
- return decodeLd2<MemDT, U32>(ib, obj);
- default:
- fatal("Bad ld register operand type %d, %d\n",
- dest.regKind, ib->type);
- };
- case Brig::BRIG_REGISTER_KIND_DOUBLE:
- switch (ib->type) {
- case Brig::BRIG_TYPE_B64:
- return decodeLd2<MemDT, B64>(ib, obj);
- case Brig::BRIG_TYPE_U64:
- return decodeLd2<MemDT, U64>(ib, obj);
- case Brig::BRIG_TYPE_S64:
- return decodeLd2<MemDT, S64>(ib, obj);
- case Brig::BRIG_TYPE_F64:
- return decodeLd2<MemDT, U64>(ib, obj);
- default:
- fatal("Bad ld register operand type %d, %d\n",
- dest.regKind, ib->type);
- };
- default:
- fatal("Bad ld register operand type %d, %d\n", dest.regKind,
- ib->type);
- }
- }
-
- template<typename MemDataType, typename SrcOperandType,
- typename AddrOperandType>
- class StInstBase : public HsailGPUStaticInst
- {
- public:
- typename SrcOperandType::SrcOperand src;
- AddrOperandType addr;
-
- Brig::BrigSegment segment;
- Brig::BrigMemoryScope memoryScope;
- Brig::BrigMemoryOrder memoryOrder;
- unsigned int equivClass;
-
- StInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
- const char *_opcode)
- : HsailGPUStaticInst(obj, _opcode)
- {
- using namespace Brig;
-
- setFlag(MemoryRef);
- setFlag(Store);
-
- if (ib->opcode == BRIG_OPCODE_ST) {
- const BrigInstMem *ldst = (const BrigInstMem*)ib;
-
- segment = (BrigSegment)ldst->segment;
- memoryOrder = BRIG_MEMORY_ORDER_NONE;
- memoryScope = BRIG_MEMORY_SCOPE_NONE;
- equivClass = ldst->equivClass;
-
- unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
- const BrigOperand *baseOp = obj->getOperand(op_offs);
-
- if ((baseOp->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) ||
- (baseOp->kind == BRIG_KIND_OPERAND_REGISTER)) {
- src.init(op_offs, obj);
- }
-
- op_offs = obj->getOperandPtr(ib->operands, 1);
- addr.init(op_offs, obj);
- } else {
- const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
-
- segment = (BrigSegment)at->segment;
- memoryScope = (BrigMemoryScope)at->memoryScope;
- memoryOrder = (BrigMemoryOrder)at->memoryOrder;
- equivClass = 0;
-
- unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
- addr.init(op_offs, obj);
-
- op_offs = obj->getOperandPtr(ib->operands, 1);
- src.init(op_offs, obj);
- }
-
- switch (memoryOrder) {
- case BRIG_MEMORY_ORDER_NONE:
- setFlag(NoOrder);
- break;
- case BRIG_MEMORY_ORDER_RELAXED:
- setFlag(RelaxedOrder);
- break;
- case BRIG_MEMORY_ORDER_SC_ACQUIRE:
- setFlag(Acquire);
- break;
- case BRIG_MEMORY_ORDER_SC_RELEASE:
- setFlag(Release);
- break;
- case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
- setFlag(AcquireRelease);
- break;
- default:
- fatal("StInst has bad memory order type\n");
- }
-
- switch (memoryScope) {
- case BRIG_MEMORY_SCOPE_NONE:
- setFlag(NoScope);
- break;
- case BRIG_MEMORY_SCOPE_WORKITEM:
- setFlag(WorkitemScope);
- break;
- case BRIG_MEMORY_SCOPE_WORKGROUP:
- setFlag(WorkgroupScope);
- break;
- case BRIG_MEMORY_SCOPE_AGENT:
- setFlag(DeviceScope);
- break;
- case BRIG_MEMORY_SCOPE_SYSTEM:
- setFlag(SystemScope);
- break;
- default:
- fatal("StInst has bad memory scope type\n");
- }
-
- switch (segment) {
- case BRIG_SEGMENT_GLOBAL:
- setFlag(GlobalSegment);
- break;
- case BRIG_SEGMENT_GROUP:
- setFlag(GroupSegment);
- break;
- case BRIG_SEGMENT_PRIVATE:
- setFlag(PrivateSegment);
- break;
- case BRIG_SEGMENT_READONLY:
- setFlag(ReadOnlySegment);
- break;
- case BRIG_SEGMENT_SPILL:
- setFlag(SpillSegment);
- break;
- case BRIG_SEGMENT_FLAT:
- setFlag(Flat);
- break;
- case BRIG_SEGMENT_ARG:
- setFlag(ArgSegment);
- break;
- default:
- panic("St: segment %d not supported\n", segment);
- }
- }
-
- int numDstRegOperands() override { return 0; }
- int numSrcRegOperands() override
- {
- return src.isVectorRegister() + this->addr.isVectorRegister();
- }
- int getNumOperands() override
- {
- if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
- return 2;
- else
- return 1;
- }
- bool isVectorRegister(int operandIndex) override
- {
- assert(operandIndex >= 0 && operandIndex < getNumOperands());
- return !operandIndex ? src.isVectorRegister() :
- this->addr.isVectorRegister();
- }
- bool isCondRegister(int operandIndex) override
- {
- assert(operandIndex >= 0 && operandIndex < getNumOperands());
- return !operandIndex ? src.isCondRegister() :
- this->addr.isCondRegister();
- }
- bool isScalarRegister(int operandIndex) override
- {
- assert(operandIndex >= 0 && operandIndex < getNumOperands());
- return !operandIndex ? src.isScalarRegister() :
- this->addr.isScalarRegister();
- }
- bool isSrcOperand(int operandIndex) override
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- return true;
- }
- bool isDstOperand(int operandIndex) override { return false; }
- int getOperandSize(int operandIndex) override
- {
- assert(operandIndex >= 0 && operandIndex < getNumOperands());
- return !operandIndex ? src.opSize() : this->addr.opSize();
- }
- int
- getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
- {
- assert(operandIndex >= 0 && operandIndex < getNumOperands());
- return !operandIndex ? src.regIndex() : this->addr.regIndex();
- }
- };
-
-
- template<typename MemDataType, typename SrcDataType,
- typename AddrOperandType>
- class StInst :
- public StInstBase<MemDataType, typename SrcDataType::OperandType,
- AddrOperandType>,
- public MemInst
- {
- public:
- typename SrcDataType::OperandType::SrcOperand src_vect[4];
- uint16_t num_src_operands;
- void generateDisassembly() override;
-
- StInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
- const char *_opcode, int srcIdx)
- : StInstBase<MemDataType, typename SrcDataType::OperandType,
- AddrOperandType>(ib, obj, _opcode),
- MemInst(SrcDataType::memType)
- {
- init_addr(&this->addr);
-
- BrigRegOperandInfo rinfo;
- unsigned op_offs = obj->getOperandPtr(ib->operands,srcIdx);
- const Brig::BrigOperand *baseOp = obj->getOperand(op_offs);
-
- if (baseOp->kind == Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES) {
- const Brig::BrigOperandConstantBytes *op =
- (Brig::BrigOperandConstantBytes*)baseOp;
-
- rinfo = BrigRegOperandInfo((Brig::BrigKind16_t)op->base.kind,
- Brig::BRIG_TYPE_NONE);
- } else {
- rinfo = findRegDataType(op_offs, obj);
- }
-
- if (baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
- const Brig::BrigOperandOperandList *brigRegVecOp =
- (const Brig::BrigOperandOperandList*)baseOp;
-
- num_src_operands =
- *((unsigned*)obj->getData(brigRegVecOp->elements)) / 4;
-
- assert(num_src_operands <= 4);
- } else {
- num_src_operands = 1;
- }
-
- if (num_src_operands > 1) {
- assert(baseOp->kind == Brig::BRIG_KIND_OPERAND_OPERAND_LIST);
-
- for (int i = 0; i < num_src_operands; ++i) {
- src_vect[i].init_from_vect(op_offs, obj, i);
- }
- }
- }
-
- void
- initiateAcc(GPUDynInstPtr gpuDynInst) override
- {
- // before performing a store, check if this store has
- // release semantics, and if so issue a release first
- if (!this->isLocalMem()) {
- if (gpuDynInst->computeUnit()->shader->separate_acquire_release
- && gpuDynInst->isRelease()) {
-
- gpuDynInst->statusBitVector = VectorMask(1);
- gpuDynInst->execContinuation = &GPUStaticInst::execSt;
- gpuDynInst->useContinuation = true;
- // create request
- RequestPtr req = std::make_shared<Request>(0, 0, 0,
- gpuDynInst->computeUnit()->masterId(),
- 0, gpuDynInst->wfDynId);
- req->setFlags(Request::RELEASE);
- gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
-
- return;
- }
- }
-
- // if there is no release semantic, perform stores immediately
- execSt(gpuDynInst);
- }
-
- // stores don't write anything back, so there is nothing
- // to do here. we only override this method to avoid the
- // fatal in the base class implementation
- void completeAcc(GPUDynInstPtr gpuDynInst) override { }
-
- private:
- // execSt may be called through a continuation
- // if the store had release semantics. see comment for
- // execSt in gpu_static_inst.hh
- void
- execSt(GPUDynInstPtr gpuDynInst) override
- {
- typedef typename MemDataType::CType c0;
-
- gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
-
- if (num_src_operands > 1) {
- for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i)
- if (gpuDynInst->exec_mask[i])
- gpuDynInst->statusVector.push_back(num_src_operands);
- else
- gpuDynInst->statusVector.push_back(0);
- }
-
- for (int k = 0; k < num_src_operands; ++k) {
- c0 *d = &((c0*)gpuDynInst->d_data)
- [k * gpuDynInst->computeUnit()->wfSize()];
-
- for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
- if (gpuDynInst->exec_mask[i]) {
- Addr vaddr = gpuDynInst->addr[i] + k * sizeof(c0);
-
- if (this->isLocalMem()) {
- //store to shared memory
- gpuDynInst->wavefront()->ldsChunk->write<c0>(vaddr,
- *d);
- } else {
- RequestPtr req = std::make_shared<Request>(
- vaddr, sizeof(c0), 0,
- gpuDynInst->computeUnit()->masterId(),
- 0, gpuDynInst->wfDynId);
-
- gpuDynInst->setRequestFlags(req);
- PacketPtr pkt = new Packet(req, MemCmd::WriteReq);
- pkt->dataStatic<c0>(d);
-
- // translation is performed in sendRequest()
- // the request will be finished when the store completes
- gpuDynInst->useContinuation = false;
- gpuDynInst->computeUnit()->sendRequest(gpuDynInst,
- i, pkt);
-
- }
- }
- ++d;
- }
- }
-
- gpuDynInst->updateStats();
- }
-
- public:
- bool isVectorRegister(int operandIndex) override
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (operandIndex == num_src_operands)
- return this->addr.isVectorRegister();
- if (num_src_operands > 1)
- return src_vect[operandIndex].isVectorRegister();
- else if (num_src_operands == 1)
- return StInstBase<MemDataType,
- typename SrcDataType::OperandType,
- AddrOperandType>::src.isVectorRegister();
- return false;
- }
- bool isCondRegister(int operandIndex) override
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (operandIndex == num_src_operands)
- return this->addr.isCondRegister();
- if (num_src_operands > 1)
- return src_vect[operandIndex].isCondRegister();
- else if (num_src_operands == 1)
- return StInstBase<MemDataType,
- typename SrcDataType::OperandType,
- AddrOperandType>::src.isCondRegister();
- return false;
- }
- bool isScalarRegister(int operandIndex) override
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (operandIndex == num_src_operands)
- return this->addr.isScalarRegister();
- if (num_src_operands > 1)
- return src_vect[operandIndex].isScalarRegister();
- else if (num_src_operands == 1)
- return StInstBase<MemDataType,
- typename SrcDataType::OperandType,
- AddrOperandType>::src.isScalarRegister();
- return false;
- }
- bool isSrcOperand(int operandIndex) override
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- return true;
- }
- bool isDstOperand(int operandIndex) override { return false; }
- int getOperandSize(int operandIndex) override
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (operandIndex == num_src_operands)
- return this->addr.opSize();
- if (num_src_operands > 1)
- return src_vect[operandIndex].opSize();
- else if (num_src_operands == 1)
- return StInstBase<MemDataType,
- typename SrcDataType::OperandType,
- AddrOperandType>::src.opSize();
- return 0;
- }
- int
- getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (operandIndex == num_src_operands)
- return this->addr.regIndex();
- if (num_src_operands > 1)
- return src_vect[operandIndex].regIndex();
- else if (num_src_operands == 1)
- return StInstBase<MemDataType,
- typename SrcDataType::OperandType,
- AddrOperandType>::src.regIndex();
- return -1;
- }
- int getNumOperands() override
- {
- if (this->addr.isVectorRegister() || this->addr.isScalarRegister())
- return num_src_operands + 1;
- else
- return num_src_operands;
- }
- void execute(GPUDynInstPtr gpuDynInst) override;
- };
-
- template<typename DataType, typename SrcDataType>
- GPUStaticInst*
- decodeSt(const Brig::BrigInstBase *ib, const BrigObject *obj)
- {
- int srcIdx = 0;
- int destIdx = 1;
- if (ib->opcode == Brig::BRIG_OPCODE_ATOMIC ||
- ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET) {
- srcIdx = 1;
- destIdx = 0;
- }
- unsigned op_offs = obj->getOperandPtr(ib->operands,destIdx);
-
- BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);
-
- if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
- return new StInst<DataType, SrcDataType,
- NoRegAddrOperand>(ib, obj, "st", srcIdx);
- } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
- // V2/V4 not allowed
- switch (tmp.regKind) {
- case Brig::BRIG_REGISTER_KIND_SINGLE:
- return new StInst<DataType, SrcDataType,
- SRegAddrOperand>(ib, obj, "st", srcIdx);
- case Brig::BRIG_REGISTER_KIND_DOUBLE:
- return new StInst<DataType, SrcDataType,
- DRegAddrOperand>(ib, obj, "st", srcIdx);
- default:
- fatal("Bad st register operand type %d\n", tmp.type);
- }
- } else {
- fatal("Bad st register operand kind %d\n", tmp.kind);
- }
- }
-
- template<typename OperandType, typename AddrOperandType, int NumSrcOperands,
- bool HasDst>
- class AtomicInstBase : public HsailGPUStaticInst
- {
- public:
- typename OperandType::DestOperand dest;
- typename OperandType::SrcOperand src[NumSrcOperands];
- AddrOperandType addr;
-
- Brig::BrigSegment segment;
- Brig::BrigMemoryOrder memoryOrder;
- Brig::BrigAtomicOperation atomicOperation;
- Brig::BrigMemoryScope memoryScope;
- Brig::BrigOpcode opcode;
-
- AtomicInstBase(const Brig::BrigInstBase *ib, const BrigObject *obj,
- const char *_opcode)
- : HsailGPUStaticInst(obj, _opcode)
- {
- using namespace Brig;
-
- const BrigInstAtomic *at = (const BrigInstAtomic*)ib;
-
- segment = (BrigSegment)at->segment;
- memoryScope = (BrigMemoryScope)at->memoryScope;
- memoryOrder = (BrigMemoryOrder)at->memoryOrder;
- atomicOperation = (BrigAtomicOperation)at->atomicOperation;
- opcode = (BrigOpcode)ib->opcode;
-
- assert(opcode == Brig::BRIG_OPCODE_ATOMICNORET ||
- opcode == Brig::BRIG_OPCODE_ATOMIC);
-
- setFlag(MemoryRef);
-
- if (opcode == Brig::BRIG_OPCODE_ATOMIC) {
- setFlag(AtomicReturn);
- } else {
- setFlag(AtomicNoReturn);
- }
-
- switch (memoryOrder) {
- case BRIG_MEMORY_ORDER_NONE:
- setFlag(NoOrder);
- break;
- case BRIG_MEMORY_ORDER_RELAXED:
- setFlag(RelaxedOrder);
- break;
- case BRIG_MEMORY_ORDER_SC_ACQUIRE:
- setFlag(Acquire);
- break;
- case BRIG_MEMORY_ORDER_SC_RELEASE:
- setFlag(Release);
- break;
- case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
- setFlag(AcquireRelease);
- break;
- default:
- fatal("AtomicInst has bad memory order type\n");
- }
-
- switch (memoryScope) {
- case BRIG_MEMORY_SCOPE_NONE:
- setFlag(NoScope);
- break;
- case BRIG_MEMORY_SCOPE_WORKITEM:
- setFlag(WorkitemScope);
- break;
- case BRIG_MEMORY_SCOPE_WORKGROUP:
- setFlag(WorkgroupScope);
- break;
- case BRIG_MEMORY_SCOPE_AGENT:
- setFlag(DeviceScope);
- break;
- case BRIG_MEMORY_SCOPE_SYSTEM:
- setFlag(SystemScope);
- break;
- default:
- fatal("AtomicInst has bad memory scope type\n");
- }
-
- switch (atomicOperation) {
- case Brig::BRIG_ATOMIC_AND:
- setFlag(AtomicAnd);
- break;
- case Brig::BRIG_ATOMIC_OR:
- setFlag(AtomicOr);
- break;
- case Brig::BRIG_ATOMIC_XOR:
- setFlag(AtomicXor);
- break;
- case Brig::BRIG_ATOMIC_CAS:
- setFlag(AtomicCAS);
- break;
- case Brig::BRIG_ATOMIC_EXCH:
- setFlag(AtomicExch);
- break;
- case Brig::BRIG_ATOMIC_ADD:
- setFlag(AtomicAdd);
- break;
- case Brig::BRIG_ATOMIC_WRAPINC:
- setFlag(AtomicInc);
- break;
- case Brig::BRIG_ATOMIC_WRAPDEC:
- setFlag(AtomicDec);
- break;
- case Brig::BRIG_ATOMIC_MIN:
- setFlag(AtomicMin);
- break;
- case Brig::BRIG_ATOMIC_MAX:
- setFlag(AtomicMax);
- break;
- case Brig::BRIG_ATOMIC_SUB:
- setFlag(AtomicSub);
- break;
- default:
- fatal("Bad BrigAtomicOperation code %d\n", atomicOperation);
- }
-
- switch (segment) {
- case BRIG_SEGMENT_GLOBAL:
- setFlag(GlobalSegment);
- break;
- case BRIG_SEGMENT_GROUP:
- setFlag(GroupSegment);
- break;
- case BRIG_SEGMENT_FLAT:
- setFlag(Flat);
- break;
- default:
- panic("Atomic: segment %d not supported\n", segment);
- }
-
- if (HasDst) {
- unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
- dest.init(op_offs, obj);
-
- op_offs = obj->getOperandPtr(ib->operands, 1);
- addr.init(op_offs, obj);
-
- for (int i = 0; i < NumSrcOperands; ++i) {
- op_offs = obj->getOperandPtr(ib->operands, i + 2);
- src[i].init(op_offs, obj);
- }
- } else {
-
- unsigned op_offs = obj->getOperandPtr(ib->operands, 0);
- addr.init(op_offs, obj);
-
- for (int i = 0; i < NumSrcOperands; ++i) {
- op_offs = obj->getOperandPtr(ib->operands, i + 1);
- src[i].init(op_offs, obj);
- }
- }
- }
-
- int numSrcRegOperands()
- {
- int operands = 0;
- for (int i = 0; i < NumSrcOperands; i++) {
- if (src[i].isVectorRegister()) {
- operands++;
- }
- }
- if (addr.isVectorRegister())
- operands++;
- return operands;
- }
- int numDstRegOperands() { return dest.isVectorRegister(); }
- int getNumOperands()
- {
- if (addr.isVectorRegister())
- return(NumSrcOperands + 2);
- return(NumSrcOperands + 1);
- }
- bool isVectorRegister(int operandIndex)
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (operandIndex < NumSrcOperands)
- return src[operandIndex].isVectorRegister();
- else if (operandIndex == NumSrcOperands)
- return(addr.isVectorRegister());
- else
- return dest.isVectorRegister();
- }
- bool isCondRegister(int operandIndex)
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (operandIndex < NumSrcOperands)
- return src[operandIndex].isCondRegister();
- else if (operandIndex == NumSrcOperands)
- return(addr.isCondRegister());
- else
- return dest.isCondRegister();
- }
- bool isScalarRegister(int operandIndex)
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (operandIndex < NumSrcOperands)
- return src[operandIndex].isScalarRegister();
- else if (operandIndex == NumSrcOperands)
- return(addr.isScalarRegister());
- else
- return dest.isScalarRegister();
- }
- bool isSrcOperand(int operandIndex)
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (operandIndex < NumSrcOperands)
- return true;
- else if (operandIndex == NumSrcOperands)
- return(addr.isVectorRegister());
- else
- return false;
- }
- bool isDstOperand(int operandIndex)
- {
- if (operandIndex <= NumSrcOperands)
- return false;
- else
- return true;
- }
- int getOperandSize(int operandIndex)
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (operandIndex < NumSrcOperands)
- return(src[operandIndex].opSize());
- else if (operandIndex == NumSrcOperands)
- return(addr.opSize());
- else
- return(dest.opSize());
- }
- int
- getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst)
- {
- assert((operandIndex >= 0) && (operandIndex < getNumOperands()));
- if (operandIndex < NumSrcOperands)
- return(src[operandIndex].regIndex());
- else if (operandIndex == NumSrcOperands)
- return(addr.regIndex());
- else
- return(dest.regIndex());
- return -1;
- }
- };
-
- template<typename MemDataType, typename AddrOperandType, int NumSrcOperands,
- bool HasDst>
- class AtomicInst :
- public AtomicInstBase<typename MemDataType::OperandType,
- AddrOperandType, NumSrcOperands, HasDst>,
- public MemInst
- {
- public:
- void generateDisassembly() override;
-
- AtomicInst(const Brig::BrigInstBase *ib, const BrigObject *obj,
- const char *_opcode)
- : AtomicInstBase<typename MemDataType::OperandType, AddrOperandType,
- NumSrcOperands, HasDst>
- (ib, obj, _opcode),
- MemInst(MemDataType::memType)
- {
- init_addr(&this->addr);
- }
-
- void
- initiateAcc(GPUDynInstPtr gpuDynInst) override
- {
- // before doing the RMW, check if this atomic has
- // release semantics, and if so issue a release first
- if (!this->isLocalMem()) {
- if (gpuDynInst->computeUnit()->shader->separate_acquire_release
- && (gpuDynInst->isRelease()
- || gpuDynInst->isAcquireRelease())) {
-
- gpuDynInst->statusBitVector = VectorMask(1);
-
- gpuDynInst->execContinuation = &GPUStaticInst::execAtomic;
- gpuDynInst->useContinuation = true;
-
- // create request
- RequestPtr req = std::make_shared<Request>(0, 0, 0,
- gpuDynInst->computeUnit()->masterId(),
- 0, gpuDynInst->wfDynId);
- req->setFlags(Request::RELEASE);
- gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
-
- return;
- }
- }
-
- // if there is no release semantic, execute the RMW immediately
- execAtomic(gpuDynInst);
-
- }
-
- void
- completeAcc(GPUDynInstPtr gpuDynInst) override
- {
- // if this is not an atomic return op, then we
- // have nothing more to do.
- if (this->isAtomicRet()) {
- // the size of the src operands and the
- // memory being operated on must match
- // for HSAIL atomics - this assumption may
- // not apply to all ISAs
- typedef typename MemDataType::CType CType;
-
- Wavefront *w = gpuDynInst->wavefront();
- int dst = this->dest.regIndex();
- std::vector<uint32_t> regVec;
- // virtual->physical VGPR mapping
- int physVgpr = w->remap(dst, sizeof(CType), 1);
- regVec.push_back(physVgpr);
- CType *p1 = &((CType*)gpuDynInst->d_data)[0];
-
- for (int i = 0; i < w->computeUnit->wfSize(); ++i) {
- if (gpuDynInst->exec_mask[i]) {
- DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: "
- "$%s%d <- %d global ld done (src = wavefront "
- "ld inst)\n", w->computeUnit->cu_id, w->simdId,
- w->wfSlotId, i, sizeof(CType) == 4 ? "s" : "d",
- dst, *p1);
- // write the value into the physical VGPR. This is a
- // purely functional operation. No timing is modeled.
- w->computeUnit->vrf[w->simdId]->write<CType>(physVgpr, *p1, i);
- }
- ++p1;
- }
-
- // Schedule the write operation of the load data on the VRF.
- // This simply models the timing aspect of the VRF write operation.
- // It does not modify the physical VGPR.
- int loadVrfBankConflictCycles = gpuDynInst->computeUnit()->
- vrf[w->simdId]->exec(gpuDynInst->seqNum(), w, regVec,
- sizeof(CType), gpuDynInst->time);
-
- if (this->isGlobalMem()) {
- gpuDynInst->computeUnit()->globalMemoryPipe
- .incLoadVRFBankConflictCycles(loadVrfBankConflictCycles);
- } else {
- assert(this->isLocalMem());
- gpuDynInst->computeUnit()->localMemoryPipe
- .incLoadVRFBankConflictCycles(loadVrfBankConflictCycles);
- }
- }
- }
-
- void execute(GPUDynInstPtr gpuDynInst) override;
-
- private:
- // execAtomic may be called through a continuation
- // if the RMW had release semantics. see comment for
- // execContinuation in gpu_dyn_inst.hh
- void
- execAtomic(GPUDynInstPtr gpuDynInst) override
- {
- gpuDynInst->statusBitVector = gpuDynInst->exec_mask;
-
- typedef typename MemDataType::CType c0;
-
- c0 *d = &((c0*) gpuDynInst->d_data)[0];
- c0 *e = &((c0*) gpuDynInst->a_data)[0];
- c0 *f = &((c0*) gpuDynInst->x_data)[0];
-
- for (int i = 0; i < gpuDynInst->computeUnit()->wfSize(); ++i) {
- if (gpuDynInst->exec_mask[i]) {
- Addr vaddr = gpuDynInst->addr[i];
-
- if (this->isLocalMem()) {
- Wavefront *wavefront = gpuDynInst->wavefront();
- *d = wavefront->ldsChunk->read<c0>(vaddr);
-
- if (this->isAtomicAdd()) {
- wavefront->ldsChunk->write<c0>(vaddr,
- wavefront->ldsChunk->read<c0>(vaddr) + (*e));
- } else if (this->isAtomicSub()) {
- wavefront->ldsChunk->write<c0>(vaddr,
- wavefront->ldsChunk->read<c0>(vaddr) - (*e));
- } else if (this->isAtomicMax()) {
- wavefront->ldsChunk->write<c0>(vaddr,
- std::max(wavefront->ldsChunk->read<c0>(vaddr),
- (*e)));
- } else if (this->isAtomicMin()) {
- wavefront->ldsChunk->write<c0>(vaddr,
- std::min(wavefront->ldsChunk->read<c0>(vaddr),
- (*e)));
- } else if (this->isAtomicAnd()) {
- wavefront->ldsChunk->write<c0>(vaddr,
- wavefront->ldsChunk->read<c0>(vaddr) & (*e));
- } else if (this->isAtomicOr()) {
- wavefront->ldsChunk->write<c0>(vaddr,
- wavefront->ldsChunk->read<c0>(vaddr) | (*e));
- } else if (this->isAtomicXor()) {
- wavefront->ldsChunk->write<c0>(vaddr,
- wavefront->ldsChunk->read<c0>(vaddr) ^ (*e));
- } else if (this->isAtomicInc()) {
- wavefront->ldsChunk->write<c0>(vaddr,
- wavefront->ldsChunk->read<c0>(vaddr) + 1);
- } else if (this->isAtomicDec()) {
- wavefront->ldsChunk->write<c0>(vaddr,
- wavefront->ldsChunk->read<c0>(vaddr) - 1);
- } else if (this->isAtomicExch()) {
- wavefront->ldsChunk->write<c0>(vaddr, (*e));
- } else if (this->isAtomicCAS()) {
- wavefront->ldsChunk->write<c0>(vaddr,
- (wavefront->ldsChunk->read<c0>(vaddr) == (*e)) ?
- (*f) : wavefront->ldsChunk->read<c0>(vaddr));
- } else {
- fatal("Unrecognized or invalid HSAIL atomic op "
- "type.\n");
- }
- } else {
- RequestPtr req =
- std::make_shared<Request>(vaddr, sizeof(c0), 0,
- gpuDynInst->computeUnit()->masterId(),
- 0, gpuDynInst->wfDynId,
- gpuDynInst->makeAtomicOpFunctor<c0>(e,
- f));
-
- gpuDynInst->setRequestFlags(req);
- PacketPtr pkt = new Packet(req, MemCmd::SwapReq);
- pkt->dataStatic(d);
-
- if (gpuDynInst->computeUnit()->shader->
- separate_acquire_release &&
- (gpuDynInst->isAcquire())) {
- // if this atomic has acquire semantics,
- // schedule the continuation to perform an
- // acquire after the RMW completes
- gpuDynInst->execContinuation =
- &GPUStaticInst::execAtomicAcq;
-
- gpuDynInst->useContinuation = true;
- } else {
- // the request will be finished when the RMW completes
- gpuDynInst->useContinuation = false;
- }
- // translation is performed in sendRequest()
- gpuDynInst->computeUnit()->sendRequest(gpuDynInst, i,
- pkt);
- }
- }
-
- ++d;
- ++e;
- ++f;
- }
-
- gpuDynInst->updateStats();
- }
-
- // execAtomicACq will always be called through a continuation.
- // see comment for execContinuation in gpu_dyn_inst.hh
- void
- execAtomicAcq(GPUDynInstPtr gpuDynInst) override
- {
- // after performing the RMW, check to see if this instruction
- // has acquire semantics, and if so, issue an acquire
- if (!this->isLocalMem()) {
- if (gpuDynInst->computeUnit()->shader->separate_acquire_release
- && gpuDynInst->isAcquire()) {
- gpuDynInst->statusBitVector = VectorMask(1);
-
- // the request will be finished when
- // the acquire completes
- gpuDynInst->useContinuation = false;
- // create request
- RequestPtr req = std::make_shared<Request>(0, 0, 0,
- gpuDynInst->computeUnit()->masterId(),
- 0, gpuDynInst->wfDynId);
- req->setFlags(Request::ACQUIRE);
- gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false, req);
- }
- }
- }
- };
-
- template<typename DataType, typename AddrOperandType, int NumSrcOperands>
- GPUStaticInst*
- constructAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
- {
- const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
-
- if (at->atomicOperation == Brig::BRIG_ATOMIC_LD) {
- return decodeLd<DataType>(ib, obj);
- } else if (at->atomicOperation == Brig::BRIG_ATOMIC_ST) {
- switch (ib->type) {
- case Brig::BRIG_TYPE_B8:
- return decodeSt<S8,S8>(ib, obj);
- case Brig::BRIG_TYPE_B16:
- return decodeSt<S16,S16>(ib, obj);
- case Brig::BRIG_TYPE_B32:
- return decodeSt<S32,S32>(ib, obj);
- case Brig::BRIG_TYPE_B64:
- return decodeSt<S64,S64>(ib, obj);
- default: fatal("AtomicSt: Operand type mismatch %d\n", ib->type);
- }
- } else {
- if ((Brig::BrigOpcode)ib->opcode == Brig::BRIG_OPCODE_ATOMICNORET)
- return new AtomicInst<DataType, AddrOperandType,
- NumSrcOperands, false>(ib, obj, "atomicnoret");
- else
- return new AtomicInst<DataType, AddrOperandType,
- NumSrcOperands, true>(ib, obj, "atomic");
- }
- }
-
- template<typename DataType, int NumSrcOperands>
- GPUStaticInst*
- decodeAtomicHelper(const Brig::BrigInstBase *ib, const BrigObject *obj)
- {
- unsigned addrIndex = (Brig::BrigOpcode)ib->opcode ==
- Brig::BRIG_OPCODE_ATOMICNORET ? 0 : 1;
-
- unsigned op_offs = obj->getOperandPtr(ib->operands,addrIndex);
-
- BrigRegOperandInfo tmp = findRegDataType(op_offs, obj);
-
- if (tmp.kind == Brig::BRIG_KIND_OPERAND_ADDRESS) {
- return constructAtomic<DataType, NoRegAddrOperand,
- NumSrcOperands>(ib, obj);
- } else if (tmp.kind == Brig::BRIG_KIND_OPERAND_REGISTER) {
- // V2/V4 not allowed
- switch (tmp.regKind) {
- case Brig::BRIG_REGISTER_KIND_SINGLE:
- return constructAtomic<DataType, SRegAddrOperand,
- NumSrcOperands>(ib, obj);
- case Brig::BRIG_REGISTER_KIND_DOUBLE:
- return constructAtomic<DataType, DRegAddrOperand,
- NumSrcOperands>(ib, obj);
- default:
- fatal("Bad atomic register operand type %d\n", tmp.type);
- }
- } else {
- fatal("Bad atomic register operand kind %d\n", tmp.kind);
- }
- }
-
-
- template<typename DataType>
- GPUStaticInst*
- decodeAtomic(const Brig::BrigInstBase *ib, const BrigObject *obj)
- {
- const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
-
- if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
- return decodeAtomicHelper<DataType, 2>(ib, obj);
- } else {
- return decodeAtomicHelper<DataType, 1>(ib, obj);
- }
- }
-
- template<typename DataType>
- GPUStaticInst*
- decodeAtomicNoRet(const Brig::BrigInstBase *ib, const BrigObject *obj)
- {
- const Brig::BrigInstAtomic *at = (const Brig::BrigInstAtomic*)ib;
- if (at->atomicOperation == Brig::BRIG_ATOMIC_CAS) {
- return decodeAtomicHelper<DataType, 2>(ib, obj);
- } else {
- return decodeAtomicHelper<DataType, 1>(ib, obj);
- }
- }
-} // namespace HsailISA
-
-#endif // __ARCH_HSAIL_INSTS_MEM_HH__
+++ /dev/null
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Steve Reinhardt
- */
-
-#include "gpu-compute/hsail_code.hh"
-
-// defined in code.cc, but not worth sucking in all of code.h for this
-// at this point
-extern const char *segmentNames[];
-
-namespace HsailISA
-{
- template<typename DestDataType, typename AddrRegOperandType>
- void
- LdaInst<DestDataType, AddrRegOperandType>::generateDisassembly()
- {
- this->disassembly = csprintf("%s_%s %s,%s", this->opcode,
- DestDataType::label,
- this->dest.disassemble(),
- this->addr.disassemble());
- }
-
- template<typename DestDataType, typename AddrRegOperandType>
- void
- LdaInst<DestDataType, AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
- {
- Wavefront *w = gpuDynInst->wavefront();
-
- typedef typename DestDataType::CType CType M5_VAR_USED;
- const VectorMask &mask = w->getPred();
- std::vector<Addr> addr_vec;
- addr_vec.resize(w->computeUnit->wfSize(), (Addr)0);
- this->addr.calcVector(w, addr_vec);
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- this->dest.set(w, lane, addr_vec[lane]);
- }
- }
- addr_vec.clear();
- }
-
- template<typename MemDataType, typename DestDataType,
- typename AddrRegOperandType>
- void
- LdInst<MemDataType, DestDataType, AddrRegOperandType>::generateDisassembly()
- {
- switch (num_dest_operands) {
- case 1:
- this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode,
- segmentNames[this->segment],
- MemDataType::label,
- this->dest.disassemble(),
- this->addr.disassemble());
- break;
- case 2:
- this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode,
- segmentNames[this->segment],
- MemDataType::label,
- this->dest_vect[0].disassemble(),
- this->dest_vect[1].disassemble(),
- this->addr.disassemble());
- break;
- case 3:
- this->disassembly = csprintf("%s_%s_%s (%s,%s,%s), %s", this->opcode,
- segmentNames[this->segment],
- MemDataType::label,
- this->dest_vect[0].disassemble(),
- this->dest_vect[1].disassemble(),
- this->dest_vect[2].disassemble(),
- this->addr.disassemble());
- break;
- case 4:
- this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s",
- this->opcode,
- segmentNames[this->segment],
- MemDataType::label,
- this->dest_vect[0].disassemble(),
- this->dest_vect[1].disassemble(),
- this->dest_vect[2].disassemble(),
- this->dest_vect[3].disassemble(),
- this->addr.disassemble());
- break;
- default:
- fatal("Bad ld register dest operand, num vector operands: %d \n",
- num_dest_operands);
- break;
- }
- }
-
- static Addr
- calcPrivAddr(Addr addr, Wavefront *w, int lane, GPUStaticInst *i)
- {
- // what is the size of the object we are accessing??
- // NOTE: the compiler doesn't generate enough information
- // to do this yet..have to just line up all the private
- // work-item spaces back to back for now
- /*
- StorageElement* se =
- i->parent->findSymbol(Brig::BrigPrivateSpace, addr);
- assert(se);
-
- return w->wfSlotId * w->privSizePerItem * w->computeUnit->wfSize() +
- se->offset * w->computeUnit->wfSize() +
- lane * se->size;
- */
-
- // addressing strategy: interleave the private spaces of
- // work-items in a wave-front on 8 byte granularity.
- // this won't be perfect coalescing like the spill space
- // strategy, but it's better than nothing. The spill space
- // strategy won't work with private because the same address
- // may be accessed by different sized loads/stores.
-
- // Note: I'm assuming that the largest load/store to private
- // is 8 bytes. If it is larger, the stride will have to increase
-
- Addr addr_div8 = addr / 8;
- Addr addr_mod8 = addr % 8;
-
- Addr ret = addr_div8 * 8 * w->computeUnit->wfSize() + lane * 8 +
- addr_mod8 + w->privBase;
-
- assert(ret < w->privBase +
- (w->privSizePerItem * w->computeUnit->wfSize()));
-
- return ret;
- }
-
- template<typename MemDataType, typename DestDataType,
- typename AddrRegOperandType>
- void
- LdInst<MemDataType, DestDataType,
- AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
- {
- Wavefront *w = gpuDynInst->wavefront();
-
- typedef typename MemDataType::CType MemCType;
- const VectorMask &mask = w->getPred();
-
- // Kernarg references are handled uniquely for now (no Memory Request
- // is used), so special-case them up front. Someday we should
- // make this more realistic, at which we should get rid of this
- // block and fold this case into the switch below.
- if (this->segment == Brig::BRIG_SEGMENT_KERNARG) {
- MemCType val;
-
- // I assume no vector ld for kernargs
- assert(num_dest_operands == 1);
-
- // assuming for the moment that we'll never do register
- // offsets into kernarg space... just to make life simpler
- uint64_t address = this->addr.calcUniform();
-
- val = *(MemCType*)&w->kernelArgs[address];
-
- DPRINTF(HSAIL, "ld_kernarg [%d] -> %d\n", address, val);
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- this->dest.set(w, lane, val);
- }
- }
-
- return;
- } else if (this->segment == Brig::BRIG_SEGMENT_ARG) {
- uint64_t address = this->addr.calcUniform();
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- MemCType val = w->readCallArgMem<MemCType>(lane, address);
-
- DPRINTF(HSAIL, "ld_arg [%d] -> %llu\n", address,
- (unsigned long long)val);
-
- this->dest.set(w, lane, val);
- }
- }
-
- return;
- }
-
- GPUDynInstPtr m = gpuDynInst;
-
- this->addr.calcVector(w, m->addr);
-
- m->m_type = MemDataType::memType;
- m->v_type = DestDataType::vgprType;
-
- m->exec_mask = w->execMask();
- m->statusBitVector = 0;
- m->equiv = this->equivClass;
-
- if (num_dest_operands == 1) {
- m->dst_reg = this->dest.regIndex();
- m->n_reg = 1;
- } else {
- m->n_reg = num_dest_operands;
- for (int i = 0; i < num_dest_operands; ++i) {
- m->dst_reg_vec[i] = this->dest_vect[i].regIndex();
- }
- }
-
- m->simdId = w->simdId;
- m->wfSlotId = w->wfSlotId;
- m->wfDynId = w->wfDynId;
- m->kern_id = w->kernId;
- m->cu_id = w->computeUnit->cu_id;
- m->latency.init(&w->computeUnit->shader->tick_cnt);
-
- switch (this->segment) {
- case Brig::BRIG_SEGMENT_GLOBAL:
- m->pipeId = GLBMEM_PIPE;
- m->latency.set(w->computeUnit->shader->ticks(1));
-
- // this is a complete hack to get around a compiler bug
- // (the compiler currently generates global access for private
- // addresses (starting from 0). We need to add the private offset)
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (m->addr[lane] < w->privSizePerItem) {
- if (mask[lane]) {
- // what is the size of the object we are accessing?
- // find base for for this wavefront
-
- // calcPrivAddr will fail if accesses are unaligned
- assert(!((sizeof(MemCType) - 1) & m->addr[lane]));
-
- Addr privAddr = calcPrivAddr(m->addr[lane], w, lane,
- this);
-
- m->addr[lane] = privAddr;
- }
- }
- }
-
- w->computeUnit->globalMemoryPipe.issueRequest(m);
- w->outstandingReqsRdGm++;
- w->rdGmReqsInPipe--;
- break;
-
- case Brig::BRIG_SEGMENT_SPILL:
- assert(num_dest_operands == 1);
- m->pipeId = GLBMEM_PIPE;
- m->latency.set(w->computeUnit->shader->ticks(1));
- {
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- // note: this calculation will NOT WORK if the compiler
- // ever generates loads/stores to the same address with
- // different widths (e.g., a ld_u32 addr and a ld_u16 addr)
- if (mask[lane]) {
- assert(m->addr[lane] < w->spillSizePerItem);
-
- m->addr[lane] = m->addr[lane] * w->spillWidth +
- lane * sizeof(MemCType) + w->spillBase;
-
- w->lastAddr[lane] = m->addr[lane];
- }
- }
- }
-
- w->computeUnit->globalMemoryPipe.issueRequest(m);
- w->outstandingReqsRdGm++;
- w->rdGmReqsInPipe--;
- break;
-
- case Brig::BRIG_SEGMENT_GROUP:
- m->pipeId = LDSMEM_PIPE;
- m->latency.set(w->computeUnit->shader->ticks(24));
- w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
- w->outstandingReqsRdLm++;
- w->rdLmReqsInPipe--;
- break;
-
- case Brig::BRIG_SEGMENT_READONLY:
- m->pipeId = GLBMEM_PIPE;
- m->latency.set(w->computeUnit->shader->ticks(1));
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- assert(m->addr[lane] + sizeof(MemCType) <= w->roSize);
- m->addr[lane] += w->roBase;
- }
- }
-
- w->computeUnit->globalMemoryPipe.issueRequest(m);
- w->outstandingReqsRdGm++;
- w->rdGmReqsInPipe--;
- break;
-
- case Brig::BRIG_SEGMENT_PRIVATE:
- m->pipeId = GLBMEM_PIPE;
- m->latency.set(w->computeUnit->shader->ticks(1));
- {
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- assert(m->addr[lane] < w->privSizePerItem);
-
- m->addr[lane] = m->addr[lane] +
- lane * sizeof(MemCType) + w->privBase;
- }
- }
- }
- w->computeUnit->globalMemoryPipe.issueRequest(m);
- w->outstandingReqsRdGm++;
- w->rdGmReqsInPipe--;
- break;
-
- default:
- fatal("Load to unsupported segment %d %llxe\n", this->segment,
- m->addr[0]);
- }
-
- w->outstandingReqs++;
- w->memReqsInPipe--;
- }
-
- template<typename OperationType, typename SrcDataType,
- typename AddrRegOperandType>
- void
- StInst<OperationType, SrcDataType,
- AddrRegOperandType>::execute(GPUDynInstPtr gpuDynInst)
- {
- Wavefront *w = gpuDynInst->wavefront();
-
- typedef typename OperationType::CType CType;
-
- const VectorMask &mask = w->getPred();
-
- // arg references are handled uniquely for now (no Memory Request
- // is used), so special-case them up front. Someday we should
- // make this more realistic, at which we should get rid of this
- // block and fold this case into the switch below.
- if (this->segment == Brig::BRIG_SEGMENT_ARG) {
- uint64_t address = this->addr.calcUniform();
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- CType data = this->src.template get<CType>(w, lane);
- DPRINTF(HSAIL, "st_arg [%d] <- %d\n", address, data);
- w->writeCallArgMem<CType>(lane, address, data);
- }
- }
-
- return;
- }
-
- GPUDynInstPtr m = gpuDynInst;
-
- m->exec_mask = w->execMask();
-
- this->addr.calcVector(w, m->addr);
-
- if (num_src_operands == 1) {
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- ((CType*)m->d_data)[lane] =
- this->src.template get<CType>(w, lane);
- }
- }
- } else {
- for (int k= 0; k < num_src_operands; ++k) {
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- ((CType*)m->d_data)[k * w->computeUnit->wfSize() + lane] =
- this->src_vect[k].template get<CType>(w, lane);
- }
- }
- }
- }
-
- m->m_type = OperationType::memType;
- m->v_type = OperationType::vgprType;
-
- m->statusBitVector = 0;
- m->equiv = this->equivClass;
-
- if (num_src_operands == 1) {
- m->n_reg = 1;
- } else {
- m->n_reg = num_src_operands;
- }
-
- m->simdId = w->simdId;
- m->wfSlotId = w->wfSlotId;
- m->wfDynId = w->wfDynId;
- m->kern_id = w->kernId;
- m->cu_id = w->computeUnit->cu_id;
- m->latency.init(&w->computeUnit->shader->tick_cnt);
-
- switch (this->segment) {
- case Brig::BRIG_SEGMENT_GLOBAL:
- m->pipeId = GLBMEM_PIPE;
- m->latency.set(w->computeUnit->shader->ticks(1));
-
- // this is a complete hack to get around a compiler bug
- // (the compiler currently generates global access for private
- // addresses (starting from 0). We need to add the private offset)
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- if (m->addr[lane] < w->privSizePerItem) {
-
- // calcPrivAddr will fail if accesses are unaligned
- assert(!((sizeof(CType)-1) & m->addr[lane]));
-
- Addr privAddr = calcPrivAddr(m->addr[lane], w, lane,
- this);
-
- m->addr[lane] = privAddr;
- }
- }
- }
-
- w->computeUnit->globalMemoryPipe.issueRequest(m);
- w->outstandingReqsWrGm++;
- w->wrGmReqsInPipe--;
- break;
-
- case Brig::BRIG_SEGMENT_SPILL:
- assert(num_src_operands == 1);
- m->pipeId = GLBMEM_PIPE;
- m->latency.set(w->computeUnit->shader->ticks(1));
- {
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- assert(m->addr[lane] < w->spillSizePerItem);
-
- m->addr[lane] = m->addr[lane] * w->spillWidth +
- lane * sizeof(CType) + w->spillBase;
- }
- }
- }
-
- w->computeUnit->globalMemoryPipe.issueRequest(m);
- w->outstandingReqsWrGm++;
- w->wrGmReqsInPipe--;
- break;
-
- case Brig::BRIG_SEGMENT_GROUP:
- m->pipeId = LDSMEM_PIPE;
- m->latency.set(w->computeUnit->shader->ticks(24));
- w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
- w->outstandingReqsWrLm++;
- w->wrLmReqsInPipe--;
- break;
-
- case Brig::BRIG_SEGMENT_PRIVATE:
- m->pipeId = GLBMEM_PIPE;
- m->latency.set(w->computeUnit->shader->ticks(1));
- {
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- assert(m->addr[lane] < w->privSizePerItem);
- m->addr[lane] = m->addr[lane] + lane *
- sizeof(CType)+w->privBase;
- }
- }
- }
-
- w->computeUnit->globalMemoryPipe.issueRequest(m);
- w->outstandingReqsWrGm++;
- w->wrGmReqsInPipe--;
- break;
-
- default:
- fatal("Store to unsupported segment %d\n", this->segment);
- }
-
- w->outstandingReqs++;
- w->memReqsInPipe--;
- }
-
- template<typename OperationType, typename SrcDataType,
- typename AddrRegOperandType>
- void
- StInst<OperationType, SrcDataType,
- AddrRegOperandType>::generateDisassembly()
- {
- switch (num_src_operands) {
- case 1:
- this->disassembly = csprintf("%s_%s_%s %s,%s", this->opcode,
- segmentNames[this->segment],
- OperationType::label,
- this->src.disassemble(),
- this->addr.disassemble());
- break;
- case 2:
- this->disassembly = csprintf("%s_%s_%s (%s,%s), %s", this->opcode,
- segmentNames[this->segment],
- OperationType::label,
- this->src_vect[0].disassemble(),
- this->src_vect[1].disassemble(),
- this->addr.disassemble());
- break;
- case 4:
- this->disassembly = csprintf("%s_%s_%s (%s,%s,%s,%s), %s",
- this->opcode,
- segmentNames[this->segment],
- OperationType::label,
- this->src_vect[0].disassemble(),
- this->src_vect[1].disassemble(),
- this->src_vect[2].disassemble(),
- this->src_vect[3].disassemble(),
- this->addr.disassemble());
- break;
- default: fatal("Bad ld register src operand, num vector operands: "
- "%d \n", num_src_operands);
- break;
- }
- }
-
- template<typename DataType, typename AddrRegOperandType, int NumSrcOperands,
- bool HasDst>
- void
- AtomicInst<DataType, AddrRegOperandType, NumSrcOperands,
- HasDst>::execute(GPUDynInstPtr gpuDynInst)
- {
- typedef typename DataType::CType CType;
-
- Wavefront *w = gpuDynInst->wavefront();
-
- GPUDynInstPtr m = gpuDynInst;
-
- this->addr.calcVector(w, m->addr);
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- ((CType *)m->a_data)[lane] =
- this->src[0].template get<CType>(w, lane);
- }
-
- // load second source operand for CAS
- if (NumSrcOperands > 1) {
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- ((CType*)m->x_data)[lane] =
- this->src[1].template get<CType>(w, lane);
- }
- }
-
- assert(NumSrcOperands <= 2);
-
- m->m_type = DataType::memType;
- m->v_type = DataType::vgprType;
-
- m->exec_mask = w->execMask();
- m->statusBitVector = 0;
- m->equiv = 0; // atomics don't have an equivalence class operand
- m->n_reg = 1;
-
- if (HasDst) {
- m->dst_reg = this->dest.regIndex();
- }
-
- m->simdId = w->simdId;
- m->wfSlotId = w->wfSlotId;
- m->wfDynId = w->wfDynId;
- m->kern_id = w->kernId;
- m->cu_id = w->computeUnit->cu_id;
- m->latency.init(&w->computeUnit->shader->tick_cnt);
-
- switch (this->segment) {
- case Brig::BRIG_SEGMENT_GLOBAL:
- m->latency.set(w->computeUnit->shader->ticks(64));
- m->pipeId = GLBMEM_PIPE;
-
- w->computeUnit->globalMemoryPipe.issueRequest(m);
- w->outstandingReqsWrGm++;
- w->wrGmReqsInPipe--;
- w->outstandingReqsRdGm++;
- w->rdGmReqsInPipe--;
- break;
-
- case Brig::BRIG_SEGMENT_GROUP:
- m->pipeId = LDSMEM_PIPE;
- m->latency.set(w->computeUnit->shader->ticks(24));
- w->computeUnit->localMemoryPipe.getLMReqFIFO().push(m);
- w->outstandingReqsWrLm++;
- w->wrLmReqsInPipe--;
- w->outstandingReqsRdLm++;
- w->rdLmReqsInPipe--;
- break;
-
- default:
- fatal("Atomic op to unsupported segment %d\n",
- this->segment);
- }
-
- w->outstandingReqs++;
- w->memReqsInPipe--;
- }
-
- const char* atomicOpToString(Brig::BrigAtomicOperation atomicOp);
-
- template<typename DataType, typename AddrRegOperandType, int NumSrcOperands,
- bool HasDst>
- void
- AtomicInst<DataType, AddrRegOperandType, NumSrcOperands,
- HasDst>::generateDisassembly()
- {
- if (HasDst) {
- this->disassembly =
- csprintf("%s_%s_%s_%s %s,%s", this->opcode,
- atomicOpToString(this->atomicOperation),
- segmentNames[this->segment],
- DataType::label, this->dest.disassemble(),
- this->addr.disassemble());
- } else {
- this->disassembly =
- csprintf("%s_%s_%s_%s %s", this->opcode,
- atomicOpToString(this->atomicOperation),
- segmentNames[this->segment],
- DataType::label, this->addr.disassemble());
- }
-
- for (int i = 0; i < NumSrcOperands; ++i) {
- this->disassembly += ",";
- this->disassembly += this->src[i].disassemble();
- }
- }
-} // namespace HsailISA
+++ /dev/null
-/*
- * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Marc Orr
- */
-
-#include <csignal>
-
-#include "arch/hsail/insts/decl.hh"
-#include "arch/hsail/insts/mem.hh"
-
-namespace HsailISA
-{
- // Pseudo (or magic) instructions are overloaded on the hsail call
- // instruction, because of its flexible parameter signature.
-
- // To add a new magic instruction:
- // 1. Add an entry to the enum.
- // 2. Implement it in the switch statement below (Call::exec).
- // 3. Add a utility function to hsa/hsail-gpu-compute/util/magicinst.h,
- // so its easy to call from an OpenCL kernel.
-
- // This enum should be identical to the enum in
- // hsa/hsail-gpu-compute/util/magicinst.h
- enum
- {
- MAGIC_PRINT_WF_32 = 0,
- MAGIC_PRINT_WF_64,
- MAGIC_PRINT_LANE,
- MAGIC_PRINT_LANE_64,
- MAGIC_PRINT_WF_FLOAT,
- MAGIC_SIM_BREAK,
- MAGIC_PREF_SUM,
- MAGIC_REDUCTION,
- MAGIC_MASKLANE_LOWER,
- MAGIC_MASKLANE_UPPER,
- MAGIC_JOIN_WF_BAR,
- MAGIC_WAIT_WF_BAR,
- MAGIC_PANIC,
- MAGIC_ATOMIC_NR_ADD_GLOBAL_U32_REG,
- MAGIC_ATOMIC_NR_ADD_GROUP_U32_REG,
- MAGIC_LOAD_GLOBAL_U32_REG,
- MAGIC_XACT_CAS_LD,
- MAGIC_MOST_SIG_THD,
- MAGIC_MOST_SIG_BROADCAST,
- MAGIC_PRINT_WFID_32,
- MAGIC_PRINT_WFID_64
- };
-
- void
- Call::execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst)
- {
- const VectorMask &mask = w->getPred();
-
- int op = 0;
- bool got_op = false;
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- int src_val0 = src1.get<int>(w, lane, 0);
- if (got_op) {
- if (src_val0 != op) {
- fatal("Multiple magic instructions per PC not "
- "supported\n");
- }
- } else {
- op = src_val0;
- got_op = true;
- }
- }
- }
-
- switch(op) {
- case MAGIC_PRINT_WF_32:
- MagicPrintWF32(w);
- break;
- case MAGIC_PRINT_WF_64:
- MagicPrintWF64(w);
- break;
- case MAGIC_PRINT_LANE:
- MagicPrintLane(w);
- break;
- case MAGIC_PRINT_LANE_64:
- MagicPrintLane64(w);
- break;
- case MAGIC_PRINT_WF_FLOAT:
- MagicPrintWFFloat(w);
- break;
- case MAGIC_SIM_BREAK:
- MagicSimBreak(w);
- break;
- case MAGIC_PREF_SUM:
- MagicPrefixSum(w);
- break;
- case MAGIC_REDUCTION:
- MagicReduction(w);
- break;
- case MAGIC_MASKLANE_LOWER:
- MagicMaskLower(w);
- break;
- case MAGIC_MASKLANE_UPPER:
- MagicMaskUpper(w);
- break;
- case MAGIC_JOIN_WF_BAR:
- MagicJoinWFBar(w);
- break;
- case MAGIC_WAIT_WF_BAR:
- MagicWaitWFBar(w);
- break;
- case MAGIC_PANIC:
- MagicPanic(w);
- break;
-
- // atomic instructions
- case MAGIC_ATOMIC_NR_ADD_GLOBAL_U32_REG:
- MagicAtomicNRAddGlobalU32Reg(w, gpuDynInst);
- break;
-
- case MAGIC_ATOMIC_NR_ADD_GROUP_U32_REG:
- MagicAtomicNRAddGroupU32Reg(w, gpuDynInst);
- break;
-
- case MAGIC_LOAD_GLOBAL_U32_REG:
- MagicLoadGlobalU32Reg(w, gpuDynInst);
- break;
-
- case MAGIC_XACT_CAS_LD:
- MagicXactCasLd(w);
- break;
-
- case MAGIC_MOST_SIG_THD:
- MagicMostSigThread(w);
- break;
-
- case MAGIC_MOST_SIG_BROADCAST:
- MagicMostSigBroadcast(w);
- break;
-
- case MAGIC_PRINT_WFID_32:
- MagicPrintWF32ID(w);
- break;
-
- case MAGIC_PRINT_WFID_64:
- MagicPrintWFID64(w);
- break;
-
- default: fatal("unrecognized magic instruction: %d\n", op);
- }
- }
-
- void
- Call::MagicPrintLane(Wavefront *w)
- {
- #if TRACING_ON
- const VectorMask &mask = w->getPred();
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- int src_val1 = src1.get<int>(w, lane, 1);
- int src_val2 = src1.get<int>(w, lane, 2);
- if (src_val2) {
- DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: 0x%x\n",
- disassemble(), w->computeUnit->cu_id, w->simdId,
- w->wfSlotId, lane, src_val1);
- } else {
- DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: %d\n",
- disassemble(), w->computeUnit->cu_id, w->simdId,
- w->wfSlotId, lane, src_val1);
- }
- }
- }
- #endif
- }
-
- void
- Call::MagicPrintLane64(Wavefront *w)
- {
- #if TRACING_ON
- const VectorMask &mask = w->getPred();
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- int64_t src_val1 = src1.get<int64_t>(w, lane, 1);
- int src_val2 = src1.get<int>(w, lane, 2);
- if (src_val2) {
- DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: 0x%x\n",
- disassemble(), w->computeUnit->cu_id, w->simdId,
- w->wfSlotId, lane, src_val1);
- } else {
- DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: %d\n",
- disassemble(), w->computeUnit->cu_id, w->simdId,
- w->wfSlotId, lane, src_val1);
- }
- }
- }
- #endif
- }
-
- void
- Call::MagicPrintWF32(Wavefront *w)
- {
- #if TRACING_ON
- const VectorMask &mask = w->getPred();
- std::string res_str;
- res_str = csprintf("krl_prt (%s)\n", disassemble());
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (!(lane & 7)) {
- res_str += csprintf("DB%03d: ", (int)w->wfDynId);
- }
-
- if (mask[lane]) {
- int src_val1 = src1.get<int>(w, lane, 1);
- int src_val2 = src1.get<int>(w, lane, 2);
-
- if (src_val2) {
- res_str += csprintf("%08x", src_val1);
- } else {
- res_str += csprintf("%08d", src_val1);
- }
- } else {
- res_str += csprintf("xxxxxxxx");
- }
-
- if ((lane & 7) == 7) {
- res_str += csprintf("\n");
- } else {
- res_str += csprintf(" ");
- }
- }
-
- res_str += "\n\n";
- DPRINTFN(res_str.c_str());
- #endif
- }
-
- void
- Call::MagicPrintWF32ID(Wavefront *w)
- {
- #if TRACING_ON
- const VectorMask &mask = w->getPred();
- std::string res_str;
- int src_val3 = -1;
- res_str = csprintf("krl_prt (%s)\n", disassemble());
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (!(lane & 7)) {
- res_str += csprintf("DB%03d: ", (int)w->wfDynId);
- }
-
- if (mask[lane]) {
- int src_val1 = src1.get<int>(w, lane, 1);
- int src_val2 = src1.get<int>(w, lane, 2);
- src_val3 = src1.get<int>(w, lane, 3);
-
- if (src_val2) {
- res_str += csprintf("%08x", src_val1);
- } else {
- res_str += csprintf("%08d", src_val1);
- }
- } else {
- res_str += csprintf("xxxxxxxx");
- }
-
- if ((lane & 7) == 7) {
- res_str += csprintf("\n");
- } else {
- res_str += csprintf(" ");
- }
- }
-
- res_str += "\n\n";
- if (w->wfDynId == src_val3) {
- DPRINTFN(res_str.c_str());
- }
- #endif
- }
-
- void
- Call::MagicPrintWF64(Wavefront *w)
- {
- #if TRACING_ON
- const VectorMask &mask = w->getPred();
- std::string res_str;
- res_str = csprintf("krl_prt (%s)\n", disassemble());
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (!(lane & 3)) {
- res_str += csprintf("DB%03d: ", (int)w->wfDynId);
- }
-
- if (mask[lane]) {
- int64_t src_val1 = src1.get<int64_t>(w, lane, 1);
- int src_val2 = src1.get<int>(w, lane, 2);
-
- if (src_val2) {
- res_str += csprintf("%016x", src_val1);
- } else {
- res_str += csprintf("%016d", src_val1);
- }
- } else {
- res_str += csprintf("xxxxxxxxxxxxxxxx");
- }
-
- if ((lane & 3) == 3) {
- res_str += csprintf("\n");
- } else {
- res_str += csprintf(" ");
- }
- }
-
- res_str += "\n\n";
- DPRINTFN(res_str.c_str());
- #endif
- }
-
- void
- Call::MagicPrintWFID64(Wavefront *w)
- {
- #if TRACING_ON
- const VectorMask &mask = w->getPred();
- std::string res_str;
- int src_val3 = -1;
- res_str = csprintf("krl_prt (%s)\n", disassemble());
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (!(lane & 3)) {
- res_str += csprintf("DB%03d: ", (int)w->wfDynId);
- }
-
- if (mask[lane]) {
- int64_t src_val1 = src1.get<int64_t>(w, lane, 1);
- int src_val2 = src1.get<int>(w, lane, 2);
- src_val3 = src1.get<int>(w, lane, 3);
-
- if (src_val2) {
- res_str += csprintf("%016x", src_val1);
- } else {
- res_str += csprintf("%016d", src_val1);
- }
- } else {
- res_str += csprintf("xxxxxxxxxxxxxxxx");
- }
-
- if ((lane & 3) == 3) {
- res_str += csprintf("\n");
- } else {
- res_str += csprintf(" ");
- }
- }
-
- res_str += "\n\n";
- if (w->wfDynId == src_val3) {
- DPRINTFN(res_str.c_str());
- }
- #endif
- }
-
- void
- Call::MagicPrintWFFloat(Wavefront *w)
- {
- #if TRACING_ON
- const VectorMask &mask = w->getPred();
- std::string res_str;
- res_str = csprintf("krl_prt (%s)\n", disassemble());
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (!(lane & 7)) {
- res_str += csprintf("DB%03d: ", (int)w->wfDynId);
- }
-
- if (mask[lane]) {
- float src_val1 = src1.get<float>(w, lane, 1);
- res_str += csprintf("%08f", src_val1);
- } else {
- res_str += csprintf("xxxxxxxx");
- }
-
- if ((lane & 7) == 7) {
- res_str += csprintf("\n");
- } else {
- res_str += csprintf(" ");
- }
- }
-
- res_str += "\n\n";
- DPRINTFN(res_str.c_str());
- #endif
- }
-
- // raises a signal that GDB will catch
- // when done with the break, type "signal 0" in gdb to continue
- void
- Call::MagicSimBreak(Wavefront *w)
- {
- std::string res_str;
- // print out state for this wavefront and then break
- res_str = csprintf("Breakpoint encountered for wavefront %i\n",
- w->wfSlotId);
-
- res_str += csprintf(" Kern ID: %i\n", w->kernId);
- res_str += csprintf(" Phase ID: %i\n", w->simdId);
- res_str += csprintf(" Executing on CU #%i\n", w->computeUnit->cu_id);
- res_str += csprintf(" Exec mask: ");
-
- for (int i = w->computeUnit->wfSize() - 1; i >= 0; --i) {
- if (w->execMask(i))
- res_str += "1";
- else
- res_str += "0";
-
- if ((i & 7) == 7)
- res_str += " ";
- }
-
- res_str += csprintf("(0x%016llx)\n", w->execMask().to_ullong());
-
- res_str += "\nHelpful debugging hints:\n";
- res_str += " Check out w->s_reg / w->d_reg for register state\n";
-
- res_str += "\n\n";
- DPRINTFN(res_str.c_str());
- fflush(stdout);
-
- raise(SIGTRAP);
- }
-
- void
- Call::MagicPrefixSum(Wavefront *w)
- {
- const VectorMask &mask = w->getPred();
- int res = 0;
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- int src_val1 = src1.get<int>(w, lane, 1);
- dest.set<int>(w, lane, res);
- res += src_val1;
- }
- }
- }
-
- void
- Call::MagicReduction(Wavefront *w)
- {
- // reduction magic instruction
- // The reduction instruction takes up to 64 inputs (one from
- // each thread in a WF) and sums them. It returns the sum to
- // each thread in the WF.
- const VectorMask &mask = w->getPred();
- int res = 0;
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- int src_val1 = src1.get<int>(w, lane, 1);
- res += src_val1;
- }
- }
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- dest.set<int>(w, lane, res);
- }
- }
- }
-
- void
- Call::MagicMaskLower(Wavefront *w)
- {
- const VectorMask &mask = w->getPred();
- int res = 0;
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- int src_val1 = src1.get<int>(w, lane, 1);
-
- if (src_val1) {
- if (lane < (w->computeUnit->wfSize()/2)) {
- res = res | ((uint32_t)(1) << lane);
- }
- }
- }
- }
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- dest.set<int>(w, lane, res);
- }
- }
- }
-
- void
- Call::MagicMaskUpper(Wavefront *w)
- {
- const VectorMask &mask = w->getPred();
- int res = 0;
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- int src_val1 = src1.get<int>(w, lane, 1);
-
- if (src_val1) {
- if (lane >= (w->computeUnit->wfSize()/2)) {
- res = res | ((uint32_t)(1) <<
- (lane - (w->computeUnit->wfSize()/2)));
- }
- }
- }
- }
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- dest.set<int>(w, lane, res);
- }
- }
- }
-
- void
- Call::MagicJoinWFBar(Wavefront *w)
- {
- const VectorMask &mask = w->getPred();
- int max_cnt = 0;
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- w->barCnt[lane]++;
-
- if (w->barCnt[lane] > max_cnt) {
- max_cnt = w->barCnt[lane];
- }
- }
- }
-
- if (max_cnt > w->maxBarCnt) {
- w->maxBarCnt = max_cnt;
- }
- }
-
- void
- Call::MagicWaitWFBar(Wavefront *w)
- {
- const VectorMask &mask = w->getPred();
- int max_cnt = 0;
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- w->barCnt[lane]--;
- }
-
- if (w->barCnt[lane] > max_cnt) {
- max_cnt = w->barCnt[lane];
- }
- }
-
- if (max_cnt < w->maxBarCnt) {
- w->maxBarCnt = max_cnt;
- }
-
- w->instructionBuffer.erase(w->instructionBuffer.begin() + 1,
- w->instructionBuffer.end());
- if (w->pendingFetch)
- w->dropFetch = true;
- }
-
- void
- Call::MagicPanic(Wavefront *w)
- {
- const VectorMask &mask = w->getPred();
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- int src_val1 = src1.get<int>(w, lane, 1);
- panic("OpenCL Code failed assertion #%d. Triggered by lane %s",
- src_val1, lane);
- }
- }
- }
-
- void
- Call::calcAddr(Wavefront *w, GPUDynInstPtr m)
- {
- // the address is in src1 | src2
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- int src_val1 = src1.get<int>(w, lane, 1);
- int src_val2 = src1.get<int>(w, lane, 2);
- Addr addr = (((Addr) src_val1) << 32) | ((Addr) src_val2);
-
- m->addr[lane] = addr;
- }
-
- }
-
- void
- Call::MagicAtomicNRAddGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst)
- {
- GPUDynInstPtr m = gpuDynInst;
-
- calcAddr(w, m);
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- ((int*)m->a_data)[lane] = src1.get<int>(w, lane, 3);
- }
-
- setFlag(AtomicNoReturn);
- setFlag(AtomicAdd);
- setFlag(NoScope);
- setFlag(NoOrder);
- setFlag(GlobalSegment);
-
- m->m_type = U32::memType;
- m->v_type = U32::vgprType;
-
- m->exec_mask = w->execMask();
- m->statusBitVector = 0;
- m->equiv = 0; // atomics don't have an equivalence class operand
- m->n_reg = 1;
-
- m->simdId = w->simdId;
- m->wfSlotId = w->wfSlotId;
- m->wfDynId = w->wfDynId;
- m->latency.init(&w->computeUnit->shader->tick_cnt);
-
- m->pipeId = GLBMEM_PIPE;
- m->latency.set(w->computeUnit->shader->ticks(64));
- w->computeUnit->globalMemoryPipe.issueRequest(m);
- w->outstandingReqsWrGm++;
- w->wrGmReqsInPipe--;
- w->outstandingReqsRdGm++;
- w->rdGmReqsInPipe--;
- w->outstandingReqs++;
- w->memReqsInPipe--;
- }
-
- void
- Call::MagicAtomicNRAddGroupU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst)
- {
- GPUDynInstPtr m = gpuDynInst;
- calcAddr(w, m);
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- ((int*)m->a_data)[lane] = src1.get<int>(w, lane, 1);
- }
-
- setFlag(AtomicNoReturn);
- setFlag(AtomicAdd);
- setFlag(NoScope);
- setFlag(NoOrder);
- setFlag(GlobalSegment);
-
- m->m_type = U32::memType;
- m->v_type = U32::vgprType;
-
- m->exec_mask = w->execMask();
- m->statusBitVector = 0;
- m->equiv = 0; // atomics don't have an equivalence class operand
- m->n_reg = 1;
-
- m->simdId = w->simdId;
- m->wfSlotId = w->wfSlotId;
- m->wfDynId = w->wfDynId;
- m->latency.init(&w->computeUnit->shader->tick_cnt);
-
- m->pipeId = GLBMEM_PIPE;
- m->latency.set(w->computeUnit->shader->ticks(64));
- w->computeUnit->globalMemoryPipe.issueRequest(m);
- w->outstandingReqsWrGm++;
- w->wrGmReqsInPipe--;
- w->outstandingReqsRdGm++;
- w->rdGmReqsInPipe--;
- w->outstandingReqs++;
- w->memReqsInPipe--;
- }
-
- void
- Call::MagicLoadGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst)
- {
- GPUDynInstPtr m = gpuDynInst;
- // calculate the address
- calcAddr(w, m);
-
- setFlag(Load);
- setFlag(NoScope);
- setFlag(NoOrder);
- setFlag(GlobalSegment);
-
- m->m_type = U32::memType; //MemDataType::memType;
- m->v_type = U32::vgprType; //DestDataType::vgprType;
-
- m->exec_mask = w->execMask();
- m->statusBitVector = 0;
- m->equiv = 0;
- m->n_reg = 1;
-
- // FIXME
- //m->dst_reg = this->dest.regIndex();
-
- m->simdId = w->simdId;
- m->wfSlotId = w->wfSlotId;
- m->wfDynId = w->wfDynId;
- m->latency.init(&w->computeUnit->shader->tick_cnt);
-
- m->pipeId = GLBMEM_PIPE;
- m->latency.set(w->computeUnit->shader->ticks(1));
- w->computeUnit->globalMemoryPipe.issueRequest(m);
- w->outstandingReqsRdGm++;
- w->rdGmReqsInPipe--;
- w->outstandingReqs++;
- w->memReqsInPipe--;
- }
-
- void
- Call::MagicXactCasLd(Wavefront *w)
- {
- const VectorMask &mask = w->getPred();
- int src_val1 = 0;
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (mask[lane]) {
- src_val1 = src1.get<int>(w, lane, 1);
- break;
- }
- }
-
- if (!w->computeUnit->xactCasLoadMap.count(src_val1)) {
- w->computeUnit->xactCasLoadMap[src_val1] = ComputeUnit::waveQueue();
- w->computeUnit->xactCasLoadMap[src_val1].waveIDQueue.clear();
- }
-
- w->computeUnit->xactCasLoadMap[src_val1].waveIDQueue
- .push_back(ComputeUnit::waveIdentifier(w->simdId, w->wfSlotId));
- }
-
- void
- Call::MagicMostSigThread(Wavefront *w)
- {
- const VectorMask &mask = w->getPred();
- unsigned mst = true;
-
- for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) {
- if (mask[lane]) {
- dest.set<int>(w, lane, mst);
- mst = false;
- }
- }
- }
-
- void
- Call::MagicMostSigBroadcast(Wavefront *w)
- {
- const VectorMask &mask = w->getPred();
- int res = 0;
- bool got_res = false;
-
- for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) {
- if (mask[lane]) {
- if (!got_res) {
- res = src1.get<int>(w, lane, 1);
- got_res = true;
- }
- dest.set<int>(w, lane, res);
- }
- }
- }
-
-} // namespace HsailISA
+++ /dev/null
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Steve Reinhardt
- */
-
-#include "arch/hsail/operand.hh"
-
-using namespace Brig;
-
-bool
-BaseRegOperand::init(unsigned opOffset, const BrigObject *obj,
- unsigned &maxRegIdx, char _regFileChar)
-{
- regFileChar = _regFileChar;
- const BrigOperand *brigOp = obj->getOperand(opOffset);
-
- if (brigOp->kind != BRIG_KIND_OPERAND_REGISTER)
- return false;
-
- const BrigOperandRegister *brigRegOp = (const BrigOperandRegister*)brigOp;
-
- regIdx = brigRegOp->regNum;
-
- DPRINTF(GPUReg, "Operand: regNum: %d, kind: %d\n", regIdx,
- brigRegOp->regKind);
-
- maxRegIdx = std::max(maxRegIdx, regIdx);
-
- return true;
-}
-
-void
-ListOperand::init(unsigned opOffset, const BrigObject *obj)
-{
- const BrigOperand *brigOp = (const BrigOperand*)obj->getOperand(opOffset);
-
- switch (brigOp->kind) {
- case BRIG_KIND_OPERAND_CODE_LIST:
- {
- const BrigOperandCodeList *opList =
- (const BrigOperandCodeList*)brigOp;
-
- const Brig::BrigData *oprnd_data =
- obj->getBrigBaseData(opList->elements);
-
- // Note: for calls Dest list of operands could be size of 0.
- elementCount = oprnd_data->byteCount / 4;
-
- DPRINTF(GPUReg, "Operand Code List: # elements: %d\n",
- elementCount);
-
- for (int i = 0; i < elementCount; ++i) {
- unsigned *data_offset =
- (unsigned*)obj->getData(opList->elements + 4 * (i + 1));
-
- const BrigDirectiveVariable *p =
- (const BrigDirectiveVariable*)obj->
- getCodeSectionEntry(*data_offset);
-
- StorageElement *se = obj->currentCode->storageMap->
- findSymbol(BRIG_SEGMENT_ARG, p);
-
- assert(se);
- callArgs.push_back(se);
- }
- }
- break;
- default:
- fatal("ListOperand: bad operand kind %d\n", brigOp->kind);
- }
-}
-
-std::string
-ListOperand::disassemble()
-{
- std::string res_str("");
-
- for (auto it : callArgs) {
- res_str += csprintf("%s ", it->name.c_str());
- }
-
- return res_str;
-}
-
-void
-FunctionRefOperand::init(unsigned opOffset, const BrigObject *obj)
-{
- const BrigOperand *baseOp = obj->getOperand(opOffset);
-
- if (baseOp->kind != BRIG_KIND_OPERAND_CODE_REF) {
- fatal("FunctionRefOperand: bad operand kind %d\n", baseOp->kind);
- }
-
- const BrigOperandCodeRef *brigOp = (const BrigOperandCodeRef*)baseOp;
-
- const BrigDirectiveExecutable *p =
- (const BrigDirectiveExecutable*)obj->getCodeSectionEntry(brigOp->ref);
-
- func_name = obj->getString(p->name);
-}
-
-std::string
-FunctionRefOperand::disassemble()
-{
- DPRINTF(GPUReg, "Operand Func-ref name: %s\n", func_name);
-
- return csprintf("%s", func_name);
-}
-
-bool
-BaseRegOperand::init_from_vect(unsigned opOffset, const BrigObject *obj,
- int at, unsigned &maxRegIdx, char _regFileChar)
-{
- regFileChar = _regFileChar;
- const BrigOperand *brigOp = obj->getOperand(opOffset);
-
- if (brigOp->kind != BRIG_KIND_OPERAND_OPERAND_LIST)
- return false;
-
-
- const Brig::BrigOperandOperandList *brigRegVecOp =
- (const Brig::BrigOperandOperandList*)brigOp;
-
- unsigned *data_offset =
- (unsigned*)obj->getData(brigRegVecOp->elements + 4 * (at + 1));
-
- const BrigOperand *p =
- (const BrigOperand*)obj->getOperand(*data_offset);
- if (p->kind != BRIG_KIND_OPERAND_REGISTER) {
- return false;
- }
-
- const BrigOperandRegister *brigRegOp =(const BrigOperandRegister*)p;
-
- regIdx = brigRegOp->regNum;
-
- DPRINTF(GPUReg, "Operand: regNum: %d, kind: %d \n", regIdx,
- brigRegOp->regKind);
-
- maxRegIdx = std::max(maxRegIdx, regIdx);
-
- return true;
-}
-
-void
-BaseRegOperand::initWithStrOffset(unsigned strOffset, const BrigObject *obj,
- unsigned &maxRegIdx, char _regFileChar)
-{
- const char *name = obj->getString(strOffset);
- char *endptr;
- regIdx = strtoul(name + 2, &endptr, 10);
-
- if (name[0] != '$' || name[1] != _regFileChar) {
- fatal("register operand parse error on \"%s\"\n", name);
- }
-
- maxRegIdx = std::max(maxRegIdx, regIdx);
-}
-
-unsigned SRegOperand::maxRegIdx;
-unsigned DRegOperand::maxRegIdx;
-unsigned CRegOperand::maxRegIdx;
-
-std::string
-SRegOperand::disassemble()
-{
- return csprintf("$s%d", regIdx);
-}
-
-std::string
-DRegOperand::disassemble()
-{
- return csprintf("$d%d", regIdx);
-}
-
-std::string
-CRegOperand::disassemble()
-{
- return csprintf("$c%d", regIdx);
-}
-
-BrigRegOperandInfo
-findRegDataType(unsigned opOffset, const BrigObject *obj)
-{
- const BrigOperand *baseOp = obj->getOperand(opOffset);
-
- switch (baseOp->kind) {
- case BRIG_KIND_OPERAND_REGISTER:
- {
- const BrigOperandRegister *op = (BrigOperandRegister*)baseOp;
-
- return BrigRegOperandInfo((BrigKind16_t)baseOp->kind,
- (BrigRegisterKind)op->regKind);
- }
- break;
-
- case BRIG_KIND_OPERAND_WAVESIZE:
- {
- BrigRegisterKind reg_kind = BRIG_REGISTER_KIND_DOUBLE;
- return BrigRegOperandInfo((BrigKind16_t)baseOp->kind, reg_kind);
- }
-
- case BRIG_KIND_OPERAND_OPERAND_LIST:
- {
- const BrigOperandOperandList *op =
- (BrigOperandOperandList*)baseOp;
- const BrigData *data_p = (BrigData*)obj->getData(op->elements);
-
-
- int num_operands = 0;
- BrigRegisterKind reg_kind = (BrigRegisterKind)0;
- for (int offset = 0; offset < data_p->byteCount; offset += 4) {
- const BrigOperand *op_p = (const BrigOperand *)
- obj->getOperand(((int *)data_p->bytes)[offset/4]);
-
- if (op_p->kind == BRIG_KIND_OPERAND_REGISTER) {
- const BrigOperandRegister *brigRegOp =
- (const BrigOperandRegister*)op_p;
- reg_kind = (BrigRegisterKind)brigRegOp->regKind;
- } else if (op_p->kind == BRIG_KIND_OPERAND_CONSTANT_BYTES) {
- uint16_t num_bytes =
- ((Brig::BrigOperandConstantBytes*)op_p)->base.byteCount
- - sizeof(BrigBase);
- if (num_bytes == sizeof(uint32_t)) {
- reg_kind = BRIG_REGISTER_KIND_SINGLE;
- } else if (num_bytes == sizeof(uint64_t)) {
- reg_kind = BRIG_REGISTER_KIND_DOUBLE;
- } else {
- fatal("OperandList: bad operand size %d\n", num_bytes);
- }
- } else if (op_p->kind == BRIG_KIND_OPERAND_WAVESIZE) {
- reg_kind = BRIG_REGISTER_KIND_DOUBLE;
- } else {
- fatal("OperandList: bad operand kind %d\n", op_p->kind);
- }
-
- num_operands++;
- }
- assert(baseOp->kind == BRIG_KIND_OPERAND_OPERAND_LIST);
-
- return BrigRegOperandInfo((BrigKind16_t)baseOp->kind, reg_kind);
- }
- break;
-
- case BRIG_KIND_OPERAND_ADDRESS:
- {
- const BrigOperandAddress *op = (BrigOperandAddress*)baseOp;
-
- if (!op->reg) {
- BrigType type = BRIG_TYPE_NONE;
-
- if (op->symbol) {
- const BrigDirective *dir = (BrigDirective*)
- obj->getCodeSectionEntry(op->symbol);
-
- assert(dir->kind == BRIG_KIND_DIRECTIVE_VARIABLE);
-
- const BrigDirectiveVariable *sym =
- (const BrigDirectiveVariable*)dir;
-
- type = (BrigType)sym->type;
- }
- return BrigRegOperandInfo(BRIG_KIND_OPERAND_ADDRESS,
- (BrigType)type);
- } else {
- const BrigOperandAddress *b = (const BrigOperandAddress*)baseOp;
- const BrigOperand *reg = obj->getOperand(b->reg);
- const BrigOperandRegister *rop = (BrigOperandRegister*)reg;
-
- return BrigRegOperandInfo(BRIG_KIND_OPERAND_REGISTER,
- (BrigRegisterKind)rop->regKind);
- }
- }
- break;
-
- default:
- fatal("AddrOperand: bad operand kind %d\n", baseOp->kind);
- break;
- }
-}
-
-void
-AddrOperandBase::parseAddr(const BrigOperandAddress *op, const BrigObject *obj)
-{
- assert(op->base.kind == BRIG_KIND_OPERAND_ADDRESS);
-
- const BrigDirective *d =
- (BrigDirective*)obj->getCodeSectionEntry(op->symbol);
-
- /**
- * HSAIL does not properly handle immediate offsets for instruction types
- * that utilize them. It currently only supports instructions that use
- * variables instead. Again, these pop up in code that is never executed
- * (i.e. the HCC AMP codes) so we just hack it here to let us pass through
- * the HSAIL object initialization. If such code is ever called, we would
- * have to implement this properly.
- */
- if (d->kind != BRIG_KIND_DIRECTIVE_VARIABLE) {
- warn("HSAIL implementation does not support instructions with "
- "address calculations where the operand is not a variable\n");
- }
-
- const BrigDirectiveVariable *sym = (BrigDirectiveVariable*)d;
- name = obj->getString(sym->name);
-
- if (sym->segment != BRIG_SEGMENT_ARG) {
- storageElement =
- obj->currentCode->storageMap->findSymbol(sym->segment, name);
- offset = 0;
- } else {
- // sym->name does not work for BRIG_SEGMENT_ARG for the following case:
- //
- // void foo(int a);
- // void bar(double a);
- //
- // foo(...) --> arg_u32 %param_p0;
- // st_arg_u32 $s0, [%param_p0];
- // call &foo (%param_p0);
- // bar(...) --> arg_f64 %param_p0;
- // st_arg_u64 $d0, [%param_p0];
- // call &foo (%param_p0);
- //
- // Both functions use the same variable name (param_p0)!!!
- //
- // Maybe this is a bug in the compiler (I don't know).
- //
- // Solution:
- // Use directive pointer (BrigDirectiveVariable) to differentiate 2
- // versions of param_p0.
- //
- // Note this solution is kind of stupid, because we are pulling stuff
- // out of the brig binary via the directive pointer and putting it into
- // the symbol table, but now we are indexing the symbol table by the
- // brig directive pointer! It makes the symbol table sort of pointless.
- // But I don't want to mess with the rest of the infrastructure, so
- // let's go with this for now.
- //
- // When we update the compiler again, we should see if this problem goes
- // away. If so, we can fold some of this functionality into the code for
- // kernel arguments. If not, maybe we can index the symbol name on a
- // hash of the variable AND function name
- storageElement = obj->currentCode->
- storageMap->findSymbol((Brig::BrigSegment)sym->segment, sym);
-
- assert(storageElement);
- }
-}
-
-uint64_t
-AddrOperandBase::calcUniformBase()
-{
- // start with offset, will be 0 if not specified
- uint64_t address = offset;
-
- // add in symbol value if specified
- if (storageElement) {
- address += storageElement->offset;
- }
-
- return address;
-}
-
-std::string
-AddrOperandBase::disassemble(std::string reg_disassembly)
-{
- std::string disasm;
-
- if (offset || reg_disassembly != "") {
- disasm += "[";
-
- if (reg_disassembly != "") {
- disasm += reg_disassembly;
-
- if (offset > 0) {
- disasm += "+";
- }
- }
-
- if (offset) {
- disasm += csprintf("%d", offset);
- }
-
- disasm += "]";
- } else if (name) {
- disasm += csprintf("[%s]", name);
- }
-
- return disasm;
-}
-
-void
-NoRegAddrOperand::init(unsigned opOffset, const BrigObject *obj)
-{
- const BrigOperand *baseOp = obj->getOperand(opOffset);
-
- if (baseOp->kind == BRIG_KIND_OPERAND_ADDRESS) {
- BrigOperandAddress *addrOp = (BrigOperandAddress*)baseOp;
- parseAddr(addrOp, obj);
- offset = (uint64_t(addrOp->offset.hi) << 32) |
- uint64_t(addrOp->offset.lo);
- } else {
- fatal("NoRegAddrOperand: bad operand kind %d\n", baseOp->kind);
- }
-
-}
-
-std::string
-NoRegAddrOperand::disassemble()
-{
- return AddrOperandBase::disassemble(std::string(""));
-}
-
-void
-LabelOperand::init(unsigned opOffset, const BrigObject *obj)
-{
- const BrigOperandCodeRef *op =
- (const BrigOperandCodeRef*)obj->getOperand(opOffset);
-
- assert(op->base.kind == BRIG_KIND_OPERAND_CODE_REF);
-
- const BrigDirective *dir =
- (const BrigDirective*)obj->getCodeSectionEntry(op->ref);
-
- assert(dir->kind == BRIG_KIND_DIRECTIVE_LABEL);
- label = obj->currentCode->refLabel((BrigDirectiveLabel*)dir, obj);
-}
-
-uint32_t
-LabelOperand::getTarget(Wavefront *w, int lane)
-{
- return label->get();
-}
-
-std::string
-LabelOperand::disassemble()
-{
- return label->name;
-}
+++ /dev/null
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Steve Reinhardt
- */
-
-#ifndef __ARCH_HSAIL_OPERAND_HH__
-#define __ARCH_HSAIL_OPERAND_HH__
-
-/**
- * @file operand.hh
- *
- * Defines classes encapsulating HSAIL instruction operands.
- */
-
-#include <limits>
-#include <string>
-
-#include "arch/hsail/Brig.h"
-#include "base/trace.hh"
-#include "base/types.hh"
-#include "debug/GPUReg.hh"
-#include "enums/RegisterType.hh"
-#include "gpu-compute/brig_object.hh"
-#include "gpu-compute/compute_unit.hh"
-#include "gpu-compute/hsail_code.hh"
-#include "gpu-compute/shader.hh"
-#include "gpu-compute/vector_register_file.hh"
-#include "gpu-compute/wavefront.hh"
-
-class Label;
-class StorageElement;
-
-class BaseOperand
-{
- public:
- Enums::RegisterType registerType;
- uint32_t regOperandSize;
- BaseOperand() { registerType = Enums::RT_NONE; regOperandSize = 0; }
- bool isVectorRegister() { return registerType == Enums::RT_VECTOR; }
- bool isScalarRegister() { return registerType == Enums::RT_SCALAR; }
- bool isCondRegister() { return registerType == Enums::RT_CONDITION; }
- unsigned int regIndex() { return 0; }
- uint32_t opSize() { return regOperandSize; }
- virtual ~BaseOperand() { }
-};
-
-class BrigRegOperandInfo
-{
- public:
- Brig::BrigKind16_t kind;
- Brig::BrigType type;
- Brig::BrigRegisterKind regKind;
-
- BrigRegOperandInfo(Brig::BrigKind16_t _kind,
- Brig::BrigRegisterKind _regKind)
- : kind(_kind), regKind(_regKind)
- {
- }
-
- BrigRegOperandInfo(Brig::BrigKind16_t _kind, Brig::BrigType _type)
- : kind(_kind), type(_type)
- {
- }
-
- BrigRegOperandInfo() : kind(Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES),
- type(Brig::BRIG_TYPE_NONE)
- {
- }
-};
-
-BrigRegOperandInfo findRegDataType(unsigned opOffset, const BrigObject *obj);
-
-class BaseRegOperand : public BaseOperand
-{
- public:
- unsigned regIdx;
- char regFileChar;
-
- bool init(unsigned opOffset, const BrigObject *obj,
- unsigned &maxRegIdx, char _regFileChar);
-
- bool init_from_vect(unsigned opOffset, const BrigObject *obj, int at,
- unsigned &maxRegIdx, char _regFileChar);
-
- void initWithStrOffset(unsigned strOffset, const BrigObject *obj,
- unsigned &maxRegIdx, char _regFileChar);
- unsigned int regIndex() { return regIdx; }
-};
-
-class SRegOperand : public BaseRegOperand
-{
- public:
- static unsigned maxRegIdx;
-
- bool
- init(unsigned opOffset, const BrigObject *obj)
- {
- regOperandSize = sizeof(uint32_t);
- registerType = Enums::RT_VECTOR;
-
- return BaseRegOperand::init(opOffset, obj, maxRegIdx, 's');
- }
-
- bool
- init_from_vect(unsigned opOffset, const BrigObject *obj, int at)
- {
- regOperandSize = sizeof(uint32_t);
- registerType = Enums::RT_VECTOR;
-
- return BaseRegOperand::init_from_vect(opOffset, obj, at, maxRegIdx,
- 's');
- }
-
- void
- initWithStrOffset(unsigned strOffset, const BrigObject *obj)
- {
- regOperandSize = sizeof(uint32_t);
- registerType = Enums::RT_VECTOR;
-
- return BaseRegOperand::initWithStrOffset(strOffset, obj, maxRegIdx,
- 's');
- }
-
- template<typename OperandType>
- OperandType
- get(Wavefront *w, int lane)
- {
- assert(sizeof(OperandType) <= sizeof(uint32_t));
- assert(regIdx < w->maxSpVgprs);
- // if OperandType is smaller than 32-bit, we truncate the value
- OperandType ret;
- uint32_t vgprIdx;
-
- switch (sizeof(OperandType)) {
- case 1: // 1 byte operand
- vgprIdx = w->remap(regIdx, 1, 1);
- ret = (w->computeUnit->vrf[w->simdId]->
- read<uint32_t>(vgprIdx, lane)) & 0xff;
- break;
- case 2: // 2 byte operand
- vgprIdx = w->remap(regIdx, 2, 1);
- ret = (w->computeUnit->vrf[w->simdId]->
- read<uint32_t>(vgprIdx, lane)) & 0xffff;
- break;
- case 4: // 4 byte operand
- vgprIdx = w->remap(regIdx,sizeof(OperandType), 1);
- ret = w->computeUnit->vrf[w->simdId]->
- read<OperandType>(vgprIdx, lane);
- break;
- default:
- panic("Bad OperandType\n");
- break;
- }
-
- return (OperandType)ret;
- }
-
- // special get method for compatibility with LabelOperand
- uint32_t
- getTarget(Wavefront *w, int lane)
- {
- return get<uint32_t>(w, lane);
- }
-
- template<typename OperandType>
- void set(Wavefront *w, int lane, OperandType &val);
- std::string disassemble();
-};
-
-template<typename OperandType>
-void
-SRegOperand::set(Wavefront *w, int lane, OperandType &val)
-{
- DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: $s%d <- %d\n",
- w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane, regIdx, val);
-
- assert(sizeof(OperandType) == sizeof(uint32_t));
- assert(regIdx < w->maxSpVgprs);
- uint32_t vgprIdx = w->remap(regIdx, sizeof(OperandType), 1);
- w->computeUnit->vrf[w->simdId]->write<OperandType>(vgprIdx,val,lane);
-}
-
-template<>
-inline void
-SRegOperand::set(Wavefront *w, int lane, uint64_t &val)
-{
- DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: $s%d <- %d\n",
- w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane, regIdx, val);
-
- assert(regIdx < w->maxSpVgprs);
- uint32_t vgprIdx = w->remap(regIdx, sizeof(uint32_t), 1);
- w->computeUnit->vrf[w->simdId]->write<uint32_t>(vgprIdx, val, lane);
-}
-
-class DRegOperand : public BaseRegOperand
-{
- public:
- static unsigned maxRegIdx;
-
- bool
- init(unsigned opOffset, const BrigObject *obj)
- {
- regOperandSize = sizeof(uint64_t);
- registerType = Enums::RT_VECTOR;
-
- return BaseRegOperand::init(opOffset, obj, maxRegIdx, 'd');
- }
-
- bool
- init_from_vect(unsigned opOffset, const BrigObject *obj, int at)
- {
- regOperandSize = sizeof(uint64_t);
- registerType = Enums::RT_VECTOR;
-
- return BaseRegOperand::init_from_vect(opOffset, obj, at, maxRegIdx,
- 'd');
- }
-
- void
- initWithStrOffset(unsigned strOffset, const BrigObject *obj)
- {
- regOperandSize = sizeof(uint64_t);
- registerType = Enums::RT_VECTOR;
-
- return BaseRegOperand::initWithStrOffset(strOffset, obj, maxRegIdx,
- 'd');
- }
-
- template<typename OperandType>
- OperandType
- get(Wavefront *w, int lane)
- {
- assert(sizeof(OperandType) <= sizeof(uint64_t));
- // TODO: this check is valid only for HSAIL
- assert(regIdx < w->maxDpVgprs);
- uint32_t vgprIdx = w->remap(regIdx, sizeof(OperandType), 1);
-
- return w->computeUnit->vrf[w->simdId]->read<OperandType>(vgprIdx,lane);
- }
-
- template<typename OperandType>
- void
- set(Wavefront *w, int lane, OperandType &val)
- {
- DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: $d%d <- %d\n",
- w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane, regIdx,
- val);
-
- assert(sizeof(OperandType) <= sizeof(uint64_t));
- // TODO: this check is valid only for HSAIL
- assert(regIdx < w->maxDpVgprs);
- uint32_t vgprIdx = w->remap(regIdx, sizeof(OperandType), 1);
- w->computeUnit->vrf[w->simdId]->write<OperandType>(vgprIdx,val,lane);
- }
-
- std::string disassemble();
-};
-
-class CRegOperand : public BaseRegOperand
-{
- public:
- static unsigned maxRegIdx;
-
- bool
- init(unsigned opOffset, const BrigObject *obj)
- {
- regOperandSize = sizeof(uint8_t);
- registerType = Enums::RT_CONDITION;
-
- return BaseRegOperand::init(opOffset, obj, maxRegIdx, 'c');
- }
-
- bool
- init_from_vect(unsigned opOffset, const BrigObject *obj, int at)
- {
- regOperandSize = sizeof(uint8_t);
- registerType = Enums::RT_CONDITION;
-
- return BaseRegOperand::init_from_vect(opOffset, obj, at, maxRegIdx,
- 'c');
- }
-
- void
- initWithStrOffset(unsigned strOffset, const BrigObject *obj)
- {
- regOperandSize = sizeof(uint8_t);
- registerType = Enums::RT_CONDITION;
-
- return BaseRegOperand::initWithStrOffset(strOffset, obj, maxRegIdx,
- 'c');
- }
-
- template<typename OperandType>
- OperandType
- get(Wavefront *w, int lane)
- {
- assert(regIdx < w->condRegState->numRegs());
-
- return w->condRegState->read<OperandType>((int)regIdx, lane);
- }
-
- template<typename OperandType>
- void
- set(Wavefront *w, int lane, OperandType &val)
- {
- DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: $c%d <- %d\n",
- w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane, regIdx,
- val);
-
- assert(regIdx < w->condRegState->numRegs());
- w->condRegState->write<OperandType>(regIdx,lane,val);
- }
-
- std::string disassemble();
-};
-
-template<typename T>
-class ImmOperand : public BaseOperand
-{
- private:
- uint16_t kind;
- public:
- T bits;
-
- bool init(unsigned opOffset, const BrigObject *obj);
- bool init_from_vect(unsigned opOffset, const BrigObject *obj, int at);
- std::string disassemble();
-
- template<typename OperandType>
- OperandType
- get(Wavefront *w)
- {
- assert(sizeof(OperandType) <= sizeof(T));
- panic_if(w == nullptr, "WF pointer needs to be set");
-
- switch (kind) {
- // immediate operand is WF size
- case Brig::BRIG_KIND_OPERAND_WAVESIZE:
- return (OperandType)w->computeUnit->wfSize();
- break;
-
- default:
- return *(OperandType*)&bits;
- break;
- }
- }
-
- // This version of get() takes a WF* and a lane id for
- // compatibility with the register-based get() methods.
- template<typename OperandType>
- OperandType
- get(Wavefront *w, int lane)
- {
- return get<OperandType>(w);
- }
-};
-
-template<typename T>
-bool
-ImmOperand<T>::init(unsigned opOffset, const BrigObject *obj)
-{
- const Brig::BrigOperand *brigOp = obj->getOperand(opOffset);
-
- switch (brigOp->kind) {
- // this is immediate operand
- case Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES:
- {
- DPRINTF(GPUReg, "sizeof(T): %lu, byteCount: %d\n", sizeof(T),
- brigOp->byteCount);
-
- auto cbptr = (Brig::BrigOperandConstantBytes*)brigOp;
-
- bits = *((T*)(obj->getData(cbptr->bytes + 4)));
- kind = brigOp->kind;
- return true;
- }
- break;
-
- case Brig::BRIG_KIND_OPERAND_WAVESIZE:
- kind = brigOp->kind;
- bits = std::numeric_limits<unsigned long long>::digits;
- return true;
-
- default:
- kind = Brig::BRIG_KIND_NONE;
- return false;
- }
-}
-
-template <typename T>
-bool
-ImmOperand<T>::init_from_vect(unsigned opOffset, const BrigObject *obj, int at)
-{
- const Brig::BrigOperand *brigOp = obj->getOperand(opOffset);
-
- if (brigOp->kind != Brig::BRIG_KIND_OPERAND_OPERAND_LIST) {
- kind = Brig::BRIG_KIND_NONE;
- return false;
- }
-
-
- const Brig::BrigOperandOperandList *brigVecOp =
- (const Brig::BrigOperandOperandList *)brigOp;
-
- unsigned *data_offset =
- (unsigned *)obj->getData(brigVecOp->elements + 4 * (at + 1));
-
- const Brig::BrigOperand *p =
- (const Brig::BrigOperand *)obj->getOperand(*data_offset);
-
- if (p->kind != Brig::BRIG_KIND_OPERAND_CONSTANT_BYTES) {
- kind = Brig::BRIG_KIND_NONE;
- return false;
- }
-
- return init(*data_offset, obj);
-}
-template<typename T>
-std::string
-ImmOperand<T>::disassemble()
-{
- return csprintf("0x%08x", bits);
-}
-
-template<typename RegOperand, typename T>
-class RegOrImmOperand : public BaseOperand
-{
- private:
- bool is_imm;
-
- public:
- void setImm(const bool value) { is_imm = value; }
-
- ImmOperand<T> imm_op;
- RegOperand reg_op;
-
- RegOrImmOperand() { is_imm = false; }
- void init(unsigned opOffset, const BrigObject *obj);
- void init_from_vect(unsigned opOffset, const BrigObject *obj, int at);
- std::string disassemble();
-
- template<typename OperandType>
- OperandType
- get(Wavefront *w, int lane)
- {
- return is_imm ? imm_op.template get<OperandType>(w) :
- reg_op.template get<OperandType>(w, lane);
- }
-
- uint32_t
- opSize()
- {
- if (!is_imm) {
- return reg_op.opSize();
- }
-
- return 0;
- }
-
- bool
- isVectorRegister()
- {
- if (!is_imm) {
- return reg_op.registerType == Enums::RT_VECTOR;
- }
- return false;
- }
-
- bool
- isCondRegister()
- {
- if (!is_imm) {
- return reg_op.registerType == Enums::RT_CONDITION;
- }
-
- return false;
- }
-
- bool
- isScalarRegister()
- {
- if (!is_imm) {
- return reg_op.registerType == Enums::RT_SCALAR;
- }
-
- return false;
- }
-
- unsigned int
- regIndex()
- {
- if (!is_imm) {
- return reg_op.regIndex();
- }
- return 0;
- }
-};
-
-template<typename RegOperand, typename T>
-void
-RegOrImmOperand<RegOperand, T>::init(unsigned opOffset, const BrigObject *obj)
-{
- is_imm = false;
-
- if (reg_op.init(opOffset, obj)) {
- return;
- }
-
- if (imm_op.init(opOffset, obj)) {
- is_imm = true;
- return;
- }
-
- fatal("RegOrImmOperand::init(): bad operand kind %d\n",
- obj->getOperand(opOffset)->kind);
-}
-
-template<typename RegOperand, typename T>
-void
-RegOrImmOperand<RegOperand, T>::init_from_vect(unsigned opOffset,
- const BrigObject *obj, int at)
-{
- if (reg_op.init_from_vect(opOffset, obj, at)) {
- is_imm = false;
-
- return;
- }
-
- if (imm_op.init_from_vect(opOffset, obj, at)) {
- is_imm = true;
-
- return;
- }
-
- fatal("RegOrImmOperand::init(): bad operand kind %d\n",
- obj->getOperand(opOffset)->kind);
-}
-
-template<typename RegOperand, typename T>
-std::string
-RegOrImmOperand<RegOperand, T>::disassemble()
-{
- return is_imm ? imm_op.disassemble() : reg_op.disassemble();
-}
-
-typedef RegOrImmOperand<SRegOperand, uint32_t> SRegOrImmOperand;
-typedef RegOrImmOperand<DRegOperand, uint64_t> DRegOrImmOperand;
-typedef RegOrImmOperand<CRegOperand, bool> CRegOrImmOperand;
-
-class AddrOperandBase : public BaseOperand
-{
- protected:
- // helper function for init()
- void parseAddr(const Brig::BrigOperandAddress *op, const BrigObject *obj);
-
- // helper function for disassemble()
- std::string disassemble(std::string reg_disassembly);
- uint64_t calcUniformBase();
-
- public:
- virtual void calcVector(Wavefront *w, std::vector<Addr> &addrVec) = 0;
- virtual uint64_t calcLane(Wavefront *w, int lane=0) = 0;
-
- int64_t offset;
- const char *name = nullptr;
- StorageElement *storageElement;
-};
-
-template<typename RegOperandType>
-class RegAddrOperand : public AddrOperandBase
-{
- public:
- RegOperandType reg;
- void init(unsigned opOffset, const BrigObject *obj);
- uint64_t calcUniform();
- void calcVector(Wavefront *w, std::vector<Addr> &addrVec);
- uint64_t calcLane(Wavefront *w, int lane=0);
- uint32_t opSize() { return reg.opSize(); }
- bool isVectorRegister() { return reg.registerType == Enums::RT_VECTOR; }
- bool isCondRegister() { return reg.registerType == Enums::RT_CONDITION; }
- bool isScalarRegister() { return reg.registerType == Enums::RT_SCALAR; }
- unsigned int regIndex() { return reg.regIndex(); }
- std::string disassemble();
-};
-
-template<typename RegOperandType>
-void
-RegAddrOperand<RegOperandType>::init(unsigned opOffset, const BrigObject *obj)
-{
- using namespace Brig;
-
- const BrigOperand *baseOp = obj->getOperand(opOffset);
-
- switch (baseOp->kind) {
- case BRIG_KIND_OPERAND_ADDRESS:
- {
- const BrigOperandAddress *op = (BrigOperandAddress*)baseOp;
- storageElement = nullptr;
-
- reg.init(op->reg, obj);
-
- if (reg.regFileChar == 's') {
- // if the address expression is 32b, then the hi
- // bits of the offset must be set to 0 in the BRIG
- assert(!op->offset.hi);
- /**
- * the offset field of an HSAIL instruction may be negative
- * so here we cast the raw bits we get from the BRIG file to
- * a signed type to avoid address calculation errors
- */
- offset = (int32_t)(op->offset.lo);
- reg.regOperandSize = sizeof(uint32_t);
- registerType = Enums::RT_VECTOR;
- }
- else if (reg.regFileChar == 'd') {
- offset = (int64_t)(((uint64_t)(op->offset.hi) << 32)
- | (uint64_t)(op->offset.lo));
- reg.regOperandSize = sizeof(uint64_t);
- registerType = Enums::RT_VECTOR;
- }
- }
- break;
-
- default:
- fatal("RegAddrOperand: bad operand kind %d\n", baseOp->kind);
- break;
- }
-}
-
-template<typename RegOperandType>
-uint64_t
-RegAddrOperand<RegOperandType>::calcUniform()
-{
- fatal("can't do calcUniform() on register-based address\n");
-
- return 0;
-}
-
-template<typename RegOperandType>
-void
-RegAddrOperand<RegOperandType>::calcVector(Wavefront *w,
- std::vector<Addr> &addrVec)
-{
- Addr address = calcUniformBase();
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
- if (w->execMask(lane)) {
- if (reg.regFileChar == 's') {
- addrVec[lane] = address + reg.template get<uint32_t>(w, lane);
- } else {
- addrVec[lane] = address + reg.template get<Addr>(w, lane);
- }
- }
- }
-}
-
-template<typename RegOperandType>
-uint64_t
-RegAddrOperand<RegOperandType>::calcLane(Wavefront *w, int lane)
-{
- Addr address = calcUniformBase();
-
- return address + reg.template get<Addr>(w, lane);
-}
-
-template<typename RegOperandType>
-std::string
-RegAddrOperand<RegOperandType>::disassemble()
-{
- return AddrOperandBase::disassemble(reg.disassemble());
-}
-
-typedef RegAddrOperand<SRegOperand> SRegAddrOperand;
-typedef RegAddrOperand<DRegOperand> DRegAddrOperand;
-
-class NoRegAddrOperand : public AddrOperandBase
-{
- public:
- void init(unsigned opOffset, const BrigObject *obj);
- uint64_t calcUniform();
- void calcVector(Wavefront *w, std::vector<Addr> &addrVec);
- uint64_t calcLane(Wavefront *w, int lane=0);
- std::string disassemble();
-};
-
-inline uint64_t
-NoRegAddrOperand::calcUniform()
-{
- return AddrOperandBase::calcUniformBase();
-}
-
-inline uint64_t
-NoRegAddrOperand::calcLane(Wavefront *w, int lane)
-{
- return calcUniform();
-}
-
-inline void
-NoRegAddrOperand::calcVector(Wavefront *w, std::vector<Addr> &addrVec)
-{
- uint64_t address = calcUniformBase();
-
- for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane)
- addrVec[lane] = address;
-}
-
-class LabelOperand : public BaseOperand
-{
- public:
- Label *label;
-
- void init(unsigned opOffset, const BrigObject *obj);
- std::string disassemble();
-
- // special get method for compatibility with SRegOperand
- uint32_t getTarget(Wavefront *w, int lane);
-
-};
-
-class ListOperand : public BaseOperand
-{
- public:
- int elementCount;
- std::vector<StorageElement*> callArgs;
-
- int
- getSrcOperand(int idx)
- {
- DPRINTF(GPUReg, "getSrcOperand, idx: %d, sz_args: %d\n", idx,
- callArgs.size());
-
- return callArgs.at(idx)->offset;
- }
-
- void init(unsigned opOffset, const BrigObject *obj);
-
- std::string disassemble();
-
- template<typename OperandType>
- OperandType
- get(Wavefront *w, int lane, int arg_idx)
- {
- return w->readCallArgMem<OperandType>(lane, getSrcOperand(arg_idx));
- }
-
- template<typename OperandType>
- void
- set(Wavefront *w, int lane, OperandType val)
- {
- w->writeCallArgMem<OperandType>(lane, getSrcOperand(0), val);
- DPRINTF(GPUReg, "CU%d, WF[%d][%d], lane %d: arg[%d] <- %d\n",
- w->computeUnit->cu_id, w->simdId, w->wfSlotId, lane,
- getSrcOperand(0), val);
- }
-};
-
-class FunctionRefOperand : public BaseOperand
-{
- public:
- const char *func_name;
-
- void init(unsigned opOffset, const BrigObject *obj);
- std::string disassemble();
-};
-
-#endif // __ARCH_HSAIL_OPERAND_HH__
+++ /dev/null
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Steve Reinhardt, Anthony Gutierrez
- */
-
-#include "gpu-compute/brig_object.hh"
-
-#include <fcntl.h>
-#include <sys/mman.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-#include <cassert>
-#include <cstddef>
-#include <cstdlib>
-
-#include "arch/hsail/Brig.h"
-#include "base/logging.hh"
-#include "base/trace.hh"
-#include "debug/BRIG.hh"
-#include "debug/HSAILObject.hh"
-#include "debug/HSALoader.hh"
-
-using namespace Brig;
-
-std::vector<std::function<HsaObject*(const std::string&, int, uint8_t*)>>
- HsaObject::tryFileFuncs = { BrigObject::tryFile };
-
-extern int getBrigDataTypeBytes(BrigType16_t t);
-
-const char *BrigObject::sectionNames[] =
-{
- "hsa_data",
- "hsa_code",
- "hsa_operand",
- ".shstrtab"
-};
-
-const char *segmentNames[] =
-{
- "none",
- "flat",
- "global",
- "readonly",
- "kernarg",
- "group",
- "private",
- "spill",
- "args"
-};
-
-const uint8_t*
-BrigObject::getSectionOffset(enum SectionIndex sec, int offs) const
-{
- // allow offs == size for dummy end pointers
- assert(offs <= sectionInfo[sec].size);
-
- return sectionInfo[sec].ptr + offs;
-}
-
-const char*
-BrigObject::getString(int offs) const
-{
- return (const char*)(getSectionOffset(DataSectionIndex, offs) + 4);
-}
-
-const BrigBase*
-BrigObject::getCodeSectionEntry(int offs) const
-{
- return (const BrigBase*)getSectionOffset(CodeSectionIndex, offs);
-}
-
-const BrigData*
-BrigObject::getBrigBaseData(int offs) const
-{
- return (Brig::BrigData*)(getSectionOffset(DataSectionIndex, offs));
-}
-
-const uint8_t*
-BrigObject::getData(int offs) const
-{
- return getSectionOffset(DataSectionIndex, offs);
-}
-
-const BrigOperand*
-BrigObject::getOperand(int offs) const
-{
- return (const BrigOperand*)getSectionOffset(OperandsSectionIndex, offs);
-}
-
-unsigned
-BrigObject::getOperandPtr(int offs, int index) const
-{
- unsigned *op_offs = (unsigned*)(getData(offs + 4 * (index + 1)));
-
- return *op_offs;
-}
-
-const BrigInstBase*
-BrigObject::getInst(int offs) const
-{
- return (const BrigInstBase*)getSectionOffset(CodeSectionIndex, offs);
-}
-
-HsaCode*
-BrigObject::getKernel(const std::string &name) const
-{
- return nullptr;
-}
-
-HsaCode*
-BrigObject::getFunction(const std::string &name) const
-{
- for (int i = 0; i < functions.size(); ++i) {
- if (functions[i]->name() == name) {
- return functions[i];
- }
- }
-
- return nullptr;
-}
-
-void
-BrigObject::processDirectives(const BrigBase *dirPtr, const BrigBase *endPtr,
- StorageMap *storageMap)
-{
- while (dirPtr < endPtr) {
- if (!dirPtr->byteCount) {
- fatal("Bad directive size 0\n");
- }
-
- // calculate next pointer now so we can override it if needed
- const BrigBase *nextDirPtr = brigNext(dirPtr);
-
- DPRINTF(HSAILObject, "Code section entry kind: #%x, byte count: %d\n",
- dirPtr->kind, dirPtr->byteCount);
-
- switch (dirPtr->kind) {
- case BRIG_KIND_DIRECTIVE_FUNCTION:
- {
- const BrigDirectiveExecutable *p M5_VAR_USED =
- reinterpret_cast<const BrigDirectiveExecutable*>(dirPtr);
-
- DPRINTF(HSAILObject,"DIRECTIVE_FUNCTION: %s offset: "
- "%d next: %d\n", getString(p->name),
- p->firstCodeBlockEntry, p->nextModuleEntry);
-
- if (p->firstCodeBlockEntry != p->nextModuleEntry) {
- // Function calls are not supported. We allow the BRIG
- // object file to create stubs, but the function calls will
- // not work properly if the application makes use of them.
- warn("HSA function invocations are unsupported.\n");
-
- const char *name = getString(p->name);
-
- HsailCode *code_obj = nullptr;
-
- for (int i = 0; i < functions.size(); ++i) {
- if (functions[i]->name() == name) {
- code_obj = functions[i];
- break;
- }
- }
-
- if (!code_obj) {
- // create new local storage map for kernel-local symbols
- code_obj = new HsailCode(name, p, this,
- new StorageMap(storageMap));
- functions.push_back(code_obj);
- } else {
- panic("Multiple definition of Function!!: %s\n",
- getString(p->name));
- }
- }
-
- nextDirPtr = getCodeSectionEntry(p->nextModuleEntry);
- }
- break;
-
- case BRIG_KIND_DIRECTIVE_KERNEL:
- {
- const BrigDirectiveExecutable *p =
- reinterpret_cast<const BrigDirectiveExecutable*>(dirPtr);
-
- DPRINTF(HSAILObject,"DIRECTIVE_KERNEL: %s offset: %d count: "
- "next: %d\n", getString(p->name),
- p->firstCodeBlockEntry, p->nextModuleEntry);
-
- const char *name = getString(p->name);
-
- if (name[0] == '&')
- name++;
-
- std::string str = name;
- char *temp;
- int len = str.length();
-
- if (str[len - 1] >= 'a' && str[len - 1] <= 'z') {
- temp = new char[str.size() + 1];
- std::copy(str.begin(), str.end() , temp);
- temp[str.size()] = '\0';
- } else {
- temp = new char[str.size()];
- std::copy(str.begin(), str.end() - 1 , temp);
- temp[str.size() - 1 ] = '\0';
- }
-
- std::string kernel_name = temp;
- delete[] temp;
-
- HsailCode *code_obj = nullptr;
-
- for (const auto &kernel : kernels) {
- if (kernel->name() == kernel_name) {
- code_obj = kernel;
- break;
- }
- }
-
- if (!code_obj) {
- // create new local storage map for kernel-local symbols
- code_obj = new HsailCode(kernel_name, p, this,
- new StorageMap(storageMap));
-
- kernels.push_back(code_obj);
- }
-
- nextDirPtr = getCodeSectionEntry(p->nextModuleEntry);
- }
- break;
-
- case BRIG_KIND_DIRECTIVE_VARIABLE:
- {
- const BrigDirectiveVariable *p =
- reinterpret_cast<const BrigDirectiveVariable*>(dirPtr);
-
- uint64_t readonlySize_old =
- storageMap->getSize(BRIG_SEGMENT_READONLY);
-
- StorageElement* se = storageMap->addSymbol(p, this);
-
- DPRINTF(HSAILObject, "DIRECTIVE_VARIABLE, symbol %s\n",
- getString(p->name));
-
- if (p->segment == BRIG_SEGMENT_READONLY) {
- // readonly memory has initialization data
- uint8_t* readonlyData_old = readonlyData;
-
- readonlyData =
- new uint8_t[storageMap->getSize(BRIG_SEGMENT_READONLY)];
-
- if (p->init) {
- if ((p->type == BRIG_TYPE_ROIMG) ||
- (p->type == BRIG_TYPE_WOIMG) ||
- (p->type == BRIG_TYPE_SAMP) ||
- (p->type == BRIG_TYPE_SIG32) ||
- (p->type == BRIG_TYPE_SIG64)) {
- panic("Read only data type not supported: %s\n",
- getString(p->name));
- }
-
- const BrigOperand *brigOp = getOperand(p->init);
- assert(brigOp->kind ==
- BRIG_KIND_OPERAND_CONSTANT_BYTES);
-
- const Brig::BrigData *operand_data M5_VAR_USED =
- getBrigBaseData(((BrigOperandConstantBytes*)
- brigOp)->bytes);
-
- assert((operand_data->byteCount / 4) > 0);
-
- uint8_t *symbol_data =
- (uint8_t*)getData(((BrigOperandConstantBytes*)
- brigOp)->bytes + 4);
-
- // copy the old data and add the new data
- if (readonlySize_old > 0) {
- memcpy(readonlyData, readonlyData_old,
- readonlySize_old);
- }
-
- memcpy(readonlyData + se->offset, symbol_data,
- se->size);
-
- delete[] readonlyData_old;
- }
- }
- }
- break;
-
- case BRIG_KIND_DIRECTIVE_LABEL:
- {
- const BrigDirectiveLabel M5_VAR_USED *p =
- reinterpret_cast<const BrigDirectiveLabel*>(dirPtr);
-
- panic("Label directives cannot be at the module level: %s\n",
- getString(p->name));
-
- }
- break;
-
- case BRIG_KIND_DIRECTIVE_COMMENT:
- {
- const BrigDirectiveComment M5_VAR_USED *p =
- reinterpret_cast<const BrigDirectiveComment*>(dirPtr);
-
- DPRINTF(HSAILObject, "DIRECTIVE_COMMENT: %s\n",
- getString(p->name));
- }
- break;
-
- case BRIG_KIND_DIRECTIVE_LOC:
- {
- DPRINTF(HSAILObject, "BRIG_DIRECTIVE_LOC\n");
- }
- break;
-
- case BRIG_KIND_DIRECTIVE_MODULE:
- {
- const BrigDirectiveModule M5_VAR_USED *p =
- reinterpret_cast<const BrigDirectiveModule*>(dirPtr);
-
- DPRINTF(HSAILObject, "BRIG_DIRECTIVE_MODULE: %s\n",
- getString(p->name));
- }
- break;
-
- case BRIG_KIND_DIRECTIVE_CONTROL:
- {
- DPRINTF(HSAILObject, "DIRECTIVE_CONTROL\n");
- }
- break;
-
- case BRIG_KIND_DIRECTIVE_PRAGMA:
- {
- DPRINTF(HSAILObject, "DIRECTIVE_PRAGMA\n");
- }
- break;
-
- case BRIG_KIND_DIRECTIVE_EXTENSION:
- {
- DPRINTF(HSAILObject, "DIRECTIVE_EXTENSION\n");
- }
- break;
-
- case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START:
- {
- DPRINTF(HSAILObject, "DIRECTIVE_ARG_BLOCK_START\n");
- }
- break;
-
- case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END:
- {
- DPRINTF(HSAILObject, "DIRECTIVE_ARG_BLOCK_END\n");
- }
- break;
- default:
- if (dirPtr->kind >= BRIG_KIND_INST_BEGIN &&
- dirPtr->kind <= BRIG_KIND_INST_END)
- break;
-
- if (dirPtr->kind >= BRIG_KIND_OPERAND_BEGIN &&
- dirPtr->kind <= BRIG_KIND_OPERAND_END)
- break;
-
- warn("Unknown Brig directive kind: %d\n", dirPtr->kind);
- break;
- }
-
- dirPtr = nextDirPtr;
- }
-}
-
-HsaObject*
-BrigObject::tryFile(const std::string &fname, int len, uint8_t *fileData)
-{
- const char *brig_ident = "HSA BRIG";
-
- if (memcmp(brig_ident, fileData, MODULE_IDENTIFICATION_LENGTH))
- return nullptr;
-
- return new BrigObject(fname, len, fileData);
-}
-
-BrigObject::BrigObject(const std::string &fname, int len, uint8_t *fileData)
- : HsaObject(fname), storageMap(new StorageMap())
-{
- const char *brig_ident = "HSA BRIG";
- BrigModuleHeader *mod_hdr = (BrigModuleHeader*)fileData;
-
- fatal_if(memcmp(brig_ident, mod_hdr, MODULE_IDENTIFICATION_LENGTH),
- "%s is not a BRIG file\n", fname);
-
- if (mod_hdr->brigMajor != BRIG_VERSION_BRIG_MAJOR ||
- mod_hdr->brigMinor != BRIG_VERSION_BRIG_MINOR) {
- fatal("%s: BRIG version mismatch, %d.%d != %d.%d\n",
- fname, mod_hdr->brigMajor, mod_hdr->brigMinor,
- BRIG_VERSION_BRIG_MAJOR, BRIG_VERSION_BRIG_MINOR);
- }
-
- fatal_if(mod_hdr->sectionCount != NumSectionIndices, "%s: BRIG section "
- "count (%d) != expected value (%d)\n", fname,
- mod_hdr->sectionCount, NumSectionIndices);
-
- for (int i = 0; i < NumSectionIndices; ++i) {
- sectionInfo[i].ptr = nullptr;
- }
-
- uint64_t *sec_idx_table = (uint64_t*)(fileData + mod_hdr->sectionIndex);
- for (int sec_idx = 0; sec_idx < mod_hdr->sectionCount; ++sec_idx) {
- uint8_t *sec_hdr_byte_ptr = fileData + sec_idx_table[sec_idx];
- BrigSectionHeader *sec_hdr = (BrigSectionHeader*)sec_hdr_byte_ptr;
-
- // It doesn't look like cprintf supports string precision values,
- // but if this breaks, the right answer is to fix that
- DPRINTF(HSAILObject, "found section %.*s\n", sec_hdr->nameLength,
- sec_hdr->name);
-
- sectionInfo[sec_idx].ptr = new uint8_t[sec_hdr->byteCount];
- memcpy(sectionInfo[sec_idx].ptr, sec_hdr_byte_ptr, sec_hdr->byteCount);
- sectionInfo[sec_idx].size = sec_hdr->byteCount;
- }
-
- BrigSectionHeader *code_hdr =
- (BrigSectionHeader*)sectionInfo[CodeSectionIndex].ptr;
-
- DPRINTF(HSAILObject, "Code section hdr, count: %d, hdr count: %d, "
- "name len: %d\n", code_hdr->byteCount, code_hdr->headerByteCount,
- code_hdr->nameLength);
-
- // start at offset 4 to skip initial null entry (see Brig spec)
- processDirectives(getCodeSectionEntry(code_hdr->headerByteCount),
- getCodeSectionEntry(sectionInfo[CodeSectionIndex].size),
- storageMap);
-
- delete[] fileData;
-
- DPRINTF(HSALoader, "BRIG object %s loaded.\n", fname);
-}
-
-BrigObject::~BrigObject()
-{
- for (int i = 0; i < NumSectionIndices; ++i)
- if (sectionInfo[i].ptr)
- delete[] sectionInfo[i].ptr;
-}
+++ /dev/null
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Steve Reinhardt, Anthony Gutierrez
- */
-
-#ifndef __BRIG_OBJECT_HH__
-#define __BRIG_OBJECT_HH__
-
-#include <cassert>
-#include <cstdint>
-#include <string>
-#include <vector>
-
-#include "arch/hsail/Brig.h"
-#include "gpu-compute/hsa_object.hh"
-#include "gpu-compute/hsail_code.hh"
-
-class LabelMap;
-class StorageMap;
-
-/* @class BrigObject
- * this class implements the BRIG loader object, and
- * is used when the simulator directly executes HSAIL.
- * this class is responsible for extracting all
- * information about kernels contained in BRIG format
- * and converts them to HsailCode objects that are
- * usable by the simulator and emulated runtime.
- */
-
-class BrigObject final : public HsaObject
-{
- public:
- enum SectionIndex
- {
- DataSectionIndex,
- CodeSectionIndex,
- OperandsSectionIndex,
- NumSectionIndices
- };
-
- static const char *sectionNames[];
-
- struct SectionInfo
- {
- uint8_t *ptr;
- int size;
- };
-
- static HsaObject* tryFile(const std::string &fname, int len,
- uint8_t *fileData);
-
- SectionInfo sectionInfo[NumSectionIndices];
- const uint8_t *getSectionOffset(enum SectionIndex sec, int offs) const;
-
- std::vector<HsailCode*> kernels;
- std::vector<HsailCode*> functions;
- std::string kern_block_name;
-
- void processDirectives(const Brig::BrigBase *dirPtr,
- const Brig::BrigBase *endPtr,
- StorageMap *storageMap);
-
- BrigObject(const std::string &fname, int len, uint8_t *fileData);
- ~BrigObject();
-
- // eventually these will need to be per-kernel not per-object-file
- StorageMap *storageMap;
- LabelMap *labelMap;
-
- const char* getString(int offs) const;
- const Brig::BrigData* getBrigBaseData(int offs) const;
- const uint8_t* getData(int offs) const;
- const Brig::BrigBase* getCodeSectionEntry(int offs) const;
- const Brig::BrigOperand* getOperand(int offs) const;
- unsigned getOperandPtr(int offs, int index) const;
- const Brig::BrigInstBase* getInst(int offs) const;
-
- HsaCode* getKernel(const std::string &name) const override;
- HsaCode* getFunction(const std::string &name) const override;
-
- int numKernels() const override { return kernels.size(); }
-
- HsaCode* getKernel(int i) const override { return kernels[i]; }
-
- // pointer to the current kernel/function we're processing, so elements
- // under construction can reference it. kinda ugly, but easier
- // than passing it all over for the few places it's needed.
- mutable HsailCode *currentCode;
-};
-
-// Utility function to bump Brig item pointer to next element given
-// item size in bytes. Really just an add but with lots of casting.
-template<typename T>
-T*
-brigNext(T *ptr)
-{
- Brig::BrigBase *base_ptr = (Brig::BrigBase*)ptr;
- int size = base_ptr->byteCount;
- assert(size);
-
- return (T*)((uint8_t*)ptr + size);
-}
-
-#endif // __BRIG_OBJECT_HH__
+++ /dev/null
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Anthony Gutierrez
- */
-
-#include "gpu-compute/cl_driver.hh"
-
-#include <memory>
-
-#include "base/intmath.hh"
-#include "cpu/thread_context.hh"
-#include "gpu-compute/dispatcher.hh"
-#include "gpu-compute/hsa_code.hh"
-#include "gpu-compute/hsa_kernel_info.hh"
-#include "gpu-compute/hsa_object.hh"
-#include "params/ClDriver.hh"
-#include "sim/process.hh"
-#include "sim/syscall_emul_buf.hh"
-
-ClDriver::ClDriver(ClDriverParams *p)
- : EmulatedDriver(p), hsaCode(0)
-{
- for (const auto &codeFile : p->codefile)
- codeFiles.push_back(&codeFile);
-
- maxFuncArgsSize = 0;
-
- for (int i = 0; i < codeFiles.size(); ++i) {
- HsaObject *obj = HsaObject::createHsaObject(*codeFiles[i]);
-
- for (int k = 0; k < obj->numKernels(); ++k) {
- assert(obj->getKernel(k));
- kernels.push_back(obj->getKernel(k));
- kernels.back()->setReadonlyData((uint8_t*)obj->readonlyData);
- int kern_funcargs_size = kernels.back()->funcarg_size;
- maxFuncArgsSize = maxFuncArgsSize < kern_funcargs_size ?
- kern_funcargs_size : maxFuncArgsSize;
- }
- }
-
- int name_offs = 0;
- int code_offs = 0;
-
- for (int i = 0; i < kernels.size(); ++i) {
- kernelInfo.push_back(HsaKernelInfo());
- HsaCode *k = kernels[i];
-
- k->generateHsaKernelInfo(&kernelInfo[i]);
-
- kernelInfo[i].name_offs = name_offs;
- kernelInfo[i].code_offs = code_offs;
-
- name_offs += k->name().size() + 1;
- code_offs += k->numInsts() * sizeof(TheGpuISA::RawMachInst);
- }
-}
-
-void
-ClDriver::handshake(GpuDispatcher *_dispatcher)
-{
- dispatcher = _dispatcher;
- dispatcher->setFuncargsSize(maxFuncArgsSize);
-}
-
-int
-ClDriver::open(ThreadContext *tc, int mode, int flags)
-{
- auto p = tc->getProcessPtr();
- std::shared_ptr<DeviceFDEntry> fdp;
- fdp = std::make_shared<DeviceFDEntry>(this, filename);
- int tgt_fd = p->fds->allocFD(fdp);
- return tgt_fd;
-}
-
-int
-ClDriver::ioctl(ThreadContext *tc, unsigned req, Addr buf_addr)
-{
- switch (req) {
- case HSA_GET_SIZES:
- {
- TypedBufferArg<HsaDriverSizes> sizes(buf_addr);
- sizes->num_kernels = kernels.size();
- sizes->string_table_size = 0;
- sizes->code_size = 0;
- sizes->readonly_size = 0;
-
- if (kernels.size() > 0) {
- // all kernels will share the same read-only memory
- sizes->readonly_size =
- kernels[0]->getSize(HsaCode::MemorySegment::READONLY);
- // check our assumption
- for (int i = 1; i<kernels.size(); ++i) {
- assert(sizes->readonly_size ==
- kernels[i]->getSize(HsaCode::MemorySegment::READONLY));
- }
- }
-
- for (int i = 0; i < kernels.size(); ++i) {
- HsaCode *k = kernels[i];
- // add one for terminating '\0'
- sizes->string_table_size += k->name().size() + 1;
- sizes->code_size +=
- k->numInsts() * sizeof(TheGpuISA::RawMachInst);
- }
-
- sizes.copyOut(tc->getVirtProxy());
- }
- break;
-
- case HSA_GET_KINFO:
- {
- TypedBufferArg<HsaKernelInfo>
- kinfo(buf_addr, sizeof(HsaKernelInfo) * kernels.size());
-
- for (int i = 0; i < kernels.size(); ++i) {
- HsaKernelInfo *ki = &kinfo[i];
- ki->name_offs = kernelInfo[i].name_offs;
- ki->code_offs = kernelInfo[i].code_offs;
- ki->sRegCount = kernelInfo[i].sRegCount;
- ki->dRegCount = kernelInfo[i].dRegCount;
- ki->cRegCount = kernelInfo[i].cRegCount;
- ki->static_lds_size = kernelInfo[i].static_lds_size;
- ki->private_mem_size = kernelInfo[i].private_mem_size;
- ki->spill_mem_size = kernelInfo[i].spill_mem_size;
- }
-
- kinfo.copyOut(tc->getVirtProxy());
- }
- break;
-
- case HSA_GET_STRINGS:
- {
- int string_table_size = 0;
- for (int i = 0; i < kernels.size(); ++i) {
- HsaCode *k = kernels[i];
- string_table_size += k->name().size() + 1;
- }
-
- BufferArg buf(buf_addr, string_table_size);
- char *bufp = (char*)buf.bufferPtr();
-
- for (int i = 0; i < kernels.size(); ++i) {
- HsaCode *k = kernels[i];
- const char *n = k->name().c_str();
-
- // idiomatic string copy
- while ((*bufp++ = *n++));
- }
-
- assert(bufp - (char *)buf.bufferPtr() == string_table_size);
-
- buf.copyOut(tc->getVirtProxy());
- }
- break;
-
- case HSA_GET_READONLY_DATA:
- {
- // we can pick any kernel --- they share the same
- // readonly segment (this assumption is checked in GET_SIZES)
- uint64_t size =
- kernels.back()->getSize(HsaCode::MemorySegment::READONLY);
- BufferArg data(buf_addr, size);
- char *datap = (char *)data.bufferPtr();
- memcpy(datap,
- kernels.back()->readonly_data,
- size);
- data.copyOut(tc->getVirtProxy());
- }
- break;
-
- case HSA_GET_CODE:
- {
- // set hsaCode pointer
- hsaCode = buf_addr;
- int code_size = 0;
-
- for (int i = 0; i < kernels.size(); ++i) {
- HsaCode *k = kernels[i];
- code_size += k->numInsts() * sizeof(TheGpuISA::RawMachInst);
- }
-
- TypedBufferArg<TheGpuISA::RawMachInst> buf(buf_addr, code_size);
- TheGpuISA::RawMachInst *bufp = buf;
-
- int buf_idx = 0;
-
- for (int i = 0; i < kernels.size(); ++i) {
- HsaCode *k = kernels[i];
-
- for (int j = 0; j < k->numInsts(); ++j) {
- bufp[buf_idx] = k->insts()->at(j);
- ++buf_idx;
- }
- }
-
- buf.copyOut(tc->getVirtProxy());
- }
- break;
-
- case HSA_GET_CU_CNT:
- {
- BufferArg buf(buf_addr, sizeof(uint32_t));
- *((uint32_t*)buf.bufferPtr()) = dispatcher->getNumCUs();
- buf.copyOut(tc->getVirtProxy());
- }
- break;
-
- case HSA_GET_VSZ:
- {
- BufferArg buf(buf_addr, sizeof(uint32_t));
- *((uint32_t*)buf.bufferPtr()) = dispatcher->wfSize();
- buf.copyOut(tc->getVirtProxy());
- }
- break;
- case HSA_GET_HW_STATIC_CONTEXT_SIZE:
- {
- BufferArg buf(buf_addr, sizeof(uint32_t));
- *((uint32_t*)buf.bufferPtr()) = dispatcher->getStaticContextSize();
- buf.copyOut(tc->getVirtProxy());
- }
- break;
-
- default:
- fatal("ClDriver: bad ioctl %d\n", req);
- }
-
- return 0;
-}
-
-const char*
-ClDriver::codeOffToKernelName(uint64_t code_ptr)
-{
- assert(hsaCode);
- uint32_t code_offs = code_ptr - hsaCode;
-
- for (int i = 0; i < kernels.size(); ++i) {
- if (code_offs == kernelInfo[i].code_offs) {
- return kernels[i]->name().c_str();
- }
- }
-
- return nullptr;
-}
-
-ClDriver*
-ClDriverParams::create()
-{
- return new ClDriver(this);
-}
+++ /dev/null
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Anthony Gutierrez
- */
-
-#ifndef __CL_DRIVER_HH__
-#define __CL_DRIVER_HH__
-
-#include <vector>
-
-#include "gpu-compute/hsa_kernel_info.hh"
-#include "sim/emul_driver.hh"
-
-class GpuDispatcher;
-class HsaCode;
-class Process;
-class ThreadContext;
-
-struct ClDriverParams;
-
-class ClDriver final : public EmulatedDriver
-{
- public:
- ClDriver(ClDriverParams *p);
- void handshake(GpuDispatcher *_dispatcher);
- int open(ThreadContext *tc, int mode, int flags);
- int ioctl(ThreadContext *tc, unsigned req, Addr buf);
- const char* codeOffToKernelName(uint64_t code_ptr);
-
- private:
- GpuDispatcher *dispatcher;
-
- std::vector<const std::string*> codeFiles;
-
- // All the kernels we know about
- std::vector<HsaCode*> kernels;
- std::vector<HsaCode*> functions;
-
- std::vector<HsaKernelInfo> kernelInfo;
-
- // maximum size necessary for function arguments
- int maxFuncArgsSize;
- // The host virtual address for the kernel code
- uint64_t hsaCode;
-};
-
-#endif // __CL_DRIVER_HH__
+++ /dev/null
-/*
- * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __GPU_CL_EVENT_HH__
-#define __GPU_CL_EVENT_HH__
-
-struct HsaQueueEntry;
-
-class _cl_event {
- public:
- _cl_event() : done(false), hsaTaskPtr(nullptr), start(0), end(0) { }
-
- volatile bool done;
- HsaQueueEntry *hsaTaskPtr;
- uint64_t start;
- uint64_t end;
-};
-
-#endif // __GPU_CL_EVENT_HH__
+++ /dev/null
-/*
- * Copyright (c) 2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: John Kalamatianos
- */
-
-#include "gpu-compute/condition_register_state.hh"
-
-#include "gpu-compute/compute_unit.hh"
-#include "gpu-compute/gpu_static_inst.hh"
-#include "gpu-compute/shader.hh"
-#include "gpu-compute/wavefront.hh"
-
-ConditionRegisterState::ConditionRegisterState()
-{
- computeUnit = nullptr;
- c_reg.clear();
- busy.clear();
-}
-
-void
-ConditionRegisterState::setParent(ComputeUnit *_computeUnit)
-{
- computeUnit = _computeUnit;
- _name = computeUnit->name() + ".CondRegState";
-}
-
-void
-ConditionRegisterState::init(uint32_t _size)
-{
- c_reg.resize(_size);
- busy.resize(_size, 0);
-}
-
-void
-ConditionRegisterState::exec(GPUDynInstPtr ii, Wavefront *w)
-{
- // iterate over all operands
- for (auto i = 0; i < ii->getNumOperands(); ++i) {
- // is this a condition register destination operand?
- if (ii->isCondRegister(i) && ii->isDstOperand(i)) {
- // mark the register as busy
- markReg(ii->getRegisterIndex(i, ii), 1);
- uint32_t pipeLen = w->computeUnit->spBypassLength();
-
- // schedule an event for marking the register as ready
- w->computeUnit->
- registerEvent(w->simdId, ii->getRegisterIndex(i, ii),
- ii->getOperandSize(i),
- w->computeUnit->shader->tick_cnt +
- w->computeUnit->shader->ticks(pipeLen), 0);
- }
- }
-}
+++ /dev/null
-/*
- * Copyright (c) 2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: John Kalamatianos
- */
-
-#ifndef __CONDITION_REGISTER_STATE_HH__
-#define __CONDITION_REGISTER_STATE_HH__
-
-#include <string>
-#include <vector>
-
-#include "gpu-compute/misc.hh"
-
-class ComputeUnit;
-class GPUStaticInst;
-class Shader;
-class Wavefront;
-
-// Condition Register State (used only when executing HSAIL)
-class ConditionRegisterState
-{
- public:
- ConditionRegisterState();
- void init(uint32_t _size);
- const std::string name() const { return _name; }
- void setParent(ComputeUnit *_computeUnit);
- void regStats() { }
-
- template<typename T>
- T
- read(int regIdx, int threadId)
- {
- bool tmp = c_reg[regIdx][threadId];
- T *p0 = (T*)(&tmp);
-
- return *p0;
- }
-
- template<typename T>
- void
- write(int regIdx, int threadId, T value)
- {
- c_reg[regIdx][threadId] = (bool)(value & 0x01);
- }
-
- void
- markReg(int regIdx, uint8_t value)
- {
- busy.at(regIdx) = value;
- }
-
- uint8_t
- regBusy(int idx)
- {
- uint8_t status = busy.at(idx);
- return status;
- }
-
- int numRegs() { return c_reg.size(); }
- void exec(GPUDynInstPtr ii, Wavefront *w);
-
- private:
- ComputeUnit* computeUnit;
- std::string _name;
- // Condition Register state
- std::vector<VectorMask> c_reg;
- // flag indicating if a register is busy
- std::vector<uint8_t> busy;
-};
-
-#endif
+++ /dev/null
-/*
- * Copyright (c) 2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Anthony Gutierrez
- */
-
-#ifndef __HSA_CODE_HH__
-#define __HSA_CODE_HH__
-
-#include <string>
-#include <vector>
-
-#include "arch/gpu_types.hh"
-#include "config/the_gpu_isa.hh"
-
-class HsaKernelInfo;
-
-/* @class HsaCode
- * base code object for the set of HSA kernels associated
- * with a single application. this class provides the common
- * methods for creating, accessing, and storing information
- * about kernel and variable symbols, symbol name, memory
- * segment sizes, and instruction count, etc.
- */
-
-class HsaCode
-{
- public:
- HsaCode(const std::string &name) : readonly_data(nullptr), funcarg_size(0),
- _name(name)
- {
- }
-
- enum class MemorySegment {
- NONE,
- FLAT,
- GLOBAL,
- READONLY,
- KERNARG,
- GROUP,
- PRIVATE,
- SPILL,
- ARG,
- EXTSPACE0
- };
-
- const std::string& name() const { return _name; }
- int numInsts() const { return _insts.size(); }
- std::vector<TheGpuISA::RawMachInst>* insts() { return &_insts; }
-
- void
- setReadonlyData(uint8_t *_readonly_data)
- {
- readonly_data = _readonly_data;
- }
-
- virtual int getSize(MemorySegment segment) const = 0;
- virtual void generateHsaKernelInfo(HsaKernelInfo *hsaKernelInfo) const = 0;
-
- uint8_t *readonly_data;
- int funcarg_size;
-
- protected:
- // An array that stores instruction indices (0 through kernel size)
- // for a kernel passed to code object constructor as an argument.
- std::vector<TheGpuISA::RawMachInst> _insts;
-
- private:
- const std::string _name;
-};
-
-#endif // __HSA_CODE_HH__
+++ /dev/null
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Steve Reinhardt
- */
-
-#ifndef __HSA_KERNEL_INFO_HH__
-#define __HSA_KERNEL_INFO_HH__
-
-// This file defines the public interface between the HSA emulated
-// driver and application programs.
-
-#include <cstdint>
-
-static const int HSA_GET_SIZES = 0x4801;
-static const int HSA_GET_KINFO = 0x4802;
-static const int HSA_GET_STRINGS = 0x4803;
-static const int HSA_GET_CODE = 0x4804;
-static const int HSA_GET_READONLY_DATA = 0x4805;
-static const int HSA_GET_CU_CNT = 0x4806;
-static const int HSA_GET_VSZ = 0x4807;
-static const int HSA_GET_HW_STATIC_CONTEXT_SIZE = 0x4808;
-
-// Return value (via buffer ptr) for HSA_GET_SIZES
-struct HsaDriverSizes
-{
- uint32_t num_kernels;
- uint32_t string_table_size;
- uint32_t code_size;
- uint32_t readonly_size;
-};
-
-// HSA_GET_KINFO returns an array of num_kernels of these structs
-struct HsaKernelInfo
-{
- // byte offset into string table
- uint32_t name_offs;
- // byte offset into code array
- uint32_t code_offs;
- uint32_t static_lds_size;
- uint32_t private_mem_size;
- uint32_t spill_mem_size;
- // Number of s registers
- uint32_t sRegCount;
- // Number of d registers
- uint32_t dRegCount;
- // Number of c registers
- uint32_t cRegCount;
-};
-
-#endif // __HSA_KERNEL_INFO_HH__
+++ /dev/null
-/*
- * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Anthony Gutierrez
- */
-
-#include "gpu-compute/hsa_object.hh"
-
-#include <cassert>
-#include <fstream>
-
-#include "base/logging.hh"
-
-HsaObject::HsaObject(const std::string &fname)
- : readonlyData(nullptr), filename(fname)
-{
-}
-
-HsaObject*
-HsaObject::createHsaObject(const std::string &fname)
-{
- HsaObject *hsaObj = nullptr;
- uint8_t *file_data = nullptr;
- int file_length = 0;
-
- std::ifstream code_file(fname, std::ifstream::ate | std::ifstream::in |
- std::ifstream::binary);
-
- assert(code_file.is_open());
- assert(code_file.good());
-
- file_length = code_file.tellg();
- code_file.seekg(0, code_file.beg);
- file_data = new uint8_t[file_length];
- code_file.read((char*)file_data, file_length);
- code_file.close();
-
- for (const auto &tryFile : tryFileFuncs) {
- if ((hsaObj = tryFile(fname, file_length, file_data))) {
- return hsaObj;
- }
- }
-
- delete[] file_data;
- fatal("Unknown HSA object type for file: %s.\n", fname);
-
- return nullptr;
-}
+++ /dev/null
-/*
- * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Anthony Gutierrez
- */
-
-#ifndef __HSA_OBJECT_HH__
-#define __HSA_OBJECT_HH__
-
-#include <functional>
-#include <string>
-#include <vector>
-
-class HsaCode;
-
-/* @class HsaObject
- * base loader object for HSA kernels. this class provides
- * the base method definitions for loading, storing, and
- * accessing HSA kernel objects into the simulator.
- */
-
-class HsaObject
-{
- public:
- HsaObject(const std::string &fileName);
-
- static HsaObject* createHsaObject(const std::string &fname);
- static std::vector<std::function<HsaObject*(const std::string&, int,
- uint8_t*)>> tryFileFuncs;
-
- virtual HsaCode* getKernel(const std::string &name) const = 0;
- virtual HsaCode* getKernel(int i) const = 0;
- virtual HsaCode* getFunction(const std::string &name) const = 0;
- virtual int numKernels() const = 0;
-
- const std::string& name() const { return filename; }
-
- uint8_t *readonlyData;
-
-
- protected:
- const std::string filename;
-};
-
-#endif // __HSA_OBJECT_HH__
+++ /dev/null
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Steve Reinhardt
- */
-
-#include "gpu-compute/hsail_code.hh"
-
-#include "arch/gpu_types.hh"
-#include "arch/hsail/Brig.h"
-#include "arch/hsail/operand.hh"
-#include "config/the_gpu_isa.hh"
-#include "debug/BRIG.hh"
-#include "debug/HSAILObject.hh"
-#include "gpu-compute/brig_object.hh"
-#include "gpu-compute/gpu_static_inst.hh"
-#include "gpu-compute/kernel_cfg.hh"
-
-using namespace Brig;
-
-int getBrigDataTypeBytes(BrigType16_t t);
-
-HsailCode::HsailCode(const std::string &name_str)
- : HsaCode(name_str), private_size(-1), readonly_size(-1)
-{
-}
-
-void
-HsailCode::init(const BrigDirectiveExecutable *code_dir, const BrigObject *obj,
- StorageMap *objStorageMap)
-{
- storageMap = objStorageMap;
-
- // set pointer so that decoding process can find this kernel context when
- // needed
- obj->currentCode = this;
-
- if (code_dir->base.kind != BRIG_KIND_DIRECTIVE_FUNCTION &&
- code_dir->base.kind != BRIG_KIND_DIRECTIVE_KERNEL) {
- fatal("unexpected directive kind %d inside kernel/function init\n",
- code_dir->base.kind);
- }
-
- DPRINTF(HSAILObject, "Initializing code, first code block entry is: %d\n",
- code_dir->firstCodeBlockEntry);
-
- // clear these static vars so we can properly track the max index
- // for this kernel
- SRegOperand::maxRegIdx = 0;
- DRegOperand::maxRegIdx = 0;
- CRegOperand::maxRegIdx = 0;
- setPrivateSize(0);
-
- const BrigBase *entryPtr = brigNext((BrigBase*)code_dir);
- const BrigBase *endPtr =
- obj->getCodeSectionEntry(code_dir->nextModuleEntry);
-
- // the instruction's byte address (relative to the base addr
- // of the code section)
- int inst_addr = 0;
- // the index that points to the instruction in the instruction
- // array
- int inst_idx = 0;
- std::vector<GPUStaticInst*> instructions;
- int funcarg_size_scope = 0;
-
- // walk through instructions in code section and directives in
- // directive section in parallel, processing directives that apply
- // when we reach the relevant code point.
- while (entryPtr < endPtr) {
- switch (entryPtr->kind) {
- case BRIG_KIND_DIRECTIVE_VARIABLE:
- {
- const BrigDirectiveVariable *sym =
- (const BrigDirectiveVariable*)entryPtr;
-
- DPRINTF(HSAILObject,"Initializing code, directive is "
- "kind_variable, symbol is: %s\n",
- obj->getString(sym->name));
-
- StorageElement *se = storageMap->addSymbol(sym, obj);
-
- if (sym->segment == BRIG_SEGMENT_PRIVATE) {
- setPrivateSize(se->size);
- } else { // spill
- funcarg_size_scope += se->size;
- }
- }
- break;
-
- case BRIG_KIND_DIRECTIVE_LABEL:
- {
- const BrigDirectiveLabel *lbl =
- (const BrigDirectiveLabel*)entryPtr;
-
- DPRINTF(HSAILObject,"Initializing code, directive is "
- "kind_label, label is: %s \n",
- obj->getString(lbl->name));
-
- labelMap.addLabel(lbl, inst_addr, obj);
- }
- break;
-
- case BRIG_KIND_DIRECTIVE_PRAGMA:
- {
- DPRINTF(HSAILObject, "Initializing code, directive "
- "is kind_pragma\n");
- }
- break;
-
- case BRIG_KIND_DIRECTIVE_COMMENT:
- {
- DPRINTF(HSAILObject, "Initializing code, directive is "
- "kind_comment\n");
- }
- break;
-
- case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START:
- {
- DPRINTF(HSAILObject, "Initializing code, directive is "
- "kind_arg_block_start\n");
-
- storageMap->resetOffset(BRIG_SEGMENT_ARG);
- funcarg_size_scope = 0;
- }
- break;
-
- case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END:
- {
- DPRINTF(HSAILObject, "Initializing code, directive is "
- "kind_arg_block_end\n");
-
- funcarg_size = funcarg_size < funcarg_size_scope ?
- funcarg_size_scope : funcarg_size;
- }
- break;
-
- case BRIG_KIND_DIRECTIVE_END:
- DPRINTF(HSAILObject, "Initializing code, dircetive is "
- "kind_end\n");
-
- break;
-
- default:
- if (entryPtr->kind >= BRIG_KIND_INST_BEGIN &&
- entryPtr->kind <= BRIG_KIND_INST_END) {
-
- BrigInstBase *instPtr = (BrigInstBase*)entryPtr;
- TheGpuISA::MachInst machInst = { instPtr, obj };
- GPUStaticInst *iptr = decoder.decode(machInst);
-
- if (iptr) {
- DPRINTF(HSAILObject, "Initializing code, processing inst "
- "byte addr #%d idx %d: OPCODE=%d\n", inst_addr,
- inst_idx, instPtr->opcode);
-
- TheGpuISA::RawMachInst raw_inst = decoder.saveInst(iptr);
- iptr->instNum(inst_idx);
- iptr->instAddr(inst_addr);
- _insts.push_back(raw_inst);
- instructions.push_back(iptr);
- }
- inst_addr += sizeof(TheGpuISA::RawMachInst);
- ++inst_idx;
- } else if (entryPtr->kind >= BRIG_KIND_OPERAND_BEGIN &&
- entryPtr->kind < BRIG_KIND_OPERAND_END) {
- warn("unexpected operand entry in code segment\n");
- } else {
- // there are surely some more cases we will need to handle,
- // but we'll deal with them as we find them.
- fatal("unexpected directive kind %d inside kernel scope\n",
- entryPtr->kind);
- }
- }
-
- entryPtr = brigNext(entryPtr);
- }
-
- // compute Control Flow Graph for current kernel
- ControlFlowInfo::assignImmediatePostDominators(instructions);
-
- max_sreg = SRegOperand::maxRegIdx;
- max_dreg = DRegOperand::maxRegIdx;
- max_creg = CRegOperand::maxRegIdx;
-
- obj->currentCode = nullptr;
-}
-
-HsailCode::HsailCode(const std::string &name_str,
- const BrigDirectiveExecutable *code_dir,
- const BrigObject *obj, StorageMap *objStorageMap)
- : HsaCode(name_str), private_size(-1), readonly_size(-1)
-{
- init(code_dir, obj, objStorageMap);
-}
-
-void
-LabelMap::addLabel(const Brig::BrigDirectiveLabel *lblDir, int inst_index,
- const BrigObject *obj)
-{
- std::string lbl_name = obj->getString(lblDir->name);
- Label &lbl = map[lbl_name];
-
- if (lbl.defined()) {
- fatal("Attempt to redefine existing label %s\n", lbl_name);
- }
-
- lbl.define(lbl_name, inst_index);
- DPRINTF(HSAILObject, "label %s = %d\n", lbl_name, inst_index);
-}
-
-Label*
-LabelMap::refLabel(const Brig::BrigDirectiveLabel *lblDir,
- const BrigObject *obj)
-{
- std::string name = obj->getString(lblDir->name);
- Label &lbl = map[name];
- lbl.checkName(name);
-
- return &lbl;
-}
-
-int
-getBrigDataTypeBytes(BrigType16_t t)
-{
- switch (t) {
- case BRIG_TYPE_S8:
- case BRIG_TYPE_U8:
- case BRIG_TYPE_B8:
- return 1;
-
- case BRIG_TYPE_S16:
- case BRIG_TYPE_U16:
- case BRIG_TYPE_B16:
- case BRIG_TYPE_F16:
- return 2;
-
- case BRIG_TYPE_S32:
- case BRIG_TYPE_U32:
- case BRIG_TYPE_B32:
- case BRIG_TYPE_F32:
- return 4;
-
- case BRIG_TYPE_S64:
- case BRIG_TYPE_U64:
- case BRIG_TYPE_B64:
- case BRIG_TYPE_F64:
- return 8;
-
- case BRIG_TYPE_B1:
-
- default:
- fatal("unhandled symbol data type %d", t);
- return 0;
- }
-}
-
-StorageElement*
-StorageSpace::addSymbol(const BrigDirectiveVariable *sym,
- const BrigObject *obj)
-{
- const char *sym_name = obj->getString(sym->name);
- uint64_t size = 0;
- uint64_t offset = 0;
-
- if (sym->type & BRIG_TYPE_ARRAY) {
- size = getBrigDataTypeBytes(sym->type & ~BRIG_TYPE_ARRAY);
- size *= (((uint64_t)sym->dim.hi) << 32 | (uint64_t)sym->dim.lo);
-
- offset = roundUp(nextOffset, getBrigDataTypeBytes(sym->type &
- ~BRIG_TYPE_ARRAY));
- } else {
- size = getBrigDataTypeBytes(sym->type);
- offset = roundUp(nextOffset, getBrigDataTypeBytes(sym->type));
- }
-
- nextOffset = offset + size;
-
- DPRINTF(HSAILObject, "Adding SYMBOL %s size %d offset %#x, init: %d\n",
- sym_name, size, offset, sym->init);
-
- StorageElement* se = new StorageElement(sym_name, offset, size, sym);
- elements.push_back(se);
- elements_by_addr.insert(AddrRange(offset, offset + size - 1), se);
- elements_by_brigptr[sym] = se;
-
- return se;
-}
-
-StorageElement*
-StorageSpace::findSymbol(std::string name)
-{
- for (auto it : elements) {
- if (it->name == name) {
- return it;
- }
- }
-
- return nullptr;
-}
-
-StorageElement*
-StorageSpace::findSymbol(uint64_t addr)
-{
- assert(elements_by_addr.size() > 0);
-
- auto se = elements_by_addr.contains(addr);
-
- if (se == elements_by_addr.end()) {
- return nullptr;
- } else {
- return se->second;
- }
-}
-
-StorageElement*
-StorageSpace::findSymbol(const BrigDirectiveVariable *brigptr)
-{
- assert(elements_by_brigptr.size() > 0);
-
- auto se = elements_by_brigptr.find(brigptr);
-
- if (se == elements_by_brigptr.end()) {
- return nullptr;
- } else {
- return se->second;
- }
-}
-
-StorageMap::StorageMap(StorageMap *outerScope)
- : outerScopeMap(outerScope)
-{
- for (int i = 0; i < NumSegments; ++i)
- space[i] = new StorageSpace((BrigSegment)i);
-}
-
-StorageElement*
-StorageMap::addSymbol(const BrigDirectiveVariable *sym, const BrigObject *obj)
-{
- BrigSegment8_t segment = sym->segment;
-
- assert(segment >= Brig::BRIG_SEGMENT_FLAT);
- assert(segment < NumSegments);
-
- return space[segment]->addSymbol(sym, obj);
-}
-
-int
-StorageMap::getSize(Brig::BrigSegment segment)
-{
- assert(segment > Brig::BRIG_SEGMENT_GLOBAL);
- assert(segment < NumSegments);
-
- if (segment != Brig::BRIG_SEGMENT_GROUP &&
- segment != Brig::BRIG_SEGMENT_READONLY) {
- return space[segment]->getSize();
- } else {
- int ret = space[segment]->getSize();
-
- if (outerScopeMap) {
- ret += outerScopeMap->getSize(segment);
- }
-
- return ret;
- }
-}
-
-void
-StorageMap::resetOffset(Brig::BrigSegment segment)
-{
- space[segment]->resetOffset();
-}
-
-StorageElement*
-StorageMap::findSymbol(BrigSegment segment, std::string name)
-{
- StorageElement *se = space[segment]->findSymbol(name);
-
- if (se)
- return se;
-
- if (outerScopeMap)
- return outerScopeMap->findSymbol(segment, name);
-
- return nullptr;
-}
-
-StorageElement*
-StorageMap::findSymbol(Brig::BrigSegment segment, uint64_t addr)
-{
- StorageSpace *sp = space[segment];
-
- if (!sp) {
- // there is no memory in segment?
- return nullptr;
- }
-
- StorageElement *se = sp->findSymbol(addr);
-
- if (se)
- return se;
-
- if (outerScopeMap)
- return outerScopeMap->findSymbol(segment, addr);
-
- return nullptr;
-
-}
-
-StorageElement*
-StorageMap::findSymbol(Brig::BrigSegment segment,
- const BrigDirectiveVariable *brigptr)
-{
- StorageSpace *sp = space[segment];
-
- if (!sp) {
- // there is no memory in segment?
- return nullptr;
- }
-
- StorageElement *se = sp->findSymbol(brigptr);
-
- if (se)
- return se;
-
- if (outerScopeMap)
- return outerScopeMap->findSymbol(segment, brigptr);
-
- return nullptr;
-
-}
+++ /dev/null
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Steve Reinhardt
- */
-
-#ifndef __HSAIL_CODE_HH__
-#define __HSAIL_CODE_HH__
-
-#include <cassert>
-#include <list>
-#include <map>
-#include <string>
-#include <vector>
-
-#include "arch/gpu_decoder.hh"
-#include "arch/hsail/Brig.h"
-#include "base/addr_range_map.hh"
-#include "base/intmath.hh"
-#include "config/the_gpu_isa.hh"
-#include "gpu-compute/hsa_code.hh"
-#include "gpu-compute/hsa_kernel_info.hh"
-#include "gpu-compute/misc.hh"
-
-class BrigObject;
-class GPUStaticInst;
-
-inline int
-popcount(uint64_t src, int sz)
-{
- int cnt = 0;
-
- for (int i = 0; i < sz; ++i) {
- if (src & 1)
- ++cnt;
- src >>= 1;
- }
-
- return cnt;
-}
-
-inline int
-firstbit(uint64_t src, int sz)
-{
- int i;
-
- for (i = 0; i < sz; ++i) {
- if (src & 1)
- break;
- src >>= 1;
- }
-
- return i;
-}
-
-inline int
-lastbit(uint64_t src, int sz)
-{
- int i0 = -1;
-
- for (int i = 0; i < sz; ++i) {
- if (src & 1)
- i0 = i;
- src >>= 1;
- }
-
- return i0;
-}
-
-inline int
-signbit(uint64_t src, int sz)
-{
- int i0 = -1;
-
- if (src & (1 << (sz - 1))) {
- for (int i = 0; i < sz - 1; ++i) {
- if (!(src & 1))
- i0 = i;
- src >>= 1;
- }
- } else {
- for (int i = 0; i < sz - 1; ++i) {
- if (src & 1)
- i0 = i;
- src >>= 1;
- }
- }
-
- return i0;
-}
-
-inline uint64_t
-bitrev(uint64_t src, int sz)
-{
- uint64_t r = 0;
-
- for (int i = 0; i < sz; ++i) {
- r <<= 1;
- if (src & 1)
- r |= 1;
- src >>= 1;
- }
-
- return r;
-}
-
-inline uint64_t
-mul_hi(uint32_t a, uint32_t b)
-{
- return ((uint64_t)a * (uint64_t)b) >> 32;
-}
-
-inline uint64_t
-mul_hi(int32_t a, int32_t b)
-{
- return ((int64_t)a * (int64_t)b) >> 32;
-}
-
-inline uint64_t
-mul_hi(uint64_t a, uint64_t b)
-{
- return ((uint64_t)a * (uint64_t)b) >> 32;
-}
-
-inline uint64_t
-mul_hi(int64_t a, int64_t b)
-{
- return ((int64_t)a * (int64_t)b) >> 32;
-}
-
-inline uint64_t
-mul_hi(double a, double b)
-{
- return 0;
-}
-
-class Label
-{
- public:
- std::string name;
- int value;
-
- Label() : value(-1)
- {
- }
-
- bool defined() { return value != -1; }
-
- void
- checkName(std::string &_name)
- {
- if (name.empty()) {
- name = _name;
- } else {
- assert(name == _name);
- }
- }
-
- void
- define(std::string &_name, int _value)
- {
- assert(!defined());
- assert(_value != -1);
- value = _value;
- checkName(_name);
- }
-
- int
- get()
- {
- assert(defined());
- return value;
- }
-};
-
-class LabelMap
-{
- std::map<std::string, Label> map;
-
- public:
- LabelMap() { }
-
- void addLabel(const Brig::BrigDirectiveLabel *lbl, int inst_index,
- const BrigObject *obj);
-
- Label *refLabel(const Brig::BrigDirectiveLabel *lbl,
- const BrigObject *obj);
-};
-
-const int NumSegments = Brig::BRIG_SEGMENT_AMD_GCN;
-
-extern const char *segmentNames[];
-
-class StorageElement
-{
- public:
- std::string name;
- uint64_t offset;
-
- uint64_t size;
- const Brig::BrigDirectiveVariable *brigSymbol;
- StorageElement(const char *_name, uint64_t _offset, int _size,
- const Brig::BrigDirectiveVariable *sym)
- : name(_name), offset(_offset), size(_size), brigSymbol(sym)
- {
- }
-};
-
-class StorageSpace
-{
- typedef std::map<const Brig::BrigDirectiveVariable*, StorageElement*>
- DirVarToSE_map;
-
- std::list<StorageElement*> elements;
- AddrRangeMap<StorageElement*> elements_by_addr;
- DirVarToSE_map elements_by_brigptr;
-
- uint64_t nextOffset;
-
- public:
- StorageSpace(Brig::BrigSegment _class) : nextOffset(0)
- {
- }
-
- StorageElement *addSymbol(const Brig::BrigDirectiveVariable *sym,
- const BrigObject *obj);
-
- StorageElement* findSymbol(std::string name);
- StorageElement* findSymbol(uint64_t addr);
- StorageElement* findSymbol(const Brig::BrigDirectiveVariable *brigptr);
-
- int getSize() { return nextOffset; }
- void resetOffset() { nextOffset = 0; }
-};
-
-class StorageMap
-{
- StorageMap *outerScopeMap;
- StorageSpace *space[NumSegments];
-
- public:
- StorageMap(StorageMap *outerScope = nullptr);
-
- StorageElement *addSymbol(const Brig::BrigDirectiveVariable *sym,
- const BrigObject *obj);
-
- StorageElement* findSymbol(Brig::BrigSegment segment, std::string name);
- StorageElement* findSymbol(Brig::BrigSegment segment, uint64_t addr);
-
- StorageElement* findSymbol(Brig::BrigSegment segment,
- const Brig::BrigDirectiveVariable *brigptr);
-
- // overloaded version to avoid casting
- StorageElement*
- findSymbol(Brig::BrigSegment8_t segment, std::string name)
- {
- return findSymbol((Brig::BrigSegment)segment, name);
- }
-
- int getSize(Brig::BrigSegment segment);
- void resetOffset(Brig::BrigSegment segment);
-};
-
-typedef enum
-{
- BT_DEFAULT,
- BT_B8,
- BT_U8,
- BT_U16,
- BT_U32,
- BT_U64,
- BT_S8,
- BT_S16,
- BT_S32,
- BT_S64,
- BT_F16,
- BT_F32,
- BT_F64,
- BT_NULL
-} base_type_e;
-
-/* @class HsailCode
- * the HsailCode class is used to store information
- * about HSA kernels stored in the BRIG format. it holds
- * all information about a kernel, function, or variable
- * symbol and provides methods for accessing that
- * information.
- */
-
-class HsailCode final : public HsaCode
-{
- public:
- TheGpuISA::Decoder decoder;
-
- StorageMap *storageMap;
- LabelMap labelMap;
- uint32_t kernarg_start;
- uint32_t kernarg_end;
- int32_t private_size;
-
- int32_t readonly_size;
-
- // We track the maximum register index used for each register
- // class when we load the code so we can size the register files
- // appropriately (i.e., one more than the max index).
- uint32_t max_creg; // maximum c-register index
- uint32_t max_sreg; // maximum s-register index
- uint32_t max_dreg; // maximum d-register index
-
- HsailCode(const std::string &name_str,
- const Brig::BrigDirectiveExecutable *code_dir,
- const BrigObject *obj,
- StorageMap *objStorageMap);
-
- // this version is used to create a placeholder when
- // we encounter a kernel-related directive before the
- // kernel itself
- HsailCode(const std::string &name_str);
-
- void init(const Brig::BrigDirectiveExecutable *code_dir,
- const BrigObject *obj, StorageMap *objStorageMap);
-
- void
- generateHsaKernelInfo(HsaKernelInfo *hsaKernelInfo) const
- {
- hsaKernelInfo->sRegCount = max_sreg + 1;
- hsaKernelInfo->dRegCount = max_dreg + 1;
- hsaKernelInfo->cRegCount = max_creg + 1;
-
- hsaKernelInfo->static_lds_size = getSize(Brig::BRIG_SEGMENT_GROUP);
-
- hsaKernelInfo->private_mem_size =
- roundUp(getSize(Brig::BRIG_SEGMENT_PRIVATE), 8);
-
- hsaKernelInfo->spill_mem_size =
- roundUp(getSize(Brig::BRIG_SEGMENT_SPILL), 8);
- }
-
- int
- getSize(MemorySegment segment) const
- {
- Brig::BrigSegment brigSeg;
-
- switch (segment) {
- case MemorySegment::NONE:
- brigSeg = Brig::BRIG_SEGMENT_NONE;
- break;
- case MemorySegment::FLAT:
- brigSeg = Brig::BRIG_SEGMENT_FLAT;
- break;
- case MemorySegment::GLOBAL:
- brigSeg = Brig::BRIG_SEGMENT_GLOBAL;
- break;
- case MemorySegment::READONLY:
- brigSeg = Brig::BRIG_SEGMENT_READONLY;
- break;
- case MemorySegment::KERNARG:
- brigSeg = Brig::BRIG_SEGMENT_KERNARG;
- break;
- case MemorySegment::GROUP:
- brigSeg = Brig::BRIG_SEGMENT_GROUP;
- break;
- case MemorySegment::PRIVATE:
- brigSeg = Brig::BRIG_SEGMENT_PRIVATE;
- break;
- case MemorySegment::SPILL:
- brigSeg = Brig::BRIG_SEGMENT_SPILL;
- break;
- case MemorySegment::ARG:
- brigSeg = Brig::BRIG_SEGMENT_ARG;
- break;
- case MemorySegment::EXTSPACE0:
- brigSeg = Brig::BRIG_SEGMENT_AMD_GCN;
- break;
- default:
- fatal("Unknown BrigSegment type.\n");
- }
-
- return getSize(brigSeg);
- }
-
- private:
- int
- getSize(Brig::BrigSegment segment) const
- {
- if (segment == Brig::BRIG_SEGMENT_PRIVATE) {
- // with the code generated by new HSA compiler the assertion
- // does not hold anymore..
- //assert(private_size != -1);
- return private_size;
- } else {
- return storageMap->getSize(segment);
- }
- }
-
- public:
- StorageElement*
- findSymbol(Brig::BrigSegment segment, uint64_t addr)
- {
- return storageMap->findSymbol(segment, addr);
- }
-
- void
- setPrivateSize(int32_t _private_size)
- {
- private_size = _private_size;
- }
-
- Label*
- refLabel(const Brig::BrigDirectiveLabel *lbl, const BrigObject *obj)
- {
- return labelMap.refLabel(lbl, obj);
- }
-};
-
-#endif // __HSAIL_CODE_HH__
+++ /dev/null
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Steve Reinhardt
- */
-
-#include "gpu-compute/kernel_cfg.hh"
-
-#include <algorithm>
-#include <cassert>
-#include <cstdio>
-#include <cstring>
-#include <iostream>
-#include <iterator>
-#include <map>
-#include <string>
-
-#include "gpu-compute/gpu_static_inst.hh"
-
-void
-ControlFlowInfo::assignImmediatePostDominators(
- const std::vector<GPUStaticInst*>& instructions)
-{
- ControlFlowInfo cfg(instructions);
- cfg.findImmediatePostDominators();
-}
-
-
-ControlFlowInfo::ControlFlowInfo(const std::vector<GPUStaticInst*>& insts) :
- instructions(insts)
-{
- createBasicBlocks();
- connectBasicBlocks();
-}
-
-BasicBlock*
-ControlFlowInfo::basicBlock(int inst_addr) const {
- for (auto& block: basicBlocks) {
- int first_block_addr = block->firstInstruction->instAddr();
- if (inst_addr >= first_block_addr && inst_addr <
- first_block_addr + block->size * sizeof(TheGpuISA::RawMachInst)) {
- return block.get();
- }
- }
- return nullptr;
-}
-
-
-GPUStaticInst*
-ControlFlowInfo::lastInstruction(const BasicBlock* block) const
-{
- if (block->isExit()) {
- return nullptr;
- }
-
- return instructions.at(block->firstInstruction->instNum() +
- block->size - 1);
-}
-
-BasicBlock*
-ControlFlowInfo::postDominator(const BasicBlock* block) const
-{
- if (block->isExit()) {
- return nullptr;
- }
- return basicBlock(lastInstruction(block)->ipdInstNum());
-}
-
-void
-ControlFlowInfo::createBasicBlocks()
-{
- assert(!instructions.empty());
- std::set<int> leaders;
- // first instruction is a leader
- leaders.insert(0);
- for (const auto &instruction : instructions) {
- if (instruction->isBranch()) {
- const int target_pc = instruction->getTargetPc();
- leaders.insert(target_pc);
- leaders.insert(instruction->nextInstAddr());
- }
- }
-
- size_t block_size = 0;
- for (const auto &instruction : instructions) {
- if (leaders.find(instruction->instAddr()) != leaders.end()) {
- uint32_t id = basicBlocks.size();
- if (id > 0) {
- basicBlocks.back()->size = block_size;
- }
- block_size = 0;
- basicBlocks.emplace_back(new BasicBlock(id, instruction));
- }
- block_size++;
- }
- basicBlocks.back()->size = block_size;
- // exit basic block
- basicBlocks.emplace_back(new BasicBlock(basicBlocks.size(), nullptr));
-}
-
-void
-ControlFlowInfo::connectBasicBlocks()
-{
- BasicBlock* exit_bb = basicBlocks.back().get();
- for (auto& bb : basicBlocks) {
- if (bb->isExit()) {
- break;
- }
- GPUStaticInst* last = lastInstruction(bb.get());
- if (last->isReturn()) {
- bb->successorIds.insert(exit_bb->id);
- continue;
- }
- if (last->isBranch()) {
- const uint32_t target_pc = last->getTargetPc();
- BasicBlock* target_bb = basicBlock(target_pc);
- bb->successorIds.insert(target_bb->id);
- }
-
- // Unconditional jump instructions have a unique successor
- if (!last->isUnconditionalJump()) {
- BasicBlock* next_bb = basicBlock(last->nextInstAddr());
- bb->successorIds.insert(next_bb->id);
- }
- }
-}
-
-
-// In-place set intersection
-static void
-intersect(std::set<uint32_t>& a, const std::set<uint32_t>& b)
-{
- std::set<uint32_t>::iterator it = a.begin();
- while (it != a.end()) {
- it = b.find(*it) != b.end() ? ++it : a.erase(it);
- }
-}
-
-
-void
-ControlFlowInfo::findPostDominators()
-{
- // the only postdominator of the exit block is itself
- basicBlocks.back()->postDominatorIds.insert(basicBlocks.back()->id);
- //copy all basic blocks to all postdominator lists except for exit block
- for (auto& block : basicBlocks) {
- if (!block->isExit()) {
- for (uint32_t i = 0; i < basicBlocks.size(); i++) {
- block->postDominatorIds.insert(i);
- }
- }
- }
-
- bool change = true;
- while (change) {
- change = false;
- for (int h = basicBlocks.size() - 2; h >= 0; --h) {
- size_t num_postdominators =
- basicBlocks[h]->postDominatorIds.size();
- for (int s : basicBlocks[h]->successorIds) {
- intersect(basicBlocks[h]->postDominatorIds,
- basicBlocks[s]->postDominatorIds);
- }
- basicBlocks[h]->postDominatorIds.insert(h);
- change |= (num_postdominators
- != basicBlocks[h]->postDominatorIds.size());
- }
- }
-}
-
-
-// In-place set difference
-static void
-setDifference(std::set<uint32_t>&a,
- const std::set<uint32_t>& b, uint32_t exception)
-{
- for (uint32_t b_elem : b) {
- if (b_elem != exception) {
- a.erase(b_elem);
- }
- }
-}
-
-void
-ControlFlowInfo::findImmediatePostDominators()
-{
- assert(basicBlocks.size() > 1); // Entry and exit blocks must be present
-
- findPostDominators();
-
- for (auto& basicBlock : basicBlocks) {
- if (basicBlock->isExit()) {
- continue;
- }
- std::set<uint32_t> candidates = basicBlock->postDominatorIds;
- candidates.erase(basicBlock->id);
- for (uint32_t postDominatorId : basicBlock->postDominatorIds) {
- if (postDominatorId != basicBlock->id) {
- setDifference(candidates,
- basicBlocks[postDominatorId]->postDominatorIds,
- postDominatorId);
- }
- }
- assert(candidates.size() == 1);
- GPUStaticInst* last_instruction = lastInstruction(basicBlock.get());
- BasicBlock* ipd_block = basicBlocks[*(candidates.begin())].get();
- if (!ipd_block->isExit()) {
- GPUStaticInst* ipd_first_inst = ipd_block->firstInstruction;
- last_instruction->ipdInstNum(ipd_first_inst->instAddr());
- } else {
- last_instruction->ipdInstNum(last_instruction->nextInstAddr());
- }
- }
-}
-
-void
-ControlFlowInfo::printPostDominators() const
-{
- for (auto& block : basicBlocks) {
- std::cout << "PD(" << block->id << ") = {";
- std::copy(block->postDominatorIds.begin(),
- block->postDominatorIds.end(),
- std::ostream_iterator<uint32_t>(std::cout, ", "));
- std::cout << "}" << std::endl;
- }
-}
-
-void
-ControlFlowInfo::printImmediatePostDominators() const
-{
- for (const auto& block : basicBlocks) {
- if (block->isExit()) {
- continue;
- }
- std::cout << "IPD(" << block->id << ") = ";
- std::cout << postDominator(block.get())->id << ", ";
- }
- std::cout << std::endl;
-}
-void
-ControlFlowInfo::printBasicBlocks() const
-{
- for (GPUStaticInst* inst : instructions) {
- int inst_addr = inst->instAddr();
- std::cout << inst_addr << " [" << basicBlock(inst_addr)->id
- << "]: " << inst->disassemble();
- if (inst->isBranch()) {
- std::cout << ", PC = " << inst->getTargetPc();
- }
- std::cout << std::endl;
- }
-}
-
-void
-ControlFlowInfo::printBasicBlockDot() const
-{
- printf("digraph {\n");
- for (const auto& basic_block : basicBlocks) {
- printf("\t");
- for (uint32_t successorId : basic_block->successorIds) {
- printf("%d -> %d; ", basic_block->id, successorId);
- }
- printf("\n");
- }
- printf("}\n");
-}
+++ /dev/null
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Steve Reinhardt
- */
-
-#ifndef __KERNEL_CFG_HH__
-#define __KERNEL_CFG_HH__
-
-#include <cstddef>
-#include <cstdint>
-#include <memory>
-#include <set>
-#include <vector>
-
-
-class GPUStaticInst;
-class HsailCode;
-
-struct BasicBlock
-{
- BasicBlock(uint32_t num, GPUStaticInst* begin) :
- id(num), size(0), firstInstruction(begin)
- {
- }
-
- bool
- isEntry() const
- {
- return !id;
- }
-
- bool
- isExit() const
- {
- return !size;
- }
-
- /**
- * Unique identifier for the block within a given kernel.
- */
- const uint32_t id;
-
- /**
- * Number of instructions contained in the block
- */
- size_t size;
-
- /**
- * Pointer to first instruction of the block.
- */
- GPUStaticInst* firstInstruction;
-
- /**
- * Identifiers of the blocks that follow (are reachable from) this block.
- */
- std::set<uint32_t> successorIds;
-
- /**
- * Identifiers of the blocks that will be visited from this block.
- */
- std::set<uint32_t> postDominatorIds;
-};
-
-class ControlFlowInfo
-{
-public:
-
- /**
- * Compute immediate post-dominator instruction for kernel instructions.
- */
- static void assignImmediatePostDominators(
- const std::vector<GPUStaticInst*>& instructions);
-
-private:
- ControlFlowInfo(const std::vector<GPUStaticInst*>& instructions);
-
- GPUStaticInst* lastInstruction(const BasicBlock* block) const;
-
- BasicBlock* basicBlock(int inst_addr) const;
-
- BasicBlock* postDominator(const BasicBlock* block) const;
-
- void createBasicBlocks();
-
- void connectBasicBlocks();
-
- void findPostDominators();
-
- void findImmediatePostDominators();
-
- void printBasicBlocks() const;
-
- void printBasicBlockDot() const;
-
- void printPostDominators() const;
-
- void printImmediatePostDominators() const;
-
- std::vector<std::unique_ptr<BasicBlock>> basicBlocks;
- std::vector<GPUStaticInst*> instructions;
-};
-
-#endif // __KERNEL_CFG_HH__
+++ /dev/null
-/*
- * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Steve Reinhardt
- */
-
-#ifndef __NDRANGE_HH__
-#define __NDRANGE_HH__
-
-#include "base/types.hh"
-#include "gpu-compute/qstruct.hh"
-
-struct NDRange
-{
- // copy of the queue entry provided at dispatch
- HsaQueueEntry q;
-
- // The current workgroup id (3 dimensions)
- int wgId[3];
- // The number of workgroups in each dimension
- int numWg[3];
- // The total number of workgroups
- int numWgTotal;
-
- // The number of completed work groups
- int numWgCompleted;
- // The global workgroup ID
- uint32_t globalWgId;
-
- // flag indicating whether all work groups have been launched
- bool wg_disp_rem;
- // kernel complete
- bool execDone;
- bool userDoorBellSet;
- volatile bool *addrToNotify;
- volatile uint32_t *numDispLeft;
- int dispatchId;
- int curCid; // Current context id
-};
-
-#endif // __NDRANGE_HH__
+++ /dev/null
-/*
- * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Brad Beckmann, Marc Orr
- */
-
-#ifndef __Q_STRUCT_HH__
-#define __Q_STRUCT_HH__
-
-#include <bitset>
-#include <cstdint>
-
-// Maximum number of arguments
-static const int KER_NUM_ARGS = 32;
-// Kernel argument buffer size
-static const int KER_ARGS_LENGTH = 512;
-
-class LdsChunk;
-struct NDRange;
-
-// Be very careful of alignment in this structure. The structure
-// must compile to the same layout in both 32-bit and 64-bit mode.
-struct HsaQueueEntry
-{
- // Base pointer for array of instruction pointers
- uint64_t code_ptr;
- // Grid Size (3 dimensions)
- uint32_t gdSize[3];
- // Workgroup Size (3 dimensions)
- uint32_t wgSize[3];
- uint16_t sRegCount;
- uint16_t dRegCount;
- uint16_t cRegCount;
- uint64_t privMemStart;
- uint32_t privMemPerItem;
- uint32_t privMemTotal;
- uint64_t spillMemStart;
- uint32_t spillMemPerItem;
- uint32_t spillMemTotal;
- uint64_t roMemStart;
- uint32_t roMemTotal;
- // Size (in bytes) of LDS
- uint32_t ldsSize;
- // Virtual Memory Id (unused right now)
- uint32_t vmId;
-
- // Pointer to dependency chain (unused now)
- uint64_t depends;
-
- // pointer to bool
- uint64_t addrToNotify;
- // pointer to uint32_t
- uint64_t numDispLeft;
-
- // variables to pass arguments when running in standalone mode,
- // will be removed when run.py and sh.cpp have been updated to
- // use args and offset arrays
- uint64_t arg1;
- uint64_t arg2;
- uint64_t arg3;
- uint64_t arg4;
-
- // variables to pass arguments when running in cpu+gpu mode
- uint8_t args[KER_ARGS_LENGTH];
- uint16_t offsets[KER_NUM_ARGS];
- uint16_t num_args;
-};
-
-// State that needs to be passed between the simulation and simulated app, a
-// pointer to this struct can be passed through the depends field in the
-// HsaQueueEntry struct
-struct HostState
-{
- // cl_event* has original HsaQueueEntry for init
- uint64_t event;
-};
-
-// Total number of HSA queues
-static const int HSAQ_NQUEUES = 8;
-
-// These values will eventually live in memory mapped registers
-// and be settable by the kernel mode driver.
-
-// Number of entries in each HSA queue
-static const int HSAQ_SIZE = 64;
-// Address of first HSA queue index
-static const int HSAQ_INDX_BASE = 0x10000ll;
-// Address of first HSA queue
-static const int HSAQ_BASE = 0x11000ll;
-// Suggested start of HSA code
-static const int HSA_CODE_BASE = 0x18000ll;
-
-// These are shortcuts for deriving the address of a specific
-// HSA queue or queue index
-#define HSAQ(n) (HSAQ_BASE + HSAQ_SIZE * sizeof(struct fsaQueue) * n)
-#define HSAQE(n,i) (HSAQ_BASE + (HSAQ_SIZE * n + i) * sizeof(struct fsaQueue))
-#define HSAQ_RI(n) (HSAQ_INDX_BASE + sizeof(int) * (n * 3 + 0))
-#define HSAQ_WI(n) (HSAQ_INDX_BASE + sizeof(int) * (n * 3 + 1))
-#define HSAQ_CI(n) (HSAQ_INDX_BASE + sizeof(int) * (n * 3 + 2))
-
-/*
- * Example code for writing to a queue
- *
- * void
- * ToQueue(int n,struct fsaQueue *val)
- * {
- * int wi = *(int*)HSAQ_WI(n);
- * int ri = *(int*)HSAQ_RI(n);
- * int ci = *(int*)HSAQ_CI(n);
- *
- * if (ci - ri < HSAQ_SIZE) {
- * (*(int*)HSAQ_CI(n))++;
- * *(HsaQueueEntry*)(HSAQE(n, (wi % HSAQ_SIZE))) = *val;
- * (*(int*)HSAQ_WI(n))++;
- * }
- * }
- */
-
-#endif // __Q_STRUCT_HH__
+++ /dev/null
-/*
- * Copyright (c) 2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: John Kalamatianos
- */
-
-#include "gpu-compute/vector_register_state.hh"
-
-#include <limits>
-
-#include "gpu-compute/compute_unit.hh"
-
-VecRegisterState::VecRegisterState() : computeUnit(nullptr)
-{
- s_reg.clear();
- d_reg.clear();
-}
-
-void
-VecRegisterState::setParent(ComputeUnit *_computeUnit)
-{
- computeUnit = _computeUnit;
- _name = computeUnit->name() + ".VecRegState";
-}
-
-void
-VecRegisterState::init(uint32_t _size, uint32_t wf_size)
-{
- s_reg.resize(_size);
- fatal_if(wf_size > std::numeric_limits<unsigned long long>::digits ||
- wf_size <= 0,
- "WF size is larger than the host can support or is zero");
- fatal_if((wf_size & (wf_size - 1)) != 0,
- "Wavefront size should be a power of 2");
- for (int i = 0; i < s_reg.size(); ++i) {
- s_reg[i].resize(wf_size, 0);
- }
- d_reg.resize(_size);
- for (int i = 0; i < d_reg.size(); ++i) {
- d_reg[i].resize(wf_size, 0);
- }
-}
+++ /dev/null
-/*
- * Copyright (c) 2015 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * For use for simulation and test purposes only
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * 3. Neither the name of the copyright holder nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: John Kalamatianos
- */
-
-#ifndef __VECTOR_REGISTER_STATE_HH__
-#define __VECTOR_REGISTER_STATE_HH__
-
-#include <array>
-#include <cassert>
-#include <string>
-#include <vector>
-
-#include "gpu-compute/misc.hh"
-
-class ComputeUnit;
-
-// Vector Register State per SIMD unit (contents of the vector
-// registers in the VRF of the SIMD)
-class VecRegisterState
-{
- public:
- VecRegisterState();
- void init(uint32_t _size, uint32_t wf_size);
-
- const std::string& name() const { return _name; }
- void setParent(ComputeUnit *_computeUnit);
- void regStats() { }
-
- // Access methods
- template<typename T>
- T
- read(int regIdx, int threadId=0) {
- T *p0;
- assert(sizeof(T) == 4 || sizeof(T) == 8);
- if (sizeof(T) == 4) {
- p0 = (T*)(&s_reg[regIdx][threadId]);
- } else {
- p0 = (T*)(&d_reg[regIdx][threadId]);
- }
-
- return *p0;
- }
-
- template<typename T>
- void
- write(unsigned int regIdx, T value, int threadId=0) {
- T *p0;
- assert(sizeof(T) == 4 || sizeof(T) == 8);
- if (sizeof(T) == 4) {
- p0 = (T*)(&s_reg[regIdx][threadId]);
- } else {
- p0 = (T*)(&d_reg[regIdx][threadId]);
- }
-
- *p0 = value;
- }
-
- // (Single Precision) Vector Register File size.
- int regSize() { return s_reg.size(); }
-
- private:
- ComputeUnit *computeUnit;
- std::string _name;
- // 32-bit Single Precision Vector Register State
- std::vector<std::vector<uint32_t>> s_reg;
- // 64-bit Double Precision Vector Register State
- std::vector<std::vector<uint64_t>> d_reg;
-};
-
-#endif // __VECTOR_REGISTER_STATE_HH__
# @todo this is error prone, and should be extracted automatically from
# a file
- valid_tags = ["arch", "arch-arm", "arch-gcn3", "arch-hsail",
+ valid_tags = ["arch", "arch-arm", "arch-gcn3",
"arch-mips", "arch-power", "arch-riscv", "arch-sparc", "arch-x86",
"base", "configs", "cpu", "cpu-kvm", "cpu-minor", "cpu-o3",
"cpu-simple", "dev", "dev-arm", "dev-virtio", "ext", "fastmodel",
'SPARC,' \
'X86,X86_MESI_Two_Level,' \
'RISCV,' \
- 'HSAIL_X86',
+ 'GCN3_X86',
help="comma-separated build targets to test (default: '%default')")
add_option('--modes',
default='se,fs',