RTL_H = $(RTL_BASE_H) $(FLAGS_H) genrtl.h
READ_MD_H = $(OBSTACK_H) $(HASHTAB_H) read-md.h
BUILTINS_DEF = builtins.def sync-builtins.def omp-builtins.def \
- gtm-builtins.def sanitizer.def \
- hsa-builtins.def
+ gtm-builtins.def sanitizer.def
INTERNAL_FN_DEF = internal-fn.def
INTERNAL_FN_H = internal-fn.h $(INTERNAL_FN_DEF)
TREE_CORE_H = tree-core.h $(CORETYPES_H) all-tree.def tree.def \
haifa-sched.o \
hash-map-tests.o \
hash-set-tests.o \
- hsa-common.o \
- hsa-gen.o \
- hsa-regalloc.o \
- hsa-brig.o \
- hsa-dump.o \
hw-doloop.o \
hwint.o \
ifcvt.o \
ipa-icf.o \
ipa-icf-gimple.o \
ipa-reference.o \
- ipa-hsa.o \
ipa-ref.o \
ipa-utils.o \
ipa.o \
omp-offload.o \
omp-expand.o \
omp-general.o \
- omp-grid.o \
omp-low.o \
omp-simd-clone.o \
opt-problem.o \
$(srcdir)/tree-profile.c $(srcdir)/tree-nested.c \
$(srcdir)/omp-offload.h \
$(srcdir)/omp-offload.c \
- $(srcdir)/omp-expand.c \
$(srcdir)/omp-general.c \
$(srcdir)/omp-low.c \
$(srcdir)/targhooks.c $(out_file) $(srcdir)/passes.c $(srcdir)/cgraphunit.c \
$(srcdir)/sancov.c \
$(srcdir)/ipa-devirt.c \
$(srcdir)/internal-fn.h \
- $(srcdir)/hsa-common.c \
$(srcdir)/calls.c \
$(srcdir)/omp-general.h \
@all_gtfiles@
}
}
}
+
+/* Return true if TYPE is a packed HSA type, i.e. the bits of TYPE selected
+   by BRIG_TYPE_PACK_MASK are anything other than BRIG_TYPE_PACK_NONE.  */
+
+bool
+hsa_type_packed_p (BrigType16_t type)
+{
+ return (type & BRIG_TYPE_PACK_MASK) != BRIG_TYPE_PACK_NONE;
+}
+
return TYPE_VECTOR_SUBPARTS (type).to_constant ();
}
+/* Return true if TYPE is a packed HSA type.  */
+bool hsa_type_packed_p (BrigType16_t type);
+
#endif
--- /dev/null
+/* HSA BRIG (binary representation of HSAIL) 1.0.1 representation description.
+ Copyright (C) 2016-2020 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>.
+
+The contents of this file were created by extracting data structures, enums,
+typedefs and other definitions from the HSA Programmer's Reference Manual
+Version 1.0.1 (http://www.hsafoundation.com/standards/).
+
+An HTML version is available at the following link:
+http://www.hsafoundation.com/html/Content/PRM/Topics/PRM_title_page.htm */
+
+#ifndef HSA_BRIG_FORMAT_H
+#define HSA_BRIG_FORMAT_H
+
+struct BrigModuleHeader;
+typedef uint16_t BrigKind16_t;
+typedef uint32_t BrigVersion32_t;
+
+typedef BrigModuleHeader *BrigModule_t;
+typedef uint32_t BrigDataOffset32_t;
+typedef uint32_t BrigCodeOffset32_t;
+typedef uint32_t BrigOperandOffset32_t;
+typedef BrigDataOffset32_t BrigDataOffsetString32_t;
+typedef BrigDataOffset32_t BrigDataOffsetCodeList32_t;
+typedef BrigDataOffset32_t BrigDataOffsetOperandList32_t;
+typedef uint8_t BrigAlignment8_t;
+
+/* Alignment codes.  The enumerator value is not the alignment itself:
+   apart from BRIG_ALIGNMENT_NONE, each code is one more than the base-2
+   logarithm of the alignment in its name (BRIG_ALIGNMENT_1 is 1,
+   BRIG_ALIGNMENT_4 is 3, BRIG_ALIGNMENT_256 is 9).  */
+enum BrigAlignment
+{
+ BRIG_ALIGNMENT_NONE = 0,
+ BRIG_ALIGNMENT_1 = 1,
+ BRIG_ALIGNMENT_2 = 2,
+ BRIG_ALIGNMENT_4 = 3,
+ BRIG_ALIGNMENT_8 = 4,
+ BRIG_ALIGNMENT_16 = 5,
+ BRIG_ALIGNMENT_32 = 6,
+ BRIG_ALIGNMENT_64 = 7,
+ BRIG_ALIGNMENT_128 = 8,
+ BRIG_ALIGNMENT_256 = 9
+};
+
+typedef uint8_t BrigAllocation8_t;
+
+enum BrigAllocation
+{
+ BRIG_ALLOCATION_NONE = 0,
+ BRIG_ALLOCATION_PROGRAM = 1,
+ BRIG_ALLOCATION_AGENT = 2,
+ BRIG_ALLOCATION_AUTOMATIC = 3
+};
+
+typedef uint8_t BrigAluModifier8_t;
+
+enum BrigAluModifierMask
+{
+ BRIG_ALU_FTZ = 1
+};
+
+typedef uint8_t BrigAtomicOperation8_t;
+
+enum BrigAtomicOperation
+{
+ BRIG_ATOMIC_ADD = 0,
+ BRIG_ATOMIC_AND = 1,
+ BRIG_ATOMIC_CAS = 2,
+ BRIG_ATOMIC_EXCH = 3,
+ BRIG_ATOMIC_LD = 4,
+ BRIG_ATOMIC_MAX = 5,
+ BRIG_ATOMIC_MIN = 6,
+ BRIG_ATOMIC_OR = 7,
+ BRIG_ATOMIC_ST = 8,
+ BRIG_ATOMIC_SUB = 9,
+ BRIG_ATOMIC_WRAPDEC = 10,
+ BRIG_ATOMIC_WRAPINC = 11,
+ BRIG_ATOMIC_XOR = 12,
+ BRIG_ATOMIC_WAIT_EQ = 13,
+ BRIG_ATOMIC_WAIT_NE = 14,
+ BRIG_ATOMIC_WAIT_LT = 15,
+ BRIG_ATOMIC_WAIT_GTE = 16,
+ BRIG_ATOMIC_WAITTIMEOUT_EQ = 17,
+ BRIG_ATOMIC_WAITTIMEOUT_NE = 18,
+ BRIG_ATOMIC_WAITTIMEOUT_LT = 19,
+ BRIG_ATOMIC_WAITTIMEOUT_GTE = 20
+};
+
+/* Two-field header that appears as the first member (named base) of the
+   BrigDirective* and BrigOperand* structures and of BrigInstBase in this
+   file.  */
+struct BrigBase
+{
+ /* NOTE(review): presumably the size in bytes of the whole entry that this
+    header starts -- confirm against the HSA PRM.  */
+ uint16_t byteCount;
+ /* Typed as BrigKind16_t; presumably holds a BrigKind value.  */
+ BrigKind16_t kind;
+};
+
+typedef uint8_t BrigCompareOperation8_t;
+
+enum BrigCompareOperation
+{
+ BRIG_COMPARE_EQ = 0,
+ BRIG_COMPARE_NE = 1,
+ BRIG_COMPARE_LT = 2,
+ BRIG_COMPARE_LE = 3,
+ BRIG_COMPARE_GT = 4,
+ BRIG_COMPARE_GE = 5,
+ BRIG_COMPARE_EQU = 6,
+ BRIG_COMPARE_NEU = 7,
+ BRIG_COMPARE_LTU = 8,
+ BRIG_COMPARE_LEU = 9,
+ BRIG_COMPARE_GTU = 10,
+ BRIG_COMPARE_GEU = 11,
+ BRIG_COMPARE_NUM = 12,
+ BRIG_COMPARE_NAN = 13,
+ BRIG_COMPARE_SEQ = 14,
+ BRIG_COMPARE_SNE = 15,
+ BRIG_COMPARE_SLT = 16,
+ BRIG_COMPARE_SLE = 17,
+ BRIG_COMPARE_SGT = 18,
+ BRIG_COMPARE_SGE = 19,
+ BRIG_COMPARE_SGEU = 20,
+ BRIG_COMPARE_SEQU = 21,
+ BRIG_COMPARE_SNEU = 22,
+ BRIG_COMPARE_SLTU = 23,
+ BRIG_COMPARE_SLEU = 24,
+ BRIG_COMPARE_SNUM = 25,
+ BRIG_COMPARE_SNAN = 26,
+ BRIG_COMPARE_SGTU = 27
+};
+
+typedef uint16_t BrigControlDirective16_t;
+
+enum BrigControlDirective
+{
+ BRIG_CONTROL_NONE = 0,
+ BRIG_CONTROL_ENABLEBREAKEXCEPTIONS = 1,
+ BRIG_CONTROL_ENABLEDETECTEXCEPTIONS = 2,
+ BRIG_CONTROL_MAXDYNAMICGROUPSIZE = 3,
+ BRIG_CONTROL_MAXFLATGRIDSIZE = 4,
+ BRIG_CONTROL_MAXFLATWORKGROUPSIZE = 5,
+ BRIG_CONTROL_REQUIREDDIM = 6,
+ BRIG_CONTROL_REQUIREDGRIDSIZE = 7,
+ BRIG_CONTROL_REQUIREDWORKGROUPSIZE = 8,
+ BRIG_CONTROL_REQUIRENOPARTIALWORKGROUPS = 9
+};
+
+typedef uint32_t BrigExceptions32_t;
+
+/* Exception bit flags.  Each enumerator is a distinct single bit: bits 0-4
+   are the named exceptions and BRIG_EXCEPTIONS_FIRST_USER_DEFINED is
+   bit 16.  */
+enum BrigExceptionsMask
+{
+ BRIG_EXCEPTIONS_INVALID_OPERATION = 1 << 0,
+ BRIG_EXCEPTIONS_DIVIDE_BY_ZERO = 1 << 1,
+ BRIG_EXCEPTIONS_OVERFLOW = 1 << 2,
+ BRIG_EXCEPTIONS_UNDERFLOW = 1 << 3,
+ BRIG_EXCEPTIONS_INEXACT = 1 << 4,
+ BRIG_EXCEPTIONS_FIRST_USER_DEFINED = 1 << 16
+};
+
+typedef uint8_t BrigExecutableModifier8_t;
+
+enum BrigExecutableModifierMask
+{
+ BRIG_EXECUTABLE_DEFINITION = 1
+};
+
+typedef uint8_t BrigImageChannelOrder8_t;
+
+enum BrigImageChannelOrder
+{
+ BRIG_CHANNEL_ORDER_A = 0,
+ BRIG_CHANNEL_ORDER_R = 1,
+ BRIG_CHANNEL_ORDER_RX = 2,
+ BRIG_CHANNEL_ORDER_RG = 3,
+ BRIG_CHANNEL_ORDER_RGX = 4,
+ BRIG_CHANNEL_ORDER_RA = 5,
+ BRIG_CHANNEL_ORDER_RGB = 6,
+ BRIG_CHANNEL_ORDER_RGBX = 7,
+ BRIG_CHANNEL_ORDER_RGBA = 8,
+ BRIG_CHANNEL_ORDER_BGRA = 9,
+ BRIG_CHANNEL_ORDER_ARGB = 10,
+ BRIG_CHANNEL_ORDER_ABGR = 11,
+ BRIG_CHANNEL_ORDER_SRGB = 12,
+ BRIG_CHANNEL_ORDER_SRGBX = 13,
+ BRIG_CHANNEL_ORDER_SRGBA = 14,
+ BRIG_CHANNEL_ORDER_SBGRA = 15,
+ BRIG_CHANNEL_ORDER_INTENSITY = 16,
+ BRIG_CHANNEL_ORDER_LUMINANCE = 17,
+ BRIG_CHANNEL_ORDER_DEPTH = 18,
+ BRIG_CHANNEL_ORDER_DEPTH_STENCIL = 19,
+ BRIG_CHANNEL_ORDER_FIRST_USER_DEFINED = 128
+};
+
+typedef uint8_t BrigImageChannelType8_t;
+
+enum BrigImageChannelType
+{
+ BRIG_CHANNEL_TYPE_SNORM_INT8 = 0,
+ BRIG_CHANNEL_TYPE_SNORM_INT16 = 1,
+ BRIG_CHANNEL_TYPE_UNORM_INT8 = 2,
+ BRIG_CHANNEL_TYPE_UNORM_INT16 = 3,
+ BRIG_CHANNEL_TYPE_UNORM_INT24 = 4,
+ BRIG_CHANNEL_TYPE_UNORM_SHORT_555 = 5,
+ BRIG_CHANNEL_TYPE_UNORM_SHORT_565 = 6,
+ BRIG_CHANNEL_TYPE_UNORM_INT_101010 = 7,
+ BRIG_CHANNEL_TYPE_SIGNED_INT8 = 8,
+ BRIG_CHANNEL_TYPE_SIGNED_INT16 = 9,
+ BRIG_CHANNEL_TYPE_SIGNED_INT32 = 10,
+ BRIG_CHANNEL_TYPE_UNSIGNED_INT8 = 11,
+ BRIG_CHANNEL_TYPE_UNSIGNED_INT16 = 12,
+ BRIG_CHANNEL_TYPE_UNSIGNED_INT32 = 13,
+ BRIG_CHANNEL_TYPE_HALF_FLOAT = 14,
+ BRIG_CHANNEL_TYPE_FLOAT = 15,
+ BRIG_CHANNEL_TYPE_FIRST_USER_DEFINED = 128
+};
+
+typedef uint8_t BrigImageGeometry8_t;
+
+enum BrigImageGeometry
+{
+ BRIG_GEOMETRY_1D = 0,
+ BRIG_GEOMETRY_2D = 1,
+ BRIG_GEOMETRY_3D = 2,
+ BRIG_GEOMETRY_1DA = 3,
+ BRIG_GEOMETRY_2DA = 4,
+ BRIG_GEOMETRY_1DB = 5,
+ BRIG_GEOMETRY_2DDEPTH = 6,
+ BRIG_GEOMETRY_2DADEPTH = 7,
+ BRIG_GEOMETRY_FIRST_USER_DEFINED = 128
+};
+
+typedef uint8_t BrigImageQuery8_t;
+
+enum BrigImageQuery
+{
+ BRIG_IMAGE_QUERY_WIDTH = 0,
+ BRIG_IMAGE_QUERY_HEIGHT = 1,
+ BRIG_IMAGE_QUERY_DEPTH = 2,
+ BRIG_IMAGE_QUERY_ARRAY = 3,
+ BRIG_IMAGE_QUERY_CHANNELORDER = 4,
+ BRIG_IMAGE_QUERY_CHANNELTYPE = 5
+};
+
+/* Entry kind codes, split into three numeric ranges: directives from
+   0x1000, instructions from 0x2000 and operands from 0x3000.  In each range
+   the *_BEGIN enumerator has the same value as the first real kind and the
+   *_END enumerator is one past the last real kind, so a range is
+   [*_BEGIN, *_END).  */
+enum BrigKind
+{
+ BRIG_KIND_NONE = 0x0000,
+ BRIG_KIND_DIRECTIVE_BEGIN = 0x1000,
+ BRIG_KIND_DIRECTIVE_ARG_BLOCK_END = 0x1000,
+ BRIG_KIND_DIRECTIVE_ARG_BLOCK_START = 0x1001,
+ BRIG_KIND_DIRECTIVE_COMMENT = 0x1002,
+ BRIG_KIND_DIRECTIVE_CONTROL = 0x1003,
+ BRIG_KIND_DIRECTIVE_EXTENSION = 0x1004,
+ BRIG_KIND_DIRECTIVE_FBARRIER = 0x1005,
+ BRIG_KIND_DIRECTIVE_FUNCTION = 0x1006,
+ BRIG_KIND_DIRECTIVE_INDIRECT_FUNCTION = 0x1007,
+ BRIG_KIND_DIRECTIVE_KERNEL = 0x1008,
+ BRIG_KIND_DIRECTIVE_LABEL = 0x1009,
+ BRIG_KIND_DIRECTIVE_LOC = 0x100a,
+ BRIG_KIND_DIRECTIVE_MODULE = 0x100b,
+ BRIG_KIND_DIRECTIVE_PRAGMA = 0x100c,
+ BRIG_KIND_DIRECTIVE_SIGNATURE = 0x100d,
+ BRIG_KIND_DIRECTIVE_VARIABLE = 0x100e,
+ BRIG_KIND_DIRECTIVE_END = 0x100f,
+ BRIG_KIND_INST_BEGIN = 0x2000,
+ BRIG_KIND_INST_ADDR = 0x2000,
+ BRIG_KIND_INST_ATOMIC = 0x2001,
+ BRIG_KIND_INST_BASIC = 0x2002,
+ BRIG_KIND_INST_BR = 0x2003,
+ BRIG_KIND_INST_CMP = 0x2004,
+ BRIG_KIND_INST_CVT = 0x2005,
+ BRIG_KIND_INST_IMAGE = 0x2006,
+ BRIG_KIND_INST_LANE = 0x2007,
+ BRIG_KIND_INST_MEM = 0x2008,
+ BRIG_KIND_INST_MEM_FENCE = 0x2009,
+ BRIG_KIND_INST_MOD = 0x200a,
+ BRIG_KIND_INST_QUERY_IMAGE = 0x200b,
+ BRIG_KIND_INST_QUERY_SAMPLER = 0x200c,
+ BRIG_KIND_INST_QUEUE = 0x200d,
+ BRIG_KIND_INST_SEG = 0x200e,
+ BRIG_KIND_INST_SEG_CVT = 0x200f,
+ BRIG_KIND_INST_SIGNAL = 0x2010,
+ BRIG_KIND_INST_SOURCE_TYPE = 0x2011,
+ BRIG_KIND_INST_END = 0x2012,
+ BRIG_KIND_OPERAND_BEGIN = 0x3000,
+ BRIG_KIND_OPERAND_ADDRESS = 0x3000,
+ BRIG_KIND_OPERAND_ALIGN = 0x3001,
+ BRIG_KIND_OPERAND_CODE_LIST = 0x3002,
+ BRIG_KIND_OPERAND_CODE_REF = 0x3003,
+ BRIG_KIND_OPERAND_CONSTANT_BYTES = 0x3004,
+ BRIG_KIND_OPERAND_RESERVED = 0x3005,
+ BRIG_KIND_OPERAND_CONSTANT_IMAGE = 0x3006,
+ BRIG_KIND_OPERAND_CONSTANT_OPERAND_LIST = 0x3007,
+ BRIG_KIND_OPERAND_CONSTANT_SAMPLER = 0x3008,
+ BRIG_KIND_OPERAND_OPERAND_LIST = 0x3009,
+ BRIG_KIND_OPERAND_REGISTER = 0x300a,
+ BRIG_KIND_OPERAND_STRING = 0x300b,
+ BRIG_KIND_OPERAND_WAVESIZE = 0x300c,
+ BRIG_KIND_OPERAND_END = 0x300d
+};
+
+typedef uint8_t BrigLinkage8_t;
+
+enum BrigLinkage
+{
+ BRIG_LINKAGE_NONE = 0,
+ BRIG_LINKAGE_PROGRAM = 1,
+ BRIG_LINKAGE_MODULE = 2,
+ BRIG_LINKAGE_FUNCTION = 3,
+ BRIG_LINKAGE_ARG = 4
+};
+
+typedef uint8_t BrigMachineModel8_t;
+
+enum BrigMachineModel
+{
+ BRIG_MACHINE_SMALL = 0,
+ BRIG_MACHINE_LARGE = 1
+};
+
+typedef uint8_t BrigMemoryModifier8_t;
+
+enum BrigMemoryModifierMask
+{
+ BRIG_MEMORY_CONST = 1
+};
+
+typedef uint8_t BrigMemoryOrder8_t;
+
+enum BrigMemoryOrder
+{
+ BRIG_MEMORY_ORDER_NONE = 0,
+ BRIG_MEMORY_ORDER_RELAXED = 1,
+ BRIG_MEMORY_ORDER_SC_ACQUIRE = 2,
+ BRIG_MEMORY_ORDER_SC_RELEASE = 3,
+ BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE = 4
+};
+
+typedef uint8_t BrigMemoryScope8_t;
+
+enum BrigMemoryScope
+{
+ BRIG_MEMORY_SCOPE_NONE = 0,
+ BRIG_MEMORY_SCOPE_WORKITEM = 1,
+ BRIG_MEMORY_SCOPE_WAVEFRONT = 2,
+ BRIG_MEMORY_SCOPE_WORKGROUP = 3,
+ BRIG_MEMORY_SCOPE_AGENT = 4,
+ BRIG_MEMORY_SCOPE_SYSTEM = 5
+};
+
+/* Module header record; BrigModule_t is a pointer to this structure.  All
+   members have fixed widths: an 8-byte identification, two 32-bit version
+   numbers, a 64-bit byte count, a 64-byte hash, then 32-bit reserved and
+   sectionCount fields and a 64-bit sectionIndex.  */
+struct BrigModuleHeader
+{
+ char identification[8];
+ BrigVersion32_t brigMajor;
+ BrigVersion32_t brigMinor;
+ uint64_t byteCount;
+ uint8_t hash[64];
+ uint32_t reserved;
+ uint32_t sectionCount;
+ uint64_t sectionIndex;
+};
+
+typedef uint16_t BrigOpcode16_t;
+
+enum BrigOpcode
+{
+ BRIG_OPCODE_NOP = 0,
+ BRIG_OPCODE_ABS = 1,
+ BRIG_OPCODE_ADD = 2,
+ BRIG_OPCODE_BORROW = 3,
+ BRIG_OPCODE_CARRY = 4,
+ BRIG_OPCODE_CEIL = 5,
+ BRIG_OPCODE_COPYSIGN = 6,
+ BRIG_OPCODE_DIV = 7,
+ BRIG_OPCODE_FLOOR = 8,
+ BRIG_OPCODE_FMA = 9,
+ BRIG_OPCODE_FRACT = 10,
+ BRIG_OPCODE_MAD = 11,
+ BRIG_OPCODE_MAX = 12,
+ BRIG_OPCODE_MIN = 13,
+ BRIG_OPCODE_MUL = 14,
+ BRIG_OPCODE_MULHI = 15,
+ BRIG_OPCODE_NEG = 16,
+ BRIG_OPCODE_REM = 17,
+ BRIG_OPCODE_RINT = 18,
+ BRIG_OPCODE_SQRT = 19,
+ BRIG_OPCODE_SUB = 20,
+ BRIG_OPCODE_TRUNC = 21,
+ BRIG_OPCODE_MAD24 = 22,
+ BRIG_OPCODE_MAD24HI = 23,
+ BRIG_OPCODE_MUL24 = 24,
+ BRIG_OPCODE_MUL24HI = 25,
+ BRIG_OPCODE_SHL = 26,
+ BRIG_OPCODE_SHR = 27,
+ BRIG_OPCODE_AND = 28,
+ BRIG_OPCODE_NOT = 29,
+ BRIG_OPCODE_OR = 30,
+ BRIG_OPCODE_POPCOUNT = 31,
+ BRIG_OPCODE_XOR = 32,
+ BRIG_OPCODE_BITEXTRACT = 33,
+ BRIG_OPCODE_BITINSERT = 34,
+ BRIG_OPCODE_BITMASK = 35,
+ BRIG_OPCODE_BITREV = 36,
+ BRIG_OPCODE_BITSELECT = 37,
+ BRIG_OPCODE_FIRSTBIT = 38,
+ BRIG_OPCODE_LASTBIT = 39,
+ BRIG_OPCODE_COMBINE = 40,
+ BRIG_OPCODE_EXPAND = 41,
+ BRIG_OPCODE_LDA = 42,
+ BRIG_OPCODE_MOV = 43,
+ BRIG_OPCODE_SHUFFLE = 44,
+ BRIG_OPCODE_UNPACKHI = 45,
+ BRIG_OPCODE_UNPACKLO = 46,
+ BRIG_OPCODE_PACK = 47,
+ BRIG_OPCODE_UNPACK = 48,
+ BRIG_OPCODE_CMOV = 49,
+ BRIG_OPCODE_CLASS = 50,
+ BRIG_OPCODE_NCOS = 51,
+ BRIG_OPCODE_NEXP2 = 52,
+ BRIG_OPCODE_NFMA = 53,
+ BRIG_OPCODE_NLOG2 = 54,
+ BRIG_OPCODE_NRCP = 55,
+ BRIG_OPCODE_NRSQRT = 56,
+ BRIG_OPCODE_NSIN = 57,
+ BRIG_OPCODE_NSQRT = 58,
+ BRIG_OPCODE_BITALIGN = 59,
+ BRIG_OPCODE_BYTEALIGN = 60,
+ BRIG_OPCODE_PACKCVT = 61,
+ BRIG_OPCODE_UNPACKCVT = 62,
+ BRIG_OPCODE_LERP = 63,
+ BRIG_OPCODE_SAD = 64,
+ BRIG_OPCODE_SADHI = 65,
+ BRIG_OPCODE_SEGMENTP = 66,
+ BRIG_OPCODE_FTOS = 67,
+ BRIG_OPCODE_STOF = 68,
+ BRIG_OPCODE_CMP = 69,
+ BRIG_OPCODE_CVT = 70,
+ BRIG_OPCODE_LD = 71,
+ BRIG_OPCODE_ST = 72,
+ BRIG_OPCODE_ATOMIC = 73,
+ BRIG_OPCODE_ATOMICNORET = 74,
+ BRIG_OPCODE_SIGNAL = 75,
+ BRIG_OPCODE_SIGNALNORET = 76,
+ BRIG_OPCODE_MEMFENCE = 77,
+ BRIG_OPCODE_RDIMAGE = 78,
+ BRIG_OPCODE_LDIMAGE = 79,
+ BRIG_OPCODE_STIMAGE = 80,
+ BRIG_OPCODE_IMAGEFENCE = 81,
+ BRIG_OPCODE_QUERYIMAGE = 82,
+ BRIG_OPCODE_QUERYSAMPLER = 83,
+ BRIG_OPCODE_CBR = 84,
+ BRIG_OPCODE_BR = 85,
+ BRIG_OPCODE_SBR = 86,
+ BRIG_OPCODE_BARRIER = 87,
+ BRIG_OPCODE_WAVEBARRIER = 88,
+ BRIG_OPCODE_ARRIVEFBAR = 89,
+ BRIG_OPCODE_INITFBAR = 90,
+ BRIG_OPCODE_JOINFBAR = 91,
+ BRIG_OPCODE_LEAVEFBAR = 92,
+ BRIG_OPCODE_RELEASEFBAR = 93,
+ BRIG_OPCODE_WAITFBAR = 94,
+ BRIG_OPCODE_LDF = 95,
+ BRIG_OPCODE_ACTIVELANECOUNT = 96,
+ BRIG_OPCODE_ACTIVELANEID = 97,
+ BRIG_OPCODE_ACTIVELANEMASK = 98,
+ BRIG_OPCODE_ACTIVELANEPERMUTE = 99,
+ BRIG_OPCODE_CALL = 100,
+ BRIG_OPCODE_SCALL = 101,
+ BRIG_OPCODE_ICALL = 102,
+ BRIG_OPCODE_RET = 103,
+ BRIG_OPCODE_ALLOCA = 104,
+ BRIG_OPCODE_CURRENTWORKGROUPSIZE = 105,
+ BRIG_OPCODE_CURRENTWORKITEMFLATID = 106,
+ BRIG_OPCODE_DIM = 107,
+ BRIG_OPCODE_GRIDGROUPS = 108,
+ BRIG_OPCODE_GRIDSIZE = 109,
+ BRIG_OPCODE_PACKETCOMPLETIONSIG = 110,
+ BRIG_OPCODE_PACKETID = 111,
+ BRIG_OPCODE_WORKGROUPID = 112,
+ BRIG_OPCODE_WORKGROUPSIZE = 113,
+ BRIG_OPCODE_WORKITEMABSID = 114,
+ BRIG_OPCODE_WORKITEMFLATABSID = 115,
+ BRIG_OPCODE_WORKITEMFLATID = 116,
+ BRIG_OPCODE_WORKITEMID = 117,
+ BRIG_OPCODE_CLEARDETECTEXCEPT = 118,
+ BRIG_OPCODE_GETDETECTEXCEPT = 119,
+ BRIG_OPCODE_SETDETECTEXCEPT = 120,
+ BRIG_OPCODE_ADDQUEUEWRITEINDEX = 121,
+ BRIG_OPCODE_CASQUEUEWRITEINDEX = 122,
+ BRIG_OPCODE_LDQUEUEREADINDEX = 123,
+ BRIG_OPCODE_LDQUEUEWRITEINDEX = 124,
+ BRIG_OPCODE_STQUEUEREADINDEX = 125,
+ BRIG_OPCODE_STQUEUEWRITEINDEX = 126,
+ BRIG_OPCODE_CLOCK = 127,
+ BRIG_OPCODE_CUID = 128,
+ BRIG_OPCODE_DEBUGTRAP = 129,
+ BRIG_OPCODE_GROUPBASEPTR = 130,
+ BRIG_OPCODE_KERNARGBASEPTR = 131,
+ BRIG_OPCODE_LANEID = 132,
+ BRIG_OPCODE_MAXCUID = 133,
+ BRIG_OPCODE_MAXWAVEID = 134,
+ BRIG_OPCODE_NULLPTR = 135,
+ BRIG_OPCODE_WAVEID = 136,
+ BRIG_OPCODE_FIRST_USER_DEFINED = 32768
+};
+
+typedef uint8_t BrigPack8_t;
+
+enum BrigPack
+{
+ BRIG_PACK_NONE = 0,
+ BRIG_PACK_PP = 1,
+ BRIG_PACK_PS = 2,
+ BRIG_PACK_SP = 3,
+ BRIG_PACK_SS = 4,
+ BRIG_PACK_S = 5,
+ BRIG_PACK_P = 6,
+ BRIG_PACK_PPSAT = 7,
+ BRIG_PACK_PSSAT = 8,
+ BRIG_PACK_SPSAT = 9,
+ BRIG_PACK_SSSAT = 10,
+ BRIG_PACK_SSAT = 11,
+ BRIG_PACK_PSAT = 12
+};
+
+typedef uint8_t BrigProfile8_t;
+
+enum BrigProfile
+{
+ BRIG_PROFILE_BASE = 0,
+ BRIG_PROFILE_FULL = 1
+};
+
+typedef uint16_t BrigRegisterKind16_t;
+
+enum BrigRegisterKind
+{
+ BRIG_REGISTER_KIND_CONTROL = 0,
+ BRIG_REGISTER_KIND_SINGLE = 1,
+ BRIG_REGISTER_KIND_DOUBLE = 2,
+ BRIG_REGISTER_KIND_QUAD = 3
+};
+
+typedef uint8_t BrigRound8_t;
+
+enum BrigRound
+{
+ BRIG_ROUND_NONE = 0,
+ BRIG_ROUND_FLOAT_DEFAULT = 1,
+ BRIG_ROUND_FLOAT_NEAR_EVEN = 2,
+ BRIG_ROUND_FLOAT_ZERO = 3,
+ BRIG_ROUND_FLOAT_PLUS_INFINITY = 4,
+ BRIG_ROUND_FLOAT_MINUS_INFINITY = 5,
+ BRIG_ROUND_INTEGER_NEAR_EVEN = 6,
+ BRIG_ROUND_INTEGER_ZERO = 7,
+ BRIG_ROUND_INTEGER_PLUS_INFINITY = 8,
+ BRIG_ROUND_INTEGER_MINUS_INFINITY = 9,
+ BRIG_ROUND_INTEGER_NEAR_EVEN_SAT = 10,
+ BRIG_ROUND_INTEGER_ZERO_SAT = 11,
+ BRIG_ROUND_INTEGER_PLUS_INFINITY_SAT = 12,
+ BRIG_ROUND_INTEGER_MINUS_INFINITY_SAT = 13,
+ BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN = 14,
+ BRIG_ROUND_INTEGER_SIGNALING_ZERO = 15,
+ BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY = 16,
+ BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY = 17,
+ BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN_SAT = 18,
+ BRIG_ROUND_INTEGER_SIGNALING_ZERO_SAT = 19,
+ BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY_SAT = 20,
+ BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY_SAT = 21
+};
+
+typedef uint8_t BrigSamplerAddressing8_t;
+
+enum BrigSamplerAddressing
+{
+ BRIG_ADDRESSING_UNDEFINED = 0,
+ BRIG_ADDRESSING_CLAMP_TO_EDGE = 1,
+ BRIG_ADDRESSING_CLAMP_TO_BORDER = 2,
+ BRIG_ADDRESSING_REPEAT = 3,
+ BRIG_ADDRESSING_MIRRORED_REPEAT = 4,
+ BRIG_ADDRESSING_FIRST_USER_DEFINED = 128
+};
+
+typedef uint8_t BrigSamplerCoordNormalization8_t;
+
+enum BrigSamplerCoordNormalization
+{
+ BRIG_COORD_UNNORMALIZED = 0,
+ BRIG_COORD_NORMALIZED = 1
+};
+
+typedef uint8_t BrigSamplerFilter8_t;
+
+enum BrigSamplerFilter
+{
+ BRIG_FILTER_NEAREST = 0,
+ BRIG_FILTER_LINEAR = 1,
+ BRIG_FILTER_FIRST_USER_DEFINED = 128
+};
+
+typedef uint8_t BrigSamplerQuery8_t;
+
+enum BrigSamplerQuery
+{
+ BRIG_SAMPLER_QUERY_ADDRESSING = 0,
+ BRIG_SAMPLER_QUERY_COORD = 1,
+ BRIG_SAMPLER_QUERY_FILTER = 2
+};
+
+typedef uint32_t BrigSectionIndex32_t;
+
+enum BrigSectionIndex
+{
+ BRIG_SECTION_INDEX_DATA = 0,
+ BRIG_SECTION_INDEX_CODE = 1,
+ BRIG_SECTION_INDEX_OPERAND = 2,
+ BRIG_SECTION_INDEX_BEGIN_IMPLEMENTATION_DEFINED = 3
+};
+
+/* Section header.  name is declared with a single element as the last
+   member.  NOTE(review): presumably it is the start of a variable-length
+   name of nameLength bytes -- confirm against the HSA PRM.  */
+struct BrigSectionHeader
+{
+ uint64_t byteCount;
+ uint32_t headerByteCount;
+ uint32_t nameLength;
+ uint8_t name[1];
+};
+
+typedef uint8_t BrigSegCvtModifier8_t;
+
+enum BrigSegCvtModifierMask
+{
+ BRIG_SEG_CVT_NONULL = 1
+};
+
+typedef uint8_t BrigSegment8_t;
+
+enum BrigSegment
+{
+ BRIG_SEGMENT_NONE = 0,
+ BRIG_SEGMENT_FLAT = 1,
+ BRIG_SEGMENT_GLOBAL = 2,
+ BRIG_SEGMENT_READONLY = 3,
+ BRIG_SEGMENT_KERNARG = 4,
+ BRIG_SEGMENT_GROUP = 5,
+ BRIG_SEGMENT_PRIVATE = 6,
+ BRIG_SEGMENT_SPILL = 7,
+ BRIG_SEGMENT_ARG = 8,
+ BRIG_SEGMENT_FIRST_USER_DEFINED = 128
+};
+
+/* Bit layout used to build the BrigType enumerators, as given by the sizes
+   and shifts below:
+     bits 0-4  base type   (BRIG_TYPE_BASE_MASK  == 0x1f)
+     bits 5-6  pack field  (BRIG_TYPE_PACK_MASK  == 0x60)
+     bit  7    array flag  (BRIG_TYPE_ARRAY_MASK == 0x80)
+   BRIG_TYPE_PACK_32/64/128 are the pack field values 1/2/3 already shifted
+   into place (0x20, 0x40, 0x60); BRIG_TYPE_ARRAY is 0x80.  */
+enum
+{
+ BRIG_TYPE_BASE_SIZE = 5,
+ BRIG_TYPE_PACK_SIZE = 2,
+ BRIG_TYPE_ARRAY_SIZE = 1,
+
+ BRIG_TYPE_BASE_SHIFT = 0,
+ BRIG_TYPE_PACK_SHIFT = BRIG_TYPE_BASE_SHIFT + BRIG_TYPE_BASE_SIZE,
+ BRIG_TYPE_ARRAY_SHIFT = BRIG_TYPE_PACK_SHIFT + BRIG_TYPE_PACK_SIZE,
+
+ BRIG_TYPE_BASE_MASK = ((1 << BRIG_TYPE_BASE_SIZE) - 1)
+ << BRIG_TYPE_BASE_SHIFT,
+ BRIG_TYPE_PACK_MASK = ((1 << BRIG_TYPE_PACK_SIZE) - 1)
+ << BRIG_TYPE_PACK_SHIFT,
+ BRIG_TYPE_ARRAY_MASK = ((1 << BRIG_TYPE_ARRAY_SIZE) - 1)
+ << BRIG_TYPE_ARRAY_SHIFT,
+
+ BRIG_TYPE_PACK_NONE = 0 << BRIG_TYPE_PACK_SHIFT,
+ BRIG_TYPE_PACK_32 = 1 << BRIG_TYPE_PACK_SHIFT,
+ BRIG_TYPE_PACK_64 = 2 << BRIG_TYPE_PACK_SHIFT,
+ BRIG_TYPE_PACK_128 = 3 << BRIG_TYPE_PACK_SHIFT,
+
+ BRIG_TYPE_ARRAY = 1 << BRIG_TYPE_ARRAY_SHIFT
+};
+
+typedef uint16_t BrigType16_t;
+
+/* Type codes.  Values 1-23 are the base types.  Each packed type is a base
+   type ORed with one of BRIG_TYPE_PACK_32/64/128, and each *_ARRAY type is
+   the corresponding non-array type ORed with BRIG_TYPE_ARRAY.  */
+enum BrigType
+{
+ BRIG_TYPE_NONE = 0,
+
+ BRIG_TYPE_U8 = 1,
+ BRIG_TYPE_U16 = 2,
+ BRIG_TYPE_U32 = 3,
+ BRIG_TYPE_U64 = 4,
+
+ BRIG_TYPE_S8 = 5,
+ BRIG_TYPE_S16 = 6,
+ BRIG_TYPE_S32 = 7,
+ BRIG_TYPE_S64 = 8,
+
+ BRIG_TYPE_F16 = 9,
+ BRIG_TYPE_F32 = 10,
+ BRIG_TYPE_F64 = 11,
+
+ BRIG_TYPE_B1 = 12,
+ BRIG_TYPE_B8 = 13,
+ BRIG_TYPE_B16 = 14,
+ BRIG_TYPE_B32 = 15,
+ BRIG_TYPE_B64 = 16,
+ BRIG_TYPE_B128 = 17,
+
+ BRIG_TYPE_SAMP = 18,
+ BRIG_TYPE_ROIMG = 19,
+ BRIG_TYPE_WOIMG = 20,
+ BRIG_TYPE_RWIMG = 21,
+
+ BRIG_TYPE_SIG32 = 22,
+ BRIG_TYPE_SIG64 = 23,
+
+ BRIG_TYPE_U8X4 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_32,
+ BRIG_TYPE_U8X8 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_64,
+ BRIG_TYPE_U8X16 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_128,
+
+ BRIG_TYPE_U16X2 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_32,
+ BRIG_TYPE_U16X4 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_64,
+ BRIG_TYPE_U16X8 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_128,
+
+ BRIG_TYPE_U32X2 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_64,
+ BRIG_TYPE_U32X4 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_128,
+
+ BRIG_TYPE_U64X2 = BRIG_TYPE_U64 | BRIG_TYPE_PACK_128,
+
+ BRIG_TYPE_S8X4 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_32,
+ BRIG_TYPE_S8X8 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_64,
+ BRIG_TYPE_S8X16 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_128,
+
+ BRIG_TYPE_S16X2 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_32,
+ BRIG_TYPE_S16X4 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_64,
+ BRIG_TYPE_S16X8 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_128,
+
+ BRIG_TYPE_S32X2 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_64,
+ BRIG_TYPE_S32X4 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_128,
+
+ BRIG_TYPE_S64X2 = BRIG_TYPE_S64 | BRIG_TYPE_PACK_128,
+
+ BRIG_TYPE_F16X2 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_32,
+ BRIG_TYPE_F16X4 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_64,
+ BRIG_TYPE_F16X8 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_128,
+
+ BRIG_TYPE_F32X2 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_64,
+ BRIG_TYPE_F32X4 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_128,
+
+ BRIG_TYPE_F64X2 = BRIG_TYPE_F64 | BRIG_TYPE_PACK_128,
+
+ BRIG_TYPE_U8_ARRAY = BRIG_TYPE_U8 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_U16_ARRAY = BRIG_TYPE_U16 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_U32_ARRAY = BRIG_TYPE_U32 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_U64_ARRAY = BRIG_TYPE_U64 | BRIG_TYPE_ARRAY,
+
+ BRIG_TYPE_S8_ARRAY = BRIG_TYPE_S8 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_S16_ARRAY = BRIG_TYPE_S16 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_S32_ARRAY = BRIG_TYPE_S32 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_S64_ARRAY = BRIG_TYPE_S64 | BRIG_TYPE_ARRAY,
+
+ BRIG_TYPE_F16_ARRAY = BRIG_TYPE_F16 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_F32_ARRAY = BRIG_TYPE_F32 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_F64_ARRAY = BRIG_TYPE_F64 | BRIG_TYPE_ARRAY,
+
+ BRIG_TYPE_B8_ARRAY = BRIG_TYPE_B8 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_B16_ARRAY = BRIG_TYPE_B16 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_B32_ARRAY = BRIG_TYPE_B32 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_B64_ARRAY = BRIG_TYPE_B64 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_B128_ARRAY = BRIG_TYPE_B128 | BRIG_TYPE_ARRAY,
+
+ BRIG_TYPE_SAMP_ARRAY = BRIG_TYPE_SAMP | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_ROIMG_ARRAY = BRIG_TYPE_ROIMG | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_WOIMG_ARRAY = BRIG_TYPE_WOIMG | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_RWIMG_ARRAY = BRIG_TYPE_RWIMG | BRIG_TYPE_ARRAY,
+
+ BRIG_TYPE_SIG32_ARRAY = BRIG_TYPE_SIG32 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_SIG64_ARRAY = BRIG_TYPE_SIG64 | BRIG_TYPE_ARRAY,
+
+ BRIG_TYPE_U8X4_ARRAY = BRIG_TYPE_U8X4 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_U8X8_ARRAY = BRIG_TYPE_U8X8 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_U8X16_ARRAY = BRIG_TYPE_U8X16 | BRIG_TYPE_ARRAY,
+
+ BRIG_TYPE_U16X2_ARRAY = BRIG_TYPE_U16X2 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_U16X4_ARRAY = BRIG_TYPE_U16X4 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_U16X8_ARRAY = BRIG_TYPE_U16X8 | BRIG_TYPE_ARRAY,
+
+ BRIG_TYPE_U32X2_ARRAY = BRIG_TYPE_U32X2 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_U32X4_ARRAY = BRIG_TYPE_U32X4 | BRIG_TYPE_ARRAY,
+
+ BRIG_TYPE_U64X2_ARRAY = BRIG_TYPE_U64X2 | BRIG_TYPE_ARRAY,
+
+ BRIG_TYPE_S8X4_ARRAY = BRIG_TYPE_S8X4 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_S8X8_ARRAY = BRIG_TYPE_S8X8 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_S8X16_ARRAY = BRIG_TYPE_S8X16 | BRIG_TYPE_ARRAY,
+
+ BRIG_TYPE_S16X2_ARRAY = BRIG_TYPE_S16X2 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_S16X4_ARRAY = BRIG_TYPE_S16X4 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_S16X8_ARRAY = BRIG_TYPE_S16X8 | BRIG_TYPE_ARRAY,
+
+ BRIG_TYPE_S32X2_ARRAY = BRIG_TYPE_S32X2 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_S32X4_ARRAY = BRIG_TYPE_S32X4 | BRIG_TYPE_ARRAY,
+
+ BRIG_TYPE_S64X2_ARRAY = BRIG_TYPE_S64X2 | BRIG_TYPE_ARRAY,
+
+ BRIG_TYPE_F16X2_ARRAY = BRIG_TYPE_F16X2 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_F16X4_ARRAY = BRIG_TYPE_F16X4 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_F16X8_ARRAY = BRIG_TYPE_F16X8 | BRIG_TYPE_ARRAY,
+
+ BRIG_TYPE_F32X2_ARRAY = BRIG_TYPE_F32X2 | BRIG_TYPE_ARRAY,
+ BRIG_TYPE_F32X4_ARRAY = BRIG_TYPE_F32X4 | BRIG_TYPE_ARRAY,
+
+ BRIG_TYPE_F64X2_ARRAY = BRIG_TYPE_F64X2 | BRIG_TYPE_ARRAY
+};
+
+/* A 64-bit quantity stored as two 32-bit members.  NOTE(review): presumably
+   lo is the low half and hi the high half -- confirm.  */
+struct BrigUInt64
+{
+ uint32_t lo;
+ uint32_t hi;
+};
+
+typedef uint8_t BrigVariableModifier8_t;
+
+enum BrigVariableModifierMask
+{
+ BRIG_VARIABLE_DEFINITION = 1,
+ BRIG_VARIABLE_CONST = 2
+};
+
+enum BrigVersion
+{
+ BRIG_VERSION_HSAIL_MAJOR = 1,
+ BRIG_VERSION_HSAIL_MINOR = 0,
+ BRIG_VERSION_BRIG_MAJOR = 1,
+ BRIG_VERSION_BRIG_MINOR = 0
+};
+
+typedef uint8_t BrigWidth8_t;
+
+/* Width codes.  Codes 1-32 are one more than the base-2 logarithm of the
+   width in their name (BRIG_WIDTH_1 is 1, BRIG_WIDTH_2147483648 is 32);
+   BRIG_WIDTH_WAVESIZE and BRIG_WIDTH_ALL follow as 33 and 34.  */
+enum BrigWidth
+{
+ BRIG_WIDTH_NONE = 0,
+ BRIG_WIDTH_1 = 1,
+ BRIG_WIDTH_2 = 2,
+ BRIG_WIDTH_4 = 3,
+ BRIG_WIDTH_8 = 4,
+ BRIG_WIDTH_16 = 5,
+ BRIG_WIDTH_32 = 6,
+ BRIG_WIDTH_64 = 7,
+ BRIG_WIDTH_128 = 8,
+ BRIG_WIDTH_256 = 9,
+ BRIG_WIDTH_512 = 10,
+ BRIG_WIDTH_1024 = 11,
+ BRIG_WIDTH_2048 = 12,
+ BRIG_WIDTH_4096 = 13,
+ BRIG_WIDTH_8192 = 14,
+ BRIG_WIDTH_16384 = 15,
+ BRIG_WIDTH_32768 = 16,
+ BRIG_WIDTH_65536 = 17,
+ BRIG_WIDTH_131072 = 18,
+ BRIG_WIDTH_262144 = 19,
+ BRIG_WIDTH_524288 = 20,
+ BRIG_WIDTH_1048576 = 21,
+ BRIG_WIDTH_2097152 = 22,
+ BRIG_WIDTH_4194304 = 23,
+ BRIG_WIDTH_8388608 = 24,
+ BRIG_WIDTH_16777216 = 25,
+ BRIG_WIDTH_33554432 = 26,
+ BRIG_WIDTH_67108864 = 27,
+ BRIG_WIDTH_134217728 = 28,
+ BRIG_WIDTH_268435456 = 29,
+ BRIG_WIDTH_536870912 = 30,
+ BRIG_WIDTH_1073741824 = 31,
+ BRIG_WIDTH_2147483648 = 32,
+ BRIG_WIDTH_WAVESIZE = 33,
+ BRIG_WIDTH_ALL = 34
+};
+
+/* A byte count followed by a byte array declared with a single element.
+   NOTE(review): presumably bytes is the start of byteCount bytes of
+   variable-length payload -- confirm against the HSA PRM.  */
+struct BrigData
+{
+ uint32_t byteCount;
+ uint8_t bytes[1];
+};
+
+struct BrigDirectiveArgBlock
+{
+ BrigBase base;
+};
+
+struct BrigDirectiveComment
+{
+ BrigBase base;
+ BrigDataOffsetString32_t name;
+};
+
+struct BrigDirectiveControl
+{
+ BrigBase base;
+ BrigControlDirective16_t control;
+ uint16_t reserved;
+ BrigDataOffsetOperandList32_t operands;
+};
+
+struct BrigDirectiveExecutable
+{
+ BrigBase base;
+ BrigDataOffsetString32_t name;
+ uint16_t outArgCount;
+ uint16_t inArgCount;
+ BrigCodeOffset32_t firstInArg;
+ BrigCodeOffset32_t firstCodeBlockEntry;
+ BrigCodeOffset32_t nextModuleEntry;
+ BrigExecutableModifier8_t modifier;
+ BrigLinkage8_t linkage;
+ uint16_t reserved;
+};
+
+struct BrigDirectiveExtension
+{
+ BrigBase base;
+ BrigDataOffsetString32_t name;
+};
+
+struct BrigDirectiveFbarrier
+{
+ BrigBase base;
+ BrigDataOffsetString32_t name;
+ BrigVariableModifier8_t modifier;
+ BrigLinkage8_t linkage;
+ uint16_t reserved;
+};
+
+struct BrigDirectiveLabel
+{
+ BrigBase base;
+ BrigDataOffsetString32_t name;
+};
+
+struct BrigDirectiveLoc
+{
+ BrigBase base;
+ BrigDataOffsetString32_t filename;
+ uint32_t line;
+ uint32_t column;
+};
+
+struct BrigDirectiveModule
+{
+ BrigBase base;
+ BrigDataOffsetString32_t name;
+ BrigVersion32_t hsailMajor;
+ BrigVersion32_t hsailMinor;
+ BrigProfile8_t profile;
+ BrigMachineModel8_t machineModel;
+ BrigRound8_t defaultFloatRound;
+ uint8_t reserved;
+};
+
+struct BrigDirectiveNone
+{
+ BrigBase base;
+};
+
+struct BrigDirectivePragma
+{
+ BrigBase base;
+ BrigDataOffsetOperandList32_t operands;
+};
+
+struct BrigDirectiveVariable
+{
+ BrigBase base;
+ BrigDataOffsetString32_t name;
+ BrigOperandOffset32_t init;
+ BrigType16_t type;
+ BrigSegment8_t segment;
+ BrigAlignment8_t align;
+ BrigUInt64 dim;
+ BrigVariableModifier8_t modifier;
+ BrigLinkage8_t linkage;
+ BrigAllocation8_t allocation;
+ uint8_t reserved;
+};
+
+/* Common leading part of every BrigInst* structure in this file: each of
+   them has a BrigInstBase as its first member, named base.  It starts with a
+   BrigBase and adds the opcode, the type and an operands member typed
+   BrigDataOffsetOperandList32_t.  */
+struct BrigInstBase
+{
+ BrigBase base;
+ BrigOpcode16_t opcode;
+ BrigType16_t type;
+ BrigDataOffsetOperandList32_t operands;
+};
+
+struct BrigInstAddr
+{
+ BrigInstBase base;
+ BrigSegment8_t segment;
+ uint8_t reserved[3];
+};
+
+struct BrigInstAtomic
+{
+ BrigInstBase base;
+ BrigSegment8_t segment;
+ BrigMemoryOrder8_t memoryOrder;
+ BrigMemoryScope8_t memoryScope;
+ BrigAtomicOperation8_t atomicOperation;
+ uint8_t equivClass;
+ uint8_t reserved[3];
+};
+
+struct BrigInstBasic
+{
+ BrigInstBase base;
+};
+
+struct BrigInstBr
+{
+ BrigInstBase base;
+ BrigWidth8_t width;
+ uint8_t reserved[3];
+};
+
+struct BrigInstCmp
+{
+ BrigInstBase base;
+ BrigType16_t sourceType;
+ BrigAluModifier8_t modifier;
+ BrigCompareOperation8_t compare;
+ BrigPack8_t pack;
+ uint8_t reserved[3];
+};
+
+struct BrigInstCvt
+{
+ BrigInstBase base;
+ BrigType16_t sourceType;
+ BrigAluModifier8_t modifier;
+ BrigRound8_t round;
+};
+
+struct BrigInstImage
+{
+ BrigInstBase base;
+ BrigType16_t imageType;
+ BrigType16_t coordType;
+ BrigImageGeometry8_t geometry;
+ uint8_t equivClass;
+ uint16_t reserved;
+};
+
+struct BrigInstLane
+{
+ BrigInstBase base;
+ BrigType16_t sourceType;
+ BrigWidth8_t width;
+ uint8_t reserved;
+};
+
+struct BrigInstMem
+{
+ BrigInstBase base;
+ BrigSegment8_t segment;
+ BrigAlignment8_t align;
+ uint8_t equivClass;
+ BrigWidth8_t width;
+ BrigMemoryModifier8_t modifier;
+ uint8_t reserved[3];
+};
+
+struct BrigInstMemFence
+{
+ BrigInstBase base;
+ BrigMemoryOrder8_t memoryOrder;
+ BrigMemoryScope8_t globalSegmentMemoryScope;
+ BrigMemoryScope8_t groupSegmentMemoryScope;
+ BrigMemoryScope8_t imageSegmentMemoryScope;
+};
+
+struct BrigInstMod
+{
+ BrigInstBase base;
+ BrigAluModifier8_t modifier;
+ BrigRound8_t round;
+ BrigPack8_t pack;
+ uint8_t reserved;
+};
+
+struct BrigInstQueryImage
+{
+ BrigInstBase base;
+ BrigType16_t imageType;
+ BrigImageGeometry8_t geometry;
+ BrigImageQuery8_t query;
+};
+
+struct BrigInstQuerySampler
+{
+ BrigInstBase base;
+ BrigSamplerQuery8_t query;
+ uint8_t reserved[3];
+};
+
+struct BrigInstQueue
+{
+ BrigInstBase base;
+ BrigSegment8_t segment;
+ BrigMemoryOrder8_t memoryOrder;
+ uint16_t reserved;
+};
+
+struct BrigInstSeg
+{
+ BrigInstBase base;
+ BrigSegment8_t segment;
+ uint8_t reserved[3];
+};
+
+struct BrigInstSegCvt
+{
+ BrigInstBase base;
+ BrigType16_t sourceType;
+ BrigSegment8_t segment;
+ BrigSegCvtModifier8_t modifier;
+};
+
+struct BrigInstSignal
+{
+ BrigInstBase base;
+ BrigType16_t signalType;
+ BrigMemoryOrder8_t memoryOrder;
+ BrigAtomicOperation8_t signalOperation;
+};
+
+struct BrigInstSourceType
+{
+ BrigInstBase base;
+ BrigType16_t sourceType;
+ uint16_t reserved;
+};
+
+struct BrigOperandAddress
+{
+ BrigBase base;
+ BrigCodeOffset32_t symbol;
+ BrigOperandOffset32_t reg;
+ BrigUInt64 offset;
+};
+
+struct BrigOperandAlign
+{
+ BrigBase base;
+ BrigAlignment8_t align;
+ uint8_t reserved[3];
+};
+
+struct BrigOperandCodeList
+{
+ BrigBase base;
+ BrigDataOffsetCodeList32_t elements;
+};
+
+struct BrigOperandCodeRef
+{
+ BrigBase base;
+ BrigCodeOffset32_t ref;
+};
+
+struct BrigOperandConstantBytes
+{
+ BrigBase base;
+ BrigType16_t type;
+ uint16_t reserved;
+ BrigDataOffsetString32_t bytes;
+};
+
+struct BrigOperandConstantImage
+{
+ BrigBase base;
+ BrigType16_t type;
+ BrigImageGeometry8_t geometry;
+ BrigImageChannelOrder8_t channelOrder;
+ BrigImageChannelType8_t channelType;
+ uint8_t reserved[3];
+ BrigUInt64 width;
+ BrigUInt64 height;
+ BrigUInt64 depth;
+ BrigUInt64 array;
+};
+
+struct BrigOperandConstantOperandList
+{
+ BrigBase base;
+ BrigType16_t type;
+ uint16_t reserved;
+ BrigDataOffsetOperandList32_t elements;
+};
+
+struct BrigOperandConstantSampler
+{
+ BrigBase base;
+ BrigType16_t type;
+ BrigSamplerCoordNormalization8_t coord;
+ BrigSamplerFilter8_t filter;
+ BrigSamplerAddressing8_t addressing;
+ uint8_t reserved[3];
+};
+
+struct BrigOperandOperandList
+{
+ BrigBase base;
+ BrigDataOffsetOperandList32_t elements;
+};
+
+struct BrigOperandRegister
+{
+ BrigBase base;
+ BrigRegisterKind16_t regKind;
+ uint16_t regNum;
+};
+
+struct BrigOperandString
+{
+ BrigBase base;
+ BrigDataOffsetString32_t string;
+};
+
+struct BrigOperandWavesize
+{
+ BrigBase base;
+};
+
+#endif /* HSA_BRIG_FORMAT_H */
|| flag_tree_parallelize_loops > 1 \
|| flag_offload_abi != OFFLOAD_ABI_UNSET))
-#undef DEF_HSA_BUILTIN
-#ifdef ENABLE_HSA
-#define DEF_HSA_BUILTIN(ENUM, NAME, TYPE, ATTRS) \
- DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE, \
- false, false, true, ATTRS, false, \
- (!flag_disable_hsa))
-#else
-#define DEF_HSA_BUILTIN(ENUM, NAME, TYPE, ATTRS) \
- DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE, \
- false, false, true, ATTRS, false, \
- (false))
-#endif
-
/* Builtin used by the implementation of GNU TM. These
functions are mapped to the actual implementation of the STM library. */
#undef DEF_TM_BUILTIN
/* Offloading and Multi Processing builtins. */
#include "omp-builtins.def"
-/* Heterogeneous Systems Architecture. */
-#include "hsa-builtins.def"
-
/* GTM builtins. */
#include "gtm-builtins.def"
Variable
bool dump_base_name_prefixed = false
-; Flag whether HSA generation has been explicitely disabled
-Variable
-bool flag_disable_hsa = false
-
###
Driver
Warn when attempting to free a non-heap object.
Whsa
-Common Var(warn_hsa) Init(1) Warning
-Warn when a function cannot be expanded to HSAIL.
+Common Ignore Warning
+Does nothing. Preserved for backward compatibility.
Wimplicit-fallthrough
Common Alias(Wimplicit-fallthrough=,3,0) Warning
#endif
-/* Define this to enable support for generating HSAIL. */
-#ifndef USED_FOR_TARGET
-#undef ENABLE_HSA
-#endif
-
-
/* Define if gcc should always pass --build-id to linker. */
#ifndef USED_FOR_TARGET
#undef ENABLE_LD_BUILDID
for tgt in `echo $enable_offload_targets | sed 's/,/ /g'`; do
tgt=`echo $tgt | sed 's/=.*//'`
- if echo "$tgt" | grep "^hsa" > /dev/null ; then
- enable_hsa=1
- else
- enable_offloading=1
- case "$tgt" in
- *-intelmic-* | *-intelmicemul-*)
- omp_device_property=omp-device-properties-i386
- omp_device_property_tmake_file="${omp_device_property_tmake_file} \$(srcdir)/config/i386/t-omp-device"
- ;;
- amdgcn*)
- omp_device_property=omp-device-properties-gcn
- omp_device_property_tmake_file="${omp_device_property_tmake_file} \$(srcdir)/config/gcn/t-omp-device"
- ;;
- nvptx*)
- omp_device_property=omp-device-properties-nvptx
- omp_device_property_tmake_file="${omp_device_property_tmake_file} \$(srcdir)/config/nvptx/t-omp-device"
- ;;
- *)
- as_fn_error $? "unknown offload target specified" "$LINENO" 5
- ;;
- esac
- omp_device_properties="${omp_device_properties} ${tgt}=${omp_device_property}"
- omp_device_property_deps="${omp_device_property_deps} ${omp_device_property}"
- fi
+ enable_offloading=1
+ case "$tgt" in
+ *-intelmic-* | *-intelmicemul-*)
+ omp_device_property=omp-device-properties-i386
+ omp_device_property_tmake_file="${omp_device_property_tmake_file} \$(srcdir)/config/i386/t-omp-device"
+ ;;
+ amdgcn*)
+ omp_device_property=omp-device-properties-gcn
+ omp_device_property_tmake_file="${omp_device_property_tmake_file} \$(srcdir)/config/gcn/t-omp-device"
+ ;;
+ nvptx*)
+ omp_device_property=omp-device-properties-nvptx
+ omp_device_property_tmake_file="${omp_device_property_tmake_file} \$(srcdir)/config/nvptx/t-omp-device"
+ ;;
+ *)
+ as_fn_error $? "unknown offload target specified" "$LINENO" 5
+ ;;
+ esac
+ omp_device_properties="${omp_device_properties} ${tgt}=${omp_device_property}"
+ omp_device_property_deps="${omp_device_property_deps} ${omp_device_property}"
if test x"$offload_targets" = x; then
offload_targets=$tgt
fi
-if test x"$enable_hsa" = x1 ; then
-
-$as_echo "#define ENABLE_HSA 1" >>confdefs.h
-
-fi
-
# Check whether --with-multilib-list was given.
if test "${with_multilib_list+set}" = set; then :
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 19026 "configure"
+#line 19016 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 19132 "configure"
+#line 19122 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
for tgt in `echo $enable_offload_targets | sed 's/,/ /g'`; do
tgt=`echo $tgt | sed 's/=.*//'`
- if echo "$tgt" | grep "^hsa" > /dev/null ; then
- enable_hsa=1
- else
- enable_offloading=1
- case "$tgt" in
- *-intelmic-* | *-intelmicemul-*)
- omp_device_property=omp-device-properties-i386
- omp_device_property_tmake_file="${omp_device_property_tmake_file} \$(srcdir)/config/i386/t-omp-device"
- ;;
- amdgcn*)
- omp_device_property=omp-device-properties-gcn
- omp_device_property_tmake_file="${omp_device_property_tmake_file} \$(srcdir)/config/gcn/t-omp-device"
- ;;
- nvptx*)
- omp_device_property=omp-device-properties-nvptx
- omp_device_property_tmake_file="${omp_device_property_tmake_file} \$(srcdir)/config/nvptx/t-omp-device"
- ;;
- *)
- AC_MSG_ERROR([unknown offload target specified])
- ;;
- esac
- omp_device_properties="${omp_device_properties} ${tgt}=${omp_device_property}"
- omp_device_property_deps="${omp_device_property_deps} ${omp_device_property}"
- fi
+ enable_offloading=1
+ case "$tgt" in
+ *-intelmic-* | *-intelmicemul-*)
+ omp_device_property=omp-device-properties-i386
+ omp_device_property_tmake_file="${omp_device_property_tmake_file} \$(srcdir)/config/i386/t-omp-device"
+ ;;
+ amdgcn*)
+ omp_device_property=omp-device-properties-gcn
+ omp_device_property_tmake_file="${omp_device_property_tmake_file} \$(srcdir)/config/gcn/t-omp-device"
+ ;;
+ nvptx*)
+ omp_device_property=omp-device-properties-nvptx
+ omp_device_property_tmake_file="${omp_device_property_tmake_file} \$(srcdir)/config/nvptx/t-omp-device"
+ ;;
+ *)
+ AC_MSG_ERROR([unknown offload target specified])
+ ;;
+ esac
+ omp_device_properties="${omp_device_properties} ${tgt}=${omp_device_property}"
+ omp_device_property_deps="${omp_device_property_deps} ${omp_device_property}"
if test x"$offload_targets" = x; then
offload_targets=$tgt
[Define this to enable support for offloading.])
fi
-if test x"$enable_hsa" = x1 ; then
- AC_DEFINE(ENABLE_HSA, 1,
- [Define this to enable support for generating HSAIL.])
-fi
-
AC_ARG_WITH(multilib-list,
[AS_HELP_STRING([--with-multilib-list], [select multilibs (AArch64, SH and x86-64 only)])],
:,
@smallexample
% @var{srcdir}/configure \
- --enable-offload-targets=x86_64-intelmicemul-linux-gnu=/path/to/x86_64/compiler,nvptx-none,hsa
+ --enable-offload-targets=x86_64-intelmicemul-linux-gnu=/path/to/x86_64/compiler,nvptx-none
@end smallexample
-If @samp{hsa} is specified as one of the targets, the compiler will be
-built with support for HSA GPU accelerators. Because the same
-compiler will emit the accelerator code, no path should be specified.
-
@item --with-hsa-runtime=@var{pathname}
@itemx --with-hsa-runtime-include=@var{pathname}
@itemx --with-hsa-runtime-lib=@var{pathname}
-If you configure GCC with HSA offloading but do not have the HSA
-run-time library installed in a standard location then you can
-explicitly specify the directory where they are installed. The
-@option{--with-hsa-runtime=@/@var{hsainstalldir}} option is a
-shorthand for
+If you configure GCC with offloading that uses an HSA run-time, such as
+AMDGCN, but do not have the HSA run-time library installed in a standard
+location, then you can explicitly specify the directory where it is
+installed. The @option{--with-hsa-runtime=@/@var{hsainstalldir}} option
+is a shorthand for
@option{--with-hsa-runtime-lib=@/@var{hsainstalldir}/lib} and
@option{--with-hsa-runtime-include=@/@var{hsainstalldir}/include}.
-Wformat-security -Wformat-signedness -Wformat-truncation=@var{n} @gol
-Wformat-y2k -Wframe-address @gol
-Wframe-larger-than=@var{byte-size} -Wno-free-nonheap-object @gol
--Wno-hsa -Wno-if-not-aligned -Wno-ignored-attributes @gol
+-Wno-if-not-aligned -Wno-ignored-attributes @gol
-Wignored-qualifiers -Wno-incompatible-pointer-types @gol
-Wimplicit -Wimplicit-fallthrough -Wimplicit-fallthrough=@var{n} @gol
-Wno-implicit-function-declaration -Wno-implicit-int @gol
a structure that has been marked with the @code{designated_init}
attribute.
-@item -Wno-hsa
-@opindex Whsa
-@opindex Wno-hsa
-Do not warn when HSAIL cannot be emitted for the compiled function or
-OpenMP construct. These warnings are enabled by default.
-
@end table
@node Static Analyzer Options
like fold routines. One level of recursion corresponds to following a
use-def chain.
-@item hsa-gen-debug-stores
-Enable emission of special debug stores within HSA kernels which are
-then read and reported by libgomp plugin. Generation of these stores
-is disabled by default, use @option{--param hsa-gen-debug-stores=1} to
-enable it.
-
@item max-speculative-devirt-maydefs
The maximum number of may-defs we analyze when looking for a must-def
specifying the dynamic type of an object that invokes a virtual call
pass is located in @file{ipa.c} and is described by
@code{pass_ipa_cdtor_merge}.
-@item IPA HSA
-
-This pass is part of the GCC support for HSA (Heterogeneous System
-Architecture) accelerators. It is responsible for creation of HSA
-clones and emitting HSAIL instructions for them. It is located in
-@file{ipa-hsa.c} and is described by @code{pass_ipa_hsa}.
-
@item IPA function summary
This pass provides function analysis for inter-procedural passes.
#undef DEF_GOMP_BUILTIN
}
-#ifdef ENABLE_HSA
- if (!flag_disable_hsa)
- {
-#undef DEF_HSA_BUILTIN
-#define DEF_HSA_BUILTIN(code, name, type, attr) \
- gfc_define_builtin ("__builtin_" name, builtin_types[type], \
- code, name, attr);
-#include "../hsa-builtins.def"
- }
-#endif
-
gfc_define_builtin ("__builtin_trap", builtin_types[BT_FN_VOID],
BUILT_IN_TRAP, NULL, ATTR_NOTHROW_LEAF_LIST);
TREE_THIS_VOLATILE (builtin_decl_explicit (BUILT_IN_TRAP)) = 1;
case GIMPLE_OMP_TASK:
case GIMPLE_OMP_TARGET:
case GIMPLE_OMP_TEAMS:
- case GIMPLE_OMP_GRID_BODY:
data->cannot_fallthru = false;
lower_omp_directive (gsi, data);
data->cannot_fallthru = false;
case GF_OMP_FOR_KIND_SIMD:
pp_string (buffer, "#pragma omp simd");
break;
- case GF_OMP_FOR_KIND_GRID_LOOP:
- pp_string (buffer, "#pragma omp for grid_loop");
- break;
default:
gcc_unreachable ();
}
case GIMPLE_OMP_SECTION:
pp_string (buffer, "#pragma omp section");
break;
- case GIMPLE_OMP_GRID_BODY:
- pp_string (buffer, "#pragma omp gridified body");
- break;
default:
gcc_unreachable ();
}
case GIMPLE_OMP_MASTER:
case GIMPLE_OMP_SECTION:
- case GIMPLE_OMP_GRID_BODY:
dump_gimple_omp_block (buffer, gs, spc, flags);
break;
case GIMPLE_OMP_SINGLE:
case GIMPLE_OMP_TARGET:
case GIMPLE_OMP_TEAMS:
- case GIMPLE_OMP_GRID_BODY:
ret = walk_gimple_seq_mod (gimple_omp_body_ptr (stmt), callback_stmt,
callback_op, wi);
if (ret)
return p;
}
-/* Build a GIMPLE_OMP_GRID_BODY statement.
-
- BODY is the sequence of statements to be executed by the kernel. */
-
-gimple *
-gimple_build_omp_grid_body (gimple_seq body)
-{
- gimple *p = gimple_alloc (GIMPLE_OMP_GRID_BODY, 0);
- if (body)
- gimple_omp_set_body (p, body);
-
- return p;
-}
-
/* Build a GIMPLE_OMP_TASKGROUP statement.
BODY is the sequence of statements to be executed by the taskgroup
case GIMPLE_OMP_SECTION:
case GIMPLE_OMP_MASTER:
- case GIMPLE_OMP_GRID_BODY:
copy_omp_body:
new_seq = gimple_seq_copy (gimple_omp_body (stmt));
gimple_omp_set_body (copy, new_seq);
CLAUSES is an OMP_CLAUSE chain holding the associated clauses. */
DEFGSCODE(GIMPLE_OMP_ORDERED, "gimple_omp_ordered", GSS_OMP_SINGLE_LAYOUT)
-/* GIMPLE_OMP_GRID_BODY <BODY> represents a parallel loop lowered for execution
- on a GPU. It is an artificial statement created by omp lowering. */
-DEFGSCODE(GIMPLE_OMP_GRID_BODY, "gimple_omp_gpukernel", GSS_OMP)
-
/* GIMPLE_PREDICT <PREDICT, OUTCOME> specifies a hint for branch prediction.
PREDICT is one of the predictors from predict.def.
GF_CALL_BY_DESCRIPTOR = 1 << 10,
GF_CALL_NOCF_CHECK = 1 << 11,
GF_OMP_PARALLEL_COMBINED = 1 << 0,
- GF_OMP_PARALLEL_GRID_PHONY = 1 << 1,
GF_OMP_TASK_TASKLOOP = 1 << 0,
GF_OMP_TASK_TASKWAIT = 1 << 1,
GF_OMP_FOR_KIND_MASK = (1 << 3) - 1,
GF_OMP_FOR_KIND_DISTRIBUTE = 1,
GF_OMP_FOR_KIND_TASKLOOP = 2,
GF_OMP_FOR_KIND_OACC_LOOP = 4,
- GF_OMP_FOR_KIND_GRID_LOOP = 5,
- GF_OMP_FOR_KIND_SIMD = 6,
+ GF_OMP_FOR_KIND_SIMD = 5,
GF_OMP_FOR_COMBINED = 1 << 3,
GF_OMP_FOR_COMBINED_INTO = 1 << 4,
- /* The following flag must not be used on GF_OMP_FOR_KIND_GRID_LOOP loop
- statements. */
- GF_OMP_FOR_GRID_PHONY = 1 << 5,
- /* The following two flags should only be set on GF_OMP_FOR_KIND_GRID_LOOP
- loop statements. */
- GF_OMP_FOR_GRID_INTRA_GROUP = 1 << 5,
- GF_OMP_FOR_GRID_GROUP_ITER = 1 << 6,
GF_OMP_TARGET_KIND_MASK = (1 << 4) - 1,
GF_OMP_TARGET_KIND_REGION = 0,
GF_OMP_TARGET_KIND_DATA = 1,
GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA = 10,
GF_OMP_TARGET_KIND_OACC_DECLARE = 11,
GF_OMP_TARGET_KIND_OACC_HOST_DATA = 12,
- GF_OMP_TEAMS_GRID_PHONY = 1 << 0,
- GF_OMP_TEAMS_HOST = 1 << 1,
+ GF_OMP_TEAMS_HOST = 1 << 0,
/* True on an GIMPLE_OMP_RETURN statement if the return does not require
a thread synchronization via some sort of barrier. The exact barrier
tree, tree);
gimple *gimple_build_omp_section (gimple_seq);
gimple *gimple_build_omp_master (gimple_seq);
-gimple *gimple_build_omp_grid_body (gimple_seq);
gimple *gimple_build_omp_taskgroup (gimple_seq, tree);
gomp_continue *gimple_build_omp_continue (tree, tree);
gomp_ordered *gimple_build_omp_ordered (gimple_seq, tree);
case GIMPLE_OMP_CRITICAL:
case GIMPLE_WITH_CLEANUP_EXPR:
case GIMPLE_TRANSACTION:
- case GIMPLE_OMP_GRID_BODY:
return true;
default:
omp_for_stmt->pre_body = pre_body;
}
-/* Return the kernel_phony of OMP_FOR statement. */
-
-static inline bool
-gimple_omp_for_grid_phony (const gomp_for *omp_for)
-{
- gcc_checking_assert (gimple_omp_for_kind (omp_for)
- != GF_OMP_FOR_KIND_GRID_LOOP);
- return (gimple_omp_subcode (omp_for) & GF_OMP_FOR_GRID_PHONY) != 0;
-}
-
-/* Set kernel_phony flag of OMP_FOR to VALUE. */
-
-static inline void
-gimple_omp_for_set_grid_phony (gomp_for *omp_for, bool value)
-{
- gcc_checking_assert (gimple_omp_for_kind (omp_for)
- != GF_OMP_FOR_KIND_GRID_LOOP);
- if (value)
- omp_for->subcode |= GF_OMP_FOR_GRID_PHONY;
- else
- omp_for->subcode &= ~GF_OMP_FOR_GRID_PHONY;
-}
-
-/* Return the kernel_intra_group of a GRID_LOOP OMP_FOR statement. */
-
-static inline bool
-gimple_omp_for_grid_intra_group (const gomp_for *omp_for)
-{
- gcc_checking_assert (gimple_omp_for_kind (omp_for)
- == GF_OMP_FOR_KIND_GRID_LOOP);
- return (gimple_omp_subcode (omp_for) & GF_OMP_FOR_GRID_INTRA_GROUP) != 0;
-}
-
-/* Set kernel_intra_group flag of OMP_FOR to VALUE. */
-
-static inline void
-gimple_omp_for_set_grid_intra_group (gomp_for *omp_for, bool value)
-{
- gcc_checking_assert (gimple_omp_for_kind (omp_for)
- == GF_OMP_FOR_KIND_GRID_LOOP);
- if (value)
- omp_for->subcode |= GF_OMP_FOR_GRID_INTRA_GROUP;
- else
- omp_for->subcode &= ~GF_OMP_FOR_GRID_INTRA_GROUP;
-}
-
-/* Return true if iterations of a grid OMP_FOR statement correspond to HSA
- groups. */
-
-static inline bool
-gimple_omp_for_grid_group_iter (const gomp_for *omp_for)
-{
- gcc_checking_assert (gimple_omp_for_kind (omp_for)
- == GF_OMP_FOR_KIND_GRID_LOOP);
- return (gimple_omp_subcode (omp_for) & GF_OMP_FOR_GRID_GROUP_ITER) != 0;
-}
-
-/* Set group_iter flag of OMP_FOR to VALUE. */
-
-static inline void
-gimple_omp_for_set_grid_group_iter (gomp_for *omp_for, bool value)
-{
- gcc_checking_assert (gimple_omp_for_kind (omp_for)
- == GF_OMP_FOR_KIND_GRID_LOOP);
- if (value)
- omp_for->subcode |= GF_OMP_FOR_GRID_GROUP_ITER;
- else
- omp_for->subcode &= ~GF_OMP_FOR_GRID_GROUP_ITER;
-}
-
/* Return the clauses associated with OMP_PARALLEL GS. */
static inline tree
omp_parallel_stmt->data_arg = data_arg;
}
-/* Return the kernel_phony flag of OMP_PARALLEL_STMT. */
-
-static inline bool
-gimple_omp_parallel_grid_phony (const gomp_parallel *stmt)
-{
- return (gimple_omp_subcode (stmt) & GF_OMP_PARALLEL_GRID_PHONY) != 0;
-}
-
-/* Set kernel_phony flag of OMP_PARALLEL_STMT to VALUE. */
-
-static inline void
-gimple_omp_parallel_set_grid_phony (gomp_parallel *stmt, bool value)
-{
- if (value)
- stmt->subcode |= GF_OMP_PARALLEL_GRID_PHONY;
- else
- stmt->subcode &= ~GF_OMP_PARALLEL_GRID_PHONY;
-}
-
/* Return the clauses associated with OMP_TASK GS. */
static inline tree
omp_teams_stmt->data_arg = data_arg;
}
-/* Return the kernel_phony flag of an OMP_TEAMS_STMT. */
-
-static inline bool
-gimple_omp_teams_grid_phony (const gomp_teams *omp_teams_stmt)
-{
- return (gimple_omp_subcode (omp_teams_stmt) & GF_OMP_TEAMS_GRID_PHONY) != 0;
-}
-
-/* Set kernel_phony flag of an OMP_TEAMS_STMT to VALUE. */
-
-static inline void
-gimple_omp_teams_set_grid_phony (gomp_teams *omp_teams_stmt, bool value)
-{
- if (value)
- omp_teams_stmt->subcode |= GF_OMP_TEAMS_GRID_PHONY;
- else
- omp_teams_stmt->subcode &= ~GF_OMP_TEAMS_GRID_PHONY;
-}
-
/* Return the host flag of an OMP_TEAMS_STMT. */
static inline bool
case GIMPLE_OMP_RETURN: \
case GIMPLE_OMP_ATOMIC_LOAD: \
case GIMPLE_OMP_ATOMIC_STORE: \
- case GIMPLE_OMP_CONTINUE: \
- case GIMPLE_OMP_GRID_BODY
+ case GIMPLE_OMP_CONTINUE
static inline bool
is_gimple_omp (const gimple *stmt)
+++ /dev/null
-/* HSA BRIG (binary representation of HSAIL) 1.0.1 representation description.
- Copyright (C) 2016-2020 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING3. If not see
-<http://www.gnu.org/licenses/>.
-
-The contents of the file was created by extracting data structures, enum,
-typedef and other definitions from HSA Programmer's Reference Manual Version
-1.0.1 (http://www.hsafoundation.com/standards/).
-
-HTML version is provided on the following link:
-http://www.hsafoundation.com/html/Content/PRM/Topics/PRM_title_page.htm */
-
-#ifndef HSA_BRIG_FORMAT_H
-#define HSA_BRIG_FORMAT_H
-
-struct BrigModuleHeader;
-typedef uint16_t BrigKind16_t;
-typedef uint32_t BrigVersion32_t;
-
-typedef BrigModuleHeader *BrigModule_t;
-typedef uint32_t BrigDataOffset32_t;
-typedef uint32_t BrigCodeOffset32_t;
-typedef uint32_t BrigOperandOffset32_t;
-typedef BrigDataOffset32_t BrigDataOffsetString32_t;
-typedef BrigDataOffset32_t BrigDataOffsetCodeList32_t;
-typedef BrigDataOffset32_t BrigDataOffsetOperandList32_t;
-typedef uint8_t BrigAlignment8_t;
-
-enum BrigAlignment
-{
- BRIG_ALIGNMENT_NONE = 0,
- BRIG_ALIGNMENT_1 = 1,
- BRIG_ALIGNMENT_2 = 2,
- BRIG_ALIGNMENT_4 = 3,
- BRIG_ALIGNMENT_8 = 4,
- BRIG_ALIGNMENT_16 = 5,
- BRIG_ALIGNMENT_32 = 6,
- BRIG_ALIGNMENT_64 = 7,
- BRIG_ALIGNMENT_128 = 8,
- BRIG_ALIGNMENT_256 = 9
-};
-
-typedef uint8_t BrigAllocation8_t;
-
-enum BrigAllocation
-{
- BRIG_ALLOCATION_NONE = 0,
- BRIG_ALLOCATION_PROGRAM = 1,
- BRIG_ALLOCATION_AGENT = 2,
- BRIG_ALLOCATION_AUTOMATIC = 3
-};
-
-typedef uint8_t BrigAluModifier8_t;
-
-enum BrigAluModifierMask
-{
- BRIG_ALU_FTZ = 1
-};
-
-typedef uint8_t BrigAtomicOperation8_t;
-
-enum BrigAtomicOperation
-{
- BRIG_ATOMIC_ADD = 0,
- BRIG_ATOMIC_AND = 1,
- BRIG_ATOMIC_CAS = 2,
- BRIG_ATOMIC_EXCH = 3,
- BRIG_ATOMIC_LD = 4,
- BRIG_ATOMIC_MAX = 5,
- BRIG_ATOMIC_MIN = 6,
- BRIG_ATOMIC_OR = 7,
- BRIG_ATOMIC_ST = 8,
- BRIG_ATOMIC_SUB = 9,
- BRIG_ATOMIC_WRAPDEC = 10,
- BRIG_ATOMIC_WRAPINC = 11,
- BRIG_ATOMIC_XOR = 12,
- BRIG_ATOMIC_WAIT_EQ = 13,
- BRIG_ATOMIC_WAIT_NE = 14,
- BRIG_ATOMIC_WAIT_LT = 15,
- BRIG_ATOMIC_WAIT_GTE = 16,
- BRIG_ATOMIC_WAITTIMEOUT_EQ = 17,
- BRIG_ATOMIC_WAITTIMEOUT_NE = 18,
- BRIG_ATOMIC_WAITTIMEOUT_LT = 19,
- BRIG_ATOMIC_WAITTIMEOUT_GTE = 20
-};
-
-struct BrigBase
-{
- uint16_t byteCount;
- BrigKind16_t kind;
-};
-
-typedef uint8_t BrigCompareOperation8_t;
-
-enum BrigCompareOperation
-{
- BRIG_COMPARE_EQ = 0,
- BRIG_COMPARE_NE = 1,
- BRIG_COMPARE_LT = 2,
- BRIG_COMPARE_LE = 3,
- BRIG_COMPARE_GT = 4,
- BRIG_COMPARE_GE = 5,
- BRIG_COMPARE_EQU = 6,
- BRIG_COMPARE_NEU = 7,
- BRIG_COMPARE_LTU = 8,
- BRIG_COMPARE_LEU = 9,
- BRIG_COMPARE_GTU = 10,
- BRIG_COMPARE_GEU = 11,
- BRIG_COMPARE_NUM = 12,
- BRIG_COMPARE_NAN = 13,
- BRIG_COMPARE_SEQ = 14,
- BRIG_COMPARE_SNE = 15,
- BRIG_COMPARE_SLT = 16,
- BRIG_COMPARE_SLE = 17,
- BRIG_COMPARE_SGT = 18,
- BRIG_COMPARE_SGE = 19,
- BRIG_COMPARE_SGEU = 20,
- BRIG_COMPARE_SEQU = 21,
- BRIG_COMPARE_SNEU = 22,
- BRIG_COMPARE_SLTU = 23,
- BRIG_COMPARE_SLEU = 24,
- BRIG_COMPARE_SNUM = 25,
- BRIG_COMPARE_SNAN = 26,
- BRIG_COMPARE_SGTU = 27
-};
-
-typedef uint16_t BrigControlDirective16_t;
-
-enum BrigControlDirective
-{
- BRIG_CONTROL_NONE = 0,
- BRIG_CONTROL_ENABLEBREAKEXCEPTIONS = 1,
- BRIG_CONTROL_ENABLEDETECTEXCEPTIONS = 2,
- BRIG_CONTROL_MAXDYNAMICGROUPSIZE = 3,
- BRIG_CONTROL_MAXFLATGRIDSIZE = 4,
- BRIG_CONTROL_MAXFLATWORKGROUPSIZE = 5,
- BRIG_CONTROL_REQUIREDDIM = 6,
- BRIG_CONTROL_REQUIREDGRIDSIZE = 7,
- BRIG_CONTROL_REQUIREDWORKGROUPSIZE = 8,
- BRIG_CONTROL_REQUIRENOPARTIALWORKGROUPS = 9
-};
-
-typedef uint32_t BrigExceptions32_t;
-
-enum BrigExceptionsMask
-{
- BRIG_EXCEPTIONS_INVALID_OPERATION = 1 << 0,
- BRIG_EXCEPTIONS_DIVIDE_BY_ZERO = 1 << 1,
- BRIG_EXCEPTIONS_OVERFLOW = 1 << 2,
- BRIG_EXCEPTIONS_UNDERFLOW = 1 << 3,
- BRIG_EXCEPTIONS_INEXACT = 1 << 4,
- BRIG_EXCEPTIONS_FIRST_USER_DEFINED = 1 << 16
-};
-
-typedef uint8_t BrigExecutableModifier8_t;
-
-enum BrigExecutableModifierMask
-{
- BRIG_EXECUTABLE_DEFINITION = 1
-};
-
-typedef uint8_t BrigImageChannelOrder8_t;
-
-enum BrigImageChannelOrder
-{
- BRIG_CHANNEL_ORDER_A = 0,
- BRIG_CHANNEL_ORDER_R = 1,
- BRIG_CHANNEL_ORDER_RX = 2,
- BRIG_CHANNEL_ORDER_RG = 3,
- BRIG_CHANNEL_ORDER_RGX = 4,
- BRIG_CHANNEL_ORDER_RA = 5,
- BRIG_CHANNEL_ORDER_RGB = 6,
- BRIG_CHANNEL_ORDER_RGBX = 7,
- BRIG_CHANNEL_ORDER_RGBA = 8,
- BRIG_CHANNEL_ORDER_BGRA = 9,
- BRIG_CHANNEL_ORDER_ARGB = 10,
- BRIG_CHANNEL_ORDER_ABGR = 11,
- BRIG_CHANNEL_ORDER_SRGB = 12,
- BRIG_CHANNEL_ORDER_SRGBX = 13,
- BRIG_CHANNEL_ORDER_SRGBA = 14,
- BRIG_CHANNEL_ORDER_SBGRA = 15,
- BRIG_CHANNEL_ORDER_INTENSITY = 16,
- BRIG_CHANNEL_ORDER_LUMINANCE = 17,
- BRIG_CHANNEL_ORDER_DEPTH = 18,
- BRIG_CHANNEL_ORDER_DEPTH_STENCIL = 19,
- BRIG_CHANNEL_ORDER_FIRST_USER_DEFINED = 128
-};
-
-typedef uint8_t BrigImageChannelType8_t;
-
-enum BrigImageChannelType
-{
- BRIG_CHANNEL_TYPE_SNORM_INT8 = 0,
- BRIG_CHANNEL_TYPE_SNORM_INT16 = 1,
- BRIG_CHANNEL_TYPE_UNORM_INT8 = 2,
- BRIG_CHANNEL_TYPE_UNORM_INT16 = 3,
- BRIG_CHANNEL_TYPE_UNORM_INT24 = 4,
- BRIG_CHANNEL_TYPE_UNORM_SHORT_555 = 5,
- BRIG_CHANNEL_TYPE_UNORM_SHORT_565 = 6,
- BRIG_CHANNEL_TYPE_UNORM_INT_101010 = 7,
- BRIG_CHANNEL_TYPE_SIGNED_INT8 = 8,
- BRIG_CHANNEL_TYPE_SIGNED_INT16 = 9,
- BRIG_CHANNEL_TYPE_SIGNED_INT32 = 10,
- BRIG_CHANNEL_TYPE_UNSIGNED_INT8 = 11,
- BRIG_CHANNEL_TYPE_UNSIGNED_INT16 = 12,
- BRIG_CHANNEL_TYPE_UNSIGNED_INT32 = 13,
- BRIG_CHANNEL_TYPE_HALF_FLOAT = 14,
- BRIG_CHANNEL_TYPE_FLOAT = 15,
- BRIG_CHANNEL_TYPE_FIRST_USER_DEFINED = 128
-};
-
-typedef uint8_t BrigImageGeometry8_t;
-
-enum BrigImageGeometry
-{
- BRIG_GEOMETRY_1D = 0,
- BRIG_GEOMETRY_2D = 1,
- BRIG_GEOMETRY_3D = 2,
- BRIG_GEOMETRY_1DA = 3,
- BRIG_GEOMETRY_2DA = 4,
- BRIG_GEOMETRY_1DB = 5,
- BRIG_GEOMETRY_2DDEPTH = 6,
- BRIG_GEOMETRY_2DADEPTH = 7,
- BRIG_GEOMETRY_FIRST_USER_DEFINED = 128
-};
-
-typedef uint8_t BrigImageQuery8_t;
-
-enum BrigImageQuery
-{
- BRIG_IMAGE_QUERY_WIDTH = 0,
- BRIG_IMAGE_QUERY_HEIGHT = 1,
- BRIG_IMAGE_QUERY_DEPTH = 2,
- BRIG_IMAGE_QUERY_ARRAY = 3,
- BRIG_IMAGE_QUERY_CHANNELORDER = 4,
- BRIG_IMAGE_QUERY_CHANNELTYPE = 5
-};
-
-enum BrigKind
-{
- BRIG_KIND_NONE = 0x0000,
- BRIG_KIND_DIRECTIVE_BEGIN = 0x1000,
- BRIG_KIND_DIRECTIVE_ARG_BLOCK_END = 0x1000,
- BRIG_KIND_DIRECTIVE_ARG_BLOCK_START = 0x1001,
- BRIG_KIND_DIRECTIVE_COMMENT = 0x1002,
- BRIG_KIND_DIRECTIVE_CONTROL = 0x1003,
- BRIG_KIND_DIRECTIVE_EXTENSION = 0x1004,
- BRIG_KIND_DIRECTIVE_FBARRIER = 0x1005,
- BRIG_KIND_DIRECTIVE_FUNCTION = 0x1006,
- BRIG_KIND_DIRECTIVE_INDIRECT_FUNCTION = 0x1007,
- BRIG_KIND_DIRECTIVE_KERNEL = 0x1008,
- BRIG_KIND_DIRECTIVE_LABEL = 0x1009,
- BRIG_KIND_DIRECTIVE_LOC = 0x100a,
- BRIG_KIND_DIRECTIVE_MODULE = 0x100b,
- BRIG_KIND_DIRECTIVE_PRAGMA = 0x100c,
- BRIG_KIND_DIRECTIVE_SIGNATURE = 0x100d,
- BRIG_KIND_DIRECTIVE_VARIABLE = 0x100e,
- BRIG_KIND_DIRECTIVE_END = 0x100f,
- BRIG_KIND_INST_BEGIN = 0x2000,
- BRIG_KIND_INST_ADDR = 0x2000,
- BRIG_KIND_INST_ATOMIC = 0x2001,
- BRIG_KIND_INST_BASIC = 0x2002,
- BRIG_KIND_INST_BR = 0x2003,
- BRIG_KIND_INST_CMP = 0x2004,
- BRIG_KIND_INST_CVT = 0x2005,
- BRIG_KIND_INST_IMAGE = 0x2006,
- BRIG_KIND_INST_LANE = 0x2007,
- BRIG_KIND_INST_MEM = 0x2008,
- BRIG_KIND_INST_MEM_FENCE = 0x2009,
- BRIG_KIND_INST_MOD = 0x200a,
- BRIG_KIND_INST_QUERY_IMAGE = 0x200b,
- BRIG_KIND_INST_QUERY_SAMPLER = 0x200c,
- BRIG_KIND_INST_QUEUE = 0x200d,
- BRIG_KIND_INST_SEG = 0x200e,
- BRIG_KIND_INST_SEG_CVT = 0x200f,
- BRIG_KIND_INST_SIGNAL = 0x2010,
- BRIG_KIND_INST_SOURCE_TYPE = 0x2011,
- BRIG_KIND_INST_END = 0x2012,
- BRIG_KIND_OPERAND_BEGIN = 0x3000,
- BRIG_KIND_OPERAND_ADDRESS = 0x3000,
- BRIG_KIND_OPERAND_ALIGN = 0x3001,
- BRIG_KIND_OPERAND_CODE_LIST = 0x3002,
- BRIG_KIND_OPERAND_CODE_REF = 0x3003,
- BRIG_KIND_OPERAND_CONSTANT_BYTES = 0x3004,
- BRIG_KIND_OPERAND_RESERVED = 0x3005,
- BRIG_KIND_OPERAND_CONSTANT_IMAGE = 0x3006,
- BRIG_KIND_OPERAND_CONSTANT_OPERAND_LIST = 0x3007,
- BRIG_KIND_OPERAND_CONSTANT_SAMPLER = 0x3008,
- BRIG_KIND_OPERAND_OPERAND_LIST = 0x3009,
- BRIG_KIND_OPERAND_REGISTER = 0x300a,
- BRIG_KIND_OPERAND_STRING = 0x300b,
- BRIG_KIND_OPERAND_WAVESIZE = 0x300c,
- BRIG_KIND_OPERAND_END = 0x300d
-};
-
-typedef uint8_t BrigLinkage8_t;
-
-enum BrigLinkage
-{
- BRIG_LINKAGE_NONE = 0,
- BRIG_LINKAGE_PROGRAM = 1,
- BRIG_LINKAGE_MODULE = 2,
- BRIG_LINKAGE_FUNCTION = 3,
- BRIG_LINKAGE_ARG = 4
-};
-
-typedef uint8_t BrigMachineModel8_t;
-
-enum BrigMachineModel
-{
- BRIG_MACHINE_SMALL = 0,
- BRIG_MACHINE_LARGE = 1
-};
-
-typedef uint8_t BrigMemoryModifier8_t;
-
-enum BrigMemoryModifierMask
-{
- BRIG_MEMORY_CONST = 1
-};
-
-typedef uint8_t BrigMemoryOrder8_t;
-
-enum BrigMemoryOrder
-{
- BRIG_MEMORY_ORDER_NONE = 0,
- BRIG_MEMORY_ORDER_RELAXED = 1,
- BRIG_MEMORY_ORDER_SC_ACQUIRE = 2,
- BRIG_MEMORY_ORDER_SC_RELEASE = 3,
- BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE = 4
-};
-
-typedef uint8_t BrigMemoryScope8_t;
-
-enum BrigMemoryScope
-{
- BRIG_MEMORY_SCOPE_NONE = 0,
- BRIG_MEMORY_SCOPE_WORKITEM = 1,
- BRIG_MEMORY_SCOPE_WAVEFRONT = 2,
- BRIG_MEMORY_SCOPE_WORKGROUP = 3,
- BRIG_MEMORY_SCOPE_AGENT = 4,
- BRIG_MEMORY_SCOPE_SYSTEM = 5
-};
-
-struct BrigModuleHeader
-{
- char identification[8];
- BrigVersion32_t brigMajor;
- BrigVersion32_t brigMinor;
- uint64_t byteCount;
- uint8_t hash[64];
- uint32_t reserved;
- uint32_t sectionCount;
- uint64_t sectionIndex;
-};
-
-typedef uint16_t BrigOpcode16_t;
-
-enum BrigOpcode
-{
- BRIG_OPCODE_NOP = 0,
- BRIG_OPCODE_ABS = 1,
- BRIG_OPCODE_ADD = 2,
- BRIG_OPCODE_BORROW = 3,
- BRIG_OPCODE_CARRY = 4,
- BRIG_OPCODE_CEIL = 5,
- BRIG_OPCODE_COPYSIGN = 6,
- BRIG_OPCODE_DIV = 7,
- BRIG_OPCODE_FLOOR = 8,
- BRIG_OPCODE_FMA = 9,
- BRIG_OPCODE_FRACT = 10,
- BRIG_OPCODE_MAD = 11,
- BRIG_OPCODE_MAX = 12,
- BRIG_OPCODE_MIN = 13,
- BRIG_OPCODE_MUL = 14,
- BRIG_OPCODE_MULHI = 15,
- BRIG_OPCODE_NEG = 16,
- BRIG_OPCODE_REM = 17,
- BRIG_OPCODE_RINT = 18,
- BRIG_OPCODE_SQRT = 19,
- BRIG_OPCODE_SUB = 20,
- BRIG_OPCODE_TRUNC = 21,
- BRIG_OPCODE_MAD24 = 22,
- BRIG_OPCODE_MAD24HI = 23,
- BRIG_OPCODE_MUL24 = 24,
- BRIG_OPCODE_MUL24HI = 25,
- BRIG_OPCODE_SHL = 26,
- BRIG_OPCODE_SHR = 27,
- BRIG_OPCODE_AND = 28,
- BRIG_OPCODE_NOT = 29,
- BRIG_OPCODE_OR = 30,
- BRIG_OPCODE_POPCOUNT = 31,
- BRIG_OPCODE_XOR = 32,
- BRIG_OPCODE_BITEXTRACT = 33,
- BRIG_OPCODE_BITINSERT = 34,
- BRIG_OPCODE_BITMASK = 35,
- BRIG_OPCODE_BITREV = 36,
- BRIG_OPCODE_BITSELECT = 37,
- BRIG_OPCODE_FIRSTBIT = 38,
- BRIG_OPCODE_LASTBIT = 39,
- BRIG_OPCODE_COMBINE = 40,
- BRIG_OPCODE_EXPAND = 41,
- BRIG_OPCODE_LDA = 42,
- BRIG_OPCODE_MOV = 43,
- BRIG_OPCODE_SHUFFLE = 44,
- BRIG_OPCODE_UNPACKHI = 45,
- BRIG_OPCODE_UNPACKLO = 46,
- BRIG_OPCODE_PACK = 47,
- BRIG_OPCODE_UNPACK = 48,
- BRIG_OPCODE_CMOV = 49,
- BRIG_OPCODE_CLASS = 50,
- BRIG_OPCODE_NCOS = 51,
- BRIG_OPCODE_NEXP2 = 52,
- BRIG_OPCODE_NFMA = 53,
- BRIG_OPCODE_NLOG2 = 54,
- BRIG_OPCODE_NRCP = 55,
- BRIG_OPCODE_NRSQRT = 56,
- BRIG_OPCODE_NSIN = 57,
- BRIG_OPCODE_NSQRT = 58,
- BRIG_OPCODE_BITALIGN = 59,
- BRIG_OPCODE_BYTEALIGN = 60,
- BRIG_OPCODE_PACKCVT = 61,
- BRIG_OPCODE_UNPACKCVT = 62,
- BRIG_OPCODE_LERP = 63,
- BRIG_OPCODE_SAD = 64,
- BRIG_OPCODE_SADHI = 65,
- BRIG_OPCODE_SEGMENTP = 66,
- BRIG_OPCODE_FTOS = 67,
- BRIG_OPCODE_STOF = 68,
- BRIG_OPCODE_CMP = 69,
- BRIG_OPCODE_CVT = 70,
- BRIG_OPCODE_LD = 71,
- BRIG_OPCODE_ST = 72,
- BRIG_OPCODE_ATOMIC = 73,
- BRIG_OPCODE_ATOMICNORET = 74,
- BRIG_OPCODE_SIGNAL = 75,
- BRIG_OPCODE_SIGNALNORET = 76,
- BRIG_OPCODE_MEMFENCE = 77,
- BRIG_OPCODE_RDIMAGE = 78,
- BRIG_OPCODE_LDIMAGE = 79,
- BRIG_OPCODE_STIMAGE = 80,
- BRIG_OPCODE_IMAGEFENCE = 81,
- BRIG_OPCODE_QUERYIMAGE = 82,
- BRIG_OPCODE_QUERYSAMPLER = 83,
- BRIG_OPCODE_CBR = 84,
- BRIG_OPCODE_BR = 85,
- BRIG_OPCODE_SBR = 86,
- BRIG_OPCODE_BARRIER = 87,
- BRIG_OPCODE_WAVEBARRIER = 88,
- BRIG_OPCODE_ARRIVEFBAR = 89,
- BRIG_OPCODE_INITFBAR = 90,
- BRIG_OPCODE_JOINFBAR = 91,
- BRIG_OPCODE_LEAVEFBAR = 92,
- BRIG_OPCODE_RELEASEFBAR = 93,
- BRIG_OPCODE_WAITFBAR = 94,
- BRIG_OPCODE_LDF = 95,
- BRIG_OPCODE_ACTIVELANECOUNT = 96,
- BRIG_OPCODE_ACTIVELANEID = 97,
- BRIG_OPCODE_ACTIVELANEMASK = 98,
- BRIG_OPCODE_ACTIVELANEPERMUTE = 99,
- BRIG_OPCODE_CALL = 100,
- BRIG_OPCODE_SCALL = 101,
- BRIG_OPCODE_ICALL = 102,
- BRIG_OPCODE_RET = 103,
- BRIG_OPCODE_ALLOCA = 104,
- BRIG_OPCODE_CURRENTWORKGROUPSIZE = 105,
- BRIG_OPCODE_CURRENTWORKITEMFLATID = 106,
- BRIG_OPCODE_DIM = 107,
- BRIG_OPCODE_GRIDGROUPS = 108,
- BRIG_OPCODE_GRIDSIZE = 109,
- BRIG_OPCODE_PACKETCOMPLETIONSIG = 110,
- BRIG_OPCODE_PACKETID = 111,
- BRIG_OPCODE_WORKGROUPID = 112,
- BRIG_OPCODE_WORKGROUPSIZE = 113,
- BRIG_OPCODE_WORKITEMABSID = 114,
- BRIG_OPCODE_WORKITEMFLATABSID = 115,
- BRIG_OPCODE_WORKITEMFLATID = 116,
- BRIG_OPCODE_WORKITEMID = 117,
- BRIG_OPCODE_CLEARDETECTEXCEPT = 118,
- BRIG_OPCODE_GETDETECTEXCEPT = 119,
- BRIG_OPCODE_SETDETECTEXCEPT = 120,
- BRIG_OPCODE_ADDQUEUEWRITEINDEX = 121,
- BRIG_OPCODE_CASQUEUEWRITEINDEX = 122,
- BRIG_OPCODE_LDQUEUEREADINDEX = 123,
- BRIG_OPCODE_LDQUEUEWRITEINDEX = 124,
- BRIG_OPCODE_STQUEUEREADINDEX = 125,
- BRIG_OPCODE_STQUEUEWRITEINDEX = 126,
- BRIG_OPCODE_CLOCK = 127,
- BRIG_OPCODE_CUID = 128,
- BRIG_OPCODE_DEBUGTRAP = 129,
- BRIG_OPCODE_GROUPBASEPTR = 130,
- BRIG_OPCODE_KERNARGBASEPTR = 131,
- BRIG_OPCODE_LANEID = 132,
- BRIG_OPCODE_MAXCUID = 133,
- BRIG_OPCODE_MAXWAVEID = 134,
- BRIG_OPCODE_NULLPTR = 135,
- BRIG_OPCODE_WAVEID = 136,
- BRIG_OPCODE_FIRST_USER_DEFINED = 32768
-};
-
-typedef uint8_t BrigPack8_t;
-
-enum BrigPack
-{
- BRIG_PACK_NONE = 0,
- BRIG_PACK_PP = 1,
- BRIG_PACK_PS = 2,
- BRIG_PACK_SP = 3,
- BRIG_PACK_SS = 4,
- BRIG_PACK_S = 5,
- BRIG_PACK_P = 6,
- BRIG_PACK_PPSAT = 7,
- BRIG_PACK_PSSAT = 8,
- BRIG_PACK_SPSAT = 9,
- BRIG_PACK_SSSAT = 10,
- BRIG_PACK_SSAT = 11,
- BRIG_PACK_PSAT = 12
-};
-
-typedef uint8_t BrigProfile8_t;
-
-enum BrigProfile
-{
- BRIG_PROFILE_BASE = 0,
- BRIG_PROFILE_FULL = 1
-};
-
-typedef uint16_t BrigRegisterKind16_t;
-
-enum BrigRegisterKind
-{
- BRIG_REGISTER_KIND_CONTROL = 0,
- BRIG_REGISTER_KIND_SINGLE = 1,
- BRIG_REGISTER_KIND_DOUBLE = 2,
- BRIG_REGISTER_KIND_QUAD = 3
-};
-
-typedef uint8_t BrigRound8_t;
-
-enum BrigRound
-{
- BRIG_ROUND_NONE = 0,
- BRIG_ROUND_FLOAT_DEFAULT = 1,
- BRIG_ROUND_FLOAT_NEAR_EVEN = 2,
- BRIG_ROUND_FLOAT_ZERO = 3,
- BRIG_ROUND_FLOAT_PLUS_INFINITY = 4,
- BRIG_ROUND_FLOAT_MINUS_INFINITY = 5,
- BRIG_ROUND_INTEGER_NEAR_EVEN = 6,
- BRIG_ROUND_INTEGER_ZERO = 7,
- BRIG_ROUND_INTEGER_PLUS_INFINITY = 8,
- BRIG_ROUND_INTEGER_MINUS_INFINITY = 9,
- BRIG_ROUND_INTEGER_NEAR_EVEN_SAT = 10,
- BRIG_ROUND_INTEGER_ZERO_SAT = 11,
- BRIG_ROUND_INTEGER_PLUS_INFINITY_SAT = 12,
- BRIG_ROUND_INTEGER_MINUS_INFINITY_SAT = 13,
- BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN = 14,
- BRIG_ROUND_INTEGER_SIGNALING_ZERO = 15,
- BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY = 16,
- BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY = 17,
- BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN_SAT = 18,
- BRIG_ROUND_INTEGER_SIGNALING_ZERO_SAT = 19,
- BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY_SAT = 20,
- BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY_SAT = 21
-};
-
-typedef uint8_t BrigSamplerAddressing8_t;
-
-enum BrigSamplerAddressing
-{
- BRIG_ADDRESSING_UNDEFINED = 0,
- BRIG_ADDRESSING_CLAMP_TO_EDGE = 1,
- BRIG_ADDRESSING_CLAMP_TO_BORDER = 2,
- BRIG_ADDRESSING_REPEAT = 3,
- BRIG_ADDRESSING_MIRRORED_REPEAT = 4,
- BRIG_ADDRESSING_FIRST_USER_DEFINED = 128
-};
-
-typedef uint8_t BrigSamplerCoordNormalization8_t;
-
-enum BrigSamplerCoordNormalization
-{
- BRIG_COORD_UNNORMALIZED = 0,
- BRIG_COORD_NORMALIZED = 1
-};
-
-typedef uint8_t BrigSamplerFilter8_t;
-
-enum BrigSamplerFilter
-{
- BRIG_FILTER_NEAREST = 0,
- BRIG_FILTER_LINEAR = 1,
- BRIG_FILTER_FIRST_USER_DEFINED = 128
-};
-
-typedef uint8_t BrigSamplerQuery8_t;
-
-enum BrigSamplerQuery
-{
- BRIG_SAMPLER_QUERY_ADDRESSING = 0,
- BRIG_SAMPLER_QUERY_COORD = 1,
- BRIG_SAMPLER_QUERY_FILTER = 2
-};
-
-typedef uint32_t BrigSectionIndex32_t;
-
-enum BrigSectionIndex
-{
- BRIG_SECTION_INDEX_DATA = 0,
- BRIG_SECTION_INDEX_CODE = 1,
- BRIG_SECTION_INDEX_OPERAND = 2,
- BRIG_SECTION_INDEX_BEGIN_IMPLEMENTATION_DEFINED = 3
-};
-
-struct BrigSectionHeader
-{
- uint64_t byteCount;
- uint32_t headerByteCount;
- uint32_t nameLength;
- uint8_t name[1];
-};
-
-typedef uint8_t BrigSegCvtModifier8_t;
-
-enum BrigSegCvtModifierMask
-{
- BRIG_SEG_CVT_NONULL = 1
-};
-
-typedef uint8_t BrigSegment8_t;
-
-enum BrigSegment
-{
- BRIG_SEGMENT_NONE = 0,
- BRIG_SEGMENT_FLAT = 1,
- BRIG_SEGMENT_GLOBAL = 2,
- BRIG_SEGMENT_READONLY = 3,
- BRIG_SEGMENT_KERNARG = 4,
- BRIG_SEGMENT_GROUP = 5,
- BRIG_SEGMENT_PRIVATE = 6,
- BRIG_SEGMENT_SPILL = 7,
- BRIG_SEGMENT_ARG = 8,
- BRIG_SEGMENT_FIRST_USER_DEFINED = 128
-};
-
-enum
-{
- BRIG_TYPE_BASE_SIZE = 5,
- BRIG_TYPE_PACK_SIZE = 2,
- BRIG_TYPE_ARRAY_SIZE = 1,
-
- BRIG_TYPE_BASE_SHIFT = 0,
- BRIG_TYPE_PACK_SHIFT = BRIG_TYPE_BASE_SHIFT + BRIG_TYPE_BASE_SIZE,
- BRIG_TYPE_ARRAY_SHIFT = BRIG_TYPE_PACK_SHIFT + BRIG_TYPE_PACK_SIZE,
-
- BRIG_TYPE_BASE_MASK = ((1 << BRIG_TYPE_BASE_SIZE) - 1)
- << BRIG_TYPE_BASE_SHIFT,
- BRIG_TYPE_PACK_MASK = ((1 << BRIG_TYPE_PACK_SIZE) - 1)
- << BRIG_TYPE_PACK_SHIFT,
- BRIG_TYPE_ARRAY_MASK = ((1 << BRIG_TYPE_ARRAY_SIZE) - 1)
- << BRIG_TYPE_ARRAY_SHIFT,
-
- BRIG_TYPE_PACK_NONE = 0 << BRIG_TYPE_PACK_SHIFT,
- BRIG_TYPE_PACK_32 = 1 << BRIG_TYPE_PACK_SHIFT,
- BRIG_TYPE_PACK_64 = 2 << BRIG_TYPE_PACK_SHIFT,
- BRIG_TYPE_PACK_128 = 3 << BRIG_TYPE_PACK_SHIFT,
-
- BRIG_TYPE_ARRAY = 1 << BRIG_TYPE_ARRAY_SHIFT
-};
-
-typedef uint16_t BrigType16_t;
-
-enum BrigType
-{
- BRIG_TYPE_NONE = 0,
-
- BRIG_TYPE_U8 = 1,
- BRIG_TYPE_U16 = 2,
- BRIG_TYPE_U32 = 3,
- BRIG_TYPE_U64 = 4,
-
- BRIG_TYPE_S8 = 5,
- BRIG_TYPE_S16 = 6,
- BRIG_TYPE_S32 = 7,
- BRIG_TYPE_S64 = 8,
-
- BRIG_TYPE_F16 = 9,
- BRIG_TYPE_F32 = 10,
- BRIG_TYPE_F64 = 11,
-
- BRIG_TYPE_B1 = 12,
- BRIG_TYPE_B8 = 13,
- BRIG_TYPE_B16 = 14,
- BRIG_TYPE_B32 = 15,
- BRIG_TYPE_B64 = 16,
- BRIG_TYPE_B128 = 17,
-
- BRIG_TYPE_SAMP = 18,
- BRIG_TYPE_ROIMG = 19,
- BRIG_TYPE_WOIMG = 20,
- BRIG_TYPE_RWIMG = 21,
-
- BRIG_TYPE_SIG32 = 22,
- BRIG_TYPE_SIG64 = 23,
-
- BRIG_TYPE_U8X4 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_32,
- BRIG_TYPE_U8X8 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_64,
- BRIG_TYPE_U8X16 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_128,
-
- BRIG_TYPE_U16X2 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_32,
- BRIG_TYPE_U16X4 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_64,
- BRIG_TYPE_U16X8 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_128,
-
- BRIG_TYPE_U32X2 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_64,
- BRIG_TYPE_U32X4 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_128,
-
- BRIG_TYPE_U64X2 = BRIG_TYPE_U64 | BRIG_TYPE_PACK_128,
-
- BRIG_TYPE_S8X4 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_32,
- BRIG_TYPE_S8X8 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_64,
- BRIG_TYPE_S8X16 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_128,
-
- BRIG_TYPE_S16X2 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_32,
- BRIG_TYPE_S16X4 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_64,
- BRIG_TYPE_S16X8 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_128,
-
- BRIG_TYPE_S32X2 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_64,
- BRIG_TYPE_S32X4 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_128,
-
- BRIG_TYPE_S64X2 = BRIG_TYPE_S64 | BRIG_TYPE_PACK_128,
-
- BRIG_TYPE_F16X2 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_32,
- BRIG_TYPE_F16X4 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_64,
- BRIG_TYPE_F16X8 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_128,
-
- BRIG_TYPE_F32X2 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_64,
- BRIG_TYPE_F32X4 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_128,
-
- BRIG_TYPE_F64X2 = BRIG_TYPE_F64 | BRIG_TYPE_PACK_128,
-
- BRIG_TYPE_U8_ARRAY = BRIG_TYPE_U8 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_U16_ARRAY = BRIG_TYPE_U16 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_U32_ARRAY = BRIG_TYPE_U32 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_U64_ARRAY = BRIG_TYPE_U64 | BRIG_TYPE_ARRAY,
-
- BRIG_TYPE_S8_ARRAY = BRIG_TYPE_S8 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_S16_ARRAY = BRIG_TYPE_S16 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_S32_ARRAY = BRIG_TYPE_S32 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_S64_ARRAY = BRIG_TYPE_S64 | BRIG_TYPE_ARRAY,
-
- BRIG_TYPE_F16_ARRAY = BRIG_TYPE_F16 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_F32_ARRAY = BRIG_TYPE_F32 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_F64_ARRAY = BRIG_TYPE_F64 | BRIG_TYPE_ARRAY,
-
- BRIG_TYPE_B8_ARRAY = BRIG_TYPE_B8 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_B16_ARRAY = BRIG_TYPE_B16 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_B32_ARRAY = BRIG_TYPE_B32 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_B64_ARRAY = BRIG_TYPE_B64 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_B128_ARRAY = BRIG_TYPE_B128 | BRIG_TYPE_ARRAY,
-
- BRIG_TYPE_SAMP_ARRAY = BRIG_TYPE_SAMP | BRIG_TYPE_ARRAY,
- BRIG_TYPE_ROIMG_ARRAY = BRIG_TYPE_ROIMG | BRIG_TYPE_ARRAY,
- BRIG_TYPE_WOIMG_ARRAY = BRIG_TYPE_WOIMG | BRIG_TYPE_ARRAY,
- BRIG_TYPE_RWIMG_ARRAY = BRIG_TYPE_RWIMG | BRIG_TYPE_ARRAY,
-
- BRIG_TYPE_SIG32_ARRAY = BRIG_TYPE_SIG32 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_SIG64_ARRAY = BRIG_TYPE_SIG64 | BRIG_TYPE_ARRAY,
-
- BRIG_TYPE_U8X4_ARRAY = BRIG_TYPE_U8X4 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_U8X8_ARRAY = BRIG_TYPE_U8X8 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_U8X16_ARRAY = BRIG_TYPE_U8X16 | BRIG_TYPE_ARRAY,
-
- BRIG_TYPE_U16X2_ARRAY = BRIG_TYPE_U16X2 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_U16X4_ARRAY = BRIG_TYPE_U16X4 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_U16X8_ARRAY = BRIG_TYPE_U16X8 | BRIG_TYPE_ARRAY,
-
- BRIG_TYPE_U32X2_ARRAY = BRIG_TYPE_U32X2 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_U32X4_ARRAY = BRIG_TYPE_U32X4 | BRIG_TYPE_ARRAY,
-
- BRIG_TYPE_U64X2_ARRAY = BRIG_TYPE_U64X2 | BRIG_TYPE_ARRAY,
-
- BRIG_TYPE_S8X4_ARRAY = BRIG_TYPE_S8X4 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_S8X8_ARRAY = BRIG_TYPE_S8X8 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_S8X16_ARRAY = BRIG_TYPE_S8X16 | BRIG_TYPE_ARRAY,
-
- BRIG_TYPE_S16X2_ARRAY = BRIG_TYPE_S16X2 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_S16X4_ARRAY = BRIG_TYPE_S16X4 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_S16X8_ARRAY = BRIG_TYPE_S16X8 | BRIG_TYPE_ARRAY,
-
- BRIG_TYPE_S32X2_ARRAY = BRIG_TYPE_S32X2 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_S32X4_ARRAY = BRIG_TYPE_S32X4 | BRIG_TYPE_ARRAY,
-
- BRIG_TYPE_S64X2_ARRAY = BRIG_TYPE_S64X2 | BRIG_TYPE_ARRAY,
-
- BRIG_TYPE_F16X2_ARRAY = BRIG_TYPE_F16X2 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_F16X4_ARRAY = BRIG_TYPE_F16X4 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_F16X8_ARRAY = BRIG_TYPE_F16X8 | BRIG_TYPE_ARRAY,
-
- BRIG_TYPE_F32X2_ARRAY = BRIG_TYPE_F32X2 | BRIG_TYPE_ARRAY,
- BRIG_TYPE_F32X4_ARRAY = BRIG_TYPE_F32X4 | BRIG_TYPE_ARRAY,
-
- BRIG_TYPE_F64X2_ARRAY = BRIG_TYPE_F64X2 | BRIG_TYPE_ARRAY
-};
-
-struct BrigUInt64
-{
- uint32_t lo;
- uint32_t hi;
-};
-
-typedef uint8_t BrigVariableModifier8_t;
-
-enum BrigVariableModifierMask
-{
- BRIG_VARIABLE_DEFINITION = 1,
- BRIG_VARIABLE_CONST = 2
-};
-
-enum BrigVersion
-{
- BRIG_VERSION_HSAIL_MAJOR = 1,
- BRIG_VERSION_HSAIL_MINOR = 0,
- BRIG_VERSION_BRIG_MAJOR = 1,
- BRIG_VERSION_BRIG_MINOR = 0
-};
-
-typedef uint8_t BrigWidth8_t;
-
-enum BrigWidth
-{
- BRIG_WIDTH_NONE = 0,
- BRIG_WIDTH_1 = 1,
- BRIG_WIDTH_2 = 2,
- BRIG_WIDTH_4 = 3,
- BRIG_WIDTH_8 = 4,
- BRIG_WIDTH_16 = 5,
- BRIG_WIDTH_32 = 6,
- BRIG_WIDTH_64 = 7,
- BRIG_WIDTH_128 = 8,
- BRIG_WIDTH_256 = 9,
- BRIG_WIDTH_512 = 10,
- BRIG_WIDTH_1024 = 11,
- BRIG_WIDTH_2048 = 12,
- BRIG_WIDTH_4096 = 13,
- BRIG_WIDTH_8192 = 14,
- BRIG_WIDTH_16384 = 15,
- BRIG_WIDTH_32768 = 16,
- BRIG_WIDTH_65536 = 17,
- BRIG_WIDTH_131072 = 18,
- BRIG_WIDTH_262144 = 19,
- BRIG_WIDTH_524288 = 20,
- BRIG_WIDTH_1048576 = 21,
- BRIG_WIDTH_2097152 = 22,
- BRIG_WIDTH_4194304 = 23,
- BRIG_WIDTH_8388608 = 24,
- BRIG_WIDTH_16777216 = 25,
- BRIG_WIDTH_33554432 = 26,
- BRIG_WIDTH_67108864 = 27,
- BRIG_WIDTH_134217728 = 28,
- BRIG_WIDTH_268435456 = 29,
- BRIG_WIDTH_536870912 = 30,
- BRIG_WIDTH_1073741824 = 31,
- BRIG_WIDTH_2147483648 = 32,
- BRIG_WIDTH_WAVESIZE = 33,
- BRIG_WIDTH_ALL = 34
-};
-
-struct BrigData
-{
- uint32_t byteCount;
- uint8_t bytes[1];
-};
-
-struct BrigDirectiveArgBlock
-{
- BrigBase base;
-};
-
-struct BrigDirectiveComment
-{
- BrigBase base;
- BrigDataOffsetString32_t name;
-};
-
-struct BrigDirectiveControl
-{
- BrigBase base;
- BrigControlDirective16_t control;
- uint16_t reserved;
- BrigDataOffsetOperandList32_t operands;
-};
-
-struct BrigDirectiveExecutable
-{
- BrigBase base;
- BrigDataOffsetString32_t name;
- uint16_t outArgCount;
- uint16_t inArgCount;
- BrigCodeOffset32_t firstInArg;
- BrigCodeOffset32_t firstCodeBlockEntry;
- BrigCodeOffset32_t nextModuleEntry;
- BrigExecutableModifier8_t modifier;
- BrigLinkage8_t linkage;
- uint16_t reserved;
-};
-
-struct BrigDirectiveExtension
-{
- BrigBase base;
- BrigDataOffsetString32_t name;
-};
-
-struct BrigDirectiveFbarrier
-{
- BrigBase base;
- BrigDataOffsetString32_t name;
- BrigVariableModifier8_t modifier;
- BrigLinkage8_t linkage;
- uint16_t reserved;
-};
-
-struct BrigDirectiveLabel
-{
- BrigBase base;
- BrigDataOffsetString32_t name;
-};
-
-struct BrigDirectiveLoc
-{
- BrigBase base;
- BrigDataOffsetString32_t filename;
- uint32_t line;
- uint32_t column;
-};
-
-struct BrigDirectiveModule
-{
- BrigBase base;
- BrigDataOffsetString32_t name;
- BrigVersion32_t hsailMajor;
- BrigVersion32_t hsailMinor;
- BrigProfile8_t profile;
- BrigMachineModel8_t machineModel;
- BrigRound8_t defaultFloatRound;
- uint8_t reserved;
-};
-
-struct BrigDirectiveNone
-{
- BrigBase base;
-};
-
-struct BrigDirectivePragma
-{
- BrigBase base;
- BrigDataOffsetOperandList32_t operands;
-};
-
-struct BrigDirectiveVariable
-{
- BrigBase base;
- BrigDataOffsetString32_t name;
- BrigOperandOffset32_t init;
- BrigType16_t type;
- BrigSegment8_t segment;
- BrigAlignment8_t align;
- BrigUInt64 dim;
- BrigVariableModifier8_t modifier;
- BrigLinkage8_t linkage;
- BrigAllocation8_t allocation;
- uint8_t reserved;
-};
-
-struct BrigInstBase
-{
- BrigBase base;
- BrigOpcode16_t opcode;
- BrigType16_t type;
- BrigDataOffsetOperandList32_t operands;
-};
-
-struct BrigInstAddr
-{
- BrigInstBase base;
- BrigSegment8_t segment;
- uint8_t reserved[3];
-};
-
-struct BrigInstAtomic
-{
- BrigInstBase base;
- BrigSegment8_t segment;
- BrigMemoryOrder8_t memoryOrder;
- BrigMemoryScope8_t memoryScope;
- BrigAtomicOperation8_t atomicOperation;
- uint8_t equivClass;
- uint8_t reserved[3];
-};
-
-struct BrigInstBasic
-{
- BrigInstBase base;
-};
-
-struct BrigInstBr
-{
- BrigInstBase base;
- BrigWidth8_t width;
- uint8_t reserved[3];
-};
-
-struct BrigInstCmp
-{
- BrigInstBase base;
- BrigType16_t sourceType;
- BrigAluModifier8_t modifier;
- BrigCompareOperation8_t compare;
- BrigPack8_t pack;
- uint8_t reserved[3];
-};
-
-struct BrigInstCvt
-{
- BrigInstBase base;
- BrigType16_t sourceType;
- BrigAluModifier8_t modifier;
- BrigRound8_t round;
-};
-
-struct BrigInstImage
-{
- BrigInstBase base;
- BrigType16_t imageType;
- BrigType16_t coordType;
- BrigImageGeometry8_t geometry;
- uint8_t equivClass;
- uint16_t reserved;
-};
-
-struct BrigInstLane
-{
- BrigInstBase base;
- BrigType16_t sourceType;
- BrigWidth8_t width;
- uint8_t reserved;
-};
-
-struct BrigInstMem
-{
- BrigInstBase base;
- BrigSegment8_t segment;
- BrigAlignment8_t align;
- uint8_t equivClass;
- BrigWidth8_t width;
- BrigMemoryModifier8_t modifier;
- uint8_t reserved[3];
-};
-
-struct BrigInstMemFence
-{
- BrigInstBase base;
- BrigMemoryOrder8_t memoryOrder;
- BrigMemoryScope8_t globalSegmentMemoryScope;
- BrigMemoryScope8_t groupSegmentMemoryScope;
- BrigMemoryScope8_t imageSegmentMemoryScope;
-};
-
-struct BrigInstMod
-{
- BrigInstBase base;
- BrigAluModifier8_t modifier;
- BrigRound8_t round;
- BrigPack8_t pack;
- uint8_t reserved;
-};
-
-struct BrigInstQueryImage
-{
- BrigInstBase base;
- BrigType16_t imageType;
- BrigImageGeometry8_t geometry;
- BrigImageQuery8_t query;
-};
-
-struct BrigInstQuerySampler
-{
- BrigInstBase base;
- BrigSamplerQuery8_t query;
- uint8_t reserved[3];
-};
-
-struct BrigInstQueue
-{
- BrigInstBase base;
- BrigSegment8_t segment;
- BrigMemoryOrder8_t memoryOrder;
- uint16_t reserved;
-};
-
-struct BrigInstSeg
-{
- BrigInstBase base;
- BrigSegment8_t segment;
- uint8_t reserved[3];
-};
-
-struct BrigInstSegCvt
-{
- BrigInstBase base;
- BrigType16_t sourceType;
- BrigSegment8_t segment;
- BrigSegCvtModifier8_t modifier;
-};
-
-struct BrigInstSignal
-{
- BrigInstBase base;
- BrigType16_t signalType;
- BrigMemoryOrder8_t memoryOrder;
- BrigAtomicOperation8_t signalOperation;
-};
-
-struct BrigInstSourceType
-{
- BrigInstBase base;
- BrigType16_t sourceType;
- uint16_t reserved;
-};
-
-struct BrigOperandAddress
-{
- BrigBase base;
- BrigCodeOffset32_t symbol;
- BrigOperandOffset32_t reg;
- BrigUInt64 offset;
-};
-
-struct BrigOperandAlign
-{
- BrigBase base;
- BrigAlignment8_t align;
- uint8_t reserved[3];
-};
-
-struct BrigOperandCodeList
-{
- BrigBase base;
- BrigDataOffsetCodeList32_t elements;
-};
-
-struct BrigOperandCodeRef
-{
- BrigBase base;
- BrigCodeOffset32_t ref;
-};
-
-struct BrigOperandConstantBytes
-{
- BrigBase base;
- BrigType16_t type;
- uint16_t reserved;
- BrigDataOffsetString32_t bytes;
-};
-
-struct BrigOperandConstantImage
-{
- BrigBase base;
- BrigType16_t type;
- BrigImageGeometry8_t geometry;
- BrigImageChannelOrder8_t channelOrder;
- BrigImageChannelType8_t channelType;
- uint8_t reserved[3];
- BrigUInt64 width;
- BrigUInt64 height;
- BrigUInt64 depth;
- BrigUInt64 array;
-};
-
-struct BrigOperandConstantOperandList
-{
- BrigBase base;
- BrigType16_t type;
- uint16_t reserved;
- BrigDataOffsetOperandList32_t elements;
-};
-
-struct BrigOperandConstantSampler
-{
- BrigBase base;
- BrigType16_t type;
- BrigSamplerCoordNormalization8_t coord;
- BrigSamplerFilter8_t filter;
- BrigSamplerAddressing8_t addressing;
- uint8_t reserved[3];
-};
-
-struct BrigOperandOperandList
-{
- BrigBase base;
- BrigDataOffsetOperandList32_t elements;
-};
-
-struct BrigOperandRegister
-{
- BrigBase base;
- BrigRegisterKind16_t regKind;
- uint16_t regNum;
-};
-
-struct BrigOperandString
-{
- BrigBase base;
- BrigDataOffsetString32_t string;
-};
-
-struct BrigOperandWavesize
-{
- BrigBase base;
-};
-
-#endif /* HSA_BRIG_FORMAT_H */
+++ /dev/null
-/* Producing binary form of HSA BRIG from our internal representation.
- Copyright (C) 2013-2020 Free Software Foundation, Inc.
- Contributed by Martin Jambor <mjambor@suse.cz> and
- Martin Liska <mliska@suse.cz>.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING3. If not see
-<http://www.gnu.org/licenses/>. */
-
-#include "config.h"
-#include "system.h"
-#include "coretypes.h"
-#include "tm.h"
-#include "target.h"
-#include "memmodel.h"
-#include "tm_p.h"
-#include "is-a.h"
-#include "vec.h"
-#include "hash-table.h"
-#include "hash-map.h"
-#include "tree.h"
-#include "tree-iterator.h"
-#include "stor-layout.h"
-#include "output.h"
-#include "basic-block.h"
-#include "function.h"
-#include "cfg.h"
-#include "fold-const.h"
-#include "stringpool.h"
-#include "gimple-pretty-print.h"
-#include "diagnostic-core.h"
-#include "cgraph.h"
-#include "dumpfile.h"
-#include "print-tree.h"
-#include "alloc-pool.h"
-#include "symbol-summary.h"
-#include "hsa-common.h"
-#include "gomp-constants.h"
-
-/* Convert VAL to little endian form, if necessary. */
-
-static uint16_t
-lendian16 (uint16_t val)
-{
-#if GCC_VERSION >= 4008
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
- return val;
-#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
- return __builtin_bswap16 (val);
-#else /* __ORDER_PDP_ENDIAN__ */
- return val;
-#endif
-#else
-// provide a safe slower default, with shifts and masking
-#ifndef WORDS_BIGENDIAN
- return val;
-#else
- return (val >> 8) | (val << 8);
-#endif
-#endif
-}
-
-/* Convert VAL to little endian form, if necessary. */
-
-static uint32_t
-lendian32 (uint32_t val)
-{
-#if GCC_VERSION >= 4006
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
- return val;
-#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
- return __builtin_bswap32 (val);
-#else /* __ORDER_PDP_ENDIAN__ */
- return (val >> 16) | (val << 16);
-#endif
-#else
-// provide a safe slower default, with shifts and masking
-#ifndef WORDS_BIGENDIAN
- return val;
-#else
- val = ((val & 0xff00ff00) >> 8) | ((val & 0xff00ff) << 8);
- return (val >> 16) | (val << 16);
-#endif
-#endif
-}
-
-/* Convert VAL to little endian form, if necessary. */
-
-static uint64_t
-lendian64 (uint64_t val)
-{
-#if GCC_VERSION >= 4006
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
- return val;
-#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
- return __builtin_bswap64 (val);
-#else /* __ORDER_PDP_ENDIAN__ */
- return (((val & 0xffffll) << 48)
- | ((val & 0xffff0000ll) << 16)
- | ((val & 0xffff00000000ll) >> 16)
- | ((val & 0xffff000000000000ll) >> 48));
-#endif
-#else
-// provide a safe slower default, with shifts and masking
-#ifndef WORDS_BIGENDIAN
- return val;
-#else
- val = (((val & 0xff00ff00ff00ff00ll) >> 8)
- | ((val & 0x00ff00ff00ff00ffll) << 8));
- val = ((( val & 0xffff0000ffff0000ll) >> 16)
- | (( val & 0x0000ffff0000ffffll) << 16));
- return (val >> 32) | (val << 32);
-#endif
-#endif
-}
-
-#define BRIG_ELF_SECTION_NAME ".brig"
-#define BRIG_LABEL_STRING "hsa_brig"
-#define BRIG_SECTION_DATA_NAME "hsa_data"
-#define BRIG_SECTION_CODE_NAME "hsa_code"
-#define BRIG_SECTION_OPERAND_NAME "hsa_operand"
-
-#define BRIG_CHUNK_MAX_SIZE (64 * 1024)
-
-/* Required HSA section alignment. */
-
-#define HSA_SECTION_ALIGNMENT 16
-
-/* Chunks of BRIG binary data. */
-
-struct hsa_brig_data_chunk
-{
- /* Size of the data already stored into a chunk. */
- unsigned size;
-
- /* Pointer to the data. */
- char *data;
-};
-
-/* Structure representing a BRIG section, holding and writing its data. */
-
-struct hsa_brig_section
-{
- /* Section name that will be output to the BRIG. */
- const char *section_name;
- /* Size in bytes of all data stored in the section. */
- unsigned total_size;
- /* The size of the header of the section including padding. */
- unsigned header_byte_count;
- /* The size of the header of the section without any padding. */
- unsigned header_byte_delta;
-
- void init (const char *name);
- void release ();
- void output ();
- unsigned add (const void *data, unsigned len, void **output = NULL);
- void round_size_up (int factor);
- void *get_ptr_by_offset (unsigned int offset);
-
-private:
- void allocate_new_chunk ();
-
- /* Buffers of binary data, each containing BRIG_CHUNK_MAX_SIZE bytes. */
- vec <struct hsa_brig_data_chunk> chunks;
-
- /* More convenient access to the last chunk from the vector above. */
- struct hsa_brig_data_chunk *cur_chunk;
-};
-
-static struct hsa_brig_section brig_data, brig_code, brig_operand;
-static uint32_t brig_insn_count;
-static bool brig_initialized = false;
-
-/* Mapping between emitted HSA functions and their offset in code segment. */
-static hash_map<tree, BrigCodeOffset32_t> *function_offsets;
-
-/* Hash map of emitted function declarations. */
-static hash_map <tree, BrigDirectiveExecutable *> *emitted_declarations;
-
-/* Hash table of emitted internal function declaration offsets. */
-hash_table <hsa_internal_fn_hasher> *hsa_emitted_internal_decls;
-
-/* List of sbr instructions. */
-static vec <hsa_insn_sbr *> *switch_instructions;
-
-class function_linkage_pair
-{
-public:
- function_linkage_pair (tree decl, unsigned int off)
- : function_decl (decl), offset (off) {}
-
- /* Declaration of called function. */
- tree function_decl;
-
- /* Offset in operand section. */
- unsigned int offset;
-};
-
-/* Vector of function calls where we need to resolve function offsets. */
-static auto_vec <function_linkage_pair> function_call_linkage;
-
-/* Add a new chunk, allocate data for it and initialize it. */
-
-void
-hsa_brig_section::allocate_new_chunk ()
-{
- struct hsa_brig_data_chunk new_chunk;
-
- new_chunk.data = XCNEWVEC (char, BRIG_CHUNK_MAX_SIZE);
- new_chunk.size = 0;
- cur_chunk = chunks.safe_push (new_chunk);
-}
-
-/* Initialize the brig section. */
-
-void
-hsa_brig_section::init (const char *name)
-{
- section_name = name;
- /* While the following computation is basically wrong, because the intent
- certainly wasn't to have the first character of name and padding, which
- are a part of sizeof (BrigSectionHeader), included in the first addend,
- this is what the disassembler expects. */
- total_size = sizeof (BrigSectionHeader) + strlen (section_name);
- chunks.create (1);
- allocate_new_chunk ();
- header_byte_delta = total_size;
- round_size_up (4);
- header_byte_count = total_size;
-}
-
-/* Free all data in the section. */
-
-void
-hsa_brig_section::release ()
-{
- for (unsigned i = 0; i < chunks.length (); i++)
- free (chunks[i].data);
- chunks.release ();
- cur_chunk = NULL;
-}
-
-/* Write the section to the output file to a section with the name given at
- initialization. Switches the output section and does not restore it. */
-
-void
-hsa_brig_section::output ()
-{
- struct BrigSectionHeader section_header;
- char padding[8];
-
- section_header.byteCount = lendian64 (total_size);
- section_header.headerByteCount = lendian32 (header_byte_count);
- section_header.nameLength = lendian32 (strlen (section_name));
- assemble_string ((const char *) &section_header, 16);
- assemble_string (section_name, (section_header.nameLength));
- memset (&padding, 0, sizeof (padding));
- /* This is also a consequence of the wrong header size computation described
- in a comment in hsa_brig_section::init. */
- assemble_string (padding, 8);
- for (unsigned i = 0; i < chunks.length (); i++)
- assemble_string (chunks[i].data, chunks[i].size);
-}
-
-/* Add to the stream LEN bytes of opaque binary DATA. Return the offset at
- which it was stored. If OUTPUT is not NULL, store into it the pointer to
- the place where DATA was actually stored. */
-
-unsigned
-hsa_brig_section::add (const void *data, unsigned len, void **output)
-{
- unsigned offset = total_size;
-
- gcc_assert (len <= BRIG_CHUNK_MAX_SIZE);
- if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - len))
- allocate_new_chunk ();
-
- char *dst = cur_chunk->data + cur_chunk->size;
- memcpy (dst, data, len);
- if (output)
- *output = dst;
- cur_chunk->size += len;
- total_size += len;
-
- return offset;
-}
-
-/* Add padding to section so that its size is divisible by FACTOR. */
-
-void
-hsa_brig_section::round_size_up (int factor)
-{
- unsigned padding, res = total_size % factor;
-
- if (res == 0)
- return;
-
- padding = factor - res;
- total_size += padding;
- if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - padding))
- {
- padding -= BRIG_CHUNK_MAX_SIZE - cur_chunk->size;
- cur_chunk->size = BRIG_CHUNK_MAX_SIZE;
- allocate_new_chunk ();
- }
-
- cur_chunk->size += padding;
-}
-
-/* Return pointer to data by global OFFSET in the section. */
-
-void *
-hsa_brig_section::get_ptr_by_offset (unsigned int offset)
-{
- gcc_assert (offset < total_size);
- offset -= header_byte_delta;
-
- unsigned i;
- for (i = 0; offset >= chunks[i].size; i++)
- offset -= chunks[i].size;
-
- return chunks[i].data + offset;
-}
-
-/* BRIG string data hashing. */
-
-struct brig_string_slot
-{
- const char *s;
- char prefix;
- int len;
- uint32_t offset;
-};
-
-/* Hash table helpers. */
-
-struct brig_string_slot_hasher : pointer_hash <brig_string_slot>
-{
- static inline hashval_t hash (const value_type);
- static inline bool equal (const value_type, const compare_type);
- static inline void remove (value_type);
-};
-
-/* Returns a hash code for DS. Adapted from libiberty's htab_hash_string
- to support strings that may not end in '\0'. */
-
-inline hashval_t
-brig_string_slot_hasher::hash (const value_type ds)
-{
- hashval_t r = ds->len;
- int i;
-
- for (i = 0; i < ds->len; i++)
- r = r * 67 + (unsigned) ds->s[i] - 113;
- r = r * 67 + (unsigned) ds->prefix - 113;
- return r;
-}
-
-/* Returns nonzero if DS1 and DS2 are equal. */
-
-inline bool
-brig_string_slot_hasher::equal (const value_type ds1, const compare_type ds2)
-{
- if (ds1->len == ds2->len)
- return ds1->prefix == ds2->prefix
- && memcmp (ds1->s, ds2->s, ds1->len) == 0;
-
- return 0;
-}
-
-/* Deallocate memory for DS upon its removal. */
-
-inline void
-brig_string_slot_hasher::remove (value_type ds)
-{
- free (const_cast<char *> (ds->s));
- free (ds);
-}
-
-/* Hash for strings we output in order not to duplicate them needlessly. */
-
-static hash_table<brig_string_slot_hasher> *brig_string_htab;
-
-/* Emit a null terminated string STR to the data section and return its
- offset in it. If PREFIX is non-zero, output it just before STR too.
- Sanitize the string if SANITIZE option is set to true. */
-
-static unsigned
-brig_emit_string (const char *str, char prefix = 0, bool sanitize = true)
-{
- unsigned slen = strlen (str);
- unsigned offset, len = slen + (prefix ? 1 : 0);
- uint32_t hdr_len = lendian32 (len);
- brig_string_slot s_slot;
- brig_string_slot **slot;
- char *str2;
-
- str2 = xstrdup (str);
-
- if (sanitize)
- hsa_sanitize_name (str2);
- s_slot.s = str2;
- s_slot.len = slen;
- s_slot.prefix = prefix;
- s_slot.offset = 0;
-
- slot = brig_string_htab->find_slot (&s_slot, INSERT);
- if (*slot == NULL)
- {
- brig_string_slot *new_slot = XCNEW (brig_string_slot);
-
- /* In theory we should fill in BrigData but that would mean copying
- the string to a buffer for no reason, so we just emulate it. */
- offset = brig_data.add (&hdr_len, sizeof (hdr_len));
- if (prefix)
- brig_data.add (&prefix, 1);
-
- brig_data.add (str2, slen);
- brig_data.round_size_up (4);
-
- /* TODO: could use the string we just copied into
- brig_string->cur_chunk */
- new_slot->s = str2;
- new_slot->len = slen;
- new_slot->prefix = prefix;
- new_slot->offset = offset;
- *slot = new_slot;
- }
- else
- {
- offset = (*slot)->offset;
- free (str2);
- }
-
- return offset;
-}
-
-/* Linked list of queued operands. */
-
-static struct operand_queue
-{
- /* First from the chain of queued operands. */
- hsa_op_base *first_op, *last_op;
-
- /* The offset at which the next operand will be enqueued. */
- unsigned projected_size;
-
-} op_queue;
-
-/* Unless already initialized, initialize infrastructure to produce BRIG. */
-
-static void
-brig_init (void)
-{
- brig_insn_count = 0;
-
- if (brig_initialized)
- return;
-
- brig_string_htab = new hash_table<brig_string_slot_hasher> (37);
- brig_data.init (BRIG_SECTION_DATA_NAME);
- brig_code.init (BRIG_SECTION_CODE_NAME);
- brig_operand.init (BRIG_SECTION_OPERAND_NAME);
- brig_initialized = true;
-
- struct BrigDirectiveModule moddir;
- memset (&moddir, 0, sizeof (moddir));
- moddir.base.byteCount = lendian16 (sizeof (moddir));
-
- char *modname;
- if (main_input_filename && *main_input_filename != '\0')
- {
- const char *part = strrchr (main_input_filename, '/');
- if (!part)
- part = main_input_filename;
- else
- part++;
- modname = concat ("&__hsa_module_", part, NULL);
- char *extension = strchr (modname, '.');
- if (extension)
- *extension = '\0';
-
- /* As in LTO mode, we have to emit a different module names. */
- if (flag_ltrans)
- {
- part = strrchr (asm_file_name, '/');
- if (!part)
- part = asm_file_name;
- else
- part++;
- char *modname2;
- modname2 = xasprintf ("%s_%s", modname, part);
- free (modname);
- modname = modname2;
- }
-
- hsa_sanitize_name (modname);
- moddir.name = brig_emit_string (modname);
- free (modname);
- }
- else
- moddir.name = brig_emit_string ("__hsa_module_unnamed", '&');
- moddir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_MODULE);
- moddir.hsailMajor = lendian32 (BRIG_VERSION_HSAIL_MAJOR);
- moddir.hsailMinor = lendian32 (BRIG_VERSION_HSAIL_MINOR);
- moddir.profile = hsa_full_profile_p () ? BRIG_PROFILE_FULL: BRIG_PROFILE_BASE;
- if (hsa_machine_large_p ())
- moddir.machineModel = BRIG_MACHINE_LARGE;
- else
- moddir.machineModel = BRIG_MACHINE_SMALL;
- moddir.defaultFloatRound = BRIG_ROUND_FLOAT_DEFAULT;
- brig_code.add (&moddir, sizeof (moddir));
-}
-
-/* Free all BRIG data. */
-
-static void
-brig_release_data (void)
-{
- delete brig_string_htab;
- brig_data.release ();
- brig_code.release ();
- brig_operand.release ();
-
- brig_initialized = 0;
-}
-
-/* Enqueue operation OP. Return the offset at which it will be stored. */
-
-static unsigned int
-enqueue_op (hsa_op_base *op)
-{
- unsigned ret;
-
- if (op->m_brig_op_offset)
- return op->m_brig_op_offset;
-
- ret = op_queue.projected_size;
- op->m_brig_op_offset = op_queue.projected_size;
-
- if (!op_queue.first_op)
- op_queue.first_op = op;
- else
- op_queue.last_op->m_next = op;
- op_queue.last_op = op;
-
- if (is_a <hsa_op_immed *> (op))
- op_queue.projected_size += sizeof (struct BrigOperandConstantBytes);
- else if (is_a <hsa_op_reg *> (op))
- op_queue.projected_size += sizeof (struct BrigOperandRegister);
- else if (is_a <hsa_op_address *> (op))
- op_queue.projected_size += sizeof (struct BrigOperandAddress);
- else if (is_a <hsa_op_code_ref *> (op))
- op_queue.projected_size += sizeof (struct BrigOperandCodeRef);
- else if (is_a <hsa_op_code_list *> (op))
- op_queue.projected_size += sizeof (struct BrigOperandCodeList);
- else if (is_a <hsa_op_operand_list *> (op))
- op_queue.projected_size += sizeof (struct BrigOperandOperandList);
- else
- gcc_unreachable ();
- return ret;
-}
-
-static void emit_immediate_operand (hsa_op_immed *imm);
-
-/* Emit directive describing a symbol if it has not been emitted already.
- Return the offset of the directive. */
-
-static unsigned
-emit_directive_variable (class hsa_symbol *symbol)
-{
- struct BrigDirectiveVariable dirvar;
- unsigned name_offset;
- static unsigned res_name_offset;
-
- if (symbol->m_directive_offset)
- return symbol->m_directive_offset;
-
- memset (&dirvar, 0, sizeof (dirvar));
- dirvar.base.byteCount = lendian16 (sizeof (dirvar));
- dirvar.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_VARIABLE);
- dirvar.allocation = symbol->m_allocation;
-
- char prefix = symbol->m_global_scope_p ? '&' : '%';
-
- if (symbol->m_decl && TREE_CODE (symbol->m_decl) == RESULT_DECL)
- {
- if (res_name_offset == 0)
- res_name_offset = brig_emit_string (symbol->m_name, '%');
- name_offset = res_name_offset;
- }
- else if (symbol->m_name)
- name_offset = brig_emit_string (symbol->m_name, prefix);
- else
- {
- char buf[64];
- snprintf (buf, 64, "__%s_%i", hsa_seg_name (symbol->m_segment),
- symbol->m_name_number);
- name_offset = brig_emit_string (buf, prefix);
- }
-
- dirvar.name = lendian32 (name_offset);
-
- if (symbol->m_decl && TREE_CODE (symbol->m_decl) == CONST_DECL)
- {
- hsa_op_immed *tmp = new hsa_op_immed (DECL_INITIAL (symbol->m_decl));
- dirvar.init = lendian32 (enqueue_op (tmp));
- }
- else
- dirvar.init = 0;
- dirvar.type = lendian16 (symbol->m_type);
- dirvar.segment = symbol->m_segment;
- dirvar.align = symbol->m_align;
- dirvar.linkage = symbol->m_linkage;
- dirvar.dim.lo = symbol->m_dim;
- dirvar.dim.hi = symbol->m_dim >> 32;
-
- /* Global variables are just declared and linked via HSA runtime. */
- if (symbol->m_linkage != BRIG_ALLOCATION_PROGRAM)
- dirvar.modifier |= BRIG_VARIABLE_DEFINITION;
- dirvar.reserved = 0;
-
- if (symbol->m_cst_value)
- {
- dirvar.modifier |= BRIG_VARIABLE_CONST;
- dirvar.init = lendian32 (enqueue_op (symbol->m_cst_value));
- }
-
- symbol->m_directive_offset = brig_code.add (&dirvar, sizeof (dirvar));
- return symbol->m_directive_offset;
-}
-
-/* Emit directives describing either a function declaration or definition F and
-   return the produced BrigDirectiveExecutable structure.  The function does
-   not take into account any instructions when calculating nextModuleEntry
-   field of the produced BrigDirectiveExecutable structure so when emitting
-   actual definitions, this field needs to be updated after all of the function
-   is actually added to the code section.
-
-   IS_DECLARATION only affects the bookkeeping in function_offsets: an offset
-   already recorded for F's decl is overwritten only when IS_DECLARATION is
-   false.  Whether symbols other than the arguments are emitted is decided by
-   F->m_declaration_p.  */
-
-static BrigDirectiveExecutable *
-emit_function_directives (hsa_function_representation *f, bool is_declaration)
-{
- struct BrigDirectiveExecutable fndir;
- unsigned name_offset, inarg_off, scoped_off, next_toplev_off;
- int count = 0;
- void *ptr_to_fndir;
- hsa_symbol *sym;
-
- /* For a definition, F's global symbols are emitted first, i.e. ahead of
-    the executable directive itself.  Each one may be emitted only once.  */
- if (!f->m_declaration_p)
- for (int i = 0; f->m_global_symbols.iterate (i, &sym); i++)
- {
- gcc_assert (!sym->m_emitted_to_brig);
- sym->m_emitted_to_brig = true;
- emit_directive_variable (sym);
- brig_insn_count++;
- }
-
- /* Predict where the directives emitted below will land, counting one
-    BrigDirectiveVariable per symbol placed right behind FNDIR: the optional
-    output argument, then the input arguments, then (for definitions) the
-    spill symbols and private variables.  */
- name_offset = brig_emit_string (f->m_name, '&');
- inarg_off = brig_code.total_size + sizeof (fndir)
- + (f->m_output_arg ? sizeof (struct BrigDirectiveVariable) : 0);
- scoped_off = inarg_off
- + f->m_input_args.length () * sizeof (struct BrigDirectiveVariable);
-
- if (!f->m_declaration_p)
- {
- count += f->m_spill_symbols.length ();
- count += f->m_private_variables.length ();
- }
-
- next_toplev_off = scoped_off + count * sizeof (struct BrigDirectiveVariable);
-
- memset (&fndir, 0, sizeof (fndir));
- fndir.base.byteCount = lendian16 (sizeof (fndir));
- fndir.base.kind = lendian16 (f->m_kern_p ? BRIG_KIND_DIRECTIVE_KERNEL
- : BRIG_KIND_DIRECTIVE_FUNCTION);
- fndir.name = lendian32 (name_offset);
- fndir.inArgCount = lendian16 (f->m_input_args.length ());
- fndir.outArgCount = lendian16 (f->m_output_arg ? 1 : 0);
- fndir.firstInArg = lendian32 (inarg_off);
- fndir.firstCodeBlockEntry = lendian32 (scoped_off);
- fndir.nextModuleEntry = lendian32 (next_toplev_off);
- fndir.linkage = f->get_linkage ();
- if (!f->m_declaration_p)
- fndir.modifier |= BRIG_EXECUTABLE_DEFINITION;
- memset (&fndir.reserved, 0, sizeof (fndir.reserved));
-
- /* Once we put a definition of function_offsets, we should not overwrite
- it with a declaration of the function. */
- if (f->m_internal_fn == NULL)
- {
- if (!function_offsets->get (f->m_decl) || !is_declaration)
- function_offsets->put (f->m_decl, brig_code.total_size);
- }
- else
- {
- /* Internal function.  A newly allocated hsa_internal_fn that records
-    the current code offset is stored into the hash table slot.  */
- hsa_internal_fn **slot
- = hsa_emitted_internal_decls->find_slot (f->m_internal_fn, INSERT);
- hsa_internal_fn *int_fn = new hsa_internal_fn (f->m_internal_fn);
- int_fn->m_offset = brig_code.total_size;
- *slot = int_fn;
- }
-
- /* The pointer handed back through the third argument is what this function
-    returns (presumably the copy of FNDIR stored in the code section --
-    confirm against the section's add method).  */
- brig_code.add (&fndir, sizeof (fndir), &ptr_to_fndir);
-
- /* Emission order must match the offsets computed above.  */
- if (f->m_output_arg)
- emit_directive_variable (f->m_output_arg);
- for (unsigned i = 0; i < f->m_input_args.length (); i++)
- emit_directive_variable (f->m_input_args[i]);
-
- if (!f->m_declaration_p)
- {
- for (int i = 0; f->m_spill_symbols.iterate (i, &sym); i++)
- {
- emit_directive_variable (sym);
- brig_insn_count++;
- }
- for (unsigned i = 0; i < f->m_private_variables.length (); i++)
- {
- emit_directive_variable (f->m_private_variables[i]);
- brig_insn_count++;
- }
- }
-
- return (BrigDirectiveExecutable *) ptr_to_fndir;
-}
-
-/* Emit the label directive belonging to HBB.  The label is assumed to start
-   at the current offset of the code section; the offset of the emitted
-   directive is stored in HBB's label reference.  */
-
-static void
-emit_bb_label_directive (hsa_bb *hbb)
-{
-  /* Labels are named after the UID of the current function and the index of
-     the basic block.  */
-  char label_name[32];
-  snprintf (label_name, sizeof (label_name), "BB_%u_%i",
-            DECL_UID (current_function_decl), hbb->m_index);
-
-  struct BrigDirectiveLabel label;
-  label.base.byteCount = lendian16 (sizeof (label));
-  label.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_LABEL);
-  label.name = lendian32 (brig_emit_string (label_name, '@'));
-
-  hbb->m_label_ref.m_directive_offset
-    = brig_code.add (&label, sizeof (label));
-  brig_insn_count++;
-}
-
-/* Return the bit type (B1, B32, B64 or B128) of the BRIG operand that holds
-   a constant or a register of the ordinary HSAIL type T.  Types not listed
-   below are not expected here.  */
-
-static BrigType16_t
-regtype_for_type (BrigType16_t t)
-{
-  BrigType16_t holder;
-
-  switch (t)
-    {
-    case BRIG_TYPE_B1:
-      holder = BRIG_TYPE_B1;
-      break;
-
-    /* Scalars of up to 32 bits and 32-bit packed types.  */
-    case BRIG_TYPE_B8:
-    case BRIG_TYPE_B16:
-    case BRIG_TYPE_B32:
-    case BRIG_TYPE_U8:
-    case BRIG_TYPE_U16:
-    case BRIG_TYPE_U32:
-    case BRIG_TYPE_S8:
-    case BRIG_TYPE_S16:
-    case BRIG_TYPE_S32:
-    case BRIG_TYPE_F16:
-    case BRIG_TYPE_F32:
-    case BRIG_TYPE_U8X4:
-    case BRIG_TYPE_U16X2:
-    case BRIG_TYPE_S8X4:
-    case BRIG_TYPE_S16X2:
-    case BRIG_TYPE_F16X2:
-      holder = BRIG_TYPE_B32;
-      break;
-
-    /* 64-bit scalars and 64-bit packed types.  */
-    case BRIG_TYPE_B64:
-    case BRIG_TYPE_U64:
-    case BRIG_TYPE_S64:
-    case BRIG_TYPE_F64:
-    case BRIG_TYPE_U8X8:
-    case BRIG_TYPE_U16X4:
-    case BRIG_TYPE_U32X2:
-    case BRIG_TYPE_S8X8:
-    case BRIG_TYPE_S16X4:
-    case BRIG_TYPE_S32X2:
-    case BRIG_TYPE_F16X4:
-    case BRIG_TYPE_F32X2:
-      holder = BRIG_TYPE_B64;
-      break;
-
-    /* B128 and 128-bit packed types.  */
-    case BRIG_TYPE_B128:
-    case BRIG_TYPE_U8X16:
-    case BRIG_TYPE_U16X8:
-    case BRIG_TYPE_U32X4:
-    case BRIG_TYPE_U64X2:
-    case BRIG_TYPE_S8X16:
-    case BRIG_TYPE_S16X8:
-    case BRIG_TYPE_S32X4:
-    case BRIG_TYPE_S64X2:
-    case BRIG_TYPE_F16X8:
-    case BRIG_TYPE_F32X4:
-    case BRIG_TYPE_F64X2:
-      holder = BRIG_TYPE_B128;
-      break;
-
-    default:
-      gcc_unreachable ();
-    }
-
-  return holder;
-}
-
-/* Return the number of bytes occupied by an immediate constant of BRIG type
-   TYPE when it is streamed out.  TYPE must not be B1.  */
-
-unsigned
-hsa_get_imm_brig_type_len (BrigType16_t type)
-{
-  const BrigType16_t packing = type & BRIG_TYPE_PACK_MASK;
-
-  /* The length of a packed type is given by its packing alone.  */
-  if (packing == BRIG_TYPE_PACK_32)
-    return 4;
-  if (packing == BRIG_TYPE_PACK_64)
-    return 8;
-  if (packing == BRIG_TYPE_PACK_128)
-    return 16;
-  if (packing != BRIG_TYPE_PACK_NONE)
-    gcc_unreachable ();
-
-  /* Otherwise the base type decides.  */
-  const BrigType16_t scalar = type & BRIG_TYPE_BASE_MASK;
-  unsigned len;
-
-  switch (scalar)
-    {
-    case BRIG_TYPE_B8:
-    case BRIG_TYPE_U8:
-    case BRIG_TYPE_S8:
-      len = 1;
-      break;
-    case BRIG_TYPE_B16:
-    case BRIG_TYPE_U16:
-    case BRIG_TYPE_S16:
-    case BRIG_TYPE_F16:
-      len = 2;
-      break;
-    case BRIG_TYPE_B32:
-    case BRIG_TYPE_U32:
-    case BRIG_TYPE_S32:
-    case BRIG_TYPE_F32:
-      len = 4;
-      break;
-    case BRIG_TYPE_B64:
-    case BRIG_TYPE_U64:
-    case BRIG_TYPE_S64:
-    case BRIG_TYPE_F64:
-      len = 8;
-      break;
-    case BRIG_TYPE_B128:
-      len = 16;
-      break;
-    default:
-      gcc_unreachable ();
-    }
-
-  return len;
-}
-
-/* Emit one scalar VALUE to the buffer DATA intended for BRIG emission.
-   If NEED_LEN is not equal to zero, shrink or extend the value
-   to NEED_LEN bytes; extension pads with zero bytes.  Return how many
-   bytes were written.
-
-   VALUE must not have a vector type.  Integral values (and pointer-typed
-   INTEGER_CSTs) of 1, 2, 4 or 8 bytes and scalar floats are handled.
-   16-bit floats are reported through sorry and 2 is returned without
-   writing anything to DATA.  */
-
-static int
-emit_immediate_scalar_to_buffer (tree value, char *data, unsigned need_len)
-{
-  union hsa_bytes bytes;
-
-  memset (&bytes, 0, sizeof (bytes));
-  tree type = TREE_TYPE (value);
-  gcc_checking_assert (TREE_CODE (type) != VECTOR_TYPE);
-
-  unsigned data_len = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
-  if (INTEGRAL_TYPE_P (type)
-      || (POINTER_TYPE_P (type) && TREE_CODE (value) == INTEGER_CST))
-    switch (data_len)
-      {
-      case 1:
-        bytes.b8 = (uint8_t) TREE_INT_CST_LOW (value);
-        break;
-      case 2:
-        bytes.b16 = (uint16_t) TREE_INT_CST_LOW (value);
-        break;
-      case 4:
-        bytes.b32 = (uint32_t) TREE_INT_CST_LOW (value);
-        break;
-      case 8:
-        bytes.b64 = (uint64_t) TREE_INT_CST_LOW (value);
-        break;
-      default:
-        gcc_unreachable ();
-      }
-  else if (SCALAR_FLOAT_TYPE_P (type))
-    {
-      if (data_len == 2)
-        {
-          sorry ("Support for HSA does not implement immediate 16 bit FPU "
-                 "operands");
-          return 2;
-        }
-      unsigned int_len = GET_MODE_SIZE (SCALAR_FLOAT_TYPE_MODE (type));
-      /* There are always 32 bits in each long, no matter the size of
-         the hosts long.  */
-      long tmp[6];
-
-      real_to_target (tmp, TREE_REAL_CST_PTR (value), TYPE_MODE (type));
-
-      if (int_len == 4)
-        bytes.b32 = (uint32_t) tmp[0];
-      else
-        {
-          bytes.b64 = (uint64_t)(uint32_t) tmp[1];
-          bytes.b64 <<= 32;
-          bytes.b64 |= (uint32_t) tmp[0];
-        }
-    }
-  else
-    gcc_unreachable ();
-
-  unsigned len = need_len == 0 ? data_len : need_len;
-
-  /* Never read past the end of BYTES: when more bytes are requested than
-     the union holds, copy what is there and zero-fill the remainder
-     instead of copying whatever happens to follow BYTES on the stack.  */
-  unsigned avail = len < sizeof (bytes) ? len : (unsigned) sizeof (bytes);
-  memcpy (data, &bytes, avail);
-  if (len > avail)
-    memset (data + avail, 0, len - avail);
-
-  return len;
-}
-
-/* Serialize this immediate into a newly allocated byte buffer and return the
-   buffer; the number of bytes in it is stored to *BRIG_REPR_SIZE.  The buffer
-   comes from XCNEWVEC or XNEWVEC and has to be released by the caller.
-
-   The size starts out as the length implied by m_type and is replaced by the
-   string length for a STRING_CST and by the size of the type for a
-   CONSTRUCTOR.  When there is no tree value, m_int_value is emitted,
-   truncated to that size, which then has to be 1, 2, 4 or 8.  */
-
-char *
-hsa_op_immed::emit_to_buffer (unsigned *brig_repr_size)
-{
- char *brig_repr;
- *brig_repr_size = hsa_get_imm_brig_type_len (m_type);
-
- if (m_tree_value != NULL_TREE)
- {
- /* Update brig_repr_size for special tree values. */
- if (TREE_CODE (m_tree_value) == STRING_CST)
- *brig_repr_size = TREE_STRING_LENGTH (m_tree_value);
- else if (TREE_CODE (m_tree_value) == CONSTRUCTOR)
- *brig_repr_size
- = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (m_tree_value)));
-
- /* TOTAL_LEN counts the bytes not yet written while elements are
-    emitted one after another; P is the write position.  */
- unsigned total_len = *brig_repr_size;
-
- /* As we can have a constructor with fewer elements, fill the memory
- with zeros. */
- brig_repr = XCNEWVEC (char, total_len);
- char *p = brig_repr;
-
- if (TREE_CODE (m_tree_value) == VECTOR_CST)
- {
- /* Variable-length vectors aren't supported. */
- int i, num = VECTOR_CST_NELTS (m_tree_value).to_constant ();
- for (i = 0; i < num; i++)
- {
- tree v = VECTOR_CST_ELT (m_tree_value, i);
- unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0);
- total_len -= actual;
- p += actual;
- }
- /* Vectors should have the exact size. */
- gcc_assert (total_len == 0);
- }
- else if (TREE_CODE (m_tree_value) == STRING_CST)
- memcpy (brig_repr, TREE_STRING_POINTER (m_tree_value),
- TREE_STRING_LENGTH (m_tree_value));
- else if (TREE_CODE (m_tree_value) == COMPLEX_CST)
- {
- /* The real part goes into the first half of the buffer, the
-    imaginary part into the second half.  */
- gcc_assert (total_len % 2 == 0);
- unsigned actual;
- actual
- = emit_immediate_scalar_to_buffer (TREE_REALPART (m_tree_value), p,
- total_len / 2);
-
- gcc_assert (actual == total_len / 2);
- p += actual;
-
- actual
- = emit_immediate_scalar_to_buffer (TREE_IMAGPART (m_tree_value), p,
- total_len / 2);
- gcc_assert (actual == total_len / 2);
- }
- else if (TREE_CODE (m_tree_value) == CONSTRUCTOR)
- {
- /* Element values are written back to back in constructor order;
-    unlike for vectors, the bytes left over are not checked.  */
- unsigned len = CONSTRUCTOR_NELTS (m_tree_value);
- for (unsigned i = 0; i < len; i++)
- {
- tree v = CONSTRUCTOR_ELT (m_tree_value, i)->value;
- unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0);
- total_len -= actual;
- p += actual;
- }
- }
- else
- /* A plain scalar is shrunk or extended to the size of m_type.  */
- emit_immediate_scalar_to_buffer (m_tree_value, p, total_len);
- }
- else
- {
- hsa_bytes bytes;
-
- /* Only the member written below is copied out, so BYTES needs no
-    initialization.  */
- switch (*brig_repr_size)
- {
- case 1:
- bytes.b8 = (uint8_t) m_int_value;
- break;
- case 2:
- bytes.b16 = (uint16_t) m_int_value;
- break;
- case 4:
- bytes.b32 = (uint32_t) m_int_value;
- break;
- case 8:
- bytes.b64 = (uint64_t) m_int_value;
- break;
- default:
- gcc_unreachable ();
- }
-
- brig_repr = XNEWVEC (char, *brig_repr_size);
- memcpy (brig_repr, &bytes, *brig_repr_size);
- }
-
- return brig_repr;
-}
-
-/* Emit the immediate BRIG operand IMM.  The BRIG type of IMM may have been
-   adjusted to satisfy HSA/BRIG type requirements, so what matters about it
-   here is its length (HSAIL may expect a smaller constant or bit data); the
-   bytes themselves follow the tree representation.  */
-
-static void
-emit_immediate_operand (hsa_op_immed *imm)
-{
-  unsigned payload_len;
-  char *payload = imm->emit_to_buffer (&payload_len);
-
-  /* In the data section the bytes are preceded by their 32-bit length.  */
-  uint32_t len_prefix = lendian32 (payload_len);
-
-  struct BrigOperandConstantBytes cst;
-  memset (&cst, 0, sizeof (cst));
-  cst.base.byteCount = lendian16 (sizeof (cst));
-  cst.base.kind = lendian16 (BRIG_KIND_OPERAND_CONSTANT_BYTES);
-  cst.type = lendian16 (imm->m_type);
-  cst.bytes = lendian32 (brig_data.add (&len_prefix, sizeof (len_prefix)));
-  brig_operand.add (&cst, sizeof (cst));
-
-  brig_data.add (payload, payload_len);
-  brig_data.round_size_up (4);
-
-  free (payload);
-}
-
-/* Emit the register BRIG operand REG.  The register kind is derived from
-   the bit type that holds values of REG's type.  */
-
-static void
-emit_register_operand (hsa_op_reg *reg)
-{
-  const BrigType16_t holder = regtype_for_type (reg->m_type);
-  struct BrigOperandRegister out;
-
-  if (holder == BRIG_TYPE_B1)
-    out.regKind = BRIG_REGISTER_KIND_CONTROL;
-  else if (holder == BRIG_TYPE_B32)
-    out.regKind = BRIG_REGISTER_KIND_SINGLE;
-  else if (holder == BRIG_TYPE_B64)
-    out.regKind = BRIG_REGISTER_KIND_DOUBLE;
-  else if (holder == BRIG_TYPE_B128)
-    out.regKind = BRIG_REGISTER_KIND_QUAD;
-  else
-    gcc_unreachable ();
-
-  out.base.byteCount = lendian16 (sizeof (out));
-  out.base.kind = lendian16 (BRIG_KIND_OPERAND_REGISTER);
-  /* NOTE(review): regNum goes through lendian32 while regKind is stored
-     without any conversion -- confirm both against the field widths in the
-     BRIG format header.  */
-  out.regNum = lendian32 (reg->m_hard_num);
-
-  brig_operand.add (&out, sizeof (out));
-}
-
-/* Emit the address BRIG operand ADDR.  A missing symbol or register is
-   encoded as zero; the immediate offset is split into 32-bit halves.  */
-
-static void
-emit_address_operand (hsa_op_address *addr)
-{
-  struct BrigOperandAddress out;
-
-  out.base.byteCount = lendian16 (sizeof (out));
-  out.base.kind = lendian16 (BRIG_KIND_OPERAND_ADDRESS);
-
-  if (addr->m_symbol)
-    out.symbol = lendian32 (emit_directive_variable (addr->m_symbol));
-  else
-    out.symbol = 0;
-
-  if (addr->m_reg)
-    out.reg = lendian32 (enqueue_op (addr->m_reg));
-  else
-    out.reg = 0;
-
-  /* The low half is the same for both widths of the offset.  */
-  out.offset.lo = lendian32 (addr->m_imm_offset);
-  if (sizeof (addr->m_imm_offset) == 8)
-    out.offset.hi = lendian32 (addr->m_imm_offset >> 32);
-  else
-    {
-      gcc_assert (sizeof (addr->m_imm_offset) == 4);
-      out.offset.hi = 0;
-    }
-
-  brig_operand.add (&out, sizeof (out));
-}
-
-/* Emit the code reference operand REF, which refers to the directive at
-   REF's directive offset.  */
-
-static void
-emit_code_ref_operand (hsa_op_code_ref *ref)
-{
-  struct BrigOperandCodeRef code_ref;
-
-  code_ref.ref = lendian32 (ref->m_directive_offset);
-  code_ref.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_REF);
-  code_ref.base.byteCount = lendian16 (sizeof (code_ref));
-
-  brig_operand.add (&code_ref, sizeof (code_ref));
-}
-
-/* Emit the code list operand CODE_LIST.  The offsets it holds are written
-   to the data section and the operand refers to them.  */
-
-static void
-emit_code_list_operand (hsa_op_code_list *code_list)
-{
-  const unsigned count = code_list->m_offsets.length ();
-
-  /* Every recorded offset is required to be non-zero.  */
-  for (unsigned idx = 0; idx != count; ++idx)
-    gcc_assert (code_list->m_offsets[idx]);
-
-  /* Data section layout: a 32-bit byte count followed by the offsets.  */
-  uint32_t list_bytes = lendian32 (4 * count);
-  unsigned list_start = brig_data.add (&list_bytes, sizeof (list_bytes));
-  brig_data.add (code_list->m_offsets.address (), count * sizeof (uint32_t));
-  brig_data.round_size_up (4);
-
-  struct BrigOperandCodeList out;
-  out.base.byteCount = lendian16 (sizeof (out));
-  out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_LIST);
-  out.elements = lendian32 (list_start);
-  brig_operand.add (&out, sizeof (out));
-}
-
-/* Emit the operand list operand OPERAND_LIST.  The offsets it holds are
-   written to the data section and the operand refers to them.  */
-
-static void
-emit_operand_list_operand (hsa_op_operand_list *operand_list)
-{
-  const unsigned count = operand_list->m_offsets.length ();
-
-  /* Every recorded offset is required to be non-zero.  */
-  for (unsigned idx = 0; idx != count; ++idx)
-    gcc_assert (operand_list->m_offsets[idx]);
-
-  /* Data section layout: a 32-bit byte count followed by the offsets.  */
-  uint32_t list_bytes = lendian32 (4 * count);
-  unsigned list_start = brig_data.add (&list_bytes, sizeof (list_bytes));
-  brig_data.add (operand_list->m_offsets.address (),
-                 count * sizeof (uint32_t));
-  brig_data.round_size_up (4);
-
-  struct BrigOperandOperandList out;
-  out.base.byteCount = lendian16 (sizeof (out));
-  out.base.kind = lendian16 (BRIG_KIND_OPERAND_OPERAND_LIST);
-  out.elements = lendian32 (list_start);
-  brig_operand.add (&out, sizeof (out));
-}
-
-/* Write out every operand waiting in the operand queue, in queue order.
-   Each operand must land exactly at the offset that was assigned to it.  */
-
-static void
-emit_queued_operands (void)
-{
-  hsa_op_base *cur = op_queue.first_op;
-
-  while (cur)
-    {
-      gcc_assert (cur->m_brig_op_offset == brig_operand.total_size);
-
-      /* Dispatch on the dynamic kind of the operand.  */
-      if (hsa_op_immed *immed = dyn_cast <hsa_op_immed *> (cur))
-        emit_immediate_operand (immed);
-      else if (hsa_op_reg *hreg = dyn_cast <hsa_op_reg *> (cur))
-        emit_register_operand (hreg);
-      else if (hsa_op_address *address = dyn_cast <hsa_op_address *> (cur))
-        emit_address_operand (address);
-      else if (hsa_op_code_ref *cref = dyn_cast <hsa_op_code_ref *> (cur))
-        emit_code_ref_operand (cref);
-      else if (hsa_op_code_list *clist
-               = dyn_cast <hsa_op_code_list *> (cur))
-        emit_code_list_operand (clist);
-      else if (hsa_op_operand_list *olist
-               = dyn_cast <hsa_op_operand_list *> (cur))
-        emit_operand_list_operand (olist);
-      else
-        gcc_unreachable ();
-
-      /* Read the link only now; the queue may have grown meanwhile.  */
-      cur = cur->m_next;
-    }
-}
-
-/* Emit the directives of a declaration of the function DECL together with
-   the operands queued while doing so.  Return the emitted executable
-   directive.  */
-
-static BrigDirectiveExecutable *
-emit_function_declaration (tree decl)
-{
-  /* The representation is temporary and is destroyed before returning.  */
-  hsa_function_representation *decl_repr
-    = hsa_generate_function_declaration (decl);
-  BrigDirectiveExecutable *directive
-    = emit_function_directives (decl_repr, true);
-
-  emit_queued_operands ();
-  delete decl_repr;
-
-  return directive;
-}
-
-/* Emit the directives of a declaration of the internal function FN together
-   with the operands queued while doing so.  Return the emitted executable
-   directive.  */
-
-static BrigDirectiveExecutable *
-emit_internal_fn_decl (hsa_internal_fn *fn)
-{
-  /* The representation is temporary and is destroyed before returning.  */
-  hsa_function_representation *decl_repr
-    = hsa_generate_internal_fn_decl (fn);
-  BrigDirectiveExecutable *directive
-    = emit_function_directives (decl_repr, true);
-
-  emit_queued_operands ();
-  delete decl_repr;
-
-  return directive;
-}
-
-/* Enqueue all operands of INSN and write the list of their offsets to the
-   BRIG data section.  Return the data section offset of that list.  */
-
-static unsigned
-emit_insn_operands (hsa_insn_basic *insn)
-{
-  const unsigned count = insn->operand_count ();
-
-  /* The list starts with its size in bytes: four for each operand.  */
-  uint32_t list_bytes = lendian32 (4 * count);
-  const unsigned list_start
-    = brig_data.add (&list_bytes, sizeof (list_bytes));
-
-  if (count != 0)
-    {
-      auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS>
-        offsets;
-
-      offsets.safe_grow (count);
-      for (unsigned idx = 0; idx != count; ++idx)
-        offsets[idx] = lendian32 (enqueue_op (insn->get_op (idx)));
-
-      brig_data.add (offsets.address (),
-                     count * sizeof (BrigOperandOffset32_t));
-    }
-
-  brig_data.round_size_up (4);
-  return list_start;
-}
-
-/* Enqueue operand OP0, OP1, OP2 (if different from NULL) and return offset
-   to BRIG data section to list of operand offsets.  OP0 is mandatory and
-   OP2 is only considered when OP1 is given.  */
-
-static unsigned
-emit_operands (hsa_op_base *op0, hsa_op_base *op1 = NULL,
-               hsa_op_base *op2 = NULL)
-{
-  auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS>
-    operand_offsets;
-
-  /* The offsets are part of the emitted BRIG data, so they have to be
-     converted to little endian just like emit_insn_operands does.  */
-  gcc_checking_assert (op0 != NULL);
-  operand_offsets.safe_push (lendian32 (enqueue_op (op0)));
-
-  if (op1 != NULL)
-    {
-      operand_offsets.safe_push (lendian32 (enqueue_op (op1)));
-      if (op2 != NULL)
-        operand_offsets.safe_push (lendian32 (enqueue_op (op2)));
-    }
-
-  unsigned l = operand_offsets.length ();
-
-  /* We have N operands so use 4 * N for the byte_count.  */
-  uint32_t byte_count = lendian32 (4 * l);
-
-  unsigned offset = brig_data.add (&byte_count, sizeof (byte_count));
-  brig_data.add (operand_offsets.address (),
-                 l * sizeof (BrigOperandOffset32_t));
-
-  brig_data.round_size_up (4);
-
-  return offset;
-}
-
-/* Emit the HSA memory instruction MEM into the code section and enqueue its
-   operands for writing.  MEM has two operands, the second one being the
-   address.  */
-
-static void
-emit_memory_insn (hsa_insn_mem *mem)
-{
-  gcc_checking_assert (mem->operand_count () == 2);
-  hsa_op_address *address = as_a <hsa_op_address *> (mem->get_op (1));
-
-  /* Clear the whole structure up front.  The erroneous typedef of
-     BrigMemoryModifier8_t introduces padding which could otherwise hold
-     random bytes, and we want the output to be reproducible.  This also
-     leaves the modifier and the reserved bytes zero.  */
-  struct BrigInstMem inst;
-  memset (&inst, 0, sizeof (inst));
-
-  inst.base.base.byteCount = lendian16 (sizeof (inst));
-  inst.base.base.kind = lendian16 (BRIG_KIND_INST_MEM);
-  inst.base.opcode = lendian16 (mem->m_opcode);
-  inst.base.type = lendian16 (mem->m_type);
-  inst.base.operands = lendian32 (emit_insn_operands (mem));
-
-  /* Without a symbol the access goes to the flat segment.  */
-  inst.segment = BRIG_SEGMENT_FLAT;
-  if (addr_symbol_segment_known:
-      false)
-    ;
-  if (address->m_symbol)
-    inst.segment = address->m_symbol->m_segment;
-
-  inst.equivClass = mem->m_equiv_class;
-  inst.align = mem->m_align;
-
-  /* Only loads carry a width.  */
-  inst.width = BRIG_WIDTH_NONE;
-  if (mem->m_opcode == BRIG_OPCODE_LD)
-    inst.width = BRIG_WIDTH_1;
-
-  brig_code.add (&inst, sizeof (inst));
-  brig_insn_count++;
-}
-
-/* Emit the HSA signal instruction MEM into the code section and enqueue its
-   operands for writing.  */
-
-static void
-emit_signal_insn (hsa_insn_signal *mem)
-{
-  struct BrigInstSignal inst;
-  memset (&inst, 0, sizeof (inst));
-
-  inst.base.base.byteCount = lendian16 (sizeof (inst));
-  inst.base.base.kind = lendian16 (BRIG_KIND_INST_SIGNAL);
-  inst.base.opcode = lendian16 (mem->m_opcode);
-  inst.base.type = lendian16 (mem->m_type);
-  inst.base.operands = lendian32 (emit_insn_operands (mem));
-
-  inst.memoryOrder = mem->m_memory_order;
-  inst.signalOperation = mem->m_signalop;
-
-  /* The signal type follows the machine model.  */
-  if (hsa_machine_large_p ())
-    inst.signalType = BRIG_TYPE_SIG64;
-  else
-    inst.signalType = BRIG_TYPE_SIG32;
-
-  brig_code.add (&inst, sizeof (inst));
-  brig_insn_count++;
-}
-
-/* Emit the HSA atomic memory instruction MEM into the code section and
-   enqueue its operands for writing.  */
-
-static void
-emit_atomic_insn (hsa_insn_atomic *mem)
-{
-  /* The address is operand 0 if that is an address operand; otherwise it
-     has to be operand 1.  */
-  hsa_op_address *address;
-  if (is_a <hsa_op_address *> (mem->get_op (0)))
-    address = as_a <hsa_op_address *> (mem->get_op (0));
-  else
-    address = as_a <hsa_op_address *> (mem->get_op (1));
-
-  struct BrigInstAtomic inst;
-  memset (&inst, 0, sizeof (inst));
-
-  inst.base.base.byteCount = lendian16 (sizeof (inst));
-  inst.base.base.kind = lendian16 (BRIG_KIND_INST_ATOMIC);
-  inst.base.opcode = lendian16 (mem->m_opcode);
-  inst.base.type = lendian16 (mem->m_type);
-  inst.base.operands = lendian32 (emit_insn_operands (mem));
-
-  /* Without a symbol the access goes to the flat segment.  */
-  inst.segment = BRIG_SEGMENT_FLAT;
-  if (address->m_symbol)
-    inst.segment = address->m_symbol->m_segment;
-
-  inst.atomicOperation = mem->m_atomicop;
-  inst.memoryOrder = mem->m_memoryorder;
-  inst.memoryScope = mem->m_memoryscope;
-
-  brig_code.add (&inst, sizeof (inst));
-  brig_insn_count++;
-}
-
-/* Emit the HSA LDA instruction INSN into the code section and enqueue its
-   operands for writing.  Operand 1 of INSN is the address.  */
-
-static void
-emit_addr_insn (hsa_insn_basic *insn)
-{
-  hsa_op_address *address = as_a <hsa_op_address *> (insn->get_op (1));
-  struct BrigInstAddr inst;
-
-  inst.base.base.byteCount = lendian16 (sizeof (inst));
-  inst.base.base.kind = lendian16 (BRIG_KIND_INST_ADDR);
-  inst.base.opcode = lendian16 (insn->m_opcode);
-  inst.base.type = lendian16 (insn->m_type);
-  inst.base.operands = lendian32 (emit_insn_operands (insn));
-
-  /* Without a symbol the address is in the flat segment.  */
-  inst.segment = BRIG_SEGMENT_FLAT;
-  if (address->m_symbol)
-    inst.segment = address->m_symbol->m_segment;
-
-  memset (&inst.reserved, 0, sizeof (inst.reserved));
-
-  brig_code.add (&inst, sizeof (inst));
-  brig_insn_count++;
-}
-
-/* Emit the HSA segment conversion instruction SEG into the code section and
-   enqueue its operands for writing.  The source type is the type of the
-   register that is operand 1.  */
-
-static void
-emit_segment_insn (hsa_insn_seg *seg)
-{
-  struct BrigInstSegCvt inst;
-
-  inst.base.base.byteCount = lendian16 (sizeof (inst));
-  inst.base.base.kind = lendian16 (BRIG_KIND_INST_SEG_CVT);
-  inst.base.opcode = lendian16 (seg->m_opcode);
-  inst.base.type = lendian16 (seg->m_type);
-  inst.base.operands = lendian32 (emit_insn_operands (seg));
-
-  hsa_op_reg *source = as_a <hsa_op_reg *> (seg->get_op (1));
-  inst.sourceType = lendian16 (source->m_type);
-  inst.modifier = 0;
-  inst.segment = seg->m_segment;
-
-  brig_code.add (&inst, sizeof (inst));
-  brig_insn_count++;
-}
-
-/* Emit the HSA alloca instruction ALLOCA into the code section and enqueue
-   its operands for writing.  It is encoded as a memory instruction on the
-   private segment.  */
-
-static void
-emit_alloca_insn (hsa_insn_alloca *alloca)
-{
-  gcc_checking_assert (alloca->operand_count () == 2);
-
-  /* Clearing the structure leaves the modifier, the equivalence class and
-     the reserved bytes zero.  */
-  struct BrigInstMem inst;
-  memset (&inst, 0, sizeof (inst));
-
-  inst.base.base.byteCount = lendian16 (sizeof (inst));
-  inst.base.base.kind = lendian16 (BRIG_KIND_INST_MEM);
-  inst.base.opcode = lendian16 (alloca->m_opcode);
-  inst.base.type = lendian16 (alloca->m_type);
-  inst.base.operands = lendian32 (emit_insn_operands (alloca));
-
-  inst.segment = BRIG_SEGMENT_PRIVATE;
-  inst.width = BRIG_WIDTH_NONE;
-  inst.align = alloca->m_align;
-
-  brig_code.add (&inst, sizeof (inst));
-  brig_insn_count++;
-}
-
-/* Emit the HSA comparison instruction CMP into the code section and enqueue
-   its operands for writing.  The source type is taken from operand 1, which
-   is either a register or an immediate.  */
-
-static void
-emit_cmp_insn (hsa_insn_cmp *cmp)
-{
-  struct BrigInstCmp inst;
-  memset (&inst, 0, sizeof (inst));
-
-  inst.base.base.byteCount = lendian16 (sizeof (inst));
-  inst.base.base.kind = lendian16 (BRIG_KIND_INST_CMP);
-  inst.base.opcode = lendian16 (cmp->m_opcode);
-  inst.base.type = lendian16 (cmp->m_type);
-  inst.base.operands = lendian32 (emit_insn_operands (cmp));
-
-  BrigType16_t src_type;
-  if (is_a <hsa_op_reg *> (cmp->get_op (1)))
-    src_type = as_a <hsa_op_reg *> (cmp->get_op (1))->m_type;
-  else
-    src_type = as_a <hsa_op_immed *> (cmp->get_op (1))->m_type;
-  inst.sourceType = lendian16 (src_type);
-
-  inst.compare = cmp->m_compare;
-  inst.modifier = 0;
-  inst.pack = 0;
-
-  brig_code.add (&inst, sizeof (inst));
-  brig_insn_count++;
-}
-
-/* Emit the generic HSA branching/synchronization instruction BR into the
-   code section and enqueue its operands for writing.  */
-
-static void
-emit_generic_branch_insn (hsa_insn_br *br)
-{
-  struct BrigInstBr inst;
-
-  inst.base.base.byteCount = lendian16 (sizeof (inst));
-  inst.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
-  inst.base.opcode = lendian16 (br->m_opcode);
-  inst.base.type = lendian16 (br->m_type);
-  inst.base.operands = lendian32 (emit_insn_operands (br));
-  inst.width = br->m_width;
-  memset (&inst.reserved, 0, sizeof (inst.reserved));
-
-  brig_code.add (&inst, sizeof (inst));
-  brig_insn_count++;
-}
-
-/* Emit the HSA conditional branch BR into the code section and enqueue its
-   operands for writing.  The jump goes to the label of the successor that
-   is reached through the edge taken when the condition is true.  */
-
-static void
-emit_cond_branch_insn (hsa_insn_cbr *br)
-{
-  /* At the moment we only handle direct conditional jumps.  */
-  gcc_assert (br->m_opcode == BRIG_OPCODE_CBR);
-
-  /* Look for the first successor edge taken on a true condition.  */
-  basic_block taken = NULL;
-  edge_iterator it;
-  edge succ;
-  FOR_EACH_EDGE (succ, it, br->m_bb->succs)
-    if (succ->flags & EDGE_TRUE_VALUE)
-      {
-        taken = succ->dest;
-        break;
-      }
-  gcc_assert (taken);
-
-  struct BrigInstBr inst;
-  inst.base.base.byteCount = lendian16 (sizeof (inst));
-  inst.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
-  inst.base.opcode = lendian16 (br->m_opcode);
-  /* For conditional jumps the type is always B1.  */
-  inst.base.type = lendian16 (BRIG_TYPE_B1);
-  inst.width = br->m_width;
-  inst.base.operands
-    = lendian32 (emit_operands (br->get_op (0),
-                                &hsa_bb_for_bb (taken)->m_label_ref));
-  memset (&inst.reserved, 0, sizeof (inst.reserved));
-
-  brig_code.add (&inst, sizeof (inst));
-  brig_insn_count++;
-}
-
-/* Emit an unconditional HSA branch to the label REFERENCE.  */
-
-static void
-emit_unconditional_jump (hsa_op_code_ref *reference)
-{
-  struct BrigInstBr inst;
-
-  inst.base.base.byteCount = lendian16 (sizeof (inst));
-  inst.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
-  inst.base.opcode = lendian16 (BRIG_OPCODE_BR);
-  inst.base.type = lendian16 (BRIG_TYPE_NONE);
-  inst.base.operands = lendian32 (emit_operands (reference));
-
-  /* Direct branches to labels must be width(all).  */
-  inst.width = BRIG_WIDTH_ALL;
-  memset (&inst.reserved, 0, sizeof (inst.reserved));
-
-  brig_code.add (&inst, sizeof (inst));
-  brig_insn_count++;
-}
-
-/* Emit the HSA switch instruction SBR, which jumps to one of the labels in
-   its code list depending on the index register in operand 0.  */
-
-static void
-emit_switch_insn (hsa_insn_sbr *sbr)
-{
-  gcc_assert (sbr->m_opcode == BRIG_OPCODE_SBR);
-
-  /* The instruction gets the type of the index register.  */
-  hsa_op_reg *index_reg = as_a <hsa_op_reg *> (sbr->get_op (0));
-
-  struct BrigInstBr inst;
-  inst.base.base.byteCount = lendian16 (sizeof (inst));
-  inst.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
-  inst.base.opcode = lendian16 (sbr->m_opcode);
-  inst.base.type = lendian16 (index_reg->m_type);
-  inst.base.operands
-    = lendian32 (emit_operands (sbr->get_op (0), sbr->m_label_code_list));
-  inst.width = BRIG_WIDTH_1;
-  memset (&inst.reserved, 0, sizeof (inst.reserved));
-
-  brig_code.add (&inst, sizeof (inst));
-  brig_insn_count++;
-}
-
-/* Emit the HSA convert instruction INSN into the code section and enqueue
-   its operands for writing.  The source type comes from operand 1, which is
-   either a register or an immediate.  */
-
-static void
-emit_cvt_insn (hsa_insn_cvt *insn)
-{
-  struct BrigInstCvt inst;
-
-  inst.base.base.byteCount = lendian16 (sizeof (inst));
-  inst.base.base.kind = lendian16 (BRIG_KIND_INST_CVT);
-  inst.base.opcode = lendian16 (insn->m_opcode);
-  inst.base.type = lendian16 (insn->m_type);
-  inst.base.operands = lendian32 (emit_insn_operands (insn));
-
-  BrigType16_t from_type;
-  if (is_a <hsa_op_reg *> (insn->get_op (1)))
-    from_type = as_a <hsa_op_reg *> (insn->get_op (1))->m_type;
-  else
-    from_type = as_a <hsa_op_immed *> (insn->get_op (1))->m_type;
-
-  inst.sourceType = lendian16 (from_type);
-  inst.modifier = 0;
-
-  /* A float destination needs a rounding mode when the source is not a
-     float or when the base type of the destination compares lower than
-     that of the source; we default to 'near'.  Float to integer
-     conversions round towards zero.  Nothing else is rounded.  */
-  if (hsa_type_float_p (insn->m_type)
-      && (!hsa_type_float_p (from_type)
-          || ((insn->m_type & BRIG_TYPE_BASE_MASK)
-              < (from_type & BRIG_TYPE_BASE_MASK))))
-    inst.round = BRIG_ROUND_FLOAT_NEAR_EVEN;
-  else if (hsa_type_integer_p (insn->m_type)
-           && hsa_type_float_p (from_type))
-    inst.round = BRIG_ROUND_INTEGER_ZERO;
-  else
-    inst.round = BRIG_ROUND_NONE;
-
-  brig_code.add (&inst, sizeof (inst));
-  brig_insn_count++;
-}
-
-/* Emit the call instruction CALL.  The instruction must be enclosed in an
-   argument block.  */
-
-static void
-emit_call_insn (hsa_insn_call *call)
-{
-  struct BrigInstBr inst;
-
-  inst.base.base.byteCount = lendian16 (sizeof (inst));
-  inst.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
-  inst.base.opcode = lendian16 (BRIG_OPCODE_CALL);
-  inst.base.type = lendian16 (BRIG_TYPE_NONE);
-
-  /* The operands are the result list, the callee and the argument list.
-     This has to happen before the bookkeeping below reads the operand
-     offset of the callee reference.  */
-  inst.base.operands
-    = lendian32 (emit_operands (call->m_result_code_list, &call->m_func,
-                                call->m_args_code_list));
-  inst.width = BRIG_WIDTH_ALL;
-  memset (&inst.reserved, 0, sizeof (inst.reserved));
-
-  if (!call->m_called_function)
-    {
-      /* Internal functions have not set m_called_function; their
-         declaration must have been emitted already, so point the callee
-         reference at it.  */
-      hsa_internal_fn *emitted
-        = hsa_emitted_internal_decls->find (call->m_called_internal_fn);
-      gcc_assert (emitted);
-      gcc_assert (emitted->m_offset > 0);
-      call->m_func.m_directive_offset = emitted->m_offset;
-    }
-  else
-    {
-      /* Remember the callee together with the offset of the operand that
-         refers to it.  */
-      function_linkage_pair link (call->m_called_function,
-                                  call->m_func.m_brig_op_offset);
-      function_call_linkage.safe_push (link);
-    }
-
-  brig_code.add (&inst, sizeof (inst));
-  brig_insn_count++;
-}
-
-/* Emit the argument block directive INSN, which either opens or closes an
-   argument block.  An opening directive is followed by the variables of the
-   call's input arguments and of its output argument, if any; their offsets
-   are recorded in the code lists of the call instruction.  */
-
-static void
-emit_arg_block_insn (hsa_insn_arg_block *insn)
-{
-  const bool opens_block
-    = insn->m_kind == BRIG_KIND_DIRECTIVE_ARG_BLOCK_START;
-
-  if (!opens_block && insn->m_kind != BRIG_KIND_DIRECTIVE_ARG_BLOCK_END)
-    gcc_unreachable ();
-
-  /* The directive itself looks the same for both kinds.  */
-  struct BrigDirectiveArgBlock block;
-  block.base.byteCount = lendian16 (sizeof (block));
-  block.base.kind = lendian16 (insn->m_kind);
-  brig_code.add (&block, sizeof (block));
-
-  if (opens_block)
-    {
-      for (unsigned arg = 0;
-           arg < insn->m_call_insn->m_input_args.length ();
-           arg++)
-        {
-          unsigned var_offset
-            = emit_directive_variable (insn->m_call_insn->m_input_args[arg]);
-          insn->m_call_insn->m_args_code_list->m_offsets[arg]
-            = lendian32 (var_offset);
-          brig_insn_count++;
-        }
-
-      if (insn->m_call_insn->m_output_arg)
-        {
-          unsigned var_offset
-            = emit_directive_variable (insn->m_call_insn->m_output_arg);
-          insn->m_call_insn->m_result_code_list->m_offsets[0]
-            = lendian32 (var_offset);
-          brig_insn_count++;
-        }
-    }
-
-  /* This one counts the block directive.  */
-  brig_insn_count++;
-}
-
-/* Emit the comment directive INSN.  The text of the comment is added to the
-   string data and the directive refers to it by offset.  Comments are not
-   counted in brig_insn_count.  */
-
-static void
-emit_comment_insn (hsa_insn_comment *insn)
-{
-  struct BrigDirectiveComment repr;
-  memset (&repr, 0, sizeof (repr));
-
-  repr.base.byteCount = lendian16 (sizeof (repr));
-  repr.base.kind = lendian16 (insn->m_opcode);
-  /* The string offset is emitted data and must be little endian, exactly as
-     the name of a label directive.  */
-  repr.name = lendian32 (brig_emit_string (insn->m_comment, '\0', false));
-  brig_code.add (&repr, sizeof (repr));
-}
-
-/* Emit the queue instruction INSN into the code section and enqueue its
-   operands for writing.  */
-
-static void
-emit_queue_insn (hsa_insn_queue *insn)
-{
-  BrigInstQueue inst;
-  memset (&inst, 0, sizeof (inst));
-
-  inst.base.base.byteCount = lendian16 (sizeof (inst));
-  inst.base.base.kind = lendian16 (BRIG_KIND_INST_QUEUE);
-  inst.base.opcode = lendian16 (insn->m_opcode);
-  inst.base.type = lendian16 (insn->m_type);
-  inst.memoryOrder = insn->m_memory_order;
-  inst.segment = insn->m_segment;
-
-  inst.base.operands = lendian32 (emit_insn_operands (insn));
-  brig_data.round_size_up (4);
-
-  brig_code.add (&inst, sizeof (inst));
-  brig_insn_count++;
-}
-
-/* Emit the source type instruction INSN into the code section and enqueue
-   its operands for writing.  INSN has at least two operands.  */
-
-static void
-emit_srctype_insn (hsa_insn_srctype *insn)
-{
-  gcc_checking_assert (insn->operand_count () >= 2);
-
-  struct BrigInstSourceType inst;
-  memset (&inst, 0, sizeof (inst));
-
-  inst.base.base.byteCount = lendian16 (sizeof (inst));
-  inst.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE);
-  inst.base.opcode = lendian16 (insn->m_opcode);
-  inst.base.type = lendian16 (insn->m_type);
-  inst.sourceType = lendian16 (insn->m_source_type);
-  inst.base.operands = lendian32 (emit_insn_operands (insn));
-
-  brig_code.add (&inst, sizeof (inst));
-  brig_insn_count++;
-}
-
-/* Emit packed instruction INSN as a BrigInstSourceType record.
- INSN is expected to have at least two operands. For BRIG_OPCODE_COMBINE,
- operands 1 and up are enqueued and stored into INSN->m_operand_list, which
- is then passed to emit_operands after operand 0. For BRIG_OPCODE_EXPAND,
- all operands but the last are enqueued into the list, which is passed to
- emit_operands before the last operand. For any other opcode the operands
- field of the record keeps the zero written by memset. */
-
-static void
-emit_packed_insn (hsa_insn_packed *insn)
-{
- /* Packed instructions are written using the BrigInstSourceType layout. */
- struct BrigInstSourceType repr;
- unsigned operand_count = insn->operand_count ();
- gcc_checking_assert (operand_count >= 2);
-
- memset (&repr, 0, sizeof (repr));
- repr.sourceType = lendian16 (insn->m_source_type);
- repr.base.base.byteCount = lendian16 (sizeof (repr));
- repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE);
- repr.base.opcode = lendian16 (insn->m_opcode);
- repr.base.type = lendian16 (insn->m_type);
-
- if (insn->m_opcode == BRIG_OPCODE_COMBINE)
- {
- /* Create operand list for packed type: operand I goes to slot I - 1. */
- for (unsigned i = 1; i < operand_count; i++)
- {
- gcc_checking_assert (insn->get_op (i));
- insn->m_operand_list->m_offsets[i - 1]
- = lendian32 (enqueue_op (insn->get_op (i)));
- }
-
- repr.base.operands = lendian32 (emit_operands (insn->get_op (0),
- insn->m_operand_list));
- }
- else if (insn->m_opcode == BRIG_OPCODE_EXPAND)
- {
- /* Create operand list for packed type: every operand but the last. */
- for (unsigned i = 0; i < operand_count - 1; i++)
- {
- gcc_checking_assert (insn->get_op (i));
- insn->m_operand_list->m_offsets[i]
- = lendian32 (enqueue_op (insn->get_op (i)));
- }
-
- unsigned ops = emit_operands (insn->m_operand_list,
- insn->get_op (insn->operand_count () - 1));
- repr.base.operands = lendian32 (ops);
- }
-
-
- brig_code.add (&repr, sizeof (struct BrigInstSourceType));
- brig_insn_count++;
-}
-
-/* Emit basic HSA instruction INSN into the BRIG code section and schedule
-   its operands for writing.  An instruction whose (possibly adjusted) type
-   is packed is written as a BrigInstMod record, any other one as a
-   BrigInstBasic record.  */
-
-static void
-emit_basic_insn (hsa_insn_basic *insn)
-{
-  /* One BrigInstMod buffer serves both record kinds; we assume that
-     BrigInstMod has a BrigInstBasic prefix.  */
-  BrigInstMod mod;
-  memset (&mod, 0, sizeof (mod));
-
-  /* The bit-logical operations need bit types and reject arithmetic ones,
-     so their type is mapped through regtype_for_type.  */
-  const bool bit_logical_p = (insn->m_opcode == BRIG_OPCODE_AND
-                              || insn->m_opcode == BRIG_OPCODE_OR
-                              || insn->m_opcode == BRIG_OPCODE_XOR
-                              || insn->m_opcode == BRIG_OPCODE_NOT);
-  BrigType16_t type;
-  if (bit_logical_p)
-    type = regtype_for_type (insn->m_type);
-  else
-    type = insn->m_type;
-
-  mod.base.opcode = lendian16 (insn->m_opcode);
-  mod.base.type = lendian16 (type);
-  mod.base.operands = lendian32 (emit_insn_operands (insn));
-
-  if (!hsa_type_packed_p (type))
-    {
-      /* Only the BrigInstBasic prefix of the buffer is written out.  */
-      mod.base.base.byteCount = lendian16 (sizeof (BrigInstBasic));
-      mod.base.base.kind = lendian16 (BRIG_KIND_INST_BASIC);
-      brig_code.add (&mod, sizeof (BrigInstBasic));
-      brig_insn_count++;
-      return;
-    }
-
-  mod.base.base.byteCount = lendian16 (sizeof (BrigInstMod));
-  mod.base.base.kind = lendian16 (BRIG_KIND_INST_MOD);
-
-  if (hsa_type_float_p (type)
-      && !hsa_opcode_floating_bit_insn_p (insn->m_opcode))
-    mod.round = BRIG_ROUND_FLOAT_NEAR_EVEN;
-  else
-    mod.round = 0;
-
-  /* We assume that destination and sources agree in packing layout.  */
-  if (insn->num_used_ops () >= 2)
-    mod.pack = BRIG_PACK_PP;
-  else
-    mod.pack = BRIG_PACK_P;
-  mod.reserved = 0;
-
-  brig_code.add (&mod, sizeof (BrigInstMod));
-  brig_insn_count++;
-}
-
-/* Emit an HSA instruction and all necessary directives, schedule necessary
- operands for writing. INSN must not be a PHI. The current total size of
- the code section is stored in INSN->m_brig_offset, then INSN is handed to
- the emitter matching its dynamic class (or, for BRIG_OPCODE_LDA, its
- opcode); anything not matched falls back to emit_basic_insn. Switch
- instructions are additionally pushed to switch_instructions, which is
- allocated on first use. The checks are tried strictly in the order
- written. NOTE(review): that order presumably matters where instruction
- classes derive from one another - confirm before reordering. */
-
-static void
-emit_insn (hsa_insn_basic *insn)
-{
- gcc_assert (!is_a <hsa_insn_phi *> (insn));
-
- insn->m_brig_offset = brig_code.total_size;
-
- if (hsa_insn_signal *signal = dyn_cast <hsa_insn_signal *> (insn))
- emit_signal_insn (signal);
- else if (hsa_insn_atomic *atom = dyn_cast <hsa_insn_atomic *> (insn))
- emit_atomic_insn (atom);
- else if (hsa_insn_mem *mem = dyn_cast <hsa_insn_mem *> (insn))
- emit_memory_insn (mem);
- else if (insn->m_opcode == BRIG_OPCODE_LDA)
- emit_addr_insn (insn);
- else if (hsa_insn_seg *seg = dyn_cast <hsa_insn_seg *> (insn))
- emit_segment_insn (seg);
- else if (hsa_insn_cmp *cmp = dyn_cast <hsa_insn_cmp *> (insn))
- emit_cmp_insn (cmp);
- else if (hsa_insn_cbr *br = dyn_cast <hsa_insn_cbr *> (insn))
- emit_cond_branch_insn (br);
- else if (hsa_insn_sbr *sbr = dyn_cast <hsa_insn_sbr *> (insn))
- {
- if (switch_instructions == NULL)
- switch_instructions = new vec <hsa_insn_sbr *> ();
-
- switch_instructions->safe_push (sbr);
- emit_switch_insn (sbr);
- }
- else if (hsa_insn_br *br = dyn_cast <hsa_insn_br *> (insn))
- emit_generic_branch_insn (br);
- else if (hsa_insn_arg_block *block = dyn_cast <hsa_insn_arg_block *> (insn))
- emit_arg_block_insn (block);
- else if (hsa_insn_call *call = dyn_cast <hsa_insn_call *> (insn))
- emit_call_insn (call);
- else if (hsa_insn_comment *comment = dyn_cast <hsa_insn_comment *> (insn))
- emit_comment_insn (comment);
- else if (hsa_insn_queue *queue = dyn_cast <hsa_insn_queue *> (insn))
- emit_queue_insn (queue);
- else if (hsa_insn_srctype *srctype = dyn_cast <hsa_insn_srctype *> (insn))
- emit_srctype_insn (srctype);
- else if (hsa_insn_packed *packed = dyn_cast <hsa_insn_packed *> (insn))
- emit_packed_insn (packed);
- else if (hsa_insn_cvt *cvt = dyn_cast <hsa_insn_cvt *> (insn))
- emit_cvt_insn (cvt);
- else if (hsa_insn_alloca *alloca = dyn_cast <hsa_insn_alloca *> (insn))
- emit_alloca_insn (alloca);
- else
- emit_basic_insn (insn);
-}
-
-/* We have just finished emitting BB and are about to emit NEXT_BB if non-NULL,
- or we are about to finish emitting code, if it is NULL. If the fall through
- edge from BB does not lead to NEXT_BB, emit an unconditional jump.
- An outgoing edge flagged EDGE_TRUE_VALUE is only checked for uniqueness;
- any other outgoing edge is taken as the fall through one, and there may be
- at most one of each. Nothing is emitted when there is no fall through
- edge or when it leads to NEXT_BB or to the exit block of the function. */
-
-static void
-perhaps_emit_branch (basic_block bb, basic_block next_bb)
-{
- basic_block t_bb = NULL, ff = NULL;
-
- edge_iterator ei;
- edge e;
-
- /* If the last instruction of BB is a switch, ignore emission of all
- edges. */
- if (hsa_bb_for_bb (bb)->m_last_insn
- && is_a <hsa_insn_sbr *> (hsa_bb_for_bb (bb)->m_last_insn))
- return;
-
- FOR_EACH_EDGE (e, ei, bb->succs)
- if (e->flags & EDGE_TRUE_VALUE)
- {
- gcc_assert (!t_bb);
- t_bb = e->dest;
- }
- else
- {
- gcc_assert (!ff);
- ff = e->dest;
- }
-
- if (!ff || ff == next_bb || ff == EXIT_BLOCK_PTR_FOR_FN (cfun))
- return;
-
- emit_unconditional_jump (&hsa_bb_for_bb (ff)->m_label_ref);
-}
-
-/* Emit the function described by hsa_cfun to the various BRIG sections.
- In order: declarations for called functions that have none emitted yet,
- declarations of called internal functions, the function directives, the
- instructions of the entry block, and then for every basic block a
- possible jump from the previous block, its label directive and its
- instructions. Afterwards the label references of all recorded switch
- instructions are filled in and the queued operands are emitted. */
-
-void
-hsa_brig_emit_function (void)
-{
- basic_block bb, prev_bb;
- hsa_insn_basic *insn;
- BrigDirectiveExecutable *ptr_to_fndir;
-
- brig_init ();
-
- brig_insn_count = 0;
- memset (&op_queue, 0, sizeof (op_queue));
- op_queue.projected_size = brig_operand.total_size;
-
- if (!function_offsets)
- function_offsets = new hash_map<tree, BrigCodeOffset32_t> ();
-
- if (!emitted_declarations)
- emitted_declarations = new hash_map <tree, BrigDirectiveExecutable *> ();
-
- for (unsigned i = 0; i < hsa_cfun->m_called_functions.length (); i++)
- {
- tree called = hsa_cfun->m_called_functions[i];
-
- /* If no declaration was emitted for CALLED yet, emit one and remember
- it. */
- if (!emitted_declarations->get (called))
- {
- BrigDirectiveExecutable *e = emit_function_declaration (called);
- emitted_declarations->put (called, e);
- }
- }
-
- for (unsigned i = 0; i < hsa_cfun->m_called_internal_fns.length (); i++)
- {
- hsa_internal_fn *called = hsa_cfun->m_called_internal_fns[i];
- emit_internal_fn_decl (called);
- }
-
- ptr_to_fndir = emit_function_directives (hsa_cfun, false);
- for (insn = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun))->m_first_insn;
- insn;
- insn = insn->m_next)
- emit_insn (insn);
- prev_bb = ENTRY_BLOCK_PTR_FOR_FN (cfun);
- FOR_EACH_BB_FN (bb, cfun)
- {
- perhaps_emit_branch (prev_bb, bb);
- emit_bb_label_directive (hsa_bb_for_bb (bb));
- for (insn = hsa_bb_for_bb (bb)->m_first_insn; insn; insn = insn->m_next)
- emit_insn (insn);
- prev_bb = bb;
- }
- perhaps_emit_branch (prev_bb, NULL);
- ptr_to_fndir->nextModuleEntry = lendian32 (brig_code.total_size);
-
- /* Fill up label references for all sbr instructions. This is done only
- now, after the label directives of all basic blocks were emitted. */
- if (switch_instructions)
- {
- for (unsigned i = 0; i < switch_instructions->length (); i++)
- {
- hsa_insn_sbr *sbr = (*switch_instructions)[i];
- for (unsigned j = 0; j < sbr->m_jump_table.length (); j++)
- {
- hsa_bb *hbb = hsa_bb_for_bb (sbr->m_jump_table[j]);
- sbr->m_label_code_list->m_offsets[j]
- = hbb->m_label_ref.m_directive_offset;
- }
- }
-
- switch_instructions->release ();
- delete switch_instructions;
- switch_instructions = NULL;
- }
-
- if (dump_file)
- {
- fprintf (dump_file, "------- After BRIG emission: -------\n");
- dump_hsa_cfun (dump_file);
- }
-
- emit_queued_operands ();
-}
-
-/* Emit the OMP related symbols: call brig_init and then emit the directive
-   variable for the hsa_num_threads symbol.  */
-
-void
-hsa_brig_emit_omp_symbols (void)
-{
-  brig_init ();
-
-  emit_directive_variable (hsa_num_threads);
-}
-
-/* Create and return the __hsa_global_variables symbol that contains
- all information consumed by libgomp to link global variables
- with their string names used by an HSA kernel.
- The symbol is a static, read-only array of __hsa_variable_info records,
- one per entry of hsa_global_variable_symbols. Each record is initialized
- with the address of the variable name, prefixed with '&' and passed
- through hsa_sanitize_name, and with the address of the variable's decl.
- NOTE(review): the second field is named "omp_data_size" although it is
- initialized with the address of the decl - confirm against the libgomp
- side. */
-
-static tree
-hsa_output_global_variables ()
-{
- unsigned l = hsa_global_variable_symbols->elements ();
-
- tree variable_info_type = make_node (RECORD_TYPE);
- tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("name"), ptr_type_node);
- DECL_CHAIN (id_f1) = NULL_TREE;
- tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("omp_data_size"),
- ptr_type_node);
- DECL_CHAIN (id_f2) = id_f1;
- finish_builtin_struct (variable_info_type, "__hsa_variable_info", id_f2,
- NULL_TREE);
-
- tree int_num_of_global_vars;
- int_num_of_global_vars = build_int_cst (uint32_type_node, l);
- tree global_vars_num_index_type = build_index_type (int_num_of_global_vars);
- tree global_vars_array_type = build_array_type (variable_info_type,
- global_vars_num_index_type);
- TYPE_ARTIFICIAL (global_vars_array_type) = 1;
-
- vec<constructor_elt, va_gc> *global_vars_vec = NULL;
-
- for (hash_table <hsa_noop_symbol_hasher>::iterator it
- = hsa_global_variable_symbols->begin ();
- it != hsa_global_variable_symbols->end (); ++it)
- {
- unsigned len = strlen ((*it)->m_name);
- char *copy = XNEWVEC (char, len + 2); /* '&', the name and a NUL. */
- copy[0] = '&';
- memcpy (copy + 1, (*it)->m_name, len);
- copy[len + 1] = '\0';
- len++; /* LEN now counts the leading '&' as well. */
- hsa_sanitize_name (copy);
-
- tree var_name = build_string (len, copy);
- TREE_TYPE (var_name)
- = build_array_type (char_type_node, build_index_type (size_int (len)));
- free (copy);
-
- vec<constructor_elt, va_gc> *variable_info_vec = NULL;
- CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE,
- build1 (ADDR_EXPR,
- build_pointer_type (TREE_TYPE (var_name)),
- var_name));
- CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE,
- build_fold_addr_expr ((*it)->m_decl));
-
- tree variable_info_ctor = build_constructor (variable_info_type,
- variable_info_vec);
-
- CONSTRUCTOR_APPEND_ELT (global_vars_vec, NULL_TREE,
- variable_info_ctor);
- }
-
- tree global_vars_ctor = build_constructor (global_vars_array_type,
- global_vars_vec);
-
- char tmp_name[64];
- ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_global_variables", 1);
- tree global_vars_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
- get_identifier (tmp_name),
- global_vars_array_type);
- TREE_STATIC (global_vars_table) = 1;
- TREE_READONLY (global_vars_table) = 1;
- TREE_PUBLIC (global_vars_table) = 0;
- DECL_ARTIFICIAL (global_vars_table) = 1;
- DECL_IGNORED_P (global_vars_table) = 1;
- DECL_EXTERNAL (global_vars_table) = 0;
- TREE_CONSTANT (global_vars_table) = 1;
- DECL_INITIAL (global_vars_table) = global_vars_ctor;
- varpool_node::finalize_decl (global_vars_table);
-
- return global_vars_table;
-}
-
-/* Create __hsa_host_functions and __hsa_kernels that contain
- all information consumed by libgomp to register all kernels
- in the BRIG binary.
- *HOST_FUNC_TABLE is set to a static array holding the address of the host
- function of every decl-to-kernel mapping. *KERNELS is set to a static
- array of __hsa_kernel_info records, one per mapping, each holding the
- '&'-prefixed kernel name, the OMP data size, the gridified flag, the
- number of kernel dependencies and either the address of a static array
- with the dependency names or a null pointer when there are none. */
-
-static void
-hsa_output_kernels (tree *host_func_table, tree *kernels)
-{
- unsigned map_count = hsa_get_number_decl_kernel_mappings ();
-
- tree int_num_of_kernels;
- int_num_of_kernels = build_int_cst (uint32_type_node, map_count);
- tree kernel_num_index_type = build_index_type (int_num_of_kernels);
- tree host_functions_array_type = build_array_type (ptr_type_node,
- kernel_num_index_type);
- TYPE_ARTIFICIAL (host_functions_array_type) = 1;
-
- vec<constructor_elt, va_gc> *host_functions_vec = NULL;
- for (unsigned i = 0; i < map_count; ++i)
- {
- tree decl = hsa_get_decl_kernel_mapping_decl (i);
- tree host_fn = build_fold_addr_expr (hsa_get_host_function (decl));
- CONSTRUCTOR_APPEND_ELT (host_functions_vec, NULL_TREE, host_fn);
- }
- tree host_functions_ctor = build_constructor (host_functions_array_type,
- host_functions_vec);
- char tmp_name[64];
- ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_host_functions", 1);
- tree hsa_host_func_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
- get_identifier (tmp_name),
- host_functions_array_type);
- TREE_STATIC (hsa_host_func_table) = 1;
- TREE_READONLY (hsa_host_func_table) = 1;
- TREE_PUBLIC (hsa_host_func_table) = 0;
- DECL_ARTIFICIAL (hsa_host_func_table) = 1;
- DECL_IGNORED_P (hsa_host_func_table) = 1;
- DECL_EXTERNAL (hsa_host_func_table) = 0;
- TREE_CONSTANT (hsa_host_func_table) = 1;
- DECL_INITIAL (hsa_host_func_table) = host_functions_ctor;
- varpool_node::finalize_decl (hsa_host_func_table);
- *host_func_table = hsa_host_func_table;
-
- /* The following code emits the list of kernel_info structures. The
- record type is described first, with its fields chained from the last
- one back to the first. */
-
- tree kernel_info_type = make_node (RECORD_TYPE);
- tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("name"), ptr_type_node);
- DECL_CHAIN (id_f1) = NULL_TREE;
- tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("omp_data_size"),
- unsigned_type_node);
- DECL_CHAIN (id_f2) = id_f1;
- tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("gridified_kernel_p"),
- boolean_type_node);
- DECL_CHAIN (id_f3) = id_f2;
- tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("kernel_dependencies_count"),
- unsigned_type_node);
- DECL_CHAIN (id_f4) = id_f3;
- tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("kernel_dependencies"),
- build_pointer_type (build_pointer_type
- (char_type_node)));
- DECL_CHAIN (id_f5) = id_f4;
- finish_builtin_struct (kernel_info_type, "__hsa_kernel_info", id_f5,
- NULL_TREE);
-
- int_num_of_kernels = build_int_cstu (uint32_type_node, map_count);
- tree kernel_info_vector_type
- = build_array_type (kernel_info_type,
- build_index_type (int_num_of_kernels));
- TYPE_ARTIFICIAL (kernel_info_vector_type) = 1;
-
- vec<constructor_elt, va_gc> *kernel_info_vector_vec = NULL;
- tree kernel_dependencies_vector_type = NULL;
-
- for (unsigned i = 0; i < map_count; ++i)
- {
- tree kernel = hsa_get_decl_kernel_mapping_decl (i);
- char *name = hsa_get_decl_kernel_mapping_name (i);
- unsigned len = strlen (name);
- char *copy = XNEWVEC (char, len + 2); /* '&', the name and a NUL. */
- copy[0] = '&';
- memcpy (copy + 1, name, len);
- copy[len + 1] = '\0';
- len++; /* LEN now counts the leading '&' as well. */
-
- tree kern_name = build_string (len, copy);
- TREE_TYPE (kern_name)
- = build_array_type (char_type_node, build_index_type (size_int (len)));
- free (copy);
-
- unsigned omp_size = hsa_get_decl_kernel_mapping_omp_size (i);
- tree omp_data_size = build_int_cstu (unsigned_type_node, omp_size);
- bool gridified_kernel_p = hsa_get_decl_kernel_mapping_gridified (i);
- tree gridified_kernel_p_tree = build_int_cstu (boolean_type_node,
- gridified_kernel_p);
- unsigned count = 0;
- vec<constructor_elt, va_gc> *kernel_dependencies_vec = NULL;
- if (hsa_decl_kernel_dependencies)
- {
- vec<const char *> **slot;
- slot = hsa_decl_kernel_dependencies->get (kernel);
- if (slot)
- {
- vec <const char *> *dependencies = *slot;
- count = dependencies->length ();
-
- kernel_dependencies_vector_type
- = build_array_type (build_pointer_type (char_type_node),
- build_index_type (size_int (count)));
- TYPE_ARTIFICIAL (kernel_dependencies_vector_type) = 1;
-
- for (unsigned j = 0; j < count; j++)
- {
- const char *d = (*dependencies)[j];
- len = strlen (d); /* LEN is reused; the kernel name is built. */
- tree dependency_name = build_string (len, d);
- TREE_TYPE (dependency_name)
- = build_array_type (char_type_node,
- build_index_type (size_int (len)));
-
- CONSTRUCTOR_APPEND_ELT
- (kernel_dependencies_vec, NULL_TREE,
- build1 (ADDR_EXPR,
- build_pointer_type (TREE_TYPE (dependency_name)),
- dependency_name));
- }
- }
- }
-
- tree dependencies_count = build_int_cstu (unsigned_type_node, count);
-
- vec<constructor_elt, va_gc> *kernel_info_vec = NULL;
- CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
- build1 (ADDR_EXPR,
- build_pointer_type (TREE_TYPE
- (kern_name)),
- kern_name));
- CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, omp_data_size);
- CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
- gridified_kernel_p_tree);
- CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, dependencies_count);
-
- if (count > 0)
- {
- ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_dependencies_list", i);
- gcc_checking_assert (kernel_dependencies_vector_type);
- tree dependencies_list = build_decl (UNKNOWN_LOCATION, VAR_DECL,
- get_identifier (tmp_name),
- kernel_dependencies_vector_type);
-
- TREE_STATIC (dependencies_list) = 1;
- TREE_READONLY (dependencies_list) = 1;
- TREE_PUBLIC (dependencies_list) = 0;
- DECL_ARTIFICIAL (dependencies_list) = 1;
- DECL_IGNORED_P (dependencies_list) = 1;
- DECL_EXTERNAL (dependencies_list) = 0;
- TREE_CONSTANT (dependencies_list) = 1;
- DECL_INITIAL (dependencies_list)
- = build_constructor (kernel_dependencies_vector_type,
- kernel_dependencies_vec);
- varpool_node::finalize_decl (dependencies_list);
-
- CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE,
- build1 (ADDR_EXPR,
- build_pointer_type
- (TREE_TYPE (dependencies_list)),
- dependencies_list));
- }
- else
- CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, null_pointer_node);
-
- tree kernel_info_ctor = build_constructor (kernel_info_type,
- kernel_info_vec);
-
- CONSTRUCTOR_APPEND_ELT (kernel_info_vector_vec, NULL_TREE,
- kernel_info_ctor);
- }
-
- ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_kernels", 1);
- tree hsa_kernels = build_decl (UNKNOWN_LOCATION, VAR_DECL,
- get_identifier (tmp_name),
- kernel_info_vector_type);
-
- TREE_STATIC (hsa_kernels) = 1;
- TREE_READONLY (hsa_kernels) = 1;
- TREE_PUBLIC (hsa_kernels) = 0;
- DECL_ARTIFICIAL (hsa_kernels) = 1;
- DECL_IGNORED_P (hsa_kernels) = 1;
- DECL_EXTERNAL (hsa_kernels) = 0;
- TREE_CONSTANT (hsa_kernels) = 1;
- DECL_INITIAL (hsa_kernels) = build_constructor (kernel_info_vector_type,
- kernel_info_vector_vec);
- varpool_node::finalize_decl (hsa_kernels);
- *kernels = hsa_kernels;
-}
-
-/* Create a static constructor and a static destructor that will register
- and unregister our BRIG data with libgomp.
- BRIG_DECL is the declaration standing for the emitted BRIG module. The
- function builds a static __hsa_image_desc descriptor holding the address
- of BRIG_DECL, the kernel count and kernel info array, and the global
- variable count and info array. It also builds the host table, to which
- four entries are appended: the start of the host function table, its
- end, and two null pointers. Both objects are then passed to the
- BUILT_IN_GOMP_OFFLOAD_REGISTER and BUILT_IN_GOMP_OFFLOAD_UNREGISTER
- calls. */
-
-static void
-hsa_output_libgomp_mapping (tree brig_decl)
-{
- unsigned kernel_count = hsa_get_number_decl_kernel_mappings ();
- unsigned global_variable_count = hsa_global_variable_symbols->elements ();
-
- tree kernels;
- tree host_func_table;
-
- hsa_output_kernels (&host_func_table, &kernels);
- tree global_vars = hsa_output_global_variables ();
-
- tree hsa_image_desc_type = make_node (RECORD_TYPE);
- tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("brig_module"), ptr_type_node);
- DECL_CHAIN (id_f1) = NULL_TREE;
- tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("kernel_count"),
- unsigned_type_node);
-
- DECL_CHAIN (id_f2) = id_f1;
- tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("hsa_kernel_infos"),
- ptr_type_node);
- DECL_CHAIN (id_f3) = id_f2;
- tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("global_variable_count"),
- unsigned_type_node);
- DECL_CHAIN (id_f4) = id_f3;
- tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("hsa_global_variable_infos"),
- ptr_type_node);
- DECL_CHAIN (id_f5) = id_f4;
- finish_builtin_struct (hsa_image_desc_type, "__hsa_image_desc", id_f5,
- NULL_TREE);
- TYPE_ARTIFICIAL (hsa_image_desc_type) = 1;
-
- vec<constructor_elt, va_gc> *img_desc_vec = NULL;
- CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
- build_fold_addr_expr (brig_decl));
- CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
- build_int_cstu (unsigned_type_node, kernel_count));
- CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
- build1 (ADDR_EXPR,
- build_pointer_type (TREE_TYPE (kernels)),
- kernels));
- CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
- build_int_cstu (unsigned_type_node,
- global_variable_count));
- CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
- build1 (ADDR_EXPR,
- build_pointer_type (TREE_TYPE (global_vars)),
- global_vars));
-
- tree img_desc_ctor = build_constructor (hsa_image_desc_type, img_desc_vec);
-
- char tmp_name[64];
- ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_img_descriptor", 1);
- tree hsa_img_descriptor = build_decl (UNKNOWN_LOCATION, VAR_DECL,
- get_identifier (tmp_name),
- hsa_image_desc_type);
- TREE_STATIC (hsa_img_descriptor) = 1;
- TREE_READONLY (hsa_img_descriptor) = 1;
- TREE_PUBLIC (hsa_img_descriptor) = 0;
- DECL_ARTIFICIAL (hsa_img_descriptor) = 1;
- DECL_IGNORED_P (hsa_img_descriptor) = 1;
- DECL_EXTERNAL (hsa_img_descriptor) = 0;
- TREE_CONSTANT (hsa_img_descriptor) = 1;
- DECL_INITIAL (hsa_img_descriptor) = img_desc_ctor;
- varpool_node::finalize_decl (hsa_img_descriptor);
-
- /* Construct the "host_table" libgomp expects. */
- tree index_type = build_index_type (build_int_cst (integer_type_node, 4));
- tree libgomp_host_table_type = build_array_type (ptr_type_node, index_type);
- TYPE_ARTIFICIAL (libgomp_host_table_type) = 1;
- vec<constructor_elt, va_gc> *libgomp_host_table_vec = NULL;
- tree host_func_table_addr = build_fold_addr_expr (host_func_table);
- CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
- host_func_table_addr);
- offset_int func_table_size
- = wi::to_offset (TYPE_SIZE_UNIT (ptr_type_node)) * kernel_count;
- CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
- fold_build2 (POINTER_PLUS_EXPR,
- TREE_TYPE (host_func_table_addr),
- host_func_table_addr,
- build_int_cst (size_type_node,
- func_table_size.to_uhwi
- ())));
- CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
- CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
- tree libgomp_host_table_ctor = build_constructor (libgomp_host_table_type,
- libgomp_host_table_vec);
- ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_libgomp_host_table", 1);
- tree hsa_libgomp_host_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
- get_identifier (tmp_name),
- libgomp_host_table_type);
-
- TREE_STATIC (hsa_libgomp_host_table) = 1;
- TREE_READONLY (hsa_libgomp_host_table) = 1;
- TREE_PUBLIC (hsa_libgomp_host_table) = 0;
- DECL_ARTIFICIAL (hsa_libgomp_host_table) = 1;
- DECL_IGNORED_P (hsa_libgomp_host_table) = 1;
- DECL_EXTERNAL (hsa_libgomp_host_table) = 0;
- TREE_CONSTANT (hsa_libgomp_host_table) = 1;
- DECL_INITIAL (hsa_libgomp_host_table) = libgomp_host_table_ctor;
- varpool_node::finalize_decl (hsa_libgomp_host_table);
-
- /* Generate an initializer with a call to the registration routine. */
-
- tree offload_register
- = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_REGISTER);
- gcc_checking_assert (offload_register);
-
- tree *hsa_ctor_stmts = hsa_get_ctor_statements ();
- append_to_statement_list
- (build_call_expr (offload_register, 4,
- build_int_cstu (unsigned_type_node,
- GOMP_VERSION_PACK (GOMP_VERSION,
- GOMP_VERSION_HSA)),
- build_fold_addr_expr (hsa_libgomp_host_table),
- build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
- build_fold_addr_expr (hsa_img_descriptor)),
- hsa_ctor_stmts);
-
- cgraph_build_static_cdtor ('I', *hsa_ctor_stmts, DEFAULT_INIT_PRIORITY);
-
- tree offload_unregister
- = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_UNREGISTER);
- gcc_checking_assert (offload_unregister);
-
- tree *hsa_dtor_stmts = hsa_get_dtor_statements ();
- append_to_statement_list
- (build_call_expr (offload_unregister, 4,
- build_int_cstu (unsigned_type_node,
- GOMP_VERSION_PACK (GOMP_VERSION,
- GOMP_VERSION_HSA)),
- build_fold_addr_expr (hsa_libgomp_host_table),
- build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
- build_fold_addr_expr (hsa_img_descriptor)),
- hsa_dtor_stmts);
- cgraph_build_static_cdtor ('D', *hsa_dtor_stmts, DEFAULT_INIT_PRIORITY);
-}
-
-/* Emit the BRIG module we have compiled to a section in the final assembly
-   and also create a compile unit static constructor that will register the
-   BRIG module with libgomp.  Does nothing if BRIG emission was never
-   initialized.
-
-   The steps are: patch the code references of all recorded function calls
-   with the final function offsets, downgrade declarations of functions that
-   failed HSAIL generation to program linkage, write the module header, the
-   section index and the data, code and operand sections (each followed by
-   padding), emit the libgomp registration data, and finally release all
-   compilation-unit-wide BRIG data.  */
-
-void
-hsa_output_brig (void)
-{
-  section *saved_section;
-
-  if (!brig_initialized)
-    return;
-
-  for (unsigned i = 0; i < function_call_linkage.length (); i++)
-    {
-      function_linkage_pair p = function_call_linkage[i];
-
-      BrigCodeOffset32_t *func_offset
-        = function_offsets->get (p.function_decl);
-      /* hash_map::get returns NULL for a missing key, so check the pointer
-         itself before looking at the offset it points to.  */
-      gcc_assert (func_offset && *func_offset);
-      BrigOperandCodeRef *code_ref
-        = (BrigOperandCodeRef *) (brig_operand.get_ptr_by_offset (p.offset));
-      gcc_assert (code_ref->base.kind == BRIG_KIND_OPERAND_CODE_REF);
-      code_ref->ref = lendian32 (*func_offset);
-    }
-
-  /* Iterate all function declarations and if we meet a function that should
-     have module linkage and we are unable to emit HSAIL for the function,
-     then change the linkage to program linkage.  Doing so, we will emit
-     a valid BRIG image.  */
-  if (hsa_failed_functions != NULL && emitted_declarations != NULL)
-    for (hash_map <tree, BrigDirectiveExecutable *>::iterator it
-         = emitted_declarations->begin ();
-         it != emitted_declarations->end ();
-         ++it)
-      {
-        if (hsa_failed_functions->contains ((*it).first))
-          (*it).second->linkage = BRIG_LINKAGE_PROGRAM;
-      }
-
-  saved_section = in_section;
-
-  switch_to_section (get_section (BRIG_ELF_SECTION_NAME, SECTION_NOTYPE, NULL));
-  char tmp_name[64];
-  ASM_GENERATE_INTERNAL_LABEL (tmp_name, BRIG_LABEL_STRING, 1);
-  ASM_OUTPUT_LABEL (asm_out_file, tmp_name);
-  tree brig_id = get_identifier (tmp_name);
-  tree brig_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, brig_id,
-                               char_type_node);
-  SET_DECL_ASSEMBLER_NAME (brig_decl, brig_id);
-  TREE_ADDRESSABLE (brig_decl) = 1;
-  TREE_READONLY (brig_decl) = 1;
-  DECL_ARTIFICIAL (brig_decl) = 1;
-  DECL_IGNORED_P (brig_decl) = 1;
-  TREE_STATIC (brig_decl) = 1;
-  TREE_PUBLIC (brig_decl) = 0;
-  TREE_USED (brig_decl) = 1;
-  DECL_INITIAL (brig_decl) = brig_decl;
-  TREE_ASM_WRITTEN (brig_decl) = 1;
-
-  BrigModuleHeader module_header;
-  memcpy (&module_header.identification, "HSA BRIG",
-          sizeof (module_header.identification));
-  module_header.brigMajor = lendian32 (BRIG_VERSION_BRIG_MAJOR);
-  module_header.brigMinor = lendian32 (BRIG_VERSION_BRIG_MINOR);
-  uint64_t section_index[3];
-
-  /* Note that a section whose size is already a multiple of the alignment
-     still receives a full HSA_SECTION_ALIGNMENT bytes of padding.  */
-  int data_padding, code_padding, operand_padding;
-  data_padding = HSA_SECTION_ALIGNMENT
-                 - brig_data.total_size % HSA_SECTION_ALIGNMENT;
-  code_padding = HSA_SECTION_ALIGNMENT
-                 - brig_code.total_size % HSA_SECTION_ALIGNMENT;
-  operand_padding = HSA_SECTION_ALIGNMENT
-                    - brig_operand.total_size % HSA_SECTION_ALIGNMENT;
-
-  uint64_t module_size = sizeof (module_header)
-                         + sizeof (section_index)
-                         + brig_data.total_size
-                         + data_padding
-                         + brig_code.total_size
-                         + code_padding
-                         + brig_operand.total_size
-                         + operand_padding;
-  gcc_assert ((module_size % 16) == 0);
-  module_header.byteCount = lendian64 (module_size);
-  memset (&module_header.hash, 0, sizeof (module_header.hash));
-  module_header.reserved = 0;
-  module_header.sectionCount = lendian32 (3);
-  module_header.sectionIndex = lendian64 (sizeof (module_header));
-  assemble_string ((const char *) &module_header, sizeof (module_header));
-
-  /* The section index holds the offsets of the data, code and operand
-     sections from the start of the module, in that order.  */
-  uint64_t off = sizeof (module_header) + sizeof (section_index);
-  section_index[0] = lendian64 (off);
-  off += brig_data.total_size + data_padding;
-  section_index[1] = lendian64 (off);
-  off += brig_code.total_size + code_padding;
-  section_index[2] = lendian64 (off);
-  assemble_string ((const char *) &section_index, sizeof (section_index));
-
-  char padding[HSA_SECTION_ALIGNMENT];
-  memset (padding, 0, sizeof (padding));
-
-  brig_data.output ();
-  assemble_string (padding, data_padding);
-  brig_code.output ();
-  assemble_string (padding, code_padding);
-  brig_operand.output ();
-  assemble_string (padding, operand_padding);
-
-  if (saved_section)
-    switch_to_section (saved_section);
-
-  hsa_output_libgomp_mapping (brig_decl);
-
-  hsa_free_decl_kernel_mapping ();
-  brig_release_data ();
-  hsa_deinit_compilation_unit_data ();
-
-  delete emitted_declarations;
-  emitted_declarations = NULL;
-  delete function_offsets;
-  function_offsets = NULL;
-}
+++ /dev/null
-/* This file contains the definitions and documentation for the
- HSA builtins used in the GNU compiler.
- Copyright (C) 2005-2020 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 3, or (at your option) any later
-version.
-
-GCC is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING3. If not see
-<http://www.gnu.org/licenses/>. */
-
-/* Before including this file, you should define a macro:
-
- DEF_HSA_BUILTIN (ENUM, NAME, TYPE, ATTRS)
-
- See builtins.def for details. */
-
-/* The reason why they aren't in gcc/builtins.def is that the Fortran front end
- doesn't source those.
- Every builtin defined below has the type BT_FN_UINT_UINT and carries the
- attribute list ATTR_CONST_NOTHROW_LEAF_LIST. */
-
-DEF_HSA_BUILTIN (BUILT_IN_HSA_WORKGROUPID, "hsa_workgroupid",
- BT_FN_UINT_UINT, ATTR_CONST_NOTHROW_LEAF_LIST)
-DEF_HSA_BUILTIN (BUILT_IN_HSA_WORKITEMID, "hsa_workitemid",
- BT_FN_UINT_UINT, ATTR_CONST_NOTHROW_LEAF_LIST)
-DEF_HSA_BUILTIN (BUILT_IN_HSA_WORKITEMABSID, "hsa_workitemabsid",
- BT_FN_UINT_UINT, ATTR_CONST_NOTHROW_LEAF_LIST)
-DEF_HSA_BUILTIN (BUILT_IN_HSA_GRIDSIZE, "hsa_gridsize",
- BT_FN_UINT_UINT, ATTR_CONST_NOTHROW_LEAF_LIST)
-DEF_HSA_BUILTIN (BUILT_IN_HSA_CURRENTWORKGROUPSIZE, "hsa_currentworkgroupsize",
- BT_FN_UINT_UINT, ATTR_CONST_NOTHROW_LEAF_LIST)
+++ /dev/null
-/* Implementation of commonly needed HSAIL related functions and methods.
- Copyright (C) 2013-2020 Free Software Foundation, Inc.
- Contributed by Martin Jambor <mjambor@suse.cz> and
- Martin Liska <mliska@suse.cz>.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING3. If not see
-<http://www.gnu.org/licenses/>. */
-
-#include "config.h"
-#include "system.h"
-#include "coretypes.h"
-#include "tm.h"
-#include "is-a.h"
-#include "hash-set.h"
-#include "hash-map.h"
-#include "vec.h"
-#include "tree.h"
-#include "dumpfile.h"
-#include "gimple-pretty-print.h"
-#include "diagnostic-core.h"
-#include "alloc-pool.h"
-#include "cgraph.h"
-#include "print-tree.h"
-#include "stringpool.h"
-#include "symbol-summary.h"
-#include "hsa-common.h"
-#include "internal-fn.h"
-#include "ctype.h"
-#include "builtins.h"
-#include "stringpool.h"
-#include "attribs.h"
-
-/* Pointer to the structure containing the intermediate HSA representation
- of the function being generated. */
-class hsa_function_representation *hsa_cfun;
-
-/* Element of the mapping vector between a host decl and an HSA kernel. */
-
-struct GTY(()) hsa_decl_kernel_map_element
-{
- /* The decl of the host function. */
- tree decl;
- /* Name of the HSA kernel in BRIG. Skipped by the garbage collector. */
- char * GTY((skip)) name;
- /* Size of OMP data, if the kernel contains a kernel dispatch. */
- unsigned omp_data_size;
- /* True if the function is a gridified kernel. */
- bool gridified_kernel_p;
-};
-
-/* Mapping between decls and corresponding HSA kernels in this compilation
- unit. */
-
-static GTY (()) vec<hsa_decl_kernel_map_element, va_gc>
- *hsa_decl_kernel_mapping;
-
-/* Mapping from the decl of a function to the names of the HSA kernels
- called by that function. */
-hash_map <tree, vec <const char *> *> *hsa_decl_kernel_dependencies;
-
-/* Hash table used to look up the symbol for a global variable decl. */
-hash_table <hsa_noop_symbol_hasher> *hsa_global_variable_symbols;
-
-/* HSA summaries. */
-hsa_summary_t *hsa_summaries = NULL;
-
-/* Symbol representing the HSA number of threads. */
-hsa_symbol *hsa_num_threads = NULL;
-
-/* Set of HSA functions that cannot be expanded to HSAIL. */
-hash_set <tree> *hsa_failed_functions = NULL;
-
-/* True if compilation unit-wide data are already allocated and initialized. */
-static bool compilation_unit_data_initialized;
-
-/* Return true if FNDECL represents an HSA-callable function. */
-
-bool
-hsa_callable_function_p (tree fndecl)
-{
- return (lookup_attribute ("omp declare target", DECL_ATTRIBUTES (fndecl))
- && !lookup_attribute ("oacc function", DECL_ATTRIBUTES (fndecl)));
-}
-
-/* Allocate HSA structures that are used when dealing with different
- functions. */
-
-void
-hsa_init_compilation_unit_data (void)
-{
- if (compilation_unit_data_initialized)
- return;
-
- compilation_unit_data_initialized = true;
-
- hsa_global_variable_symbols = new hash_table <hsa_noop_symbol_hasher> (8);
- hsa_failed_functions = new hash_set <tree> ();
- hsa_emitted_internal_decls = new hash_table <hsa_internal_fn_hasher> (2);
-}
-
-/* Free data structures that are used when dealing with different
- functions. */
-
-void
-hsa_deinit_compilation_unit_data (void)
-{
- gcc_assert (compilation_unit_data_initialized);
-
- delete hsa_failed_functions;
- delete hsa_emitted_internal_decls;
-
- for (hash_table <hsa_noop_symbol_hasher>::iterator it
- = hsa_global_variable_symbols->begin ();
- it != hsa_global_variable_symbols->end ();
- ++it)
- {
- hsa_symbol *sym = *it;
- delete sym;
- }
-
- delete hsa_global_variable_symbols;
-
- if (hsa_num_threads)
- {
- delete hsa_num_threads;
- hsa_num_threads = NULL;
- }
-
- compilation_unit_data_initialized = false;
-}
-
-/* Return true if we are generating large HSA machine model. */
-
-bool
-hsa_machine_large_p (void)
-{
- /* FIXME: I suppose this is technically wrong but should work for me now. */
- return (GET_MODE_BITSIZE (Pmode) == 64);
-}
-
-/* Return the HSA profile we are using. */
-
-bool
-hsa_full_profile_p (void)
-{
- return true;
-}
-
-/* Return true if a register in operand number OPNUM of instruction
- is an output. False if it is an input. */
-
-bool
-hsa_insn_basic::op_output_p (unsigned opnum)
-{
- switch (m_opcode)
- {
- case HSA_OPCODE_PHI:
- case BRIG_OPCODE_CBR:
- case BRIG_OPCODE_SBR:
- case BRIG_OPCODE_ST:
- case BRIG_OPCODE_SIGNALNORET:
- case BRIG_OPCODE_DEBUGTRAP:
- /* FIXME: There are probably missing cases here, double check. */
- return false;
- case BRIG_OPCODE_EXPAND:
- /* Example: expand_v4_b32_b128 (dest0, dest1, dest2, dest3), src0. */
- return opnum < operand_count () - 1;
- default:
- return opnum == 0;
- }
-}
-
-/* Return true if OPCODE is an floating-point bit instruction opcode. */
-
-bool
-hsa_opcode_floating_bit_insn_p (BrigOpcode16_t opcode)
-{
- switch (opcode)
- {
- case BRIG_OPCODE_NEG:
- case BRIG_OPCODE_ABS:
- case BRIG_OPCODE_CLASS:
- case BRIG_OPCODE_COPYSIGN:
- return true;
- default:
- return false;
- }
-}
-
-/* Return the number of destination operands for this INSN. */
-
-unsigned
-hsa_insn_basic::input_count ()
-{
- switch (m_opcode)
- {
- default:
- return 1;
-
- case BRIG_OPCODE_NOP:
- return 0;
-
- case BRIG_OPCODE_EXPAND:
- return 2;
-
- case BRIG_OPCODE_LD:
- /* ld_v[234] not yet handled. */
- return 1;
-
- case BRIG_OPCODE_ST:
- return 0;
-
- case BRIG_OPCODE_ATOMICNORET:
- return 0;
-
- case BRIG_OPCODE_SIGNAL:
- return 1;
-
- case BRIG_OPCODE_SIGNALNORET:
- return 0;
-
- case BRIG_OPCODE_MEMFENCE:
- return 0;
-
- case BRIG_OPCODE_RDIMAGE:
- case BRIG_OPCODE_LDIMAGE:
- case BRIG_OPCODE_STIMAGE:
- case BRIG_OPCODE_QUERYIMAGE:
- case BRIG_OPCODE_QUERYSAMPLER:
- sorry ("HSA image ops not handled");
- return 0;
-
- case BRIG_OPCODE_CBR:
- case BRIG_OPCODE_BR:
- return 0;
-
- case BRIG_OPCODE_SBR:
- return 0; /* ??? */
-
- case BRIG_OPCODE_WAVEBARRIER:
- return 0; /* ??? */
-
- case BRIG_OPCODE_BARRIER:
- case BRIG_OPCODE_ARRIVEFBAR:
- case BRIG_OPCODE_INITFBAR:
- case BRIG_OPCODE_JOINFBAR:
- case BRIG_OPCODE_LEAVEFBAR:
- case BRIG_OPCODE_RELEASEFBAR:
- case BRIG_OPCODE_WAITFBAR:
- return 0;
-
- case BRIG_OPCODE_LDF:
- return 1;
-
- case BRIG_OPCODE_ACTIVELANECOUNT:
- case BRIG_OPCODE_ACTIVELANEID:
- case BRIG_OPCODE_ACTIVELANEMASK:
- case BRIG_OPCODE_ACTIVELANEPERMUTE:
- return 1; /* ??? */
-
- case BRIG_OPCODE_CALL:
- case BRIG_OPCODE_SCALL:
- case BRIG_OPCODE_ICALL:
- return 0;
-
- case BRIG_OPCODE_RET:
- return 0;
-
- case BRIG_OPCODE_ALLOCA:
- return 1;
-
- case BRIG_OPCODE_CLEARDETECTEXCEPT:
- return 0;
-
- case BRIG_OPCODE_SETDETECTEXCEPT:
- return 0;
-
- case BRIG_OPCODE_PACKETCOMPLETIONSIG:
- case BRIG_OPCODE_PACKETID:
- case BRIG_OPCODE_CASQUEUEWRITEINDEX:
- case BRIG_OPCODE_LDQUEUEREADINDEX:
- case BRIG_OPCODE_LDQUEUEWRITEINDEX:
- case BRIG_OPCODE_STQUEUEREADINDEX:
- case BRIG_OPCODE_STQUEUEWRITEINDEX:
- return 1; /* ??? */
-
- case BRIG_OPCODE_ADDQUEUEWRITEINDEX:
- return 1;
-
- case BRIG_OPCODE_DEBUGTRAP:
- return 0;
-
- case BRIG_OPCODE_GROUPBASEPTR:
- case BRIG_OPCODE_KERNARGBASEPTR:
- return 1; /* ??? */
-
- case HSA_OPCODE_ARG_BLOCK:
- return 0;
-
- case BRIG_KIND_DIRECTIVE_COMMENT:
- return 0;
- }
-}
-
-/* Return the number of source operands for this INSN. */
-
-unsigned
-hsa_insn_basic::num_used_ops ()
-{
- gcc_checking_assert (input_count () <= operand_count ());
-
- return operand_count () - input_count ();
-}
-
-/* Set alignment to VALUE. */
-
-void
-hsa_insn_mem::set_align (BrigAlignment8_t value)
-{
- /* TODO: Perhaps remove this dump later on: */
- if (dump_file && (dump_flags & TDF_DETAILS) && value < m_align)
- {
- fprintf (dump_file, "Decreasing alignment to %u in instruction ", value);
- dump_hsa_insn (dump_file, this);
- }
- m_align = value;
-}
-
-/* Return size of HSA type T in bits. */
-
-unsigned
-hsa_type_bit_size (BrigType16_t t)
-{
- switch (t)
- {
- case BRIG_TYPE_B1:
- return 1;
-
- case BRIG_TYPE_U8:
- case BRIG_TYPE_S8:
- case BRIG_TYPE_B8:
- return 8;
-
- case BRIG_TYPE_U16:
- case BRIG_TYPE_S16:
- case BRIG_TYPE_B16:
- case BRIG_TYPE_F16:
- return 16;
-
- case BRIG_TYPE_U32:
- case BRIG_TYPE_S32:
- case BRIG_TYPE_B32:
- case BRIG_TYPE_F32:
- case BRIG_TYPE_U8X4:
- case BRIG_TYPE_U16X2:
- case BRIG_TYPE_S8X4:
- case BRIG_TYPE_S16X2:
- case BRIG_TYPE_F16X2:
- return 32;
-
- case BRIG_TYPE_U64:
- case BRIG_TYPE_S64:
- case BRIG_TYPE_F64:
- case BRIG_TYPE_B64:
- case BRIG_TYPE_U8X8:
- case BRIG_TYPE_U16X4:
- case BRIG_TYPE_U32X2:
- case BRIG_TYPE_S8X8:
- case BRIG_TYPE_S16X4:
- case BRIG_TYPE_S32X2:
- case BRIG_TYPE_F16X4:
- case BRIG_TYPE_F32X2:
-
- return 64;
-
- case BRIG_TYPE_B128:
- case BRIG_TYPE_U8X16:
- case BRIG_TYPE_U16X8:
- case BRIG_TYPE_U32X4:
- case BRIG_TYPE_U64X2:
- case BRIG_TYPE_S8X16:
- case BRIG_TYPE_S16X8:
- case BRIG_TYPE_S32X4:
- case BRIG_TYPE_S64X2:
- case BRIG_TYPE_F16X8:
- case BRIG_TYPE_F32X4:
- case BRIG_TYPE_F64X2:
- return 128;
-
- default:
- gcc_assert (hsa_seen_error ());
- return t;
- }
-}
-
-/* Return BRIG bit-type with BITSIZE length. */
-
-BrigType16_t
-hsa_bittype_for_bitsize (unsigned bitsize)
-{
- switch (bitsize)
- {
- case 1:
- return BRIG_TYPE_B1;
- case 8:
- return BRIG_TYPE_B8;
- case 16:
- return BRIG_TYPE_B16;
- case 32:
- return BRIG_TYPE_B32;
- case 64:
- return BRIG_TYPE_B64;
- case 128:
- return BRIG_TYPE_B128;
- default:
- gcc_unreachable ();
- }
-}
-
-/* Return BRIG unsigned int type with BITSIZE length. */
-
-BrigType16_t
-hsa_uint_for_bitsize (unsigned bitsize)
-{
- switch (bitsize)
- {
- case 8:
- return BRIG_TYPE_U8;
- case 16:
- return BRIG_TYPE_U16;
- case 32:
- return BRIG_TYPE_U32;
- case 64:
- return BRIG_TYPE_U64;
- default:
- gcc_unreachable ();
- }
-}
-
-/* Return BRIG float type with BITSIZE length. */
-
-BrigType16_t
-hsa_float_for_bitsize (unsigned bitsize)
-{
- switch (bitsize)
- {
- case 16:
- return BRIG_TYPE_F16;
- case 32:
- return BRIG_TYPE_F32;
- case 64:
- return BRIG_TYPE_F64;
- default:
- gcc_unreachable ();
- }
-}
-
-/* Return HSA bit-type with the same size as the type T. */
-
-BrigType16_t
-hsa_bittype_for_type (BrigType16_t t)
-{
- return hsa_bittype_for_bitsize (hsa_type_bit_size (t));
-}
-
-/* Return HSA unsigned integer type with the same size as the type T. */
-
-BrigType16_t
-hsa_unsigned_type_for_type (BrigType16_t t)
-{
- return hsa_uint_for_bitsize (hsa_type_bit_size (t));
-}
-
-/* Return true if TYPE is a packed HSA type. */
-
-bool
-hsa_type_packed_p (BrigType16_t type)
-{
- return (type & BRIG_TYPE_PACK_MASK) != BRIG_TYPE_PACK_NONE;
-}
-
-/* Return true if and only if TYPE is a floating point number type. */
-
-bool
-hsa_type_float_p (BrigType16_t type)
-{
- switch (type & BRIG_TYPE_BASE_MASK)
- {
- case BRIG_TYPE_F16:
- case BRIG_TYPE_F32:
- case BRIG_TYPE_F64:
- return true;
- default:
- return false;
- }
-}
-
-/* Return true if and only if TYPE is an integer number type. */
-
-bool
-hsa_type_integer_p (BrigType16_t type)
-{
- switch (type & BRIG_TYPE_BASE_MASK)
- {
- case BRIG_TYPE_U8:
- case BRIG_TYPE_U16:
- case BRIG_TYPE_U32:
- case BRIG_TYPE_U64:
- case BRIG_TYPE_S8:
- case BRIG_TYPE_S16:
- case BRIG_TYPE_S32:
- case BRIG_TYPE_S64:
- return true;
- default:
- return false;
- }
-}
-
-/* Return true if and only if TYPE is an bit-type. */
-
-bool
-hsa_btype_p (BrigType16_t type)
-{
- switch (type & BRIG_TYPE_BASE_MASK)
- {
- case BRIG_TYPE_B8:
- case BRIG_TYPE_B16:
- case BRIG_TYPE_B32:
- case BRIG_TYPE_B64:
- case BRIG_TYPE_B128:
- return true;
- default:
- return false;
- }
-}
-
-
-/* Return HSA alignment encoding alignment to N bits. */
-
-BrigAlignment8_t
-hsa_alignment_encoding (unsigned n)
-{
- gcc_assert (n >= 8 && !(n & (n - 1)));
- if (n >= 256)
- return BRIG_ALIGNMENT_32;
-
- switch (n)
- {
- case 8:
- return BRIG_ALIGNMENT_1;
- case 16:
- return BRIG_ALIGNMENT_2;
- case 32:
- return BRIG_ALIGNMENT_4;
- case 64:
- return BRIG_ALIGNMENT_8;
- case 128:
- return BRIG_ALIGNMENT_16;
- default:
- gcc_unreachable ();
- }
-}
-
-/* Return HSA alignment encoding alignment of T got
- by get_object_alignment. */
-
-BrigAlignment8_t
-hsa_object_alignment (tree t)
-{
- return hsa_alignment_encoding (get_object_alignment (t));
-}
-
-/* Return byte alignment for given BrigAlignment8_t value. */
-
-unsigned
-hsa_byte_alignment (BrigAlignment8_t alignment)
-{
- gcc_assert (alignment != BRIG_ALIGNMENT_NONE);
-
- return 1 << (alignment - 1);
-}
-
-/* Return natural alignment of HSA TYPE. */
-
-BrigAlignment8_t
-hsa_natural_alignment (BrigType16_t type)
-{
- return hsa_alignment_encoding (hsa_type_bit_size (type & ~BRIG_TYPE_ARRAY));
-}
-
-/* Call the correct destructor of a HSA instruction. */
-
-void
-hsa_destroy_insn (hsa_insn_basic *insn)
-{
- if (hsa_insn_phi *phi = dyn_cast <hsa_insn_phi *> (insn))
- phi->~hsa_insn_phi ();
- else if (hsa_insn_cbr *br = dyn_cast <hsa_insn_cbr *> (insn))
- br->~hsa_insn_cbr ();
- else if (hsa_insn_cmp *cmp = dyn_cast <hsa_insn_cmp *> (insn))
- cmp->~hsa_insn_cmp ();
- else if (hsa_insn_mem *mem = dyn_cast <hsa_insn_mem *> (insn))
- mem->~hsa_insn_mem ();
- else if (hsa_insn_atomic *atomic = dyn_cast <hsa_insn_atomic *> (insn))
- atomic->~hsa_insn_atomic ();
- else if (hsa_insn_seg *seg = dyn_cast <hsa_insn_seg *> (insn))
- seg->~hsa_insn_seg ();
- else if (hsa_insn_call *call = dyn_cast <hsa_insn_call *> (insn))
- call->~hsa_insn_call ();
- else if (hsa_insn_arg_block *block = dyn_cast <hsa_insn_arg_block *> (insn))
- block->~hsa_insn_arg_block ();
- else if (hsa_insn_sbr *sbr = dyn_cast <hsa_insn_sbr *> (insn))
- sbr->~hsa_insn_sbr ();
- else if (hsa_insn_br *br = dyn_cast <hsa_insn_br *> (insn))
- br->~hsa_insn_br ();
- else if (hsa_insn_comment *comment = dyn_cast <hsa_insn_comment *> (insn))
- comment->~hsa_insn_comment ();
- else
- insn->~hsa_insn_basic ();
-}
-
-/* Call the correct destructor of a HSA operand. */
-
-void
-hsa_destroy_operand (hsa_op_base *op)
-{
- if (hsa_op_code_list *list = dyn_cast <hsa_op_code_list *> (op))
- list->~hsa_op_code_list ();
- else if (hsa_op_operand_list *list = dyn_cast <hsa_op_operand_list *> (op))
- list->~hsa_op_operand_list ();
- else if (hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op))
- reg->~hsa_op_reg ();
- else if (hsa_op_immed *immed = dyn_cast <hsa_op_immed *> (op))
- immed->~hsa_op_immed ();
- else
- op->~hsa_op_base ();
-}
-
-/* Create a mapping between the original function DECL and kernel name NAME. */
-
-void
-hsa_add_kern_decl_mapping (tree decl, char *name, unsigned omp_data_size,
- bool gridified_kernel_p)
-{
- hsa_decl_kernel_map_element dkm;
- dkm.decl = decl;
- dkm.name = name;
- dkm.omp_data_size = omp_data_size;
- dkm.gridified_kernel_p = gridified_kernel_p;
- vec_safe_push (hsa_decl_kernel_mapping, dkm);
-}
-
-/* Return the number of kernel decl name mappings. */
-
-unsigned
-hsa_get_number_decl_kernel_mappings (void)
-{
- return vec_safe_length (hsa_decl_kernel_mapping);
-}
-
-/* Return the decl in the Ith kernel decl name mapping. */
-
-tree
-hsa_get_decl_kernel_mapping_decl (unsigned i)
-{
- return (*hsa_decl_kernel_mapping)[i].decl;
-}
-
-/* Return the name in the Ith kernel decl name mapping. */
-
-char *
-hsa_get_decl_kernel_mapping_name (unsigned i)
-{
- return (*hsa_decl_kernel_mapping)[i].name;
-}
-
-/* Return maximum OMP size for kernel decl name mapping. */
-
-unsigned
-hsa_get_decl_kernel_mapping_omp_size (unsigned i)
-{
- return (*hsa_decl_kernel_mapping)[i].omp_data_size;
-}
-
-/* Return if the function is gridified kernel in decl name mapping. */
-
-bool
-hsa_get_decl_kernel_mapping_gridified (unsigned i)
-{
- return (*hsa_decl_kernel_mapping)[i].gridified_kernel_p;
-}
-
-/* Free the mapping between original decls and kernel names. */
-
-void
-hsa_free_decl_kernel_mapping (void)
-{
- if (hsa_decl_kernel_mapping == NULL)
- return;
-
- for (unsigned i = 0; i < hsa_decl_kernel_mapping->length (); ++i)
- free ((*hsa_decl_kernel_mapping)[i].name);
- ggc_free (hsa_decl_kernel_mapping);
-}
-
-/* Add new kernel dependency. */
-
-void
-hsa_add_kernel_dependency (tree caller, const char *called_function)
-{
- if (hsa_decl_kernel_dependencies == NULL)
- hsa_decl_kernel_dependencies = new hash_map<tree, vec<const char *> *> ();
-
- vec <const char *> *s = NULL;
- vec <const char *> **slot = hsa_decl_kernel_dependencies->get (caller);
- if (slot == NULL)
- {
- s = new vec <const char *> ();
- hsa_decl_kernel_dependencies->put (caller, s);
- }
- else
- s = *slot;
-
- s->safe_push (called_function);
-}
-
-/* Expansion to HSA needs a few gc roots to hold types, constructors etc. In
- order to minimize the number of GTY roots, we'll root them all in the
- following array. The individual elements should only be accessed by the
- very simple getters (of a pointer-to-tree) below. */
-
-static GTY(()) tree hsa_tree_gt_roots[3];
-
-tree *
-hsa_get_ctor_statements (void)
-{
- return &hsa_tree_gt_roots[0];
-}
-
-tree *
-hsa_get_dtor_statements (void)
-{
- return &hsa_tree_gt_roots[1];
-}
-
-tree *
-hsa_get_kernel_dispatch_type (void)
-{
- return &hsa_tree_gt_roots[2];
-}
-
-/* Modify the name P in-place so that it is a valid HSA identifier. */
-
-void
-hsa_sanitize_name (char *p)
-{
- for (; *p; p++)
- if (*p == '.' || *p == '-')
- *p = '_';
-}
-
-/* Clone the name P, set trailing ampersand and sanitize the name. */
-
-char *
-hsa_brig_function_name (const char *p)
-{
- unsigned len = strlen (p);
- char *buf = XNEWVEC (char, len + 2);
-
- buf[0] = '&';
- buf[len + 1] = '\0';
- memcpy (buf + 1, p, len);
-
- hsa_sanitize_name (buf);
- return buf;
-}
-
-/* Add a flatten attribute and disable vectorization for gpu implementation
- function decl GDECL. */
-
-void hsa_summary_t::process_gpu_implementation_attributes (tree gdecl)
-{
- DECL_ATTRIBUTES (gdecl)
- = tree_cons (get_identifier ("flatten"), NULL_TREE,
- DECL_ATTRIBUTES (gdecl));
-
- tree fn_opts = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl);
- if (fn_opts == NULL_TREE)
- fn_opts = optimization_default_node;
- fn_opts = copy_node (fn_opts);
- TREE_OPTIMIZATION (fn_opts)->x_flag_tree_loop_vectorize = false;
- TREE_OPTIMIZATION (fn_opts)->x_flag_tree_slp_vectorize = false;
- DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl) = fn_opts;
-}
-
-void
-hsa_summary_t::link_functions (cgraph_node *gpu, cgraph_node *host,
- hsa_function_kind kind, bool gridified_kernel_p)
-{
- hsa_function_summary *gpu_summary = get_create (gpu);
- hsa_function_summary *host_summary = get_create (host);
-
- gpu_summary->m_kind = kind;
- host_summary->m_kind = kind;
-
- gpu_summary->m_gpu_implementation_p = true;
- host_summary->m_gpu_implementation_p = false;
-
- gpu_summary->m_gridified_kernel_p = gridified_kernel_p;
- host_summary->m_gridified_kernel_p = gridified_kernel_p;
-
- gpu_summary->m_bound_function = host;
- host_summary->m_bound_function = gpu;
-
- process_gpu_implementation_attributes (gpu->decl);
-
- /* Create reference between a kernel and a corresponding host implementation
- to quarantee LTO streaming to a same LTRANS. */
- if (kind == HSA_KERNEL)
- gpu->create_reference (host, IPA_REF_ADDR);
-}
-
-/* Add a HOST function to HSA summaries. */
-
-void
-hsa_register_kernel (cgraph_node *host)
-{
- if (hsa_summaries == NULL)
- hsa_summaries = new hsa_summary_t (symtab);
- hsa_function_summary *s = hsa_summaries->get_create (host);
- s->m_kind = HSA_KERNEL;
-}
-
-/* Add a pair of functions to HSA summaries. GPU is an HSA implementation of
- a HOST function. */
-
-void
-hsa_register_kernel (cgraph_node *gpu, cgraph_node *host)
-{
- if (hsa_summaries == NULL)
- hsa_summaries = new hsa_summary_t (symtab);
- hsa_summaries->link_functions (gpu, host, HSA_KERNEL, true);
-}
-
-/* Return true if expansion of the current HSA function has already failed. */
-
-bool
-hsa_seen_error (void)
-{
- return hsa_cfun->m_seen_error;
-}
-
-/* Mark current HSA function as failed. */
-
-void
-hsa_fail_cfun (void)
-{
- hsa_failed_functions->add (hsa_cfun->m_decl);
- hsa_cfun->m_seen_error = true;
-}
-
-char *
-hsa_internal_fn::name ()
-{
- char *name = xstrdup (internal_fn_name (m_fn));
- for (char *ptr = name; *ptr; ptr++)
- *ptr = TOLOWER (*ptr);
-
- const char *suffix = NULL;
- if (m_type_bit_size == 32)
- suffix = "f";
-
- if (suffix)
- {
- char *name2 = concat (name, suffix, NULL);
- free (name);
- name = name2;
- }
-
- hsa_sanitize_name (name);
- return name;
-}
-
-unsigned
-hsa_internal_fn::get_arity ()
-{
- switch (m_fn)
- {
- case IFN_ACOS:
- case IFN_ASIN:
- case IFN_ATAN:
- case IFN_COS:
- case IFN_EXP:
- case IFN_EXP10:
- case IFN_EXP2:
- case IFN_EXPM1:
- case IFN_LOG:
- case IFN_LOG10:
- case IFN_LOG1P:
- case IFN_LOG2:
- case IFN_LOGB:
- case IFN_SIGNIFICAND:
- case IFN_SIN:
- case IFN_SQRT:
- case IFN_TAN:
- case IFN_CEIL:
- case IFN_FLOOR:
- case IFN_NEARBYINT:
- case IFN_RINT:
- case IFN_ROUND:
- case IFN_TRUNC:
- return 1;
- case IFN_ATAN2:
- case IFN_COPYSIGN:
- case IFN_FMOD:
- case IFN_POW:
- case IFN_REMAINDER:
- case IFN_SCALB:
- case IFN_LDEXP:
- return 2;
- case IFN_CLRSB:
- case IFN_CLZ:
- case IFN_CTZ:
- case IFN_FFS:
- case IFN_PARITY:
- case IFN_POPCOUNT:
- default:
- /* As we produce sorry message for unknown internal functions,
- reaching this label is definitely a bug. */
- gcc_unreachable ();
- }
-}
-
-BrigType16_t
-hsa_internal_fn::get_argument_type (int n)
-{
- switch (m_fn)
- {
- case IFN_ACOS:
- case IFN_ASIN:
- case IFN_ATAN:
- case IFN_COS:
- case IFN_EXP:
- case IFN_EXP10:
- case IFN_EXP2:
- case IFN_EXPM1:
- case IFN_LOG:
- case IFN_LOG10:
- case IFN_LOG1P:
- case IFN_LOG2:
- case IFN_LOGB:
- case IFN_SIGNIFICAND:
- case IFN_SIN:
- case IFN_SQRT:
- case IFN_TAN:
- case IFN_CEIL:
- case IFN_FLOOR:
- case IFN_NEARBYINT:
- case IFN_RINT:
- case IFN_ROUND:
- case IFN_TRUNC:
- case IFN_ATAN2:
- case IFN_COPYSIGN:
- case IFN_FMOD:
- case IFN_POW:
- case IFN_REMAINDER:
- case IFN_SCALB:
- return hsa_float_for_bitsize (m_type_bit_size);
- case IFN_LDEXP:
- {
- if (n == -1 || n == 0)
- return hsa_float_for_bitsize (m_type_bit_size);
- else
- return BRIG_TYPE_S32;
- }
- default:
- /* As we produce sorry message for unknown internal functions,
- reaching this label is definitely a bug. */
- gcc_unreachable ();
- }
-}
-
-#include "gt-hsa-common.h"
+++ /dev/null
-/* HSAIL and BRIG related macros and definitions.
- Copyright (C) 2013-2020 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING3. If not see
-<http://www.gnu.org/licenses/>. */
-
-#ifndef HSA_H
-#define HSA_H
-
-#include "hsa-brig-format.h"
-#include "is-a.h"
-#include "predict.h"
-#include "tree.h"
-#include "vec.h"
-#include "hash-table.h"
-#include "basic-block.h"
-#include "bitmap.h"
-
-
-/* Return true if the compiler should produce HSAIL. */
-
-static inline bool
-hsa_gen_requested_p (void)
-{
-#ifndef ENABLE_HSA
- return false;
-#endif
- return !flag_disable_hsa;
-}
-
-/* Standard warning message if we failed to generate HSAIL for a function. */
-
-#define HSA_SORRY_MSG "could not emit HSAIL for the function"
-
-class hsa_op_immed;
-class hsa_op_cst_list;
-class hsa_insn_basic;
-class hsa_op_address;
-class hsa_op_reg;
-class hsa_bb;
-
-/* Class representing an input argument, output argument (result) or a
- variable, that will eventually end up being a symbol directive. */
-
-class hsa_symbol
-{
-public:
- /* Constructor. */
- hsa_symbol (BrigType16_t type, BrigSegment8_t segment,
- BrigLinkage8_t linkage, bool global_scope_p = false,
- BrigAllocation allocation = BRIG_ALLOCATION_AUTOMATIC,
- BrigAlignment8_t align = BRIG_ALIGNMENT_8);
-
- /* Return total size of the symbol. */
- unsigned HOST_WIDE_INT total_byte_size ();
-
- /* Fill in those values into the symbol according to DECL, which are
- determined independently from whether it is parameter, result,
- or a variable, local or global. */
- void fillup_for_decl (tree decl);
-
- /* Pointer to the original tree, which is PARM_DECL for input parameters and
- RESULT_DECL for the output parameters. Also can be CONST_DECL for Fortran
- constants which need to be put into readonly segment. */
- tree m_decl;
-
- /* Name of the symbol, that will be written into output and dumps. Can be
- NULL, see name_number below. */
- const char *m_name;
-
- /* If name is NULL, artificial name will be formed from the segment name and
- this number. */
- int m_name_number;
-
- /* Once written, this is the offset of the associated symbol directive. Zero
- means the symbol has not been written yet. */
- unsigned m_directive_offset;
-
- /* HSA type of the parameter. */
- BrigType16_t m_type;
-
- /* The HSA segment this will eventually end up in. */
- BrigSegment8_t m_segment;
-
- /* The HSA kind of linkage. */
- BrigLinkage8_t m_linkage;
-
- /* Array dimension, if non-zero. */
- unsigned HOST_WIDE_INT m_dim;
-
- /* Constant value, used for string constants. */
- hsa_op_immed *m_cst_value;
-
- /* Is in global scope. */
- bool m_global_scope_p;
-
- /* True if an error has been seen for the symbol. */
- bool m_seen_error;
-
- /* Symbol allocation. */
- BrigAllocation m_allocation;
-
- /* Flag used for global variables if a variable is already emitted or not. */
- bool m_emitted_to_brig;
-
- /* Alignment of the symbol. */
- BrigAlignment8_t m_align;
-
-private:
- /* Default constructor. */
- hsa_symbol ();
-};
-
-/* Abstract class for HSA instruction operands. */
-
-class hsa_op_base
-{
-public:
- /* Next operand scheduled to be written when writing BRIG operand
- section. */
- hsa_op_base *m_next;
-
- /* Offset to which the associated operand structure will be written. Zero if
- yet not scheduled for writing. */
- unsigned m_brig_op_offset;
-
- /* The type of a particular operand. */
- BrigKind16_t m_kind;
-
-protected:
- hsa_op_base (BrigKind16_t k);
-private:
- /* Make the default constructor inaccessible. */
- hsa_op_base () {}
-};
-
-/* Common abstract ancestor for operands which have a type. */
-
-class hsa_op_with_type : public hsa_op_base
-{
-public:
- /* The type. */
- BrigType16_t m_type;
-
- /* Convert an operand to a destination type DTYPE and attach insns
- to HBB if needed. */
- hsa_op_with_type *get_in_type (BrigType16_t dtype, hsa_bb *hbb);
- /* If this operand has integer type smaller than 32 bits, extend it to 32
- bits, adding instructions to HBB if needed. */
- hsa_op_with_type *extend_int_to_32bit (hsa_bb *hbb);
-
-protected:
- hsa_op_with_type (BrigKind16_t k, BrigType16_t t);
-private:
- /* Make the default constructor inaccessible. */
- hsa_op_with_type () : hsa_op_base (BRIG_KIND_NONE) {}
-};
-
-/* An immediate HSA operand. */
-
-class hsa_op_immed : public hsa_op_with_type
-{
-public:
- hsa_op_immed (tree tree_val, bool min32int = true);
- hsa_op_immed (HOST_WIDE_INT int_value, BrigType16_t type);
- void *operator new (size_t);
- ~hsa_op_immed ();
- void set_type (BrigKind16_t t);
-
- /* Function returns pointer to a buffer that contains binary representation
- of the immeadiate value. The buffer has length of BRIG_SIZE and
- a caller is responsible for deallocation of the buffer. */
- char *emit_to_buffer (unsigned *brig_size);
-
- /* Value as represented by middle end. */
- tree m_tree_value;
-
- /* Integer value representation. */
- HOST_WIDE_INT m_int_value;
-
-private:
- /* Make the default constructor inaccessible. */
- hsa_op_immed ();
- /* All objects are deallocated by destroying their pool, so make delete
- inaccessible too. */
- void operator delete (void *) {}
-};
-
-/* Report whether or not P is an immediate operand. */
-
-template <>
-template <>
-inline bool
-is_a_helper <hsa_op_immed *>::test (hsa_op_base *p)
-{
- return p->m_kind == BRIG_KIND_OPERAND_CONSTANT_BYTES;
-}
-
-/* Likewise, but for a more specified base. */
-
-template <>
-template <>
-inline bool
-is_a_helper <hsa_op_immed *>::test (hsa_op_with_type *p)
-{
- return p->m_kind == BRIG_KIND_OPERAND_CONSTANT_BYTES;
-}
-
-
-/* HSA register operand. */
-
-class hsa_op_reg : public hsa_op_with_type
-{
- friend class hsa_insn_basic;
- friend class hsa_insn_phi;
-public:
- hsa_op_reg (BrigType16_t t);
- void *operator new (size_t);
-
- /* Verify register operand. */
- void verify_ssa ();
-
- /* If NON-NULL, gimple SSA that we come from. NULL if none. */
- tree m_gimple_ssa;
-
- /* Defining instruction while still in the SSA. */
- hsa_insn_basic *m_def_insn;
-
- /* If the register allocator decides to spill the register, this is the
- appropriate spill symbol. */
- hsa_symbol *m_spill_sym;
-
- /* Number of this register structure in the order in which they were
- allocated. */
- int m_order;
- int m_lr_begin, m_lr_end;
-
- /* Zero if the register is not yet allocated. After, allocation, this must
- be 'c', 's', 'd' or 'q'. */
- char m_reg_class;
- /* If allocated, the number of the HW register (within its HSA register
- class). */
- char m_hard_num;
-
-private:
- /* Make the default constructor inaccessible. */
- hsa_op_reg () : hsa_op_with_type (BRIG_KIND_NONE, BRIG_TYPE_NONE) {}
- /* All objects are deallocated by destroying their pool, so make delete
- inaccessible too. */
- void operator delete (void *) {}
- /* Set definition where the register is defined. */
- void set_definition (hsa_insn_basic *insn);
- /* Uses of the value while still in SSA. */
- auto_vec <hsa_insn_basic *> m_uses;
-};
-
-/* Report whether or not P is a register operand. */
-
-template <>
-template <>
-inline bool
-is_a_helper <hsa_op_reg *>::test (hsa_op_base *p)
-{
- return p->m_kind == BRIG_KIND_OPERAND_REGISTER;
-}
-
-/* Report whether or not P is a register operand. */
-
-template <>
-template <>
-inline bool
-is_a_helper <hsa_op_reg *>::test (hsa_op_with_type *p)
-{
- return p->m_kind == BRIG_KIND_OPERAND_REGISTER;
-}
-
-/* An address HSA operand. */
-
-class hsa_op_address : public hsa_op_base
-{
-public:
- /* set up a new address operand consisting of base symbol SYM, register R and
- immediate OFFSET. If the machine model is not large and offset is 64 bit,
- the upper, 32 bits have to be zero. */
- hsa_op_address (hsa_symbol *sym, hsa_op_reg *reg,
- HOST_WIDE_INT offset = 0);
-
- void *operator new (size_t);
-
- /* Set up a new address operand consisting of base symbol SYM and
- immediate OFFSET. If the machine model is not large and offset is 64 bit,
- the upper, 32 bits have to be zero. */
- hsa_op_address (hsa_symbol *sym, HOST_WIDE_INT offset = 0);
-
- /* Set up a new address operand consisting of register R and
- immediate OFFSET. If the machine model is not large and offset is 64 bit,
- the upper, 32 bits have to be zero. */
- hsa_op_address (hsa_op_reg *reg, HOST_WIDE_INT offset = 0);
-
- /* Symbol base of the address. Can be NULL if there is none. */
- hsa_symbol *m_symbol;
-
- /* Register offset. Can be NULL if there is none. */
- hsa_op_reg *m_reg;
-
- /* Immediate byte offset. */
- HOST_WIDE_INT m_imm_offset;
-
-private:
- /* Make the default constructor inaccessible. */
- hsa_op_address () : hsa_op_base (BRIG_KIND_NONE) {}
- /* All objects are deallocated by destroying their pool, so make delete
- inaccessible too. */
- void operator delete (void *) {}
-};
-
-/* Report whether or not P is an address operand. */
-
-template <>
-template <>
-inline bool
-is_a_helper <hsa_op_address *>::test (hsa_op_base *p)
-{
- return p->m_kind == BRIG_KIND_OPERAND_ADDRESS;
-}
-
-/* A reference to code HSA operand. It can be either reference
- to a start of a BB or a start of a function. */
-
-class hsa_op_code_ref : public hsa_op_base
-{
-public:
- hsa_op_code_ref ();
-
- /* Offset in the code section that this refers to. */
- unsigned m_directive_offset;
-};
-
-/* Report whether or not P is a code reference operand. */
-
-template <>
-template <>
-inline bool
-is_a_helper <hsa_op_code_ref *>::test (hsa_op_base *p)
-{
- return p->m_kind == BRIG_KIND_OPERAND_CODE_REF;
-}
-
-/* Code list HSA operand. */
-
-class hsa_op_code_list: public hsa_op_base
-{
-public:
- hsa_op_code_list (unsigned elements);
- void *operator new (size_t);
-
- /* Offset to variable-sized array in hsa_data section, where
- are offsets to entries in the hsa_code section. */
- auto_vec<unsigned> m_offsets;
-private:
- /* Make the default constructor inaccessible. */
- hsa_op_code_list () : hsa_op_base (BRIG_KIND_NONE) {}
- /* All objects are deallocated by destroying their pool, so make delete
- inaccessible too. */
- void operator delete (void *) {}
-};
-
-/* Report whether or not P is a code list operand. */
-
-template <>
-template <>
-inline bool
-is_a_helper <hsa_op_code_list *>::test (hsa_op_base *p)
-{
- return p->m_kind == BRIG_KIND_OPERAND_CODE_LIST;
-}
-
-/* Operand list HSA operand. */
-
-class hsa_op_operand_list: public hsa_op_base
-{
-public:
- hsa_op_operand_list (unsigned elements);
- ~hsa_op_operand_list ();
- void *operator new (size_t);
-
- /* Offset to variable-sized array in hsa_data section, where
- are offsets to entries in the hsa_code section. */
- auto_vec<unsigned> m_offsets;
-private:
- /* Make the default constructor inaccessible. */
- hsa_op_operand_list () : hsa_op_base (BRIG_KIND_NONE) {}
- /* All objects are deallocated by destroying their pool, so make delete
- inaccessible too. */
- void operator delete (void *) {}
-};
-
-/* Report whether or not P is a code list operand. */
-
-template <>
-template <>
-inline bool
-is_a_helper <hsa_op_operand_list *>::test (hsa_op_base *p)
-{
- return p->m_kind == BRIG_KIND_OPERAND_OPERAND_LIST;
-}
-
-/* Opcodes of instructions that are not part of HSA but that we use to
- represent it nevertheless. */
-
-#define HSA_OPCODE_PHI (-1)
-#define HSA_OPCODE_ARG_BLOCK (-2)
-
-/* The number of operand pointers we can directly in an instruction. */
-#define HSA_BRIG_INT_STORAGE_OPERANDS 5
-
-/* Class representing an HSA instruction. Unlike typical ancestors for
- specialized classes, this one is also directly used for all instructions
- that are then represented as BrigInstBasic. */
-
-class hsa_insn_basic
-{
-public:
- hsa_insn_basic (unsigned nops, int opc);
- hsa_insn_basic (unsigned nops, int opc, BrigType16_t t,
- hsa_op_base *arg0 = NULL,
- hsa_op_base *arg1 = NULL,
- hsa_op_base *arg2 = NULL,
- hsa_op_base *arg3 = NULL);
-
- void *operator new (size_t);
- void set_op (int index, hsa_op_base *op);
- hsa_op_base *get_op (int index);
- hsa_op_base **get_op_addr (int index);
- unsigned int operand_count ();
- void verify ();
- unsigned input_count ();
- unsigned num_used_ops ();
- void set_output_in_type (hsa_op_reg *dest, unsigned op_index, hsa_bb *hbb);
- bool op_output_p (unsigned opnum);
-
- /* The previous and next instruction in the basic block. */
- hsa_insn_basic *m_prev, *m_next;
-
- /* Basic block this instruction belongs to. */
- basic_block m_bb;
-
- /* Operand code distinguishing different types of instructions. Eventually
- these should only be BRIG_INST_* values from the BrigOpcode16_t range but
- initially we use negative values for PHI nodes and such. */
- int m_opcode;
-
- /* Linearized number assigned to the instruction by HSA RA. */
- int m_number;
-
- /* Type of the destination of the operations. */
- BrigType16_t m_type;
-
- /* BRIG offset of the instruction in code section. */
- unsigned int m_brig_offset;
-
-private:
- /* Make the default constructor inaccessible. */
- hsa_insn_basic () {}
- /* All objects are deallocated by destroying their pool, so make delete
- inaccessible too. */
- void operator delete (void *) {}
- /* The individual operands. All instructions but PHI nodes have five or
- fewer instructions and so will fit the internal storage. */
- /* TODO: Vast majority of instructions have three or fewer operands, so we
- may actually try reducing it. */
- auto_vec<hsa_op_base *, HSA_BRIG_INT_STORAGE_OPERANDS> m_operands;
-};
-
-/* Class representing a PHI node of the SSA form of HSA virtual
- registers. */
-
-class hsa_insn_phi : public hsa_insn_basic
-{
-public:
- hsa_insn_phi (unsigned nops, hsa_op_reg *dst);
-
- /* Destination. */
- hsa_op_reg *m_dest;
-
-private:
- /* Make the default constructor inaccessible. */
- hsa_insn_phi () : hsa_insn_basic (1, HSA_OPCODE_PHI) {}
-};
-
-/* Report whether or not P is a PHI node. */
-
-template <>
-template <>
-inline bool
-is_a_helper <hsa_insn_phi *>::test (hsa_insn_basic *p)
-{
- return p->m_opcode == HSA_OPCODE_PHI;
-}
-
-/* HSA instruction for */
-class hsa_insn_br : public hsa_insn_basic
-{
-public:
- hsa_insn_br (unsigned nops, int opc, BrigType16_t t, BrigWidth8_t width,
- hsa_op_base *arg0 = NULL, hsa_op_base *arg1 = NULL,
- hsa_op_base *arg2 = NULL, hsa_op_base *arg3 = NULL);
-
- /* Number of work-items affected in the same way by the instruction. */
- BrigWidth8_t m_width;
-
-private:
- /* Make the default constructor inaccessible. */
- hsa_insn_br () : hsa_insn_basic (0, BRIG_OPCODE_BR) {}
-};
-
-/* Return true if P is a branching/synchronization instruction. */
-
-template <>
-template <>
-inline bool
-is_a_helper <hsa_insn_br *>::test (hsa_insn_basic *p)
-{
- return p->m_opcode == BRIG_OPCODE_BARRIER
- || p->m_opcode == BRIG_OPCODE_BR;
-}
-
-/* HSA instruction for conditional branches. Structurally the same as
- hsa_insn_br but we represent it specially because of inherent control
- flow it represents. */
-
-class hsa_insn_cbr : public hsa_insn_br
-{
-public:
- hsa_insn_cbr (hsa_op_reg *ctrl);
-
-private:
- /* Make the default constructor inaccessible. */
- hsa_insn_cbr () : hsa_insn_br (0, BRIG_OPCODE_CBR, BRIG_TYPE_B1,
- BRIG_WIDTH_1) {}
-};
-
-/* Report whether P is a contitional branching instruction. */
-
-template <>
-template <>
-inline bool
-is_a_helper <hsa_insn_cbr *>::test (hsa_insn_basic *p)
-{
- return p->m_opcode == BRIG_OPCODE_CBR;
-}
-
-/* HSA instruction for switch branches. */
-
-class hsa_insn_sbr : public hsa_insn_basic
-{
-public:
- hsa_insn_sbr (hsa_op_reg *index, unsigned jump_count);
-
- /* Default destructor. */
- ~hsa_insn_sbr ();
-
- void replace_all_labels (basic_block old_bb, basic_block new_bb);
-
- /* Width as described in HSA documentation. */
- BrigWidth8_t m_width;
-
- /* Jump table. */
- vec <basic_block> m_jump_table;
-
- /* Code list for label references. */
- hsa_op_code_list *m_label_code_list;
-
-private:
- /* Make the default constructor inaccessible. */
- hsa_insn_sbr () : hsa_insn_basic (1, BRIG_OPCODE_SBR) {}
-};
-
-/* Report whether P is a switch branching instruction. */
-
-template <>
-template <>
-inline bool
-is_a_helper <hsa_insn_sbr *>::test (hsa_insn_basic *p)
-{
- return p->m_opcode == BRIG_OPCODE_SBR;
-}
-
-/* HSA instruction for comparisons. */
-
-class hsa_insn_cmp : public hsa_insn_basic
-{
-public:
- hsa_insn_cmp (BrigCompareOperation8_t cmp, BrigType16_t t,
- hsa_op_base *arg0 = NULL, hsa_op_base *arg1 = NULL,
- hsa_op_base *arg2 = NULL);
-
- /* Source type should be derived from operand types. */
-
- /* The comparison operation. */
- BrigCompareOperation8_t m_compare;
-
- /* TODO: Modifiers and packing control are missing but so are everywhere
- else. */
-private:
- /* Make the default constructor inaccessible. */
- hsa_insn_cmp () : hsa_insn_basic (1, BRIG_OPCODE_CMP) {}
-};
-
-/* Report whether or not P is a comparison instruction. */
-
-template <>
-template <>
-inline bool
-is_a_helper <hsa_insn_cmp *>::test (hsa_insn_basic *p)
-{
- return p->m_opcode == BRIG_OPCODE_CMP;
-}
-
-/* HSA instruction for memory operations. */
-
-class hsa_insn_mem : public hsa_insn_basic
-{
-public:
- hsa_insn_mem (int opc, BrigType16_t t, hsa_op_base *arg0, hsa_op_base *arg1);
-
- /* Set alignment to VALUE. */
-
- void set_align (BrigAlignment8_t value);
-
- /* The segment is of the memory access is either the segment of the symbol in
- the address operand or flat address is there is no symbol there. */
-
- /* Required alignment of the memory operation. */
- BrigAlignment8_t m_align;
-
- /* HSA equiv class, basically an alias set number. */
- uint8_t m_equiv_class;
-
- /* TODO: Add width modifier, perhaps also other things. */
-protected:
- hsa_insn_mem (unsigned nops, int opc, BrigType16_t t,
- hsa_op_base *arg0 = NULL, hsa_op_base *arg1 = NULL,
- hsa_op_base *arg2 = NULL, hsa_op_base *arg3 = NULL);
-
-private:
- /* Make the default constructor inaccessible. */
- hsa_insn_mem () : hsa_insn_basic (1, BRIG_OPCODE_LD) {}
-};
-
-/* Report whether or not P is a memory instruction. */
-
-template <>
-template <>
-inline bool
-is_a_helper <hsa_insn_mem *>::test (hsa_insn_basic *p)
-{
- return (p->m_opcode == BRIG_OPCODE_LD
- || p->m_opcode == BRIG_OPCODE_ST);
-}
-
-/* HSA instruction for atomic operations. */
-
-class hsa_insn_atomic : public hsa_insn_mem
-{
-public:
- hsa_insn_atomic (int nops, int opc, enum BrigAtomicOperation aop,
- BrigType16_t t, BrigMemoryOrder memorder,
- hsa_op_base *arg0 = NULL, hsa_op_base *arg1 = NULL,
- hsa_op_base *arg2 = NULL, hsa_op_base *arg3 = NULL);
-
- /* The operation itself. */
- enum BrigAtomicOperation m_atomicop;
-
- /* Things like acquire/release/aligned. */
- enum BrigMemoryOrder m_memoryorder;
-
- /* Scope of the atomic operation. */
- enum BrigMemoryScope m_memoryscope;
-
-private:
- /* Make the default constructor inaccessible. */
- hsa_insn_atomic () : hsa_insn_mem (1, BRIG_KIND_NONE, BRIG_TYPE_NONE) {}
-};
-
-/* Report whether or not P is an atomic instruction. */
-
-template <>
-template <>
-inline bool
-is_a_helper <hsa_insn_atomic *>::test (hsa_insn_basic *p)
-{
- return (p->m_opcode == BRIG_OPCODE_ATOMIC
- || p->m_opcode == BRIG_OPCODE_ATOMICNORET);
-}
-
-/* HSA instruction for signal operations. */
-
-class hsa_insn_signal : public hsa_insn_basic
-{
-public:
- hsa_insn_signal (int nops, int opc, enum BrigAtomicOperation sop,
- BrigType16_t t, BrigMemoryOrder memorder,
- hsa_op_base *arg0 = NULL, hsa_op_base *arg1 = NULL,
- hsa_op_base *arg2 = NULL, hsa_op_base *arg3 = NULL);
-
- /* Things like acquire/release/aligned. */
- enum BrigMemoryOrder m_memory_order;
-
- /* The operation itself. */
- enum BrigAtomicOperation m_signalop;
-};
-
-/* Report whether or not P is a signal instruction. */
-
-template <>
-template <>
-inline bool
-is_a_helper <hsa_insn_signal *>::test (hsa_insn_basic *p)
-{
- return (p->m_opcode == BRIG_OPCODE_SIGNAL
- || p->m_opcode == BRIG_OPCODE_SIGNALNORET);
-}
-
-/* HSA instruction to convert between flat addressing and segments. */
-
-class hsa_insn_seg : public hsa_insn_basic
-{
-public:
- hsa_insn_seg (int opc, BrigType16_t destt, BrigType16_t srct,
- BrigSegment8_t seg, hsa_op_base *arg0, hsa_op_base *arg1);
-
- /* Source type. Depends on the source addressing/segment. */
- BrigType16_t m_src_type;
- /* The segment we are converting from or to. */
- BrigSegment8_t m_segment;
-private:
- /* Make the default constructor inaccessible. */
- hsa_insn_seg () : hsa_insn_basic (1, BRIG_OPCODE_STOF) {}
-};
-
-/* Report whether or not P is a segment conversion instruction. */
-
-template <>
-template <>
-inline bool
-is_a_helper <hsa_insn_seg *>::test (hsa_insn_basic *p)
-{
- return (p->m_opcode == BRIG_OPCODE_STOF
- || p->m_opcode == BRIG_OPCODE_FTOS);
-}
-
-/* Class for internal functions for purpose of HSA emission. */
-
-class hsa_internal_fn
-{
-public:
- hsa_internal_fn (enum internal_fn fn, unsigned type_bit_size):
- m_fn (fn), m_type_bit_size (type_bit_size), m_offset (0) {}
-
- hsa_internal_fn (const hsa_internal_fn *f):
- m_fn (f->m_fn), m_type_bit_size (f->m_type_bit_size),
- m_offset (f->m_offset) {}
-
- /* Return arity of the internal function. */
- unsigned get_arity ();
-
- /* Return BRIG type of N-th argument, if -1 is passed, return value type
- is received. */
- BrigType16_t get_argument_type (int n);
-
- /* Return function name. The memory must be released by a caller. */
- char *name ();
-
- /* Internal function. */
- enum internal_fn m_fn;
-
- /* Bit width of return type. */
- unsigned m_type_bit_size;
-
- /* BRIG offset of declaration of the function. */
- BrigCodeOffset32_t m_offset;
-};
-
-/* HSA instruction for function call. */
-
-class hsa_insn_call : public hsa_insn_basic
-{
-public:
- hsa_insn_call (tree callee);
- hsa_insn_call (hsa_internal_fn *fn);
-
- /* Default destructor. */
- ~hsa_insn_call ();
-
- /* Called function. */
- tree m_called_function;
-
- /* Called internal function. */
- hsa_internal_fn *m_called_internal_fn;
-
- /* Input formal arguments. */
- auto_vec <hsa_symbol *> m_input_args;
-
- /* Input arguments store instructions. */
- auto_vec <hsa_insn_mem *> m_input_arg_insns;
-
- /* Output argument, can be NULL for void functions. */
- hsa_symbol *m_output_arg;
-
- /* Called function code reference. */
- hsa_op_code_ref m_func;
-
- /* Code list for arguments of the function. */
- hsa_op_code_list *m_args_code_list;
-
- /* Code list for result of the function. */
- hsa_op_code_list *m_result_code_list;
-private:
- /* Make the default constructor inaccessible. */
- hsa_insn_call () : hsa_insn_basic (0, BRIG_OPCODE_CALL) {}
-};
-
-/* Report whether or not P is a call instruction. */
-
-template <>
-template <>
-inline bool
-is_a_helper <hsa_insn_call *>::test (hsa_insn_basic *p)
-{
- return (p->m_opcode == BRIG_OPCODE_CALL);
-}
-
-/* HSA call instruction block encapsulates definition of arguments,
- result type, corresponding loads and a possible store.
- Moreover, it contains a single call instruction.
- Emission of the instruction will produce multiple
- HSAIL instructions. */
-
-class hsa_insn_arg_block : public hsa_insn_basic
-{
-public:
- hsa_insn_arg_block (BrigKind brig_kind, hsa_insn_call * call);
-
- /* Kind of argument block. */
- BrigKind m_kind;
-
- /* Call instruction. */
- hsa_insn_call *m_call_insn;
-};
-
-/* Report whether or not P is a call block instruction. */
-
-template <>
-template <>
-inline bool
-is_a_helper <hsa_insn_arg_block *>::test (hsa_insn_basic *p)
-{
- return (p->m_opcode == HSA_OPCODE_ARG_BLOCK);
-}
-
-/* HSA comment instruction. */
-
-class hsa_insn_comment: public hsa_insn_basic
-{
-public:
- /* Constructor of class representing the comment in HSAIL. */
- hsa_insn_comment (const char *s);
-
- /* Default destructor. */
- ~hsa_insn_comment ();
-
- char *m_comment;
-};
-
-/* Report whether or not P is a call block instruction. */
-
-template <>
-template <>
-inline bool
-is_a_helper <hsa_insn_comment *>::test (hsa_insn_basic *p)
-{
- return (p->m_opcode == BRIG_KIND_DIRECTIVE_COMMENT);
-}
-
-/* HSA queue instruction. */
-
-class hsa_insn_queue: public hsa_insn_basic
-{
-public:
- hsa_insn_queue (int nops, int opcode, BrigSegment segment,
- BrigMemoryOrder memory_order,
- hsa_op_base *arg0 = NULL, hsa_op_base *arg1 = NULL,
- hsa_op_base *arg2 = NULL, hsa_op_base *arg3 = NULL);
-
- /* Destructor. */
- ~hsa_insn_queue ();
-
- /* Segment used to refer to the queue. Must be global or flat. */
- BrigSegment m_segment;
- /* Memory order used to specify synchronization. */
- BrigMemoryOrder m_memory_order;
-};
-
-/* Report whether or not P is a queue instruction. */
-
-template <>
-template <>
-inline bool
-is_a_helper <hsa_insn_queue *>::test (hsa_insn_basic *p)
-{
- return (p->m_opcode == BRIG_OPCODE_ADDQUEUEWRITEINDEX
- || p->m_opcode == BRIG_OPCODE_CASQUEUEWRITEINDEX
- || p->m_opcode == BRIG_OPCODE_LDQUEUEREADINDEX
- || p->m_opcode == BRIG_OPCODE_LDQUEUEWRITEINDEX
- || p->m_opcode == BRIG_OPCODE_STQUEUEREADINDEX
- || p->m_opcode == BRIG_OPCODE_STQUEUEWRITEINDEX);
-}
-
-/* HSA source type instruction. */
-
-class hsa_insn_srctype: public hsa_insn_basic
-{
-public:
- hsa_insn_srctype (int nops, BrigOpcode opcode, BrigType16_t destt,
- BrigType16_t srct, hsa_op_base *arg0, hsa_op_base *arg1,
- hsa_op_base *arg2);
-
- /* Source type. */
- BrigType16_t m_source_type;
-
- /* Destructor. */
- ~hsa_insn_srctype ();
-};
-
-/* Report whether or not P is a source type instruction. */
-
-template <>
-template <>
-inline bool
-is_a_helper <hsa_insn_srctype *>::test (hsa_insn_basic *p)
-{
- return (p->m_opcode == BRIG_OPCODE_POPCOUNT
- || p->m_opcode == BRIG_OPCODE_FIRSTBIT
- || p->m_opcode == BRIG_OPCODE_LASTBIT);
-}
-
-/* HSA packed instruction. */
-
-class hsa_insn_packed : public hsa_insn_srctype
-{
-public:
- hsa_insn_packed (int nops, BrigOpcode opcode, BrigType16_t destt,
- BrigType16_t srct, hsa_op_base *arg0, hsa_op_base *arg1,
- hsa_op_base *arg2);
-
- /* Operand list for an operand of the instruction. */
- hsa_op_operand_list *m_operand_list;
-
- /* Destructor. */
- ~hsa_insn_packed ();
-};
-
-/* Report whether or not P is a combine instruction. */
-
-template <>
-template <>
-inline bool
-is_a_helper <hsa_insn_packed *>::test (hsa_insn_basic *p)
-{
- return (p->m_opcode == BRIG_OPCODE_COMBINE
- || p->m_opcode == BRIG_OPCODE_EXPAND);
-}
-
-/* HSA convert instruction. */
-
-class hsa_insn_cvt: public hsa_insn_basic
-{
-public:
- hsa_insn_cvt (hsa_op_with_type *dest, hsa_op_with_type *src);
-};
-
-/* Report whether or not P is a convert instruction. */
-
-template <>
-template <>
-inline bool
-is_a_helper <hsa_insn_cvt *>::test (hsa_insn_basic *p)
-{
- return (p->m_opcode == BRIG_OPCODE_CVT);
-}
-
-/* HSA alloca instruction. */
-
-class hsa_insn_alloca: public hsa_insn_basic
-{
-public:
- hsa_insn_alloca (hsa_op_with_type *dest, hsa_op_with_type *size,
- unsigned alignment = 0);
-
- /* Required alignment of the allocation. */
- BrigAlignment8_t m_align;
-};
-
-/* Report whether or not P is an alloca instruction. */
-
-template <>
-template <>
-inline bool
-is_a_helper <hsa_insn_alloca *>::test (hsa_insn_basic *p)
-{
- return (p->m_opcode == BRIG_OPCODE_ALLOCA);
-}
-
-/* Basic block of HSA instructions. */
-
-class hsa_bb
-{
-public:
- hsa_bb (basic_block cfg_bb);
- hsa_bb (basic_block cfg_bb, int idx);
-
- /* Append an instruction INSN into the basic block. */
- void append_insn (hsa_insn_basic *insn);
-
- /* Add a PHI instruction. */
- void append_phi (hsa_insn_phi *phi);
-
- /* The real CFG BB that this HBB belongs to. */
- basic_block m_bb;
-
- /* The operand that refers to the label to this BB. */
- hsa_op_code_ref m_label_ref;
-
- /* The first and last instruction. */
- hsa_insn_basic *m_first_insn, *m_last_insn;
- /* The first and last phi node. */
- hsa_insn_phi *m_first_phi, *m_last_phi;
-
- /* Just a number to construct names from. */
- int m_index;
-
- auto_bitmap m_liveout, m_livein;
-private:
- /* Make the default constructor inaccessible. */
- hsa_bb ();
- /* All objects are deallocated by destroying their pool, so make delete
- inaccessible too. */
- void operator delete (void *) {}
-};
-
-/* Return the corresponding HSA basic block structure for the given control
- flow basic_block BB. */
-
-static inline hsa_bb *
-hsa_bb_for_bb (basic_block bb)
-{
- return (class hsa_bb *) bb->aux;
-}
-
-/* Class for hashing local hsa_symbols. */
-
-struct hsa_noop_symbol_hasher : nofree_ptr_hash <hsa_symbol>
-{
- static inline hashval_t hash (const value_type);
- static inline bool equal (const value_type, const compare_type);
-};
-
-/* Hash hsa_symbol. */
-
-inline hashval_t
-hsa_noop_symbol_hasher::hash (const value_type item)
-{
- return DECL_UID (item->m_decl);
-}
-
-/* Return true if the DECL_UIDs of decls both symbols refer to are equal. */
-
-inline bool
-hsa_noop_symbol_hasher::equal (const value_type a, const compare_type b)
-{
- return (DECL_UID (a->m_decl) == DECL_UID (b->m_decl));
-}
-
-/* Structure that encapsulates intermediate representation of a HSA
- function. */
-
-class hsa_function_representation
-{
-public:
- hsa_function_representation (tree fdecl, bool kernel_p,
- unsigned ssa_names_count,
- bool modified_cfg = false);
- hsa_function_representation (hsa_internal_fn *fn);
- ~hsa_function_representation ();
-
- /* Builds a shadow register that is utilized to a kernel dispatch. */
- hsa_op_reg *get_shadow_reg ();
-
- /* Return true if we are in a function that has kernel dispatch
- shadow register. */
- bool has_shadow_reg_p ();
-
- /* The entry/exit blocks don't contain incoming code,
- but the HSA generator might use them to put code into,
- so we need hsa_bb instances of them. */
- void init_extra_bbs ();
-
- /* Update CFG dominators if m_modified_cfg flag is set. */
- void update_dominance ();
-
- /* Return linkage of the representation. */
- BrigLinkage8_t get_linkage ();
-
- /* Create a private symbol of requested TYPE. */
- hsa_symbol *create_hsa_temporary (BrigType16_t type);
-
- /* Lookup or create a HSA pseudo register for a given gimple SSA name. */
- hsa_op_reg *reg_for_gimple_ssa (tree ssa);
-
- /* Name of the function. */
- char *m_name;
-
- /* Number of allocated register structures. */
- int m_reg_count;
-
- /* Input arguments. */
- vec <hsa_symbol *> m_input_args;
-
- /* Output argument or NULL if there is none. */
- hsa_symbol *m_output_arg;
-
- /* Hash table of local variable symbols. */
- hash_table <hsa_noop_symbol_hasher> *m_local_symbols;
-
- /* Hash map for string constants. */
- hash_map <tree, hsa_symbol *> m_string_constants_map;
-
- /* Vector of pointers to spill symbols. */
- vec <class hsa_symbol *> m_spill_symbols;
-
- /* Vector of pointers to global variables and transformed string constants
- that are used by the function. */
- vec <class hsa_symbol *> m_global_symbols;
-
- /* Private function artificial variables. */
- vec <class hsa_symbol *> m_private_variables;
-
- /* Vector of called function declarations. */
- vec <tree> m_called_functions;
-
- /* Vector of used internal functions. */
- vec <hsa_internal_fn *> m_called_internal_fns;
-
- /* Number of HBB BBs. */
- int m_hbb_count;
-
- /* Whether or not we could check and enforce SSA properties. */
- bool m_in_ssa;
-
- /* True if the function is kernel function. */
- bool m_kern_p;
-
- /* True if the function representation is a declaration. */
- bool m_declaration_p;
-
- /* Function declaration tree. */
- tree m_decl;
-
- /* Internal function info is used for declarations of internal functions. */
- hsa_internal_fn *m_internal_fn;
-
- /* Runtime shadow register. */
- hsa_op_reg *m_shadow_reg;
-
- /* Number of kernel dispatched which take place in the function. */
- unsigned m_kernel_dispatch_count;
-
- /* If the function representation contains a kernel dispatch,
- OMP data size is necessary memory that is used for copying before
- a kernel dispatch. */
- unsigned m_maximum_omp_data_size;
-
- /* Return true if there's an HSA-specific warning already seen. */
- bool m_seen_error;
-
- /* Counter for temporary symbols created in the function representation. */
- unsigned m_temp_symbol_count;
-
- /* SSA names mapping. */
- vec <hsa_op_reg *> m_ssa_map;
-
- /* Flag whether a function needs update of dominators before RA. */
- bool m_modified_cfg;
-};
-
-enum hsa_function_kind
-{
- HSA_INVALID,
- HSA_KERNEL,
- HSA_FUNCTION
-};
-
-class hsa_function_summary
-{
-public:
- /* Default constructor. */
- hsa_function_summary ();
-
- /* Kind of GPU/host function. */
- hsa_function_kind m_kind;
-
- /* Pointer to a cgraph node which is a HSA implementation of the function.
- In case of the function is a HSA function, the bound function points
- to the host function. */
- cgraph_node *m_bound_function;
-
- /* Identifies if the function is an HSA function or a host function. */
- bool m_gpu_implementation_p;
-
- /* True if the function is a gridified kernel. */
- bool m_gridified_kernel_p;
-};
-
-inline
-hsa_function_summary::hsa_function_summary (): m_kind (HSA_INVALID),
- m_bound_function (NULL), m_gpu_implementation_p (false)
-{
-}
-
-/* Function summary for HSA functions. */
-class hsa_summary_t: public function_summary <hsa_function_summary *>
-{
-public:
- hsa_summary_t (symbol_table *table):
- function_summary<hsa_function_summary *> (table)
- {
- disable_insertion_hook ();
- }
-
- /* Couple GPU and HOST as gpu-specific and host-specific implementation of
- the same function. KIND determines whether GPU is a host-invokable kernel
- or gpu-callable function and GRIDIFIED_KERNEL_P is set if the function was
- gridified in OMP. */
-
- void link_functions (cgraph_node *gpu, cgraph_node *host,
- hsa_function_kind kind, bool gridified_kernel_p);
-
-private:
- void process_gpu_implementation_attributes (tree gdecl);
-};
-
-/* OMP simple builtin describes behavior that should be done for
- the routine. */
-class omp_simple_builtin
-{
-public:
- omp_simple_builtin (const char *name, const char *warning_message,
- bool sorry, hsa_op_immed *return_value = NULL):
- m_name (name), m_warning_message (warning_message), m_sorry (sorry),
- m_return_value (return_value)
- {}
-
- /* Generate HSAIL instructions for the builtin or produce warning message. */
- void generate (gimple *stmt, hsa_bb *hbb);
-
- /* Name of function. */
- const char *m_name;
-
- /* Warning message. */
- const char *m_warning_message;
-
- /* Flag if we should sorry after the warning message is printed. */
- bool m_sorry;
-
- /* Return value of the function. */
- hsa_op_immed *m_return_value;
-
- /* Emission function. */
- void (*m_emit_func) (gimple *stmt, hsa_bb *);
-};
-
-/* Class for hashing hsa_internal_fn. */
-
-struct hsa_internal_fn_hasher: free_ptr_hash <hsa_internal_fn>
-{
- static inline hashval_t hash (const value_type);
- static inline bool equal (const value_type, const compare_type);
-};
-
-/* Hash hsa_symbol. */
-
-inline hashval_t
-hsa_internal_fn_hasher::hash (const value_type item)
-{
- return item->m_fn;
-}
-
-/* Return true if the DECL_UIDs of decls both symbols refer to are equal. */
-
-inline bool
-hsa_internal_fn_hasher::equal (const value_type a, const compare_type b)
-{
- return a->m_fn == b->m_fn && a->m_type_bit_size == b->m_type_bit_size;
-}
-
-/* in hsa-common.c */
-extern class hsa_function_representation *hsa_cfun;
-extern hash_map <tree, vec <const char *> *> *hsa_decl_kernel_dependencies;
-extern hsa_summary_t *hsa_summaries;
-extern hsa_symbol *hsa_num_threads;
-extern unsigned hsa_kernel_calls_counter;
-extern hash_set <tree> *hsa_failed_functions;
-extern hash_table <hsa_noop_symbol_hasher> *hsa_global_variable_symbols;
-
-bool hsa_callable_function_p (tree fndecl);
-void hsa_init_compilation_unit_data (void);
-void hsa_deinit_compilation_unit_data (void);
-bool hsa_machine_large_p (void);
-bool hsa_full_profile_p (void);
-bool hsa_opcode_floating_bit_insn_p (BrigOpcode16_t);
-unsigned hsa_type_bit_size (BrigType16_t t);
-BrigType16_t hsa_bittype_for_bitsize (unsigned bitsize);
-BrigType16_t hsa_uint_for_bitsize (unsigned bitsize);
-BrigType16_t hsa_float_for_bitsize (unsigned bitsize);
-BrigType16_t hsa_bittype_for_type (BrigType16_t t);
-BrigType16_t hsa_unsigned_type_for_type (BrigType16_t t);
-bool hsa_type_packed_p (BrigType16_t type);
-bool hsa_type_float_p (BrigType16_t type);
-bool hsa_type_integer_p (BrigType16_t type);
-bool hsa_btype_p (BrigType16_t type);
-BrigAlignment8_t hsa_alignment_encoding (unsigned n);
-BrigAlignment8_t hsa_natural_alignment (BrigType16_t type);
-BrigAlignment8_t hsa_object_alignment (tree t);
-unsigned hsa_byte_alignment (BrigAlignment8_t alignment);
-void hsa_destroy_operand (hsa_op_base *op);
-void hsa_destroy_insn (hsa_insn_basic *insn);
-void hsa_add_kern_decl_mapping (tree decl, char *name, unsigned, bool);
-unsigned hsa_get_number_decl_kernel_mappings (void);
-tree hsa_get_decl_kernel_mapping_decl (unsigned i);
-char *hsa_get_decl_kernel_mapping_name (unsigned i);
-unsigned hsa_get_decl_kernel_mapping_omp_size (unsigned i);
-bool hsa_get_decl_kernel_mapping_gridified (unsigned i);
-void hsa_free_decl_kernel_mapping (void);
-tree *hsa_get_ctor_statements (void);
-tree *hsa_get_dtor_statements (void);
-tree *hsa_get_kernel_dispatch_type (void);
-void hsa_add_kernel_dependency (tree caller, const char *called_function);
-void hsa_sanitize_name (char *p);
-char *hsa_brig_function_name (const char *p);
-const char *hsa_get_declaration_name (tree decl);
-void hsa_register_kernel (cgraph_node *host);
-void hsa_register_kernel (cgraph_node *gpu, cgraph_node *host);
-bool hsa_seen_error (void);
-void hsa_fail_cfun (void);
-
-/* In hsa-gen.c. */
-void hsa_build_append_simple_mov (hsa_op_reg *, hsa_op_base *, hsa_bb *);
-hsa_symbol *hsa_get_spill_symbol (BrigType16_t);
-hsa_symbol *hsa_get_string_cst_symbol (BrigType16_t);
-hsa_op_reg *hsa_spill_in (hsa_insn_basic *, hsa_op_reg *, hsa_op_reg **);
-hsa_op_reg *hsa_spill_out (hsa_insn_basic *, hsa_op_reg *, hsa_op_reg **);
-hsa_bb *hsa_init_new_bb (basic_block);
-hsa_function_representation *hsa_generate_function_declaration (tree decl);
-hsa_function_representation *hsa_generate_internal_fn_decl (hsa_internal_fn *);
-tree hsa_get_host_function (tree decl);
-
-/* In hsa-regalloc.c. */
-void hsa_regalloc (void);
-
-/* In hsa-brig.c. */
-extern hash_table <hsa_internal_fn_hasher> *hsa_emitted_internal_decls;
-void hsa_brig_emit_function (void);
-void hsa_output_brig (void);
-unsigned hsa_get_imm_brig_type_len (BrigType16_t type);
-void hsa_brig_emit_omp_symbols (void);
-
-/* In hsa-dump.c. */
-const char *hsa_seg_name (BrigSegment8_t);
-void dump_hsa_insn (FILE *f, hsa_insn_basic *insn);
-void dump_hsa_bb (FILE *, hsa_bb *);
-void dump_hsa_cfun (FILE *);
-DEBUG_FUNCTION void debug_hsa_operand (hsa_op_base *opc);
-DEBUG_FUNCTION void debug_hsa_insn (hsa_insn_basic *insn);
-
-union hsa_bytes
-{
- uint8_t b8;
- uint16_t b16;
- uint32_t b32;
- uint64_t b64;
-};
-
-/* Return true if a function DECL is an HSA implementation. */
-
-static inline bool
-hsa_gpu_implementation_p (tree decl)
-{
- if (hsa_summaries == NULL)
- return false;
-
- hsa_function_summary *s = hsa_summaries->get (cgraph_node::get_create (decl));
- return s != NULL && s->m_gpu_implementation_p;
-}
-
-#endif /* HSA_H */
+++ /dev/null
-/* Infrastructure to dump our HSAIL IL
- Copyright (C) 2013-2020 Free Software Foundation, Inc.
- Contributed by Martin Jambor <mjambor@suse.cz> and
- Martin Liska <mliska@suse.cz>.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING3. If not see
-<http://www.gnu.org/licenses/>. */
-
-#include "config.h"
-#include "system.h"
-#include "coretypes.h"
-#include "tm.h"
-#include "is-a.h"
-#include "vec.h"
-#include "tree.h"
-#include "basic-block.h"
-#include "function.h"
-#include "cfg.h"
-#include "dumpfile.h"
-#include "gimple-pretty-print.h"
-#include "cgraph.h"
-#include "print-tree.h"
-#include "alloc-pool.h"
-#include "symbol-summary.h"
-#include "hsa-common.h"
-
-/* Return textual name of TYPE. */
-
-static const char *
-hsa_type_name (BrigType16_t type)
-{
- switch (type)
- {
- case BRIG_TYPE_NONE:
- return "none";
- case BRIG_TYPE_U8:
- return "u8";
- case BRIG_TYPE_U16:
- return "u16";
- case BRIG_TYPE_U32:
- return "u32";
- case BRIG_TYPE_U64:
- return "u64";
- case BRIG_TYPE_S8:
- return "s8";
- case BRIG_TYPE_S16:
- return "s16";
- case BRIG_TYPE_S32:
- return "s32";
- case BRIG_TYPE_S64:
- return "s64";
- case BRIG_TYPE_F16:
- return "f16";
- case BRIG_TYPE_F32:
- return "f32";
- case BRIG_TYPE_F64:
- return "f64";
- case BRIG_TYPE_B1:
- return "b1";
- case BRIG_TYPE_B8:
- return "b8";
- case BRIG_TYPE_B16:
- return "b16";
- case BRIG_TYPE_B32:
- return "b32";
- case BRIG_TYPE_B64:
- return "b64";
- case BRIG_TYPE_B128:
- return "b128";
- case BRIG_TYPE_SAMP:
- return "samp";
- case BRIG_TYPE_ROIMG:
- return "roimg";
- case BRIG_TYPE_WOIMG:
- return "woimg";
- case BRIG_TYPE_RWIMG:
- return "rwimg";
- case BRIG_TYPE_SIG32:
- return "sig32";
- case BRIG_TYPE_SIG64:
- return "sig64";
- case BRIG_TYPE_U8X4:
- return "u8x4";
- case BRIG_TYPE_U8X8:
- return "u8x8";
- case BRIG_TYPE_U8X16:
- return "u8x16";
- case BRIG_TYPE_U16X2:
- return "u16x2";
- case BRIG_TYPE_U16X4:
- return "u16x4";
- case BRIG_TYPE_U16X8:
- return "u16x8";
- case BRIG_TYPE_U32X2:
- return "u32x2";
- case BRIG_TYPE_U32X4:
- return "u32x4";
- case BRIG_TYPE_U64X2:
- return "u64x2";
- case BRIG_TYPE_S8X4:
- return "s8x4";
- case BRIG_TYPE_S8X8:
- return "s8x8";
- case BRIG_TYPE_S8X16:
- return "s8x16";
- case BRIG_TYPE_S16X2:
- return "s16x2";
- case BRIG_TYPE_S16X4:
- return "s16x4";
- case BRIG_TYPE_S16X8:
- return "s16x8";
- case BRIG_TYPE_S32X2:
- return "s32x2";
- case BRIG_TYPE_S32X4:
- return "s32x4";
- case BRIG_TYPE_S64X2:
- return "s64x2";
- case BRIG_TYPE_F16X2:
- return "f16x2";
- case BRIG_TYPE_F16X4:
- return "f16x4";
- case BRIG_TYPE_F16X8:
- return "f16x8";
- case BRIG_TYPE_F32X2:
- return "f32x2";
- case BRIG_TYPE_F32X4:
- return "f32x4";
- case BRIG_TYPE_F64X2:
- return "f64x2";
- default:
- return "UNKNOWN_TYPE";
- }
-}
-
-/* Return textual name of OPCODE. */
-
-static const char *
-hsa_opcode_name (BrigOpcode16_t opcode)
-{
- switch (opcode)
- {
- case BRIG_OPCODE_NOP:
- return "nop";
- case BRIG_OPCODE_ABS:
- return "abs";
- case BRIG_OPCODE_ADD:
- return "add";
- case BRIG_OPCODE_BORROW:
- return "borrow";
- case BRIG_OPCODE_CARRY:
- return "carry";
- case BRIG_OPCODE_CEIL:
- return "ceil";
- case BRIG_OPCODE_COPYSIGN:
- return "copysign";
- case BRIG_OPCODE_DIV:
- return "div";
- case BRIG_OPCODE_FLOOR:
- return "floor";
- case BRIG_OPCODE_FMA:
- return "fma";
- case BRIG_OPCODE_FRACT:
- return "fract";
- case BRIG_OPCODE_MAD:
- return "mad";
- case BRIG_OPCODE_MAX:
- return "max";
- case BRIG_OPCODE_MIN:
- return "min";
- case BRIG_OPCODE_MUL:
- return "mul";
- case BRIG_OPCODE_MULHI:
- return "mulhi";
- case BRIG_OPCODE_NEG:
- return "neg";
- case BRIG_OPCODE_REM:
- return "rem";
- case BRIG_OPCODE_RINT:
- return "rint";
- case BRIG_OPCODE_SQRT:
- return "sqrt";
- case BRIG_OPCODE_SUB:
- return "sub";
- case BRIG_OPCODE_TRUNC:
- return "trunc";
- case BRIG_OPCODE_MAD24:
- return "mad24";
- case BRIG_OPCODE_MAD24HI:
- return "mad24hi";
- case BRIG_OPCODE_MUL24:
- return "mul24";
- case BRIG_OPCODE_MUL24HI:
- return "mul24hi";
- case BRIG_OPCODE_SHL:
- return "shl";
- case BRIG_OPCODE_SHR:
- return "shr";
- case BRIG_OPCODE_AND:
- return "and";
- case BRIG_OPCODE_NOT:
- return "not";
- case BRIG_OPCODE_OR:
- return "or";
- case BRIG_OPCODE_POPCOUNT:
- return "popcount";
- case BRIG_OPCODE_XOR:
- return "xor";
- case BRIG_OPCODE_BITEXTRACT:
- return "bitextract";
- case BRIG_OPCODE_BITINSERT:
- return "bitinsert";
- case BRIG_OPCODE_BITMASK:
- return "bitmask";
- case BRIG_OPCODE_BITREV:
- return "bitrev";
- case BRIG_OPCODE_BITSELECT:
- return "bitselect";
- case BRIG_OPCODE_FIRSTBIT:
- return "firstbit";
- case BRIG_OPCODE_LASTBIT:
- return "lastbit";
- case BRIG_OPCODE_COMBINE:
- return "combine";
- case BRIG_OPCODE_EXPAND:
- return "expand";
- case BRIG_OPCODE_LDA:
- return "lda";
- case BRIG_OPCODE_MOV:
- return "mov";
- case BRIG_OPCODE_SHUFFLE:
- return "shuffle";
- case BRIG_OPCODE_UNPACKHI:
- return "unpackhi";
- case BRIG_OPCODE_UNPACKLO:
- return "unpacklo";
- case BRIG_OPCODE_PACK:
- return "pack";
- case BRIG_OPCODE_UNPACK:
- return "unpack";
- case BRIG_OPCODE_CMOV:
- return "cmov";
- case BRIG_OPCODE_CLASS:
- return "class";
- case BRIG_OPCODE_NCOS:
- return "ncos";
- case BRIG_OPCODE_NEXP2:
- return "nexp2";
- case BRIG_OPCODE_NFMA:
- return "nfma";
- case BRIG_OPCODE_NLOG2:
- return "nlog2";
- case BRIG_OPCODE_NRCP:
- return "nrcp";
- case BRIG_OPCODE_NRSQRT:
- return "nrsqrt";
- case BRIG_OPCODE_NSIN:
- return "nsin";
- case BRIG_OPCODE_NSQRT:
- return "nsqrt";
- case BRIG_OPCODE_BITALIGN:
- return "bitalign";
- case BRIG_OPCODE_BYTEALIGN:
- return "bytealign";
- case BRIG_OPCODE_PACKCVT:
- return "packcvt";
- case BRIG_OPCODE_UNPACKCVT:
- return "unpackcvt";
- case BRIG_OPCODE_LERP:
- return "lerp";
- case BRIG_OPCODE_SAD:
- return "sad";
- case BRIG_OPCODE_SADHI:
- return "sadhi";
- case BRIG_OPCODE_SEGMENTP:
- return "segmentp";
- case BRIG_OPCODE_FTOS:
- return "ftos";
- case BRIG_OPCODE_STOF:
- return "stof";
- case BRIG_OPCODE_CMP:
- return "cmp";
- case BRIG_OPCODE_CVT:
- return "cvt";
- case BRIG_OPCODE_LD:
- return "ld";
- case BRIG_OPCODE_ST:
- return "st";
- case BRIG_OPCODE_ATOMIC:
- return "atomic";
- case BRIG_OPCODE_ATOMICNORET:
- return "atomicnoret";
- case BRIG_OPCODE_SIGNAL:
- return "signal";
- case BRIG_OPCODE_SIGNALNORET:
- return "signalnoret";
- case BRIG_OPCODE_MEMFENCE:
- return "memfence";
- case BRIG_OPCODE_RDIMAGE:
- return "rdimage";
- case BRIG_OPCODE_LDIMAGE:
- return "ldimage";
- case BRIG_OPCODE_STIMAGE:
- return "stimage";
- case BRIG_OPCODE_QUERYIMAGE:
- return "queryimage";
- case BRIG_OPCODE_QUERYSAMPLER:
- return "querysampler";
- case BRIG_OPCODE_CBR:
- return "cbr";
- case BRIG_OPCODE_BR:
- return "br";
- case BRIG_OPCODE_SBR:
- return "sbr";
- case BRIG_OPCODE_BARRIER:
- return "barrier";
- case BRIG_OPCODE_WAVEBARRIER:
- return "wavebarrier";
- case BRIG_OPCODE_ARRIVEFBAR:
- return "arrivefbar";
- case BRIG_OPCODE_INITFBAR:
- return "initfbar";
- case BRIG_OPCODE_JOINFBAR:
- return "joinfbar";
- case BRIG_OPCODE_LEAVEFBAR:
- return "leavefbar";
- case BRIG_OPCODE_RELEASEFBAR:
- return "releasefbar";
- case BRIG_OPCODE_WAITFBAR:
- return "waitfbar";
- case BRIG_OPCODE_LDF:
- return "ldf";
- case BRIG_OPCODE_ACTIVELANECOUNT:
- return "activelanecount";
- case BRIG_OPCODE_ACTIVELANEID:
- return "activelaneid";
- case BRIG_OPCODE_ACTIVELANEMASK:
- return "activelanemask";
- case BRIG_OPCODE_CALL:
- return "call";
- case BRIG_OPCODE_SCALL:
- return "scall";
- case BRIG_OPCODE_ICALL:
- return "icall";
- case BRIG_OPCODE_RET:
- return "ret";
- case BRIG_OPCODE_ALLOCA:
- return "alloca";
- case BRIG_OPCODE_CURRENTWORKGROUPSIZE:
- return "currentworkgroupsize";
- case BRIG_OPCODE_DIM:
- return "dim";
- case BRIG_OPCODE_GRIDGROUPS:
- return "gridgroups";
- case BRIG_OPCODE_GRIDSIZE:
- return "gridsize";
- case BRIG_OPCODE_PACKETCOMPLETIONSIG:
- return "packetcompletionsig";
- case BRIG_OPCODE_PACKETID:
- return "packetid";
- case BRIG_OPCODE_WORKGROUPID:
- return "workgroupid";
- case BRIG_OPCODE_WORKGROUPSIZE:
- return "workgroupsize";
- case BRIG_OPCODE_WORKITEMABSID:
- return "workitemabsid";
- case BRIG_OPCODE_WORKITEMFLATABSID:
- return "workitemflatabsid";
- case BRIG_OPCODE_WORKITEMFLATID:
- return "workitemflatid";
- case BRIG_OPCODE_WORKITEMID:
- return "workitemid";
- case BRIG_OPCODE_CLEARDETECTEXCEPT:
- return "cleardetectexcept";
- case BRIG_OPCODE_GETDETECTEXCEPT:
- return "getdetectexcept";
- case BRIG_OPCODE_SETDETECTEXCEPT:
- return "setdetectexcept";
- case BRIG_OPCODE_ADDQUEUEWRITEINDEX:
- return "addqueuewriteindex";
- case BRIG_OPCODE_CASQUEUEWRITEINDEX:
- return "casqueuewriteindex";
- case BRIG_OPCODE_LDQUEUEREADINDEX:
- return "ldqueuereadindex";
- case BRIG_OPCODE_LDQUEUEWRITEINDEX:
- return "ldqueuewriteindex";
- case BRIG_OPCODE_STQUEUEREADINDEX:
- return "stqueuereadindex";
- case BRIG_OPCODE_STQUEUEWRITEINDEX:
- return "stqueuewriteindex";
- case BRIG_OPCODE_CLOCK:
- return "clock";
- case BRIG_OPCODE_CUID:
- return "cuid";
- case BRIG_OPCODE_DEBUGTRAP:
- return "debugtrap";
- case BRIG_OPCODE_GROUPBASEPTR:
- return "groupbaseptr";
- case BRIG_OPCODE_KERNARGBASEPTR:
- return "kernargbaseptr";
- case BRIG_OPCODE_LANEID:
- return "laneid";
- case BRIG_OPCODE_MAXCUID:
- return "maxcuid";
- case BRIG_OPCODE_MAXWAVEID:
- return "maxwaveid";
- case BRIG_OPCODE_NULLPTR:
- return "nullptr";
- case BRIG_OPCODE_WAVEID:
- return "waveid";
- default:
- return "UNKNOWN_OPCODE";
- }
-}
-
-/* Return textual name of SEG. */
-
-const char *
-hsa_seg_name (BrigSegment8_t seg)
-{
- switch (seg)
- {
- case BRIG_SEGMENT_NONE:
- return "none";
- case BRIG_SEGMENT_FLAT:
- return "flat";
- case BRIG_SEGMENT_GLOBAL:
- return "global";
- case BRIG_SEGMENT_READONLY:
- return "readonly";
- case BRIG_SEGMENT_KERNARG:
- return "kernarg";
- case BRIG_SEGMENT_GROUP:
- return "group";
- case BRIG_SEGMENT_PRIVATE:
- return "private";
- case BRIG_SEGMENT_SPILL:
- return "spill";
- case BRIG_SEGMENT_ARG:
- return "arg";
- default:
- return "UNKNOWN_SEGMENT";
- }
-}
-
-/* Return textual name of CMPOP. */
-
-static const char *
-hsa_cmpop_name (BrigCompareOperation8_t cmpop)
-{
- switch (cmpop)
- {
- case BRIG_COMPARE_EQ:
- return "eq";
- case BRIG_COMPARE_NE:
- return "ne";
- case BRIG_COMPARE_LT:
- return "lt";
- case BRIG_COMPARE_LE:
- return "le";
- case BRIG_COMPARE_GT:
- return "gt";
- case BRIG_COMPARE_GE:
- return "ge";
- case BRIG_COMPARE_EQU:
- return "equ";
- case BRIG_COMPARE_NEU:
- return "neu";
- case BRIG_COMPARE_LTU:
- return "ltu";
- case BRIG_COMPARE_LEU:
- return "leu";
- case BRIG_COMPARE_GTU:
- return "gtu";
- case BRIG_COMPARE_GEU:
- return "geu";
- case BRIG_COMPARE_NUM:
- return "num";
- case BRIG_COMPARE_NAN:
- return "nan";
- case BRIG_COMPARE_SEQ:
- return "seq";
- case BRIG_COMPARE_SNE:
- return "sne";
- case BRIG_COMPARE_SLT:
- return "slt";
- case BRIG_COMPARE_SLE:
- return "sle";
- case BRIG_COMPARE_SGT:
- return "sgt";
- case BRIG_COMPARE_SGE:
- return "sge";
- case BRIG_COMPARE_SGEU:
- return "sgeu";
- case BRIG_COMPARE_SEQU:
- return "sequ";
- case BRIG_COMPARE_SNEU:
- return "sneu";
- case BRIG_COMPARE_SLTU:
- return "sltu";
- case BRIG_COMPARE_SLEU:
- return "sleu";
- case BRIG_COMPARE_SNUM:
- return "snum";
- case BRIG_COMPARE_SNAN:
- return "snan";
- case BRIG_COMPARE_SGTU:
- return "sgtu";
- default:
- return "UNKNOWN_COMPARISON";
- }
-}
-
-/* Return textual name for memory order. */
-
-static const char *
-hsa_memsem_name (enum BrigMemoryOrder mo)
-{
- switch (mo)
- {
- case BRIG_MEMORY_ORDER_NONE:
- return "";
- case BRIG_MEMORY_ORDER_RELAXED:
- return "rlx";
- case BRIG_MEMORY_ORDER_SC_ACQUIRE:
- return "scacq";
- case BRIG_MEMORY_ORDER_SC_RELEASE:
- return "screl";
- case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE:
- return "scar";
- default:
- return "UNKNOWN_MEMORY_ORDER";
- }
-}
-
-/* Return textual name for memory scope. */
-
-static const char *
-hsa_memscope_name (enum BrigMemoryScope scope)
-{
- switch (scope)
- {
- case BRIG_MEMORY_SCOPE_NONE:
- return "";
- case BRIG_MEMORY_SCOPE_WORKITEM:
- return "wi";
- case BRIG_MEMORY_SCOPE_WAVEFRONT:
- return "wave";
- case BRIG_MEMORY_SCOPE_WORKGROUP:
- return "wg";
- case BRIG_MEMORY_SCOPE_AGENT:
- return "agent";
- case BRIG_MEMORY_SCOPE_SYSTEM:
- return "sys";
- default:
- return "UNKNOWN_SCOPE";
- }
-}
-
-/* Return textual name for atomic operation. */
-
-static const char *
-hsa_m_atomicop_name (enum BrigAtomicOperation op)
-{
- switch (op)
- {
- case BRIG_ATOMIC_ADD:
- return "add";
- case BRIG_ATOMIC_AND:
- return "and";
- case BRIG_ATOMIC_CAS:
- return "cas";
- case BRIG_ATOMIC_EXCH:
- return "exch";
- case BRIG_ATOMIC_LD:
- return "ld";
- case BRIG_ATOMIC_MAX:
- return "max";
- case BRIG_ATOMIC_MIN:
- return "min";
- case BRIG_ATOMIC_OR:
- return "or";
- case BRIG_ATOMIC_ST:
- return "st";
- case BRIG_ATOMIC_SUB:
- return "sub";
- case BRIG_ATOMIC_WRAPDEC:
- return "wrapdec";
- case BRIG_ATOMIC_WRAPINC:
- return "wrapinc";
- case BRIG_ATOMIC_XOR:
- return "xor";
- case BRIG_ATOMIC_WAIT_EQ:
- return "wait_eq";
- case BRIG_ATOMIC_WAIT_NE:
- return "wait_ne";
- case BRIG_ATOMIC_WAIT_LT:
- return "wait_lt";
- case BRIG_ATOMIC_WAIT_GTE:
- return "wait_gte";
- case BRIG_ATOMIC_WAITTIMEOUT_EQ:
- return "waittimeout_eq";
- case BRIG_ATOMIC_WAITTIMEOUT_NE:
- return "waittimeout_ne";
- case BRIG_ATOMIC_WAITTIMEOUT_LT:
- return "waittimeout_lt";
- case BRIG_ATOMIC_WAITTIMEOUT_GTE:
- return "waittimeout_gte";
- default:
- return "UNKNOWN_ATOMIC_OP";
- }
-}
-
-/* Return textual name for atomic operation. */
-
-static const char *
-hsa_width_specifier_name (BrigWidth8_t width)
-{
- switch (width)
- {
- case BRIG_WIDTH_NONE:
- return "none";
- case BRIG_WIDTH_1:
- return "1";
- case BRIG_WIDTH_2:
- return "2";
- case BRIG_WIDTH_4:
- return "4";
- case BRIG_WIDTH_8:
- return "8";
- case BRIG_WIDTH_16:
- return "16";
- case BRIG_WIDTH_32:
- return "32";
- case BRIG_WIDTH_64:
- return "64";
- case BRIG_WIDTH_128:
- return "128";
- case BRIG_WIDTH_256:
- return "256";
- case BRIG_WIDTH_512:
- return "512";
- case BRIG_WIDTH_1024:
- return "1024";
- case BRIG_WIDTH_2048:
- return "2048";
- case BRIG_WIDTH_4096:
- return "4096";
- case BRIG_WIDTH_8192:
- return "8192";
- case BRIG_WIDTH_16384:
- return "16384";
- case BRIG_WIDTH_32768:
- return "32768";
- case BRIG_WIDTH_65536:
- return "65536";
- case BRIG_WIDTH_131072:
- return "131072";
- case BRIG_WIDTH_262144:
- return "262144";
- case BRIG_WIDTH_524288:
- return "524288";
- case BRIG_WIDTH_1048576:
- return "1048576";
- case BRIG_WIDTH_2097152:
- return "2097152";
- case BRIG_WIDTH_4194304:
- return "4194304";
- case BRIG_WIDTH_8388608:
- return "8388608";
- case BRIG_WIDTH_16777216:
- return "16777216";
- case BRIG_WIDTH_33554432:
- return "33554432";
- case BRIG_WIDTH_67108864:
- return "67108864";
- case BRIG_WIDTH_134217728:
- return "134217728";
- case BRIG_WIDTH_268435456:
- return "268435456";
- case BRIG_WIDTH_536870912:
- return "536870912";
- case BRIG_WIDTH_1073741824:
- return "1073741824";
- case BRIG_WIDTH_2147483648:
- return "2147483648";
- case BRIG_WIDTH_WAVESIZE:
- return "wavesize";
- case BRIG_WIDTH_ALL:
- return "all";
- default:
- return "UNKNOWN_WIDTH";
- }
-}
-
-/* Dump textual representation of HSA IL register REG to file F. */
-
-static void
-dump_hsa_reg (FILE *f, hsa_op_reg *reg, bool dump_type = false)
-{
- if (reg->m_reg_class)
- fprintf (f, "$%c%i", reg->m_reg_class, reg->m_hard_num);
- else
- fprintf (f, "$_%i", reg->m_order);
- if (dump_type)
- fprintf (f, " (%s)", hsa_type_name (reg->m_type));
-}
-
-/* Dump textual representation of HSA IL immediate operand IMM to file F. */
-
-static void
-dump_hsa_immed (FILE *f, hsa_op_immed *imm)
-{
- bool unsigned_int_type
- = (BRIG_TYPE_U8 | BRIG_TYPE_U16 | BRIG_TYPE_U32 | BRIG_TYPE_U64)
- & imm->m_type;
-
- if (imm->m_tree_value)
- print_generic_expr (f, imm->m_tree_value);
- else
- {
- if (unsigned_int_type)
- fprintf (f, HOST_WIDE_INT_PRINT_DEC, imm->m_int_value);
- else
- fprintf (f, HOST_WIDE_INT_PRINT_UNSIGNED,
- (unsigned HOST_WIDE_INT) imm->m_int_value);
- }
-
- fprintf (f, " (%s)", hsa_type_name (imm->m_type));
-}
-
-/* Dump textual representation of HSA IL address operand ADDR to file F. */
-
-static void
-dump_hsa_address (FILE *f, hsa_op_address *addr)
-{
- bool sth = false;
-
- if (addr->m_symbol)
- {
- sth = true;
- if (addr->m_symbol->m_name)
- fprintf (f, "[%%%s]", addr->m_symbol->m_name);
- else
- fprintf (f, "[%%__%s_%i]", hsa_seg_name (addr->m_symbol->m_segment),
- addr->m_symbol->m_name_number);
- }
-
- if (addr->m_reg)
- {
- fprintf (f, "[");
- dump_hsa_reg (f, addr->m_reg);
- if (addr->m_imm_offset != 0)
- fprintf (f, " + " HOST_WIDE_INT_PRINT_DEC "]", addr->m_imm_offset);
- else
- fprintf (f, "]");
- }
- else if (!sth || addr->m_imm_offset != 0)
- fprintf (f, "[" HOST_WIDE_INT_PRINT_DEC "]", addr->m_imm_offset);
-}
-
-/* Dump textual representation of HSA IL symbol SYMBOL to file F. */
-
-static void
-dump_hsa_symbol (FILE *f, hsa_symbol *symbol)
-{
- const char *name;
- char buf[64];
- if (symbol->m_name)
- name = symbol->m_name;
- else
- {
- sprintf (buf, "__%s_%i", hsa_seg_name (symbol->m_segment),
- symbol->m_name_number);
-
- name = buf;
- }
-
- fprintf (f, "align(%u) %s_%s %s", hsa_byte_alignment (symbol->m_align),
- hsa_seg_name (symbol->m_segment),
- hsa_type_name (symbol->m_type & ~BRIG_TYPE_ARRAY_MASK), name);
-
- if (symbol->m_type & BRIG_TYPE_ARRAY_MASK)
- fprintf (f, "[%lu]", (unsigned long) symbol->m_dim);
-
- if (symbol->m_directive_offset)
- fprintf (f, " /* BRIG offset: %u */", symbol->m_directive_offset);
-}
-
-/* Dump textual representation of HSA IL operand OP to file F. */
-
-static void
-dump_hsa_operand (FILE *f, hsa_op_base *op, bool dump_reg_type = false)
-{
- if (is_a <hsa_op_immed *> (op))
- dump_hsa_immed (f, as_a <hsa_op_immed *> (op));
- else if (is_a <hsa_op_reg *> (op))
- dump_hsa_reg (f, as_a <hsa_op_reg *> (op), dump_reg_type);
- else if (is_a <hsa_op_address *> (op))
- dump_hsa_address (f, as_a <hsa_op_address *> (op));
- else
- fprintf (f, "UNKNOWN_OP_KIND");
-}
-
-/* Dump textual representation of HSA IL operands in VEC to file F. */
-
-static void
-dump_hsa_operands (FILE *f, hsa_insn_basic *insn, int start = 0,
- int end = -1, bool dump_reg_type = false)
-{
- if (end == -1)
- end = insn->operand_count ();
-
- for (int i = start; i < end; i++)
- {
- dump_hsa_operand (f, insn->get_op (i), dump_reg_type);
- if (i != end - 1)
- fprintf (f, ", ");
- }
-}
-
-/* Indent F stream with INDENT spaces. */
-
-static void indent_stream (FILE *f, int indent)
-{
- for (int i = 0; i < indent; i++)
- fputc (' ', f);
-}
-
-/* Dump textual representation of HSA IL instruction INSN to file F. Prepend
- the instruction with *INDENT spaces and adjust the indentation for call
- instructions as appropriate. */
-
-static void
-dump_hsa_insn_1 (FILE *f, hsa_insn_basic *insn, int *indent)
-{
- gcc_checking_assert (insn);
-
- if (insn->m_number)
- fprintf (f, "%5d: ", insn->m_number);
-
- indent_stream (f, *indent);
-
- if (is_a <hsa_insn_phi *> (insn))
- {
- hsa_insn_phi *phi = as_a <hsa_insn_phi *> (insn);
- bool first = true;
- dump_hsa_reg (f, phi->m_dest, true);
- fprintf (f, " = PHI <");
- unsigned count = phi->operand_count ();
- for (unsigned i = 0; i < count; i++)
- {
- if (!phi->get_op (i))
- break;
- if (!first)
- fprintf (f, ", ");
- else
- first = false;
- dump_hsa_operand (f, phi->get_op (i), true);
- }
- fprintf (f, ">");
- }
- else if (is_a <hsa_insn_signal *> (insn))
- {
- hsa_insn_signal *mem = as_a <hsa_insn_signal *> (insn);
-
- fprintf (f, "%s", hsa_opcode_name (mem->m_opcode));
- fprintf (f, "_%s", hsa_m_atomicop_name (mem->m_signalop));
- if (mem->m_memory_order != BRIG_MEMORY_ORDER_NONE)
- fprintf (f, "_%s", hsa_memsem_name (mem->m_memory_order));
- fprintf (f, "_%s ", hsa_type_name (mem->m_type));
-
- dump_hsa_operands (f, mem);
- }
-
- else if (is_a <hsa_insn_atomic *> (insn))
- {
- hsa_insn_atomic *mem = as_a <hsa_insn_atomic *> (insn);
-
- /* Either operand[0] or operand[1] must be an address operand. */
- hsa_op_address *addr = NULL;
- if (is_a <hsa_op_address *> (mem->get_op (0)))
- addr = as_a <hsa_op_address *> (mem->get_op (0));
- else
- addr = as_a <hsa_op_address *> (mem->get_op (1));
-
- fprintf (f, "%s", hsa_opcode_name (mem->m_opcode));
- fprintf (f, "_%s", hsa_m_atomicop_name (mem->m_atomicop));
- if (addr->m_symbol)
- fprintf (f, "_%s", hsa_seg_name (addr->m_symbol->m_segment));
- if (mem->m_memoryorder != BRIG_MEMORY_ORDER_NONE)
- fprintf (f, "_%s", hsa_memsem_name (mem->m_memoryorder));
- if (mem->m_memoryscope != BRIG_MEMORY_SCOPE_NONE)
- fprintf (f, "_%s", hsa_memscope_name (mem->m_memoryscope));
- fprintf (f, "_%s ", hsa_type_name (mem->m_type));
-
- dump_hsa_operands (f, mem);
- }
- else if (is_a <hsa_insn_mem *> (insn))
- {
- hsa_insn_mem *mem = as_a <hsa_insn_mem *> (insn);
- hsa_op_address *addr = as_a <hsa_op_address *> (mem->get_op (1));
-
- fprintf (f, "%s", hsa_opcode_name (mem->m_opcode));
- if (addr->m_symbol)
- fprintf (f, "_%s", hsa_seg_name (addr->m_symbol->m_segment));
- if (mem->m_align != BRIG_ALIGNMENT_NONE)
- fprintf (f, "_align(%u)", hsa_byte_alignment (mem->m_align));
- if (mem->m_equiv_class != 0)
- fprintf (f, "_equiv(%i)", mem->m_equiv_class);
- fprintf (f, "_%s ", hsa_type_name (mem->m_type));
-
- dump_hsa_operand (f, mem->get_op (0));
- fprintf (f, ", ");
- dump_hsa_address (f, addr);
- }
- else if (insn->m_opcode == BRIG_OPCODE_LDA)
- {
- hsa_op_address *addr = as_a <hsa_op_address *> (insn->get_op (1));
-
- fprintf (f, "%s", hsa_opcode_name (insn->m_opcode));
- if (addr->m_symbol)
- fprintf (f, "_%s", hsa_seg_name (addr->m_symbol->m_segment));
- fprintf (f, "_%s ", hsa_type_name (insn->m_type));
-
- dump_hsa_operand (f, insn->get_op (0));
- fprintf (f, ", ");
- dump_hsa_address (f, addr);
- }
- else if (is_a <hsa_insn_seg *> (insn))
- {
- hsa_insn_seg *seg = as_a <hsa_insn_seg *> (insn);
- fprintf (f, "%s_%s_%s_%s ", hsa_opcode_name (seg->m_opcode),
- hsa_seg_name (seg->m_segment),
- hsa_type_name (seg->m_type), hsa_type_name (seg->m_src_type));
- dump_hsa_reg (f, as_a <hsa_op_reg *> (seg->get_op (0)));
- fprintf (f, ", ");
- dump_hsa_operand (f, seg->get_op (1));
- }
- else if (is_a <hsa_insn_cmp *> (insn))
- {
- hsa_insn_cmp *cmp = as_a <hsa_insn_cmp *> (insn);
- BrigType16_t src_type;
-
- if (is_a <hsa_op_reg *> (cmp->get_op (1)))
- src_type = as_a <hsa_op_reg *> (cmp->get_op (1))->m_type;
- else
- src_type = as_a <hsa_op_immed *> (cmp->get_op (1))->m_type;
-
- fprintf (f, "%s_%s_%s_%s ", hsa_opcode_name (cmp->m_opcode),
- hsa_cmpop_name (cmp->m_compare),
- hsa_type_name (cmp->m_type), hsa_type_name (src_type));
- dump_hsa_reg (f, as_a <hsa_op_reg *> (cmp->get_op (0)));
- fprintf (f, ", ");
- dump_hsa_operand (f, cmp->get_op (1));
- fprintf (f, ", ");
- dump_hsa_operand (f, cmp->get_op (2));
- }
- else if (is_a <hsa_insn_cbr *> (insn))
- {
- hsa_insn_cbr *br = as_a <hsa_insn_cbr *> (insn);
- basic_block target = NULL;
- edge_iterator ei;
- edge e;
-
- fprintf (f, "%s ", hsa_opcode_name (br->m_opcode));
- if (br->m_opcode == BRIG_OPCODE_CBR)
- {
- dump_hsa_reg (f, as_a <hsa_op_reg *> (br->get_op (0)));
- fprintf (f, ", ");
- }
-
- FOR_EACH_EDGE (e, ei, br->m_bb->succs)
- if (e->flags & EDGE_TRUE_VALUE)
- {
- target = e->dest;
- break;
- }
- fprintf (f, "BB %i", hsa_bb_for_bb (target)->m_index);
- }
- else if (is_a <hsa_insn_sbr *> (insn))
- {
- hsa_insn_sbr *sbr = as_a <hsa_insn_sbr *> (insn);
-
- fprintf (f, "%s ", hsa_opcode_name (sbr->m_opcode));
- dump_hsa_reg (f, as_a <hsa_op_reg *> (sbr->get_op (0)));
- fprintf (f, ", [");
-
- for (unsigned i = 0; i < sbr->m_jump_table.length (); i++)
- {
- fprintf (f, "BB %i", hsa_bb_for_bb (sbr->m_jump_table[i])->m_index);
- if (i != sbr->m_jump_table.length () - 1)
- fprintf (f, ", ");
- }
- }
- else if (is_a <hsa_insn_br *> (insn))
- {
- hsa_insn_br *br = as_a <hsa_insn_br *> (insn);
- fprintf (f, "%s_width(%s) ", hsa_opcode_name (br->m_opcode),
- hsa_width_specifier_name (br->m_width));
- }
- else if (is_a <hsa_insn_arg_block *> (insn))
- {
- hsa_insn_arg_block *arg_block = as_a <hsa_insn_arg_block *> (insn);
- bool start_p = arg_block->m_kind == BRIG_KIND_DIRECTIVE_ARG_BLOCK_START;
- char c = start_p ? '{' : '}';
-
- if (start_p)
- {
- *indent += 2;
- indent_stream (f, 2);
- }
-
- if (!start_p)
- *indent -= 2;
-
- fprintf (f, "%c", c);
- }
- else if (is_a <hsa_insn_call *> (insn))
- {
- hsa_insn_call *call = as_a <hsa_insn_call *> (insn);
- if (call->m_called_function)
- {
- const char *name = hsa_get_declaration_name (call->m_called_function);
- fprintf (f, "call &%s", name);
- }
- else
- {
- char *name = call->m_called_internal_fn->name ();
- fprintf (f, "call &%s", name);
- free (name);
- }
-
- if (call->m_output_arg)
- fprintf (f, "(%%res) ");
-
- fprintf (f, "(");
- for (unsigned i = 0; i < call->m_input_args.length (); i++)
- {
- fprintf (f, "%%__arg_%u", i);
-
- if (i != call->m_input_args.length () - 1)
- fprintf (f, ", ");
- }
- fprintf (f, ")");
- }
- else if (is_a <hsa_insn_comment *> (insn))
- {
- hsa_insn_comment *c = as_a <hsa_insn_comment *> (insn);
- fprintf (f, "%s", c->m_comment);
- }
- else if (is_a <hsa_insn_srctype *> (insn))
- {
- hsa_insn_srctype *srctype = as_a <hsa_insn_srctype *> (insn);
-
- fprintf (f, "%s_%s_%s ", hsa_opcode_name (srctype->m_opcode),
- hsa_type_name (srctype->m_type),
- hsa_type_name (srctype->m_source_type));
-
- dump_hsa_operands (f, insn);
- }
- else if (is_a <hsa_insn_packed *> (insn))
- {
- hsa_insn_packed *packed = as_a <hsa_insn_packed *> (insn);
-
- fprintf (f, "%s_v%u_%s_%s ", hsa_opcode_name (packed->m_opcode),
- packed->operand_count () - 1,
- hsa_type_name (packed->m_type),
- hsa_type_name (packed->m_source_type));
-
- if (packed->m_opcode == BRIG_OPCODE_COMBINE)
- {
- dump_hsa_operand (f, insn->get_op (0));
- fprintf (f, ", (");
- dump_hsa_operands (f, insn, 1);
- fprintf (f, ")");
- }
- else if (packed->m_opcode == BRIG_OPCODE_EXPAND)
- {
- fprintf (f, "(");
- dump_hsa_operands (f, insn, 0, insn->operand_count () - 1);
- fprintf (f, "), ");
- dump_hsa_operand (f, insn->get_op (insn->operand_count () - 1));
-
- }
- else
- gcc_unreachable ();
- }
- else if (is_a <hsa_insn_alloca *> (insn))
- {
- hsa_insn_alloca *alloca = as_a <hsa_insn_alloca *> (insn);
-
- fprintf (f, "%s_align(%u)_%s ", hsa_opcode_name (insn->m_opcode),
- hsa_byte_alignment (alloca->m_align),
- hsa_type_name (insn->m_type));
-
- dump_hsa_operands (f, insn);
- }
- else if (hsa_insn_queue *qi = dyn_cast <hsa_insn_queue *> (insn))
- {
- fprintf (f, "%s_%s_%s_%s ", hsa_opcode_name (qi->m_opcode),
- hsa_seg_name (qi->m_segment),
- hsa_memsem_name (qi->m_memory_order),
- hsa_type_name (qi->m_type));
-
- dump_hsa_operands (f, qi);
- }
- else
- {
- fprintf (f, "%s_%s ", hsa_opcode_name (insn->m_opcode),
- hsa_type_name (insn->m_type));
-
- dump_hsa_operands (f, insn);
- }
-
- if (insn->m_brig_offset)
- {
- fprintf (f, " /* BRIG offset: %u", insn->m_brig_offset);
-
- for (unsigned i = 0; i < insn->operand_count (); i++)
- fprintf (f, ", op%u: %u", i, insn->get_op (i)->m_brig_op_offset);
-
- fprintf (f, " */");
- }
-
- fprintf (f, "\n");
-}
-
-/* Dump textual representation of HSA IL instruction INSN to file F. */
-
-void
-dump_hsa_insn (FILE *f, hsa_insn_basic *insn)
-{
- int indent = 0;
- dump_hsa_insn_1 (f, insn, &indent);
-}
-
-/* Dump textual representation of HSA IL in HBB to file F. */
-
-void
-dump_hsa_bb (FILE *f, hsa_bb *hbb)
-{
- hsa_insn_basic *insn;
- edge_iterator ei;
- edge e;
- basic_block true_bb = NULL, other = NULL;
-
- fprintf (f, "BB %i:\n", hbb->m_index);
-
- int indent = 2;
- for (insn = hbb->m_first_phi; insn; insn = insn->m_next)
- dump_hsa_insn_1 (f, insn, &indent);
-
- for (insn = hbb->m_first_insn; insn; insn = insn->m_next)
- dump_hsa_insn_1 (f, insn, &indent);
-
- if (hbb->m_last_insn && is_a <hsa_insn_sbr *> (hbb->m_last_insn))
- goto exit;
-
- FOR_EACH_EDGE (e, ei, hbb->m_bb->succs)
- if (e->flags & EDGE_TRUE_VALUE)
- {
- gcc_assert (!true_bb);
- true_bb = e->dest;
- }
- else
- {
- gcc_assert (!other);
- other = e->dest;
- }
-
- if (true_bb)
- {
- if (!hbb->m_last_insn
- || hbb->m_last_insn->m_opcode != BRIG_OPCODE_CBR)
- fprintf (f, "WARNING: No branch insn for a true edge. \n");
- }
- else if (hbb->m_last_insn
- && hbb->m_last_insn->m_opcode == BRIG_OPCODE_CBR)
- fprintf (f, "WARNING: No true edge for a cbr statement\n");
-
- if (other && other->aux)
- fprintf (f, " Fall-through to BB %i\n",
- hsa_bb_for_bb (other)->m_index);
- else if (hbb->m_last_insn
- && hbb->m_last_insn->m_opcode != BRIG_OPCODE_RET)
- fprintf (f, " WARNING: Fall through to a BB with no aux!\n");
-
-exit:
- fprintf (f, "\n");
-}
-
-/* Dump textual representation of HSA IL of the current function to file F. */
-
-void
-dump_hsa_cfun (FILE *f)
-{
- basic_block bb;
-
- if (hsa_cfun->m_global_symbols.length () > 0)
- fprintf (f, "\nHSAIL in global scope\n");
-
- for (unsigned i = 0; i < hsa_cfun->m_global_symbols.length (); i++)
- {
- fprintf (f, " ");
- dump_hsa_symbol (f, hsa_cfun->m_global_symbols[i]);
- fprintf (f, "\n");
- }
-
- fprintf (f, "\nHSAIL IL for %s\n", hsa_cfun->m_name);
-
- for (unsigned i = 0; i < hsa_cfun->m_private_variables.length (); i++)
- {
- fprintf (f, " ");
- dump_hsa_symbol (f, hsa_cfun->m_private_variables[i]);
- fprintf (f, "\n");
- }
-
- FOR_ALL_BB_FN (bb, cfun)
- {
- hsa_bb *hbb = (class hsa_bb *) bb->aux;
- dump_hsa_bb (f, hbb);
- }
-}
-
-/* Dump textual representation of HSA IL instruction INSN to stderr. */
-
-DEBUG_FUNCTION void
-debug_hsa_insn (hsa_insn_basic *insn)
-{
- dump_hsa_insn (stderr, insn);
-}
-
-/* Dump textual representation of HSA IL in HBB to stderr. */
-
-DEBUG_FUNCTION void
-debug_hsa_bb (hsa_bb *hbb)
-{
- dump_hsa_bb (stderr, hbb);
-}
-
-/* Dump textual representation of HSA IL of the current function to stderr. */
-
-DEBUG_FUNCTION void
-debug_hsa_cfun (void)
-{
- dump_hsa_cfun (stderr);
-}
-
-/* Dump textual representation of an HSA operand to stderr. */
-
-DEBUG_FUNCTION void
-debug_hsa_operand (hsa_op_base *opc)
-{
- dump_hsa_operand (stderr, opc, true);
- fprintf (stderr, "\n");
-}
-
-/* Dump textual representation of as HSA symbol. */
-
-DEBUG_FUNCTION void
-debug_hsa_symbol (hsa_symbol *symbol)
-{
- dump_hsa_symbol (stderr, symbol);
- fprintf (stderr, "\n");
-}
+++ /dev/null
-/* A pass for lowering gimple to HSAIL
- Copyright (C) 2013-2020 Free Software Foundation, Inc.
- Contributed by Martin Jambor <mjambor@suse.cz> and
- Martin Liska <mliska@suse.cz>.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING3. If not see
-<http://www.gnu.org/licenses/>. */
-
-#include "config.h"
-#include "system.h"
-#include "coretypes.h"
-#include "memmodel.h"
-#include "tm.h"
-#include "is-a.h"
-#include "hash-table.h"
-#include "vec.h"
-#include "tree.h"
-#include "tree-pass.h"
-#include "function.h"
-#include "basic-block.h"
-#include "cfg.h"
-#include "fold-const.h"
-#include "gimple.h"
-#include "gimple-iterator.h"
-#include "bitmap.h"
-#include "dumpfile.h"
-#include "gimple-pretty-print.h"
-#include "diagnostic-core.h"
-#include "gimple-ssa.h"
-#include "tree-phinodes.h"
-#include "stringpool.h"
-#include "tree-vrp.h"
-#include "tree-ssanames.h"
-#include "tree-dfa.h"
-#include "ssa-iterators.h"
-#include "cgraph.h"
-#include "print-tree.h"
-#include "alloc-pool.h"
-#include "symbol-summary.h"
-#include "hsa-common.h"
-#include "cfghooks.h"
-#include "tree-cfg.h"
-#include "cfgloop.h"
-#include "cfganal.h"
-#include "builtins.h"
-#include "gomp-constants.h"
-#include "internal-fn.h"
-#include "builtins.h"
-#include "stor-layout.h"
-#include "stringpool.h"
-#include "attribs.h"
-
-/* Print a warning message and set that we have seen an error. */
-
-#define HSA_SORRY_ATV(location, message, ...) \
- do \
- { \
- hsa_fail_cfun (); \
- auto_diagnostic_group d; \
- if (warning_at (EXPR_LOCATION (hsa_cfun->m_decl), OPT_Whsa, \
- HSA_SORRY_MSG)) \
- inform (location, message, __VA_ARGS__); \
- } \
- while (false)
-
-/* Same as previous, but highlight a location. */
-
-#define HSA_SORRY_AT(location, message) \
- do \
- { \
- hsa_fail_cfun (); \
- auto_diagnostic_group d; \
- if (warning_at (EXPR_LOCATION (hsa_cfun->m_decl), OPT_Whsa, \
- HSA_SORRY_MSG)) \
- inform (location, message); \
- } \
- while (false)
-
-/* Default number of threads used by kernel dispatch. */
-
-#define HSA_DEFAULT_NUM_THREADS 64
-
-/* Following structures are defined in the final version
- of HSA specification. */
-
-/* HSA queue packet is shadow structure, originally provided by AMD. */
-
-struct hsa_queue_packet
-{
- uint16_t header;
- uint16_t setup;
- uint16_t workgroup_size_x;
- uint16_t workgroup_size_y;
- uint16_t workgroup_size_z;
- uint16_t reserved0;
- uint32_t grid_size_x;
- uint32_t grid_size_y;
- uint32_t grid_size_z;
- uint32_t private_segment_size;
- uint32_t group_segment_size;
- uint64_t kernel_object;
- void *kernarg_address;
- uint64_t reserved2;
- uint64_t completion_signal;
-};
-
-/* HSA queue is shadow structure, originally provided by AMD. */
-
-struct hsa_queue
-{
- int type;
- uint32_t features;
- void *base_address;
- uint64_t doorbell_signal;
- uint32_t size;
- uint32_t reserved1;
- uint64_t id;
-};
-
-static struct obstack hsa_obstack;
-
-/* List of pointers to all instructions that come from an object allocator. */
-static vec <hsa_insn_basic *> hsa_instructions;
-
-/* List of pointers to all operands that come from an object allocator. */
-static vec <hsa_op_base *> hsa_operands;
-
-hsa_symbol::hsa_symbol ()
- : m_decl (NULL_TREE), m_name (NULL), m_name_number (0),
- m_directive_offset (0), m_type (BRIG_TYPE_NONE),
- m_segment (BRIG_SEGMENT_NONE), m_linkage (BRIG_LINKAGE_NONE), m_dim (0),
- m_cst_value (NULL), m_global_scope_p (false), m_seen_error (false),
- m_allocation (BRIG_ALLOCATION_AUTOMATIC), m_emitted_to_brig (false)
-{
-}
-
-
-hsa_symbol::hsa_symbol (BrigType16_t type, BrigSegment8_t segment,
- BrigLinkage8_t linkage, bool global_scope_p,
- BrigAllocation allocation, BrigAlignment8_t align)
- : m_decl (NULL_TREE), m_name (NULL), m_name_number (0),
- m_directive_offset (0), m_type (type), m_segment (segment),
- m_linkage (linkage), m_dim (0), m_cst_value (NULL),
- m_global_scope_p (global_scope_p), m_seen_error (false),
- m_allocation (allocation), m_emitted_to_brig (false), m_align (align)
-{
-}
-
-unsigned HOST_WIDE_INT
-hsa_symbol::total_byte_size ()
-{
- unsigned HOST_WIDE_INT s
- = hsa_type_bit_size (~BRIG_TYPE_ARRAY_MASK & m_type);
- gcc_assert (s % BITS_PER_UNIT == 0);
- s /= BITS_PER_UNIT;
-
- if (m_dim)
- s *= m_dim;
-
- return s;
-}
-
-/* Forward declaration. */
-
-static BrigType16_t
-hsa_type_for_tree_type (const_tree type, unsigned HOST_WIDE_INT *dim_p,
- bool min32int);
-
-void
-hsa_symbol::fillup_for_decl (tree decl)
-{
- m_decl = decl;
- m_type = hsa_type_for_tree_type (TREE_TYPE (decl), &m_dim, false);
- if (hsa_seen_error ())
- {
- m_seen_error = true;
- return;
- }
-
- m_align = MAX (m_align, hsa_natural_alignment (m_type));
-}
-
-/* Constructor of class representing global HSA function/kernel information and
- state. FNDECL is function declaration, KERNEL_P is true if the function
- is going to become a HSA kernel. If the function has body, SSA_NAMES_COUNT
- should be set to number of SSA names used in the function.
- MODIFIED_CFG is set to true in case we modified control-flow graph
- of the function. */
-
-hsa_function_representation::hsa_function_representation
- (tree fdecl, bool kernel_p, unsigned ssa_names_count, bool modified_cfg)
- : m_name (NULL),
- m_reg_count (0), m_input_args (vNULL),
- m_output_arg (NULL), m_spill_symbols (vNULL), m_global_symbols (vNULL),
- m_private_variables (vNULL), m_called_functions (vNULL),
- m_called_internal_fns (vNULL), m_hbb_count (0),
- m_in_ssa (true), m_kern_p (kernel_p), m_declaration_p (false),
- m_decl (fdecl), m_internal_fn (NULL), m_shadow_reg (NULL),
- m_kernel_dispatch_count (0), m_maximum_omp_data_size (0),
- m_seen_error (false), m_temp_symbol_count (0), m_ssa_map (),
- m_modified_cfg (modified_cfg)
-{
- int sym_init_len = (vec_safe_length (cfun->local_decls) / 2) + 1;
- m_local_symbols = new hash_table <hsa_noop_symbol_hasher> (sym_init_len);
- m_ssa_map.safe_grow_cleared (ssa_names_count);
-}
-
-/* Constructor of class representing HSA function information that
- is derived for an internal function. */
-hsa_function_representation::hsa_function_representation (hsa_internal_fn *fn)
- : m_reg_count (0), m_input_args (vNULL),
- m_output_arg (NULL), m_local_symbols (NULL),
- m_spill_symbols (vNULL), m_global_symbols (vNULL),
- m_private_variables (vNULL), m_called_functions (vNULL),
- m_called_internal_fns (vNULL), m_hbb_count (0),
- m_in_ssa (true), m_kern_p (false), m_declaration_p (true), m_decl (NULL),
- m_internal_fn (fn), m_shadow_reg (NULL), m_kernel_dispatch_count (0),
- m_maximum_omp_data_size (0), m_seen_error (false), m_temp_symbol_count (0),
- m_ssa_map () {}
-
-/* Destructor of class holding function/kernel-wide information and state. */
-
-hsa_function_representation::~hsa_function_representation ()
-{
- /* Kernel names are deallocated at the end of BRIG output when deallocating
- hsa_decl_kernel_mapping. */
- if (!m_kern_p || m_seen_error)
- free (m_name);
-
- for (unsigned i = 0; i < m_input_args.length (); i++)
- delete m_input_args[i];
- m_input_args.release ();
-
- delete m_output_arg;
- delete m_local_symbols;
-
- for (unsigned i = 0; i < m_spill_symbols.length (); i++)
- delete m_spill_symbols[i];
- m_spill_symbols.release ();
-
- hsa_symbol *sym;
- for (unsigned i = 0; i < m_global_symbols.iterate (i, &sym); i++)
- if (sym->m_linkage != BRIG_ALLOCATION_PROGRAM)
- delete sym;
- m_global_symbols.release ();
-
- for (unsigned i = 0; i < m_private_variables.length (); i++)
- delete m_private_variables[i];
- m_private_variables.release ();
- m_called_functions.release ();
- m_ssa_map.release ();
-
- for (unsigned i = 0; i < m_called_internal_fns.length (); i++)
- delete m_called_internal_fns[i];
-}
-
-hsa_op_reg *
-hsa_function_representation::get_shadow_reg ()
-{
- /* If we compile a function with kernel dispatch and does not set
- an optimization level, the function won't be inlined and
- we return NULL. */
- if (!m_kern_p)
- return NULL;
-
- if (m_shadow_reg)
- return m_shadow_reg;
-
- /* Append the shadow argument. */
- hsa_symbol *shadow = new hsa_symbol (BRIG_TYPE_U64, BRIG_SEGMENT_KERNARG,
- BRIG_LINKAGE_FUNCTION);
- m_input_args.safe_push (shadow);
- shadow->m_name = "hsa_runtime_shadow";
-
- hsa_op_reg *r = new hsa_op_reg (BRIG_TYPE_U64);
- hsa_op_address *addr = new hsa_op_address (shadow);
-
- hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_LD, BRIG_TYPE_U64, r, addr);
- hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun))->append_insn (mem);
- m_shadow_reg = r;
-
- return r;
-}
-
-bool hsa_function_representation::has_shadow_reg_p ()
-{
- return m_shadow_reg != NULL;
-}
-
-void
-hsa_function_representation::init_extra_bbs ()
-{
- hsa_init_new_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
- hsa_init_new_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
-}
-
-void
-hsa_function_representation::update_dominance ()
-{
- if (m_modified_cfg)
- {
- free_dominance_info (CDI_DOMINATORS);
- calculate_dominance_info (CDI_DOMINATORS);
- }
-}
-
-hsa_symbol *
-hsa_function_representation::create_hsa_temporary (BrigType16_t type)
-{
- hsa_symbol *s = new hsa_symbol (type, BRIG_SEGMENT_PRIVATE,
- BRIG_LINKAGE_FUNCTION);
- s->m_name_number = m_temp_symbol_count++;
-
- hsa_cfun->m_private_variables.safe_push (s);
- return s;
-}
-
-BrigLinkage8_t
-hsa_function_representation::get_linkage ()
-{
- if (m_internal_fn)
- return BRIG_LINKAGE_PROGRAM;
-
- return m_kern_p || TREE_PUBLIC (m_decl) ?
- BRIG_LINKAGE_PROGRAM : BRIG_LINKAGE_MODULE;
-}
-
-/* Hash map of simple OMP builtins. */
-static hash_map <nofree_string_hash, omp_simple_builtin> *omp_simple_builtins
- = NULL;
-
-/* Warning messages for OMP builtins. */
-
-#define HSA_WARN_LOCK_ROUTINE "support for HSA does not implement OpenMP " \
- "lock routines"
-#define HSA_WARN_TIMING_ROUTINE "support for HSA does not implement OpenMP " \
- "timing routines"
-#define HSA_WARN_MEMORY_ROUTINE "OpenMP device memory library routines have " \
- "undefined semantics within target regions, support for HSA ignores them"
-#define HSA_WARN_AFFINITY "Support for HSA does not implement OpenMP " \
- "affinity feateres"
-
-/* Initialize hash map with simple OMP builtins. */
-
-static void
-hsa_init_simple_builtins ()
-{
- if (omp_simple_builtins != NULL)
- return;
-
- omp_simple_builtins
- = new hash_map <nofree_string_hash, omp_simple_builtin> ();
-
- omp_simple_builtin omp_builtins[] =
- {
- omp_simple_builtin ("omp_get_initial_device", NULL, false,
- new hsa_op_immed (GOMP_DEVICE_HOST,
- (BrigType16_t) BRIG_TYPE_S32)),
- omp_simple_builtin ("omp_is_initial_device", NULL, false,
- new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)),
- omp_simple_builtin ("omp_get_dynamic", NULL, false,
- new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)),
- omp_simple_builtin ("omp_set_dynamic", NULL, false, NULL),
- omp_simple_builtin ("omp_init_lock", HSA_WARN_LOCK_ROUTINE, true),
- omp_simple_builtin ("omp_init_lock_with_hint", HSA_WARN_LOCK_ROUTINE,
- true),
- omp_simple_builtin ("omp_init_nest_lock_with_hint", HSA_WARN_LOCK_ROUTINE,
- true),
- omp_simple_builtin ("omp_destroy_lock", HSA_WARN_LOCK_ROUTINE, true),
- omp_simple_builtin ("omp_set_lock", HSA_WARN_LOCK_ROUTINE, true),
- omp_simple_builtin ("omp_unset_lock", HSA_WARN_LOCK_ROUTINE, true),
- omp_simple_builtin ("omp_test_lock", HSA_WARN_LOCK_ROUTINE, true),
- omp_simple_builtin ("omp_get_wtime", HSA_WARN_TIMING_ROUTINE, true),
- omp_simple_builtin ("omp_get_wtick", HSA_WARN_TIMING_ROUTINE, true),
- omp_simple_builtin ("omp_target_alloc", HSA_WARN_MEMORY_ROUTINE, false,
- new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_U64)),
- omp_simple_builtin ("omp_target_free", HSA_WARN_MEMORY_ROUTINE, false),
- omp_simple_builtin ("omp_target_is_present", HSA_WARN_MEMORY_ROUTINE,
- false,
- new hsa_op_immed (-1, (BrigType16_t) BRIG_TYPE_S32)),
- omp_simple_builtin ("omp_target_memcpy", HSA_WARN_MEMORY_ROUTINE, false,
- new hsa_op_immed (-1, (BrigType16_t) BRIG_TYPE_S32)),
- omp_simple_builtin ("omp_target_memcpy_rect", HSA_WARN_MEMORY_ROUTINE,
- false,
- new hsa_op_immed (-1, (BrigType16_t) BRIG_TYPE_S32)),
- omp_simple_builtin ("omp_target_associate_ptr", HSA_WARN_MEMORY_ROUTINE,
- false,
- new hsa_op_immed (-1, (BrigType16_t) BRIG_TYPE_S32)),
- omp_simple_builtin ("omp_target_disassociate_ptr",
- HSA_WARN_MEMORY_ROUTINE,
- false,
- new hsa_op_immed (-1, (BrigType16_t) BRIG_TYPE_S32)),
- omp_simple_builtin ("omp_set_max_active_levels",
- "Support for HSA only allows only one active level, "
- "call to omp_set_max_active_levels will be ignored "
- "in the generated HSAIL",
- false, NULL),
- omp_simple_builtin ("omp_get_max_active_levels", NULL, false,
- new hsa_op_immed (1, (BrigType16_t) BRIG_TYPE_S32)),
- omp_simple_builtin ("omp_in_final", NULL, false,
- new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)),
- omp_simple_builtin ("omp_get_proc_bind", HSA_WARN_AFFINITY, false,
- new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)),
- omp_simple_builtin ("omp_get_num_places", HSA_WARN_AFFINITY, false,
- new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)),
- omp_simple_builtin ("omp_get_place_num_procs", HSA_WARN_AFFINITY, false,
- new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)),
- omp_simple_builtin ("omp_get_place_proc_ids", HSA_WARN_AFFINITY, false,
- NULL),
- omp_simple_builtin ("omp_get_place_num", HSA_WARN_AFFINITY, false,
- new hsa_op_immed (-1, (BrigType16_t) BRIG_TYPE_S32)),
- omp_simple_builtin ("omp_get_partition_num_places", HSA_WARN_AFFINITY,
- false,
- new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)),
- omp_simple_builtin ("omp_get_partition_place_nums", HSA_WARN_AFFINITY,
- false, NULL),
- omp_simple_builtin ("omp_set_default_device",
- "omp_set_default_device has undefined semantics "
- "within target regions, support for HSA ignores it",
- false, NULL),
- omp_simple_builtin ("omp_get_default_device",
- "omp_get_default_device has undefined semantics "
- "within target regions, support for HSA ignores it",
- false,
- new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)),
- omp_simple_builtin ("omp_get_num_devices",
- "omp_get_num_devices has undefined semantics "
- "within target regions, support for HSA ignores it",
- false,
- new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)),
- omp_simple_builtin ("omp_get_num_procs", NULL, true, NULL),
- omp_simple_builtin ("omp_get_cancellation", NULL, true, NULL),
- omp_simple_builtin ("omp_set_nested", NULL, true, NULL),
- omp_simple_builtin ("omp_get_nested", NULL, true, NULL),
- omp_simple_builtin ("omp_set_schedule", NULL, true, NULL),
- omp_simple_builtin ("omp_get_schedule", NULL, true, NULL),
- omp_simple_builtin ("omp_get_thread_limit", NULL, true, NULL),
- omp_simple_builtin ("omp_get_team_size", NULL, true, NULL),
- omp_simple_builtin ("omp_get_ancestor_thread_num", NULL, true, NULL),
- omp_simple_builtin ("omp_get_max_task_priority", NULL, true, NULL)
- };
-
- unsigned count = sizeof (omp_builtins) / sizeof (omp_simple_builtin);
-
- for (unsigned i = 0; i < count; i++)
- omp_simple_builtins->put (omp_builtins[i].m_name, omp_builtins[i]);
-}
-
-/* Allocate HSA structures that we need only while generating with this. */
-
-static void
-hsa_init_data_for_cfun ()
-{
- hsa_init_compilation_unit_data ();
- gcc_obstack_init (&hsa_obstack);
-}
-
-/* Deinitialize HSA subsystem and free all allocated memory. */
-
-static void
-hsa_deinit_data_for_cfun (void)
-{
- basic_block bb;
-
- FOR_ALL_BB_FN (bb, cfun)
- if (bb->aux)
- {
- hsa_bb *hbb = hsa_bb_for_bb (bb);
- hbb->~hsa_bb ();
- bb->aux = NULL;
- }
-
- for (unsigned int i = 0; i < hsa_operands.length (); i++)
- hsa_destroy_operand (hsa_operands[i]);
-
- hsa_operands.release ();
-
- for (unsigned i = 0; i < hsa_instructions.length (); i++)
- hsa_destroy_insn (hsa_instructions[i]);
-
- hsa_instructions.release ();
-
- if (omp_simple_builtins != NULL)
- {
- delete omp_simple_builtins;
- omp_simple_builtins = NULL;
- }
-
- obstack_free (&hsa_obstack, NULL);
- delete hsa_cfun;
-}
-
-/* Return the type which holds addresses in the given SEGMENT. */
-
-static BrigType16_t
-hsa_get_segment_addr_type (BrigSegment8_t segment)
-{
- switch (segment)
- {
- case BRIG_SEGMENT_NONE:
- gcc_unreachable ();
-
- case BRIG_SEGMENT_FLAT:
- case BRIG_SEGMENT_GLOBAL:
- case BRIG_SEGMENT_READONLY:
- case BRIG_SEGMENT_KERNARG:
- return hsa_machine_large_p () ? BRIG_TYPE_U64 : BRIG_TYPE_U32;
-
- case BRIG_SEGMENT_GROUP:
- case BRIG_SEGMENT_PRIVATE:
- case BRIG_SEGMENT_SPILL:
- case BRIG_SEGMENT_ARG:
- return BRIG_TYPE_U32;
- }
- gcc_unreachable ();
-}
-
-/* Return integer brig type according to provided SIZE in bytes. If SIGN
- is set to true, return signed integer type. */
-
-static BrigType16_t
-get_integer_type_by_bytes (unsigned size, bool sign)
-{
- if (sign)
- switch (size)
- {
- case 1:
- return BRIG_TYPE_S8;
- case 2:
- return BRIG_TYPE_S16;
- case 4:
- return BRIG_TYPE_S32;
- case 8:
- return BRIG_TYPE_S64;
- default:
- break;
- }
- else
- switch (size)
- {
- case 1:
- return BRIG_TYPE_U8;
- case 2:
- return BRIG_TYPE_U16;
- case 4:
- return BRIG_TYPE_U32;
- case 8:
- return BRIG_TYPE_U64;
- default:
- break;
- }
-
- return 0;
-}
-
-/* If T points to an integral type smaller than 32 bits, change it to a 32bit
- equivalent and return the result. Otherwise just return the result. */
-
-static BrigType16_t
-hsa_extend_inttype_to_32bit (BrigType16_t t)
-{
- if (t == BRIG_TYPE_U8 || t == BRIG_TYPE_U16)
- return BRIG_TYPE_U32;
- else if (t == BRIG_TYPE_S8 || t == BRIG_TYPE_S16)
- return BRIG_TYPE_S32;
- return t;
-}
-
-/* Return HSA type for tree TYPE, which has to fit into BrigType16_t. Pointers
- are assumed to use flat addressing. If min32int is true, always expand
- integer types to one that has at least 32 bits. */
-
-static BrigType16_t
-hsa_type_for_scalar_tree_type (const_tree type, bool min32int)
-{
- HOST_WIDE_INT bsize;
- const_tree base;
- BrigType16_t res = BRIG_TYPE_NONE;
-
- gcc_checking_assert (TYPE_P (type));
- gcc_checking_assert (!AGGREGATE_TYPE_P (type));
- if (POINTER_TYPE_P (type))
- return hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT);
-
- if (TREE_CODE (type) == VECTOR_TYPE)
- base = TREE_TYPE (type);
- else if (TREE_CODE (type) == COMPLEX_TYPE)
- {
- base = TREE_TYPE (type);
- min32int = true;
- }
- else
- base = type;
-
- if (!tree_fits_uhwi_p (TYPE_SIZE (base)))
- {
- HSA_SORRY_ATV (EXPR_LOCATION (type),
- "support for HSA does not implement huge or "
- "variable-sized type %qT", type);
- return res;
- }
-
- bsize = tree_to_uhwi (TYPE_SIZE (base));
- unsigned byte_size = bsize / BITS_PER_UNIT;
- if (INTEGRAL_TYPE_P (base))
- res = get_integer_type_by_bytes (byte_size, !TYPE_UNSIGNED (base));
- else if (SCALAR_FLOAT_TYPE_P (base))
- {
- switch (bsize)
- {
- case 16:
- res = BRIG_TYPE_F16;
- break;
- case 32:
- res = BRIG_TYPE_F32;
- break;
- case 64:
- res = BRIG_TYPE_F64;
- break;
- default:
- break;
- }
- }
-
- if (res == BRIG_TYPE_NONE)
- {
- HSA_SORRY_ATV (EXPR_LOCATION (type),
- "support for HSA does not implement type %qT", type);
- return res;
- }
-
- if (TREE_CODE (type) == VECTOR_TYPE)
- {
- HOST_WIDE_INT tsize = tree_to_uhwi (TYPE_SIZE (type));
-
- if (bsize == tsize)
- {
- HSA_SORRY_ATV (EXPR_LOCATION (type),
- "support for HSA does not implement a vector type "
- "where a type and unit size are equal: %qT", type);
- return res;
- }
-
- switch (tsize)
- {
- case 32:
- res |= BRIG_TYPE_PACK_32;
- break;
- case 64:
- res |= BRIG_TYPE_PACK_64;
- break;
- case 128:
- res |= BRIG_TYPE_PACK_128;
- break;
- default:
- HSA_SORRY_ATV (EXPR_LOCATION (type),
- "support for HSA does not implement type %qT", type);
- }
- }
-
- if (min32int)
- /* Registers/immediate operands can only be 32bit or more except for
- f16. */
- res = hsa_extend_inttype_to_32bit (res);
-
- if (TREE_CODE (type) == COMPLEX_TYPE)
- {
- unsigned bsize = 2 * hsa_type_bit_size (res);
- res = hsa_bittype_for_bitsize (bsize);
- }
-
- return res;
-}
-
-/* Returns the BRIG type we need to load/store entities of TYPE. */
-
-static BrigType16_t
-mem_type_for_type (BrigType16_t type)
-{
- /* HSA has non-intuitive constraints on load/store types. If it's
- a bit-type it _must_ be B128, if it's not a bit-type it must be
- 64bit max. So for loading entities of 128 bits (e.g. vectors)
- we have to use B128, while for loading the rest we have to use the
- input type (??? or maybe also flattened to a equally sized non-vector
- unsigned type?). */
- if ((type & BRIG_TYPE_PACK_MASK) == BRIG_TYPE_PACK_128)
- return BRIG_TYPE_B128;
- else if (hsa_btype_p (type) || hsa_type_packed_p (type))
- {
- unsigned bitsize = hsa_type_bit_size (type);
- if (bitsize < 128)
- return hsa_uint_for_bitsize (bitsize);
- else
- return hsa_bittype_for_bitsize (bitsize);
- }
- return type;
-}
-
-/* Return HSA type for tree TYPE. If it cannot fit into BrigType16_t, some
- kind of array will be generated, setting DIM appropriately. Otherwise, it
- will be set to zero. */
-
-static BrigType16_t
-hsa_type_for_tree_type (const_tree type, unsigned HOST_WIDE_INT *dim_p = NULL,
- bool min32int = false)
-{
- gcc_checking_assert (TYPE_P (type));
- if (!tree_fits_uhwi_p (TYPE_SIZE_UNIT (type)))
- {
- HSA_SORRY_ATV (EXPR_LOCATION (type), "support for HSA does not "
- "implement huge or variable-sized type %qT", type);
- return BRIG_TYPE_NONE;
- }
-
- if (RECORD_OR_UNION_TYPE_P (type))
- {
- if (dim_p)
- *dim_p = tree_to_uhwi (TYPE_SIZE_UNIT (type));
- return BRIG_TYPE_U8 | BRIG_TYPE_ARRAY;
- }
-
- if (TREE_CODE (type) == ARRAY_TYPE)
- {
- /* We try to be nice and use the real base-type when this is an array of
- scalars and only resort to an array of bytes if the type is more
- complex. */
-
- unsigned HOST_WIDE_INT dim = 1;
-
- while (TREE_CODE (type) == ARRAY_TYPE)
- {
- tree domain = TYPE_DOMAIN (type);
- if (!TYPE_MIN_VALUE (domain)
- || !TYPE_MAX_VALUE (domain)
- || !tree_fits_shwi_p (TYPE_MIN_VALUE (domain))
- || !tree_fits_shwi_p (TYPE_MAX_VALUE (domain)))
- {
- HSA_SORRY_ATV (EXPR_LOCATION (type),
- "support for HSA does not implement array "
- "%qT with unknown bounds", type);
- return BRIG_TYPE_NONE;
- }
- HOST_WIDE_INT min = tree_to_shwi (TYPE_MIN_VALUE (domain));
- HOST_WIDE_INT max = tree_to_shwi (TYPE_MAX_VALUE (domain));
- dim = dim * (unsigned HOST_WIDE_INT) (max - min + 1);
- type = TREE_TYPE (type);
- }
-
- BrigType16_t res;
- if (RECORD_OR_UNION_TYPE_P (type))
- {
- dim = dim * tree_to_uhwi (TYPE_SIZE_UNIT (type));
- res = BRIG_TYPE_U8;
- }
- else
- res = hsa_type_for_scalar_tree_type (type, false);
-
- if (dim_p)
- *dim_p = dim;
- return res | BRIG_TYPE_ARRAY;
- }
-
- /* Scalar case: */
- if (dim_p)
- *dim_p = 0;
-
- return hsa_type_for_scalar_tree_type (type, min32int);
-}
-
-/* Returns true if converting from STYPE into DTYPE needs the _CVT
- opcode. If false a normal _MOV is enough. */
-
-static bool
-hsa_needs_cvt (BrigType16_t dtype, BrigType16_t stype)
-{
- if (hsa_btype_p (dtype))
- return false;
-
- /* float <-> int conversions are real converts. */
- if (hsa_type_float_p (dtype) != hsa_type_float_p (stype))
- return true;
- /* When both types have different size, then we need CVT as well. */
- if (hsa_type_bit_size (dtype) != hsa_type_bit_size (stype))
- return true;
- return false;
-}
-
-/* Return declaration name if it exists or create one from UID if it does not.
- If DECL is a local variable, make UID part of its name. */
-
-const char *
-hsa_get_declaration_name (tree decl)
-{
- if (!DECL_NAME (decl))
- {
- char buf[64];
- snprintf (buf, 64, "__hsa_anon_%u", DECL_UID (decl));
- size_t len = strlen (buf);
- char *copy = (char *) obstack_alloc (&hsa_obstack, len + 1);
- memcpy (copy, buf, len + 1);
- return copy;
- }
-
- tree name_tree;
- if (TREE_CODE (decl) == FUNCTION_DECL
- || (TREE_CODE (decl) == VAR_DECL && is_global_var (decl)))
- name_tree = DECL_ASSEMBLER_NAME (decl);
- else
- name_tree = DECL_NAME (decl);
-
- const char *name = IDENTIFIER_POINTER (name_tree);
- /* User-defined assembly names have prepended asterisk symbol. */
- if (name[0] == '*')
- name++;
-
- if ((TREE_CODE (decl) == VAR_DECL)
- && decl_function_context (decl))
- {
- size_t len = strlen (name);
- char *buf = (char *) alloca (len + 32);
- snprintf (buf, len + 32, "%s_%u", name, DECL_UID (decl));
- len = strlen (buf);
- char *copy = (char *) obstack_alloc (&hsa_obstack, len + 1);
- memcpy (copy, buf, len + 1);
- return copy;
- }
- else
- return name;
-}
-
-/* Lookup or create the associated hsa_symbol structure with a given VAR_DECL
- or lookup the hsa_structure corresponding to a PARM_DECL. */
-
-static hsa_symbol *
-get_symbol_for_decl (tree decl)
-{
- hsa_symbol **slot;
- hsa_symbol dummy (BRIG_TYPE_NONE, BRIG_SEGMENT_NONE, BRIG_LINKAGE_NONE);
-
- gcc_assert (TREE_CODE (decl) == PARM_DECL
- || TREE_CODE (decl) == RESULT_DECL
- || TREE_CODE (decl) == VAR_DECL
- || TREE_CODE (decl) == CONST_DECL);
-
- dummy.m_decl = decl;
-
- bool is_in_global_vars = ((TREE_CODE (decl) == VAR_DECL)
- && !decl_function_context (decl));
-
- if (is_in_global_vars)
- slot = hsa_global_variable_symbols->find_slot (&dummy, INSERT);
- else
- slot = hsa_cfun->m_local_symbols->find_slot (&dummy, INSERT);
-
- gcc_checking_assert (slot);
- if (*slot)
- {
- hsa_symbol *sym = (*slot);
-
- /* If the symbol is problematic, mark current function also as
- problematic. */
- if (sym->m_seen_error)
- hsa_fail_cfun ();
-
- /* PR hsa/70234: If a global variable was marked to be emitted,
- but HSAIL generation of a function using the variable fails,
- we should retry to emit the variable in context of a different
- function.
-
- Iterate elements whether a symbol is already in m_global_symbols
- of not. */
- if (is_in_global_vars && !sym->m_emitted_to_brig)
- {
- for (unsigned i = 0; i < hsa_cfun->m_global_symbols.length (); i++)
- if (hsa_cfun->m_global_symbols[i] == sym)
- return *slot;
- hsa_cfun->m_global_symbols.safe_push (sym);
- }
-
- return *slot;
- }
- else
- {
- hsa_symbol *sym;
- /* PARM_DECLs and RESULT_DECL should be already in m_local_symbols. */
- gcc_assert (TREE_CODE (decl) == VAR_DECL
- || TREE_CODE (decl) == CONST_DECL);
- BrigAlignment8_t align = hsa_object_alignment (decl);
-
- if (is_in_global_vars)
- {
- gcc_checking_assert (TREE_CODE (decl) != CONST_DECL);
- sym = new hsa_symbol (BRIG_TYPE_NONE, BRIG_SEGMENT_GLOBAL,
- BRIG_LINKAGE_PROGRAM, true,
- BRIG_ALLOCATION_PROGRAM, align);
- hsa_cfun->m_global_symbols.safe_push (sym);
- sym->fillup_for_decl (decl);
- if (sym->m_align > align)
- {
- sym->m_seen_error = true;
- HSA_SORRY_ATV (EXPR_LOCATION (decl),
- "HSA specification requires that %E is at least "
- "naturally aligned", decl);
- }
- }
- else
- {
- /* As generation of efficient memory copy instructions relies
- on alignment greater or equal to 8 bytes,
- we need to increase alignment of all aggregate types.. */
- if (AGGREGATE_TYPE_P (TREE_TYPE (decl)))
- align = MAX ((BrigAlignment8_t) BRIG_ALIGNMENT_8, align);
-
- BrigAllocation allocation = BRIG_ALLOCATION_AUTOMATIC;
- BrigSegment8_t segment;
- if (TREE_CODE (decl) == CONST_DECL)
- {
- segment = BRIG_SEGMENT_READONLY;
- allocation = BRIG_ALLOCATION_AGENT;
- }
- else if (lookup_attribute ("hsa_group_segment",
- DECL_ATTRIBUTES (decl)))
- segment = BRIG_SEGMENT_GROUP;
- else if (TREE_STATIC (decl))
- {
- segment = BRIG_SEGMENT_GLOBAL;
- allocation = BRIG_ALLOCATION_PROGRAM;
- }
- else if (lookup_attribute ("hsa_global_segment",
- DECL_ATTRIBUTES (decl)))
- segment = BRIG_SEGMENT_GLOBAL;
- else
- segment = BRIG_SEGMENT_PRIVATE;
-
- sym = new hsa_symbol (BRIG_TYPE_NONE, segment, BRIG_LINKAGE_FUNCTION,
- false, allocation, align);
- sym->fillup_for_decl (decl);
- hsa_cfun->m_private_variables.safe_push (sym);
- }
-
- sym->m_name = hsa_get_declaration_name (decl);
- *slot = sym;
- return sym;
- }
-}
-
-/* For a given HSA function declaration, return a host
- function declaration. */
-
-tree
-hsa_get_host_function (tree decl)
-{
- hsa_function_summary *s = hsa_summaries->get (cgraph_node::get_create (decl));
- gcc_assert (s->m_gpu_implementation_p);
-
- return s->m_bound_function ? s->m_bound_function->decl : NULL;
-}
-
-/* Return true if function DECL has a host equivalent function. */
-
-static char *
-get_brig_function_name (tree decl)
-{
- tree d = decl;
-
- hsa_function_summary *s = hsa_summaries->get (cgraph_node::get_create (d));
- if (s != NULL
- && s->m_gpu_implementation_p
- && s->m_bound_function)
- d = s->m_bound_function->decl;
-
- /* IPA split can create a function that has no host equivalent. */
- if (d == NULL)
- d = decl;
-
- char *name = xstrdup (hsa_get_declaration_name (d));
- hsa_sanitize_name (name);
-
- return name;
-}
-
-/* Create a spill symbol of type TYPE. */
-
-hsa_symbol *
-hsa_get_spill_symbol (BrigType16_t type)
-{
- hsa_symbol *sym = new hsa_symbol (type, BRIG_SEGMENT_SPILL,
- BRIG_LINKAGE_FUNCTION);
- hsa_cfun->m_spill_symbols.safe_push (sym);
- return sym;
-}
-
-/* Create a symbol for a read-only string constant. */
-hsa_symbol *
-hsa_get_string_cst_symbol (tree string_cst)
-{
- gcc_checking_assert (TREE_CODE (string_cst) == STRING_CST);
-
- hsa_symbol **slot = hsa_cfun->m_string_constants_map.get (string_cst);
- if (slot)
- return *slot;
-
- hsa_op_immed *cst = new hsa_op_immed (string_cst);
- hsa_symbol *sym = new hsa_symbol (cst->m_type, BRIG_SEGMENT_GLOBAL,
- BRIG_LINKAGE_MODULE, true,
- BRIG_ALLOCATION_AGENT);
- sym->m_cst_value = cst;
- sym->m_dim = TREE_STRING_LENGTH (string_cst);
- sym->m_name_number = hsa_cfun->m_global_symbols.length ();
-
- hsa_cfun->m_global_symbols.safe_push (sym);
- hsa_cfun->m_string_constants_map.put (string_cst, sym);
- return sym;
-}
-
-/* Make the type of a MOV instruction larger if mandated by HSAIL rules. */
-
-static void
-hsa_fixup_mov_insn_type (hsa_insn_basic *insn)
-{
- insn->m_type = hsa_extend_inttype_to_32bit (insn->m_type);
- if (insn->m_type == BRIG_TYPE_B8 || insn->m_type == BRIG_TYPE_B16)
- insn->m_type = BRIG_TYPE_B32;
-}
-
-/* Constructor of the ancestor of all operands. K is BRIG kind that identified
- what the operator is. */
-
-hsa_op_base::hsa_op_base (BrigKind16_t k)
- : m_next (NULL), m_brig_op_offset (0), m_kind (k)
-{
- hsa_operands.safe_push (this);
-}
-
-/* Constructor of ancestor of all operands which have a type. K is BRIG kind
- that identified what the operator is. T is the type of the operator. */
-
-hsa_op_with_type::hsa_op_with_type (BrigKind16_t k, BrigType16_t t)
- : hsa_op_base (k), m_type (t)
-{
-}
-
-hsa_op_with_type *
-hsa_op_with_type::get_in_type (BrigType16_t dtype, hsa_bb *hbb)
-{
- if (m_type == dtype)
- return this;
-
- hsa_op_reg *dest;
-
- if (hsa_needs_cvt (dtype, m_type))
- {
- dest = new hsa_op_reg (dtype);
- hbb->append_insn (new hsa_insn_cvt (dest, this));
- }
- else if (is_a <hsa_op_reg *> (this))
- {
- /* In the end, HSA registers do not really have types, only sizes, so if
- the sizes match, we can use the register directly. */
- gcc_checking_assert (hsa_type_bit_size (dtype)
- == hsa_type_bit_size (m_type));
- return this;
- }
- else
- {
- dest = new hsa_op_reg (m_type);
-
- hsa_insn_basic *mov = new hsa_insn_basic (2, BRIG_OPCODE_MOV,
- dest->m_type, dest, this);
- hsa_fixup_mov_insn_type (mov);
- hbb->append_insn (mov);
- /* We cannot simply for instance: 'mov_u32 $_3, 48 (s32)' because
- type of the operand must be same as type of the instruction. */
- dest->m_type = dtype;
- }
-
- return dest;
-}
-
-/* If this operand has integer type smaller than 32 bits, extend it to 32 bits,
- adding instructions to HBB if needed. */
-
-hsa_op_with_type *
-hsa_op_with_type::extend_int_to_32bit (hsa_bb *hbb)
-{
- if (m_type == BRIG_TYPE_U8 || m_type == BRIG_TYPE_U16)
- return get_in_type (BRIG_TYPE_U32, hbb);
- else if (m_type == BRIG_TYPE_S8 || m_type == BRIG_TYPE_S16)
- return get_in_type (BRIG_TYPE_S32, hbb);
- else
- return this;
-}
-
-/* Constructor of class representing HSA immediate values. TREE_VAL is the
- tree representation of the immediate value. If min32int is true,
- always expand integer types to one that has at least 32 bits. */
-
-hsa_op_immed::hsa_op_immed (tree tree_val, bool min32int)
- : hsa_op_with_type (BRIG_KIND_OPERAND_CONSTANT_BYTES,
- hsa_type_for_tree_type (TREE_TYPE (tree_val), NULL,
- min32int))
-{
- if (hsa_seen_error ())
- return;
-
- gcc_checking_assert ((is_gimple_min_invariant (tree_val)
- && (!POINTER_TYPE_P (TREE_TYPE (tree_val))
- || TREE_CODE (tree_val) == INTEGER_CST))
- || TREE_CODE (tree_val) == CONSTRUCTOR);
- m_tree_value = tree_val;
-
- /* Verify that all elements of a constructor are constants. */
- if (TREE_CODE (m_tree_value) == CONSTRUCTOR)
- for (unsigned i = 0; i < CONSTRUCTOR_NELTS (m_tree_value); i++)
- {
- tree v = CONSTRUCTOR_ELT (m_tree_value, i)->value;
- if (!CONSTANT_CLASS_P (v))
- {
- HSA_SORRY_AT (EXPR_LOCATION (tree_val),
- "HSA ctor should have only constants");
- return;
- }
- }
-}
-
-/* Constructor of class representing HSA immediate values. INTEGER_VALUE is the
- integer representation of the immediate value. TYPE is BRIG type. */
-
-hsa_op_immed::hsa_op_immed (HOST_WIDE_INT integer_value, BrigType16_t type)
- : hsa_op_with_type (BRIG_KIND_OPERAND_CONSTANT_BYTES, type),
- m_tree_value (NULL)
-{
- gcc_assert (hsa_type_integer_p (type));
- m_int_value = integer_value;
-}
-
-hsa_op_immed::hsa_op_immed ()
- : hsa_op_with_type (BRIG_KIND_NONE, BRIG_TYPE_NONE)
-{
-}
-
-/* New operator to allocate immediate operands from obstack. */
-
-void *
-hsa_op_immed::operator new (size_t size)
-{
- return obstack_alloc (&hsa_obstack, size);
-}
-
-/* Destructor. */
-
-hsa_op_immed::~hsa_op_immed ()
-{
-}
-
-/* Change type of the immediate value to T. */
-
-void
-hsa_op_immed::set_type (BrigType16_t t)
-{
- m_type = t;
-}
-
-/* Constructor of class representing HSA registers and pseudo-registers. T is
- the BRIG type of the new register. */
-
-hsa_op_reg::hsa_op_reg (BrigType16_t t)
- : hsa_op_with_type (BRIG_KIND_OPERAND_REGISTER, t), m_gimple_ssa (NULL_TREE),
- m_def_insn (NULL), m_spill_sym (NULL), m_order (hsa_cfun->m_reg_count++),
- m_lr_begin (0), m_lr_end (0), m_reg_class (0), m_hard_num (0)
-{
-}
-
-/* New operator to allocate a register from obstack. */
-
-void *
-hsa_op_reg::operator new (size_t size)
-{
- return obstack_alloc (&hsa_obstack, size);
-}
-
-/* Verify register operand. */
-
-void
-hsa_op_reg::verify_ssa ()
-{
- /* Verify that each HSA register has a definition assigned.
- Exceptions are VAR_DECL and PARM_DECL that are a default
- definition. */
- gcc_checking_assert (m_def_insn
- || (m_gimple_ssa != NULL
- && (!SSA_NAME_VAR (m_gimple_ssa)
- || (TREE_CODE (SSA_NAME_VAR (m_gimple_ssa))
- != PARM_DECL))
- && SSA_NAME_IS_DEFAULT_DEF (m_gimple_ssa)));
-
- /* Verify that every use of the register is really present
- in an instruction. */
- for (unsigned i = 0; i < m_uses.length (); i++)
- {
- hsa_insn_basic *use = m_uses[i];
-
- bool is_visited = false;
- for (unsigned j = 0; j < use->operand_count (); j++)
- {
- hsa_op_base *u = use->get_op (j);
- hsa_op_address *addr; addr = dyn_cast <hsa_op_address *> (u);
- if (addr && addr->m_reg)
- u = addr->m_reg;
-
- if (u == this)
- {
- bool r = !addr && use->op_output_p (j);
-
- if (r)
- {
- error ("HSA SSA name defined by instruction that is supposed "
- "to be using it");
- debug_hsa_operand (this);
- debug_hsa_insn (use);
- internal_error ("HSA SSA verification failed");
- }
-
- is_visited = true;
- }
- }
-
- if (!is_visited)
- {
- error ("HSA SSA name not among operands of instruction that is "
- "supposed to use it");
- debug_hsa_operand (this);
- debug_hsa_insn (use);
- internal_error ("HSA SSA verification failed");
- }
- }
-}
-
-hsa_op_address::hsa_op_address (hsa_symbol *sym, hsa_op_reg *r,
- HOST_WIDE_INT offset)
- : hsa_op_base (BRIG_KIND_OPERAND_ADDRESS), m_symbol (sym), m_reg (r),
- m_imm_offset (offset)
-{
-}
-
-hsa_op_address::hsa_op_address (hsa_symbol *sym, HOST_WIDE_INT offset)
- : hsa_op_base (BRIG_KIND_OPERAND_ADDRESS), m_symbol (sym), m_reg (NULL),
- m_imm_offset (offset)
-{
-}
-
-hsa_op_address::hsa_op_address (hsa_op_reg *r, HOST_WIDE_INT offset)
- : hsa_op_base (BRIG_KIND_OPERAND_ADDRESS), m_symbol (NULL), m_reg (r),
- m_imm_offset (offset)
-{
-}
-
-/* New operator to allocate address operands from obstack. */
-
-void *
-hsa_op_address::operator new (size_t size)
-{
- return obstack_alloc (&hsa_obstack, size);
-}
-
-/* Constructor of an operand referring to HSAIL code. */
-
-hsa_op_code_ref::hsa_op_code_ref () : hsa_op_base (BRIG_KIND_OPERAND_CODE_REF),
- m_directive_offset (0)
-{
-}
-
-/* Constructor of an operand representing a code list. Set it up so that it
- can contain ELEMENTS number of elements. */
-
-hsa_op_code_list::hsa_op_code_list (unsigned elements)
- : hsa_op_base (BRIG_KIND_OPERAND_CODE_LIST)
-{
- m_offsets.create (1);
- m_offsets.safe_grow_cleared (elements);
-}
-
-/* New operator to allocate code list operands from obstack. */
-
-void *
-hsa_op_code_list::operator new (size_t size)
-{
- return obstack_alloc (&hsa_obstack, size);
-}
-
-/* Constructor of an operand representing an operand list.
- Set it up so that it can contain ELEMENTS number of elements. */
-
-hsa_op_operand_list::hsa_op_operand_list (unsigned elements)
- : hsa_op_base (BRIG_KIND_OPERAND_OPERAND_LIST)
-{
- m_offsets.create (elements);
- m_offsets.safe_grow (elements);
-}
-
-/* New operator to allocate operand list operands from obstack. */
-
-void *
-hsa_op_operand_list::operator new (size_t size)
-{
- return obstack_alloc (&hsa_obstack, size);
-}
-
-hsa_op_operand_list::~hsa_op_operand_list ()
-{
- m_offsets.release ();
-}
-
-
-hsa_op_reg *
-hsa_function_representation::reg_for_gimple_ssa (tree ssa)
-{
- hsa_op_reg *hreg;
-
- gcc_checking_assert (TREE_CODE (ssa) == SSA_NAME);
- if (m_ssa_map[SSA_NAME_VERSION (ssa)])
- return m_ssa_map[SSA_NAME_VERSION (ssa)];
-
- hreg = new hsa_op_reg (hsa_type_for_scalar_tree_type (TREE_TYPE (ssa),
- false));
- hreg->m_gimple_ssa = ssa;
- m_ssa_map[SSA_NAME_VERSION (ssa)] = hreg;
-
- return hreg;
-}
-
-void
-hsa_op_reg::set_definition (hsa_insn_basic *insn)
-{
- if (hsa_cfun->m_in_ssa)
- {
- gcc_checking_assert (!m_def_insn);
- m_def_insn = insn;
- }
- else
- m_def_insn = NULL;
-}
-
-/* Constructor of the class which is the bases of all instructions and directly
- represents the most basic ones. NOPS is the number of operands that the
- operand vector will contain (and which will be cleared). OP is the opcode
- of the instruction. This constructor does not set type. */
-
-hsa_insn_basic::hsa_insn_basic (unsigned nops, int opc)
- : m_prev (NULL),
- m_next (NULL), m_bb (NULL), m_opcode (opc), m_number (0),
- m_type (BRIG_TYPE_NONE), m_brig_offset (0)
-{
- if (nops > 0)
- m_operands.safe_grow_cleared (nops);
-
- hsa_instructions.safe_push (this);
-}
-
-/* Make OP the operand number INDEX of operands of this instruction. If OP is a
- register or an address containing a register, then either set the definition
- of the register to this instruction if it an output operand or add this
- instruction to the uses if it is an input one. */
-
-void
-hsa_insn_basic::set_op (int index, hsa_op_base *op)
-{
- /* Each address operand is always use. */
- hsa_op_address *addr = dyn_cast <hsa_op_address *> (op);
- if (addr && addr->m_reg)
- addr->m_reg->m_uses.safe_push (this);
- else
- {
- hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op);
- if (reg)
- {
- if (op_output_p (index))
- reg->set_definition (this);
- else
- reg->m_uses.safe_push (this);
- }
- }
-
- m_operands[index] = op;
-}
-
-/* Get INDEX-th operand of the instruction. */
-
-hsa_op_base *
-hsa_insn_basic::get_op (int index)
-{
- return m_operands[index];
-}
-
-/* Get address of INDEX-th operand of the instruction. */
-
-hsa_op_base **
-hsa_insn_basic::get_op_addr (int index)
-{
- return &m_operands[index];
-}
-
-/* Get number of operands of the instruction. */
-unsigned int
-hsa_insn_basic::operand_count ()
-{
- return m_operands.length ();
-}
-
-/* Constructor of the class which is the bases of all instructions and directly
- represents the most basic ones. NOPS is the number of operands that the
- operand vector will contain (and which will be cleared). OPC is the opcode
- of the instruction, T is the type of the instruction. */
-
-hsa_insn_basic::hsa_insn_basic (unsigned nops, int opc, BrigType16_t t,
- hsa_op_base *arg0, hsa_op_base *arg1,
- hsa_op_base *arg2, hsa_op_base *arg3)
- : m_prev (NULL), m_next (NULL), m_bb (NULL), m_opcode (opc),m_number (0),
- m_type (t), m_brig_offset (0)
-{
- if (nops > 0)
- m_operands.safe_grow_cleared (nops);
-
- if (arg0 != NULL)
- {
- gcc_checking_assert (nops >= 1);
- set_op (0, arg0);
- }
-
- if (arg1 != NULL)
- {
- gcc_checking_assert (nops >= 2);
- set_op (1, arg1);
- }
-
- if (arg2 != NULL)
- {
- gcc_checking_assert (nops >= 3);
- set_op (2, arg2);
- }
-
- if (arg3 != NULL)
- {
- gcc_checking_assert (nops >= 4);
- set_op (3, arg3);
- }
-
- hsa_instructions.safe_push (this);
-}
-
-/* New operator to allocate basic instruction from obstack. */
-
-void *
-hsa_insn_basic::operator new (size_t size)
-{
- return obstack_alloc (&hsa_obstack, size);
-}
-
-/* Verify the instruction. */
-
-void
-hsa_insn_basic::verify ()
-{
- hsa_op_address *addr;
- hsa_op_reg *reg;
-
- /* Iterate all register operands and verify that the instruction
- is set in uses of the register. */
- for (unsigned i = 0; i < operand_count (); i++)
- {
- hsa_op_base *use = get_op (i);
-
- if ((addr = dyn_cast <hsa_op_address *> (use)) && addr->m_reg)
- {
- gcc_assert (addr->m_reg->m_def_insn != this);
- use = addr->m_reg;
- }
-
- if ((reg = dyn_cast <hsa_op_reg *> (use)) && !op_output_p (i))
- {
- unsigned j;
- for (j = 0; j < reg->m_uses.length (); j++)
- {
- if (reg->m_uses[j] == this)
- break;
- }
-
- if (j == reg->m_uses.length ())
- {
- error ("HSA instruction uses a register but is not among "
- "recorded register uses");
- debug_hsa_operand (reg);
- debug_hsa_insn (this);
- internal_error ("HSA instruction verification failed");
- }
- }
- }
-}
-
-/* Constructor of an instruction representing a PHI node. NOPS is the number
- of operands (equal to the number of predecessors). */
-
-hsa_insn_phi::hsa_insn_phi (unsigned nops, hsa_op_reg *dst)
- : hsa_insn_basic (nops, HSA_OPCODE_PHI), m_dest (dst)
-{
- dst->set_definition (this);
-}
-
-/* Constructor of class representing instructions for control flow and
- sychronization, */
-
-hsa_insn_br::hsa_insn_br (unsigned nops, int opc, BrigType16_t t,
- BrigWidth8_t width, hsa_op_base *arg0,
- hsa_op_base *arg1, hsa_op_base *arg2,
- hsa_op_base *arg3)
- : hsa_insn_basic (nops, opc, t, arg0, arg1, arg2, arg3),
- m_width (width)
-{
-}
-
-/* Constructor of class representing instruction for conditional jump, CTRL is
- the control register determining whether the jump will be carried out, the
- new instruction is automatically added to its uses list. */
-
-hsa_insn_cbr::hsa_insn_cbr (hsa_op_reg *ctrl)
- : hsa_insn_br (1, BRIG_OPCODE_CBR, BRIG_TYPE_B1, BRIG_WIDTH_1, ctrl)
-{
-}
-
-/* Constructor of class representing instruction for switch jump, CTRL is
- the index register. */
-
-hsa_insn_sbr::hsa_insn_sbr (hsa_op_reg *index, unsigned jump_count)
- : hsa_insn_basic (1, BRIG_OPCODE_SBR, BRIG_TYPE_B1, index),
- m_width (BRIG_WIDTH_1), m_jump_table (vNULL),
- m_label_code_list (new hsa_op_code_list (jump_count))
-{
-}
-
-/* Replace all occurrences of OLD_BB with NEW_BB in the statements
- jump table. */
-
-void
-hsa_insn_sbr::replace_all_labels (basic_block old_bb, basic_block new_bb)
-{
- for (unsigned i = 0; i < m_jump_table.length (); i++)
- if (m_jump_table[i] == old_bb)
- m_jump_table[i] = new_bb;
-}
-
-hsa_insn_sbr::~hsa_insn_sbr ()
-{
- m_jump_table.release ();
-}
-
-/* Constructor of comparison instruction. CMP is the comparison operation and T
- is the result type. */
-
-hsa_insn_cmp::hsa_insn_cmp (BrigCompareOperation8_t cmp, BrigType16_t t,
- hsa_op_base *arg0, hsa_op_base *arg1,
- hsa_op_base *arg2)
- : hsa_insn_basic (3 , BRIG_OPCODE_CMP, t, arg0, arg1, arg2), m_compare (cmp)
-{
-}
-
-/* Constructor of classes representing memory accesses. OPC is the opcode (must
- be BRIG_OPCODE_ST or BRIG_OPCODE_LD) and T is the type. The instruction
- operands are provided as ARG0 and ARG1. */
-
-hsa_insn_mem::hsa_insn_mem (int opc, BrigType16_t t, hsa_op_base *arg0,
- hsa_op_base *arg1)
- : hsa_insn_basic (2, opc, t, arg0, arg1),
- m_align (hsa_natural_alignment (t)), m_equiv_class (0)
-{
- gcc_checking_assert (opc == BRIG_OPCODE_LD || opc == BRIG_OPCODE_ST);
-}
-
-/* Constructor for descendants allowing different opcodes and number of
- operands, it passes its arguments directly to hsa_insn_basic
- constructor. The instruction operands are provided as ARG[0-3]. */
-
-
-hsa_insn_mem::hsa_insn_mem (unsigned nops, int opc, BrigType16_t t,
- hsa_op_base *arg0, hsa_op_base *arg1,
- hsa_op_base *arg2, hsa_op_base *arg3)
- : hsa_insn_basic (nops, opc, t, arg0, arg1, arg2, arg3),
- m_align (hsa_natural_alignment (t)), m_equiv_class (0)
-{
-}
-
-/* Constructor of class representing atomic instructions. OPC is the principal
- opcode, AOP is the specific atomic operation opcode. T is the type of the
- instruction. The instruction operands are provided as ARG[0-3]. */
-
-hsa_insn_atomic::hsa_insn_atomic (int nops, int opc,
- enum BrigAtomicOperation aop,
- BrigType16_t t, BrigMemoryOrder memorder,
- hsa_op_base *arg0,
- hsa_op_base *arg1, hsa_op_base *arg2,
- hsa_op_base *arg3)
- : hsa_insn_mem (nops, opc, t, arg0, arg1, arg2, arg3), m_atomicop (aop),
- m_memoryorder (memorder),
- m_memoryscope (BRIG_MEMORY_SCOPE_SYSTEM)
-{
- gcc_checking_assert (opc == BRIG_OPCODE_ATOMICNORET ||
- opc == BRIG_OPCODE_ATOMIC ||
- opc == BRIG_OPCODE_SIGNAL ||
- opc == BRIG_OPCODE_SIGNALNORET);
-}
-
-/* Constructor of class representing signal instructions. OPC is the prinicpal
- opcode, SOP is the specific signal operation opcode. T is the type of the
- instruction. The instruction operands are provided as ARG[0-3]. */
-
-hsa_insn_signal::hsa_insn_signal (int nops, int opc,
- enum BrigAtomicOperation sop,
- BrigType16_t t, BrigMemoryOrder memorder,
- hsa_op_base *arg0, hsa_op_base *arg1,
- hsa_op_base *arg2, hsa_op_base *arg3)
- : hsa_insn_basic (nops, opc, t, arg0, arg1, arg2, arg3),
- m_memory_order (memorder), m_signalop (sop)
-{
-}
-
-/* Constructor of class representing segment conversion instructions. OPC is
- the opcode which must be either BRIG_OPCODE_STOF or BRIG_OPCODE_FTOS. DEST
- and SRCT are destination and source types respectively, SEG is the segment
- we are converting to or from. The instruction operands are
- provided as ARG0 and ARG1. */
-
-hsa_insn_seg::hsa_insn_seg (int opc, BrigType16_t dest, BrigType16_t srct,
- BrigSegment8_t seg, hsa_op_base *arg0,
- hsa_op_base *arg1)
- : hsa_insn_basic (2, opc, dest, arg0, arg1), m_src_type (srct),
- m_segment (seg)
-{
- gcc_checking_assert (opc == BRIG_OPCODE_STOF || opc == BRIG_OPCODE_FTOS);
-}
-
-/* Constructor of class representing a call instruction. CALLEE is the tree
- representation of the function being called. */
-
-hsa_insn_call::hsa_insn_call (tree callee)
- : hsa_insn_basic (0, BRIG_OPCODE_CALL), m_called_function (callee),
- m_output_arg (NULL), m_args_code_list (NULL), m_result_code_list (NULL)
-{
-}
-
-hsa_insn_call::hsa_insn_call (hsa_internal_fn *fn)
- : hsa_insn_basic (0, BRIG_OPCODE_CALL), m_called_function (NULL),
- m_called_internal_fn (fn), m_output_arg (NULL), m_args_code_list (NULL),
- m_result_code_list (NULL)
-{
-}
-
-hsa_insn_call::~hsa_insn_call ()
-{
- for (unsigned i = 0; i < m_input_args.length (); i++)
- delete m_input_args[i];
-
- delete m_output_arg;
-
- m_input_args.release ();
- m_input_arg_insns.release ();
-}
-
-/* Constructor of class representing the argument block required to invoke
- a call in HSAIL. */
-hsa_insn_arg_block::hsa_insn_arg_block (BrigKind brig_kind,
- hsa_insn_call * call)
- : hsa_insn_basic (0, HSA_OPCODE_ARG_BLOCK), m_kind (brig_kind),
- m_call_insn (call)
-{
-}
-
-hsa_insn_comment::hsa_insn_comment (const char *s)
- : hsa_insn_basic (0, BRIG_KIND_DIRECTIVE_COMMENT)
-{
- unsigned l = strlen (s);
-
- /* Append '// ' to the string. */
- char *buf = XNEWVEC (char, l + 4);
- sprintf (buf, "// %s", s);
- m_comment = buf;
-}
-
-hsa_insn_comment::~hsa_insn_comment ()
-{
- gcc_checking_assert (m_comment);
- free (m_comment);
- m_comment = NULL;
-}
-
-/* Constructor of class representing the queue instruction in HSAIL. */
-
-hsa_insn_queue::hsa_insn_queue (int nops, int opcode, BrigSegment segment,
- BrigMemoryOrder memory_order,
- hsa_op_base *arg0, hsa_op_base *arg1,
- hsa_op_base *arg2, hsa_op_base *arg3)
- : hsa_insn_basic (nops, opcode, BRIG_TYPE_U64, arg0, arg1, arg2, arg3),
- m_segment (segment), m_memory_order (memory_order)
-{
-}
-
-/* Constructor of class representing the source type instruction in HSAIL. */
-
-hsa_insn_srctype::hsa_insn_srctype (int nops, BrigOpcode opcode,
- BrigType16_t destt, BrigType16_t srct,
- hsa_op_base *arg0, hsa_op_base *arg1,
- hsa_op_base *arg2 = NULL)
- : hsa_insn_basic (nops, opcode, destt, arg0, arg1, arg2),
- m_source_type (srct)
-{}
-
-/* Constructor of class representing the packed instruction in HSAIL. */
-
-hsa_insn_packed::hsa_insn_packed (int nops, BrigOpcode opcode,
- BrigType16_t destt, BrigType16_t srct,
- hsa_op_base *arg0, hsa_op_base *arg1,
- hsa_op_base *arg2)
- : hsa_insn_srctype (nops, opcode, destt, srct, arg0, arg1, arg2)
-{
- m_operand_list = new hsa_op_operand_list (nops - 1);
-}
-
-/* Constructor of class representing the convert instruction in HSAIL. */
-
-hsa_insn_cvt::hsa_insn_cvt (hsa_op_with_type *dest, hsa_op_with_type *src)
- : hsa_insn_basic (2, BRIG_OPCODE_CVT, dest->m_type, dest, src)
-{
-}
-
-/* Constructor of class representing the alloca in HSAIL. */
-
-hsa_insn_alloca::hsa_insn_alloca (hsa_op_with_type *dest,
- hsa_op_with_type *size, unsigned alignment)
- : hsa_insn_basic (2, BRIG_OPCODE_ALLOCA, dest->m_type, dest, size),
- m_align (BRIG_ALIGNMENT_8)
-{
- gcc_assert (dest->m_type == BRIG_TYPE_U32);
- if (alignment)
- m_align = hsa_alignment_encoding (alignment);
-}
-
-/* Append an instruction INSN into the basic block. */
-
-void
-hsa_bb::append_insn (hsa_insn_basic *insn)
-{
- gcc_assert (insn->m_opcode != 0 || insn->operand_count () == 0);
- gcc_assert (!insn->m_bb);
-
- insn->m_bb = m_bb;
- insn->m_prev = m_last_insn;
- insn->m_next = NULL;
- if (m_last_insn)
- m_last_insn->m_next = insn;
- m_last_insn = insn;
- if (!m_first_insn)
- m_first_insn = insn;
-}
-
-void
-hsa_bb::append_phi (hsa_insn_phi *hphi)
-{
- hphi->m_bb = m_bb;
-
- hphi->m_prev = m_last_phi;
- hphi->m_next = NULL;
- if (m_last_phi)
- m_last_phi->m_next = hphi;
- m_last_phi = hphi;
- if (!m_first_phi)
- m_first_phi = hphi;
-}
-
-/* Insert HSA instruction NEW_INSN immediately before an existing instruction
- OLD_INSN. */
-
-static void
-hsa_insert_insn_before (hsa_insn_basic *new_insn, hsa_insn_basic *old_insn)
-{
- hsa_bb *hbb = hsa_bb_for_bb (old_insn->m_bb);
-
- if (hbb->m_first_insn == old_insn)
- hbb->m_first_insn = new_insn;
- new_insn->m_prev = old_insn->m_prev;
- new_insn->m_next = old_insn;
- if (old_insn->m_prev)
- old_insn->m_prev->m_next = new_insn;
- old_insn->m_prev = new_insn;
-}
-
-/* Append HSA instruction NEW_INSN immediately after an existing instruction
- OLD_INSN. */
-
-static void
-hsa_append_insn_after (hsa_insn_basic *new_insn, hsa_insn_basic *old_insn)
-{
- hsa_bb *hbb = hsa_bb_for_bb (old_insn->m_bb);
-
- if (hbb->m_last_insn == old_insn)
- hbb->m_last_insn = new_insn;
- new_insn->m_prev = old_insn;
- new_insn->m_next = old_insn->m_next;
- if (old_insn->m_next)
- old_insn->m_next->m_prev = new_insn;
- old_insn->m_next = new_insn;
-}
-
-/* Return a register containing the calculated value of EXP which must be an
- expression consisting of PLUS_EXPRs, MULT_EXPRs, NOP_EXPRs, SSA_NAMEs and
- integer constants as returned by get_inner_reference.
- Newly generated HSA instructions will be appended to HBB.
- Perform all calculations in ADDRTYPE. */
-
-static hsa_op_with_type *
-gen_address_calculation (tree exp, hsa_bb *hbb, BrigType16_t addrtype)
-{
- int opcode;
-
- if (TREE_CODE (exp) == NOP_EXPR)
- exp = TREE_OPERAND (exp, 0);
-
- switch (TREE_CODE (exp))
- {
- case SSA_NAME:
- return hsa_cfun->reg_for_gimple_ssa (exp)->get_in_type (addrtype, hbb);
-
- case INTEGER_CST:
- {
- hsa_op_immed *imm = new hsa_op_immed (exp);
- if (addrtype != imm->m_type)
- imm->m_type = addrtype;
- return imm;
- }
-
- case PLUS_EXPR:
- opcode = BRIG_OPCODE_ADD;
- break;
-
- case MULT_EXPR:
- opcode = BRIG_OPCODE_MUL;
- break;
-
- default:
- gcc_unreachable ();
- }
-
- hsa_op_reg *res = new hsa_op_reg (addrtype);
- hsa_insn_basic *insn = new hsa_insn_basic (3, opcode, addrtype);
- insn->set_op (0, res);
-
- hsa_op_with_type *op1 = gen_address_calculation (TREE_OPERAND (exp, 0), hbb,
- addrtype);
- hsa_op_with_type *op2 = gen_address_calculation (TREE_OPERAND (exp, 1), hbb,
- addrtype);
- insn->set_op (1, op1);
- insn->set_op (2, op2);
-
- hbb->append_insn (insn);
- return res;
-}
-
-/* If R1 is NULL, just return R2, otherwise append an instruction adding them
- to HBB and return the register holding the result. */
-
-static hsa_op_reg *
-add_addr_regs_if_needed (hsa_op_reg *r1, hsa_op_reg *r2, hsa_bb *hbb)
-{
- gcc_checking_assert (r2);
- if (!r1)
- return r2;
-
- hsa_op_reg *res = new hsa_op_reg (r1->m_type);
- gcc_assert (!hsa_needs_cvt (r1->m_type, r2->m_type));
- hsa_insn_basic *insn = new hsa_insn_basic (3, BRIG_OPCODE_ADD, res->m_type);
- insn->set_op (0, res);
- insn->set_op (1, r1);
- insn->set_op (2, r2);
- hbb->append_insn (insn);
- return res;
-}
-
-/* Helper of gen_hsa_addr. Update *SYMBOL, *ADDRTYPE, *REG and *OFFSET to
- reflect BASE which is the first operand of a MEM_REF or a TARGET_MEM_REF. */
-
-static void
-process_mem_base (tree base, hsa_symbol **symbol, BrigType16_t *addrtype,
- hsa_op_reg **reg, offset_int *offset, hsa_bb *hbb)
-{
- if (TREE_CODE (base) == SSA_NAME)
- {
- gcc_assert (!*reg);
- hsa_op_with_type *ssa
- = hsa_cfun->reg_for_gimple_ssa (base)->get_in_type (*addrtype, hbb);
- *reg = dyn_cast <hsa_op_reg *> (ssa);
- }
- else if (TREE_CODE (base) == ADDR_EXPR)
- {
- tree decl = TREE_OPERAND (base, 0);
-
- if (!DECL_P (decl) || TREE_CODE (decl) == FUNCTION_DECL)
- {
- HSA_SORRY_AT (EXPR_LOCATION (base),
- "support for HSA does not implement a memory reference "
- "to a non-declaration type");
- return;
- }
-
- gcc_assert (!*symbol);
-
- *symbol = get_symbol_for_decl (decl);
- *addrtype = hsa_get_segment_addr_type ((*symbol)->m_segment);
- }
- else if (TREE_CODE (base) == INTEGER_CST)
- *offset += wi::to_offset (base);
- else
- gcc_unreachable ();
-}
-
-/* Forward declaration of a function. */
-
-static void
-gen_hsa_addr_insns (tree val, hsa_op_reg *dest, hsa_bb *hbb);
-
-/* Generate HSA address operand for a given tree memory reference REF. If
- instructions need to be created to calculate the address, they will be added
- to the end of HBB. If a caller provider OUTPUT_BITSIZE and OUTPUT_BITPOS,
- the function assumes that the caller will handle possible
- bit-field references. Otherwise if we reference a bit-field, sorry message
- is displayed. */
-
-static hsa_op_address *
-gen_hsa_addr (tree ref, hsa_bb *hbb, HOST_WIDE_INT *output_bitsize = NULL,
- HOST_WIDE_INT *output_bitpos = NULL)
-{
- hsa_symbol *symbol = NULL;
- hsa_op_reg *reg = NULL;
- offset_int offset = 0;
- tree origref = ref;
- tree varoffset = NULL_TREE;
- BrigType16_t addrtype = hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT);
- HOST_WIDE_INT bitsize = 0, bitpos = 0;
- BrigType16_t flat_addrtype = hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT);
-
- if (TREE_CODE (ref) == STRING_CST)
- {
- symbol = hsa_get_string_cst_symbol (ref);
- goto out;
- }
- else if (TREE_CODE (ref) == BIT_FIELD_REF
- && (!multiple_p (bit_field_size (ref), BITS_PER_UNIT)
- || !multiple_p (bit_field_offset (ref), BITS_PER_UNIT)))
- {
- HSA_SORRY_ATV (EXPR_LOCATION (origref),
- "support for HSA does not implement "
- "bit field references such as %E", ref);
- goto out;
- }
-
- if (handled_component_p (ref))
- {
- machine_mode mode;
- int unsignedp, volatilep, preversep;
- poly_int64 pbitsize, pbitpos;
- tree new_ref;
-
- new_ref = get_inner_reference (ref, &pbitsize, &pbitpos, &varoffset,
- &mode, &unsignedp, &preversep,
- &volatilep);
- /* When this isn't true, the switch below will report an
- appropriate error. */
- if (pbitsize.is_constant () && pbitpos.is_constant ())
- {
- bitsize = pbitsize.to_constant ();
- bitpos = pbitpos.to_constant ();
- ref = new_ref;
- offset = bitpos;
- offset = wi::rshift (offset, LOG2_BITS_PER_UNIT, SIGNED);
- }
- }
-
- switch (TREE_CODE (ref))
- {
- case ADDR_EXPR:
- {
- addrtype = hsa_get_segment_addr_type (BRIG_SEGMENT_PRIVATE);
- symbol = hsa_cfun->create_hsa_temporary (flat_addrtype);
- hsa_op_reg *r = new hsa_op_reg (flat_addrtype);
- gen_hsa_addr_insns (ref, r, hbb);
- hbb->append_insn (new hsa_insn_mem (BRIG_OPCODE_ST, r->m_type,
- r, new hsa_op_address (symbol)));
-
- break;
- }
- case SSA_NAME:
- {
- addrtype = hsa_get_segment_addr_type (BRIG_SEGMENT_PRIVATE);
- hsa_op_with_type *r = hsa_cfun->reg_for_gimple_ssa (ref);
- if (r->m_type == BRIG_TYPE_B1)
- r = r->get_in_type (BRIG_TYPE_U32, hbb);
- symbol = hsa_cfun->create_hsa_temporary (r->m_type);
-
- hbb->append_insn (new hsa_insn_mem (BRIG_OPCODE_ST, r->m_type,
- r, new hsa_op_address (symbol)));
-
- break;
- }
- case PARM_DECL:
- case VAR_DECL:
- case RESULT_DECL:
- case CONST_DECL:
- gcc_assert (!symbol);
- symbol = get_symbol_for_decl (ref);
- addrtype = hsa_get_segment_addr_type (symbol->m_segment);
- break;
-
- case MEM_REF:
- process_mem_base (TREE_OPERAND (ref, 0), &symbol, &addrtype, ®,
- &offset, hbb);
-
- if (!integer_zerop (TREE_OPERAND (ref, 1)))
- offset += wi::to_offset (TREE_OPERAND (ref, 1));
- break;
-
- case TARGET_MEM_REF:
- process_mem_base (TMR_BASE (ref), &symbol, &addrtype, ®, &offset, hbb);
- if (TMR_INDEX (ref))
- {
- hsa_op_reg *disp1;
- hsa_op_base *idx = hsa_cfun->reg_for_gimple_ssa
- (TMR_INDEX (ref))->get_in_type (addrtype, hbb);
- if (TMR_STEP (ref) && !integer_onep (TMR_STEP (ref)))
- {
- disp1 = new hsa_op_reg (addrtype);
- hsa_insn_basic *insn = new hsa_insn_basic (3, BRIG_OPCODE_MUL,
- addrtype);
-
- /* As step must respect addrtype, we overwrite the type
- of an immediate value. */
- hsa_op_immed *step = new hsa_op_immed (TMR_STEP (ref));
- step->m_type = addrtype;
-
- insn->set_op (0, disp1);
- insn->set_op (1, idx);
- insn->set_op (2, step);
- hbb->append_insn (insn);
- }
- else
- disp1 = as_a <hsa_op_reg *> (idx);
- reg = add_addr_regs_if_needed (reg, disp1, hbb);
- }
- if (TMR_INDEX2 (ref))
- {
- if (TREE_CODE (TMR_INDEX2 (ref)) == SSA_NAME)
- {
- hsa_op_base *disp2 = hsa_cfun->reg_for_gimple_ssa
- (TMR_INDEX2 (ref))->get_in_type (addrtype, hbb);
- reg = add_addr_regs_if_needed (reg, as_a <hsa_op_reg *> (disp2),
- hbb);
- }
- else if (TREE_CODE (TMR_INDEX2 (ref)) == INTEGER_CST)
- offset += wi::to_offset (TMR_INDEX2 (ref));
- else
- gcc_unreachable ();
- }
- offset += wi::to_offset (TMR_OFFSET (ref));
- break;
- case FUNCTION_DECL:
- HSA_SORRY_AT (EXPR_LOCATION (origref),
- "support for HSA does not implement function pointers");
- goto out;
- default:
- HSA_SORRY_ATV (EXPR_LOCATION (origref), "support for HSA does "
- "not implement memory access to %E", origref);
- goto out;
- }
-
- if (varoffset)
- {
- if (TREE_CODE (varoffset) == INTEGER_CST)
- offset += wi::to_offset (varoffset);
- else
- {
- hsa_op_base *off_op = gen_address_calculation (varoffset, hbb,
- addrtype);
- reg = add_addr_regs_if_needed (reg, as_a <hsa_op_reg *> (off_op),
- hbb);
- }
- }
-
- gcc_checking_assert ((symbol
- && addrtype
- == hsa_get_segment_addr_type (symbol->m_segment))
- || (!symbol
- && addrtype
- == hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT)));
-out:
- HOST_WIDE_INT hwi_offset = offset.to_shwi ();
-
- /* Calculate remaining bitsize offset (if presented). */
- bitpos %= BITS_PER_UNIT;
- /* If bitsize is a power of two that is greater or equal to BITS_PER_UNIT, it
- is not a reason to think this is a bit-field access. */
- if (bitpos == 0
- && (bitsize >= BITS_PER_UNIT)
- && !(bitsize & (bitsize - 1)))
- bitsize = 0;
-
- if ((bitpos || bitsize) && (output_bitpos == NULL || output_bitsize == NULL))
- HSA_SORRY_ATV (EXPR_LOCATION (origref), "support for HSA does not "
- "implement unhandled bit field reference such as %E", ref);
-
- if (output_bitsize != NULL && output_bitpos != NULL)
- {
- *output_bitsize = bitsize;
- *output_bitpos = bitpos;
- }
-
- return new hsa_op_address (symbol, reg, hwi_offset);
-}
-
-/* Generate HSA address operand for a given tree memory reference REF. If
- instructions need to be created to calculate the address, they will be added
- to the end of HBB. OUTPUT_ALIGN is alignment of the created address. */
-
-static hsa_op_address *
-gen_hsa_addr_with_align (tree ref, hsa_bb *hbb, BrigAlignment8_t *output_align)
-{
- hsa_op_address *addr = gen_hsa_addr (ref, hbb);
- if (addr->m_reg || !addr->m_symbol)
- *output_align = hsa_object_alignment (ref);
- else
- {
- /* If the address consists only of a symbol and an offset, we
- compute the alignment ourselves to take into account any alignment
- promotions we might have done for the HSA symbol representation. */
- unsigned align = hsa_byte_alignment (addr->m_symbol->m_align);
- unsigned misalign = addr->m_imm_offset & (align - 1);
- if (misalign)
- align = least_bit_hwi (misalign);
- *output_align = hsa_alignment_encoding (BITS_PER_UNIT * align);
- }
- return addr;
-}
-
-/* Generate HSA address for a function call argument of given TYPE.
- INDEX is used to generate corresponding name of the arguments.
- Special value -1 represents fact that result value is created. */
-
-static hsa_op_address *
-gen_hsa_addr_for_arg (tree tree_type, int index)
-{
- hsa_symbol *sym = new hsa_symbol (BRIG_TYPE_NONE, BRIG_SEGMENT_ARG,
- BRIG_LINKAGE_ARG);
- sym->m_type = hsa_type_for_tree_type (tree_type, &sym->m_dim);
-
- if (index == -1) /* Function result. */
- sym->m_name = "res";
- else /* Function call arguments. */
- {
- sym->m_name = NULL;
- sym->m_name_number = index;
- }
-
- return new hsa_op_address (sym);
-}
-
-/* Generate HSA instructions that process all necessary conversions
- of an ADDR to flat addressing and place the result into DEST.
- Instructions are appended to HBB. */
-
-static void
-convert_addr_to_flat_segment (hsa_op_address *addr, hsa_op_reg *dest,
- hsa_bb *hbb)
-{
- hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_LDA);
- insn->set_op (1, addr);
- if (addr->m_symbol && addr->m_symbol->m_segment != BRIG_SEGMENT_GLOBAL)
- {
- /* LDA produces segment-relative address, we need to convert
- it to the flat one. */
- hsa_op_reg *tmp;
- tmp = new hsa_op_reg (hsa_get_segment_addr_type
- (addr->m_symbol->m_segment));
- hsa_insn_seg *seg;
- seg = new hsa_insn_seg (BRIG_OPCODE_STOF,
- hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT),
- tmp->m_type, addr->m_symbol->m_segment, dest,
- tmp);
-
- insn->set_op (0, tmp);
- insn->m_type = tmp->m_type;
- hbb->append_insn (insn);
- hbb->append_insn (seg);
- }
- else
- {
- insn->set_op (0, dest);
- insn->m_type = hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT);
- hbb->append_insn (insn);
- }
-}
-
-/* Generate HSA instructions that calculate address of VAL including all
- necessary conversions to flat addressing and place the result into DEST.
- Instructions are appended to HBB. */
-
-static void
-gen_hsa_addr_insns (tree val, hsa_op_reg *dest, hsa_bb *hbb)
-{
- /* Handle cases like tmp = NULL, where we just emit a move instruction
- to a register. */
- if (TREE_CODE (val) == INTEGER_CST)
- {
- hsa_op_immed *c = new hsa_op_immed (val);
- hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_MOV,
- dest->m_type, dest, c);
- hbb->append_insn (insn);
- return;
- }
-
- hsa_op_address *addr;
-
- gcc_assert (dest->m_type == hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT));
- if (TREE_CODE (val) == ADDR_EXPR)
- val = TREE_OPERAND (val, 0);
- addr = gen_hsa_addr (val, hbb);
-
- if (TREE_CODE (val) == CONST_DECL
- && is_gimple_reg_type (TREE_TYPE (val)))
- {
- gcc_assert (addr->m_symbol
- && addr->m_symbol->m_segment == BRIG_SEGMENT_READONLY);
- /* CONST_DECLs are in readonly segment which however does not have
- addresses convertible to flat segments. So copy it to a private one
- and take address of that. */
- BrigType16_t csttype
- = mem_type_for_type (hsa_type_for_scalar_tree_type (TREE_TYPE (val),
- false));
- hsa_op_reg *r = new hsa_op_reg (csttype);
- hbb->append_insn (new hsa_insn_mem (BRIG_OPCODE_LD, csttype, r,
- new hsa_op_address (addr->m_symbol)));
- hsa_symbol *copysym = hsa_cfun->create_hsa_temporary (csttype);
- hbb->append_insn (new hsa_insn_mem (BRIG_OPCODE_ST, csttype, r,
- new hsa_op_address (copysym)));
- addr->m_symbol = copysym;
- }
- else if (addr->m_symbol && addr->m_symbol->m_segment == BRIG_SEGMENT_READONLY)
- {
- HSA_SORRY_ATV (EXPR_LOCATION (val), "support for HSA does "
- "not implement taking addresses of complex "
- "%<CONST_DECL%> such as %E", val);
- return;
- }
-
-
- convert_addr_to_flat_segment (addr, dest, hbb);
-}
-
-/* Return an HSA register or HSA immediate value operand corresponding to
- gimple operand OP. */
-
-static hsa_op_with_type *
-hsa_reg_or_immed_for_gimple_op (tree op, hsa_bb *hbb)
-{
- hsa_op_reg *tmp;
-
- if (TREE_CODE (op) == SSA_NAME)
- tmp = hsa_cfun->reg_for_gimple_ssa (op);
- else if (!POINTER_TYPE_P (TREE_TYPE (op)))
- return new hsa_op_immed (op);
- else
- {
- tmp = new hsa_op_reg (hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT));
- gen_hsa_addr_insns (op, tmp, hbb);
- }
- return tmp;
-}
-
-/* Create a simple movement instruction with register destination DEST and
- register or immediate source SRC and append it to the end of HBB. */
-
-void
-hsa_build_append_simple_mov (hsa_op_reg *dest, hsa_op_base *src, hsa_bb *hbb)
-{
- /* Moves of packed data between registers need to adhere to the same type
- rules like when dealing with memory. */
- BrigType16_t tp = mem_type_for_type (dest->m_type);
- hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_MOV, tp, dest, src);
- hsa_fixup_mov_insn_type (insn);
- unsigned dest_size = hsa_type_bit_size (dest->m_type);
- if (hsa_op_reg *sreg = dyn_cast <hsa_op_reg *> (src))
- gcc_assert (dest_size == hsa_type_bit_size (sreg->m_type));
- else
- {
- unsigned imm_size
- = hsa_type_bit_size (as_a <hsa_op_immed *> (src)->m_type);
- gcc_assert ((dest_size == imm_size)
- /* Eventually < 32bit registers will be promoted to 32bit. */
- || (dest_size < 32 && imm_size == 32));
- }
- hbb->append_insn (insn);
-}
-
-/* Generate HSAIL instructions loading a bit field into register DEST.
- VALUE_REG is a register of a SSA name that is used in the bit field
- reference. To identify a bit field BITPOS is offset to the loaded memory
- and BITSIZE is number of bits of the bit field.
- Add instructions to HBB. */
-
-static void
-gen_hsa_insns_for_bitfield (hsa_op_reg *dest, hsa_op_reg *value_reg,
- HOST_WIDE_INT bitsize, HOST_WIDE_INT bitpos,
- hsa_bb *hbb)
-{
- unsigned type_bitsize
- = hsa_type_bit_size (hsa_extend_inttype_to_32bit (dest->m_type));
- unsigned left_shift = type_bitsize - (bitsize + bitpos);
- unsigned right_shift = left_shift + bitpos;
-
- if (left_shift)
- {
- hsa_op_reg *value_reg_2
- = new hsa_op_reg (hsa_extend_inttype_to_32bit (dest->m_type));
- hsa_op_immed *c = new hsa_op_immed (left_shift, BRIG_TYPE_U32);
-
- hsa_insn_basic *lshift
- = new hsa_insn_basic (3, BRIG_OPCODE_SHL, value_reg_2->m_type,
- value_reg_2, value_reg, c);
-
- hbb->append_insn (lshift);
-
- value_reg = value_reg_2;
- }
-
- if (right_shift)
- {
- hsa_op_reg *value_reg_2
- = new hsa_op_reg (hsa_extend_inttype_to_32bit (dest->m_type));
- hsa_op_immed *c = new hsa_op_immed (right_shift, BRIG_TYPE_U32);
-
- hsa_insn_basic *rshift
- = new hsa_insn_basic (3, BRIG_OPCODE_SHR, value_reg_2->m_type,
- value_reg_2, value_reg, c);
-
- hbb->append_insn (rshift);
-
- value_reg = value_reg_2;
- }
-
- hsa_insn_basic *assignment
- = new hsa_insn_basic (2, BRIG_OPCODE_MOV, dest->m_type, NULL, value_reg);
- hsa_fixup_mov_insn_type (assignment);
- hbb->append_insn (assignment);
- assignment->set_output_in_type (dest, 0, hbb);
-}
-
-
-/* Generate HSAIL instructions loading a bit field into register DEST. ADDR is
- prepared memory address which is used to load the bit field. To identify a
- bit field BITPOS is offset to the loaded memory and BITSIZE is number of
- bits of the bit field. Add instructions to HBB. Load must be performed in
- alignment ALIGN. */
-
-static void
-gen_hsa_insns_for_bitfield_load (hsa_op_reg *dest, hsa_op_address *addr,
- HOST_WIDE_INT bitsize, HOST_WIDE_INT bitpos,
- hsa_bb *hbb, BrigAlignment8_t align)
-{
- hsa_op_reg *value_reg = new hsa_op_reg (dest->m_type);
- hsa_insn_mem *mem
- = new hsa_insn_mem (BRIG_OPCODE_LD,
- hsa_extend_inttype_to_32bit (dest->m_type),
- value_reg, addr);
- mem->set_align (align);
- hbb->append_insn (mem);
- gen_hsa_insns_for_bitfield (dest, value_reg, bitsize, bitpos, hbb);
-}
-
-/* Return the alignment of base memory accesses we issue to perform bit-field
- memory access REF. */
-
-static BrigAlignment8_t
-hsa_bitmemref_alignment (tree ref)
-{
- unsigned HOST_WIDE_INT bit_offset = 0;
-
- while (true)
- {
- if (TREE_CODE (ref) == BIT_FIELD_REF)
- {
- if (!tree_fits_uhwi_p (TREE_OPERAND (ref, 2)))
- return BRIG_ALIGNMENT_1;
- bit_offset += tree_to_uhwi (TREE_OPERAND (ref, 2));
- }
- else if (TREE_CODE (ref) == COMPONENT_REF
- && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
- bit_offset += int_bit_position (TREE_OPERAND (ref, 1));
- else
- break;
- ref = TREE_OPERAND (ref, 0);
- }
-
- unsigned HOST_WIDE_INT bits = bit_offset % BITS_PER_UNIT;
- unsigned HOST_WIDE_INT byte_bits = bit_offset - bits;
- BrigAlignment8_t base = hsa_object_alignment (ref);
- if (byte_bits == 0)
- return base;
- return MIN (base, hsa_alignment_encoding (least_bit_hwi (byte_bits)));
-}
-
-/* Generate HSAIL instructions loading something into register DEST. RHS is
- tree representation of the loaded data, which are loaded as type TYPE. Add
- instructions to HBB. */
-
-static void
-gen_hsa_insns_for_load (hsa_op_reg *dest, tree rhs, tree type, hsa_bb *hbb)
-{
- /* The destination SSA name will give us the type. */
- if (TREE_CODE (rhs) == VIEW_CONVERT_EXPR)
- rhs = TREE_OPERAND (rhs, 0);
-
- if (TREE_CODE (rhs) == SSA_NAME)
- {
- hsa_op_reg *src = hsa_cfun->reg_for_gimple_ssa (rhs);
- hsa_build_append_simple_mov (dest, src, hbb);
- }
- else if (is_gimple_min_invariant (rhs)
- || TREE_CODE (rhs) == ADDR_EXPR)
- {
- if (POINTER_TYPE_P (TREE_TYPE (rhs)))
- {
- if (dest->m_type != hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT))
- {
- HSA_SORRY_ATV (EXPR_LOCATION (rhs),
- "support for HSA does not implement conversion "
- "of %E to the requested non-pointer type", rhs);
- return;
- }
-
- gen_hsa_addr_insns (rhs, dest, hbb);
- }
- else if (TREE_CODE (rhs) == COMPLEX_CST)
- {
- hsa_op_immed *real_part = new hsa_op_immed (TREE_REALPART (rhs));
- hsa_op_immed *imag_part = new hsa_op_immed (TREE_IMAGPART (rhs));
-
- hsa_op_reg *real_part_reg
- = new hsa_op_reg (hsa_type_for_scalar_tree_type (TREE_TYPE (type),
- true));
- hsa_op_reg *imag_part_reg
- = new hsa_op_reg (hsa_type_for_scalar_tree_type (TREE_TYPE (type),
- true));
-
- hsa_build_append_simple_mov (real_part_reg, real_part, hbb);
- hsa_build_append_simple_mov (imag_part_reg, imag_part, hbb);
-
- BrigType16_t src_type = hsa_bittype_for_type (real_part_reg->m_type);
-
- hsa_insn_packed *insn
- = new hsa_insn_packed (3, BRIG_OPCODE_COMBINE, dest->m_type,
- src_type, dest, real_part_reg,
- imag_part_reg);
- hbb->append_insn (insn);
- }
- else
- {
- hsa_op_immed *imm = new hsa_op_immed (rhs);
- hsa_build_append_simple_mov (dest, imm, hbb);
- }
- }
- else if (TREE_CODE (rhs) == REALPART_EXPR || TREE_CODE (rhs) == IMAGPART_EXPR)
- {
- tree pack_type = TREE_TYPE (TREE_OPERAND (rhs, 0));
-
- hsa_op_reg *packed_reg
- = new hsa_op_reg (hsa_type_for_scalar_tree_type (pack_type, true));
-
- tree complex_rhs = TREE_OPERAND (rhs, 0);
- gen_hsa_insns_for_load (packed_reg, complex_rhs, TREE_TYPE (complex_rhs),
- hbb);
-
- hsa_op_reg *real_reg
- = new hsa_op_reg (hsa_type_for_scalar_tree_type (type, true));
-
- hsa_op_reg *imag_reg
- = new hsa_op_reg (hsa_type_for_scalar_tree_type (type, true));
-
- BrigKind16_t brig_type = packed_reg->m_type;
- hsa_insn_packed *packed
- = new hsa_insn_packed (3, BRIG_OPCODE_EXPAND,
- hsa_bittype_for_type (real_reg->m_type),
- brig_type, real_reg, imag_reg, packed_reg);
-
- hbb->append_insn (packed);
-
- hsa_op_reg *source = TREE_CODE (rhs) == REALPART_EXPR ?
- real_reg : imag_reg;
-
- hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_MOV,
- dest->m_type, NULL, source);
- hsa_fixup_mov_insn_type (insn);
- hbb->append_insn (insn);
- insn->set_output_in_type (dest, 0, hbb);
- }
- else if (TREE_CODE (rhs) == BIT_FIELD_REF
- && TREE_CODE (TREE_OPERAND (rhs, 0)) == SSA_NAME)
- {
- tree ssa_name = TREE_OPERAND (rhs, 0);
- HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs, 1));
- HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs, 2));
-
- hsa_op_reg *imm_value = hsa_cfun->reg_for_gimple_ssa (ssa_name);
- gen_hsa_insns_for_bitfield (dest, imm_value, bitsize, bitpos, hbb);
- }
- else if (DECL_P (rhs) || TREE_CODE (rhs) == MEM_REF
- || TREE_CODE (rhs) == TARGET_MEM_REF
- || handled_component_p (rhs))
- {
- HOST_WIDE_INT bitsize, bitpos;
-
- /* Load from memory. */
- hsa_op_address *addr;
- addr = gen_hsa_addr (rhs, hbb, &bitsize, &bitpos);
-
- /* Handle load of a bit field. */
- if (bitsize > 64)
- {
- HSA_SORRY_AT (EXPR_LOCATION (rhs),
- "support for HSA does not implement load from a bit "
- "field bigger than 64 bits");
- return;
- }
-
- if (bitsize || bitpos)
- gen_hsa_insns_for_bitfield_load (dest, addr, bitsize, bitpos, hbb,
- hsa_bitmemref_alignment (rhs));
- else
- {
- BrigType16_t mtype;
- /* Not dest->m_type, that's possibly extended. */
- mtype = mem_type_for_type (hsa_type_for_scalar_tree_type (type,
- false));
- hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_LD, mtype, dest,
- addr);
- mem->set_align (hsa_object_alignment (rhs));
- hbb->append_insn (mem);
- }
- }
- else
- HSA_SORRY_ATV (EXPR_LOCATION (rhs),
- "support for HSA does not implement loading "
- "of expression %E",
- rhs);
-}
-
-/* Return number of bits necessary for representation of a bit field,
- starting at BITPOS with size of BITSIZE. */
-
-static unsigned
-get_bitfield_size (unsigned bitpos, unsigned bitsize)
-{
- unsigned s = bitpos + bitsize;
- unsigned sizes[] = {8, 16, 32, 64};
-
- for (unsigned i = 0; i < 4; i++)
- if (s <= sizes[i])
- return sizes[i];
-
- gcc_unreachable ();
- return 0;
-}
-
-/* Generate HSAIL instructions storing into memory. LHS is the destination of
- the store, SRC is the source operand. Add instructions to HBB. */
-
-static void
-gen_hsa_insns_for_store (tree lhs, hsa_op_base *src, hsa_bb *hbb)
-{
- HOST_WIDE_INT bitsize = 0, bitpos = 0;
- BrigAlignment8_t req_align;
- BrigType16_t mtype;
- mtype = mem_type_for_type (hsa_type_for_scalar_tree_type (TREE_TYPE (lhs),
- false));
- hsa_op_address *addr;
- addr = gen_hsa_addr (lhs, hbb, &bitsize, &bitpos);
-
- /* Handle store to a bit field. */
- if (bitsize > 64)
- {
- HSA_SORRY_AT (EXPR_LOCATION (lhs),
- "support for HSA does not implement store to a bit field "
- "bigger than 64 bits");
- return;
- }
-
- unsigned type_bitsize = get_bitfield_size (bitpos, bitsize);
-
- /* HSAIL does not support MOV insn with 16-bits integers. */
- if (type_bitsize < 32)
- type_bitsize = 32;
-
- if (bitpos || (bitsize && type_bitsize != bitsize))
- {
- unsigned HOST_WIDE_INT mask = 0;
- BrigType16_t mem_type
- = get_integer_type_by_bytes (type_bitsize / BITS_PER_UNIT,
- !TYPE_UNSIGNED (TREE_TYPE (lhs)));
-
- for (unsigned i = 0; i < type_bitsize; i++)
- if (i < bitpos || i >= bitpos + bitsize)
- mask |= ((unsigned HOST_WIDE_INT)1 << i);
-
- hsa_op_reg *value_reg = new hsa_op_reg (mem_type);
-
- req_align = hsa_bitmemref_alignment (lhs);
- /* Load value from memory. */
- hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_LD, mem_type,
- value_reg, addr);
- mem->set_align (req_align);
- hbb->append_insn (mem);
-
- /* AND the loaded value with prepared mask. */
- hsa_op_reg *cleared_reg = new hsa_op_reg (mem_type);
-
- BrigType16_t t
- = get_integer_type_by_bytes (type_bitsize / BITS_PER_UNIT, false);
- hsa_op_immed *c = new hsa_op_immed (mask, t);
-
- hsa_insn_basic *clearing
- = new hsa_insn_basic (3, BRIG_OPCODE_AND, mem_type, cleared_reg,
- value_reg, c);
- hbb->append_insn (clearing);
-
- /* Shift to left a value that is going to be stored. */
- hsa_op_reg *new_value_reg = new hsa_op_reg (mem_type);
-
- hsa_insn_basic *basic = new hsa_insn_basic (2, BRIG_OPCODE_MOV, mem_type,
- new_value_reg, src);
- hsa_fixup_mov_insn_type (basic);
- hbb->append_insn (basic);
-
- if (bitpos)
- {
- hsa_op_reg *shifted_value_reg = new hsa_op_reg (mem_type);
- c = new hsa_op_immed (bitpos, BRIG_TYPE_U32);
-
- hsa_insn_basic *basic
- = new hsa_insn_basic (3, BRIG_OPCODE_SHL, mem_type,
- shifted_value_reg, new_value_reg, c);
- hbb->append_insn (basic);
-
- new_value_reg = shifted_value_reg;
- }
-
- /* OR the prepared value with prepared chunk loaded from memory. */
- hsa_op_reg *prepared_reg= new hsa_op_reg (mem_type);
- basic = new hsa_insn_basic (3, BRIG_OPCODE_OR, mem_type, prepared_reg,
- new_value_reg, cleared_reg);
- hbb->append_insn (basic);
-
- src = prepared_reg;
- mtype = mem_type;
- }
- else
- req_align = hsa_object_alignment (lhs);
-
- hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_ST, mtype, src, addr);
- mem->set_align (req_align);
-
- /* The HSAIL verifier has another constraint: if the source is an immediate
- then it must match the destination type. If it's a register the low bits
- will be used for sub-word stores. We're always allocating new operands so
- we can modify the above in place. */
- if (hsa_op_immed *imm = dyn_cast <hsa_op_immed *> (src))
- {
- if (!hsa_type_packed_p (imm->m_type))
- imm->m_type = mem->m_type;
- else
- {
- /* ...and all vector immediates apparently need to be vectors of
- unsigned bytes. */
- unsigned bs = hsa_type_bit_size (imm->m_type);
- gcc_assert (bs == hsa_type_bit_size (mem->m_type));
- switch (bs)
- {
- case 32:
- imm->m_type = BRIG_TYPE_U8X4;
- break;
- case 64:
- imm->m_type = BRIG_TYPE_U8X8;
- break;
- case 128:
- imm->m_type = BRIG_TYPE_U8X16;
- break;
- default:
- gcc_unreachable ();
- }
- }
- }
-
- hbb->append_insn (mem);
-}
-
-/* Generate memory copy instructions that are going to be used
- for copying a SRC memory to TARGET memory,
- represented by pointer in a register. MIN_ALIGN is minimal alignment
- of provided HSA addresses. */
-
-static void
-gen_hsa_memory_copy (hsa_bb *hbb, hsa_op_address *target, hsa_op_address *src,
- unsigned size, BrigAlignment8_t min_align)
-{
- hsa_op_address *addr;
- hsa_insn_mem *mem;
-
- unsigned offset = 0;
- unsigned min_byte_align = hsa_byte_alignment (min_align);
-
- while (size)
- {
- unsigned s;
- if (size >= 8)
- s = 8;
- else if (size >= 4)
- s = 4;
- else if (size >= 2)
- s = 2;
- else
- s = 1;
-
- if (s > min_byte_align)
- s = min_byte_align;
-
- BrigType16_t t = get_integer_type_by_bytes (s, false);
-
- hsa_op_reg *tmp = new hsa_op_reg (t);
- addr = new hsa_op_address (src->m_symbol, src->m_reg,
- src->m_imm_offset + offset);
- mem = new hsa_insn_mem (BRIG_OPCODE_LD, t, tmp, addr);
- hbb->append_insn (mem);
-
- addr = new hsa_op_address (target->m_symbol, target->m_reg,
- target->m_imm_offset + offset);
- mem = new hsa_insn_mem (BRIG_OPCODE_ST, t, tmp, addr);
- hbb->append_insn (mem);
- offset += s;
- size -= s;
- }
-}
-
-/* Create a memset mask that is created by copying a CONSTANT byte value
- to an integer of BYTE_SIZE bytes. */
-
-static unsigned HOST_WIDE_INT
-build_memset_value (unsigned HOST_WIDE_INT constant, unsigned byte_size)
-{
- if (constant == 0)
- return 0;
-
- HOST_WIDE_INT v = constant;
-
- for (unsigned i = 1; i < byte_size; i++)
- v |= constant << (8 * i);
-
- return v;
-}
-
-/* Generate memory set instructions that are going to be used
- for setting a CONSTANT byte value to TARGET memory of SIZE bytes.
- MIN_ALIGN is minimal alignment of provided HSA addresses. */
-
-static void
-gen_hsa_memory_set (hsa_bb *hbb, hsa_op_address *target,
- unsigned HOST_WIDE_INT constant,
- unsigned size, BrigAlignment8_t min_align)
-{
- hsa_op_address *addr;
- hsa_insn_mem *mem;
-
- unsigned offset = 0;
- unsigned min_byte_align = hsa_byte_alignment (min_align);
-
- while (size)
- {
- unsigned s;
- if (size >= 8)
- s = 8;
- else if (size >= 4)
- s = 4;
- else if (size >= 2)
- s = 2;
- else
- s = 1;
-
- if (s > min_byte_align)
- s = min_byte_align;
-
- addr = new hsa_op_address (target->m_symbol, target->m_reg,
- target->m_imm_offset + offset);
-
- BrigType16_t t = get_integer_type_by_bytes (s, false);
- HOST_WIDE_INT c = build_memset_value (constant, s);
-
- mem = new hsa_insn_mem (BRIG_OPCODE_ST, t, new hsa_op_immed (c, t),
- addr);
- hbb->append_insn (mem);
- offset += s;
- size -= s;
- }
-}
-
-/* Generate HSAIL instructions for a single assignment
- of an empty constructor to an ADDR_LHS. Constructor is passed as a
- tree RHS and all instructions are appended to HBB. ALIGN is
- alignment of the address. */
-
-void
-gen_hsa_ctor_assignment (hsa_op_address *addr_lhs, tree rhs, hsa_bb *hbb,
- BrigAlignment8_t align)
-{
- if (CONSTRUCTOR_NELTS (rhs))
- {
- HSA_SORRY_AT (EXPR_LOCATION (rhs),
- "support for HSA does not implement load from constructor");
- return;
- }
-
- unsigned size = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (rhs)));
- gen_hsa_memory_set (hbb, addr_lhs, 0, size, align);
-}
-
-/* Generate HSA instructions for a single assignment of RHS to LHS.
- HBB is the basic block they will be appended to. */
-
-static void
-gen_hsa_insns_for_single_assignment (tree lhs, tree rhs, hsa_bb *hbb)
-{
- if (TREE_CODE (lhs) == SSA_NAME)
- {
- hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
- if (hsa_seen_error ())
- return;
-
- gen_hsa_insns_for_load (dest, rhs, TREE_TYPE (lhs), hbb);
- }
- else if (TREE_CODE (rhs) == SSA_NAME
- || (is_gimple_min_invariant (rhs) && TREE_CODE (rhs) != STRING_CST))
- {
- /* Store to memory. */
- hsa_op_base *src = hsa_reg_or_immed_for_gimple_op (rhs, hbb);
- if (hsa_seen_error ())
- return;
-
- gen_hsa_insns_for_store (lhs, src, hbb);
- }
- else
- {
- BrigAlignment8_t lhs_align;
- hsa_op_address *addr_lhs = gen_hsa_addr_with_align (lhs, hbb,
- &lhs_align);
-
- if (TREE_CODE (rhs) == CONSTRUCTOR)
- gen_hsa_ctor_assignment (addr_lhs, rhs, hbb, lhs_align);
- else
- {
- BrigAlignment8_t rhs_align;
- hsa_op_address *addr_rhs = gen_hsa_addr_with_align (rhs, hbb,
- &rhs_align);
-
- unsigned size = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (rhs)));
- gen_hsa_memory_copy (hbb, addr_lhs, addr_rhs, size,
- MIN (lhs_align, rhs_align));
- }
- }
-}
-
-/* Prepend before INSN a load from spill symbol of SPILL_REG. Return the
- register into which we loaded. If this required another register to convert
- from a B1 type, return it in *PTMP2, otherwise store NULL into it. We
- assume we are out of SSA so the returned register does not have its
- definition set. */
-
-hsa_op_reg *
-hsa_spill_in (hsa_insn_basic *insn, hsa_op_reg *spill_reg, hsa_op_reg **ptmp2)
-{
- hsa_symbol *spill_sym = spill_reg->m_spill_sym;
- hsa_op_reg *reg = new hsa_op_reg (spill_sym->m_type);
- hsa_op_address *addr = new hsa_op_address (spill_sym);
-
- hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_LD, spill_sym->m_type,
- reg, addr);
- hsa_insert_insn_before (mem, insn);
-
- *ptmp2 = NULL;
- if (spill_reg->m_type == BRIG_TYPE_B1)
- {
- hsa_insn_basic *cvtinsn;
- *ptmp2 = reg;
- reg = new hsa_op_reg (spill_reg->m_type);
-
- cvtinsn = new hsa_insn_cvt (reg, *ptmp2);
- hsa_insert_insn_before (cvtinsn, insn);
- }
- return reg;
-}
-
-/* Append after INSN a store to spill symbol of SPILL_REG. Return the register
- from which we stored. If this required another register to convert to a B1
- type, return it in *PTMP2, otherwise store NULL into it. We assume we are
- out of SSA so the returned register does not have its use updated. */
-
-hsa_op_reg *
-hsa_spill_out (hsa_insn_basic *insn, hsa_op_reg *spill_reg, hsa_op_reg **ptmp2)
-{
- hsa_symbol *spill_sym = spill_reg->m_spill_sym;
- hsa_op_reg *reg = new hsa_op_reg (spill_sym->m_type);
- hsa_op_address *addr = new hsa_op_address (spill_sym);
- hsa_op_reg *returnreg;
-
- *ptmp2 = NULL;
- returnreg = reg;
- if (spill_reg->m_type == BRIG_TYPE_B1)
- {
- hsa_insn_basic *cvtinsn;
- *ptmp2 = new hsa_op_reg (spill_sym->m_type);
- reg->m_type = spill_reg->m_type;
-
- cvtinsn = new hsa_insn_cvt (*ptmp2, returnreg);
- hsa_append_insn_after (cvtinsn, insn);
- insn = cvtinsn;
- reg = *ptmp2;
- }
-
- hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_ST, spill_sym->m_type, reg,
- addr);
- hsa_append_insn_after (mem, insn);
- return returnreg;
-}
-
-/* Generate a comparison instruction that will compare LHS and RHS with
- comparison specified by CODE and put result into register DEST. DEST has to
- have its type set already but must not have its definition set yet.
- Generated instructions will be added to HBB. */
-
-static void
-gen_hsa_cmp_insn_from_gimple (enum tree_code code, tree lhs, tree rhs,
- hsa_op_reg *dest, hsa_bb *hbb)
-{
- BrigCompareOperation8_t compare;
-
- switch (code)
- {
- case LT_EXPR:
- compare = BRIG_COMPARE_LT;
- break;
- case LE_EXPR:
- compare = BRIG_COMPARE_LE;
- break;
- case GT_EXPR:
- compare = BRIG_COMPARE_GT;
- break;
- case GE_EXPR:
- compare = BRIG_COMPARE_GE;
- break;
- case EQ_EXPR:
- compare = BRIG_COMPARE_EQ;
- break;
- case NE_EXPR:
- compare = BRIG_COMPARE_NE;
- break;
- case UNORDERED_EXPR:
- compare = BRIG_COMPARE_NAN;
- break;
- case ORDERED_EXPR:
- compare = BRIG_COMPARE_NUM;
- break;
- case UNLT_EXPR:
- compare = BRIG_COMPARE_LTU;
- break;
- case UNLE_EXPR:
- compare = BRIG_COMPARE_LEU;
- break;
- case UNGT_EXPR:
- compare = BRIG_COMPARE_GTU;
- break;
- case UNGE_EXPR:
- compare = BRIG_COMPARE_GEU;
- break;
- case UNEQ_EXPR:
- compare = BRIG_COMPARE_EQU;
- break;
- case LTGT_EXPR:
- compare = BRIG_COMPARE_NEU;
- break;
-
- default:
- HSA_SORRY_ATV (EXPR_LOCATION (lhs),
- "support for HSA does not implement comparison tree "
- "code %s", get_tree_code_name (code));
- return;
- }
-
- /* CMP instruction returns e.g. 0xffffffff (for a 32-bit with integer)
- as a result of comparison. */
-
- BrigType16_t dest_type = hsa_type_integer_p (dest->m_type)
- ? (BrigType16_t) BRIG_TYPE_B1 : dest->m_type;
-
- hsa_insn_cmp *cmp = new hsa_insn_cmp (compare, dest_type);
- hsa_op_with_type *op1 = hsa_reg_or_immed_for_gimple_op (lhs, hbb);
- cmp->set_op (1, op1->extend_int_to_32bit (hbb));
- hsa_op_with_type *op2 = hsa_reg_or_immed_for_gimple_op (rhs, hbb);
- cmp->set_op (2, op2->extend_int_to_32bit (hbb));
-
- hbb->append_insn (cmp);
- cmp->set_output_in_type (dest, 0, hbb);
-}
-
-/* Generate an unary instruction with OPCODE and append it to a basic block
- HBB. The instruction uses DEST as a destination and OP1
- as a single operand. */
-
-static void
-gen_hsa_unary_operation (BrigOpcode opcode, hsa_op_reg *dest,
- hsa_op_with_type *op1, hsa_bb *hbb)
-{
- gcc_checking_assert (dest);
- hsa_insn_basic *insn;
-
- if (opcode == BRIG_OPCODE_MOV && hsa_needs_cvt (dest->m_type, op1->m_type))
- {
- insn = new hsa_insn_cvt (dest, op1);
- hbb->append_insn (insn);
- return;
- }
-
- op1 = op1->extend_int_to_32bit (hbb);
- if (opcode == BRIG_OPCODE_FIRSTBIT || opcode == BRIG_OPCODE_LASTBIT)
- {
- BrigType16_t srctype = hsa_type_integer_p (op1->m_type) ? op1->m_type
- : hsa_unsigned_type_for_type (op1->m_type);
- insn = new hsa_insn_srctype (2, opcode, BRIG_TYPE_U32, srctype, NULL,
- op1);
- }
- else
- {
- BrigType16_t optype = hsa_extend_inttype_to_32bit (dest->m_type);
- insn = new hsa_insn_basic (2, opcode, optype, NULL, op1);
-
- if (opcode == BRIG_OPCODE_MOV)
- hsa_fixup_mov_insn_type (insn);
- else if (opcode == BRIG_OPCODE_ABS || opcode == BRIG_OPCODE_NEG)
- {
- /* ABS and NEG only exist in _s form :-/ */
- if (insn->m_type == BRIG_TYPE_U32)
- insn->m_type = BRIG_TYPE_S32;
- else if (insn->m_type == BRIG_TYPE_U64)
- insn->m_type = BRIG_TYPE_S64;
- }
- }
-
- hbb->append_insn (insn);
- insn->set_output_in_type (dest, 0, hbb);
-}
-
-/* Generate a binary instruction with OPCODE and append it to a basic block
- HBB. The instruction uses DEST as a destination and operands OP1
- and OP2. */
-
-static void
-gen_hsa_binary_operation (int opcode, hsa_op_reg *dest,
- hsa_op_with_type *op1, hsa_op_with_type *op2,
- hsa_bb *hbb)
-{
- gcc_checking_assert (dest);
-
- BrigType16_t optype = hsa_extend_inttype_to_32bit (dest->m_type);
- op1 = op1->extend_int_to_32bit (hbb);
- op2 = op2->extend_int_to_32bit (hbb);
-
- if ((opcode == BRIG_OPCODE_SHL || opcode == BRIG_OPCODE_SHR)
- && is_a <hsa_op_immed *> (op2))
- {
- hsa_op_immed *i = dyn_cast <hsa_op_immed *> (op2);
- i->set_type (BRIG_TYPE_U32);
- }
- if ((opcode == BRIG_OPCODE_OR
- || opcode == BRIG_OPCODE_XOR
- || opcode == BRIG_OPCODE_AND)
- && is_a <hsa_op_immed *> (op2))
- {
- hsa_op_immed *i = dyn_cast <hsa_op_immed *> (op2);
- i->set_type (hsa_unsigned_type_for_type (i->m_type));
- }
-
- hsa_insn_basic *insn = new hsa_insn_basic (3, opcode, optype, NULL,
- op1, op2);
- hbb->append_insn (insn);
- insn->set_output_in_type (dest, 0, hbb);
-}
-
-/* Generate HSA instructions for a single assignment. HBB is the basic block
- they will be appended to. */
-
-static void
-gen_hsa_insns_for_operation_assignment (gimple *assign, hsa_bb *hbb)
-{
- tree_code code = gimple_assign_rhs_code (assign);
- gimple_rhs_class rhs_class = get_gimple_rhs_class (gimple_expr_code (assign));
-
- tree lhs = gimple_assign_lhs (assign);
- tree rhs1 = gimple_assign_rhs1 (assign);
- tree rhs2 = gimple_assign_rhs2 (assign);
- tree rhs3 = gimple_assign_rhs3 (assign);
-
- BrigOpcode opcode;
-
- switch (code)
- {
- CASE_CONVERT:
- case FLOAT_EXPR:
- /* The opcode is changed to BRIG_OPCODE_CVT if BRIG types
- needs a conversion. */
- opcode = BRIG_OPCODE_MOV;
- break;
-
- case PLUS_EXPR:
- case POINTER_PLUS_EXPR:
- opcode = BRIG_OPCODE_ADD;
- break;
- case MINUS_EXPR:
- opcode = BRIG_OPCODE_SUB;
- break;
- case MULT_EXPR:
- opcode = BRIG_OPCODE_MUL;
- break;
- case MULT_HIGHPART_EXPR:
- opcode = BRIG_OPCODE_MULHI;
- break;
- case RDIV_EXPR:
- case TRUNC_DIV_EXPR:
- case EXACT_DIV_EXPR:
- opcode = BRIG_OPCODE_DIV;
- break;
- case CEIL_DIV_EXPR:
- case FLOOR_DIV_EXPR:
- case ROUND_DIV_EXPR:
- HSA_SORRY_AT (gimple_location (assign),
- "support for HSA does not implement %<CEIL_DIV_EXPR%>, "
- "%<FLOOR_DIV_EXPR%> or %<ROUND_DIV_EXPR%>");
- return;
- case TRUNC_MOD_EXPR:
- opcode = BRIG_OPCODE_REM;
- break;
- case CEIL_MOD_EXPR:
- case FLOOR_MOD_EXPR:
- case ROUND_MOD_EXPR:
- HSA_SORRY_AT (gimple_location (assign),
- "support for HSA does not implement %<CEIL_MOD_EXPR%>, "
- "%<FLOOR_MOD_EXPR%> or %<ROUND_MOD_EXPR%>");
- return;
- case NEGATE_EXPR:
- opcode = BRIG_OPCODE_NEG;
- break;
- case MIN_EXPR:
- opcode = BRIG_OPCODE_MIN;
- break;
- case MAX_EXPR:
- opcode = BRIG_OPCODE_MAX;
- break;
- case ABS_EXPR:
- opcode = BRIG_OPCODE_ABS;
- break;
- case LSHIFT_EXPR:
- opcode = BRIG_OPCODE_SHL;
- break;
- case RSHIFT_EXPR:
- opcode = BRIG_OPCODE_SHR;
- break;
- case LROTATE_EXPR:
- case RROTATE_EXPR:
- {
- hsa_insn_basic *insn = NULL;
- int code1 = code == LROTATE_EXPR ? BRIG_OPCODE_SHL : BRIG_OPCODE_SHR;
- int code2 = code != LROTATE_EXPR ? BRIG_OPCODE_SHL : BRIG_OPCODE_SHR;
- BrigType16_t btype = hsa_type_for_scalar_tree_type (TREE_TYPE (lhs),
- true);
-
- hsa_op_with_type *src = hsa_reg_or_immed_for_gimple_op (rhs1, hbb);
- hsa_op_reg *op1 = new hsa_op_reg (btype);
- hsa_op_reg *op2 = new hsa_op_reg (btype);
- hsa_op_with_type *shift1 = hsa_reg_or_immed_for_gimple_op (rhs2, hbb);
-
- tree type = TREE_TYPE (rhs2);
- unsigned HOST_WIDE_INT bitsize = TREE_INT_CST_LOW (TYPE_SIZE (type));
-
- hsa_op_with_type *shift2 = NULL;
- if (TREE_CODE (rhs2) == INTEGER_CST)
- shift2 = new hsa_op_immed (bitsize - tree_to_uhwi (rhs2),
- BRIG_TYPE_U32);
- else if (TREE_CODE (rhs2) == SSA_NAME)
- {
- hsa_op_reg *s = hsa_cfun->reg_for_gimple_ssa (rhs2);
- s = as_a <hsa_op_reg *> (s->extend_int_to_32bit (hbb));
- hsa_op_reg *d = new hsa_op_reg (s->m_type);
- hsa_op_immed *size_imm = new hsa_op_immed (bitsize, BRIG_TYPE_U32);
-
- insn = new hsa_insn_basic (3, BRIG_OPCODE_SUB, d->m_type,
- d, s, size_imm);
- hbb->append_insn (insn);
-
- shift2 = d;
- }
- else
- gcc_unreachable ();
-
- hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
- gen_hsa_binary_operation (code1, op1, src, shift1, hbb);
- gen_hsa_binary_operation (code2, op2, src, shift2, hbb);
- gen_hsa_binary_operation (BRIG_OPCODE_OR, dest, op1, op2, hbb);
-
- return;
- }
- case BIT_IOR_EXPR:
- opcode = BRIG_OPCODE_OR;
- break;
- case BIT_XOR_EXPR:
- opcode = BRIG_OPCODE_XOR;
- break;
- case BIT_AND_EXPR:
- opcode = BRIG_OPCODE_AND;
- break;
- case BIT_NOT_EXPR:
- opcode = BRIG_OPCODE_NOT;
- break;
- case FIX_TRUNC_EXPR:
- {
- hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
- hsa_op_with_type *v = hsa_reg_or_immed_for_gimple_op (rhs1, hbb);
-
- if (hsa_needs_cvt (dest->m_type, v->m_type))
- {
- hsa_op_reg *tmp = new hsa_op_reg (v->m_type);
-
- hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_TRUNC,
- tmp->m_type, tmp, v);
- hbb->append_insn (insn);
-
- hsa_insn_basic *cvtinsn = new hsa_insn_cvt (dest, tmp);
- hbb->append_insn (cvtinsn);
- }
- else
- {
- hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_TRUNC,
- dest->m_type, dest, v);
- hbb->append_insn (insn);
- }
-
- return;
- }
- opcode = BRIG_OPCODE_TRUNC;
- break;
-
- case LT_EXPR:
- case LE_EXPR:
- case GT_EXPR:
- case GE_EXPR:
- case EQ_EXPR:
- case NE_EXPR:
- case UNORDERED_EXPR:
- case ORDERED_EXPR:
- case UNLT_EXPR:
- case UNLE_EXPR:
- case UNGT_EXPR:
- case UNGE_EXPR:
- case UNEQ_EXPR:
- case LTGT_EXPR:
- {
- hsa_op_reg *dest
- = hsa_cfun->reg_for_gimple_ssa (gimple_assign_lhs (assign));
-
- gen_hsa_cmp_insn_from_gimple (code, rhs1, rhs2, dest, hbb);
- return;
- }
- case COND_EXPR:
- {
- hsa_op_reg *dest
- = hsa_cfun->reg_for_gimple_ssa (gimple_assign_lhs (assign));
- hsa_op_with_type *ctrl = NULL;
- tree cond = rhs1;
-
- if (CONSTANT_CLASS_P (cond) || TREE_CODE (cond) == SSA_NAME)
- ctrl = hsa_reg_or_immed_for_gimple_op (cond, hbb);
- else
- {
- hsa_op_reg *r = new hsa_op_reg (BRIG_TYPE_B1);
-
- gen_hsa_cmp_insn_from_gimple (TREE_CODE (cond),
- TREE_OPERAND (cond, 0),
- TREE_OPERAND (cond, 1),
- r, hbb);
-
- ctrl = r;
- }
-
- hsa_op_with_type *op2 = hsa_reg_or_immed_for_gimple_op (rhs2, hbb);
- hsa_op_with_type *op3 = hsa_reg_or_immed_for_gimple_op (rhs3, hbb);
- op2 = op2->extend_int_to_32bit (hbb);
- op3 = op3->extend_int_to_32bit (hbb);
-
- BrigType16_t type = hsa_extend_inttype_to_32bit (dest->m_type);
- BrigType16_t utype = hsa_unsigned_type_for_type (type);
- if (is_a <hsa_op_immed *> (op2))
- op2->m_type = utype;
- if (is_a <hsa_op_immed *> (op3))
- op3->m_type = utype;
-
- hsa_insn_basic *insn
- = new hsa_insn_basic (4, BRIG_OPCODE_CMOV,
- hsa_bittype_for_type (type),
- NULL, ctrl, op2, op3);
-
- hbb->append_insn (insn);
- insn->set_output_in_type (dest, 0, hbb);
- return;
- }
- case COMPLEX_EXPR:
- {
- hsa_op_reg *dest
- = hsa_cfun->reg_for_gimple_ssa (gimple_assign_lhs (assign));
- hsa_op_with_type *rhs1_reg = hsa_reg_or_immed_for_gimple_op (rhs1, hbb);
- rhs1_reg = rhs1_reg->extend_int_to_32bit (hbb);
- hsa_op_with_type *rhs2_reg = hsa_reg_or_immed_for_gimple_op (rhs2, hbb);
- rhs2_reg = rhs2_reg->extend_int_to_32bit (hbb);
-
- if (hsa_seen_error ())
- return;
-
- BrigType16_t src_type = hsa_bittype_for_type (rhs1_reg->m_type);
- rhs1_reg = rhs1_reg->get_in_type (src_type, hbb);
- rhs2_reg = rhs2_reg->get_in_type (src_type, hbb);
-
- hsa_insn_packed *insn
- = new hsa_insn_packed (3, BRIG_OPCODE_COMBINE, dest->m_type, src_type,
- dest, rhs1_reg, rhs2_reg);
- hbb->append_insn (insn);
-
- return;
- }
- default:
- /* Implement others as we come across them. */
- HSA_SORRY_ATV (gimple_location (assign),
- "support for HSA does not implement operation %s",
- get_tree_code_name (code));
- return;
- }
-
-
- hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
- hsa_op_with_type *op1 = hsa_reg_or_immed_for_gimple_op (rhs1, hbb);
- hsa_op_with_type *op2
- = rhs2 ? hsa_reg_or_immed_for_gimple_op (rhs2, hbb) : NULL;
-
- if (hsa_seen_error ())
- return;
-
- switch (rhs_class)
- {
- case GIMPLE_TERNARY_RHS:
- {
- hsa_op_with_type *op3 = hsa_reg_or_immed_for_gimple_op (rhs3, hbb);
- op3 = op3->extend_int_to_32bit (hbb);
- hsa_insn_basic *insn = new hsa_insn_basic (4, opcode, dest->m_type, dest,
- op1, op2, op3);
- hbb->append_insn (insn);
- }
- return;
-
- case GIMPLE_BINARY_RHS:
- gen_hsa_binary_operation (opcode, dest, op1, op2, hbb);
- break;
-
- case GIMPLE_UNARY_RHS:
- gen_hsa_unary_operation (opcode, dest, op1, hbb);
- break;
- default:
- gcc_unreachable ();
- }
-}
-
-/* Generate HSA instructions for a given gimple condition statement COND.
- Instructions will be appended to HBB, which also needs to be the
- corresponding structure to the basic_block of COND. */
-
-static void
-gen_hsa_insns_for_cond_stmt (gimple *cond, hsa_bb *hbb)
-{
- hsa_op_reg *ctrl = new hsa_op_reg (BRIG_TYPE_B1);
- hsa_insn_cbr *cbr;
-
- gen_hsa_cmp_insn_from_gimple (gimple_cond_code (cond),
- gimple_cond_lhs (cond),
- gimple_cond_rhs (cond),
- ctrl, hbb);
-
- cbr = new hsa_insn_cbr (ctrl);
- hbb->append_insn (cbr);
-}
-
-/* Maximum number of elements in a jump table for an HSA SBR instruction. */
-
-#define HSA_MAXIMUM_SBR_LABELS 16
-
-/* Return lowest value of a switch S that is handled in a non-default
- label. */
-
-static tree
-get_switch_low (gswitch *s)
-{
- unsigned labels = gimple_switch_num_labels (s);
- gcc_checking_assert (labels >= 1);
-
- return CASE_LOW (gimple_switch_label (s, 1));
-}
-
-/* Return highest value of a switch S that is handled in a non-default
- label. */
-
-static tree
-get_switch_high (gswitch *s)
-{
- unsigned labels = gimple_switch_num_labels (s);
-
- /* Compare last label to maximum number of labels. */
- tree label = gimple_switch_label (s, labels - 1);
- tree low = CASE_LOW (label);
- tree high = CASE_HIGH (label);
-
- return high != NULL_TREE ? high : low;
-}
-
-static tree
-get_switch_size (gswitch *s)
-{
- return int_const_binop (MINUS_EXPR, get_switch_high (s), get_switch_low (s));
-}
-
-/* Generate HSA instructions for a given gimple switch.
- Instructions will be appended to HBB. */
-
-static void
-gen_hsa_insns_for_switch_stmt (gswitch *s, hsa_bb *hbb)
-{
- gimple_stmt_iterator it = gsi_for_stmt (s);
- gsi_prev (&it);
-
- /* Create preambule that verifies that index - lowest_label >= 0. */
- edge e = split_block (hbb->m_bb, gsi_stmt (it));
- e->flags &= ~EDGE_FALLTHRU;
- e->flags |= EDGE_TRUE_VALUE;
-
- tree index_tree = gimple_switch_index (s);
- tree lowest = get_switch_low (s);
- tree highest = get_switch_high (s);
-
- hsa_op_reg *index = hsa_cfun->reg_for_gimple_ssa (index_tree);
- index = as_a <hsa_op_reg *> (index->extend_int_to_32bit (hbb));
-
- hsa_op_reg *cmp1_reg = new hsa_op_reg (BRIG_TYPE_B1);
- hsa_op_immed *cmp1_immed = new hsa_op_immed (lowest, true);
- hbb->append_insn (new hsa_insn_cmp (BRIG_COMPARE_GE, cmp1_reg->m_type,
- cmp1_reg, index, cmp1_immed));
-
- hsa_op_reg *cmp2_reg = new hsa_op_reg (BRIG_TYPE_B1);
- hsa_op_immed *cmp2_immed = new hsa_op_immed (highest, true);
- hbb->append_insn (new hsa_insn_cmp (BRIG_COMPARE_LE, cmp2_reg->m_type,
- cmp2_reg, index, cmp2_immed));
-
- hsa_op_reg *cmp_reg = new hsa_op_reg (BRIG_TYPE_B1);
- hbb->append_insn (new hsa_insn_basic (3, BRIG_OPCODE_AND, cmp_reg->m_type,
- cmp_reg, cmp1_reg, cmp2_reg));
-
- hbb->append_insn (new hsa_insn_cbr (cmp_reg));
-
- basic_block default_label_bb = gimple_switch_default_bb (cfun, s);
-
- if (!gimple_seq_empty_p (phi_nodes (default_label_bb)))
- {
- default_label_bb = split_edge (find_edge (e->dest, default_label_bb));
- hsa_init_new_bb (default_label_bb);
- }
-
- make_edge (e->src, default_label_bb, EDGE_FALSE_VALUE);
-
- hsa_cfun->m_modified_cfg = true;
-
- /* Basic block with the SBR instruction. */
- hbb = hsa_init_new_bb (e->dest);
-
- hsa_op_reg *sub_index = new hsa_op_reg (index->m_type);
- hbb->append_insn (new hsa_insn_basic (3, BRIG_OPCODE_SUB, sub_index->m_type,
- sub_index, index,
- new hsa_op_immed (lowest, true)));
-
- hsa_op_base *tmp = sub_index->get_in_type (BRIG_TYPE_U64, hbb);
- sub_index = as_a <hsa_op_reg *> (tmp);
- unsigned labels = gimple_switch_num_labels (s);
- unsigned HOST_WIDE_INT size = tree_to_uhwi (get_switch_size (s));
-
- hsa_insn_sbr *sbr = new hsa_insn_sbr (sub_index, size + 1);
-
- /* Prepare array with default label destination. */
- for (unsigned HOST_WIDE_INT i = 0; i <= size; i++)
- sbr->m_jump_table.safe_push (default_label_bb);
-
- /* Iterate all labels and fill up the jump table. */
- for (unsigned i = 1; i < labels; i++)
- {
- tree label = gimple_switch_label (s, i);
- basic_block bb = label_to_block (cfun, CASE_LABEL (label));
-
- unsigned HOST_WIDE_INT sub_low
- = tree_to_uhwi (int_const_binop (MINUS_EXPR, CASE_LOW (label), lowest));
-
- unsigned HOST_WIDE_INT sub_high = sub_low;
- tree high = CASE_HIGH (label);
- if (high != NULL)
- sub_high = tree_to_uhwi (int_const_binop (MINUS_EXPR, high, lowest));
-
- for (unsigned HOST_WIDE_INT j = sub_low; j <= sub_high; j++)
- sbr->m_jump_table[j] = bb;
- }
-
- hbb->append_insn (sbr);
-}
-
-/* Verify that the function DECL can be handled by HSA. */
-
-static void
-verify_function_arguments (tree decl)
-{
- tree type = TREE_TYPE (decl);
- if (DECL_STATIC_CHAIN (decl))
- {
- HSA_SORRY_ATV (EXPR_LOCATION (decl),
- "HSA does not support nested functions: %qD", decl);
- return;
- }
- else if (!TYPE_ARG_TYPES (type) || stdarg_p (type))
- {
- HSA_SORRY_ATV (EXPR_LOCATION (decl),
- "HSA does not support functions with variadic arguments "
- "(or unknown return type): %qD", decl);
- return;
- }
-}
-
-/* Return BRIG type for FORMAL_ARG_TYPE. If the formal argument type is NULL,
- return ACTUAL_ARG_TYPE. */
-
-static BrigType16_t
-get_format_argument_type (tree formal_arg_type, BrigType16_t actual_arg_type)
-{
- if (formal_arg_type == NULL)
- return actual_arg_type;
-
- BrigType16_t decl_type
- = hsa_type_for_scalar_tree_type (formal_arg_type, false);
- return mem_type_for_type (decl_type);
-}
-
-/* Generate HSA instructions for a direct call instruction.
- Instructions will be appended to HBB, which also needs to be the
- corresponding structure to the basic_block of STMT.
- If ASSIGN_LHS is false, do not copy HSA function result argument into the
- corresponding HSA representation of the gimple statement LHS. */
-
-static void
-gen_hsa_insns_for_direct_call (gimple *stmt, hsa_bb *hbb,
- bool assign_lhs = true)
-{
- tree decl = gimple_call_fndecl (stmt);
- verify_function_arguments (decl);
- if (hsa_seen_error ())
- return;
-
- hsa_insn_call *call_insn = new hsa_insn_call (decl);
- hsa_cfun->m_called_functions.safe_push (call_insn->m_called_function);
-
- /* Argument block start. */
- hsa_insn_arg_block *arg_start
- = new hsa_insn_arg_block (BRIG_KIND_DIRECTIVE_ARG_BLOCK_START, call_insn);
- hbb->append_insn (arg_start);
-
- tree parm_type_chain = TYPE_ARG_TYPES (gimple_call_fntype (stmt));
-
- /* Preparation of arguments that will be passed to function. */
- const unsigned args = gimple_call_num_args (stmt);
- for (unsigned i = 0; i < args; ++i)
- {
- tree parm = gimple_call_arg (stmt, (int)i);
- tree parm_decl_type = parm_type_chain != NULL_TREE
- ? TREE_VALUE (parm_type_chain) : NULL_TREE;
- hsa_op_address *addr;
-
- if (AGGREGATE_TYPE_P (TREE_TYPE (parm)))
- {
- addr = gen_hsa_addr_for_arg (TREE_TYPE (parm), i);
- BrigAlignment8_t align;
- hsa_op_address *src = gen_hsa_addr_with_align (parm, hbb, &align);
- gen_hsa_memory_copy (hbb, addr, src,
- addr->m_symbol->total_byte_size (), align);
- }
- else
- {
- hsa_op_with_type *src = hsa_reg_or_immed_for_gimple_op (parm, hbb);
-
- if (parm_decl_type != NULL && AGGREGATE_TYPE_P (parm_decl_type))
- {
- HSA_SORRY_AT (gimple_location (stmt),
- "support for HSA does not implement an aggregate "
- "formal argument in a function call, while actual "
- "argument is not an aggregate");
- return;
- }
-
- BrigType16_t formal_arg_type
- = get_format_argument_type (parm_decl_type, src->m_type);
- if (hsa_seen_error ())
- return;
-
- if (src->m_type != formal_arg_type)
- src = src->get_in_type (formal_arg_type, hbb);
-
- addr
- = gen_hsa_addr_for_arg (parm_decl_type != NULL_TREE ?
- parm_decl_type: TREE_TYPE (parm), i);
- hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_ST, formal_arg_type,
- src, addr);
-
- hbb->append_insn (mem);
- }
-
- call_insn->m_input_args.safe_push (addr->m_symbol);
- if (parm_type_chain)
- parm_type_chain = TREE_CHAIN (parm_type_chain);
- }
-
- call_insn->m_args_code_list = new hsa_op_code_list (args);
- hbb->append_insn (call_insn);
-
- tree result_type = TREE_TYPE (TREE_TYPE (decl));
-
- tree result = gimple_call_lhs (stmt);
- hsa_insn_mem *result_insn = NULL;
- if (!VOID_TYPE_P (result_type))
- {
- hsa_op_address *addr = gen_hsa_addr_for_arg (result_type, -1);
-
- /* Even if result of a function call is unused, we have to emit
- declaration for the result. */
- if (result && assign_lhs)
- {
- tree lhs_type = TREE_TYPE (result);
-
- if (hsa_seen_error ())
- return;
-
- if (AGGREGATE_TYPE_P (lhs_type))
- {
- BrigAlignment8_t align;
- hsa_op_address *result_addr
- = gen_hsa_addr_with_align (result, hbb, &align);
- gen_hsa_memory_copy (hbb, result_addr, addr,
- addr->m_symbol->total_byte_size (), align);
- }
- else
- {
- BrigType16_t mtype
- = mem_type_for_type (hsa_type_for_scalar_tree_type (lhs_type,
- false));
-
- hsa_op_reg *dst = hsa_cfun->reg_for_gimple_ssa (result);
- result_insn = new hsa_insn_mem (BRIG_OPCODE_LD, mtype, dst, addr);
- hbb->append_insn (result_insn);
- }
- }
-
- call_insn->m_output_arg = addr->m_symbol;
- call_insn->m_result_code_list = new hsa_op_code_list (1);
- }
- else
- {
- if (result)
- {
- HSA_SORRY_AT (gimple_location (stmt),
- "support for HSA does not implement an assignment of "
- "return value from a void function");
- return;
- }
-
- call_insn->m_result_code_list = new hsa_op_code_list (0);
- }
-
- /* Argument block end. */
- hsa_insn_arg_block *arg_end
- = new hsa_insn_arg_block (BRIG_KIND_DIRECTIVE_ARG_BLOCK_END, call_insn);
- hbb->append_insn (arg_end);
-}
-
-/* Generate HSA instructions for a direct call of an internal fn.
- Instructions will be appended to HBB, which also needs to be the
- corresponding structure to the basic_block of STMT. */
-
-static void
-gen_hsa_insns_for_call_of_internal_fn (gimple *stmt, hsa_bb *hbb)
-{
- tree lhs = gimple_call_lhs (stmt);
- if (!lhs)
- return;
-
- tree lhs_type = TREE_TYPE (lhs);
- tree rhs1 = gimple_call_arg (stmt, 0);
- tree rhs1_type = TREE_TYPE (rhs1);
- enum internal_fn fn = gimple_call_internal_fn (stmt);
- hsa_internal_fn *ifn
- = new hsa_internal_fn (fn, tree_to_uhwi (TYPE_SIZE (rhs1_type)));
- hsa_insn_call *call_insn = new hsa_insn_call (ifn);
-
- gcc_checking_assert (FLOAT_TYPE_P (rhs1_type));
-
- if (!hsa_emitted_internal_decls->find (call_insn->m_called_internal_fn))
- hsa_cfun->m_called_internal_fns.safe_push (call_insn->m_called_internal_fn);
-
- hsa_insn_arg_block *arg_start
- = new hsa_insn_arg_block (BRIG_KIND_DIRECTIVE_ARG_BLOCK_START, call_insn);
- hbb->append_insn (arg_start);
-
- unsigned num_args = gimple_call_num_args (stmt);
-
- /* Function arguments. */
- for (unsigned i = 0; i < num_args; i++)
- {
- tree parm = gimple_call_arg (stmt, (int)i);
- hsa_op_with_type *src = hsa_reg_or_immed_for_gimple_op (parm, hbb);
-
- hsa_op_address *addr = gen_hsa_addr_for_arg (TREE_TYPE (parm), i);
- hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_ST, src->m_type,
- src, addr);
-
- call_insn->m_input_args.safe_push (addr->m_symbol);
- hbb->append_insn (mem);
- }
-
- call_insn->m_args_code_list = new hsa_op_code_list (num_args);
- hbb->append_insn (call_insn);
-
- /* Assign returned value. */
- hsa_op_address *addr = gen_hsa_addr_for_arg (lhs_type, -1);
-
- call_insn->m_output_arg = addr->m_symbol;
- call_insn->m_result_code_list = new hsa_op_code_list (1);
-
- /* Argument block end. */
- hsa_insn_arg_block *arg_end
- = new hsa_insn_arg_block (BRIG_KIND_DIRECTIVE_ARG_BLOCK_END, call_insn);
- hbb->append_insn (arg_end);
-}
-
-/* Generate HSA instructions for a return value instruction.
- Instructions will be appended to HBB, which also needs to be the
- corresponding structure to the basic_block of STMT. */
-
-static void
-gen_hsa_insns_for_return (greturn *stmt, hsa_bb *hbb)
-{
- tree retval = gimple_return_retval (stmt);
- if (retval)
- {
- hsa_op_address *addr = new hsa_op_address (hsa_cfun->m_output_arg);
-
- if (AGGREGATE_TYPE_P (TREE_TYPE (retval)))
- {
- BrigAlignment8_t align;
- hsa_op_address *retval_addr = gen_hsa_addr_with_align (retval, hbb,
- &align);
- gen_hsa_memory_copy (hbb, addr, retval_addr,
- hsa_cfun->m_output_arg->total_byte_size (),
- align);
- }
- else
- {
- BrigType16_t t = hsa_type_for_scalar_tree_type (TREE_TYPE (retval),
- false);
- BrigType16_t mtype = mem_type_for_type (t);
-
- /* Store of return value. */
- hsa_op_with_type *src = hsa_reg_or_immed_for_gimple_op (retval, hbb);
- src = src->get_in_type (mtype, hbb);
- hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_ST, mtype, src,
- addr);
- hbb->append_insn (mem);
- }
- }
-
- /* HSAIL return instruction emission. */
- hsa_insn_basic *ret = new hsa_insn_basic (0, BRIG_OPCODE_RET);
- hbb->append_insn (ret);
-}
-
-/* Set OP_INDEX-th operand of the instruction to DEST, as the DEST
- can have a different type, conversion instructions are possibly
- appended to HBB. */
-
-void
-hsa_insn_basic::set_output_in_type (hsa_op_reg *dest, unsigned op_index,
- hsa_bb *hbb)
-{
- gcc_checking_assert (op_output_p (op_index));
-
- if (dest->m_type == m_type)
- {
- set_op (op_index, dest);
- return;
- }
-
- hsa_insn_basic *insn;
- hsa_op_reg *tmp;
- if (hsa_needs_cvt (dest->m_type, m_type))
- {
- tmp = new hsa_op_reg (m_type);
- insn = new hsa_insn_cvt (dest, tmp);
- }
- else if (hsa_type_bit_size (dest->m_type) == hsa_type_bit_size (m_type))
- {
- /* When output, HSA registers do not really have types, only sizes, so if
- the sizes match, we can use the register directly. */
- set_op (op_index, dest);
- return;
- }
- else
- {
- tmp = new hsa_op_reg (m_type);
- insn = new hsa_insn_basic (2, BRIG_OPCODE_MOV, dest->m_type,
- dest, tmp->get_in_type (dest->m_type, hbb));
- hsa_fixup_mov_insn_type (insn);
- }
- set_op (op_index, tmp);
- hbb->append_insn (insn);
-}
-
-/* Generate instruction OPCODE to query a property of HSA grid along the
- given DIMENSION. Store result into DEST and append the instruction to
- HBB. */
-
-static void
-query_hsa_grid_dim (hsa_op_reg *dest, int opcode, hsa_op_immed *dimension,
- hsa_bb *hbb)
-{
- hsa_insn_basic *insn = new hsa_insn_basic (2, opcode, BRIG_TYPE_U32, NULL,
- dimension);
- hbb->append_insn (insn);
- insn->set_output_in_type (dest, 0, hbb);
-}
-
-/* Generate instruction OPCODE to query a property of HSA grid along the given
- dimension which is an immediate in first argument of STMT. Store result
- into the register corresponding to LHS of STMT and append the instruction to
- HBB. */
-
-static void
-query_hsa_grid_dim (gimple *stmt, int opcode, hsa_bb *hbb)
-{
- tree lhs = gimple_call_lhs (dyn_cast <gcall *> (stmt));
- if (lhs == NULL_TREE)
- return;
-
- tree arg = gimple_call_arg (stmt, 0);
- unsigned HOST_WIDE_INT dim = 5;
- if (tree_fits_uhwi_p (arg))
- dim = tree_to_uhwi (arg);
- if (dim > 2)
- {
- HSA_SORRY_AT (gimple_location (stmt),
- "HSA grid query dimension must be immediate constant 0, 1 "
- "or 2");
- return;
- }
-
- hsa_op_immed *hdim = new hsa_op_immed (dim, (BrigKind16_t) BRIG_TYPE_U32);
- hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
- query_hsa_grid_dim (dest, opcode, hdim, hbb);
-}
-
-/* Generate instruction OPCODE to query a property of HSA grid that is
- independent of any dimension. Store result into the register corresponding
- to LHS of STMT and append the instruction to HBB. */
-
-static void
-query_hsa_grid_nodim (gimple *stmt, BrigOpcode16_t opcode, hsa_bb *hbb)
-{
- tree lhs = gimple_call_lhs (dyn_cast <gcall *> (stmt));
- if (lhs == NULL_TREE)
- return;
- hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
- BrigType16_t brig_type = hsa_unsigned_type_for_type (dest->m_type);
- hsa_insn_basic *insn = new hsa_insn_basic (1, opcode, brig_type, dest);
- hbb->append_insn (insn);
-}
-
-/* Emit instructions that set hsa_num_threads according to provided VALUE.
- Instructions are appended to basic block HBB. */
-
-static void
-gen_set_num_threads (tree value, hsa_bb *hbb)
-{
- hbb->append_insn (new hsa_insn_comment ("omp_set_num_threads"));
- hsa_op_with_type *src = hsa_reg_or_immed_for_gimple_op (value, hbb);
-
- src = src->get_in_type (hsa_num_threads->m_type, hbb);
- hsa_op_address *addr = new hsa_op_address (hsa_num_threads);
-
- hsa_insn_basic *basic
- = new hsa_insn_mem (BRIG_OPCODE_ST, hsa_num_threads->m_type, src, addr);
- hbb->append_insn (basic);
-}
-
-/* Return byte offset of a FIELD_NAME in GOMP_hsa_kernel_dispatch which
- is defined in plugin-hsa.c. */
-
-static HOST_WIDE_INT
-get_hsa_kernel_dispatch_offset (const char *field_name)
-{
- tree *hsa_kernel_dispatch_type = hsa_get_kernel_dispatch_type ();
- if (*hsa_kernel_dispatch_type == NULL)
- {
- /* Collection of information needed for a dispatch of a kernel from a
- kernel. Keep in sync with libgomp's plugin-hsa.c. */
-
- *hsa_kernel_dispatch_type = make_node (RECORD_TYPE);
- tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("queue"), ptr_type_node);
- DECL_CHAIN (id_f1) = NULL_TREE;
- tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("omp_data_memory"),
- ptr_type_node);
- DECL_CHAIN (id_f2) = id_f1;
- tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("kernarg_address"),
- ptr_type_node);
- DECL_CHAIN (id_f3) = id_f2;
- tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("object"),
- uint64_type_node);
- DECL_CHAIN (id_f4) = id_f3;
- tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("signal"),
- uint64_type_node);
- DECL_CHAIN (id_f5) = id_f4;
- tree id_f6 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("private_segment_size"),
- uint32_type_node);
- DECL_CHAIN (id_f6) = id_f5;
- tree id_f7 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("group_segment_size"),
- uint32_type_node);
- DECL_CHAIN (id_f7) = id_f6;
- tree id_f8 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("kernel_dispatch_count"),
- uint64_type_node);
- DECL_CHAIN (id_f8) = id_f7;
- tree id_f9 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("debug"),
- uint64_type_node);
- DECL_CHAIN (id_f9) = id_f8;
- tree id_f10 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("omp_level"),
- uint64_type_node);
- DECL_CHAIN (id_f10) = id_f9;
- tree id_f11 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("children_dispatches"),
- ptr_type_node);
- DECL_CHAIN (id_f11) = id_f10;
- tree id_f12 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
- get_identifier ("omp_num_threads"),
- uint32_type_node);
- DECL_CHAIN (id_f12) = id_f11;
-
-
- finish_builtin_struct (*hsa_kernel_dispatch_type, "__hsa_kernel_dispatch",
- id_f12, NULL_TREE);
- TYPE_ARTIFICIAL (*hsa_kernel_dispatch_type) = 1;
- }
-
- for (tree chain = TYPE_FIELDS (*hsa_kernel_dispatch_type);
- chain != NULL_TREE; chain = TREE_CHAIN (chain))
- if (id_equal (DECL_NAME (chain), field_name))
- return int_byte_position (chain);
-
- gcc_unreachable ();
-}
-
-/* Return an HSA register that will contain number of threads for
- a future dispatched kernel. Instructions are added to HBB. */
-
-static hsa_op_reg *
-gen_num_threads_for_dispatch (hsa_bb *hbb)
-{
- /* Step 1) Assign to number of threads:
- MIN (HSA_DEFAULT_NUM_THREADS, hsa_num_threads). */
- hsa_op_reg *threads = new hsa_op_reg (hsa_num_threads->m_type);
- hsa_op_address *addr = new hsa_op_address (hsa_num_threads);
-
- hbb->append_insn (new hsa_insn_mem (BRIG_OPCODE_LD, threads->m_type,
- threads, addr));
-
- hsa_op_immed *limit = new hsa_op_immed (HSA_DEFAULT_NUM_THREADS,
- BRIG_TYPE_U32);
- hsa_op_reg *r = new hsa_op_reg (BRIG_TYPE_B1);
- hsa_insn_cmp * cmp
- = new hsa_insn_cmp (BRIG_COMPARE_LT, r->m_type, r, threads, limit);
- hbb->append_insn (cmp);
-
- BrigType16_t btype = hsa_bittype_for_type (threads->m_type);
- hsa_op_reg *tmp = new hsa_op_reg (threads->m_type);
-
- hbb->append_insn (new hsa_insn_basic (4, BRIG_OPCODE_CMOV, btype, tmp, r,
- threads, limit));
-
- /* Step 2) If the number is equal to zero,
- return shadow->omp_num_threads. */
- hsa_op_reg *shadow_reg_ptr = hsa_cfun->get_shadow_reg ();
-
- hsa_op_reg *shadow_thread_count = new hsa_op_reg (BRIG_TYPE_U32);
- addr
- = new hsa_op_address (shadow_reg_ptr,
- get_hsa_kernel_dispatch_offset ("omp_num_threads"));
- hsa_insn_basic *basic
- = new hsa_insn_mem (BRIG_OPCODE_LD, shadow_thread_count->m_type,
- shadow_thread_count, addr);
- hbb->append_insn (basic);
-
- hsa_op_reg *tmp2 = new hsa_op_reg (threads->m_type);
- r = new hsa_op_reg (BRIG_TYPE_B1);
- hsa_op_immed *imm = new hsa_op_immed (0, shadow_thread_count->m_type);
- hbb->append_insn (new hsa_insn_cmp (BRIG_COMPARE_EQ, r->m_type, r, tmp, imm));
- hbb->append_insn (new hsa_insn_basic (4, BRIG_OPCODE_CMOV, btype, tmp2, r,
- shadow_thread_count, tmp));
-
- hsa_op_base *dest = tmp2->get_in_type (BRIG_TYPE_U16, hbb);
-
- return as_a <hsa_op_reg *> (dest);
-}
-
-/* Build OPCODE query for all three hsa dimensions, multiply them and store the
- result into DEST. */
-
-static void
-multiply_grid_dim_characteristics (hsa_op_reg *dest, int opcode, hsa_bb *hbb)
-{
- hsa_op_reg *dimx = new hsa_op_reg (BRIG_TYPE_U32);
- query_hsa_grid_dim (dimx, opcode,
- new hsa_op_immed (0, (BrigKind16_t) BRIG_TYPE_U32), hbb);
- hsa_op_reg *dimy = new hsa_op_reg (BRIG_TYPE_U32);
- query_hsa_grid_dim (dimy, opcode,
- new hsa_op_immed (1, (BrigKind16_t) BRIG_TYPE_U32), hbb);
- hsa_op_reg *dimz = new hsa_op_reg (BRIG_TYPE_U32);
- query_hsa_grid_dim (dimz, opcode,
- new hsa_op_immed (2, (BrigKind16_t) BRIG_TYPE_U32), hbb);
- hsa_op_reg *tmp = new hsa_op_reg (dest->m_type);
- gen_hsa_binary_operation (BRIG_OPCODE_MUL, tmp,
- dimx->get_in_type (dest->m_type, hbb),
- dimy->get_in_type (dest->m_type, hbb), hbb);
- gen_hsa_binary_operation (BRIG_OPCODE_MUL, dest, tmp,
- dimz->get_in_type (dest->m_type, hbb), hbb);
-}
-
-/* Emit instructions that assign number of threads to lhs of gimple STMT.
- Instructions are appended to basic block HBB. */
-
-static void
-gen_get_num_threads (gimple *stmt, hsa_bb *hbb)
-{
- if (gimple_call_lhs (stmt) == NULL_TREE)
- return;
-
- hbb->append_insn (new hsa_insn_comment ("omp_get_num_threads"));
- tree lhs = gimple_call_lhs (stmt);
- hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
- multiply_grid_dim_characteristics (dest, BRIG_OPCODE_CURRENTWORKGROUPSIZE,
- hbb);
-}
-
-/* Emit instructions that assign number of teams to lhs of gimple STMT.
- Instructions are appended to basic block HBB. */
-
-static void
-gen_get_num_teams (gimple *stmt, hsa_bb *hbb)
-{
- if (gimple_call_lhs (stmt) == NULL_TREE)
- return;
-
- hbb->append_insn (new hsa_insn_comment ("omp_get_num_teams"));
- tree lhs = gimple_call_lhs (stmt);
- hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
- multiply_grid_dim_characteristics (dest, BRIG_OPCODE_GRIDGROUPS, hbb);
-}
-
-/* Emit instructions that assign a team number to lhs of gimple STMT.
- Instructions are appended to basic block HBB. */
-
-static void
-gen_get_team_num (gimple *stmt, hsa_bb *hbb)
-{
- if (gimple_call_lhs (stmt) == NULL_TREE)
- return;
-
- hbb->append_insn (new hsa_insn_comment ("omp_get_team_num"));
- tree lhs = gimple_call_lhs (stmt);
- hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
-
- hsa_op_reg *gnum_x = new hsa_op_reg (BRIG_TYPE_U32);
- query_hsa_grid_dim (gnum_x, BRIG_OPCODE_GRIDGROUPS,
- new hsa_op_immed (0, (BrigKind16_t) BRIG_TYPE_U32), hbb);
- hsa_op_reg *gnum_y = new hsa_op_reg (BRIG_TYPE_U32);
- query_hsa_grid_dim (gnum_y, BRIG_OPCODE_GRIDGROUPS,
- new hsa_op_immed (1, (BrigKind16_t) BRIG_TYPE_U32), hbb);
-
- hsa_op_reg *gno_z = new hsa_op_reg (BRIG_TYPE_U32);
- query_hsa_grid_dim (gno_z, BRIG_OPCODE_WORKGROUPID,
- new hsa_op_immed (2, (BrigKind16_t) BRIG_TYPE_U32), hbb);
-
- hsa_op_reg *tmp1 = new hsa_op_reg (dest->m_type);
- gen_hsa_binary_operation (BRIG_OPCODE_MUL, tmp1,
- gnum_x->get_in_type (dest->m_type, hbb),
- gnum_y->get_in_type (dest->m_type, hbb), hbb);
- hsa_op_reg *tmp2 = new hsa_op_reg (dest->m_type);
- gen_hsa_binary_operation (BRIG_OPCODE_MUL, tmp2, tmp1,
- gno_z->get_in_type (dest->m_type, hbb), hbb);
-
- hsa_op_reg *gno_y = new hsa_op_reg (BRIG_TYPE_U32);
- query_hsa_grid_dim (gno_y, BRIG_OPCODE_WORKGROUPID,
- new hsa_op_immed (1, (BrigKind16_t) BRIG_TYPE_U32), hbb);
- hsa_op_reg *tmp3 = new hsa_op_reg (dest->m_type);
- gen_hsa_binary_operation (BRIG_OPCODE_MUL, tmp3,
- gnum_x->get_in_type (dest->m_type, hbb),
- gno_y->get_in_type (dest->m_type, hbb), hbb);
- hsa_op_reg *tmp4 = new hsa_op_reg (dest->m_type);
- gen_hsa_binary_operation (BRIG_OPCODE_ADD, tmp4, tmp3, tmp2, hbb);
- hsa_op_reg *gno_x = new hsa_op_reg (BRIG_TYPE_U32);
- query_hsa_grid_dim (gno_x, BRIG_OPCODE_WORKGROUPID,
- new hsa_op_immed (0, (BrigKind16_t) BRIG_TYPE_U32), hbb);
- gen_hsa_binary_operation (BRIG_OPCODE_ADD, dest, tmp4,
- gno_x->get_in_type (dest->m_type, hbb), hbb);
-}
-
-/* Emit instructions that get levels-var ICV to lhs of gimple STMT.
- Instructions are appended to basic block HBB. */
-
-static void
-gen_get_level (gimple *stmt, hsa_bb *hbb)
-{
- if (gimple_call_lhs (stmt) == NULL_TREE)
- return;
-
- hbb->append_insn (new hsa_insn_comment ("omp_get_level"));
-
- tree lhs = gimple_call_lhs (stmt);
- hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
-
- hsa_op_reg *shadow_reg_ptr = hsa_cfun->get_shadow_reg ();
- if (shadow_reg_ptr == NULL)
- {
- HSA_SORRY_AT (gimple_location (stmt),
- "support for HSA does not implement %<omp_get_level%> "
- "called from a function not being inlined within a kernel");
- return;
- }
-
- hsa_op_address *addr
- = new hsa_op_address (shadow_reg_ptr,
- get_hsa_kernel_dispatch_offset ("omp_level"));
-
- hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_LD, BRIG_TYPE_U64,
- (hsa_op_base *) NULL, addr);
- hbb->append_insn (mem);
- mem->set_output_in_type (dest, 0, hbb);
-}
-
-/* Emit instruction that implement omp_get_max_threads of gimple STMT. */
-
-static void
-gen_get_max_threads (gimple *stmt, hsa_bb *hbb)
-{
- tree lhs = gimple_call_lhs (stmt);
- if (!lhs)
- return;
-
- hbb->append_insn (new hsa_insn_comment ("omp_get_max_threads"));
-
- hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
- hsa_op_with_type *num_theads_reg = gen_num_threads_for_dispatch (hbb)
- ->get_in_type (dest->m_type, hbb);
- hsa_build_append_simple_mov (dest, num_theads_reg, hbb);
-}
-
-/* Emit instructions that implement alloca builtin gimple STMT.
- Instructions are appended to basic block HBB. */
-
-static void
-gen_hsa_alloca (gcall *call, hsa_bb *hbb)
-{
- tree lhs = gimple_call_lhs (call);
- if (lhs == NULL_TREE)
- return;
-
- tree fndecl = gimple_call_fndecl (call);
- built_in_function fn = DECL_FUNCTION_CODE (fndecl);
-
- gcc_checking_assert (ALLOCA_FUNCTION_CODE_P (fn));
-
- unsigned bit_alignment = 0;
-
- if (fn != BUILT_IN_ALLOCA)
- {
- tree alignment_tree = gimple_call_arg (call, 1);
- if (TREE_CODE (alignment_tree) != INTEGER_CST)
- {
- HSA_SORRY_ATV (gimple_location (call),
- "support for HSA does not implement "
- "%qD with a non-constant alignment %E",
- fndecl, alignment_tree);
- }
-
- bit_alignment = tree_to_uhwi (alignment_tree);
- }
-
- tree rhs1 = gimple_call_arg (call, 0);
- hsa_op_with_type *size = hsa_reg_or_immed_for_gimple_op (rhs1, hbb)
- ->get_in_type (BRIG_TYPE_U32, hbb);
- hsa_op_with_type *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
-
- hsa_op_reg *tmp
- = new hsa_op_reg (hsa_get_segment_addr_type (BRIG_SEGMENT_PRIVATE));
- hsa_insn_alloca *a = new hsa_insn_alloca (tmp, size, bit_alignment);
- hbb->append_insn (a);
-
- hsa_insn_seg *seg
- = new hsa_insn_seg (BRIG_OPCODE_STOF,
- hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT),
- tmp->m_type, BRIG_SEGMENT_PRIVATE, dest, tmp);
- hbb->append_insn (seg);
-}
-
-/* Emit instructions that implement clrsb builtin STMT:
- Returns the number of leading redundant sign bits in x, i.e. the number
- of bits following the most significant bit that are identical to it.
- There are no special cases for 0 or other values.
- Instructions are appended to basic block HBB. */
-
-static void
-gen_hsa_clrsb (gcall *call, hsa_bb *hbb)
-{
- tree lhs = gimple_call_lhs (call);
- if (lhs == NULL_TREE)
- return;
-
- hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
- tree rhs1 = gimple_call_arg (call, 0);
- hsa_op_with_type *arg = hsa_reg_or_immed_for_gimple_op (rhs1, hbb);
- arg->extend_int_to_32bit (hbb);
- BrigType16_t bittype = hsa_bittype_for_type (arg->m_type);
- unsigned bitsize = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs1)));
-
- /* FIRSTBIT instruction is defined just for 32 and 64-bits wide integers. */
- gcc_checking_assert (bitsize == 32 || bitsize == 64);
-
- /* Set true to MOST_SIG if the most significant bit is set to one. */
- hsa_op_immed *c = new hsa_op_immed (1ul << (bitsize - 1),
- hsa_uint_for_bitsize (bitsize));
-
- hsa_op_reg *and_reg = new hsa_op_reg (bittype);
- gen_hsa_binary_operation (BRIG_OPCODE_AND, and_reg, arg, c, hbb);
-
- hsa_op_reg *most_sign = new hsa_op_reg (BRIG_TYPE_B1);
- hsa_insn_cmp *cmp
- = new hsa_insn_cmp (BRIG_COMPARE_EQ, most_sign->m_type, most_sign,
- and_reg, c);
- hbb->append_insn (cmp);
-
- /* If the most significant bit is one, negate the input. Otherwise
- shift the input value to left by one bit. */
- hsa_op_reg *arg_neg = new hsa_op_reg (arg->m_type);
- gen_hsa_unary_operation (BRIG_OPCODE_NEG, arg_neg, arg, hbb);
-
- hsa_op_reg *shifted_arg = new hsa_op_reg (arg->m_type);
- gen_hsa_binary_operation (BRIG_OPCODE_SHL, shifted_arg, arg,
- new hsa_op_immed (1, BRIG_TYPE_U64), hbb);
-
- /* Assign the value that can be used for FIRSTBIT instruction according
- to the most significant bit. */
- hsa_op_reg *tmp = new hsa_op_reg (bittype);
- hsa_insn_basic *cmov
- = new hsa_insn_basic (4, BRIG_OPCODE_CMOV, bittype, tmp, most_sign,
- arg_neg, shifted_arg);
- hbb->append_insn (cmov);
-
- hsa_op_reg *leading_bits = new hsa_op_reg (BRIG_TYPE_S32);
- gen_hsa_unary_operation (BRIG_OPCODE_FIRSTBIT, leading_bits,
- tmp->get_in_type (hsa_uint_for_bitsize (bitsize),
- hbb), hbb);
-
- /* Set flag if the input value is equal to zero. */
- hsa_op_reg *is_zero = new hsa_op_reg (BRIG_TYPE_B1);
- cmp = new hsa_insn_cmp (BRIG_COMPARE_EQ, is_zero->m_type, is_zero, arg,
- new hsa_op_immed (0, arg->m_type));
- hbb->append_insn (cmp);
-
- /* Return the number of leading bits,
- or (bitsize - 1) if the input value is zero. */
- cmov = new hsa_insn_basic (4, BRIG_OPCODE_CMOV, BRIG_TYPE_B32, NULL, is_zero,
- new hsa_op_immed (bitsize - 1, BRIG_TYPE_U32),
- leading_bits->get_in_type (BRIG_TYPE_B32, hbb));
- hbb->append_insn (cmov);
- cmov->set_output_in_type (dest, 0, hbb);
-}
-
-/* Emit instructions that implement ffs builtin STMT:
- Returns one plus the index of the least significant 1-bit of x,
- or if x is zero, returns zero.
- Instructions are appended to basic block HBB. */
-
-static void
-gen_hsa_ffs (gcall *call, hsa_bb *hbb)
-{
- tree lhs = gimple_call_lhs (call);
- if (lhs == NULL_TREE)
- return;
-
- hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
-
- tree rhs1 = gimple_call_arg (call, 0);
- hsa_op_with_type *arg = hsa_reg_or_immed_for_gimple_op (rhs1, hbb);
- arg = arg->extend_int_to_32bit (hbb);
-
- hsa_op_reg *tmp = new hsa_op_reg (BRIG_TYPE_U32);
- hsa_insn_srctype *insn = new hsa_insn_srctype (2, BRIG_OPCODE_LASTBIT,
- tmp->m_type, arg->m_type,
- tmp, arg);
- hbb->append_insn (insn);
-
- hsa_insn_basic *addition
- = new hsa_insn_basic (3, BRIG_OPCODE_ADD, tmp->m_type, NULL, tmp,
- new hsa_op_immed (1, tmp->m_type));
- hbb->append_insn (addition);
- addition->set_output_in_type (dest, 0, hbb);
-}
-
-static void
-gen_hsa_popcount_to_dest (hsa_op_reg *dest, hsa_op_with_type *arg, hsa_bb *hbb)
-{
- gcc_checking_assert (hsa_type_integer_p (arg->m_type));
-
- if (hsa_type_bit_size (arg->m_type) < 32)
- arg = arg->get_in_type (BRIG_TYPE_B32, hbb);
-
- BrigType16_t srctype = hsa_bittype_for_type (arg->m_type);
- if (!hsa_btype_p (arg->m_type))
- arg = arg->get_in_type (srctype, hbb);
-
- hsa_insn_srctype *popcount
- = new hsa_insn_srctype (2, BRIG_OPCODE_POPCOUNT, BRIG_TYPE_U32,
- srctype, NULL, arg);
- hbb->append_insn (popcount);
- popcount->set_output_in_type (dest, 0, hbb);
-}
-
-/* Emit instructions that implement parity builtin STMT:
- Returns the parity of x, i.e. the number of 1-bits in x modulo 2.
- Instructions are appended to basic block HBB. */
-
-static void
-gen_hsa_parity (gcall *call, hsa_bb *hbb)
-{
- tree lhs = gimple_call_lhs (call);
- if (lhs == NULL_TREE)
- return;
-
- hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
- tree rhs1 = gimple_call_arg (call, 0);
- hsa_op_with_type *arg = hsa_reg_or_immed_for_gimple_op (rhs1, hbb);
-
- hsa_op_reg *popcount = new hsa_op_reg (BRIG_TYPE_U32);
- gen_hsa_popcount_to_dest (popcount, arg, hbb);
-
- hsa_insn_basic *insn
- = new hsa_insn_basic (3, BRIG_OPCODE_REM, popcount->m_type, NULL, popcount,
- new hsa_op_immed (2, popcount->m_type));
- hbb->append_insn (insn);
- insn->set_output_in_type (dest, 0, hbb);
-}
-
-/* Emit instructions that implement popcount builtin STMT.
- Instructions are appended to basic block HBB. */
-
-static void
-gen_hsa_popcount (gcall *call, hsa_bb *hbb)
-{
- tree lhs = gimple_call_lhs (call);
- if (lhs == NULL_TREE)
- return;
-
- hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
- tree rhs1 = gimple_call_arg (call, 0);
- hsa_op_with_type *arg = hsa_reg_or_immed_for_gimple_op (rhs1, hbb);
-
- gen_hsa_popcount_to_dest (dest, arg, hbb);
-}
-
-/* Emit instructions that implement DIVMOD builtin STMT.
- Instructions are appended to basic block HBB. */
-
-static void
-gen_hsa_divmod (gcall *call, hsa_bb *hbb)
-{
- tree lhs = gimple_call_lhs (call);
- if (lhs == NULL_TREE)
- return;
-
- tree rhs0 = gimple_call_arg (call, 0);
- tree rhs1 = gimple_call_arg (call, 1);
-
- hsa_op_with_type *arg0 = hsa_reg_or_immed_for_gimple_op (rhs0, hbb);
- arg0 = arg0->extend_int_to_32bit (hbb);
- hsa_op_with_type *arg1 = hsa_reg_or_immed_for_gimple_op (rhs1, hbb);
- arg1 = arg1->extend_int_to_32bit (hbb);
-
- hsa_op_reg *dest0 = new hsa_op_reg (arg0->m_type);
- hsa_op_reg *dest1 = new hsa_op_reg (arg1->m_type);
-
- hsa_insn_basic *insn = new hsa_insn_basic (3, BRIG_OPCODE_DIV, dest0->m_type,
- dest0, arg0, arg1);
- hbb->append_insn (insn);
- insn = new hsa_insn_basic (3, BRIG_OPCODE_REM, dest1->m_type, dest1, arg0,
- arg1);
- hbb->append_insn (insn);
-
- hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
- BrigType16_t dst_type = hsa_extend_inttype_to_32bit (dest->m_type);
- BrigType16_t src_type = hsa_bittype_for_type (dest0->m_type);
-
- insn = new hsa_insn_packed (3, BRIG_OPCODE_COMBINE, dst_type,
- src_type, NULL, dest0, dest1);
- hbb->append_insn (insn);
- insn->set_output_in_type (dest, 0, hbb);
-}
-
-/* Emit instructions that implement FMA, FMS, FNMA or FNMS call STMT.
- Instructions are appended to basic block HBB. NEGATE1 is true for
- FNMA and FNMS. NEGATE3 is true for FMS and FNMS. */
-
-static void
-gen_hsa_fma (gcall *call, hsa_bb *hbb, bool negate1, bool negate3)
-{
- tree lhs = gimple_call_lhs (call);
- if (lhs == NULL_TREE)
- return;
-
- tree rhs1 = gimple_call_arg (call, 0);
- tree rhs2 = gimple_call_arg (call, 1);
- tree rhs3 = gimple_call_arg (call, 2);
-
- hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
- hsa_op_with_type *op1 = hsa_reg_or_immed_for_gimple_op (rhs1, hbb);
- hsa_op_with_type *op2 = hsa_reg_or_immed_for_gimple_op (rhs2, hbb);
- hsa_op_with_type *op3 = hsa_reg_or_immed_for_gimple_op (rhs3, hbb);
-
- if (negate1)
- {
- hsa_op_reg *tmp = new hsa_op_reg (dest->m_type);
- gen_hsa_unary_operation (BRIG_OPCODE_NEG, tmp, op1, hbb);
- op1 = tmp;
- }
-
- /* There is a native HSA instruction for scalar FMAs but not for vector
- ones. */
- if (TREE_CODE (TREE_TYPE (lhs)) == VECTOR_TYPE)
- {
- hsa_op_reg *tmp = new hsa_op_reg (dest->m_type);
- gen_hsa_binary_operation (BRIG_OPCODE_MUL, tmp, op1, op2, hbb);
- gen_hsa_binary_operation (negate3 ? BRIG_OPCODE_SUB : BRIG_OPCODE_ADD,
- dest, tmp, op3, hbb);
- }
- else
- {
- if (negate3)
- {
- hsa_op_reg *tmp = new hsa_op_reg (dest->m_type);
- gen_hsa_unary_operation (BRIG_OPCODE_NEG, tmp, op3, hbb);
- op3 = tmp;
- }
- hsa_insn_basic *insn = new hsa_insn_basic (4, BRIG_OPCODE_MAD,
- dest->m_type, dest,
- op1, op2, op3);
- hbb->append_insn (insn);
- }
-}
-
-/* Set VALUE to a shadow kernel debug argument and append a new instruction
- to HBB basic block. */
-
-static void
-set_debug_value (hsa_bb *hbb, hsa_op_with_type *value)
-{
- hsa_op_reg *shadow_reg_ptr = hsa_cfun->get_shadow_reg ();
- if (shadow_reg_ptr == NULL)
- return;
-
- hsa_op_address *addr
- = new hsa_op_address (shadow_reg_ptr,
- get_hsa_kernel_dispatch_offset ("debug"));
- hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_ST, BRIG_TYPE_U64, value,
- addr);
- hbb->append_insn (mem);
-}
-
-void
-omp_simple_builtin::generate (gimple *stmt, hsa_bb *hbb)
-{
- if (m_sorry)
- {
- if (m_warning_message)
- HSA_SORRY_AT (gimple_location (stmt), m_warning_message);
- else
- HSA_SORRY_ATV (gimple_location (stmt),
- "support for HSA does not implement calls to %qs",
- m_name);
- }
- else if (m_warning_message != NULL)
- warning_at (gimple_location (stmt), OPT_Whsa, m_warning_message);
-
- if (m_return_value != NULL)
- {
- tree lhs = gimple_call_lhs (stmt);
- if (!lhs)
- return;
-
- hbb->append_insn (new hsa_insn_comment (m_name));
-
- hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
- hsa_op_with_type *op = m_return_value->get_in_type (dest->m_type, hbb);
- hsa_build_append_simple_mov (dest, op, hbb);
- }
-}
-
-/* If STMT is a call of a known library function, generate code to perform
- it and return true. */
-
-static bool
-gen_hsa_insns_for_known_library_call (gimple *stmt, hsa_bb *hbb)
-{
- bool handled = false;
- const char *name = hsa_get_declaration_name (gimple_call_fndecl (stmt));
-
- char *copy = NULL;
- size_t len = strlen (name);
- if (len > 0 && name[len - 1] == '_')
- {
- copy = XNEWVEC (char, len + 1);
- strcpy (copy, name);
- copy[len - 1] = '\0';
- name = copy;
- }
-
- /* Handle omp_* routines. */
- if (strstr (name, "omp_") == name)
- {
- hsa_init_simple_builtins ();
- omp_simple_builtin *builtin = omp_simple_builtins->get (name);
- if (builtin)
- {
- builtin->generate (stmt, hbb);
- return true;
- }
-
- handled = true;
- if (strcmp (name, "omp_set_num_threads") == 0)
- gen_set_num_threads (gimple_call_arg (stmt, 0), hbb);
- else if (strcmp (name, "omp_get_thread_num") == 0)
- {
- hbb->append_insn (new hsa_insn_comment (name));
- query_hsa_grid_nodim (stmt, BRIG_OPCODE_WORKITEMFLATABSID, hbb);
- }
- else if (strcmp (name, "omp_get_num_threads") == 0)
- {
- hbb->append_insn (new hsa_insn_comment (name));
- gen_get_num_threads (stmt, hbb);
- }
- else if (strcmp (name, "omp_get_num_teams") == 0)
- gen_get_num_teams (stmt, hbb);
- else if (strcmp (name, "omp_get_team_num") == 0)
- gen_get_team_num (stmt, hbb);
- else if (strcmp (name, "omp_get_level") == 0)
- gen_get_level (stmt, hbb);
- else if (strcmp (name, "omp_get_active_level") == 0)
- gen_get_level (stmt, hbb);
- else if (strcmp (name, "omp_in_parallel") == 0)
- gen_get_level (stmt, hbb);
- else if (strcmp (name, "omp_get_max_threads") == 0)
- gen_get_max_threads (stmt, hbb);
- else
- handled = false;
-
- if (handled)
- {
- if (copy)
- free (copy);
- return true;
- }
- }
-
- if (strcmp (name, "__hsa_set_debug_value") == 0)
- {
- handled = true;
- if (hsa_cfun->has_shadow_reg_p ())
- {
- tree rhs1 = gimple_call_arg (stmt, 0);
- hsa_op_with_type *src = hsa_reg_or_immed_for_gimple_op (rhs1, hbb);
-
- src = src->get_in_type (BRIG_TYPE_U64, hbb);
- set_debug_value (hbb, src);
- }
- }
-
- if (copy)
- free (copy);
- return handled;
-}
-
-/* Helper functions to create a single unary HSA operations out of calls to
- builtins. OPCODE is the HSA operation to be generated. STMT is a gimple
- call to a builtin. HBB is the HSA BB to which the instruction should be
- added. Note that nothing will be created if STMT does not have a LHS. */
-
-static void
-gen_hsa_unaryop_for_builtin (BrigOpcode opcode, gimple *stmt, hsa_bb *hbb)
-{
- tree lhs = gimple_call_lhs (stmt);
- if (!lhs)
- return;
- hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
- hsa_op_with_type *op
- = hsa_reg_or_immed_for_gimple_op (gimple_call_arg (stmt, 0), hbb);
- gen_hsa_unary_operation (opcode, dest, op, hbb);
-}
-
-/* Helper functions to create a call to standard library if LHS of the
- STMT is used. HBB is the HSA BB to which the instruction should be
- added. */
-
-static void
-gen_hsa_unaryop_builtin_call (gimple *stmt, hsa_bb *hbb)
-{
- tree lhs = gimple_call_lhs (stmt);
- if (!lhs)
- return;
-
- if (gimple_call_internal_p (stmt))
- gen_hsa_insns_for_call_of_internal_fn (stmt, hbb);
- else
- gen_hsa_insns_for_direct_call (stmt, hbb);
-}
-
-/* Helper functions to create a single unary HSA operations out of calls to
- builtins (if unsafe math optimizations are enable). Otherwise, create
- a call to standard library function.
- OPCODE is the HSA operation to be generated. STMT is a gimple
- call to a builtin. HBB is the HSA BB to which the instruction should be
- added. Note that nothing will be created if STMT does not have a LHS. */
-
-static void
-gen_hsa_unaryop_or_call_for_builtin (BrigOpcode opcode, gimple *stmt,
- hsa_bb *hbb)
-{
- if (flag_unsafe_math_optimizations)
- gen_hsa_unaryop_for_builtin (opcode, stmt, hbb);
- else
- gen_hsa_unaryop_builtin_call (stmt, hbb);
-}
-
-/* Generate HSA address corresponding to a value VAL (as opposed to a memory
- reference tree), for example an SSA_NAME or an ADDR_EXPR. HBB is the HSA BB
- to which the instruction should be added. */
-
-static hsa_op_address *
-get_address_from_value (tree val, hsa_bb *hbb)
-{
- switch (TREE_CODE (val))
- {
- case SSA_NAME:
- {
- BrigType16_t addrtype = hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT);
- hsa_op_base *reg
- = hsa_cfun->reg_for_gimple_ssa (val)->get_in_type (addrtype, hbb);
- return new hsa_op_address (NULL, as_a <hsa_op_reg *> (reg), 0);
- }
- case ADDR_EXPR:
- return gen_hsa_addr (TREE_OPERAND (val, 0), hbb);
-
- case INTEGER_CST:
- if (tree_fits_shwi_p (val))
- return new hsa_op_address (NULL, NULL, tree_to_shwi (val));
- /* fall-through */
-
- default:
- HSA_SORRY_ATV (EXPR_LOCATION (val),
- "support for HSA does not implement memory access to %E",
- val);
- return new hsa_op_address (NULL, NULL, 0);
- }
-}
-
-/* Expand assignment of a result of a string BUILTIN to DST.
- Size of the operation is N bytes, where instructions
- will be append to HBB. */
-
-static void
-expand_lhs_of_string_op (gimple *stmt,
- unsigned HOST_WIDE_INT n, hsa_bb *hbb,
- enum built_in_function builtin)
-{
- /* If LHS is expected, we need to emit a PHI instruction. */
- tree lhs = gimple_call_lhs (stmt);
- if (!lhs)
- return;
-
- hsa_op_reg *lhs_reg = hsa_cfun->reg_for_gimple_ssa (lhs);
-
- hsa_op_with_type *dst_reg
- = hsa_reg_or_immed_for_gimple_op (gimple_call_arg (stmt, 0), hbb);
- hsa_op_with_type *tmp;
-
- switch (builtin)
- {
- case BUILT_IN_MEMPCPY:
- {
- tmp = new hsa_op_reg (dst_reg->m_type);
- hsa_insn_basic *add
- = new hsa_insn_basic (3, BRIG_OPCODE_ADD, tmp->m_type,
- tmp, dst_reg,
- new hsa_op_immed (n, dst_reg->m_type));
- hbb->append_insn (add);
- break;
- }
- case BUILT_IN_MEMCPY:
- case BUILT_IN_MEMSET:
- tmp = dst_reg;
- break;
- default:
- gcc_unreachable ();
- }
-
- hbb->append_insn (new hsa_insn_basic (2, BRIG_OPCODE_MOV, lhs_reg->m_type,
- lhs_reg, tmp));
-}
-
-#define HSA_MEMORY_BUILTINS_LIMIT 128
-
-/* Expand a string builtin (from a gimple STMT) in a way that
- according to MISALIGNED_FLAG we process either direct emission
- (a bunch of memory load and store instructions), or we emit a function call
- of a library function (for instance 'memcpy'). Actually, a basic block
- for direct emission is just prepared, where caller is responsible
- for emission of corresponding instructions.
- All instruction are appended to HBB. */
-
-hsa_bb *
-expand_string_operation_builtin (gimple *stmt, hsa_bb *hbb,
- hsa_op_reg *misaligned_flag)
-{
- edge e = split_block (hbb->m_bb, stmt);
- basic_block condition_bb = e->src;
- hbb->append_insn (new hsa_insn_cbr (misaligned_flag));
-
- /* Prepare the control flow. */
- edge condition_edge = EDGE_SUCC (condition_bb, 0);
- basic_block call_bb = split_edge (condition_edge);
-
- basic_block expanded_bb = split_edge (EDGE_SUCC (call_bb, 0));
- basic_block cont_bb = EDGE_SUCC (expanded_bb, 0)->dest;
- basic_block merge_bb = split_edge (EDGE_PRED (cont_bb, 0));
-
- condition_edge->flags &= ~EDGE_FALLTHRU;
- condition_edge->flags |= EDGE_TRUE_VALUE;
- make_edge (condition_bb, expanded_bb, EDGE_FALSE_VALUE);
-
- redirect_edge_succ (EDGE_SUCC (call_bb, 0), merge_bb);
-
- hsa_cfun->m_modified_cfg = true;
-
- hsa_init_new_bb (expanded_bb);
-
- /* Slow path: function call. */
- gen_hsa_insns_for_direct_call (stmt, hsa_init_new_bb (call_bb), false);
-
- return hsa_bb_for_bb (expanded_bb);
-}
-
-/* Expand a memory copy BUILTIN (BUILT_IN_MEMCPY, BUILT_IN_MEMPCPY) from
- a gimple STMT and store all necessary instruction to HBB basic block. */
-
-static void
-expand_memory_copy (gimple *stmt, hsa_bb *hbb, enum built_in_function builtin)
-{
- tree byte_size = gimple_call_arg (stmt, 2);
-
- if (!tree_fits_uhwi_p (byte_size))
- {
- gen_hsa_insns_for_direct_call (stmt, hbb);
- return;
- }
-
- unsigned HOST_WIDE_INT n = tree_to_uhwi (byte_size);
-
- if (n > HSA_MEMORY_BUILTINS_LIMIT)
- {
- gen_hsa_insns_for_direct_call (stmt, hbb);
- return;
- }
-
- tree dst = gimple_call_arg (stmt, 0);
- tree src = gimple_call_arg (stmt, 1);
-
- hsa_op_address *dst_addr = get_address_from_value (dst, hbb);
- hsa_op_address *src_addr = get_address_from_value (src, hbb);
-
- /* As gen_hsa_memory_copy relies on memory alignment
- greater or equal to 8 bytes, we need to verify the alignment. */
- BrigType16_t addrtype = hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT);
- hsa_op_reg *src_addr_reg = new hsa_op_reg (addrtype);
- hsa_op_reg *dst_addr_reg = new hsa_op_reg (addrtype);
-
- convert_addr_to_flat_segment (src_addr, src_addr_reg, hbb);
- convert_addr_to_flat_segment (dst_addr, dst_addr_reg, hbb);
-
- /* Process BIT OR for source and destination addresses. */
- hsa_op_reg *or_reg = new hsa_op_reg (addrtype);
- gen_hsa_binary_operation (BRIG_OPCODE_OR, or_reg, src_addr_reg,
- dst_addr_reg, hbb);
-
- /* Process BIT AND with 0x7 to identify the desired alignment
- of 8 bytes. */
- hsa_op_reg *masked = new hsa_op_reg (addrtype);
-
- gen_hsa_binary_operation (BRIG_OPCODE_AND, masked, or_reg,
- new hsa_op_immed (7, addrtype), hbb);
-
- hsa_op_reg *misaligned = new hsa_op_reg (BRIG_TYPE_B1);
- hbb->append_insn (new hsa_insn_cmp (BRIG_COMPARE_NE, misaligned->m_type,
- misaligned, masked,
- new hsa_op_immed (0, masked->m_type)));
-
- hsa_bb *native_impl_bb
- = expand_string_operation_builtin (stmt, hbb, misaligned);
-
- gen_hsa_memory_copy (native_impl_bb, dst_addr, src_addr, n, BRIG_ALIGNMENT_8);
- hsa_bb *merge_bb
- = hsa_init_new_bb (EDGE_SUCC (native_impl_bb->m_bb, 0)->dest);
- expand_lhs_of_string_op (stmt, n, merge_bb, builtin);
-}
-
-
-/* Expand a memory set BUILTIN (BUILT_IN_MEMSET, BUILT_IN_BZERO) from
- a gimple STMT and store all necessary instruction to HBB basic block.
- The operation set N bytes with a CONSTANT value. */
-
-static void
-expand_memory_set (gimple *stmt, unsigned HOST_WIDE_INT n,
- unsigned HOST_WIDE_INT constant, hsa_bb *hbb,
- enum built_in_function builtin)
-{
- tree dst = gimple_call_arg (stmt, 0);
- hsa_op_address *dst_addr = get_address_from_value (dst, hbb);
-
- /* As gen_hsa_memory_set relies on memory alignment
- greater or equal to 8 bytes, we need to verify the alignment. */
- BrigType16_t addrtype = hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT);
- hsa_op_reg *dst_addr_reg = new hsa_op_reg (addrtype);
- convert_addr_to_flat_segment (dst_addr, dst_addr_reg, hbb);
-
- /* Process BIT AND with 0x7 to identify the desired alignment
- of 8 bytes. */
- hsa_op_reg *masked = new hsa_op_reg (addrtype);
-
- gen_hsa_binary_operation (BRIG_OPCODE_AND, masked, dst_addr_reg,
- new hsa_op_immed (7, addrtype), hbb);
-
- hsa_op_reg *misaligned = new hsa_op_reg (BRIG_TYPE_B1);
- hbb->append_insn (new hsa_insn_cmp (BRIG_COMPARE_NE, misaligned->m_type,
- misaligned, masked,
- new hsa_op_immed (0, masked->m_type)));
-
- hsa_bb *native_impl_bb
- = expand_string_operation_builtin (stmt, hbb, misaligned);
-
- gen_hsa_memory_set (native_impl_bb, dst_addr, constant, n, BRIG_ALIGNMENT_8);
- hsa_bb *merge_bb
- = hsa_init_new_bb (EDGE_SUCC (native_impl_bb->m_bb, 0)->dest);
- expand_lhs_of_string_op (stmt, n, merge_bb, builtin);
-}
-
-/* Store into MEMORDER the memory order specified by tree T, which must be an
- integer constant representing a C++ memory order. If it isn't, issue an HSA
- sorry message using LOC and return true, otherwise return false and store
- the name of the requested order to *MNAME. */
-
-static bool
-hsa_memorder_from_tree (tree t, BrigMemoryOrder *memorder, const char **mname,
- location_t loc)
-{
- if (!tree_fits_uhwi_p (t))
- {
- HSA_SORRY_ATV (loc, "support for HSA does not implement memory model %E",
- t);
- return true;
- }
-
- unsigned HOST_WIDE_INT mm = tree_to_uhwi (t);
- switch (mm & MEMMODEL_BASE_MASK)
- {
- case MEMMODEL_RELAXED:
- *memorder = BRIG_MEMORY_ORDER_RELAXED;
- *mname = "relaxed";
- break;
- case MEMMODEL_CONSUME:
- /* HSA does not have an equivalent, but we can use the slightly stronger
- ACQUIRE. */
- *memorder = BRIG_MEMORY_ORDER_SC_ACQUIRE;
- *mname = "consume";
- break;
- case MEMMODEL_ACQUIRE:
- *memorder = BRIG_MEMORY_ORDER_SC_ACQUIRE;
- *mname = "acquire";
- break;
- case MEMMODEL_RELEASE:
- *memorder = BRIG_MEMORY_ORDER_SC_RELEASE;
- *mname = "release";
- break;
- case MEMMODEL_ACQ_REL:
- *memorder = BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE;
- *mname = "acq_rel";
- break;
- case MEMMODEL_SEQ_CST:
- /* Callers implementing a simple load or store need to remove the release
- or acquire part respectively. */
- *memorder = BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE;
- *mname = "seq_cst";
- break;
- default:
- {
- HSA_SORRY_AT (loc, "support for HSA does not implement the specified "
- "memory model");
- return true;
- }
- }
- return false;
-}
-
-/* Helper function to create an HSA atomic operation instruction out of calls
- to atomic builtins. RET_ORIG is true if the built-in is the variant that
- return s the value before applying operation, and false if it should return
- the value after applying the operation (if it returns value at all). ACODE
- is the atomic operation code, STMT is a gimple call to a builtin. HBB is
- the HSA BB to which the instruction should be added. If SIGNAL is true, the
- created operation will work on HSA signals rather than atomic variables. */
-
-static void
-gen_hsa_atomic_for_builtin (bool ret_orig, enum BrigAtomicOperation acode,
- gimple *stmt, hsa_bb *hbb, bool signal)
-{
- tree lhs = gimple_call_lhs (stmt);
-
- tree type = TREE_TYPE (gimple_call_arg (stmt, 1));
- BrigType16_t hsa_type = hsa_type_for_scalar_tree_type (type, false);
- BrigType16_t mtype = mem_type_for_type (hsa_type);
- BrigMemoryOrder memorder;
- const char *mmname;
-
- if (hsa_memorder_from_tree (gimple_call_arg (stmt, 2), &memorder, &mmname,
- gimple_location (stmt)))
- return;
-
- /* Certain atomic insns must have Bx memory types. */
- switch (acode)
- {
- case BRIG_ATOMIC_LD:
- case BRIG_ATOMIC_ST:
- case BRIG_ATOMIC_AND:
- case BRIG_ATOMIC_OR:
- case BRIG_ATOMIC_XOR:
- case BRIG_ATOMIC_EXCH:
- mtype = hsa_bittype_for_type (mtype);
- break;
- default:
- break;
- }
-
- hsa_op_reg *dest;
- int nops, opcode;
- if (lhs)
- {
- if (ret_orig)
- dest = hsa_cfun->reg_for_gimple_ssa (lhs);
- else
- dest = new hsa_op_reg (hsa_type);
- opcode = signal ? BRIG_OPCODE_SIGNAL : BRIG_OPCODE_ATOMIC;
- nops = 3;
- }
- else
- {
- dest = NULL;
- opcode = signal ? BRIG_OPCODE_SIGNALNORET : BRIG_OPCODE_ATOMICNORET;
- nops = 2;
- }
-
- if (acode == BRIG_ATOMIC_ST)
- {
- if (memorder == BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE)
- memorder = BRIG_MEMORY_ORDER_SC_RELEASE;
-
- if (memorder != BRIG_MEMORY_ORDER_RELAXED
- && memorder != BRIG_MEMORY_ORDER_SC_RELEASE
- && memorder != BRIG_MEMORY_ORDER_NONE)
- {
- HSA_SORRY_ATV (gimple_location (stmt),
- "support for HSA does not implement memory model for "
- "%<ATOMIC_ST%>: %s", mmname);
- return;
- }
- }
-
- hsa_insn_basic *atominsn;
- hsa_op_base *tgt;
- if (signal)
- {
- atominsn = new hsa_insn_signal (nops, opcode, acode, mtype, memorder);
- tgt = hsa_reg_or_immed_for_gimple_op (gimple_call_arg (stmt, 0), hbb);
- }
- else
- {
- atominsn = new hsa_insn_atomic (nops, opcode, acode, mtype, memorder);
- hsa_op_address *addr;
- addr = get_address_from_value (gimple_call_arg (stmt, 0), hbb);
- if (addr->m_symbol && addr->m_symbol->m_segment == BRIG_SEGMENT_PRIVATE)
- {
- HSA_SORRY_AT (gimple_location (stmt),
- "HSA does not implement atomic operations in private "
- "segment");
- return;
- }
- tgt = addr;
- }
-
- hsa_op_with_type *op
- = hsa_reg_or_immed_for_gimple_op (gimple_call_arg (stmt, 1), hbb);
- if (lhs)
- {
- atominsn->set_op (0, dest);
- atominsn->set_op (1, tgt);
- atominsn->set_op (2, op);
- }
- else
- {
- atominsn->set_op (0, tgt);
- atominsn->set_op (1, op);
- }
-
- hbb->append_insn (atominsn);
-
- /* HSA does not natively support the variants that return the modified value,
- so re-do the operation again non-atomically if that is what was
- requested. */
- if (lhs && !ret_orig)
- {
- int arith;
- switch (acode)
- {
- case BRIG_ATOMIC_ADD:
- arith = BRIG_OPCODE_ADD;
- break;
- case BRIG_ATOMIC_AND:
- arith = BRIG_OPCODE_AND;
- break;
- case BRIG_ATOMIC_OR:
- arith = BRIG_OPCODE_OR;
- break;
- case BRIG_ATOMIC_SUB:
- arith = BRIG_OPCODE_SUB;
- break;
- case BRIG_ATOMIC_XOR:
- arith = BRIG_OPCODE_XOR;
- break;
- default:
- gcc_unreachable ();
- }
- hsa_op_reg *real_dest = hsa_cfun->reg_for_gimple_ssa (lhs);
- gen_hsa_binary_operation (arith, real_dest, dest, op, hbb);
- }
-}
-
-/* Generate HSA instructions for an internal fn.
- Instructions will be appended to HBB, which also needs to be the
- corresponding structure to the basic_block of STMT. */
-
-static void
-gen_hsa_insn_for_internal_fn_call (gcall *stmt, hsa_bb *hbb)
-{
- gcc_checking_assert (gimple_call_internal_fn (stmt));
- internal_fn fn = gimple_call_internal_fn (stmt);
-
- bool is_float_type_p = false;
- if (gimple_call_lhs (stmt) != NULL
- && TREE_TYPE (gimple_call_lhs (stmt)) == float_type_node)
- is_float_type_p = true;
-
- switch (fn)
- {
- case IFN_CEIL:
- gen_hsa_unaryop_for_builtin (BRIG_OPCODE_CEIL, stmt, hbb);
- break;
-
- case IFN_FLOOR:
- gen_hsa_unaryop_for_builtin (BRIG_OPCODE_FLOOR, stmt, hbb);
- break;
-
- case IFN_RINT:
- gen_hsa_unaryop_for_builtin (BRIG_OPCODE_RINT, stmt, hbb);
- break;
-
- case IFN_SQRT:
- gen_hsa_unaryop_for_builtin (BRIG_OPCODE_SQRT, stmt, hbb);
- break;
-
- case IFN_RSQRT:
- gen_hsa_unaryop_for_builtin (BRIG_OPCODE_NRSQRT, stmt, hbb);
- break;
-
- case IFN_TRUNC:
- gen_hsa_unaryop_for_builtin (BRIG_OPCODE_TRUNC, stmt, hbb);
- break;
-
- case IFN_COS:
- {
- if (is_float_type_p)
- gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NCOS, stmt, hbb);
- else
- gen_hsa_unaryop_builtin_call (stmt, hbb);
-
- break;
- }
- case IFN_EXP2:
- {
- if (is_float_type_p)
- gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NEXP2, stmt, hbb);
- else
- gen_hsa_unaryop_builtin_call (stmt, hbb);
-
- break;
- }
-
- case IFN_LOG2:
- {
- if (is_float_type_p)
- gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NLOG2, stmt, hbb);
- else
- gen_hsa_unaryop_builtin_call (stmt, hbb);
-
- break;
- }
-
- case IFN_SIN:
- {
- if (is_float_type_p)
- gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NSIN, stmt, hbb);
- else
- gen_hsa_unaryop_builtin_call (stmt, hbb);
- break;
- }
-
- case IFN_CLRSB:
- gen_hsa_clrsb (stmt, hbb);
- break;
-
- case IFN_CLZ:
- gen_hsa_unaryop_for_builtin (BRIG_OPCODE_FIRSTBIT, stmt, hbb);
- break;
-
- case IFN_CTZ:
- gen_hsa_unaryop_for_builtin (BRIG_OPCODE_LASTBIT, stmt, hbb);
- break;
-
- case IFN_FFS:
- gen_hsa_ffs (stmt, hbb);
- break;
-
- case IFN_PARITY:
- gen_hsa_parity (stmt, hbb);
- break;
-
- case IFN_POPCOUNT:
- gen_hsa_popcount (stmt, hbb);
- break;
-
- case IFN_DIVMOD:
- gen_hsa_divmod (stmt, hbb);
- break;
-
- case IFN_ACOS:
- case IFN_ASIN:
- case IFN_ATAN:
- case IFN_EXP:
- case IFN_EXP10:
- case IFN_EXPM1:
- case IFN_LOG:
- case IFN_LOG10:
- case IFN_LOG1P:
- case IFN_LOGB:
- case IFN_SIGNIFICAND:
- case IFN_TAN:
- case IFN_NEARBYINT:
- case IFN_ROUND:
- case IFN_ATAN2:
- case IFN_COPYSIGN:
- case IFN_FMOD:
- case IFN_POW:
- case IFN_REMAINDER:
- case IFN_SCALB:
- case IFN_FMIN:
- case IFN_FMAX:
- gen_hsa_insns_for_call_of_internal_fn (stmt, hbb);
- break;
-
- case IFN_FMA:
- gen_hsa_fma (stmt, hbb, false, false);
- break;
-
- case IFN_FMS:
- gen_hsa_fma (stmt, hbb, false, true);
- break;
-
- case IFN_FNMA:
- gen_hsa_fma (stmt, hbb, true, false);
- break;
-
- case IFN_FNMS:
- gen_hsa_fma (stmt, hbb, true, true);
- break;
-
- default:
- HSA_SORRY_ATV (gimple_location (stmt),
- "support for HSA does not implement internal function: %s",
- internal_fn_name (fn));
- break;
- }
-}
-
-/* Generate HSA instructions for the given call statement STMT. Instructions
- will be appended to HBB. */
-
-static void
-gen_hsa_insns_for_call (gimple *stmt, hsa_bb *hbb)
-{
- gcall *call = as_a <gcall *> (stmt);
- tree lhs = gimple_call_lhs (stmt);
- hsa_op_reg *dest;
-
- if (gimple_call_internal_p (stmt))
- {
- gen_hsa_insn_for_internal_fn_call (call, hbb);
- return;
- }
-
- if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
- {
- tree function_decl = gimple_call_fndecl (stmt);
-
- if (function_decl == NULL_TREE)
- {
- HSA_SORRY_AT (gimple_location (stmt),
- "support for HSA does not implement indirect calls");
- return;
- }
-
- /* Prefetch pass can create type-mismatching prefetch builtin calls which
- fail the gimple_call_builtin_p test above. Handle them here. */
- if (fndecl_built_in_p (function_decl, BUILT_IN_PREFETCH))
- return;
-
- if (hsa_callable_function_p (function_decl))
- gen_hsa_insns_for_direct_call (stmt, hbb);
- else if (!gen_hsa_insns_for_known_library_call (stmt, hbb))
- HSA_SORRY_AT (gimple_location (stmt),
- "HSA supports only calls of functions marked with "
- "%<#pragma omp declare target%>");
- return;
- }
-
- tree fndecl = gimple_call_fndecl (stmt);
- enum built_in_function builtin = DECL_FUNCTION_CODE (fndecl);
- switch (builtin)
- {
- case BUILT_IN_FABS:
- case BUILT_IN_FABSF:
- gen_hsa_unaryop_for_builtin (BRIG_OPCODE_ABS, stmt, hbb);
- break;
-
- case BUILT_IN_CEIL:
- case BUILT_IN_CEILF:
- gen_hsa_unaryop_for_builtin (BRIG_OPCODE_CEIL, stmt, hbb);
- break;
-
- case BUILT_IN_FLOOR:
- case BUILT_IN_FLOORF:
- gen_hsa_unaryop_for_builtin (BRIG_OPCODE_FLOOR, stmt, hbb);
- break;
-
- case BUILT_IN_RINT:
- case BUILT_IN_RINTF:
- gen_hsa_unaryop_for_builtin (BRIG_OPCODE_RINT, stmt, hbb);
- break;
-
- case BUILT_IN_SQRT:
- case BUILT_IN_SQRTF:
- gen_hsa_unaryop_for_builtin (BRIG_OPCODE_SQRT, stmt, hbb);
- break;
-
- case BUILT_IN_TRUNC:
- case BUILT_IN_TRUNCF:
- gen_hsa_unaryop_for_builtin (BRIG_OPCODE_TRUNC, stmt, hbb);
- break;
-
- case BUILT_IN_COS:
- case BUILT_IN_SIN:
- case BUILT_IN_EXP2:
- case BUILT_IN_LOG2:
- /* HSAIL does not provide an instruction for double argument type. */
- gen_hsa_unaryop_builtin_call (stmt, hbb);
- break;
-
- case BUILT_IN_COSF:
- gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NCOS, stmt, hbb);
- break;
-
- case BUILT_IN_EXP2F:
- gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NEXP2, stmt, hbb);
- break;
-
- case BUILT_IN_LOG2F:
- gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NLOG2, stmt, hbb);
- break;
-
- case BUILT_IN_SINF:
- gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NSIN, stmt, hbb);
- break;
-
- case BUILT_IN_CLRSB:
- case BUILT_IN_CLRSBL:
- case BUILT_IN_CLRSBLL:
- gen_hsa_clrsb (call, hbb);
- break;
-
- case BUILT_IN_CLZ:
- case BUILT_IN_CLZL:
- case BUILT_IN_CLZLL:
- gen_hsa_unaryop_for_builtin (BRIG_OPCODE_FIRSTBIT, stmt, hbb);
- break;
-
- case BUILT_IN_CTZ:
- case BUILT_IN_CTZL:
- case BUILT_IN_CTZLL:
- gen_hsa_unaryop_for_builtin (BRIG_OPCODE_LASTBIT, stmt, hbb);
- break;
-
- case BUILT_IN_FFS:
- case BUILT_IN_FFSL:
- case BUILT_IN_FFSLL:
- gen_hsa_ffs (call, hbb);
- break;
-
- case BUILT_IN_PARITY:
- case BUILT_IN_PARITYL:
- case BUILT_IN_PARITYLL:
- gen_hsa_parity (call, hbb);
- break;
-
- case BUILT_IN_POPCOUNT:
- case BUILT_IN_POPCOUNTL:
- case BUILT_IN_POPCOUNTLL:
- gen_hsa_popcount (call, hbb);
- break;
-
- case BUILT_IN_ATOMIC_LOAD_1:
- case BUILT_IN_ATOMIC_LOAD_2:
- case BUILT_IN_ATOMIC_LOAD_4:
- case BUILT_IN_ATOMIC_LOAD_8:
- case BUILT_IN_ATOMIC_LOAD_16:
- {
- BrigType16_t mtype;
- hsa_op_base *src;
- src = get_address_from_value (gimple_call_arg (stmt, 0), hbb);
-
- BrigMemoryOrder memorder;
- const char *mmname;
- if (hsa_memorder_from_tree (gimple_call_arg (stmt, 1), &memorder,
- &mmname, gimple_location (stmt)))
- return;
-
- if (memorder == BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE)
- memorder = BRIG_MEMORY_ORDER_SC_ACQUIRE;
-
- if (memorder != BRIG_MEMORY_ORDER_RELAXED
- && memorder != BRIG_MEMORY_ORDER_SC_ACQUIRE
- && memorder != BRIG_MEMORY_ORDER_NONE)
- {
- HSA_SORRY_ATV (gimple_location (stmt),
- "support for HSA does not implement "
- "memory model for atomic loads: %s", mmname);
- return;
- }
-
- if (lhs)
- {
- BrigType16_t t = hsa_type_for_scalar_tree_type (TREE_TYPE (lhs),
- false);
- mtype = mem_type_for_type (t);
- mtype = hsa_bittype_for_type (mtype);
- dest = hsa_cfun->reg_for_gimple_ssa (lhs);
- }
- else
- {
- mtype = BRIG_TYPE_B64;
- dest = new hsa_op_reg (mtype);
- }
-
- hsa_insn_basic *atominsn;
- atominsn = new hsa_insn_atomic (2, BRIG_OPCODE_ATOMIC, BRIG_ATOMIC_LD,
- mtype, memorder, dest, src);
-
- hbb->append_insn (atominsn);
- break;
- }
-
- case BUILT_IN_ATOMIC_EXCHANGE_1:
- case BUILT_IN_ATOMIC_EXCHANGE_2:
- case BUILT_IN_ATOMIC_EXCHANGE_4:
- case BUILT_IN_ATOMIC_EXCHANGE_8:
- case BUILT_IN_ATOMIC_EXCHANGE_16:
- gen_hsa_atomic_for_builtin (true, BRIG_ATOMIC_EXCH, stmt, hbb, false);
- break;
- break;
-
- case BUILT_IN_ATOMIC_FETCH_ADD_1:
- case BUILT_IN_ATOMIC_FETCH_ADD_2:
- case BUILT_IN_ATOMIC_FETCH_ADD_4:
- case BUILT_IN_ATOMIC_FETCH_ADD_8:
- case BUILT_IN_ATOMIC_FETCH_ADD_16:
- gen_hsa_atomic_for_builtin (true, BRIG_ATOMIC_ADD, stmt, hbb, false);
- break;
- break;
-
- case BUILT_IN_ATOMIC_FETCH_SUB_1:
- case BUILT_IN_ATOMIC_FETCH_SUB_2:
- case BUILT_IN_ATOMIC_FETCH_SUB_4:
- case BUILT_IN_ATOMIC_FETCH_SUB_8:
- case BUILT_IN_ATOMIC_FETCH_SUB_16:
- gen_hsa_atomic_for_builtin (true, BRIG_ATOMIC_SUB, stmt, hbb, false);
- break;
- break;
-
- case BUILT_IN_ATOMIC_FETCH_AND_1:
- case BUILT_IN_ATOMIC_FETCH_AND_2:
- case BUILT_IN_ATOMIC_FETCH_AND_4:
- case BUILT_IN_ATOMIC_FETCH_AND_8:
- case BUILT_IN_ATOMIC_FETCH_AND_16:
- gen_hsa_atomic_for_builtin (true, BRIG_ATOMIC_AND, stmt, hbb, false);
- break;
- break;
-
- case BUILT_IN_ATOMIC_FETCH_XOR_1:
- case BUILT_IN_ATOMIC_FETCH_XOR_2:
- case BUILT_IN_ATOMIC_FETCH_XOR_4:
- case BUILT_IN_ATOMIC_FETCH_XOR_8:
- case BUILT_IN_ATOMIC_FETCH_XOR_16:
- gen_hsa_atomic_for_builtin (true, BRIG_ATOMIC_XOR, stmt, hbb, false);
- break;
- break;
-
- case BUILT_IN_ATOMIC_FETCH_OR_1:
- case BUILT_IN_ATOMIC_FETCH_OR_2:
- case BUILT_IN_ATOMIC_FETCH_OR_4:
- case BUILT_IN_ATOMIC_FETCH_OR_8:
- case BUILT_IN_ATOMIC_FETCH_OR_16:
- gen_hsa_atomic_for_builtin (true, BRIG_ATOMIC_OR, stmt, hbb, false);
- break;
- break;
-
- case BUILT_IN_ATOMIC_STORE_1:
- case BUILT_IN_ATOMIC_STORE_2:
- case BUILT_IN_ATOMIC_STORE_4:
- case BUILT_IN_ATOMIC_STORE_8:
- case BUILT_IN_ATOMIC_STORE_16:
- /* Since there cannot be any LHS, the first parameter is meaningless. */
- gen_hsa_atomic_for_builtin (true, BRIG_ATOMIC_ST, stmt, hbb, false);
- break;
- break;
-
- case BUILT_IN_ATOMIC_ADD_FETCH_1:
- case BUILT_IN_ATOMIC_ADD_FETCH_2:
- case BUILT_IN_ATOMIC_ADD_FETCH_4:
- case BUILT_IN_ATOMIC_ADD_FETCH_8:
- case BUILT_IN_ATOMIC_ADD_FETCH_16:
- gen_hsa_atomic_for_builtin (false, BRIG_ATOMIC_ADD, stmt, hbb, false);
- break;
-
- case BUILT_IN_ATOMIC_SUB_FETCH_1:
- case BUILT_IN_ATOMIC_SUB_FETCH_2:
- case BUILT_IN_ATOMIC_SUB_FETCH_4:
- case BUILT_IN_ATOMIC_SUB_FETCH_8:
- case BUILT_IN_ATOMIC_SUB_FETCH_16:
- gen_hsa_atomic_for_builtin (false, BRIG_ATOMIC_SUB, stmt, hbb, false);
- break;
-
- case BUILT_IN_ATOMIC_AND_FETCH_1:
- case BUILT_IN_ATOMIC_AND_FETCH_2:
- case BUILT_IN_ATOMIC_AND_FETCH_4:
- case BUILT_IN_ATOMIC_AND_FETCH_8:
- case BUILT_IN_ATOMIC_AND_FETCH_16:
- gen_hsa_atomic_for_builtin (false, BRIG_ATOMIC_AND, stmt, hbb, false);
- break;
-
- case BUILT_IN_ATOMIC_XOR_FETCH_1:
- case BUILT_IN_ATOMIC_XOR_FETCH_2:
- case BUILT_IN_ATOMIC_XOR_FETCH_4:
- case BUILT_IN_ATOMIC_XOR_FETCH_8:
- case BUILT_IN_ATOMIC_XOR_FETCH_16:
- gen_hsa_atomic_for_builtin (false, BRIG_ATOMIC_XOR, stmt, hbb, false);
- break;
-
- case BUILT_IN_ATOMIC_OR_FETCH_1:
- case BUILT_IN_ATOMIC_OR_FETCH_2:
- case BUILT_IN_ATOMIC_OR_FETCH_4:
- case BUILT_IN_ATOMIC_OR_FETCH_8:
- case BUILT_IN_ATOMIC_OR_FETCH_16:
- gen_hsa_atomic_for_builtin (false, BRIG_ATOMIC_OR, stmt, hbb, false);
- break;
-
- case BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_1:
- case BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_2:
- case BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_4:
- case BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_8:
- case BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_16:
- {
- tree type = TREE_TYPE (gimple_call_arg (stmt, 1));
- BrigType16_t atype
- = hsa_bittype_for_type (hsa_type_for_scalar_tree_type (type, false));
- BrigMemoryOrder memorder = BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE;
- hsa_insn_basic *atominsn;
- hsa_op_base *tgt;
- atominsn = new hsa_insn_atomic (4, BRIG_OPCODE_ATOMIC,
- BRIG_ATOMIC_CAS, atype, memorder);
- tgt = get_address_from_value (gimple_call_arg (stmt, 0), hbb);
-
- if (lhs != NULL)
- dest = hsa_cfun->reg_for_gimple_ssa (lhs);
- else
- dest = new hsa_op_reg (atype);
-
- atominsn->set_op (0, dest);
- atominsn->set_op (1, tgt);
-
- hsa_op_with_type *op
- = hsa_reg_or_immed_for_gimple_op (gimple_call_arg (stmt, 1), hbb);
- atominsn->set_op (2, op);
- op = hsa_reg_or_immed_for_gimple_op (gimple_call_arg (stmt, 2), hbb);
- atominsn->set_op (3, op);
-
- hbb->append_insn (atominsn);
- break;
- }
-
- case BUILT_IN_HSA_WORKGROUPID:
- query_hsa_grid_dim (stmt, BRIG_OPCODE_WORKGROUPID, hbb);
- break;
- case BUILT_IN_HSA_WORKITEMID:
- query_hsa_grid_dim (stmt, BRIG_OPCODE_WORKITEMID, hbb);
- break;
- case BUILT_IN_HSA_WORKITEMABSID:
- query_hsa_grid_dim (stmt, BRIG_OPCODE_WORKITEMABSID, hbb);
- break;
- case BUILT_IN_HSA_GRIDSIZE:
- query_hsa_grid_dim (stmt, BRIG_OPCODE_GRIDSIZE, hbb);
- break;
- case BUILT_IN_HSA_CURRENTWORKGROUPSIZE:
- query_hsa_grid_dim (stmt, BRIG_OPCODE_CURRENTWORKGROUPSIZE, hbb);
- break;
-
- case BUILT_IN_GOMP_BARRIER:
- hbb->append_insn (new hsa_insn_br (0, BRIG_OPCODE_BARRIER, BRIG_TYPE_NONE,
- BRIG_WIDTH_ALL));
- break;
- case BUILT_IN_GOMP_PARALLEL:
- HSA_SORRY_AT (gimple_location (stmt),
- "support for HSA does not implement non-gridified "
- "OpenMP parallel constructs");
- break;
-
- case BUILT_IN_OMP_GET_THREAD_NUM:
- {
- query_hsa_grid_nodim (stmt, BRIG_OPCODE_WORKITEMFLATABSID, hbb);
- break;
- }
-
- case BUILT_IN_OMP_GET_NUM_THREADS:
- {
- gen_get_num_threads (stmt, hbb);
- break;
- }
- case BUILT_IN_GOMP_TEAMS:
- {
- gen_set_num_threads (gimple_call_arg (stmt, 1), hbb);
- break;
- }
- case BUILT_IN_OMP_GET_NUM_TEAMS:
- {
- gen_get_num_teams (stmt, hbb);
- break;
- }
- case BUILT_IN_OMP_GET_TEAM_NUM:
- {
- gen_get_team_num (stmt, hbb);
- break;
- }
- case BUILT_IN_MEMCPY:
- case BUILT_IN_MEMPCPY:
- {
- expand_memory_copy (stmt, hbb, builtin);
- break;
- }
- case BUILT_IN_MEMSET:
- {
- tree c = gimple_call_arg (stmt, 1);
-
- if (TREE_CODE (c) != INTEGER_CST)
- {
- gen_hsa_insns_for_direct_call (stmt, hbb);
- return;
- }
-
- tree byte_size = gimple_call_arg (stmt, 2);
-
- if (!tree_fits_uhwi_p (byte_size))
- {
- gen_hsa_insns_for_direct_call (stmt, hbb);
- return;
- }
-
- unsigned HOST_WIDE_INT n = tree_to_uhwi (byte_size);
-
- if (n > HSA_MEMORY_BUILTINS_LIMIT)
- {
- gen_hsa_insns_for_direct_call (stmt, hbb);
- return;
- }
-
- unsigned HOST_WIDE_INT constant
- = tree_to_uhwi (fold_convert (unsigned_char_type_node, c));
-
- expand_memory_set (stmt, n, constant, hbb, builtin);
-
- break;
- }
- case BUILT_IN_BZERO:
- {
- tree byte_size = gimple_call_arg (stmt, 1);
-
- if (!tree_fits_uhwi_p (byte_size))
- {
- gen_hsa_insns_for_direct_call (stmt, hbb);
- return;
- }
-
- unsigned HOST_WIDE_INT n = tree_to_uhwi (byte_size);
-
- if (n > HSA_MEMORY_BUILTINS_LIMIT)
- {
- gen_hsa_insns_for_direct_call (stmt, hbb);
- return;
- }
-
- expand_memory_set (stmt, n, 0, hbb, builtin);
-
- break;
- }
- CASE_BUILT_IN_ALLOCA:
- {
- gen_hsa_alloca (call, hbb);
- break;
- }
- case BUILT_IN_PREFETCH:
- break;
- default:
- {
- tree name_tree = DECL_NAME (fndecl);
- const char *s = IDENTIFIER_POINTER (name_tree);
- size_t len = strlen (s);
- if (len > 4 && (strncmp (s, "__builtin_GOMP_", 15) == 0))
- HSA_SORRY_ATV (gimple_location (stmt),
- "support for HSA does not implement GOMP function %s",
- s);
- else
- gen_hsa_insns_for_direct_call (stmt, hbb);
- return;
- }
- }
-}
-
-/* Generate HSA instructions for a given gimple statement. Instructions will be
- appended to HBB. */
-
-static void
-gen_hsa_insns_for_gimple_stmt (gimple *stmt, hsa_bb *hbb)
-{
- switch (gimple_code (stmt))
- {
- case GIMPLE_ASSIGN:
- if (gimple_clobber_p (stmt))
- break;
-
- if (gimple_assign_single_p (stmt))
- {
- tree lhs = gimple_assign_lhs (stmt);
- tree rhs = gimple_assign_rhs1 (stmt);
- gen_hsa_insns_for_single_assignment (lhs, rhs, hbb);
- }
- else
- gen_hsa_insns_for_operation_assignment (stmt, hbb);
- break;
- case GIMPLE_RETURN:
- gen_hsa_insns_for_return (as_a <greturn *> (stmt), hbb);
- break;
- case GIMPLE_COND:
- gen_hsa_insns_for_cond_stmt (stmt, hbb);
- break;
- case GIMPLE_CALL:
- gen_hsa_insns_for_call (stmt, hbb);
- break;
- case GIMPLE_DEBUG:
- /* ??? HSA supports some debug facilities. */
- break;
- case GIMPLE_LABEL:
- {
- tree label = gimple_label_label (as_a <glabel *> (stmt));
- if (FORCED_LABEL (label))
- HSA_SORRY_AT (gimple_location (stmt),
- "support for HSA does not implement gimple label with "
- "address taken");
-
- break;
- }
- case GIMPLE_NOP:
- {
- hbb->append_insn (new hsa_insn_basic (0, BRIG_OPCODE_NOP));
- break;
- }
- case GIMPLE_SWITCH:
- {
- gen_hsa_insns_for_switch_stmt (as_a <gswitch *> (stmt), hbb);
- break;
- }
- default:
- HSA_SORRY_ATV (gimple_location (stmt),
- "support for HSA does not implement gimple statement %s",
- gimple_code_name[(int) gimple_code (stmt)]);
- }
-}
-
-/* Generate a HSA PHI from a gimple PHI. */
-
-static void
-gen_hsa_phi_from_gimple_phi (gimple *phi_stmt, hsa_bb *hbb)
-{
- hsa_insn_phi *hphi;
- unsigned count = gimple_phi_num_args (phi_stmt);
-
- hsa_op_reg *dest
- = hsa_cfun->reg_for_gimple_ssa (gimple_phi_result (phi_stmt));
- hphi = new hsa_insn_phi (count, dest);
- hphi->m_bb = hbb->m_bb;
-
- auto_vec <tree, 8> aexprs;
- auto_vec <hsa_op_reg *, 8> aregs;
-
- /* Calling split_edge when processing a PHI node messes up with the order of
- gimple phi node arguments (it moves the one associated with the edge to
- the end). We need to keep the order of edges and arguments of HSA phi
- node arguments consistent, so we do all required splitting as the first
- step, and in reverse order as to not be affected by the re-orderings. */
- for (unsigned j = count; j != 0; j--)
- {
- unsigned i = j - 1;
- tree op = gimple_phi_arg_def (phi_stmt, i);
- if (TREE_CODE (op) != ADDR_EXPR)
- continue;
-
- edge e = gimple_phi_arg_edge (as_a <gphi *> (phi_stmt), i);
- hsa_bb *hbb_src = hsa_init_new_bb (split_edge (e));
- hsa_op_address *addr = gen_hsa_addr (TREE_OPERAND (op, 0),
- hbb_src);
-
- hsa_op_reg *dest
- = new hsa_op_reg (hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT));
- hsa_insn_basic *insn
- = new hsa_insn_basic (2, BRIG_OPCODE_LDA, BRIG_TYPE_U64,
- dest, addr);
- hbb_src->append_insn (insn);
- aexprs.safe_push (op);
- aregs.safe_push (dest);
- }
-
- tree lhs = gimple_phi_result (phi_stmt);
- for (unsigned i = 0; i < count; i++)
- {
- tree op = gimple_phi_arg_def (phi_stmt, i);
-
- if (TREE_CODE (op) == SSA_NAME)
- {
- hsa_op_reg *hreg = hsa_cfun->reg_for_gimple_ssa (op);
- hphi->set_op (i, hreg);
- }
- else
- {
- gcc_assert (is_gimple_min_invariant (op));
- tree t = TREE_TYPE (op);
- if (!POINTER_TYPE_P (t)
- || (TREE_CODE (op) == STRING_CST
- && TREE_CODE (TREE_TYPE (t)) == INTEGER_TYPE))
- hphi->set_op (i, new hsa_op_immed (op));
- else if (POINTER_TYPE_P (TREE_TYPE (lhs))
- && TREE_CODE (op) == INTEGER_CST)
- {
- /* Handle assignment of NULL value to a pointer type. */
- hphi->set_op (i, new hsa_op_immed (op));
- }
- else if (TREE_CODE (op) == ADDR_EXPR)
- {
- hsa_op_reg *dest = NULL;
- for (unsigned a_idx = 0; a_idx < aexprs.length (); a_idx++)
- if (aexprs[a_idx] == op)
- {
- dest = aregs[a_idx];
- break;
- }
- gcc_assert (dest);
- hphi->set_op (i, dest);
- }
- else
- {
- HSA_SORRY_AT (gimple_location (phi_stmt),
- "support for HSA does not handle PHI nodes with "
- "constant address operands");
- return;
- }
- }
- }
-
- hbb->append_phi (hphi);
-}
-
-/* Constructor of class containing HSA-specific information about a basic
- block. CFG_BB is the CFG BB this HSA BB is associated with. IDX is the new
- index of this BB (so that the constructor does not attempt to use
- hsa_cfun during its construction). */
-
-hsa_bb::hsa_bb (basic_block cfg_bb, int idx)
- : m_bb (cfg_bb), m_first_insn (NULL), m_last_insn (NULL), m_first_phi (NULL),
- m_last_phi (NULL), m_index (idx)
-{
- gcc_assert (!cfg_bb->aux);
- cfg_bb->aux = this;
-}
-
-/* Constructor of class containing HSA-specific information about a basic
- block. CFG_BB is the CFG BB this HSA BB is associated with. */
-
-hsa_bb::hsa_bb (basic_block cfg_bb)
- : m_bb (cfg_bb), m_first_insn (NULL), m_last_insn (NULL), m_first_phi (NULL),
- m_last_phi (NULL), m_index (hsa_cfun->m_hbb_count++)
-{
- gcc_assert (!cfg_bb->aux);
- cfg_bb->aux = this;
-}
-
-/* Create and initialize and return a new hsa_bb structure for a given CFG
- basic block BB. */
-
-hsa_bb *
-hsa_init_new_bb (basic_block bb)
-{
- void *m = obstack_alloc (&hsa_obstack, sizeof (hsa_bb));
- return new (m) hsa_bb (bb);
-}
-
-/* Initialize OMP in an HSA basic block PROLOGUE. */
-
-static void
-init_prologue (void)
-{
- if (!hsa_cfun->m_kern_p)
- return;
-
- hsa_bb *prologue = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
-
- /* Create a magic number that is going to be printed by libgomp. */
- unsigned index = hsa_get_number_decl_kernel_mappings ();
-
- /* Emit store to debug argument. */
- if (param_hsa_gen_debug_stores > 0)
- set_debug_value (prologue, new hsa_op_immed (1000 + index, BRIG_TYPE_U64));
-}
-
-/* Initialize hsa_num_threads to a default value. */
-
-static void
-init_hsa_num_threads (void)
-{
- hsa_bb *prologue = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
-
- /* Save the default value to private variable hsa_num_threads. */
- hsa_insn_basic *basic
- = new hsa_insn_mem (BRIG_OPCODE_ST, hsa_num_threads->m_type,
- new hsa_op_immed (0, hsa_num_threads->m_type),
- new hsa_op_address (hsa_num_threads));
- prologue->append_insn (basic);
-}
-
-/* Go over gimple representation and generate our internal HSA one. */
-
-static void
-gen_body_from_gimple ()
-{
- basic_block bb;
-
- /* Verify CFG for complex edges we are unable to handle. */
- edge_iterator ei;
- edge e;
-
- FOR_EACH_BB_FN (bb, cfun)
- {
- FOR_EACH_EDGE (e, ei, bb->succs)
- {
- /* Verify all unsupported flags for edges that point
- to the same basic block. */
- if (e->flags & EDGE_EH)
- {
- HSA_SORRY_AT (UNKNOWN_LOCATION,
- "support for HSA does not implement exception "
- "handling");
- return;
- }
- }
- }
-
- FOR_EACH_BB_FN (bb, cfun)
- {
- gimple_stmt_iterator gsi;
- hsa_bb *hbb = hsa_bb_for_bb (bb);
- if (hbb)
- continue;
-
- hbb = hsa_init_new_bb (bb);
-
- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
- {
- gen_hsa_insns_for_gimple_stmt (gsi_stmt (gsi), hbb);
- if (hsa_seen_error ())
- return;
- }
- }
-
- FOR_EACH_BB_FN (bb, cfun)
- {
- gimple_stmt_iterator gsi;
- hsa_bb *hbb = hsa_bb_for_bb (bb);
- gcc_assert (hbb != NULL);
-
- for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
- if (!virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
- gen_hsa_phi_from_gimple_phi (gsi_stmt (gsi), hbb);
- }
-
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file, "------- Generated SSA form -------\n");
- dump_hsa_cfun (dump_file);
- }
-}
-
-static void
-gen_function_decl_parameters (hsa_function_representation *f,
- tree decl)
-{
- tree parm;
- unsigned i;
-
- for (parm = TYPE_ARG_TYPES (TREE_TYPE (decl)), i = 0;
- parm;
- parm = TREE_CHAIN (parm), i++)
- {
- /* Result type if last in the tree list. */
- if (TREE_CHAIN (parm) == NULL)
- break;
-
- tree v = TREE_VALUE (parm);
-
- hsa_symbol *arg = new hsa_symbol (BRIG_TYPE_NONE, BRIG_SEGMENT_ARG,
- BRIG_LINKAGE_NONE);
- arg->m_type = hsa_type_for_tree_type (v, &arg->m_dim);
- arg->m_name_number = i;
-
- f->m_input_args.safe_push (arg);
- }
-
- tree result_type = TREE_TYPE (TREE_TYPE (decl));
- if (!VOID_TYPE_P (result_type))
- {
- f->m_output_arg = new hsa_symbol (BRIG_TYPE_NONE, BRIG_SEGMENT_ARG,
- BRIG_LINKAGE_NONE);
- f->m_output_arg->m_type
- = hsa_type_for_tree_type (result_type, &f->m_output_arg->m_dim);
- f->m_output_arg->m_name = "res";
- }
-}
-
-/* Generate the vector of parameters of the HSA representation of the current
- function. This also includes the output parameter representing the
- result. */
-
-static void
-gen_function_def_parameters ()
-{
- tree parm;
-
- hsa_bb *prologue = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
-
- for (parm = DECL_ARGUMENTS (cfun->decl); parm;
- parm = DECL_CHAIN (parm))
- {
- class hsa_symbol **slot;
-
- hsa_symbol *arg
- = new hsa_symbol (BRIG_TYPE_NONE, hsa_cfun->m_kern_p
- ? BRIG_SEGMENT_KERNARG : BRIG_SEGMENT_ARG,
- BRIG_LINKAGE_FUNCTION);
- arg->fillup_for_decl (parm);
-
- hsa_cfun->m_input_args.safe_push (arg);
-
- if (hsa_seen_error ())
- return;
-
- arg->m_name = hsa_get_declaration_name (parm);
-
- /* Copy all input arguments and create corresponding private symbols
- for them. */
- hsa_symbol *private_arg;
- hsa_op_address *parm_addr = new hsa_op_address (arg);
-
- if (TREE_ADDRESSABLE (parm)
- || (!is_gimple_reg (parm) && !TREE_READONLY (parm)))
- {
- private_arg = hsa_cfun->create_hsa_temporary (arg->m_type);
- private_arg->fillup_for_decl (parm);
-
- BrigAlignment8_t align = MIN (arg->m_align, private_arg->m_align);
-
- hsa_op_address *private_arg_addr = new hsa_op_address (private_arg);
- gen_hsa_memory_copy (prologue, private_arg_addr, parm_addr,
- arg->total_byte_size (), align);
- }
- else
- private_arg = arg;
-
- slot = hsa_cfun->m_local_symbols->find_slot (private_arg, INSERT);
- gcc_assert (!*slot);
- *slot = private_arg;
-
- if (is_gimple_reg (parm))
- {
- tree ddef = ssa_default_def (cfun, parm);
- if (ddef && !has_zero_uses (ddef))
- {
- BrigType16_t t = hsa_type_for_scalar_tree_type (TREE_TYPE (ddef),
- false);
- BrigType16_t mtype = mem_type_for_type (t);
- hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (ddef);
- hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_LD, mtype,
- dest, parm_addr);
- gcc_assert (!parm_addr->m_reg);
- prologue->append_insn (mem);
- }
- }
- }
-
- if (!VOID_TYPE_P (TREE_TYPE (TREE_TYPE (cfun->decl))))
- {
- class hsa_symbol **slot;
-
- hsa_cfun->m_output_arg = new hsa_symbol (BRIG_TYPE_NONE, BRIG_SEGMENT_ARG,
- BRIG_LINKAGE_FUNCTION);
- hsa_cfun->m_output_arg->fillup_for_decl (DECL_RESULT (cfun->decl));
-
- if (hsa_seen_error ())
- return;
-
- hsa_cfun->m_output_arg->m_name = "res";
- slot = hsa_cfun->m_local_symbols->find_slot (hsa_cfun->m_output_arg,
- INSERT);
- gcc_assert (!*slot);
- *slot = hsa_cfun->m_output_arg;
- }
-}
-
-/* Generate function representation that corresponds to
- a function declaration. */
-
-hsa_function_representation *
-hsa_generate_function_declaration (tree decl)
-{
- hsa_function_representation *fun
- = new hsa_function_representation (decl, false, 0);
-
- fun->m_declaration_p = true;
- fun->m_name = get_brig_function_name (decl);
- gen_function_decl_parameters (fun, decl);
-
- return fun;
-}
-
-
-/* Generate function representation that corresponds to
- an internal FN. */
-
-hsa_function_representation *
-hsa_generate_internal_fn_decl (hsa_internal_fn *fn)
-{
- hsa_function_representation *fun = new hsa_function_representation (fn);
-
- fun->m_name = fn->name ();
-
- for (unsigned i = 0; i < fn->get_arity (); i++)
- {
- hsa_symbol *arg
- = new hsa_symbol (fn->get_argument_type (i), BRIG_SEGMENT_ARG,
- BRIG_LINKAGE_NONE);
- arg->m_name_number = i;
- fun->m_input_args.safe_push (arg);
- }
-
- fun->m_output_arg = new hsa_symbol (fn->get_argument_type (-1),
- BRIG_SEGMENT_ARG, BRIG_LINKAGE_NONE);
- fun->m_output_arg->m_name = "res";
-
- return fun;
-}
-
-/* Return true if switch statement S can be transformed
- to a SBR instruction in HSAIL. */
-
-static bool
-transformable_switch_to_sbr_p (gswitch *s)
-{
- /* Identify if a switch statement can be transformed to
- SBR instruction, like:
-
- sbr_u32 $s1 [@label1, @label2, @label3];
- */
-
- tree size = get_switch_size (s);
- if (!tree_fits_uhwi_p (size))
- return false;
-
- if (tree_to_uhwi (size) > HSA_MAXIMUM_SBR_LABELS)
- return false;
-
- return true;
-}
-
-/* Structure hold connection between PHI nodes and immediate
- values hold by there nodes. */
-
-class phi_definition
-{
-public:
- phi_definition (unsigned phi_i, unsigned label_i, tree imm):
- phi_index (phi_i), label_index (label_i), phi_value (imm)
- {}
-
- unsigned phi_index;
- unsigned label_index;
- tree phi_value;
-};
-
-/* Sum slice of a vector V, starting from index START and ending
- at the index END - 1. */
-
-template <typename T>
-static
-T sum_slice (const auto_vec <T> &v, unsigned start, unsigned end,
- T zero)
-{
- T s = zero;
-
- for (unsigned i = start; i < end; i++)
- s += v[i];
-
- return s;
-}
-
-/* Function transforms GIMPLE SWITCH statements to a series of IF statements.
- Let's assume following example:
-
-L0:
- switch (index)
- case C1:
-L1: hard_work_1 ();
- break;
- case C2..C3:
-L2: hard_work_2 ();
- break;
- default:
-LD: hard_work_3 ();
- break;
-
- The transformation encompasses following steps:
- 1) all immediate values used by edges coming from the switch basic block
- are saved
- 2) all these edges are removed
- 3) the switch statement (in L0) is replaced by:
- if (index == C1)
- goto L1;
- else
- goto L1';
-
- 4) newly created basic block Lx' is used for generation of
- a next condition
- 5) else branch of the last condition goes to LD
- 6) fix all immediate values in PHI nodes that were propagated though
- edges that were removed in step 2
-
- Note: if a case is made by a range C1..C2, then process
- following transformation:
-
- switch_cond_op1 = C1 <= index;
- switch_cond_op2 = index <= C2;
- switch_cond_and = switch_cond_op1 & switch_cond_op2;
- if (switch_cond_and != 0)
- goto Lx;
- else
- goto Ly;
-
-*/
-
-static bool
-convert_switch_statements (void)
-{
- basic_block bb;
-
- bool modified_cfg = false;
-
- FOR_EACH_BB_FN (bb, cfun)
- {
- gimple_stmt_iterator gsi = gsi_last_bb (bb);
- if (gsi_end_p (gsi))
- continue;
-
- gimple *stmt = gsi_stmt (gsi);
-
- if (gimple_code (stmt) == GIMPLE_SWITCH)
- {
- gswitch *s = as_a <gswitch *> (stmt);
-
- /* If the switch can utilize SBR insn, skip the statement. */
- if (transformable_switch_to_sbr_p (s))
- continue;
-
- modified_cfg = true;
-
- unsigned labels = gimple_switch_num_labels (s);
- tree index = gimple_switch_index (s);
- tree index_type = TREE_TYPE (index);
- tree default_label = gimple_switch_default_label (s);
- basic_block default_label_bb
- = label_to_block (cfun, CASE_LABEL (default_label));
- basic_block cur_bb = bb;
-
- auto_vec <edge> new_edges;
- auto_vec <phi_definition *> phi_todo_list;
- auto_vec <profile_count> edge_counts;
- auto_vec <profile_probability> edge_probabilities;
-
- /* Investigate all labels that and PHI nodes in these edges which
- should be fixed after we add new collection of edges. */
- for (unsigned i = 0; i < labels; i++)
- {
- basic_block label_bb = gimple_switch_label_bb (cfun, s, i);
- edge e = find_edge (bb, label_bb);
- edge_counts.safe_push (e->count ());
- edge_probabilities.safe_push (e->probability);
- gphi_iterator phi_gsi;
-
- /* Save PHI definitions that will be destroyed because of an edge
- is going to be removed. */
- unsigned phi_index = 0;
- for (phi_gsi = gsi_start_phis (e->dest);
- !gsi_end_p (phi_gsi); gsi_next (&phi_gsi))
- {
- gphi *phi = phi_gsi.phi ();
- for (unsigned j = 0; j < gimple_phi_num_args (phi); j++)
- {
- if (gimple_phi_arg_edge (phi, j) == e)
- {
- tree imm = gimple_phi_arg_def (phi, j);
- phi_definition *p = new phi_definition (phi_index, i,
- imm);
- phi_todo_list.safe_push (p);
- break;
- }
- }
- phi_index++;
- }
- }
-
- /* Remove all edges for the current basic block. */
- for (int i = EDGE_COUNT (bb->succs) - 1; i >= 0; i--)
- {
- edge e = EDGE_SUCC (bb, i);
- remove_edge (e);
- }
-
- /* Iterate all non-default labels. */
- for (unsigned i = 1; i < labels; i++)
- {
- tree label = gimple_switch_label (s, i);
- tree low = CASE_LOW (label);
- tree high = CASE_HIGH (label);
-
- if (!useless_type_conversion_p (TREE_TYPE (low), index_type))
- low = fold_convert (index_type, low);
-
- gimple_stmt_iterator cond_gsi = gsi_last_bb (cur_bb);
- gimple *c = NULL;
- if (high)
- {
- tree tmp1 = make_temp_ssa_name (boolean_type_node, NULL,
- "switch_cond_op1");
-
- gimple *assign1 = gimple_build_assign (tmp1, LE_EXPR, low,
- index);
-
- tree tmp2 = make_temp_ssa_name (boolean_type_node, NULL,
- "switch_cond_op2");
-
- if (!useless_type_conversion_p (TREE_TYPE (high), index_type))
- high = fold_convert (index_type, high);
- gimple *assign2 = gimple_build_assign (tmp2, LE_EXPR, index,
- high);
-
- tree tmp3 = make_temp_ssa_name (boolean_type_node, NULL,
- "switch_cond_and");
- gimple *assign3 = gimple_build_assign (tmp3, BIT_AND_EXPR, tmp1,
- tmp2);
-
- gsi_insert_before (&cond_gsi, assign1, GSI_SAME_STMT);
- gsi_insert_before (&cond_gsi, assign2, GSI_SAME_STMT);
- gsi_insert_before (&cond_gsi, assign3, GSI_SAME_STMT);
-
- tree b = constant_boolean_node (false, boolean_type_node);
- c = gimple_build_cond (NE_EXPR, tmp3, b, NULL, NULL);
- }
- else
- c = gimple_build_cond (EQ_EXPR, index, low, NULL, NULL);
-
- gimple_set_location (c, gimple_location (stmt));
-
- gsi_insert_before (&cond_gsi, c, GSI_SAME_STMT);
-
- basic_block label_bb = label_to_block (cfun, CASE_LABEL (label));
- edge new_edge = make_edge (cur_bb, label_bb, EDGE_TRUE_VALUE);
- profile_probability prob_sum = sum_slice <profile_probability>
- (edge_probabilities, i, labels, profile_probability::never ())
- + edge_probabilities[0];
-
- if (prob_sum.initialized_p ())
- new_edge->probability = edge_probabilities[i] / prob_sum;
-
- new_edges.safe_push (new_edge);
-
- if (i < labels - 1)
- {
- /* Prepare another basic block that will contain
- next condition. */
- basic_block next_bb = create_empty_bb (cur_bb);
- if (current_loops)
- {
- add_bb_to_loop (next_bb, cur_bb->loop_father);
- loops_state_set (LOOPS_NEED_FIXUP);
- }
-
- edge next_edge = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
- next_edge->probability = new_edge->probability.invert ();
- next_bb->count = next_edge->count ();
- cur_bb = next_bb;
- }
- else /* Link last IF statement and default label
- of the switch. */
- {
- edge e = make_edge (cur_bb, default_label_bb, EDGE_FALSE_VALUE);
- e->probability = new_edge->probability.invert ();
- new_edges.safe_insert (0, e);
- }
- }
-
- /* Restore original PHI immediate value. */
- for (unsigned i = 0; i < phi_todo_list.length (); i++)
- {
- phi_definition *phi_def = phi_todo_list[i];
- edge new_edge = new_edges[phi_def->label_index];
-
- gphi_iterator it = gsi_start_phis (new_edge->dest);
- for (unsigned i = 0; i < phi_def->phi_index; i++)
- gsi_next (&it);
-
- gphi *phi = it.phi ();
- add_phi_arg (phi, phi_def->phi_value, new_edge, UNKNOWN_LOCATION);
- delete phi_def;
- }
-
- /* Remove the original GIMPLE switch statement. */
- gsi_remove (&gsi, true);
- }
- }
-
- if (dump_file)
- dump_function_to_file (current_function_decl, dump_file, TDF_DETAILS);
-
- return modified_cfg;
-}
-
-/* Expand builtins that can't be handled by HSA back-end. */
-
-static void
-expand_builtins ()
-{
- basic_block bb;
-
- FOR_EACH_BB_FN (bb, cfun)
- {
- for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
- gsi_next (&gsi))
- {
- gimple *stmt = gsi_stmt (gsi);
-
- if (gimple_code (stmt) != GIMPLE_CALL)
- continue;
-
- gcall *call = as_a <gcall *> (stmt);
-
- if (!gimple_call_builtin_p (call, BUILT_IN_NORMAL))
- continue;
-
- tree fndecl = gimple_call_fndecl (stmt);
- enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
- switch (fn)
- {
- case BUILT_IN_CEXPF:
- case BUILT_IN_CEXPIF:
- case BUILT_IN_CEXPI:
- {
- /* Similar to builtins.c (expand_builtin_cexpi), the builtin
- can be transformed to: cexp(I * z) = ccos(z) + I * csin(z). */
- tree lhs = gimple_call_lhs (stmt);
- tree rhs = gimple_call_arg (stmt, 0);
- tree rhs_type = TREE_TYPE (rhs);
- bool float_type_p = rhs_type == float_type_node;
- tree real_part = make_temp_ssa_name (rhs_type, NULL,
- "cexp_real_part");
- tree imag_part = make_temp_ssa_name (rhs_type, NULL,
- "cexp_imag_part");
-
- tree cos_fndecl
- = mathfn_built_in (rhs_type, fn == float_type_p
- ? BUILT_IN_COSF : BUILT_IN_COS);
- gcall *cos = gimple_build_call (cos_fndecl, 1, rhs);
- gimple_call_set_lhs (cos, real_part);
- gsi_insert_before (&gsi, cos, GSI_SAME_STMT);
-
- tree sin_fndecl
- = mathfn_built_in (rhs_type, fn == float_type_p
- ? BUILT_IN_SINF : BUILT_IN_SIN);
- gcall *sin = gimple_build_call (sin_fndecl, 1, rhs);
- gimple_call_set_lhs (sin, imag_part);
- gsi_insert_before (&gsi, sin, GSI_SAME_STMT);
-
-
- gassign *assign = gimple_build_assign (lhs, COMPLEX_EXPR,
- real_part, imag_part);
- gsi_insert_before (&gsi, assign, GSI_SAME_STMT);
- gsi_remove (&gsi, true);
-
- break;
- }
- default:
- break;
- }
- }
- }
-}
-
-/* Emit HSA module variables that are global for the entire module. */
-
-static void
-emit_hsa_module_variables (void)
-{
- hsa_num_threads = new hsa_symbol (BRIG_TYPE_U32, BRIG_SEGMENT_PRIVATE,
- BRIG_LINKAGE_MODULE, true);
-
- hsa_num_threads->m_name = "hsa_num_threads";
-
- hsa_brig_emit_omp_symbols ();
-}
-
-/* Generate HSAIL representation of the current function and write into a
- special section of the output file. If KERNEL is set, the function will be
- considered an HSA kernel callable from the host, otherwise it will be
- compiled as an HSA function callable from other HSA code. */
-
-static void
-generate_hsa (bool kernel)
-{
- hsa_init_data_for_cfun ();
-
- if (hsa_num_threads == NULL)
- emit_hsa_module_variables ();
-
- bool modified_cfg = convert_switch_statements ();
- /* Initialize hsa_cfun. */
- hsa_cfun = new hsa_function_representation (cfun->decl, kernel,
- SSANAMES (cfun)->length (),
- modified_cfg);
- hsa_cfun->init_extra_bbs ();
-
- if (flag_tm)
- {
- HSA_SORRY_AT (UNKNOWN_LOCATION,
- "support for HSA does not implement transactional memory");
- goto fail;
- }
-
- verify_function_arguments (cfun->decl);
- if (hsa_seen_error ())
- goto fail;
-
- hsa_cfun->m_name = get_brig_function_name (cfun->decl);
-
- gen_function_def_parameters ();
- if (hsa_seen_error ())
- goto fail;
-
- init_prologue ();
-
- gen_body_from_gimple ();
- if (hsa_seen_error ())
- goto fail;
-
- if (hsa_cfun->m_kernel_dispatch_count)
- init_hsa_num_threads ();
-
- if (hsa_cfun->m_kern_p)
- {
- hsa_function_summary *s
- = hsa_summaries->get_create (cgraph_node::get (hsa_cfun->m_decl));
- hsa_add_kern_decl_mapping (current_function_decl, hsa_cfun->m_name,
- hsa_cfun->m_maximum_omp_data_size,
- s->m_gridified_kernel_p);
- }
-
- if (flag_checking)
- {
- for (unsigned i = 0; i < hsa_cfun->m_ssa_map.length (); i++)
- if (hsa_cfun->m_ssa_map[i])
- hsa_cfun->m_ssa_map[i]->verify_ssa ();
-
- basic_block bb;
- FOR_EACH_BB_FN (bb, cfun)
- {
- hsa_bb *hbb = hsa_bb_for_bb (bb);
-
- for (hsa_insn_basic *insn = hbb->m_first_insn; insn;
- insn = insn->m_next)
- insn->verify ();
- }
- }
-
- hsa_regalloc ();
- hsa_brig_emit_function ();
-
- fail:
- hsa_deinit_data_for_cfun ();
-}
-
-namespace {
-
-const pass_data pass_data_gen_hsail =
-{
- GIMPLE_PASS,
- "hsagen", /* name */
- OPTGROUP_OMP, /* optinfo_flags */
- TV_NONE, /* tv_id */
- PROP_cfg | PROP_ssa, /* properties_required */
- 0, /* properties_provided */
- 0, /* properties_destroyed */
- 0, /* todo_flags_start */
- 0 /* todo_flags_finish */
-};
-
-class pass_gen_hsail : public gimple_opt_pass
-{
-public:
- pass_gen_hsail (gcc::context *ctxt)
- : gimple_opt_pass(pass_data_gen_hsail, ctxt)
- {}
-
- /* opt_pass methods: */
- bool gate (function *);
- unsigned int execute (function *);
-
-}; // class pass_gen_hsail
-
-/* Determine whether or not to run generation of HSAIL. */
-
-bool
-pass_gen_hsail::gate (function *f)
-{
- return hsa_gen_requested_p ()
- && hsa_gpu_implementation_p (f->decl);
-}
-
-unsigned int
-pass_gen_hsail::execute (function *)
-{
- cgraph_node *node = cgraph_node::get_create (current_function_decl);
- hsa_function_summary *s = hsa_summaries->get_create (node);
-
- expand_builtins ();
- generate_hsa (s->m_kind == HSA_KERNEL);
- TREE_ASM_WRITTEN (current_function_decl) = 1;
- return TODO_discard_function;
-}
-
-} // anon namespace
-
-/* Create the instance of hsa gen pass. */
-
-gimple_opt_pass *
-make_pass_gen_hsail (gcc::context *ctxt)
-{
- return new pass_gen_hsail (ctxt);
-}
+++ /dev/null
-/* HSAIL IL Register allocation and out-of-SSA.
- Copyright (C) 2013-2020 Free Software Foundation, Inc.
- Contributed by Michael Matz <matz@suse.de>
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING3. If not see
-<http://www.gnu.org/licenses/>. */
-
-#include "config.h"
-#include "system.h"
-#include "coretypes.h"
-#include "tm.h"
-#include "is-a.h"
-#include "vec.h"
-#include "tree.h"
-#include "dominance.h"
-#include "basic-block.h"
-#include "function.h"
-#include "cfganal.h"
-#include "cfg.h"
-#include "bitmap.h"
-#include "dumpfile.h"
-#include "cgraph.h"
-#include "print-tree.h"
-#include "cfghooks.h"
-#include "alloc-pool.h"
-#include "symbol-summary.h"
-#include "hsa-common.h"
-
-
-/* Process a PHI node PHI of basic block BB as a part of naive out-f-ssa. */
-
-static void
-naive_process_phi (hsa_insn_phi *phi, const vec<edge> &predecessors)
-{
- unsigned count = phi->operand_count ();
- for (unsigned i = 0; i < count; i++)
- {
- gcc_checking_assert (phi->get_op (i));
- hsa_op_base *op = phi->get_op (i);
- hsa_bb *hbb;
- edge e;
-
- if (!op)
- break;
-
- e = predecessors[i];
- if (single_succ_p (e->src))
- hbb = hsa_bb_for_bb (e->src);
- else
- {
- basic_block old_dest = e->dest;
- hbb = hsa_init_new_bb (split_edge (e));
-
- /* If switch insn used this edge, fix jump table. */
- hsa_bb *source = hsa_bb_for_bb (e->src);
- hsa_insn_sbr *sbr;
- if (source->m_last_insn
- && (sbr = dyn_cast <hsa_insn_sbr *> (source->m_last_insn)))
- sbr->replace_all_labels (old_dest, hbb->m_bb);
- }
-
- hsa_build_append_simple_mov (phi->m_dest, op, hbb);
- }
-}
-
-/* Naive out-of SSA. */
-
-static void
-naive_outof_ssa (void)
-{
- basic_block bb;
-
- hsa_cfun->m_in_ssa = false;
-
- FOR_ALL_BB_FN (bb, cfun)
- {
- hsa_bb *hbb = hsa_bb_for_bb (bb);
- hsa_insn_phi *phi;
-
- /* naive_process_phi can call split_edge on an incoming edge which order if
- the incoming edges to the basic block and thus make it inconsistent with
- the ordering of PHI arguments, so we collect them in advance. */
- auto_vec<edge, 8> predecessors;
- unsigned pred_count = EDGE_COUNT (bb->preds);
- for (unsigned i = 0; i < pred_count; i++)
- predecessors.safe_push (EDGE_PRED (bb, i));
-
- for (phi = hbb->m_first_phi;
- phi;
- phi = phi->m_next ? as_a <hsa_insn_phi *> (phi->m_next) : NULL)
- naive_process_phi (phi, predecessors);
-
- /* Zap PHI nodes, they will be deallocated when everything else will. */
- hbb->m_first_phi = NULL;
- hbb->m_last_phi = NULL;
- }
-}
-
-/* Return register class number for the given HSA TYPE. 0 means the 'c' one
- bit register class, 1 means 's' 32 bit class, 2 stands for 'd' 64 bit class
- and 3 for 'q' 128 bit class. */
-
-static int
-m_reg_class_for_type (BrigType16_t type)
-{
- switch (type)
- {
- case BRIG_TYPE_B1:
- return 0;
-
- case BRIG_TYPE_U8:
- case BRIG_TYPE_U16:
- case BRIG_TYPE_U32:
- case BRIG_TYPE_S8:
- case BRIG_TYPE_S16:
- case BRIG_TYPE_S32:
- case BRIG_TYPE_F16:
- case BRIG_TYPE_F32:
- case BRIG_TYPE_B8:
- case BRIG_TYPE_B16:
- case BRIG_TYPE_B32:
- case BRIG_TYPE_U8X4:
- case BRIG_TYPE_S8X4:
- case BRIG_TYPE_U16X2:
- case BRIG_TYPE_S16X2:
- case BRIG_TYPE_F16X2:
- return 1;
-
- case BRIG_TYPE_U64:
- case BRIG_TYPE_S64:
- case BRIG_TYPE_F64:
- case BRIG_TYPE_B64:
- case BRIG_TYPE_U8X8:
- case BRIG_TYPE_S8X8:
- case BRIG_TYPE_U16X4:
- case BRIG_TYPE_S16X4:
- case BRIG_TYPE_F16X4:
- case BRIG_TYPE_U32X2:
- case BRIG_TYPE_S32X2:
- case BRIG_TYPE_F32X2:
- return 2;
-
- case BRIG_TYPE_B128:
- case BRIG_TYPE_U8X16:
- case BRIG_TYPE_S8X16:
- case BRIG_TYPE_U16X8:
- case BRIG_TYPE_S16X8:
- case BRIG_TYPE_F16X8:
- case BRIG_TYPE_U32X4:
- case BRIG_TYPE_U64X2:
- case BRIG_TYPE_S32X4:
- case BRIG_TYPE_S64X2:
- case BRIG_TYPE_F32X4:
- case BRIG_TYPE_F64X2:
- return 3;
-
- default:
- gcc_unreachable ();
- }
-}
-
-/* If the Ith operands of INSN is or contains a register (in an address),
- return the address of that register operand. If not return NULL. */
-
-static hsa_op_reg **
-insn_reg_addr (hsa_insn_basic *insn, int i)
-{
- hsa_op_base *op = insn->get_op (i);
- if (!op)
- return NULL;
- hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op);
- if (reg)
- return (hsa_op_reg **) insn->get_op_addr (i);
- hsa_op_address *addr = dyn_cast <hsa_op_address *> (op);
- if (addr && addr->m_reg)
- return &addr->m_reg;
- return NULL;
-}
-
-struct m_reg_class_desc
-{
- unsigned next_avail, max_num;
- unsigned used_num, max_used;
- uint64_t used[2];
- char cl_char;
-};
-
-/* Rewrite the instructions in BB to observe spilled live ranges.
- CLASSES is the global register class state. */
-
-static void
-rewrite_code_bb (basic_block bb, struct m_reg_class_desc *classes)
-{
- hsa_bb *hbb = hsa_bb_for_bb (bb);
- hsa_insn_basic *insn, *next_insn;
-
- for (insn = hbb->m_first_insn; insn; insn = next_insn)
- {
- next_insn = insn->m_next;
- unsigned count = insn->operand_count ();
- for (unsigned i = 0; i < count; i++)
- {
- gcc_checking_assert (insn->get_op (i));
- hsa_op_reg **regaddr = insn_reg_addr (insn, i);
-
- if (regaddr)
- {
- hsa_op_reg *reg = *regaddr;
- if (reg->m_reg_class)
- continue;
- gcc_assert (reg->m_spill_sym);
-
- int cl = m_reg_class_for_type (reg->m_type);
- hsa_op_reg *tmp, *tmp2;
- if (insn->op_output_p (i))
- tmp = hsa_spill_out (insn, reg, &tmp2);
- else
- tmp = hsa_spill_in (insn, reg, &tmp2);
-
- *regaddr = tmp;
-
- tmp->m_reg_class = classes[cl].cl_char;
- tmp->m_hard_num = (char) (classes[cl].max_num + i);
- if (tmp2)
- {
- gcc_assert (cl == 0);
- tmp2->m_reg_class = classes[1].cl_char;
- tmp2->m_hard_num = (char) (classes[1].max_num + i);
- }
- }
- }
- }
-}
-
-/* Dump current function to dump file F, with info specific
- to register allocation. */
-
-void
-dump_hsa_cfun_regalloc (FILE *f)
-{
- basic_block bb;
-
- fprintf (f, "\nHSAIL IL for %s\n", hsa_cfun->m_name);
-
- FOR_ALL_BB_FN (bb, cfun)
- {
- hsa_bb *hbb = (class hsa_bb *) bb->aux;
- bitmap_print (dump_file, hbb->m_livein, "m_livein ", "\n");
- dump_hsa_bb (f, hbb);
- bitmap_print (dump_file, hbb->m_liveout, "m_liveout ", "\n");
- }
-}
-
-/* Given the global register allocation state CLASSES and a
- register REG, try to give it a hardware register. If successful,
- store that hardreg in REG and return it, otherwise return -1.
- Also changes CLASSES to accommodate for the allocated register. */
-
-static int
-try_alloc_reg (struct m_reg_class_desc *classes, hsa_op_reg *reg)
-{
- int cl = m_reg_class_for_type (reg->m_type);
- int ret = -1;
- if (classes[1].used_num + classes[2].used_num * 2 + classes[3].used_num * 4
- >= 128 - 5)
- return -1;
- if (classes[cl].used_num < classes[cl].max_num)
- {
- unsigned int i;
- classes[cl].used_num++;
- if (classes[cl].used_num > classes[cl].max_used)
- classes[cl].max_used = classes[cl].used_num;
- for (i = 0; i < classes[cl].used_num; i++)
- if (! (classes[cl].used[i / 64] & (((uint64_t)1) << (i & 63))))
- break;
- ret = i;
- classes[cl].used[i / 64] |= (((uint64_t)1) << (i & 63));
- reg->m_reg_class = classes[cl].cl_char;
- reg->m_hard_num = i;
- }
- return ret;
-}
-
-/* Free up hardregs used by REG, into allocation state CLASSES. */
-
-static void
-free_reg (struct m_reg_class_desc *classes, hsa_op_reg *reg)
-{
- int cl = m_reg_class_for_type (reg->m_type);
- int ret = reg->m_hard_num;
- gcc_assert (reg->m_reg_class == classes[cl].cl_char);
- classes[cl].used_num--;
- classes[cl].used[ret / 64] &= ~(((uint64_t)1) << (ret & 63));
-}
-
-/* Note that the live range for REG ends at least at END. */
-
-static void
-note_lr_end (hsa_op_reg *reg, int end)
-{
- if (reg->m_lr_end < end)
- reg->m_lr_end = end;
-}
-
-/* Note that the live range for REG starts at least at BEGIN. */
-
-static void
-note_lr_begin (hsa_op_reg *reg, int begin)
-{
- if (reg->m_lr_begin > begin)
- reg->m_lr_begin = begin;
-}
-
-/* Given two registers A and B, return -1, 0 or 1 if A's live range
- starts before, at or after B's live range. */
-
-static int
-cmp_begin (const void *a, const void *b)
-{
- const hsa_op_reg * const *rega = (const hsa_op_reg * const *)a;
- const hsa_op_reg * const *regb = (const hsa_op_reg * const *)b;
- int ret;
- if (rega == regb)
- return 0;
- ret = (*rega)->m_lr_begin - (*regb)->m_lr_begin;
- if (ret)
- return ret;
- return ((*rega)->m_order - (*regb)->m_order);
-}
-
-/* Given two registers REGA and REGB, return true if REGA's
- live range ends after REGB's. This results in a sorting order
- with earlier end points at the end. */
-
-static bool
-cmp_end (hsa_op_reg * const ®a, hsa_op_reg * const ®b)
-{
- int ret;
- if (rega == regb)
- return false;
- ret = (regb)->m_lr_end - (rega)->m_lr_end;
- if (ret)
- return ret < 0;
- return (((regb)->m_order - (rega)->m_order)) < 0;
-}
-
-/* Expire all old intervals in ACTIVE (a per-regclass vector),
- that is, those that end before the interval REG starts. Give
- back resources freed so into the state CLASSES. */
-
-static void
-expire_old_intervals (hsa_op_reg *reg, vec<hsa_op_reg*> *active,
- struct m_reg_class_desc *classes)
-{
- for (int i = 0; i < 4; i++)
- while (!active[i].is_empty ())
- {
- hsa_op_reg *a = active[i].pop ();
- if (a->m_lr_end > reg->m_lr_begin)
- {
- active[i].quick_push (a);
- break;
- }
- free_reg (classes, a);
- }
-}
-
-/* The interval REG didn't get a hardreg. Spill it or one of those
- from ACTIVE (if the latter, then REG will become allocated to the
- hardreg that formerly was used by it). */
-
-static void
-spill_at_interval (hsa_op_reg *reg, vec<hsa_op_reg*> *active)
-{
- int cl = m_reg_class_for_type (reg->m_type);
- gcc_assert (!active[cl].is_empty ());
- hsa_op_reg *cand = active[cl][0];
- if (cand->m_lr_end > reg->m_lr_end)
- {
- reg->m_reg_class = cand->m_reg_class;
- reg->m_hard_num = cand->m_hard_num;
- active[cl].ordered_remove (0);
- unsigned place = active[cl].lower_bound (reg, cmp_end);
- active[cl].quick_insert (place, reg);
- }
- else
- cand = reg;
-
- gcc_assert (!cand->m_spill_sym);
- BrigType16_t type = cand->m_type;
- if (type == BRIG_TYPE_B1)
- type = BRIG_TYPE_U8;
- cand->m_reg_class = 0;
- cand->m_spill_sym = hsa_get_spill_symbol (type);
- cand->m_spill_sym->m_name_number = cand->m_order;
-}
-
-/* Given the global register state CLASSES allocate all HSA virtual
- registers either to hardregs or to a spill symbol. */
-
-static void
-linear_scan_regalloc (struct m_reg_class_desc *classes)
-{
- /* Compute liveness. */
- bool changed;
- int i, n;
- int insn_order;
- int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
- bitmap work = BITMAP_ALLOC (NULL);
- vec<hsa_op_reg*> ind2reg = vNULL;
- vec<hsa_op_reg*> active[4] = {vNULL, vNULL, vNULL, vNULL};
- hsa_insn_basic *m_last_insn;
-
- /* We will need the reverse post order for linearization,
- and the post order for liveness analysis, which is the same
- backward. */
- n = pre_and_rev_post_order_compute (NULL, bbs, true);
- ind2reg.safe_grow_cleared (hsa_cfun->m_reg_count);
-
- /* Give all instructions a linearized number, at the same time
- build a mapping from register index to register. */
- insn_order = 1;
- for (i = 0; i < n; i++)
- {
- basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
- hsa_bb *hbb = hsa_bb_for_bb (bb);
- hsa_insn_basic *insn;
- for (insn = hbb->m_first_insn; insn; insn = insn->m_next)
- {
- unsigned opi;
- insn->m_number = insn_order++;
- for (opi = 0; opi < insn->operand_count (); opi++)
- {
- gcc_checking_assert (insn->get_op (opi));
- hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
- if (regaddr)
- ind2reg[(*regaddr)->m_order] = *regaddr;
- }
- }
- }
-
- /* Initialize all live ranges to [after-end, 0). */
- for (i = 0; i < hsa_cfun->m_reg_count; i++)
- if (ind2reg[i])
- ind2reg[i]->m_lr_begin = insn_order, ind2reg[i]->m_lr_end = 0;
-
- /* Classic liveness analysis, as long as something changes:
- m_liveout is union (m_livein of successors)
- m_livein is m_liveout minus defs plus uses. */
- do
- {
- changed = false;
- for (i = n - 1; i >= 0; i--)
- {
- edge e;
- edge_iterator ei;
- basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
- hsa_bb *hbb = hsa_bb_for_bb (bb);
-
- /* Union of successors m_livein (or empty if none). */
- bool first = true;
- FOR_EACH_EDGE (e, ei, bb->succs)
- if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
- {
- hsa_bb *succ = hsa_bb_for_bb (e->dest);
- if (first)
- {
- bitmap_copy (work, succ->m_livein);
- first = false;
- }
- else
- bitmap_ior_into (work, succ->m_livein);
- }
- if (first)
- bitmap_clear (work);
-
- bitmap_copy (hbb->m_liveout, work);
-
- /* Remove defs, include uses in a backward insn walk. */
- hsa_insn_basic *insn;
- for (insn = hbb->m_last_insn; insn; insn = insn->m_prev)
- {
- unsigned opi;
- unsigned ndefs = insn->input_count ();
- for (opi = 0; opi < ndefs && insn->get_op (opi); opi++)
- {
- gcc_checking_assert (insn->get_op (opi));
- hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
- if (regaddr)
- bitmap_clear_bit (work, (*regaddr)->m_order);
- }
- for (; opi < insn->operand_count (); opi++)
- {
- gcc_checking_assert (insn->get_op (opi));
- hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
- if (regaddr)
- bitmap_set_bit (work, (*regaddr)->m_order);
- }
- }
-
- /* Note if that changed something. */
- if (bitmap_ior_into (hbb->m_livein, work))
- changed = true;
- }
- }
- while (changed);
-
- /* Make one pass through all instructions in linear order,
- noting and merging possible live range start and end points. */
- m_last_insn = NULL;
- for (i = n - 1; i >= 0; i--)
- {
- basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
- hsa_bb *hbb = hsa_bb_for_bb (bb);
- hsa_insn_basic *insn;
- int after_end_number;
- unsigned bit;
- bitmap_iterator bi;
-
- if (m_last_insn)
- after_end_number = m_last_insn->m_number;
- else
- after_end_number = insn_order;
- /* Everything live-out in this BB has at least an end point
- after us. */
- EXECUTE_IF_SET_IN_BITMAP (hbb->m_liveout, 0, bit, bi)
- note_lr_end (ind2reg[bit], after_end_number);
-
- for (insn = hbb->m_last_insn; insn; insn = insn->m_prev)
- {
- unsigned opi;
- unsigned ndefs = insn->input_count ();
- for (opi = 0; opi < insn->operand_count (); opi++)
- {
- gcc_checking_assert (insn->get_op (opi));
- hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
- if (regaddr)
- {
- hsa_op_reg *reg = *regaddr;
- if (opi < ndefs)
- note_lr_begin (reg, insn->m_number);
- else
- note_lr_end (reg, insn->m_number);
- }
- }
- }
-
- /* Everything live-in in this BB has a start point before
- our first insn. */
- int before_start_number;
- if (hbb->m_first_insn)
- before_start_number = hbb->m_first_insn->m_number;
- else
- before_start_number = after_end_number;
- before_start_number--;
- EXECUTE_IF_SET_IN_BITMAP (hbb->m_livein, 0, bit, bi)
- note_lr_begin (ind2reg[bit], before_start_number);
-
- if (hbb->m_first_insn)
- m_last_insn = hbb->m_first_insn;
- }
-
- for (i = 0; i < hsa_cfun->m_reg_count; i++)
- if (ind2reg[i])
- {
- /* All regs that have still their start at after all code actually
- are defined at the start of the routine (prologue). */
- if (ind2reg[i]->m_lr_begin == insn_order)
- ind2reg[i]->m_lr_begin = 0;
- /* All regs that have no use but a def will have lr_end == 0,
- they are actually live from def until after the insn they are
- defined in. */
- if (ind2reg[i]->m_lr_end == 0)
- ind2reg[i]->m_lr_end = ind2reg[i]->m_lr_begin + 1;
- }
-
- /* Sort all intervals by increasing start point. */
- gcc_assert (ind2reg.length () == (size_t) hsa_cfun->m_reg_count);
-
- if (flag_checking)
- for (unsigned i = 0; i < ind2reg.length (); i++)
- gcc_assert (ind2reg[i]);
-
- ind2reg.qsort (cmp_begin);
- for (i = 0; i < 4; i++)
- active[i].reserve_exact (hsa_cfun->m_reg_count);
-
- /* Now comes the linear scan allocation. */
- for (i = 0; i < hsa_cfun->m_reg_count; i++)
- {
- hsa_op_reg *reg = ind2reg[i];
- if (!reg)
- continue;
- expire_old_intervals (reg, active, classes);
- int cl = m_reg_class_for_type (reg->m_type);
- if (try_alloc_reg (classes, reg) >= 0)
- {
- unsigned place = active[cl].lower_bound (reg, cmp_end);
- active[cl].quick_insert (place, reg);
- }
- else
- spill_at_interval (reg, active);
-
- /* Some interesting dumping as we go. */
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file, " reg%d: [%5d, %5d)->",
- reg->m_order, reg->m_lr_begin, reg->m_lr_end);
- if (reg->m_reg_class)
- fprintf (dump_file, "$%c%i", reg->m_reg_class, reg->m_hard_num);
- else
- fprintf (dump_file, "[%%__%s_%i]",
- hsa_seg_name (reg->m_spill_sym->m_segment),
- reg->m_spill_sym->m_name_number);
- for (int cl = 0; cl < 4; cl++)
- {
- bool first = true;
- hsa_op_reg *r;
- fprintf (dump_file, " {");
- for (int j = 0; active[cl].iterate (j, &r); j++)
- if (first)
- {
- fprintf (dump_file, "%d", r->m_order);
- first = false;
- }
- else
- fprintf (dump_file, ", %d", r->m_order);
- fprintf (dump_file, "}");
- }
- fprintf (dump_file, "\n");
- }
- }
-
- BITMAP_FREE (work);
- free (bbs);
-
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file, "------- After liveness: -------\n");
- dump_hsa_cfun_regalloc (dump_file);
- fprintf (dump_file, " ----- Intervals:\n");
- for (i = 0; i < hsa_cfun->m_reg_count; i++)
- {
- hsa_op_reg *reg = ind2reg[i];
- if (!reg)
- continue;
- fprintf (dump_file, " reg%d: [%5d, %5d)->", reg->m_order,
- reg->m_lr_begin, reg->m_lr_end);
- if (reg->m_reg_class)
- fprintf (dump_file, "$%c%i\n", reg->m_reg_class, reg->m_hard_num);
- else
- fprintf (dump_file, "[%%__%s_%i]\n",
- hsa_seg_name (reg->m_spill_sym->m_segment),
- reg->m_spill_sym->m_name_number);
- }
- }
-
- for (i = 0; i < 4; i++)
- active[i].release ();
- ind2reg.release ();
-}
-
-/* Entry point for register allocation. */
-
-static void
-regalloc (void)
-{
- basic_block bb;
- m_reg_class_desc classes[4];
-
- /* If there are no registers used in the function, exit right away. */
- if (hsa_cfun->m_reg_count == 0)
- return;
-
- memset (classes, 0, sizeof (classes));
- classes[0].next_avail = 0;
- classes[0].max_num = 7;
- classes[0].cl_char = 'c';
- classes[1].cl_char = 's';
- classes[2].cl_char = 'd';
- classes[3].cl_char = 'q';
-
- for (int i = 1; i < 4; i++)
- {
- classes[i].next_avail = 0;
- classes[i].max_num = 20;
- }
-
- linear_scan_regalloc (classes);
-
- FOR_ALL_BB_FN (bb, cfun)
- rewrite_code_bb (bb, classes);
-}
-
-/* Out of SSA and register allocation on HSAIL IL. */
-
-void
-hsa_regalloc (void)
-{
- hsa_cfun->update_dominance ();
- naive_outof_ssa ();
-
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file, "------- After out-of-SSA: -------\n");
- dump_hsa_cfun (dump_file);
- }
-
- regalloc ();
-
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file, "------- After register allocation: -------\n");
- dump_hsa_cfun (dump_file);
- }
-}
+++ /dev/null
-/* Callgraph based analysis of static variables.
- Copyright (C) 2015-2020 Free Software Foundation, Inc.
- Contributed by Martin Liska <mliska@suse.cz>
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 3, or (at your option) any later
-version.
-
-GCC is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING3. If not see
-<http://www.gnu.org/licenses/>. */
-
-/* Interprocedural HSA pass is responsible for creation of HSA clones.
- For all these HSA clones, we emit HSAIL instructions and pass processing
- is terminated. */
-
-#include "config.h"
-#include "system.h"
-#include "coretypes.h"
-#include "tm.h"
-#include "is-a.h"
-#include "hash-set.h"
-#include "vec.h"
-#include "tree.h"
-#include "tree-pass.h"
-#include "function.h"
-#include "basic-block.h"
-#include "gimple.h"
-#include "dumpfile.h"
-#include "gimple-pretty-print.h"
-#include "tree-streamer.h"
-#include "stringpool.h"
-#include "cgraph.h"
-#include "print-tree.h"
-#include "alloc-pool.h"
-#include "symbol-summary.h"
-#include "hsa-common.h"
-
-namespace {
-
-/* If NODE is not versionable, warn about not emiting HSAIL and return false.
- Otherwise return true. */
-
-static bool
-check_warn_node_versionable (cgraph_node *node)
-{
- if (!node->versionable)
- {
- warning_at (EXPR_LOCATION (node->decl), OPT_Whsa,
- "could not emit HSAIL for function %s: function cannot be "
- "cloned", node->dump_name ());
- return false;
- }
- return true;
-}
-
-/* The function creates HSA clones for all functions that were either
- marked as HSA kernels or are callable HSA functions. Apart from that,
- we redirect all edges that come from an HSA clone and end in another
- HSA clone to connect these two functions. */
-
-static unsigned int
-process_hsa_functions (void)
-{
- struct cgraph_node *node;
-
- if (hsa_summaries == NULL)
- hsa_summaries = new hsa_summary_t (symtab);
-
- FOR_EACH_DEFINED_FUNCTION (node)
- {
- hsa_function_summary *s = hsa_summaries->get (node);
-
- /* A linked function is skipped. */
- if (s != NULL && s->m_bound_function != NULL)
- continue;
-
- if (s != NULL)
- {
- if (!check_warn_node_versionable (node))
- continue;
- cgraph_node *clone
- = node->create_virtual_clone (vec <cgraph_edge *> (),
- NULL, NULL, "hsa", 0);
- TREE_PUBLIC (clone->decl) = TREE_PUBLIC (node->decl);
- clone->externally_visible = node->externally_visible;
-
- clone->force_output = true;
- hsa_summaries->link_functions (clone, node, s->m_kind, false);
-
- if (dump_file)
- fprintf (dump_file, "Created a new HSA clone: %s, type: %s\n",
- clone->dump_name (),
- s->m_kind == HSA_KERNEL ? "kernel" : "function");
- }
- else if (hsa_callable_function_p (node->decl)
- /* At this point, this is enough to identify clones for
- parallel, which for HSA would need to be kernels anyway. */
- && !DECL_ARTIFICIAL (node->decl))
- {
- if (!check_warn_node_versionable (node))
- continue;
- cgraph_node *clone
- = node->create_virtual_clone (vec <cgraph_edge *> (),
- NULL, NULL, "hsa", 0);
- TREE_PUBLIC (clone->decl) = TREE_PUBLIC (node->decl);
- clone->externally_visible = node->externally_visible;
-
- if (!node->local)
- clone->force_output = true;
- hsa_summaries->link_functions (clone, node, HSA_FUNCTION, false);
-
- if (dump_file)
- fprintf (dump_file, "Created a new HSA function clone: %s\n",
- clone->dump_name ());
- }
- }
-
- /* Redirect all edges that are between HSA clones. */
- FOR_EACH_DEFINED_FUNCTION (node)
- {
- cgraph_edge *e = node->callees;
-
- while (e)
- {
- hsa_function_summary *src = hsa_summaries->get (node);
- if (src != NULL && src->m_gpu_implementation_p)
- {
- hsa_function_summary *dst = hsa_summaries->get (e->callee);
- if (dst != NULL && !dst->m_gpu_implementation_p)
- {
- e->redirect_callee (dst->m_bound_function);
- if (dump_file)
- fprintf (dump_file,
- "Redirecting edge to HSA function: %s->%s\n",
- e->caller->dump_name (),
- e->callee->dump_name ());
- }
- }
-
- e = e->next_callee;
- }
- }
-
- return 0;
-}
-
-/* Iterate all HSA functions and stream out HSA function summary. */
-
-static void
-ipa_hsa_write_summary (void)
-{
- struct bitpack_d bp;
- struct cgraph_node *node;
- struct output_block *ob;
- unsigned int count = 0;
- lto_symtab_encoder_iterator lsei;
- lto_symtab_encoder_t encoder;
-
- if (!hsa_summaries)
- return;
-
- ob = create_output_block (LTO_section_ipa_hsa);
- encoder = ob->decl_state->symtab_node_encoder;
- ob->symbol = NULL;
- for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
- lsei_next_function_in_partition (&lsei))
- {
- node = lsei_cgraph_node (lsei);
- hsa_function_summary *s = hsa_summaries->get (node);
-
- if (s != NULL)
- count++;
- }
-
- streamer_write_uhwi (ob, count);
-
- /* Process all of the functions. */
- for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
- lsei_next_function_in_partition (&lsei))
- {
- node = lsei_cgraph_node (lsei);
- hsa_function_summary *s = hsa_summaries->get (node);
-
- if (s != NULL)
- {
- encoder = ob->decl_state->symtab_node_encoder;
- int node_ref = lto_symtab_encoder_encode (encoder, node);
- streamer_write_uhwi (ob, node_ref);
-
- bp = bitpack_create (ob->main_stream);
- bp_pack_value (&bp, s->m_kind, 2);
- bp_pack_value (&bp, s->m_gpu_implementation_p, 1);
- bp_pack_value (&bp, s->m_bound_function != NULL, 1);
- streamer_write_bitpack (&bp);
- if (s->m_bound_function)
- stream_write_tree (ob, s->m_bound_function->decl, true);
- }
- }
-
- streamer_write_char_stream (ob->main_stream, 0);
- produce_asm (ob, NULL);
- destroy_output_block (ob);
-}
-
-/* Read section in file FILE_DATA of length LEN with data DATA. */
-
-static void
-ipa_hsa_read_section (struct lto_file_decl_data *file_data, const char *data,
- size_t len)
-{
- const struct lto_function_header *header
- = (const struct lto_function_header *) data;
- const int cfg_offset = sizeof (struct lto_function_header);
- const int main_offset = cfg_offset + header->cfg_size;
- const int string_offset = main_offset + header->main_size;
- class data_in *data_in;
- unsigned int i;
- unsigned int count;
-
- lto_input_block ib_main ((const char *) data + main_offset,
- header->main_size, file_data->mode_table);
-
- data_in
- = lto_data_in_create (file_data, (const char *) data + string_offset,
- header->string_size, vNULL);
- count = streamer_read_uhwi (&ib_main);
-
- for (i = 0; i < count; i++)
- {
- unsigned int index;
- struct cgraph_node *node;
- lto_symtab_encoder_t encoder;
-
- index = streamer_read_uhwi (&ib_main);
- encoder = file_data->symtab_node_encoder;
- node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
- index));
- gcc_assert (node->definition);
- hsa_function_summary *s = hsa_summaries->get_create (node);
-
- struct bitpack_d bp = streamer_read_bitpack (&ib_main);
- s->m_kind = (hsa_function_kind) bp_unpack_value (&bp, 2);
- s->m_gpu_implementation_p = bp_unpack_value (&bp, 1);
- bool has_tree = bp_unpack_value (&bp, 1);
-
- if (has_tree)
- {
- tree decl = stream_read_tree (&ib_main, data_in);
- s->m_bound_function = cgraph_node::get_create (decl);
- }
- }
- lto_free_section_data (file_data, LTO_section_ipa_hsa, NULL, data,
- len);
- lto_data_in_delete (data_in);
-}
-
-/* Load streamed HSA functions summary and assign the summary to a function. */
-
-static void
-ipa_hsa_read_summary (void)
-{
- struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
- struct lto_file_decl_data *file_data;
- unsigned int j = 0;
-
- if (hsa_summaries == NULL)
- hsa_summaries = new hsa_summary_t (symtab);
-
- while ((file_data = file_data_vec[j++]))
- {
- size_t len;
- const char *data
- = lto_get_summary_section_data (file_data, LTO_section_ipa_hsa, &len);
- if (data)
- ipa_hsa_read_section (file_data, data, len);
- }
-}
-
-const pass_data pass_data_ipa_hsa =
-{
- IPA_PASS, /* type */
- "hsa", /* name */
- OPTGROUP_OMP, /* optinfo_flags */
- TV_IPA_HSA, /* tv_id */
- 0, /* properties_required */
- 0, /* properties_provided */
- 0, /* properties_destroyed */
- 0, /* todo_flags_start */
- TODO_dump_symtab, /* todo_flags_finish */
-};
-
-class pass_ipa_hsa : public ipa_opt_pass_d
-{
-public:
- pass_ipa_hsa (gcc::context *ctxt)
- : ipa_opt_pass_d (pass_data_ipa_hsa, ctxt,
- NULL, /* generate_summary */
- ipa_hsa_write_summary, /* write_summary */
- ipa_hsa_read_summary, /* read_summary */
- ipa_hsa_write_summary, /* write_optimization_summary */
- ipa_hsa_read_summary, /* read_optimization_summary */
- NULL, /* stmt_fixup */
- 0, /* function_transform_todo_flags_start */
- NULL, /* function_transform */
- NULL) /* variable_transform */
- {}
-
- /* opt_pass methods: */
- virtual bool gate (function *);
-
- virtual unsigned int execute (function *) { return process_hsa_functions (); }
-
-}; // class pass_ipa_reference
-
-bool
-pass_ipa_hsa::gate (function *)
-{
- return hsa_gen_requested_p ();
-}
-
-} // anon namespace
-
-ipa_opt_pass_d *
-make_pass_ipa_hsa (gcc::context *ctxt)
-{
- return new pass_ipa_hsa (ctxt);
-}
"icf",
"offload_table",
"mode_table",
- "hsa",
"lto",
"ipa_sra",
"odr_types",
LTO_section_ipa_icf,
LTO_section_offload_table,
LTO_section_mode_table,
- LTO_section_ipa_hsa,
LTO_section_lto,
LTO_section_ipa_sra,
LTO_section_odr_types,
return;
unsigned num_targets = parse_env_var (target_names, &names, NULL);
- int next_name_entry = 0;
const char *compiler_path = getenv ("COMPILER_PATH");
if (!compiler_path)
goto out;
offload_names = XCNEWVEC (char *, num_targets + 1);
for (unsigned i = 0; i < num_targets; i++)
{
- /* HSA does not use LTO-like streaming and a different compiler, skip
- it. */
- if (strcmp (names[i], "hsa") == 0)
- continue;
-
- offload_names[next_name_entry]
+ offload_names[i]
= compile_offload_image (names[i], compiler_path, in_argc, in_argv,
compiler_opts, compiler_opt_count,
linker_opts, linker_opt_count);
- if (!offload_names[next_name_entry])
+ if (!offload_names[i])
fatal_error (input_location,
"problem with building target image for %s", names[i]);
- next_name_entry++;
}
out:
#include "symbol-summary.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"
-#include "hsa-common.h"
#include "stringpool.h"
#include "attribs.h"
return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
}
-/* Return true if a parallel REGION is within a declare target function or
- within a target region and is not a part of a gridified target. */
-
-static bool
-parallel_needs_hsa_kernel_p (struct omp_region *region)
-{
- bool indirect = false;
- for (region = region->outer; region; region = region->outer)
- {
- if (region->type == GIMPLE_OMP_PARALLEL)
- indirect = true;
- else if (region->type == GIMPLE_OMP_TARGET)
- {
- gomp_target *tgt_stmt
- = as_a <gomp_target *> (last_stmt (region->entry));
-
- if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
- OMP_CLAUSE__GRIDDIM_))
- return indirect;
- else
- return true;
- }
- }
-
- if (lookup_attribute ("omp declare target",
- DECL_ATTRIBUTES (current_function_decl)))
- return true;
-
- return false;
-}
-
/* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
Add CHILD_FNDECL to decl chain of the supercontext of the block
ENTRY_BLOCK - this is the block which originally contained the
}
force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
-
- if (hsa_gen_requested_p ()
- && parallel_needs_hsa_kernel_p (region))
- {
- cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
- hsa_register_kernel (child_cnode);
- }
}
/* Build the function call to GOMP_task to actually
loop->in_oacc_kernels_region = true;
}
-/* Types used to pass grid and wortkgroup sizes to kernel invocation. */
-
-struct GTY(()) grid_launch_attributes_trees
-{
- tree kernel_dim_array_type;
- tree kernel_lattrs_dimnum_decl;
- tree kernel_lattrs_grid_decl;
- tree kernel_lattrs_group_decl;
- tree kernel_launch_attributes_type;
-};
-
-static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
-
-/* Create types used to pass kernel launch attributes to target. */
-
-static void
-grid_create_kernel_launch_attr_types (void)
-{
- if (grid_attr_trees)
- return;
- grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
-
- tree dim_arr_index_type
- = build_index_type (build_int_cst (integer_type_node, 2));
- grid_attr_trees->kernel_dim_array_type
- = build_array_type (uint32_type_node, dim_arr_index_type);
-
- grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
- grid_attr_trees->kernel_lattrs_dimnum_decl
- = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
- uint32_type_node);
- DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
-
- grid_attr_trees->kernel_lattrs_grid_decl
- = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
- grid_attr_trees->kernel_dim_array_type);
- DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
- = grid_attr_trees->kernel_lattrs_dimnum_decl;
- grid_attr_trees->kernel_lattrs_group_decl
- = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
- grid_attr_trees->kernel_dim_array_type);
- DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
- = grid_attr_trees->kernel_lattrs_grid_decl;
- finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
- "__gomp_kernel_launch_attributes",
- grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
-}
-
-/* Insert before the current statement in GSI a store of VALUE to INDEX of
- array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
- of type uint32_type_node. */
-
-static void
-grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
- tree fld_decl, int index, tree value)
-{
- tree ref = build4 (ARRAY_REF, uint32_type_node,
- build3 (COMPONENT_REF,
- grid_attr_trees->kernel_dim_array_type,
- range_var, fld_decl, NULL_TREE),
- build_int_cst (integer_type_node, index),
- NULL_TREE, NULL_TREE);
- gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
-}
-
-/* Return a tree representation of a pointer to a structure with grid and
- work-group size information. Statements filling that information will be
- inserted before GSI, TGT_STMT is the target statement which has the
- necessary information in it. */
-
-static tree
-grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
- gomp_target *tgt_stmt)
-{
- grid_create_kernel_launch_attr_types ();
- tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
- "__kernel_launch_attrs");
-
- unsigned max_dim = 0;
- for (tree clause = gimple_omp_target_clauses (tgt_stmt);
- clause;
- clause = OMP_CLAUSE_CHAIN (clause))
- {
- if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
- continue;
-
- unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
- max_dim = MAX (dim, max_dim);
-
- grid_insert_store_range_dim (gsi, lattrs,
- grid_attr_trees->kernel_lattrs_grid_decl,
- dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
- grid_insert_store_range_dim (gsi, lattrs,
- grid_attr_trees->kernel_lattrs_group_decl,
- dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
- }
-
- tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
- grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
- gcc_checking_assert (max_dim <= 2);
- tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
- gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
- GSI_SAME_STMT);
- TREE_ADDRESSABLE (lattrs) = 1;
- return build_fold_addr_expr (lattrs);
-}
-
/* Build target argument identifier from the DEVICE identifier, value
identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
GOMP_TARGET_ARG_THREAD_LIMIT, t,
&args);
- /* Add HSA-specific grid sizes, if available. */
- if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
- OMP_CLAUSE__GRIDDIM_))
- {
- int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
- t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
- args.quick_push (t);
- args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
- }
-
/* Produce more, perhaps device specific, arguments here. */
tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
}
}
-/* Expand KFOR loop as a HSA grifidied kernel, i.e. as a body only with
- iteration variable derived from the thread number. INTRA_GROUP means this
- is an expansion of a loop iterating over work-items within a separate
- iteration over groups. */
-
-static void
-grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
-{
- gimple_stmt_iterator gsi;
- gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
- gcc_checking_assert (gimple_omp_for_kind (for_stmt)
- == GF_OMP_FOR_KIND_GRID_LOOP);
- size_t collapse = gimple_omp_for_collapse (for_stmt);
- struct omp_for_data_loop *loops
- = XALLOCAVEC (struct omp_for_data_loop,
- gimple_omp_for_collapse (for_stmt));
- struct omp_for_data fd;
-
- remove_edge (BRANCH_EDGE (kfor->entry));
- basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
-
- gcc_assert (kfor->cont);
- omp_extract_for_data (for_stmt, &fd, loops);
-
- gsi = gsi_start_bb (body_bb);
-
- for (size_t dim = 0; dim < collapse; dim++)
- {
- tree type, itype;
- itype = type = TREE_TYPE (fd.loops[dim].v);
- if (POINTER_TYPE_P (type))
- itype = signed_type_for (type);
-
- tree n1 = fd.loops[dim].n1;
- tree step = fd.loops[dim].step;
- n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
- true, NULL_TREE, true, GSI_SAME_STMT);
- step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
- true, NULL_TREE, true, GSI_SAME_STMT);
- tree threadid;
- if (gimple_omp_for_grid_group_iter (for_stmt))
- {
- gcc_checking_assert (!intra_group);
- threadid = build_call_expr (builtin_decl_explicit
- (BUILT_IN_HSA_WORKGROUPID), 1,
- build_int_cstu (unsigned_type_node, dim));
- }
- else if (intra_group)
- threadid = build_call_expr (builtin_decl_explicit
- (BUILT_IN_HSA_WORKITEMID), 1,
- build_int_cstu (unsigned_type_node, dim));
- else
- threadid = build_call_expr (builtin_decl_explicit
- (BUILT_IN_HSA_WORKITEMABSID), 1,
- build_int_cstu (unsigned_type_node, dim));
- threadid = fold_convert (itype, threadid);
- threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
- true, GSI_SAME_STMT);
-
- tree startvar = fd.loops[dim].v;
- tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
- if (POINTER_TYPE_P (type))
- t = fold_build_pointer_plus (n1, t);
- else
- t = fold_build2 (PLUS_EXPR, type, t, n1);
- t = fold_convert (type, t);
- t = force_gimple_operand_gsi (&gsi, t,
- DECL_P (startvar)
- && TREE_ADDRESSABLE (startvar),
- NULL_TREE, true, GSI_SAME_STMT);
- gassign *assign_stmt = gimple_build_assign (startvar, t);
- gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
- }
- /* Remove the omp for statement. */
- gsi = gsi_last_nondebug_bb (kfor->entry);
- gsi_remove (&gsi, true);
-
- /* Remove the GIMPLE_OMP_CONTINUE statement. */
- gsi = gsi_last_nondebug_bb (kfor->cont);
- gcc_assert (!gsi_end_p (gsi)
- && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
- gsi_remove (&gsi, true);
-
- /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
- gsi = gsi_last_nondebug_bb (kfor->exit);
- gcc_assert (!gsi_end_p (gsi)
- && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
- if (intra_group)
- gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
- gsi_remove (&gsi, true);
-
- /* Fixup the much simpler CFG. */
- remove_edge (find_edge (kfor->cont, body_bb));
-
- if (kfor->cont != body_bb)
- set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
- set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
-}
-
-/* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
- argument_decls. */
-
-struct grid_arg_decl_map
-{
- tree old_arg;
- tree new_arg;
-};
-
-/* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
- pertaining to kernel function. */
-
-static tree
-grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
-{
- struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
- struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
- tree t = *tp;
-
- if (t == adm->old_arg)
- *tp = adm->new_arg;
- *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
- return NULL_TREE;
-}
-
-/* If TARGET region contains a kernel body for loop, remove its region from the
- TARGET and expand it in HSA gridified kernel fashion. */
-
-static void
-grid_expand_target_grid_body (struct omp_region *target)
-{
- if (!hsa_gen_requested_p ())
- return;
-
- gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
- struct omp_region **pp;
-
- for (pp = &target->inner; *pp; pp = &(*pp)->next)
- if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
- break;
-
- struct omp_region *gpukernel = *pp;
-
- tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
- if (!gpukernel)
- {
- /* HSA cannot handle OACC stuff. */
- if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
- return;
- gcc_checking_assert (orig_child_fndecl);
- gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
- OMP_CLAUSE__GRIDDIM_));
- cgraph_node *n = cgraph_node::get (orig_child_fndecl);
-
- hsa_register_kernel (n);
- return;
- }
-
- gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
- OMP_CLAUSE__GRIDDIM_));
- tree inside_block
- = gimple_block (first_stmt (single_succ (gpukernel->entry)));
- *pp = gpukernel->next;
- for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
- if ((*pp)->type == GIMPLE_OMP_FOR)
- break;
-
- struct omp_region *kfor = *pp;
- gcc_assert (kfor);
- gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
- gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
- *pp = kfor->next;
- if (kfor->inner)
- {
- if (gimple_omp_for_grid_group_iter (for_stmt))
- {
- struct omp_region **next_pp;
- for (pp = &kfor->inner; *pp; pp = next_pp)
- {
- next_pp = &(*pp)->next;
- if ((*pp)->type != GIMPLE_OMP_FOR)
- continue;
- gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
- gcc_assert (gimple_omp_for_kind (inner)
- == GF_OMP_FOR_KIND_GRID_LOOP);
- grid_expand_omp_for_loop (*pp, true);
- *pp = (*pp)->next;
- next_pp = pp;
- }
- }
- expand_omp (kfor->inner);
- }
- if (gpukernel->inner)
- expand_omp (gpukernel->inner);
-
- tree kern_fndecl = copy_node (orig_child_fndecl);
- DECL_NAME (kern_fndecl) = clone_function_name_numbered (kern_fndecl,
- "kernel");
- SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
- tree tgtblock = gimple_block (tgt_stmt);
- tree fniniblock = make_node (BLOCK);
- BLOCK_ABSTRACT_ORIGIN (fniniblock) = BLOCK_ORIGIN (tgtblock);
- BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
- BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
- BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
- DECL_INITIAL (kern_fndecl) = fniniblock;
- push_struct_function (kern_fndecl);
- cfun->function_end_locus = gimple_location (tgt_stmt);
- init_tree_ssa (cfun);
- pop_cfun ();
-
- tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
- gcc_assert (!DECL_CHAIN (old_parm_decl));
- tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
- DECL_CONTEXT (new_parm_decl) = kern_fndecl;
- DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
- gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
- DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
- DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
- struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
- kern_cfun->curr_properties = cfun->curr_properties;
-
- grid_expand_omp_for_loop (kfor, false);
-
- /* Remove the omp for statement. */
- gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
- gsi_remove (&gsi, true);
- /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
- return. */
- gsi = gsi_last_nondebug_bb (gpukernel->exit);
- gcc_assert (!gsi_end_p (gsi)
- && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
- gimple *ret_stmt = gimple_build_return (NULL);
- gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
- gsi_remove (&gsi, true);
-
- /* Statements in the first BB in the target construct have been produced by
- target lowering and must be copied inside the GPUKERNEL, with the two
- exceptions of the first OMP statement and the OMP_DATA assignment
- statement. */
- gsi = gsi_start_bb (single_succ (gpukernel->entry));
- tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
- tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
- for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
- !gsi_end_p (tsi); gsi_next (&tsi))
- {
- gimple *stmt = gsi_stmt (tsi);
- if (is_gimple_omp (stmt))
- break;
- if (sender
- && is_gimple_assign (stmt)
- && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
- && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
- continue;
- gimple *copy = gimple_copy (stmt);
- gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
- gimple_set_block (copy, fniniblock);
- }
-
- move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
- gpukernel->exit, inside_block);
-
- cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
- kcn->mark_force_output ();
- cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
-
- hsa_register_kernel (kcn, orig_child);
-
- cgraph_node::add_new_function (kern_fndecl, true);
- push_cfun (kern_cfun);
- cgraph_edge::rebuild_edges ();
-
- /* Re-map any mention of the PARM_DECL of the original function to the
- PARM_DECL of the new one.
-
- TODO: It would be great if lowering produced references into the GPU
- kernel decl straight away and we did not have to do this. */
- struct grid_arg_decl_map adm;
- adm.old_arg = old_parm_decl;
- adm.new_arg = new_parm_decl;
- basic_block bb;
- FOR_EACH_BB_FN (bb, kern_cfun)
- {
- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
- {
- gimple *stmt = gsi_stmt (gsi);
- struct walk_stmt_info wi;
- memset (&wi, 0, sizeof (wi));
- wi.info = &adm;
- walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
- }
- }
- pop_cfun ();
-
- return;
-}
-
/* Expand the parallel region tree rooted at REGION. Expansion
proceeds in depth-first order. Innermost regions are expanded
first. This way, parallel regions that require a new function to
region. */
if (region->type == GIMPLE_OMP_PARALLEL)
determine_parallel_type (region);
- else if (region->type == GIMPLE_OMP_TARGET)
- grid_expand_target_grid_body (region);
if (region->type == GIMPLE_OMP_FOR
&& gimple_omp_for_combined_p (last_stmt (region->entry)))
case GIMPLE_OMP_TASKGROUP:
case GIMPLE_OMP_CRITICAL:
case GIMPLE_OMP_SECTION:
- case GIMPLE_OMP_GRID_BODY:
cur_region = new_omp_region (bb, code, cur_region);
fallthru = true;
break;
return fallthru;
}
-
-#include "gt-omp-expand.h"
#include "cgraph.h"
#include "alloc-pool.h"
#include "symbol-summary.h"
-#include "hsa-common.h"
#include "tree-pass.h"
#include "omp-device-properties.h"
#include "tree-iterator.h"
static bool
omp_maybe_offloaded (void)
{
- if (!hsa_gen_requested_p ())
- {
- if (!ENABLE_OFFLOADING)
- return false;
- const char *names = getenv ("OFFLOAD_TARGET_NAMES");
- if (names == NULL || *names == '\0')
- return false;
- }
+ if (!ENABLE_OFFLOADING)
+ return false;
+ const char *names = getenv ("OFFLOAD_TARGET_NAMES");
+ if (names == NULL || *names == '\0')
+ return false;
+
if (symtab->state == PARSING)
/* Maybe. */
return true;
also offloading values. */
if (!omp_maybe_offloaded ())
return 0;
- if (strcmp (arch, "hsa") == 0
- && hsa_gen_requested_p ())
- {
- ret = -1;
- continue;
- }
if (ENABLE_OFFLOADING)
{
const char *arches = omp_offload_device_arch;
also offloading values. */
if (!omp_maybe_offloaded ())
return 0;
- if (strcmp (prop, "gpu") == 0
- && hsa_gen_requested_p ())
- {
- ret = -1;
- continue;
- }
if (ENABLE_OFFLOADING)
{
const char *kinds = omp_offload_device_kind;
+++ /dev/null
-/* Lowering and expansion of OpenMP directives for HSA GPU agents.
-
- Copyright (C) 2013-2020 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 3, or (at your option) any later
-version.
-
-GCC is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING3. If not see
-<http://www.gnu.org/licenses/>. */
-
-#include "config.h"
-#include "system.h"
-#include "coretypes.h"
-#include "backend.h"
-#include "tree.h"
-#include "gimple.h"
-#include "tree-pass.h"
-#include "ssa.h"
-#include "cgraph.h"
-#include "pretty-print.h"
-#include "fold-const.h"
-#include "gimplify.h"
-#include "gimple-iterator.h"
-#include "gimple-walk.h"
-#include "tree-inline.h"
-#include "langhooks.h"
-#include "omp-general.h"
-#include "omp-low.h"
-#include "omp-grid.h"
-#include "gimple-pretty-print.h"
-
-/* Return the lastprivate predicate for a given gridified loop described by
- FD). */
-
-tree
-omp_grid_lastprivate_predicate (struct omp_for_data *fd)
-{
- /* When dealing with a gridified loop, we need to check up to three collapsed
- iteration variables but they are not actually captured in this fd.
- Fortunately, we can easily rely on HSA builtins to get this
- information. */
-
- tree id, size;
- if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP
- && gimple_omp_for_grid_intra_group (fd->for_stmt))
- {
- id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMID);
- size = builtin_decl_explicit (BUILT_IN_HSA_CURRENTWORKGROUPSIZE);
- }
- else
- {
- id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMABSID);
- size = builtin_decl_explicit (BUILT_IN_HSA_GRIDSIZE);
- }
- tree cond = NULL;
- for (int dim = 0; dim < fd->collapse; dim++)
- {
- tree dim_tree = build_int_cstu (unsigned_type_node, dim);
- tree u1 = build_int_cstu (unsigned_type_node, 1);
- tree c2
- = build2 (EQ_EXPR, boolean_type_node,
- build2 (PLUS_EXPR, unsigned_type_node,
- build_call_expr (id, 1, dim_tree), u1),
- build_call_expr (size, 1, dim_tree));
- if (cond)
- cond = build2 (TRUTH_AND_EXPR, boolean_type_node, cond, c2);
- else
- cond = c2;
- }
- return cond;
-}
-
-/* Structure describing the basic properties of the loop we ara analyzing
- whether it can be gridified and when it is gridified. */
-
-class grid_prop
-{
-public:
- /* True when we are doing tiling gridification, i.e. when there is a distinct
- distribute loop over groups and a loop construct over work-items. False
- when distribute and parallel for loops form a combined construct. */
- bool tiling;
- /* Location of the target construct for optimization information
- messages. */
- dump_user_location_t target_loc;
- /* The collapse clause of the involved loops. Collapse value of all of them
- must be the same for gridification to take place. */
- size_t collapse;
- /* Group sizes, if requested by the user or NULL if not requested. */
- tree group_sizes[3];
-};
-
-#define GRID_MISSED_MSG_PREFIX "Will not turn target construct into a " \
- "gridified HSA kernel because "
-
-/* Return true if STMT is an assignment of a register-type into a local
- VAR_DECL. If GRID is non-NULL, the assignment additionally must not be to
- any of the trees specifying group sizes there. */
-
-static bool
-grid_safe_assignment_p (gimple *stmt, grid_prop *grid)
-{
- gassign *assign = dyn_cast <gassign *> (stmt);
- if (!assign)
- return false;
- if (gimple_clobber_p (assign))
- return true;
- tree lhs = gimple_assign_lhs (assign);
- if (!VAR_P (lhs)
- || !is_gimple_reg_type (TREE_TYPE (lhs))
- || is_global_var (lhs))
- return false;
- if (grid)
- for (unsigned i = 0; i < grid->collapse; i++)
- if (lhs == grid->group_sizes[i])
- return false;
- return true;
-}
-
-/* Return true if all statements in SEQ are assignments to local register-type
- variables that do not hold group size information. */
-
-static bool
-grid_seq_only_contains_local_assignments (gimple_seq seq, grid_prop *grid)
-{
- if (!seq)
- return true;
-
- gimple_stmt_iterator gsi;
- for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
- if (!grid_safe_assignment_p (gsi_stmt (gsi), grid))
- return false;
- return true;
-}
-
-/* Scan statements in SEQ and call itself recursively on any bind. GRID
- describes hitherto discovered properties of the loop that is evaluated for
- possible gridification. If during whole search only assignments to
- register-type local variables (that do not overwrite group size information)
- and one single OMP statement is encountered, return true, otherwise return
- false. RET is where we store any OMP statement encountered. */
-
-static bool
-grid_find_single_omp_among_assignments_1 (gimple_seq seq, grid_prop *grid,
- const char *name, gimple **ret)
-{
- gimple_stmt_iterator gsi;
- for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
- {
- gimple *stmt = gsi_stmt (gsi);
-
- if (grid_safe_assignment_p (stmt, grid))
- continue;
- if (gbind *bind = dyn_cast <gbind *> (stmt))
- {
- gimple_seq bind_body = gimple_bind_body (bind);
- if (!grid_find_single_omp_among_assignments_1 (bind_body, grid, name,
- ret))
- return false;
- }
- else if (is_gimple_omp (stmt))
- {
- if (*ret)
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
- GRID_MISSED_MSG_PREFIX "%s construct "
- "contains multiple OpenMP constructs\n",
- name);
- dump_printf_loc (MSG_NOTE, *ret,
- "The first OpenMP construct within "
- "a parallel\n");
- dump_printf_loc (MSG_NOTE, stmt,
- "The second OpenMP construct within "
- "a parallel\n");
- }
- return false;
- }
- *ret = stmt;
- }
- else
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
- GRID_MISSED_MSG_PREFIX "%s construct contains "
- "a complex statement\n", name);
- dump_printf_loc (MSG_NOTE, stmt,
- "This statement cannot be analyzed for "
- "gridification\n");
- }
- return false;
- }
- }
- return true;
-}
-
-/* Scan statements in SEQ and make sure that it and any binds in it contain
- only assignments to local register-type variables (that do not overwrite
- group size information) and one OMP construct. If so, return that
- construct, otherwise return NULL. GRID describes hitherto discovered
- properties of the loop that is evaluated for possible gridification. If
- dumping is enabled and function fails, use NAME to dump a note with the
- reason for failure. */
-
-static gimple *
-grid_find_single_omp_among_assignments (gimple_seq seq, grid_prop *grid,
- const char *name)
-{
- if (!seq)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
- GRID_MISSED_MSG_PREFIX "%s construct has empty body\n",
- name);
- return NULL;
- }
-
- gimple *ret = NULL;
- if (grid_find_single_omp_among_assignments_1 (seq, grid, name, &ret))
- {
- if (!ret && dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
- GRID_MISSED_MSG_PREFIX "%s construct does not contain"
- " any other OpenMP construct\n", name);
- return ret;
- }
- else
- return NULL;
-}
-
-/* Walker function looking for statements there is no point gridifying (and for
- noreturn function calls which we cannot do). Return non-NULL if such a
- function is found. */
-
-static tree
-grid_find_ungridifiable_statement (gimple_stmt_iterator *gsi,
- bool *handled_ops_p,
- struct walk_stmt_info *wi)
-{
- *handled_ops_p = false;
- gimple *stmt = gsi_stmt (*gsi);
- switch (gimple_code (stmt))
- {
- case GIMPLE_CALL:
- if (gimple_call_noreturn_p (as_a <gcall *> (stmt)))
- {
- *handled_ops_p = true;
- wi->info = stmt;
- return error_mark_node;
- }
- break;
-
- /* We may reduce the following list if we find a way to implement the
- clauses, but now there is no point trying further. */
- case GIMPLE_OMP_CRITICAL:
- case GIMPLE_OMP_TASKGROUP:
- case GIMPLE_OMP_TASK:
- case GIMPLE_OMP_SECTION:
- case GIMPLE_OMP_SECTIONS:
- case GIMPLE_OMP_SECTIONS_SWITCH:
- case GIMPLE_OMP_TARGET:
- case GIMPLE_OMP_ORDERED:
- *handled_ops_p = true;
- wi->info = stmt;
- return error_mark_node;
- default:
- break;
- }
- return NULL;
-}
-
-/* Examine clauses of omp parallel statement PAR and if any prevents
- gridification, issue a missed-optimization diagnostics and return false,
- otherwise return true. GRID describes hitherto discovered properties of the
- loop that is evaluated for possible gridification. */
-
-static bool
-grid_parallel_clauses_gridifiable (gomp_parallel *par, dump_user_location_t tloc)
-{
- tree clauses = gimple_omp_parallel_clauses (par);
- while (clauses)
- {
- switch (OMP_CLAUSE_CODE (clauses))
- {
- case OMP_CLAUSE_NUM_THREADS:
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
- GRID_MISSED_MSG_PREFIX "because there is "
- "a num_threads clause of the parallel "
- "construct\n");
- dump_printf_loc (MSG_NOTE, par,
- "Parallel construct has a num_threads clause\n");
- }
- return false;
-
- case OMP_CLAUSE_REDUCTION:
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
- GRID_MISSED_MSG_PREFIX "a reduction clause "
- "is present\n ");
- dump_printf_loc (MSG_NOTE, par,
- "Parallel construct has a reduction clause\n");
- }
- return false;
-
- default:
- break;
- }
- clauses = OMP_CLAUSE_CHAIN (clauses);
- }
- return true;
-}
-
-/* Examine clauses and the body of omp loop statement GFOR and if something
- prevents gridification, issue a missed-optimization diagnostics and return
- false, otherwise return true. GRID describes hitherto discovered properties
- of the loop that is evaluated for possible gridification. */
-
-static bool
-grid_inner_loop_gridifiable_p (gomp_for *gfor, grid_prop *grid)
-{
- if (!grid_seq_only_contains_local_assignments (gimple_omp_for_pre_body (gfor),
- grid))
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
- GRID_MISSED_MSG_PREFIX "the inner loop "
- "loop bounds computation contains a complex "
- "statement\n");
- dump_printf_loc (MSG_NOTE, gfor,
- "Loop construct cannot be analyzed for "
- "gridification\n");
- }
- return false;
- }
-
- tree clauses = gimple_omp_for_clauses (gfor);
- while (clauses)
- {
- switch (OMP_CLAUSE_CODE (clauses))
- {
- case OMP_CLAUSE_SCHEDULE:
- if (OMP_CLAUSE_SCHEDULE_KIND (clauses) != OMP_CLAUSE_SCHEDULE_AUTO)
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
- GRID_MISSED_MSG_PREFIX "the inner loop "
- "has a non-automatic schedule clause\n");
- dump_printf_loc (MSG_NOTE, gfor,
- "Loop construct has a non automatic "
- "schedule clause\n");
- }
- return false;
- }
- break;
-
- case OMP_CLAUSE_REDUCTION:
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
- GRID_MISSED_MSG_PREFIX "a reduction "
- "clause is present\n ");
- dump_printf_loc (MSG_NOTE, gfor,
- "Loop construct has a reduction schedule "
- "clause\n");
- }
- return false;
-
- default:
- break;
- }
- clauses = OMP_CLAUSE_CHAIN (clauses);
- }
- struct walk_stmt_info wi;
- memset (&wi, 0, sizeof (wi));
- if (walk_gimple_seq (gimple_omp_body (gfor),
- grid_find_ungridifiable_statement,
- NULL, &wi))
- {
- gimple *bad = (gimple *) wi.info;
- if (dump_enabled_p ())
- {
- if (is_gimple_call (bad))
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
- GRID_MISSED_MSG_PREFIX "the inner loop contains "
- "call to a noreturn function\n");
- else
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
- GRID_MISSED_MSG_PREFIX "the inner loop contains "
- "statement %s which cannot be transformed\n",
- gimple_code_name[(int) gimple_code (bad)]);
- dump_printf_loc (MSG_NOTE, bad,
- "This statement cannot be analyzed for "
- "gridification\n");
- }
- return false;
- }
- return true;
-}
-
-/* Given distribute omp construct represented by DIST, which in the original
- source forms a compound construct with a looping construct, return true if it
- can be turned into a gridified HSA kernel. Otherwise return false. GRID
- describes hitherto discovered properties of the loop that is evaluated for
- possible gridification. */
-
-static bool
-grid_dist_follows_simple_pattern (gomp_for *dist, grid_prop *grid)
-{
- dump_user_location_t tloc = grid->target_loc;
- gimple *stmt = grid_find_single_omp_among_assignments (gimple_omp_body (dist),
- grid, "distribute");
- gomp_parallel *par;
- if (!stmt
- || !(par = dyn_cast <gomp_parallel *> (stmt))
- || !grid_parallel_clauses_gridifiable (par, tloc))
- return false;
-
- stmt = grid_find_single_omp_among_assignments (gimple_omp_body (par), grid,
- "parallel");
- gomp_for *gfor;
- if (!stmt || !(gfor = dyn_cast <gomp_for *> (stmt)))
- return false;
-
- if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
- GRID_MISSED_MSG_PREFIX "the inner loop is not "
- "a simple for loop\n");
- return false;
- }
- gcc_assert (gimple_omp_for_collapse (gfor) == grid->collapse);
-
- if (!grid_inner_loop_gridifiable_p (gfor, grid))
- return false;
-
- return true;
-}
-
-/* Given an omp loop statement GFOR, return true if it can participate in
- tiling gridification, i.e. in one where the distribute and parallel for
- loops do not form a compound statement. GRID describes hitherto discovered
- properties of the loop that is evaluated for possible gridification. */
-
-static bool
-grid_gfor_follows_tiling_pattern (gomp_for *gfor, grid_prop *grid)
-{
- if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
- GRID_MISSED_MSG_PREFIX "an inner loop is not "
- "a simple for loop\n");
- dump_printf_loc (MSG_NOTE, gfor,
- "This statement is not a simple for loop\n");
- }
- return false;
- }
-
- if (!grid_inner_loop_gridifiable_p (gfor, grid))
- return false;
-
- if (gimple_omp_for_collapse (gfor) != grid->collapse)
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
- GRID_MISSED_MSG_PREFIX "an inner loop does not "
- "have use the same collapse clause\n");
- dump_printf_loc (MSG_NOTE, gfor,
- "Loop construct uses a different collapse clause\n");
- }
- return false;
- }
-
- struct omp_for_data fd;
- struct omp_for_data_loop *loops
- = (struct omp_for_data_loop *)alloca (grid->collapse
- * sizeof (struct omp_for_data_loop));
- omp_extract_for_data (gfor, &fd, loops);
- for (unsigned i = 0; i < grid->collapse; i++)
- {
- tree itype, type = TREE_TYPE (fd.loops[i].v);
- if (POINTER_TYPE_P (type))
- itype = signed_type_for (type);
- else
- itype = type;
-
- tree n1 = fold_convert (itype, fd.loops[i].n1);
- tree n2 = fold_convert (itype, fd.loops[i].n2);
- tree t = build_int_cst (itype,
- (fd.loops[i].cond_code == LT_EXPR ? -1 : 1));
- t = fold_build2 (PLUS_EXPR, itype, fd.loops[i].step, t);
- t = fold_build2 (PLUS_EXPR, itype, t, n2);
- t = fold_build2 (MINUS_EXPR, itype, t, n1);
- if (TYPE_UNSIGNED (itype) && fd.loops[i].cond_code == GT_EXPR)
- t = fold_build2 (TRUNC_DIV_EXPR, itype,
- fold_build1 (NEGATE_EXPR, itype, t),
- fold_build1 (NEGATE_EXPR, itype, fd.loops[i].step));
- else
- t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd.loops[i].step);
-
- if (!operand_equal_p (grid->group_sizes[i], t, 0))
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
- GRID_MISSED_MSG_PREFIX "the distribute and "
- "an internal loop do not agree on tile size\n");
- dump_printf_loc (MSG_NOTE, gfor,
- "Loop construct does not seem to loop over "
- "a tile size\n");
- }
- return false;
- }
- }
- return true;
-}
-
-/* Facing a call to FNDECL in the body of a distribute construct, return true
- if we can handle it or false if it precludes gridification. */
-
-static bool
-grid_call_permissible_in_distribute_p (tree fndecl)
-{
- if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl))
- return true;
-
- const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
- if (strstr (name, "omp_") != name)
- return false;
-
- if ((strcmp (name, "omp_get_thread_num") == 0)
- || (strcmp (name, "omp_get_num_threads") == 0)
- || (strcmp (name, "omp_get_num_teams") == 0)
- || (strcmp (name, "omp_get_team_num") == 0)
- || (strcmp (name, "omp_get_level") == 0)
- || (strcmp (name, "omp_get_active_level") == 0)
- || (strcmp (name, "omp_in_parallel") == 0))
- return true;
-
- return false;
-}
-
-/* Facing a call satisfying grid_call_permissible_in_distribute_p in the body
- of a distribute construct that is pointed at by GSI, modify it as necessary
- for gridification. If the statement itself got removed, return true. */
-
-static bool
-grid_handle_call_in_distribute (gimple_stmt_iterator *gsi)
-{
- gimple *stmt = gsi_stmt (*gsi);
- tree fndecl = gimple_call_fndecl (stmt);
- gcc_checking_assert (stmt);
- if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl))
- return false;
-
- const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
- if ((strcmp (name, "omp_get_thread_num") == 0)
- || (strcmp (name, "omp_get_level") == 0)
- || (strcmp (name, "omp_get_active_level") == 0)
- || (strcmp (name, "omp_in_parallel") == 0))
- {
- tree lhs = gimple_call_lhs (stmt);
- if (lhs)
- {
- gassign *assign
- = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
- gsi_insert_before (gsi, assign, GSI_SAME_STMT);
- }
- gsi_remove (gsi, true);
- return true;
- }
-
- /* The rest of the omp functions can stay as they are, HSA back-end will
- handle them correctly. */
- gcc_checking_assert ((strcmp (name, "omp_get_num_threads") == 0)
- || (strcmp (name, "omp_get_num_teams") == 0)
- || (strcmp (name, "omp_get_team_num") == 0));
- return false;
-}
-
-/* Given a sequence of statements within a distribute omp construct or a
- parallel construct, which in the original source does not form a compound
- construct with a looping construct, return true if it does not prevent us
- from turning it into a gridified HSA kernel. Otherwise return false. GRID
- describes hitherto discovered properties of the loop that is evaluated for
- possible gridification. IN_PARALLEL must be true if seq is within a
- parallel construct and flase if it is only within a distribute
- construct. */
-
-static bool
-grid_dist_follows_tiling_pattern (gimple_seq seq, grid_prop *grid,
- bool in_parallel)
-{
- gimple_stmt_iterator gsi;
- for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
- {
- gimple *stmt = gsi_stmt (gsi);
-
- if (grid_safe_assignment_p (stmt, grid)
- || gimple_code (stmt) == GIMPLE_GOTO
- || gimple_code (stmt) == GIMPLE_LABEL
- || gimple_code (stmt) == GIMPLE_COND)
- continue;
- else if (gbind *bind = dyn_cast <gbind *> (stmt))
- {
- if (!grid_dist_follows_tiling_pattern (gimple_bind_body (bind),
- grid, in_parallel))
- return false;
- continue;
- }
- else if (gtry *try_stmt = dyn_cast <gtry *> (stmt))
- {
- if (gimple_try_kind (try_stmt) == GIMPLE_TRY_CATCH)
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
- GRID_MISSED_MSG_PREFIX "the distribute "
- "construct contains a try..catch region\n");
- dump_printf_loc (MSG_NOTE, try_stmt,
- "This statement cannot be analyzed for "
- "tiled gridification\n");
- }
- return false;
- }
- if (!grid_dist_follows_tiling_pattern (gimple_try_eval (try_stmt),
- grid, in_parallel))
- return false;
- if (!grid_dist_follows_tiling_pattern (gimple_try_cleanup (try_stmt),
- grid, in_parallel))
- return false;
- continue;
- }
- else if (is_gimple_call (stmt))
- {
- tree fndecl = gimple_call_fndecl (stmt);
- if (fndecl && grid_call_permissible_in_distribute_p (fndecl))
- continue;
-
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
- GRID_MISSED_MSG_PREFIX "the distribute "
- "construct contains a call\n");
- dump_printf_loc (MSG_NOTE, stmt,
- "This statement cannot be analyzed for "
- "tiled gridification\n");
- }
- return false;
- }
- else if (gomp_parallel *par = dyn_cast <gomp_parallel *> (stmt))
- {
- if (in_parallel)
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
- GRID_MISSED_MSG_PREFIX "a parallel "
- "construct contains another parallel "
- "construct\n");
- dump_printf_loc (MSG_NOTE, stmt,
- "This parallel construct is nested in "
- "another one\n");
- }
- return false;
- }
- if (!grid_parallel_clauses_gridifiable (par, grid->target_loc)
- || !grid_dist_follows_tiling_pattern (gimple_omp_body (par),
- grid, true))
- return false;
- }
- else if (gomp_for *gfor = dyn_cast <gomp_for *> (stmt))
- {
- if (!in_parallel)
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
- GRID_MISSED_MSG_PREFIX "a loop "
- "construct is not nested within a parallel "
- "construct\n");
- dump_printf_loc (MSG_NOTE, stmt,
- "This loop construct is not nested in "
- "a parallel construct\n");
- }
- return false;
- }
- if (!grid_gfor_follows_tiling_pattern (gfor, grid))
- return false;
- }
- else
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
- GRID_MISSED_MSG_PREFIX "the distribute "
- "construct contains a complex statement\n");
- dump_printf_loc (MSG_NOTE, stmt,
- "This statement cannot be analyzed for "
- "tiled gridification\n");
- }
- return false;
- }
- }
- return true;
-}
-
-/* If TARGET follows a pattern that can be turned into a gridified HSA kernel,
- return true, otherwise return false. In the case of success, also fill in
- GRID with information describing the kernel grid. */
-
-static bool
-grid_target_follows_gridifiable_pattern (gomp_target *target, grid_prop *grid)
-{
- if (gimple_omp_target_kind (target) != GF_OMP_TARGET_KIND_REGION)
- return false;
-
- dump_user_location_t tloc = target;
- grid->target_loc = tloc;
- gimple *stmt
- = grid_find_single_omp_among_assignments (gimple_omp_body (target),
- grid, "target");
- if (!stmt)
- return false;
- gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
- tree group_size = NULL;
- if (!teams)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
- GRID_MISSED_MSG_PREFIX "it does not have a sole "
- "teams construct in it.\n");
- return false;
- }
-
- tree clauses = gimple_omp_teams_clauses (teams);
- while (clauses)
- {
- switch (OMP_CLAUSE_CODE (clauses))
- {
- case OMP_CLAUSE_NUM_TEAMS:
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
- GRID_MISSED_MSG_PREFIX "the teams construct "
- "contains a num_teams clause\n ");
- return false;
-
- case OMP_CLAUSE_REDUCTION:
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
- GRID_MISSED_MSG_PREFIX "a reduction "
- "clause is present\n ");
- return false;
-
- case OMP_CLAUSE_THREAD_LIMIT:
- if (!integer_zerop (OMP_CLAUSE_OPERAND (clauses, 0)))
- group_size = OMP_CLAUSE_OPERAND (clauses, 0);
- break;
-
- default:
- break;
- }
- clauses = OMP_CLAUSE_CHAIN (clauses);
- }
-
- stmt = grid_find_single_omp_among_assignments (gimple_omp_body (teams), grid,
- "teams");
- if (!stmt)
- return false;
- gomp_for *dist = dyn_cast <gomp_for *> (stmt);
- if (!dist)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
- GRID_MISSED_MSG_PREFIX "the teams construct does not "
- "have a single distribute construct in it.\n");
- return false;
- }
-
- gcc_assert (gimple_omp_for_kind (dist) == GF_OMP_FOR_KIND_DISTRIBUTE);
-
- grid->collapse = gimple_omp_for_collapse (dist);
- if (grid->collapse > 3)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
- GRID_MISSED_MSG_PREFIX "the distribute construct "
- "contains collapse clause with parameter greater "
- "than 3\n");
- return false;
- }
-
- struct omp_for_data fd;
- struct omp_for_data_loop *dist_loops
- = (struct omp_for_data_loop *)alloca (grid->collapse
- * sizeof (struct omp_for_data_loop));
- omp_extract_for_data (dist, &fd, dist_loops);
- if (fd.chunk_size)
- {
- if (group_size && !operand_equal_p (group_size, fd.chunk_size, 0))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
- GRID_MISSED_MSG_PREFIX "the teams "
- "thread limit is different from distribute "
- "schedule chunk\n");
- return false;
- }
- group_size = fd.chunk_size;
- }
- if (group_size && grid->collapse > 1)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
- GRID_MISSED_MSG_PREFIX "group size cannot be "
- "set using thread_limit or schedule clauses "
- "when also using a collapse clause greater than 1\n");
- return false;
- }
-
- if (gimple_omp_for_combined_p (dist))
- {
- grid->tiling = false;
- grid->group_sizes[0] = group_size;
- for (unsigned i = 1; i < grid->collapse; i++)
- grid->group_sizes[i] = NULL;
- return grid_dist_follows_simple_pattern (dist, grid);
- }
- else
- {
- grid->tiling = true;
- if (group_size)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
- GRID_MISSED_MSG_PREFIX "group size cannot be set "
- "using thread_limit or schedule clauses when "
- "distribute and loop constructs do not form "
- "one combined construct\n");
- return false;
- }
- for (unsigned i = 0; i < grid->collapse; i++)
- {
- if (fd.loops[i].cond_code == GT_EXPR)
- grid->group_sizes[i] = fold_build1 (NEGATE_EXPR,
- TREE_TYPE (fd.loops[i].step),
- fd.loops[i].step);
- else
- grid->group_sizes[i] = fd.loops[i].step;
- }
- return grid_dist_follows_tiling_pattern (gimple_omp_body (dist), grid,
- false);
- }
-}
-
-/* Operand walker, used to remap pre-body declarations according to a hash map
- provided in DATA. */
-
-static tree
-grid_remap_prebody_decls (tree *tp, int *walk_subtrees, void *data)
-{
- tree t = *tp;
-
- if (DECL_P (t) || TYPE_P (t))
- *walk_subtrees = 0;
- else
- *walk_subtrees = 1;
-
- if (VAR_P (t))
- {
- struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
- hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
- tree *repl = declmap->get (t);
- if (repl)
- *tp = *repl;
- }
- return NULL_TREE;
-}
-
-/* Identifiers of segments into which a particular variable should be places
- when gridifying. */
-
-enum grid_var_segment {GRID_SEGMENT_PRIVATE, GRID_SEGMENT_GROUP,
- GRID_SEGMENT_GLOBAL};
-
-/* Mark VAR so that it is eventually placed into SEGMENT. Place an artificial
- builtin call into SEQ that will make sure the variable is always considered
- address taken. */
-
-static void
-grid_mark_variable_segment (tree var, enum grid_var_segment segment)
-{
- /* Making a non-addressable variables would require that we re-gimplify all
- their uses. Fortunately, we do not have to do this because if they are
- not addressable, it means they are not used in atomic or parallel
- statements and so relaxed GPU consistency rules mean we can just keep them
- private. */
- if (!TREE_ADDRESSABLE (var))
- return;
-
- switch (segment)
- {
- case GRID_SEGMENT_GROUP:
- DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_group_segment"),
- NULL, DECL_ATTRIBUTES (var));
- break;
- case GRID_SEGMENT_GLOBAL:
- DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_global_segment"),
- NULL, DECL_ATTRIBUTES (var));
- break;
- default:
- gcc_unreachable ();
- }
-
- if (!TREE_STATIC (var))
- {
- TREE_STATIC (var) = 1;
- const char *prefix = IDENTIFIER_POINTER (DECL_NAME (var));
- SET_DECL_ASSEMBLER_NAME (var, create_tmp_var_name (prefix));
- varpool_node::finalize_decl (var);
- }
-
-}
-
-/* Copy leading register-type assignments to local variables in SRC to just
- before DST, Creating temporaries, adjusting mapping of operands in WI and
- remapping operands as necessary. Add any new temporaries to TGT_BIND.
- Return the first statement that does not conform to grid_safe_assignment_p
- or NULL. If VAR_SEGMENT is not GRID_SEGMENT_PRIVATE, also mark all
- variables in traversed bind statements so that they are put into the
- appropriate segment. */
-
-static gimple *
-grid_copy_leading_local_assignments (gimple_seq src, gimple_stmt_iterator *dst,
- gbind *tgt_bind,
- enum grid_var_segment var_segment,
- struct walk_stmt_info *wi)
-{
- hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
- gimple_stmt_iterator gsi;
- for (gsi = gsi_start (src); !gsi_end_p (gsi); gsi_next (&gsi))
- {
- gimple *stmt = gsi_stmt (gsi);
- if (gbind *bind = dyn_cast <gbind *> (stmt))
- {
- gimple *r = grid_copy_leading_local_assignments
- (gimple_bind_body (bind), dst, tgt_bind, var_segment, wi);
-
- if (var_segment != GRID_SEGMENT_PRIVATE)
- for (tree var = gimple_bind_vars (bind);
- var;
- var = DECL_CHAIN (var))
- grid_mark_variable_segment (var, var_segment);
- if (r)
- return r;
- else
- continue;
- }
- if (!grid_safe_assignment_p (stmt, NULL))
- return stmt;
- tree lhs = gimple_assign_lhs (as_a <gassign *> (stmt));
- tree repl = copy_var_decl (lhs, create_tmp_var_name (NULL),
- TREE_TYPE (lhs));
- DECL_CONTEXT (repl) = current_function_decl;
- gimple_bind_append_vars (tgt_bind, repl);
-
- declmap->put (lhs, repl);
- gassign *copy = as_a <gassign *> (gimple_copy (stmt));
- walk_gimple_op (copy, grid_remap_prebody_decls, wi);
- gsi_insert_before (dst, copy, GSI_SAME_STMT);
- }
- return NULL;
-}
-
-/* Statement walker function to make adjustments to statements within the
- gridifed kernel copy. */
-
-static tree
-grid_process_grid_body (gimple_stmt_iterator *gsi, bool *handled_ops_p,
- struct walk_stmt_info *)
-{
- *handled_ops_p = false;
- gimple *stmt = gsi_stmt (*gsi);
- if (gimple_code (stmt) == GIMPLE_OMP_FOR
- && gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_SIMD)
- {
- gomp_for *loop = as_a <gomp_for *> (stmt);
- tree clauses = gimple_omp_for_clauses (loop);
- tree cl = omp_find_clause (clauses, OMP_CLAUSE_SAFELEN);
- if (cl)
- OMP_CLAUSE_SAFELEN_EXPR (cl) = integer_one_node;
- else
- {
- tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN);
- OMP_CLAUSE_SAFELEN_EXPR (c) = integer_one_node;
- OMP_CLAUSE_CHAIN (c) = clauses;
- gimple_omp_for_set_clauses (loop, c);
- }
- }
- return NULL_TREE;
-}
-
-/* Given a PARLOOP that is a normal for looping construct but also a part of a
- combined construct with a simd loop, eliminate the simd loop. */
-
-static void
-grid_eliminate_combined_simd_part (gomp_for *parloop)
-{
- struct walk_stmt_info wi;
-
- memset (&wi, 0, sizeof (wi));
- wi.val_only = true;
- enum gf_mask msk = GF_OMP_FOR_KIND_SIMD;
- wi.info = (void *) &msk;
- walk_gimple_seq (gimple_omp_body (parloop), omp_find_combined_for, NULL, &wi);
- gimple *stmt = (gimple *) wi.info;
- /* We expect that the SIMD id the only statement in the parallel loop. */
- gcc_assert (stmt
- && gimple_code (stmt) == GIMPLE_OMP_FOR
- && (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_SIMD)
- && gimple_omp_for_combined_into_p (stmt)
- && !gimple_omp_for_combined_p (stmt));
- gomp_for *simd = as_a <gomp_for *> (stmt);
-
- /* Copy over the iteration properties because the body refers to the index in
- the bottmom-most loop. */
- unsigned i, collapse = gimple_omp_for_collapse (parloop);
- gcc_checking_assert (collapse == gimple_omp_for_collapse (simd));
- for (i = 0; i < collapse; i++)
- {
- gimple_omp_for_set_index (parloop, i, gimple_omp_for_index (simd, i));
- gimple_omp_for_set_initial (parloop, i, gimple_omp_for_initial (simd, i));
- gimple_omp_for_set_final (parloop, i, gimple_omp_for_final (simd, i));
- gimple_omp_for_set_incr (parloop, i, gimple_omp_for_incr (simd, i));
- }
-
- tree *tgt= gimple_omp_for_clauses_ptr (parloop);
- while (*tgt)
- tgt = &OMP_CLAUSE_CHAIN (*tgt);
-
- /* Copy over all clauses, except for linear clauses, which are turned into
- private clauses, and all other simd-specific clauses, which are
- ignored. */
- tree *pc = gimple_omp_for_clauses_ptr (simd);
- while (*pc)
- {
- tree c = *pc;
- switch (OMP_CLAUSE_CODE (c))
- {
- case OMP_CLAUSE_LINEAR:
- {
- tree priv = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_PRIVATE);
- OMP_CLAUSE_DECL (priv) = OMP_CLAUSE_DECL (c);
- OMP_CLAUSE_CHAIN (priv) = NULL;
- *tgt = priv;
- tgt = &OMP_CLAUSE_CHAIN (priv);
- pc = &OMP_CLAUSE_CHAIN (c);
- break;
- }
-
- case OMP_CLAUSE_SAFELEN:
- case OMP_CLAUSE_SIMDLEN:
- case OMP_CLAUSE_ALIGNED:
- pc = &OMP_CLAUSE_CHAIN (c);
- break;
-
- default:
- *pc = OMP_CLAUSE_CHAIN (c);
- OMP_CLAUSE_CHAIN (c) = NULL;
- *tgt = c;
- tgt = &OMP_CLAUSE_CHAIN (c);
- break;
- }
- }
-
- /* Finally, throw away the simd and mark the parallel loop as not
- combined. */
- gimple_omp_set_body (parloop, gimple_omp_body (simd));
- gimple_omp_for_set_combined_p (parloop, false);
-}
-
-/* Statement walker function marking all parallels as grid_phony and loops as
- grid ones representing threads of a particular thread group. */
-
-static tree
-grid_mark_tiling_loops (gimple_stmt_iterator *gsi, bool *handled_ops_p,
- struct walk_stmt_info *wi_in)
-{
- *handled_ops_p = false;
- if (gomp_for *loop = dyn_cast <gomp_for *> (gsi_stmt (*gsi)))
- {
- *handled_ops_p = true;
- gimple_omp_for_set_kind (loop, GF_OMP_FOR_KIND_GRID_LOOP);
- gimple_omp_for_set_grid_intra_group (loop, true);
- if (gimple_omp_for_combined_p (loop))
- grid_eliminate_combined_simd_part (loop);
-
- struct walk_stmt_info body_wi;
- memset (&body_wi, 0, sizeof (body_wi));
- walk_gimple_seq_mod (gimple_omp_body_ptr (loop),
- grid_process_grid_body, NULL, &body_wi);
-
- gbind *bind = (gbind *) wi_in->info;
- tree c;
- for (c = gimple_omp_for_clauses (loop); c; c = OMP_CLAUSE_CHAIN (c))
- if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE)
- {
- push_gimplify_context ();
- tree ov = OMP_CLAUSE_DECL (c);
- tree gv = copy_var_decl (ov, create_tmp_var_name (NULL),
- TREE_TYPE (ov));
-
- grid_mark_variable_segment (gv, GRID_SEGMENT_GROUP);
- DECL_CONTEXT (gv) = current_function_decl;
- gimple_bind_append_vars (bind, gv);
- tree x = lang_hooks.decls.omp_clause_assign_op (c, gv, ov);
- gimplify_and_add (x, &OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c));
- x = lang_hooks.decls.omp_clause_copy_ctor (c, ov, gv);
- gimple_seq l = NULL;
- gimplify_and_add (x, &l);
- gsi_insert_seq_after (gsi, l, GSI_SAME_STMT);
- pop_gimplify_context (bind);
- }
- }
- return NULL_TREE;
-}
-
-/* Statement walker function marking all parallels as grid_phony and loops as
- grid ones representing threads of a particular thread group. */
-
-static tree
-grid_mark_tiling_parallels_and_loops (gimple_stmt_iterator *gsi,
- bool *handled_ops_p,
- struct walk_stmt_info *wi_in)
-{
- *handled_ops_p = false;
- wi_in->removed_stmt = false;
- gimple *stmt = gsi_stmt (*gsi);
- if (gbind *bind = dyn_cast <gbind *> (stmt))
- {
- for (tree var = gimple_bind_vars (bind); var; var = DECL_CHAIN (var))
- grid_mark_variable_segment (var, GRID_SEGMENT_GROUP);
- }
- else if (gomp_parallel *parallel = dyn_cast <gomp_parallel *> (stmt))
- {
- *handled_ops_p = true;
- gimple_omp_parallel_set_grid_phony (parallel, true);
-
- gbind *new_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK));
- gimple_bind_set_body (new_bind, gimple_omp_body (parallel));
- gimple_seq s = NULL;
- gimple_seq_add_stmt (&s, new_bind);
- gimple_omp_set_body (parallel, s);
-
- struct walk_stmt_info wi_par;
- memset (&wi_par, 0, sizeof (wi_par));
- wi_par.info = new_bind;
- walk_gimple_seq_mod (gimple_bind_body_ptr (new_bind),
- grid_mark_tiling_loops, NULL, &wi_par);
- }
- else if (is_a <gcall *> (stmt))
- wi_in->removed_stmt = grid_handle_call_in_distribute (gsi);
- return NULL_TREE;
-}
-
-/* Given freshly copied top level kernel SEQ, identify the individual OMP
- components, mark them as part of kernel, copy assignment leading to them
- just before DST, remapping them using WI and adding new temporaries to
- TGT_BIND, and return the loop that will be used for kernel dispatch. */
-
-static gomp_for *
-grid_process_kernel_body_copy (grid_prop *grid, gimple_seq seq,
- gimple_stmt_iterator *dst,
- gbind *tgt_bind, struct walk_stmt_info *wi)
-{
- gimple *stmt = grid_copy_leading_local_assignments (seq, dst, tgt_bind,
- GRID_SEGMENT_GLOBAL, wi);
- gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
- gcc_assert (teams);
- gimple_omp_teams_set_grid_phony (teams, true);
- stmt = grid_copy_leading_local_assignments (gimple_omp_body (teams), dst,
- tgt_bind, GRID_SEGMENT_GLOBAL,
- wi);
- gcc_checking_assert (stmt);
- gomp_for *dist = dyn_cast <gomp_for *> (stmt);
- gcc_assert (dist);
- gimple_seq prebody = gimple_omp_for_pre_body (dist);
- if (prebody)
- grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
- GRID_SEGMENT_GROUP, wi);
-
- if (grid->tiling)
- {
- gimple_omp_for_set_kind (dist, GF_OMP_FOR_KIND_GRID_LOOP);
- gimple_omp_for_set_grid_group_iter (dist, true);
-
- struct walk_stmt_info wi_tiled;
- memset (&wi_tiled, 0, sizeof (wi_tiled));
- walk_gimple_seq_mod (gimple_omp_body_ptr (dist),
- grid_mark_tiling_parallels_and_loops, NULL,
- &wi_tiled);
- return dist;
- }
- else
- {
- gimple_omp_for_set_grid_phony (dist, true);
- stmt = grid_copy_leading_local_assignments (gimple_omp_body (dist), dst,
- tgt_bind,
- GRID_SEGMENT_PRIVATE, wi);
- gcc_checking_assert (stmt);
- gomp_parallel *parallel = as_a <gomp_parallel *> (stmt);
- gimple_omp_parallel_set_grid_phony (parallel, true);
- stmt = grid_copy_leading_local_assignments (gimple_omp_body (parallel),
- dst, tgt_bind,
- GRID_SEGMENT_PRIVATE, wi);
- gomp_for *inner_loop = as_a <gomp_for *> (stmt);
- gimple_omp_for_set_kind (inner_loop, GF_OMP_FOR_KIND_GRID_LOOP);
- prebody = gimple_omp_for_pre_body (inner_loop);
- if (prebody)
- grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
- GRID_SEGMENT_PRIVATE, wi);
-
- if (gimple_omp_for_combined_p (inner_loop))
- grid_eliminate_combined_simd_part (inner_loop);
- struct walk_stmt_info body_wi;
- memset (&body_wi, 0, sizeof (body_wi));
- walk_gimple_seq_mod (gimple_omp_body_ptr (inner_loop),
- grid_process_grid_body, NULL, &body_wi);
-
- return inner_loop;
- }
-}
-
-/* If TARGET points to a GOMP_TARGET which follows a gridifiable pattern,
- create a GPU kernel for it. GSI must point to the same statement, TGT_BIND
- is the bind into which temporaries inserted before TARGET should be
- added. */
-
-static void
-grid_attempt_target_gridification (gomp_target *target,
- gimple_stmt_iterator *gsi,
- gbind *tgt_bind)
-{
- /* removed group_size */
- grid_prop grid = {};
- if (!target || !grid_target_follows_gridifiable_pattern (target, &grid))
- return;
-
- location_t loc = gimple_location (target);
- if (dump_enabled_p ())
- dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, target,
- "Target construct will be turned into a gridified HSA "
- "kernel\n");
-
- /* Copy target body to a GPUKERNEL construct: */
- gimple_seq kernel_seq = copy_gimple_seq_and_replace_locals
- (gimple_omp_body (target));
-
- hash_map<tree, tree> *declmap = new hash_map<tree, tree>;
- struct walk_stmt_info wi;
- memset (&wi, 0, sizeof (struct walk_stmt_info));
- wi.info = declmap;
-
- /* Copy assignments in between OMP statements before target, mark OMP
- statements within copy appropriately. */
- gomp_for *inner_loop = grid_process_kernel_body_copy (&grid, kernel_seq, gsi,
- tgt_bind, &wi);
-
- gbind *old_bind
- = as_a <gbind *> (gimple_seq_first (gimple_omp_body (target)));
- gbind *new_bind = as_a <gbind *> (gimple_seq_first (kernel_seq));
- tree new_block = gimple_bind_block (new_bind);
- tree enc_block = BLOCK_SUPERCONTEXT (gimple_bind_block (old_bind));
- BLOCK_CHAIN (new_block) = BLOCK_SUBBLOCKS (enc_block);
- BLOCK_SUBBLOCKS (enc_block) = new_block;
- BLOCK_SUPERCONTEXT (new_block) = enc_block;
- gimple *gpukernel = gimple_build_omp_grid_body (kernel_seq);
- gimple_seq_add_stmt
- (gimple_bind_body_ptr (as_a <gbind *> (gimple_omp_body (target))),
- gpukernel);
-
- for (size_t i = 0; i < grid.collapse; i++)
- walk_tree (&grid.group_sizes[i], grid_remap_prebody_decls, &wi, NULL);
- push_gimplify_context ();
- for (size_t i = 0; i < grid.collapse; i++)
- {
- tree index_var = gimple_omp_for_index (inner_loop, i);
- tree itype, type = TREE_TYPE (index_var);
- if (POINTER_TYPE_P (type))
- itype = signed_type_for (type);
- else
- itype = type;
-
- enum tree_code cond_code = gimple_omp_for_cond (inner_loop, i);
- tree n1 = unshare_expr (gimple_omp_for_initial (inner_loop, i));
- walk_tree (&n1, grid_remap_prebody_decls, &wi, NULL);
- tree n2 = unshare_expr (gimple_omp_for_final (inner_loop, i));
- walk_tree (&n2, grid_remap_prebody_decls, &wi, NULL);
- tree step
- = omp_get_for_step_from_incr (loc, gimple_omp_for_incr (inner_loop, i));
- omp_adjust_for_condition (loc, &cond_code, &n2, index_var, step);
- n1 = fold_convert (itype, n1);
- n2 = fold_convert (itype, n2);
-
- tree cond = fold_build2 (cond_code, boolean_type_node, n1, n2);
-
- tree t = build_int_cst (itype, (cond_code == LT_EXPR ? -1 : 1));
- t = fold_build2 (PLUS_EXPR, itype, step, t);
- t = fold_build2 (PLUS_EXPR, itype, t, n2);
- t = fold_build2 (MINUS_EXPR, itype, t, n1);
- if (TYPE_UNSIGNED (itype) && cond_code == GT_EXPR)
- t = fold_build2 (TRUNC_DIV_EXPR, itype,
- fold_build1 (NEGATE_EXPR, itype, t),
- fold_build1 (NEGATE_EXPR, itype, step));
- else
- t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
- t = fold_build3 (COND_EXPR, itype, cond, t, build_zero_cst (itype));
- if (grid.tiling)
- {
- if (cond_code == GT_EXPR)
- step = fold_build1 (NEGATE_EXPR, itype, step);
- t = fold_build2 (MULT_EXPR, itype, t, step);
- }
-
- tree gs = fold_convert (uint32_type_node, t);
- gimple_seq tmpseq = NULL;
- gimplify_expr (&gs, &tmpseq, NULL, is_gimple_val, fb_rvalue);
- if (!gimple_seq_empty_p (tmpseq))
- gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);
-
- tree ws;
- if (grid.group_sizes[i])
- {
- ws = fold_convert (uint32_type_node, grid.group_sizes[i]);
- tmpseq = NULL;
- gimplify_expr (&ws, &tmpseq, NULL, is_gimple_val, fb_rvalue);
- if (!gimple_seq_empty_p (tmpseq))
- gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);
- }
- else
- ws = build_zero_cst (uint32_type_node);
-
- tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__GRIDDIM_);
- OMP_CLAUSE__GRIDDIM__DIMENSION (c) = i;
- OMP_CLAUSE__GRIDDIM__SIZE (c) = gs;
- OMP_CLAUSE__GRIDDIM__GROUP (c) = ws;
- OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (target);
- gimple_omp_target_set_clauses (target, c);
- }
- pop_gimplify_context (tgt_bind);
- delete declmap;
- return;
-}
-
-/* Walker function doing all the work for create_target_kernels. */
-
-static tree
-grid_gridify_all_targets_stmt (gimple_stmt_iterator *gsi,
- bool *handled_ops_p,
- struct walk_stmt_info *incoming)
-{
- *handled_ops_p = false;
-
- gimple *stmt = gsi_stmt (*gsi);
- gomp_target *target = dyn_cast <gomp_target *> (stmt);
- if (target)
- {
- gbind *tgt_bind = (gbind *) incoming->info;
- gcc_checking_assert (tgt_bind);
- grid_attempt_target_gridification (target, gsi, tgt_bind);
- return NULL_TREE;
- }
- gbind *bind = dyn_cast <gbind *> (stmt);
- if (bind)
- {
- *handled_ops_p = true;
- struct walk_stmt_info wi;
- memset (&wi, 0, sizeof (wi));
- wi.info = bind;
- walk_gimple_seq_mod (gimple_bind_body_ptr (bind),
- grid_gridify_all_targets_stmt, NULL, &wi);
- }
- return NULL_TREE;
-}
-
-/* Attempt to gridify all target constructs in BODY_P. All such targets will
- have their bodies duplicated, with the new copy being put into a
- gimple_omp_grid_body statement. All kernel-related construct within the
- grid_body will be marked with phony flags or kernel kinds. Moreover, some
- re-structuring is often needed, such as copying pre-bodies before the target
- construct so that kernel grid sizes can be computed. */
-
-void
-omp_grid_gridify_all_targets (gimple_seq *body_p)
-{
- struct walk_stmt_info wi;
- memset (&wi, 0, sizeof (wi));
- walk_gimple_seq_mod (body_p, grid_gridify_all_targets_stmt, NULL, &wi);
-}
+++ /dev/null
-/* Lowering and expansion of OpenMP directives for HSA GPU agents.
-
- Copyright (C) 2013-2020 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 3, or (at your option) any later
-version.
-
-GCC is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING3. If not see
-<http://www.gnu.org/licenses/>. */
-
-#ifndef GCC_OMP_GRID_H
-#define GCC_OMP_GRID_H
-
-extern tree omp_grid_lastprivate_predicate (struct omp_for_data *fd);
-extern void omp_grid_gridify_all_targets (gimple_seq *body_p);
-
-#endif /* GCC_OMP_GRID_H */
#include "splay-tree.h"
#include "omp-general.h"
#include "omp-low.h"
-#include "omp-grid.h"
#include "gimple-low.h"
#include "alloc-pool.h"
#include "symbol-summary.h"
#include "context.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"
-#include "hsa-common.h"
#include "stringpool.h"
#include "attribs.h"
}
}
else if (outer)
- {
- if (gimple_code (outer->stmt) == GIMPLE_OMP_GRID_BODY)
- {
- outer = outer->outer;
- gcc_assert (outer
- && gimple_code (outer->stmt) != GIMPLE_OMP_GRID_BODY);
- }
- x = lookup_decl (var, outer);
- }
+ x = lookup_decl (var, outer);
else if (omp_is_reference (var))
/* This can happen with orphaned constructs. If var is reference, it is
possible it is shared and as such valid. */
}
break;
- case OMP_CLAUSE__GRIDDIM_:
- if (ctx->outer)
- {
- scan_omp_op (&OMP_CLAUSE__GRIDDIM__SIZE (c), ctx->outer);
- scan_omp_op (&OMP_CLAUSE__GRIDDIM__GROUP (c), ctx->outer);
- }
- break;
-
case OMP_CLAUSE_ORDER:
ctx->order_concurrent = true;
break;
case OMP_CLAUSE_AUTO:
case OMP_CLAUSE_SEQ:
case OMP_CLAUSE_TILE:
- case OMP_CLAUSE__GRIDDIM_:
case OMP_CLAUSE__SIMT_:
case OMP_CLAUSE_IF_PRESENT:
case OMP_CLAUSE_FINALIZE:
DECL_NAMELESS (name) = 1;
TYPE_NAME (ctx->record_type) = name;
TYPE_ARTIFICIAL (ctx->record_type) = 1;
- if (!gimple_omp_parallel_grid_phony (stmt))
- {
- create_omp_child_function (ctx, false);
- gimple_omp_parallel_set_child_fn (stmt, ctx->cb.dst_fn);
- }
+ create_omp_child_function (ctx, false);
+ gimple_omp_parallel_set_child_fn (stmt, ctx->cb.dst_fn);
scan_sharing_clauses (gimple_omp_parallel_clauses (stmt), ctx);
scan_omp (gimple_omp_body_ptr (stmt), ctx);
{
tree c;
- if (ctx && gimple_code (ctx->stmt) == GIMPLE_OMP_GRID_BODY)
- /* GRID_BODY is an artificial construct, nesting rules will be checked in
- the original copy of its contents. */
- return true;
-
/* No nesting of non-OpenACC STMT (that is, an OpenMP one, or a GOMP builtin)
inside an OpenACC CTX. */
if (!(is_gimple_omp (stmt)
{
if ((gimple_code (stmt) != GIMPLE_OMP_FOR
|| (gimple_omp_for_kind (stmt) != GF_OMP_FOR_KIND_DISTRIBUTE
- && gimple_omp_for_kind (stmt) != GF_OMP_FOR_KIND_GRID_LOOP
&& omp_find_clause (gimple_omp_for_clauses (stmt),
OMP_CLAUSE_BIND) == NULL_TREE))
&& gimple_code (stmt) != GIMPLE_OMP_PARALLEL)
case GIMPLE_OMP_MASTER:
case GIMPLE_OMP_ORDERED:
case GIMPLE_OMP_CRITICAL:
- case GIMPLE_OMP_GRID_BODY:
ctx = new_omp_context (stmt, ctx);
scan_omp (gimple_omp_body_ptr (stmt), ctx);
break;
cond_code = EQ_EXPR;
}
- if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP
- || gimple_omp_for_grid_phony (fd->for_stmt))
- cond = omp_grid_lastprivate_predicate (fd);
- else
+ tree n2 = fd->loop.n2;
+ if (fd->collapse > 1
+ && TREE_CODE (n2) != INTEGER_CST
+ && gimple_omp_for_combined_into_p (fd->for_stmt))
{
- tree n2 = fd->loop.n2;
- if (fd->collapse > 1
- && TREE_CODE (n2) != INTEGER_CST
- && gimple_omp_for_combined_into_p (fd->for_stmt))
+ struct omp_context *taskreg_ctx = NULL;
+ if (gimple_code (ctx->outer->stmt) == GIMPLE_OMP_FOR)
{
- struct omp_context *taskreg_ctx = NULL;
- if (gimple_code (ctx->outer->stmt) == GIMPLE_OMP_FOR)
+ gomp_for *gfor = as_a <gomp_for *> (ctx->outer->stmt);
+ if (gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_FOR
+ || gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_DISTRIBUTE)
{
- gomp_for *gfor = as_a <gomp_for *> (ctx->outer->stmt);
- if (gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_FOR
- || gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_DISTRIBUTE)
+ if (gimple_omp_for_combined_into_p (gfor))
{
- if (gimple_omp_for_combined_into_p (gfor))
- {
- gcc_assert (ctx->outer->outer
- && is_parallel_ctx (ctx->outer->outer));
- taskreg_ctx = ctx->outer->outer;
- }
- else
- {
- struct omp_for_data outer_fd;
- omp_extract_for_data (gfor, &outer_fd, NULL);
- n2 = fold_convert (TREE_TYPE (n2), outer_fd.loop.n2);
- }
+ gcc_assert (ctx->outer->outer
+ && is_parallel_ctx (ctx->outer->outer));
+ taskreg_ctx = ctx->outer->outer;
}
- else if (gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_TASKLOOP)
- taskreg_ctx = ctx->outer->outer;
- }
- else if (is_taskreg_ctx (ctx->outer))
- taskreg_ctx = ctx->outer;
- if (taskreg_ctx)
- {
- int i;
- tree taskreg_clauses
- = gimple_omp_taskreg_clauses (taskreg_ctx->stmt);
- tree innerc = omp_find_clause (taskreg_clauses,
- OMP_CLAUSE__LOOPTEMP_);
- gcc_assert (innerc);
- for (i = 0; i < fd->collapse; i++)
+ else
{
- innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
- OMP_CLAUSE__LOOPTEMP_);
- gcc_assert (innerc);
+ struct omp_for_data outer_fd;
+ omp_extract_for_data (gfor, &outer_fd, NULL);
+ n2 = fold_convert (TREE_TYPE (n2), outer_fd.loop.n2);
}
+ }
+ else if (gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_TASKLOOP)
+ taskreg_ctx = ctx->outer->outer;
+ }
+ else if (is_taskreg_ctx (ctx->outer))
+ taskreg_ctx = ctx->outer;
+ if (taskreg_ctx)
+ {
+ int i;
+ tree taskreg_clauses
+ = gimple_omp_taskreg_clauses (taskreg_ctx->stmt);
+ tree innerc = omp_find_clause (taskreg_clauses,
+ OMP_CLAUSE__LOOPTEMP_);
+ gcc_assert (innerc);
+ for (i = 0; i < fd->collapse; i++)
+ {
innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
OMP_CLAUSE__LOOPTEMP_);
- if (innerc)
- n2 = fold_convert (TREE_TYPE (n2),
- lookup_decl (OMP_CLAUSE_DECL (innerc),
- taskreg_ctx));
+ gcc_assert (innerc);
}
+ innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
+ OMP_CLAUSE__LOOPTEMP_);
+ if (innerc)
+ n2 = fold_convert (TREE_TYPE (n2),
+ lookup_decl (OMP_CLAUSE_DECL (innerc),
+ taskreg_ctx));
}
- cond = build2 (cond_code, boolean_type_node, fd->loop.v, n2);
}
+ cond = build2 (cond_code, boolean_type_node, fd->loop.v, n2);
clauses = gimple_omp_for_clauses (fd->for_stmt);
stmts = NULL;
ctx);
}
- bool phony_loop = (gimple_omp_for_kind (stmt) != GF_OMP_FOR_KIND_GRID_LOOP
- && gimple_omp_for_grid_phony (stmt));
if ((ctx->scan_inclusive || ctx->scan_exclusive)
&& gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_FOR)
- {
- gcc_assert (!phony_loop);
- lower_omp_for_scan (&body, &dlist, stmt, &fd, ctx);
- }
+ lower_omp_for_scan (&body, &dlist, stmt, &fd, ctx);
else
{
- if (!phony_loop)
- gimple_seq_add_stmt (&body, stmt);
+ gimple_seq_add_stmt (&body, stmt);
gimple_seq_add_seq (&body, gimple_omp_body (stmt));
}
- if (!phony_loop)
- gimple_seq_add_stmt (&body, gimple_build_omp_continue (fd.loop.v,
- fd.loop.v));
+ gimple_seq_add_stmt (&body, gimple_build_omp_continue (fd.loop.v,
+ fd.loop.v));
/* After the loop, add exit clauses. */
lower_reduction_clauses (gimple_omp_for_clauses (stmt), &body, &clist, ctx);
body = maybe_catch_exception (body);
- if (!phony_loop)
- {
- /* Region exit marker goes at the end of the loop body. */
- gimple *g = gimple_build_omp_return (fd.have_nowait);
- gimple_seq_add_stmt (&body, g);
+ /* Region exit marker goes at the end of the loop body. */
+ gimple *g = gimple_build_omp_return (fd.have_nowait);
+ gimple_seq_add_stmt (&body, g);
- gimple_seq_add_seq (&body, tred_dlist);
+ gimple_seq_add_seq (&body, tred_dlist);
- maybe_add_implicit_barrier_cancel (ctx, g, &body);
+ maybe_add_implicit_barrier_cancel (ctx, g, &body);
- if (rclauses)
- OMP_CLAUSE_DECL (rclauses) = rtmp;
- }
+ if (rclauses)
+ OMP_CLAUSE_DECL (rclauses) = rtmp;
/* Add OpenACC joining and reduction markers just after the loop. */
if (oacc_tail)
gimple_seq par_olist = NULL;
gimple_seq par_ilist = NULL;
gimple_seq par_rlist = NULL;
- bool phony_construct = gimple_code (stmt) == GIMPLE_OMP_PARALLEL
- && gimple_omp_parallel_grid_phony (as_a <gomp_parallel *> (stmt));
- if (phony_construct && ctx->record_type)
- {
- gcc_checking_assert (!ctx->receiver_decl);
- ctx->receiver_decl = create_tmp_var
- (build_reference_type (ctx->record_type), ".omp_rec");
- }
lower_rec_input_clauses (clauses, &par_ilist, &par_olist, ctx, NULL);
lower_omp (&par_body, ctx);
if (gimple_code (stmt) == GIMPLE_OMP_PARALLEL)
gimple_seq_add_stmt (&new_body,
gimple_build_omp_continue (integer_zero_node,
integer_zero_node));
- if (!phony_construct)
- {
- gimple_seq_add_stmt (&new_body, gimple_build_omp_return (false));
- gimple_omp_set_body (stmt, new_body);
- }
+ gimple_seq_add_stmt (&new_body, gimple_build_omp_return (false));
+ gimple_omp_set_body (stmt, new_body);
if (dep_bind && gimple_bind_block (par_bind) == NULL_TREE)
bind = gimple_build_bind (NULL, NULL, make_node (BLOCK));
bind = gimple_build_bind (NULL, NULL, gimple_bind_block (par_bind));
gsi_replace (gsi_p, dep_bind ? dep_bind : bind, true);
gimple_bind_add_seq (bind, ilist);
- if (!phony_construct)
- gimple_bind_add_stmt (bind, stmt);
- else
- gimple_bind_add_seq (bind, new_body);
+ gimple_bind_add_stmt (bind, stmt);
gimple_bind_add_seq (bind, olist);
pop_gimplify_context (NULL);
lower_omp (gimple_omp_body_ptr (teams_stmt), ctx);
lower_reduction_clauses (gimple_omp_teams_clauses (teams_stmt), &olist,
NULL, ctx);
- if (!gimple_omp_teams_grid_phony (teams_stmt))
- {
- gimple_seq_add_stmt (&bind_body, teams_stmt);
- location_t loc = gimple_location (teams_stmt);
- tree decl = builtin_decl_explicit (BUILT_IN_GOMP_TEAMS);
- gimple *call = gimple_build_call (decl, 2, num_teams, thread_limit);
- gimple_set_location (call, loc);
- gimple_seq_add_stmt (&bind_body, call);
- }
+ gimple_seq_add_stmt (&bind_body, teams_stmt);
+
+ location_t loc = gimple_location (teams_stmt);
+ tree decl = builtin_decl_explicit (BUILT_IN_GOMP_TEAMS);
+ gimple *call = gimple_build_call (decl, 2, num_teams, thread_limit);
+ gimple_set_location (call, loc);
+ gimple_seq_add_stmt (&bind_body, call);
gimple_seq_add_seq (&bind_body, gimple_omp_body (teams_stmt));
gimple_omp_set_body (teams_stmt, NULL);
gimple_seq_add_seq (&bind_body, olist);
gimple_seq_add_seq (&bind_body, dlist);
- if (!gimple_omp_teams_grid_phony (teams_stmt))
- gimple_seq_add_stmt (&bind_body, gimple_build_omp_return (true));
+ gimple_seq_add_stmt (&bind_body, gimple_build_omp_return (true));
gimple_bind_set_body (bind, bind_body);
pop_gimplify_context (bind);
TREE_USED (block) = 1;
}
-/* Expand code within an artificial GIMPLE_OMP_GRID_BODY OMP construct. */
-
-static void
-lower_omp_grid_body (gimple_stmt_iterator *gsi_p, omp_context *ctx)
-{
- gimple *stmt = gsi_stmt (*gsi_p);
- lower_omp (gimple_omp_body_ptr (stmt), ctx);
- gimple_seq_add_stmt (gimple_omp_body_ptr (stmt),
- gimple_build_omp_return (false));
-}
-
-
/* Callback for lower_omp_1. Return non-NULL if *tp needs to be
regimplified. If DATA is non-NULL, lower_omp_1 is outside
of OMP context, but with task_shared_vars set. */
else
lower_omp_teams (gsi_p, ctx);
break;
- case GIMPLE_OMP_GRID_BODY:
- ctx = maybe_lookup_ctx (stmt);
- gcc_assert (ctx);
- lower_omp_grid_body (gsi_p, ctx);
- break;
case GIMPLE_CALL:
tree fndecl;
call_stmt = as_a <gcall *> (stmt);
body = gimple_body (current_function_decl);
- if (hsa_gen_requested_p ())
- omp_grid_gridify_all_targets (&body);
-
scan_omp (&body, NULL);
gcc_assert (taskreg_nesting_level == 0);
FOR_EACH_VEC_ELT (taskreg_contexts, i, ctx)
break;
case OPT_foffload_:
- {
- const char *p = arg;
- opts->x_flag_disable_hsa = true;
- while (*p != 0)
- {
- const char *comma = strchr (p, ',');
-
- if ((strncmp (p, "disable", 7) == 0)
- && (p[7] == ',' || p[7] == '\0'))
- {
- opts->x_flag_disable_hsa = true;
- break;
- }
-
- if ((strncmp (p, "hsa", 3) == 0)
- && (p[3] == ',' || p[3] == '\0'))
- {
-#ifdef ENABLE_HSA
- opts->x_flag_disable_hsa = false;
-#else
- sorry ("HSA has not been enabled during configuration");
-#endif
- }
- if (!comma)
- break;
- p = comma + 1;
- }
- break;
- }
+ /* Deferred. */
+ break;
#ifndef ACCEL_COMPILER
case OPT_foffload_abi_:
Common Joined UInteger Var(param_hot_bb_frequency_fraction) Init(1000) Param
The denominator n of fraction 1/n of the execution frequency of the entry block of a function that a basic block of this function needs to at least have in order to be considered hot.
--param=hsa-gen-debug-stores=
-Common Joined UInteger Var(param_hsa_gen_debug_stores) IntegerRange(0, 1) Param
-Level of hsa debug stores verbosity.
-
-param=inline-heuristics-hint-percent=
Common Joined UInteger Var(param_inline_heuristics_hint_percent) Init(200) Optimization IntegerRange(100, 1000000) Param
The scale (in percents) applied to inline-insns-single and auto limits when heuristics hints that inlining is very profitable.
NEXT_PASS (pass_ipa_cp);
NEXT_PASS (pass_ipa_sra);
NEXT_PASS (pass_ipa_cdtor_merge);
- NEXT_PASS (pass_ipa_hsa);
NEXT_PASS (pass_ipa_fn_summary);
NEXT_PASS (pass_ipa_inline);
NEXT_PASS (pass_ipa_pure_const);
NEXT_PASS (pass_gimple_isel);
NEXT_PASS (pass_cleanup_cfg_post_optimizing);
NEXT_PASS (pass_warn_function_noreturn);
- NEXT_PASS (pass_gen_hsail);
NEXT_PASS (pass_expand);
+++ /dev/null
-/* { dg-do compile } */
-/* { dg-require-effective-target offload_hsa } */
-/* { dg-options "-fopenmp -fdump-tree-omplower-details" } */
-
-void
-foo1 (int n, int *a, int workgroup_size)
-{
- int i;
-#pragma omp target
-#pragma omp teams thread_limit(workgroup_size)
-#pragma omp distribute parallel for shared(a) firstprivate(n) private(i)
- for (i = 0; i < n; i++)
- a[i]++;
-}
-
-void
-foo2 (int j, int n, int *a)
-{
- int i;
-#pragma omp target teams
-#pragma omp distribute parallel for shared(a) firstprivate(n) private(i) firstprivate(j)
- for (i = j + 1; i < n; i++)
- a[i] = i;
-}
-
-void
-foo3 (int j, int n, int *a)
-{
- int i;
-#pragma omp target teams
-#pragma omp distribute parallel for shared(a) firstprivate(n) private(i) firstprivate(j)
- for (i = j + 1; i < n; i += 3)
- a[i] = i;
-}
-
-void
-foo4 (int j, int n, int *a)
-{
-#pragma omp parallel
- {
- #pragma omp single
- {
- int i;
-#pragma omp target
-#pragma omp teams
-#pragma omp distribute parallel for shared(a) firstprivate(n) private(i) firstprivate(j)
- for (i = j + 1; i < n; i += 3)
- a[i] = i;
- }
- }
-}
-
-
-/* { dg-final { scan-tree-dump-times "Target construct will be turned into a gridified HSA kernel" 4 "omplower" } } */
+++ /dev/null
-/* { dg-do compile } */
-/* { dg-require-effective-target offload_hsa } */
-/* { dg-options "-fopenmp -fdump-tree-omplower-details" } */
-
-#define BLOCK_SIZE 16
-
-
-void tiled_sgemm_tt(const int M, const int N, const int K, const float alpha, const float*A, const int LDA,
- const float*B, const int LDB, const float beta, float*C, const int LDC){
-
-#pragma omp target teams map(to:A[M*K],B[K*N]) map(from:C[M*N])
-#pragma omp distribute collapse(2)
- for (int C_row_start=0 ; C_row_start < M ; C_row_start+=BLOCK_SIZE)
- for (int C_col_start=0 ; C_col_start < N ; C_col_start+=BLOCK_SIZE)
- {
-// Each team has a local copy of these mini matrices
- float As[BLOCK_SIZE][BLOCK_SIZE];
- float Bs[BLOCK_SIZE][BLOCK_SIZE];
-#pragma omp parallel
- {
- int C_row, C_col;
- float Cval = 0.0;
-
- for (int kblock = 0; kblock < K ; kblock += BLOCK_SIZE )
- {
-#pragma omp for collapse(2)
- for (int row=0 ; row < BLOCK_SIZE ; row++)
- for (int col=0 ; col < BLOCK_SIZE ; col++)
- {
- C_row = C_row_start + row;
- C_col = C_col_start + col;
- if ((C_row < M) && (kblock + col < K))
- As[row][col] = A[(C_row*LDA)+ kblock + col];
- else
- As[row][col] = 0;
- if ((kblock + row < K) && C_col < N)
- Bs[row][col] = B[((kblock+row)*LDB)+ C_col];
- else
- Bs[row][col] = 0;
- }
-
-#pragma omp for collapse(2)
- for (int row=0 ; row < BLOCK_SIZE ; row++)
- for (int col=0 ; col < BLOCK_SIZE ; col++)
- {
- for (int e = 0; e < BLOCK_SIZE; ++e)
- Cval += As[row][e] * Bs[e][col];
- }
- } /* End for kblock .. */
-
-
-#pragma omp for collapse(2)
- for (int row=0 ; row < BLOCK_SIZE ; row++)
- for (int col=0 ; col < BLOCK_SIZE ; col++)
- {
- C_row = C_row_start + row;
- C_col = C_col_start + col;
- if ((C_row < M) && (C_col < N))
- C[(C_row*LDC)+C_col] = alpha*Cval + beta*C[(C_row*LDC)+C_col];
-
- }
- } /* end parallel */
- } /* end target teams distribute */
-}
-
-/* { dg-final { scan-tree-dump "Target construct will be turned into a gridified HSA kernel" "omplower" } } */
+++ /dev/null
-/* { dg-do compile } */
-/* { dg-require-effective-target offload_hsa } */
-/* { dg-options "-fopenmp -fdump-tree-omplower-details" } */
-
-#define BLOCK_SIZE 16
-
-void tiled_sgemm_tt(const int M, const int N, const int K, const float alpha, const float*A, const int LDA,
- const float*B, const int LDB, const float beta, float*C, const int LDC)
-{
-#pragma omp target teams map(to:A[M*K],B[K*N]) map(from:C[M*N])
-#pragma omp distribute collapse(2)
- for (int C_row_start=0 ; C_row_start < M ; C_row_start+=BLOCK_SIZE)
- for (int C_col_start=0 ; C_col_start < N ; C_col_start+=BLOCK_SIZE)
- {
- float As[BLOCK_SIZE][BLOCK_SIZE];
- float Bs[BLOCK_SIZE][BLOCK_SIZE];
- float Cs[BLOCK_SIZE][BLOCK_SIZE];
- int C_row, C_col;
-
-#pragma omp parallel for collapse(2)
- for (int row=0 ; row < BLOCK_SIZE ; row++)
- for (int col=0 ; col < BLOCK_SIZE ; col++)
- {
- Cs[row][col] = 0.0;
- }
-
-
- for (int kblock = 0; kblock < K ; kblock += BLOCK_SIZE )
- {
-#pragma omp parallel for collapse(2)
- for (int row=0 ; row < BLOCK_SIZE ; row++)
- for (int col=0 ; col < BLOCK_SIZE ; col++)
- {
- C_row = C_row_start + row;
- C_col = C_col_start + col;
- if ((C_row < M) && (kblock + col < K))
- As[row][col] = A[(C_row*LDA)+ kblock + col];
- else
- As[row][col] = 0;
- if ((kblock + row < K) && C_col < N)
- Bs[row][col] = B[((kblock+row)*LDB)+ C_col];
- else
- Bs[row][col] = 0;
- }
-
-#pragma omp parallel for collapse(2)
- for (int row=0 ; row < BLOCK_SIZE ; row++)
- for (int col=0 ; col < BLOCK_SIZE ; col++)
- {
- for (int e = 0; e < BLOCK_SIZE; ++e)
- Cs[row][col] += As[row][e] * Bs[e][col];
- }
- } /* End for kblock .. */
-
-
-#pragma omp parallel for collapse(2)
- for (int row=0 ; row < BLOCK_SIZE ; row++)
- for (int col=0 ; col < BLOCK_SIZE ; col++)
- {
- C_row = C_row_start + row;
- C_col = C_col_start + col;
- if ((C_row < M) && (C_col < N))
- C[(C_row*LDC)+C_col] = alpha*Cs[row][col] + beta*C[(C_row*LDC)+C_col];
- }
- } /* End distribute */
-}
-
-/* { dg-final { scan-tree-dump "Target construct will be turned into a gridified HSA kernel" "omplower" } } */
+++ /dev/null
-/* Instead of ICE, we'd like "HSA does not implement indirect calls". */
-
-/* Reduced from 'libgomp.c/target-39.c'. */
-
-/* { dg-require-effective-target offload_hsa } */
-/* { dg-additional-options "-Whsa" } to override '{gcc,g++}.dg/gomp/gomp.exp'. */
-
-typedef void (*fnp) (void);
-void f1 (void) { }
-fnp f2 (void) { return f1; }
-#pragma omp declare target to (f1, f2)
-
-int
-main ()
-{
- #pragma omp target
- {
- fnp fnp = f2 ();
- fnp (); /* { dg-message "note: support for HSA does not implement indirect calls" } */
- }
- return 0;
-}
-
-/* { dg-warning "could not emit HSAIL for the function" "" { target *-*-* } 0 } */
# Main loop.
g++-dg-runtest [lsort [concat \
[find $srcdir/$subdir *.C] \
- [find $srcdir/c-c++-common/gomp *.c]]] "" "-fopenmp -Wno-hsa"
+ [find $srcdir/c-c++-common/gomp *.c]]] "" "-fopenmp"
# All done.
dg-finish
# Main loop.
dg-runtest [lsort [concat \
[find $srcdir/$subdir *.c] \
- [find $srcdir/c-c++-common/gomp *.c]]] "" "-fopenmp -Wno-hsa"
+ [find $srcdir/c-c++-common/gomp *.c]]] "" "-fopenmp"
# All done.
dg-finish
# Main loop.
gfortran-dg-runtest [lsort \
- [find $srcdir/$subdir *.\[fF\]{,90,95,03,08} ] ] "" "-fopenmp -Wno-hsa"
+ [find $srcdir/$subdir *.\[fF\]{,90,95,03,08} ] ] "" "-fopenmp"
# All done.
dg-finish
+++ /dev/null
-! { dg-do compile }
-! { dg-require-effective-target offload_hsa }
-! { dg-options "-fopenmp -fdump-tree-omplower-details" } */
-
-subroutine vector_square(n, a, b)
- integer i, n, b(n), a(n)
-!$omp target teams
-!$omp distribute parallel do
- do i=1,n
- b(i) = a(i) * a(i)
- enddo
-!$omp end distribute parallel do
-!$omp end target teams
-end subroutine vector_square
-
-! { dg-final { scan-tree-dump "Target construct will be turned into a gridified HSA kernel" "omplower" } }
} "-foffload=nvptx-none" ]
}
-# Return 1 if the compiler has been configured with hsa offloading.
-
-proc check_effective_target_offload_hsa { } {
- return [check_no_compiler_messages offload_hsa assembly {
- int main () {return 0;}
- } "-foffload=hsa" ]
-}
-
# Return 1 if the compiler has been configured with gcn offloading.
proc check_effective_target_offload_gcn { } {
DEFTIMEVAR (TV_WHOPR_PARTITIONING , "whopr partitioning")
DEFTIMEVAR (TV_WHOPR_LTRANS , "whopr ltrans")
DEFTIMEVAR (TV_IPA_REFERENCE , "ipa reference")
-DEFTIMEVAR (TV_IPA_HSA , "ipa HSA")
DEFTIMEVAR (TV_IPA_PROFILE , "ipa profile")
DEFTIMEVAR (TV_IPA_AUTOFDO , "auto profile")
DEFTIMEVAR (TV_IPA_PURE_CONST , "ipa pure const")
#include "ipa-prop.h"
#include "gcse.h"
#include "omp-offload.h"
-#include "hsa-common.h"
#include "edit-context.h"
#include "tree-pass.h"
#include "dumpfile.h"
omp_finish_file ();
- hsa_output_brig ();
-
output_shared_constant_pool ();
output_object_blocks ();
finish_tm_clone_pairs ();
/* OpenACC clause: tile ( size-expr-list ). */
OMP_CLAUSE_TILE,
- /* OpenMP internal-only clause to specify grid dimensions of a gridified
- kernel. */
- OMP_CLAUSE__GRIDDIM_,
-
/* OpenACC clause: if_present. */
OMP_CLAUSE_IF_PRESENT,
enum omp_clause_defaultmap_kind defaultmap_kind;
enum omp_clause_bind_kind bind_kind;
enum omp_clause_device_type_kind device_type_kind;
- /* The dimension a OMP_CLAUSE__GRIDDIM_ clause of a gridified target
- construct describes. */
- unsigned int dimension;
} GTY ((skip)) subcode;
/* The gimplification of OMP_CLAUSE_REDUCTION_{INIT,MERGE} for omp-low's
case OMP_CLAUSE__LOOPTEMP_:
case OMP_CLAUSE__REDUCTEMP_:
case OMP_CLAUSE__SIMDUID_:
- case OMP_CLAUSE__GRIDDIM_:
case OMP_CLAUSE__SIMT_:
/* Anything else. */
default:
case OMP_CLAUSE__LOOPTEMP_:
case OMP_CLAUSE__REDUCTEMP_:
case OMP_CLAUSE__SIMDUID_:
- case OMP_CLAUSE__GRIDDIM_:
case OMP_CLAUSE__SIMT_:
/* Anything else. */
default:
extern gimple_opt_pass *make_pass_oacc_kernels (gcc::context *ctxt);
extern simple_ipa_opt_pass *make_pass_ipa_oacc (gcc::context *ctxt);
extern simple_ipa_opt_pass *make_pass_ipa_oacc_kernels (gcc::context *ctxt);
-extern gimple_opt_pass *make_pass_gen_hsail (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_warn_nonnull_compare (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_sprintf_length (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_walloca (gcc::context *ctxt);
extern ipa_opt_pass_d *make_pass_ipa_devirt (gcc::context *ctxt);
extern ipa_opt_pass_d *make_pass_ipa_odr (gcc::context *ctxt);
extern ipa_opt_pass_d *make_pass_ipa_reference (gcc::context *ctxt);
-extern ipa_opt_pass_d *make_pass_ipa_hsa (gcc::context *ctxt);
extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt);
extern simple_ipa_opt_pass *make_pass_ipa_pta (gcc::context *ctxt);
extern simple_ipa_opt_pass *make_pass_ipa_tm (gcc::context *ctxt);
pp_right_paren (pp);
break;
- case OMP_CLAUSE__GRIDDIM_:
- pp_string (pp, "_griddim_(");
- pp_unsigned_wide_integer (pp, OMP_CLAUSE__GRIDDIM__DIMENSION (clause));
- pp_colon (pp);
- dump_generic_node (pp, OMP_CLAUSE__GRIDDIM__SIZE (clause), spc, flags,
- false);
- pp_comma (pp);
- dump_generic_node (pp, OMP_CLAUSE__GRIDDIM__GROUP (clause), spc, flags,
- false);
- pp_right_paren (pp);
- break;
case OMP_CLAUSE_IF_PRESENT:
pp_string (pp, "if_present");
break;
1, /* OMP_CLAUSE_NUM_WORKERS */
1, /* OMP_CLAUSE_VECTOR_LENGTH */
3, /* OMP_CLAUSE_TILE */
- 2, /* OMP_CLAUSE__GRIDDIM_ */
0, /* OMP_CLAUSE_IF_PRESENT */
0, /* OMP_CLAUSE_FINALIZE */
};
"num_workers",
"vector_length",
"tile",
- "_griddim_",
"if_present",
"finalize",
};
switch (OMP_CLAUSE_CODE (*tp))
{
case OMP_CLAUSE_GANG:
- case OMP_CLAUSE__GRIDDIM_:
WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, 1));
/* FALLTHRU */
#define OMP_CLAUSE_TILE_COUNT(NODE) \
OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_TILE), 2)
-#define OMP_CLAUSE__GRIDDIM__DIMENSION(NODE) \
- (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE__GRIDDIM_)\
- ->omp_clause.subcode.dimension)
-#define OMP_CLAUSE__GRIDDIM__SIZE(NODE) \
- OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE__GRIDDIM_), 0)
-#define OMP_CLAUSE__GRIDDIM__GROUP(NODE) \
- OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE__GRIDDIM_), 1)
-
/* _CONDTEMP_ holding temporary with iteration count. */
#define OMP_CLAUSE__CONDTEMP__ITER(NODE) \
(OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE__CONDTEMP_)->base.public_flag)
#define GOMP_VERSION 1
#define GOMP_VERSION_NVIDIA_PTX 1
#define GOMP_VERSION_INTEL_MIC 0
-#define GOMP_VERSION_HSA 0
#define GOMP_VERSION_GCN 1
#define GOMP_VERSION_PACK(LIB, DEV) (((LIB) << 16) | (DEV))
-# Makefile.in generated by automake 1.15.1 from Makefile.am.
+# Makefile.in generated by automake 1.16.1 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2017 Free Software Foundation, Inc.
+# Copyright (C) 1994-2018 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
host_triplet = @host@
target_triplet = @target@
@PLUGIN_NVPTX_TRUE@am__append_1 = libgomp-plugin-nvptx.la
-@PLUGIN_HSA_TRUE@am__append_2 = libgomp-plugin-hsa.la
-@PLUGIN_GCN_TRUE@am__append_3 = libgomp-plugin-gcn.la
-@USE_FORTRAN_TRUE@am__append_4 = openacc.f90
+@PLUGIN_GCN_TRUE@am__append_2 = libgomp-plugin-gcn.la
+@USE_FORTRAN_TRUE@am__append_3 = openacc.f90
subdir = .
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
$(libgomp_plugin_gcn_la_LDFLAGS) $(LDFLAGS) -o $@
@PLUGIN_GCN_TRUE@am_libgomp_plugin_gcn_la_rpath = -rpath \
@PLUGIN_GCN_TRUE@ $(toolexeclibdir)
-@PLUGIN_HSA_TRUE@libgomp_plugin_hsa_la_DEPENDENCIES = libgomp.la \
-@PLUGIN_HSA_TRUE@ $(am__DEPENDENCIES_1)
-@PLUGIN_HSA_TRUE@am_libgomp_plugin_hsa_la_OBJECTS = \
-@PLUGIN_HSA_TRUE@ libgomp_plugin_hsa_la-plugin-hsa.lo
-libgomp_plugin_hsa_la_OBJECTS = $(am_libgomp_plugin_hsa_la_OBJECTS)
-libgomp_plugin_hsa_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \
- $(libgomp_plugin_hsa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
- --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
- $(libgomp_plugin_hsa_la_LDFLAGS) $(LDFLAGS) -o $@
-@PLUGIN_HSA_TRUE@am_libgomp_plugin_hsa_la_rpath = -rpath \
-@PLUGIN_HSA_TRUE@ $(toolexeclibdir)
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_la_DEPENDENCIES = libgomp.la \
@PLUGIN_NVPTX_TRUE@ $(am__DEPENDENCIES_1)
@PLUGIN_NVPTX_TRUE@am_libgomp_plugin_nvptx_la_OBJECTS = \
am__v_at_1 =
DEFAULT_INCLUDES = -I.@am__isrc@
depcomp = $(SHELL) $(top_srcdir)/../depcomp
-am__depfiles_maybe = depfiles
+am__maybe_remake_depfiles = depfiles
+am__depfiles_remade = ./$(DEPDIR)/affinity-fmt.Plo \
+ ./$(DEPDIR)/affinity.Plo ./$(DEPDIR)/alloc.Plo \
+ ./$(DEPDIR)/allocator.Plo ./$(DEPDIR)/atomic.Plo \
+ ./$(DEPDIR)/bar.Plo ./$(DEPDIR)/barrier.Plo \
+ ./$(DEPDIR)/critical.Plo ./$(DEPDIR)/env.Plo \
+ ./$(DEPDIR)/error.Plo ./$(DEPDIR)/fortran.Plo \
+ ./$(DEPDIR)/icv-device.Plo ./$(DEPDIR)/icv.Plo \
+ ./$(DEPDIR)/iter.Plo ./$(DEPDIR)/iter_ull.Plo \
+ ./$(DEPDIR)/libgomp-plugin.Plo \
+ ./$(DEPDIR)/libgomp_plugin_gcn_la-plugin-gcn.Plo \
+ ./$(DEPDIR)/libgomp_plugin_nvptx_la-plugin-nvptx.Plo \
+ ./$(DEPDIR)/lock.Plo ./$(DEPDIR)/loop.Plo \
+ ./$(DEPDIR)/loop_ull.Plo ./$(DEPDIR)/mutex.Plo \
+ ./$(DEPDIR)/oacc-async.Plo ./$(DEPDIR)/oacc-cuda.Plo \
+ ./$(DEPDIR)/oacc-host.Plo ./$(DEPDIR)/oacc-init.Plo \
+ ./$(DEPDIR)/oacc-mem.Plo ./$(DEPDIR)/oacc-parallel.Plo \
+ ./$(DEPDIR)/oacc-plugin.Plo ./$(DEPDIR)/oacc-profiling.Plo \
+ ./$(DEPDIR)/oacc-target.Plo ./$(DEPDIR)/ordered.Plo \
+ ./$(DEPDIR)/parallel.Plo ./$(DEPDIR)/priority_queue.Plo \
+ ./$(DEPDIR)/proc.Plo ./$(DEPDIR)/ptrlock.Plo \
+ ./$(DEPDIR)/sections.Plo ./$(DEPDIR)/sem.Plo \
+ ./$(DEPDIR)/single.Plo ./$(DEPDIR)/splay-tree.Plo \
+ ./$(DEPDIR)/target.Plo ./$(DEPDIR)/task.Plo \
+ ./$(DEPDIR)/team.Plo ./$(DEPDIR)/teams.Plo \
+ ./$(DEPDIR)/time.Plo ./$(DEPDIR)/work.Plo
am__mv = mv -f
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
am__v_FCLD_0 = @echo " FCLD " $@;
am__v_FCLD_1 =
SOURCES = $(libgomp_plugin_gcn_la_SOURCES) \
- $(libgomp_plugin_hsa_la_SOURCES) \
$(libgomp_plugin_nvptx_la_SOURCES) $(libgomp_la_SOURCES)
AM_V_DVIPS = $(am__v_DVIPS_@AM_V@)
am__v_DVIPS_ = $(am__v_DVIPS_@AM_DEFAULT_V@)
PLUGIN_GCN_CPPFLAGS = @PLUGIN_GCN_CPPFLAGS@
PLUGIN_GCN_LDFLAGS = @PLUGIN_GCN_LDFLAGS@
PLUGIN_GCN_LIBS = @PLUGIN_GCN_LIBS@
-PLUGIN_HSA = @PLUGIN_HSA@
-PLUGIN_HSA_CPPFLAGS = @PLUGIN_HSA_CPPFLAGS@
-PLUGIN_HSA_LDFLAGS = @PLUGIN_HSA_LDFLAGS@
-PLUGIN_HSA_LIBS = @PLUGIN_HSA_LIBS@
PLUGIN_NVPTX = @PLUGIN_NVPTX@
PLUGIN_NVPTX_CPPFLAGS = @PLUGIN_NVPTX_CPPFLAGS@
PLUGIN_NVPTX_LDFLAGS = @PLUGIN_NVPTX_LDFLAGS@
AM_CPPFLAGS = $(addprefix -I, $(search_path))
AM_CFLAGS = $(XCFLAGS)
AM_LDFLAGS = $(XLDFLAGS) $(SECTION_LDFLAGS) $(OPT_LDFLAGS)
-toolexeclib_LTLIBRARIES = libgomp.la $(am__append_1) $(am__append_2) \
- $(am__append_3)
+toolexeclib_LTLIBRARIES = libgomp.la $(am__append_1) $(am__append_2)
nodist_toolexeclib_HEADERS = libgomp.spec
# -Wc is only a libtool option.
oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \
oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
affinity-fmt.c teams.c allocator.c oacc-profiling.c \
- oacc-target.c $(am__append_4)
+ oacc-target.c $(am__append_3)
# Nvidia PTX OpenACC plugin.
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_la_LIBADD = libgomp.la $(PLUGIN_NVPTX_LIBS)
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_la_LIBTOOLFLAGS = --tag=disable-static
-# Heterogenous Systems Architecture plugin
-@PLUGIN_HSA_TRUE@libgomp_plugin_hsa_version_info = -version-info $(libtool_VERSION)
-@PLUGIN_HSA_TRUE@libgomp_plugin_hsa_la_SOURCES = plugin/plugin-hsa.c
-@PLUGIN_HSA_TRUE@libgomp_plugin_hsa_la_CPPFLAGS = $(AM_CPPFLAGS) $(PLUGIN_HSA_CPPFLAGS) \
-@PLUGIN_HSA_TRUE@ -D_GNU_SOURCE
-
-@PLUGIN_HSA_TRUE@libgomp_plugin_hsa_la_LDFLAGS = \
-@PLUGIN_HSA_TRUE@ $(libgomp_plugin_hsa_version_info) \
-@PLUGIN_HSA_TRUE@ $(lt_host_flags) $(PLUGIN_HSA_LDFLAGS)
-@PLUGIN_HSA_TRUE@libgomp_plugin_hsa_la_LIBADD = libgomp.la $(PLUGIN_HSA_LIBS)
-@PLUGIN_HSA_TRUE@libgomp_plugin_hsa_la_LIBTOOLFLAGS = --tag=disable-static
-
# AMD GCN plugin
@PLUGIN_GCN_TRUE@libgomp_plugin_gcn_version_info = -version-info $(libtool_VERSION)
@PLUGIN_GCN_TRUE@libgomp_plugin_gcn_la_SOURCES = plugin/plugin-gcn.c
echo ' $(SHELL) ./config.status'; \
$(SHELL) ./config.status;; \
*) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe);; \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__maybe_remake_depfiles)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__maybe_remake_depfiles);; \
esac;
$(top_srcdir)/plugin/Makefrag.am $(top_srcdir)/../multilib.am $(am__empty):
libgomp-plugin-gcn.la: $(libgomp_plugin_gcn_la_OBJECTS) $(libgomp_plugin_gcn_la_DEPENDENCIES) $(EXTRA_libgomp_plugin_gcn_la_DEPENDENCIES)
$(AM_V_CCLD)$(libgomp_plugin_gcn_la_LINK) $(am_libgomp_plugin_gcn_la_rpath) $(libgomp_plugin_gcn_la_OBJECTS) $(libgomp_plugin_gcn_la_LIBADD) $(LIBS)
-libgomp-plugin-hsa.la: $(libgomp_plugin_hsa_la_OBJECTS) $(libgomp_plugin_hsa_la_DEPENDENCIES) $(EXTRA_libgomp_plugin_hsa_la_DEPENDENCIES)
- $(AM_V_CCLD)$(libgomp_plugin_hsa_la_LINK) $(am_libgomp_plugin_hsa_la_rpath) $(libgomp_plugin_hsa_la_OBJECTS) $(libgomp_plugin_hsa_la_LIBADD) $(LIBS)
-
libgomp-plugin-nvptx.la: $(libgomp_plugin_nvptx_la_OBJECTS) $(libgomp_plugin_nvptx_la_DEPENDENCIES) $(EXTRA_libgomp_plugin_nvptx_la_DEPENDENCIES)
$(AM_V_CCLD)$(libgomp_plugin_nvptx_la_LINK) $(am_libgomp_plugin_nvptx_la_rpath) $(libgomp_plugin_nvptx_la_OBJECTS) $(libgomp_plugin_nvptx_la_LIBADD) $(LIBS)
distclean-compile:
-rm -f *.tab.c
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/affinity-fmt.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/affinity.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/alloc.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/allocator.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/atomic.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bar.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/barrier.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/critical.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/env.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/error.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fortran.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/icv-device.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/icv.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter_ull.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp-plugin.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp_plugin_gcn_la-plugin-gcn.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp_plugin_hsa_la-plugin-hsa.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp_plugin_nvptx_la-plugin-nvptx.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lock.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loop.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loop_ull.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mutex.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-async.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-cuda.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-host.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-init.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-mem.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-parallel.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-plugin.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-profiling.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-target.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/priority_queue.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proc.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ptrlock.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sections.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/splay-tree.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/team.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/teams.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/time.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/work.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/affinity-fmt.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/affinity.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/alloc.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/allocator.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/atomic.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bar.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/barrier.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/critical.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/env.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/error.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fortran.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/icv-device.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/icv.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter_ull.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp-plugin.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp_plugin_gcn_la-plugin-gcn.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp_plugin_nvptx_la-plugin-nvptx.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lock.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loop.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loop_ull.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mutex.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-async.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-cuda.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-host.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-init.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-mem.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-parallel.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-plugin.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-profiling.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-target.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/priority_queue.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proc.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ptrlock.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sections.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/splay-tree.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/team.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/teams.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/time.Plo@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/work.Plo@am__quote@ # am--include-marker
+
+$(am__depfiles_remade):
+ @$(MKDIR_P) $(@D)
+ @echo '# dummy' >$@-t && $(am__mv) $@-t $@
+
+am--depfiles: $(am__depfiles_remade)
.c.o:
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(libgomp_plugin_gcn_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgomp_plugin_gcn_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libgomp_plugin_gcn_la-plugin-gcn.lo `test -f 'plugin/plugin-gcn.c' || echo '$(srcdir)/'`plugin/plugin-gcn.c
-libgomp_plugin_hsa_la-plugin-hsa.lo: plugin/plugin-hsa.c
-@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(libgomp_plugin_hsa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgomp_plugin_hsa_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libgomp_plugin_hsa_la-plugin-hsa.lo -MD -MP -MF $(DEPDIR)/libgomp_plugin_hsa_la-plugin-hsa.Tpo -c -o libgomp_plugin_hsa_la-plugin-hsa.lo `test -f 'plugin/plugin-hsa.c' || echo '$(srcdir)/'`plugin/plugin-hsa.c
-@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libgomp_plugin_hsa_la-plugin-hsa.Tpo $(DEPDIR)/libgomp_plugin_hsa_la-plugin-hsa.Plo
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='plugin/plugin-hsa.c' object='libgomp_plugin_hsa_la-plugin-hsa.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(libgomp_plugin_hsa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgomp_plugin_hsa_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libgomp_plugin_hsa_la-plugin-hsa.lo `test -f 'plugin/plugin-hsa.c' || echo '$(srcdir)/'`plugin/plugin-hsa.c
-
libgomp_plugin_nvptx_la-plugin-nvptx.lo: plugin/plugin-nvptx.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(libgomp_plugin_nvptx_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgomp_plugin_nvptx_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libgomp_plugin_nvptx_la-plugin-nvptx.lo -MD -MP -MF $(DEPDIR)/libgomp_plugin_nvptx_la-plugin-nvptx.Tpo -c -o libgomp_plugin_nvptx_la-plugin-nvptx.lo `test -f 'plugin/plugin-nvptx.c' || echo '$(srcdir)/'`plugin/plugin-nvptx.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libgomp_plugin_nvptx_la-plugin-nvptx.Tpo $(DEPDIR)/libgomp_plugin_nvptx_la-plugin-nvptx.Plo
distclean: distclean-recursive
-rm -f $(am__CONFIG_DISTCLEAN_FILES)
- -rm -rf ./$(DEPDIR)
+ -rm -f ./$(DEPDIR)/affinity-fmt.Plo
+ -rm -f ./$(DEPDIR)/affinity.Plo
+ -rm -f ./$(DEPDIR)/alloc.Plo
+ -rm -f ./$(DEPDIR)/allocator.Plo
+ -rm -f ./$(DEPDIR)/atomic.Plo
+ -rm -f ./$(DEPDIR)/bar.Plo
+ -rm -f ./$(DEPDIR)/barrier.Plo
+ -rm -f ./$(DEPDIR)/critical.Plo
+ -rm -f ./$(DEPDIR)/env.Plo
+ -rm -f ./$(DEPDIR)/error.Plo
+ -rm -f ./$(DEPDIR)/fortran.Plo
+ -rm -f ./$(DEPDIR)/icv-device.Plo
+ -rm -f ./$(DEPDIR)/icv.Plo
+ -rm -f ./$(DEPDIR)/iter.Plo
+ -rm -f ./$(DEPDIR)/iter_ull.Plo
+ -rm -f ./$(DEPDIR)/libgomp-plugin.Plo
+ -rm -f ./$(DEPDIR)/libgomp_plugin_gcn_la-plugin-gcn.Plo
+ -rm -f ./$(DEPDIR)/libgomp_plugin_nvptx_la-plugin-nvptx.Plo
+ -rm -f ./$(DEPDIR)/lock.Plo
+ -rm -f ./$(DEPDIR)/loop.Plo
+ -rm -f ./$(DEPDIR)/loop_ull.Plo
+ -rm -f ./$(DEPDIR)/mutex.Plo
+ -rm -f ./$(DEPDIR)/oacc-async.Plo
+ -rm -f ./$(DEPDIR)/oacc-cuda.Plo
+ -rm -f ./$(DEPDIR)/oacc-host.Plo
+ -rm -f ./$(DEPDIR)/oacc-init.Plo
+ -rm -f ./$(DEPDIR)/oacc-mem.Plo
+ -rm -f ./$(DEPDIR)/oacc-parallel.Plo
+ -rm -f ./$(DEPDIR)/oacc-plugin.Plo
+ -rm -f ./$(DEPDIR)/oacc-profiling.Plo
+ -rm -f ./$(DEPDIR)/oacc-target.Plo
+ -rm -f ./$(DEPDIR)/ordered.Plo
+ -rm -f ./$(DEPDIR)/parallel.Plo
+ -rm -f ./$(DEPDIR)/priority_queue.Plo
+ -rm -f ./$(DEPDIR)/proc.Plo
+ -rm -f ./$(DEPDIR)/ptrlock.Plo
+ -rm -f ./$(DEPDIR)/sections.Plo
+ -rm -f ./$(DEPDIR)/sem.Plo
+ -rm -f ./$(DEPDIR)/single.Plo
+ -rm -f ./$(DEPDIR)/splay-tree.Plo
+ -rm -f ./$(DEPDIR)/target.Plo
+ -rm -f ./$(DEPDIR)/task.Plo
+ -rm -f ./$(DEPDIR)/team.Plo
+ -rm -f ./$(DEPDIR)/teams.Plo
+ -rm -f ./$(DEPDIR)/time.Plo
+ -rm -f ./$(DEPDIR)/work.Plo
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
distclean-hdr distclean-libtool distclean-local distclean-tags
maintainer-clean: maintainer-clean-recursive
-rm -f $(am__CONFIG_DISTCLEAN_FILES)
-rm -rf $(top_srcdir)/autom4te.cache
- -rm -rf ./$(DEPDIR)
+ -rm -f ./$(DEPDIR)/affinity-fmt.Plo
+ -rm -f ./$(DEPDIR)/affinity.Plo
+ -rm -f ./$(DEPDIR)/alloc.Plo
+ -rm -f ./$(DEPDIR)/allocator.Plo
+ -rm -f ./$(DEPDIR)/atomic.Plo
+ -rm -f ./$(DEPDIR)/bar.Plo
+ -rm -f ./$(DEPDIR)/barrier.Plo
+ -rm -f ./$(DEPDIR)/critical.Plo
+ -rm -f ./$(DEPDIR)/env.Plo
+ -rm -f ./$(DEPDIR)/error.Plo
+ -rm -f ./$(DEPDIR)/fortran.Plo
+ -rm -f ./$(DEPDIR)/icv-device.Plo
+ -rm -f ./$(DEPDIR)/icv.Plo
+ -rm -f ./$(DEPDIR)/iter.Plo
+ -rm -f ./$(DEPDIR)/iter_ull.Plo
+ -rm -f ./$(DEPDIR)/libgomp-plugin.Plo
+ -rm -f ./$(DEPDIR)/libgomp_plugin_gcn_la-plugin-gcn.Plo
+ -rm -f ./$(DEPDIR)/libgomp_plugin_nvptx_la-plugin-nvptx.Plo
+ -rm -f ./$(DEPDIR)/lock.Plo
+ -rm -f ./$(DEPDIR)/loop.Plo
+ -rm -f ./$(DEPDIR)/loop_ull.Plo
+ -rm -f ./$(DEPDIR)/mutex.Plo
+ -rm -f ./$(DEPDIR)/oacc-async.Plo
+ -rm -f ./$(DEPDIR)/oacc-cuda.Plo
+ -rm -f ./$(DEPDIR)/oacc-host.Plo
+ -rm -f ./$(DEPDIR)/oacc-init.Plo
+ -rm -f ./$(DEPDIR)/oacc-mem.Plo
+ -rm -f ./$(DEPDIR)/oacc-parallel.Plo
+ -rm -f ./$(DEPDIR)/oacc-plugin.Plo
+ -rm -f ./$(DEPDIR)/oacc-profiling.Plo
+ -rm -f ./$(DEPDIR)/oacc-target.Plo
+ -rm -f ./$(DEPDIR)/ordered.Plo
+ -rm -f ./$(DEPDIR)/parallel.Plo
+ -rm -f ./$(DEPDIR)/priority_queue.Plo
+ -rm -f ./$(DEPDIR)/proc.Plo
+ -rm -f ./$(DEPDIR)/ptrlock.Plo
+ -rm -f ./$(DEPDIR)/sections.Plo
+ -rm -f ./$(DEPDIR)/sem.Plo
+ -rm -f ./$(DEPDIR)/single.Plo
+ -rm -f ./$(DEPDIR)/splay-tree.Plo
+ -rm -f ./$(DEPDIR)/target.Plo
+ -rm -f ./$(DEPDIR)/task.Plo
+ -rm -f ./$(DEPDIR)/team.Plo
+ -rm -f ./$(DEPDIR)/teams.Plo
+ -rm -f ./$(DEPDIR)/time.Plo
+ -rm -f ./$(DEPDIR)/work.Plo
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-aminfo \
maintainer-clean-generic maintainer-clean-local
.MAKE: $(am__recursive_targets) all install-am install-strip
.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am all-local \
- am--refresh check check-am clean clean-aminfo clean-cscope \
- clean-generic clean-libtool clean-local \
+ am--depfiles am--refresh check check-am clean clean-aminfo \
+ clean-cscope clean-generic clean-libtool clean-local \
clean-toolexeclibLTLIBRARIES cscope cscopelist-am ctags \
ctags-am dist-info distclean distclean-compile \
distclean-generic distclean-hdr distclean-libtool \
-# generated automatically by aclocal 1.15.1 -*- Autoconf -*-
+# generated automatically by aclocal 1.16.1 -*- Autoconf -*-
-# Copyright (C) 1996-2017 Free Software Foundation, Inc.
+# Copyright (C) 1996-2018 Free Software Foundation, Inc.
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
If you have problems, you may need to regenerate the build system entirely.
To do so, use the procedure documented by the package, typically 'autoreconf'.])])
-# Copyright (C) 2002-2017 Free Software Foundation, Inc.
+# Copyright (C) 2002-2018 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# generated from the m4 files accompanying Automake X.Y.
# (This private macro should not be called outside this file.)
AC_DEFUN([AM_AUTOMAKE_VERSION],
-[am__api_version='1.15'
+[am__api_version='1.16'
dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
dnl require some minimum version. Point them to the right macro.
-m4_if([$1], [1.15.1], [],
+m4_if([$1], [1.16.1], [],
[AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
])
# Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced.
# This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
-[AM_AUTOMAKE_VERSION([1.15.1])dnl
+[AM_AUTOMAKE_VERSION([1.16.1])dnl
m4_ifndef([AC_AUTOCONF_VERSION],
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
# AM_AUX_DIR_EXPAND -*- Autoconf -*-
-# Copyright (C) 2001-2017 Free Software Foundation, Inc.
+# Copyright (C) 2001-2018 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# AM_CONDITIONAL -*- Autoconf -*-
-# Copyright (C) 1997-2017 Free Software Foundation, Inc.
+# Copyright (C) 1997-2018 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
Usually this means the macro was only invoked conditionally.]])
fi])])
-# Copyright (C) 1999-2017 Free Software Foundation, Inc.
+# Copyright (C) 1999-2018 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# Generate code to set up dependency tracking. -*- Autoconf -*-
-# Copyright (C) 1999-2017 Free Software Foundation, Inc.
+# Copyright (C) 1999-2018 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
-
# _AM_OUTPUT_DEPENDENCY_COMMANDS
# ------------------------------
AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS],
# Older Autoconf quotes --file arguments for eval, but not when files
# are listed without --file. Let's play safe and only enable the eval
# if we detect the quoting.
- case $CONFIG_FILES in
- *\'*) eval set x "$CONFIG_FILES" ;;
- *) set x $CONFIG_FILES ;;
- esac
+ # TODO: see whether this extra hack can be removed once we start
+ # requiring Autoconf 2.70 or later.
+ AS_CASE([$CONFIG_FILES],
+ [*\'*], [eval set x "$CONFIG_FILES"],
+ [*], [set x $CONFIG_FILES])
shift
- for mf
+ # Used to flag and report bootstrapping failures.
+ am_rc=0
+ for am_mf
do
# Strip MF so we end up with the name of the file.
- mf=`echo "$mf" | sed -e 's/:.*$//'`
- # Check whether this is an Automake generated Makefile or not.
- # We used to match only the files named 'Makefile.in', but
- # some people rename them; so instead we look at the file content.
- # Grep'ing the first line is not enough: some people post-process
- # each Makefile.in and add a new line on top of each file to say so.
- # Grep'ing the whole file is not good either: AIX grep has a line
+ am_mf=`AS_ECHO(["$am_mf"]) | sed -e 's/:.*$//'`
+ # Check whether this is an Automake generated Makefile which includes
+ # dependency-tracking related rules and includes.
+ # Grep'ing the whole file directly is not great: AIX grep has a line
# limit of 2048, but all sed's we know have understand at least 4000.
- if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then
- dirpart=`AS_DIRNAME("$mf")`
- else
- continue
- fi
- # Extract the definition of DEPDIR, am__include, and am__quote
- # from the Makefile without running 'make'.
- DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"`
- test -z "$DEPDIR" && continue
- am__include=`sed -n 's/^am__include = //p' < "$mf"`
- test -z "$am__include" && continue
- am__quote=`sed -n 's/^am__quote = //p' < "$mf"`
- # Find all dependency output files, they are included files with
- # $(DEPDIR) in their names. We invoke sed twice because it is the
- # simplest approach to changing $(DEPDIR) to its actual value in the
- # expansion.
- for file in `sed -n "
- s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \
- sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g'`; do
- # Make sure the directory exists.
- test -f "$dirpart/$file" && continue
- fdir=`AS_DIRNAME(["$file"])`
- AS_MKDIR_P([$dirpart/$fdir])
- # echo "creating $dirpart/$file"
- echo '# dummy' > "$dirpart/$file"
- done
+ sed -n 's,^am--depfiles:.*,X,p' "$am_mf" | grep X >/dev/null 2>&1 \
+ || continue
+ am_dirpart=`AS_DIRNAME(["$am_mf"])`
+ am_filepart=`AS_BASENAME(["$am_mf"])`
+ AM_RUN_LOG([cd "$am_dirpart" \
+ && sed -e '/# am--include-marker/d' "$am_filepart" \
+ | $MAKE -f - am--depfiles]) || am_rc=$?
done
+ if test $am_rc -ne 0; then
+ AC_MSG_FAILURE([Something went wrong bootstrapping makefile fragments
+ for automatic dependency tracking. Try re-running configure with the
+ '--disable-dependency-tracking' option to at least be able to build
+ the package (albeit without support for automatic dependency tracking).])
+ fi
+ AS_UNSET([am_dirpart])
+ AS_UNSET([am_filepart])
+ AS_UNSET([am_mf])
+ AS_UNSET([am_rc])
+ rm -f conftest-deps.mk
}
])# _AM_OUTPUT_DEPENDENCY_COMMANDS
# -----------------------------
# This macro should only be invoked once -- use via AC_REQUIRE.
#
-# This code is only required when automatic dependency tracking
-# is enabled. FIXME. This creates each '.P' file that we will
-# need in order to bootstrap the dependency handling code.
+# This code is only required when automatic dependency tracking is enabled.
+# This creates each '.Po' and '.Plo' makefile fragment that we'll need in
+# order to bootstrap the dependency handling code.
AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
[AC_CONFIG_COMMANDS([depfiles],
[test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS],
- [AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"])
-])
+ [AMDEP_TRUE="$AMDEP_TRUE" MAKE="${MAKE-make}"])])
# Do all the work for Automake. -*- Autoconf -*-
-# Copyright (C) 1996-2017 Free Software Foundation, Inc.
+# Copyright (C) 1996-2018 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
AC_REQUIRE([AC_PROG_MKDIR_P])dnl
# For better backward compatibility. To be removed once Automake 1.9.x
# dies out for good. For more background, see:
-# <http://lists.gnu.org/archive/html/automake/2012-07/msg00001.html>
-# <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
+# <https://lists.gnu.org/archive/html/automake/2012-07/msg00001.html>
+# <https://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
AC_SUBST([mkdir_p], ['$(MKDIR_P)'])
# We need awk for the "check" target (and possibly the TAP driver). The
# system "awk" is bad on some platforms.
Aborting the configuration process, to ensure you take notice of the issue.
You can download and install GNU coreutils to get an 'rm' implementation
-that behaves properly: <http://www.gnu.org/software/coreutils/>.
+that behaves properly: <https://www.gnu.org/software/coreutils/>.
If you want to complete the configuration process using your problematic
'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM
done
echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
-# Copyright (C) 2001-2017 Free Software Foundation, Inc.
+# Copyright (C) 2001-2018 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# Add --enable-maintainer-mode option to configure. -*- Autoconf -*-
# From Jim Meyering
-# Copyright (C) 1996-2017 Free Software Foundation, Inc.
+# Copyright (C) 1996-2018 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# Check to see how 'make' treats includes. -*- Autoconf -*-
-# Copyright (C) 2001-2017 Free Software Foundation, Inc.
+# Copyright (C) 2001-2018 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# AM_MAKE_INCLUDE()
# -----------------
-# Check to see how make treats includes.
+# Check whether make has an 'include' directive that can support all
+# the idioms we need for our automatic dependency tracking code.
AC_DEFUN([AM_MAKE_INCLUDE],
-[am_make=${MAKE-make}
-cat > confinc << 'END'
+[AC_MSG_CHECKING([whether ${MAKE-make} supports the include directive])
+cat > confinc.mk << 'END'
am__doit:
- @echo this is the am__doit target
+ @echo this is the am__doit target >confinc.out
.PHONY: am__doit
END
-# If we don't find an include directive, just comment out the code.
-AC_MSG_CHECKING([for style of include used by $am_make])
am__include="#"
am__quote=
-_am_result=none
-# First try GNU make style include.
-echo "include confinc" > confmf
-# Ignore all kinds of additional output from 'make'.
-case `$am_make -s -f confmf 2> /dev/null` in #(
-*the\ am__doit\ target*)
- am__include=include
- am__quote=
- _am_result=GNU
- ;;
-esac
-# Now try BSD make style include.
-if test "$am__include" = "#"; then
- echo '.include "confinc"' > confmf
- case `$am_make -s -f confmf 2> /dev/null` in #(
- *the\ am__doit\ target*)
- am__include=.include
- am__quote="\""
- _am_result=BSD
- ;;
- esac
-fi
-AC_SUBST([am__include])
-AC_SUBST([am__quote])
-AC_MSG_RESULT([$_am_result])
-rm -f confinc confmf
-])
+# BSD make does it like this.
+echo '.include "confinc.mk" # ignored' > confmf.BSD
+# Other make implementations (GNU, Solaris 10, AIX) do it like this.
+echo 'include confinc.mk # ignored' > confmf.GNU
+_am_result=no
+for s in GNU BSD; do
+ AM_RUN_LOG([${MAKE-make} -f confmf.$s && cat confinc.out])
+ AS_CASE([$?:`cat confinc.out 2>/dev/null`],
+ ['0:this is the am__doit target'],
+ [AS_CASE([$s],
+ [BSD], [am__include='.include' am__quote='"'],
+ [am__include='include' am__quote=''])])
+ if test "$am__include" != "#"; then
+ _am_result="yes ($s style)"
+ break
+ fi
+done
+rm -f confinc.* confmf.*
+AC_MSG_RESULT([${_am_result}])
+AC_SUBST([am__include])])
+AC_SUBST([am__quote])])
# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*-
-# Copyright (C) 1997-2017 Free Software Foundation, Inc.
+# Copyright (C) 1997-2018 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# Helper functions for option handling. -*- Autoconf -*-
-# Copyright (C) 2001-2017 Free Software Foundation, Inc.
+# Copyright (C) 2001-2018 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
AC_DEFUN([_AM_IF_OPTION],
[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
-# Copyright (C) 1999-2017 Free Software Foundation, Inc.
+# Copyright (C) 1999-2018 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# For backward compatibility.
AC_DEFUN_ONCE([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC])])
-# Copyright (C) 2001-2017 Free Software Foundation, Inc.
+# Copyright (C) 2001-2018 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# Check to make sure that the build environment is sane. -*- Autoconf -*-
-# Copyright (C) 1996-2017 Free Software Foundation, Inc.
+# Copyright (C) 1996-2018 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
rm -f conftest.file
])
-# Copyright (C) 2009-2017 Free Software Foundation, Inc.
+# Copyright (C) 2009-2018 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl
])
-# Copyright (C) 2001-2017 Free Software Foundation, Inc.
+# Copyright (C) 2001-2018 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
AC_SUBST([INSTALL_STRIP_PROGRAM])])
-# Copyright (C) 2006-2017 Free Software Foundation, Inc.
+# Copyright (C) 2006-2018 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# Check how to create a tarball. -*- Autoconf -*-
-# Copyright (C) 2004-2017 Free Software Foundation, Inc.
+# Copyright (C) 2004-2018 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
/* Define to 1 if the GCN plugin is built, 0 if not. */
#undef PLUGIN_GCN
-/* Define to 1 if the HSA plugin is built, 0 if not. */
-#undef PLUGIN_HSA
-
/* Define to 1 if the NVIDIA plugin is built, 0 if not. */
#undef PLUGIN_NVPTX
SECTION_LDFLAGS
PLUGIN_GCN_FALSE
PLUGIN_GCN_TRUE
-PLUGIN_HSA_FALSE
-PLUGIN_HSA_TRUE
PLUGIN_NVPTX_FALSE
PLUGIN_NVPTX_TRUE
offload_additional_lib_paths
PLUGIN_GCN_LDFLAGS
PLUGIN_GCN_CPPFLAGS
PLUGIN_GCN
-PLUGIN_HSA_LIBS
-PLUGIN_HSA_LDFLAGS
-PLUGIN_HSA_CPPFLAGS
-PLUGIN_HSA
HSA_RUNTIME_LIB
HSA_RUNTIME_INCLUDE
PLUGIN_NVPTX_LIBS
AMDEPBACKSLASH
AMDEP_FALSE
AMDEP_TRUE
-am__quote
am__include
DEPDIR
OBJEXT
PACKAGE_TARNAME
PACKAGE_NAME
PATH_SEPARATOR
-SHELL'
+SHELL
+am__quote'
ac_subst_files=''
ac_user_opts='
enable_option_checking
# -Wall: turns on all automake warnings...
# -Wno-portability: ...except this one, since GNU make is required.
# -Wno-override: ... and this one, since we do want this in testsuite.
-am__api_version='1.15'
+am__api_version='1.16'
# Find a good install program. We prefer a C program (faster),
# so one script is as good as another. But avoid the broken or
# For better backward compatibility. To be removed once Automake 1.9.x
# dies out for good. For more background, see:
-# <http://lists.gnu.org/archive/html/automake/2012-07/msg00001.html>
-# <http://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
+# <https://lists.gnu.org/archive/html/automake/2012-07/msg00001.html>
+# <https://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
mkdir_p='$(MKDIR_P)'
# We need awk for the "check" target (and possibly the TAP driver). The
Aborting the configuration process, to ensure you take notice of the issue.
You can download and install GNU coreutils to get an 'rm' implementation
-that behaves properly: <http://www.gnu.org/software/coreutils/>.
+that behaves properly: <https://www.gnu.org/software/coreutils/>.
If you want to complete the configuration process using your problematic
'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM
ac_config_commands="$ac_config_commands depfiles"
-
-am_make=${MAKE-make}
-cat > confinc << 'END'
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} supports the include directive" >&5
+$as_echo_n "checking whether ${MAKE-make} supports the include directive... " >&6; }
+cat > confinc.mk << 'END'
am__doit:
- @echo this is the am__doit target
+ @echo this is the am__doit target >confinc.out
.PHONY: am__doit
END
-# If we don't find an include directive, just comment out the code.
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for style of include used by $am_make" >&5
-$as_echo_n "checking for style of include used by $am_make... " >&6; }
am__include="#"
am__quote=
-_am_result=none
-# First try GNU make style include.
-echo "include confinc" > confmf
-# Ignore all kinds of additional output from 'make'.
-case `$am_make -s -f confmf 2> /dev/null` in #(
-*the\ am__doit\ target*)
- am__include=include
- am__quote=
- _am_result=GNU
- ;;
-esac
-# Now try BSD make style include.
-if test "$am__include" = "#"; then
- echo '.include "confinc"' > confmf
- case `$am_make -s -f confmf 2> /dev/null` in #(
- *the\ am__doit\ target*)
- am__include=.include
- am__quote="\""
- _am_result=BSD
+# BSD make does it like this.
+echo '.include "confinc.mk" # ignored' > confmf.BSD
+# Other make implementations (GNU, Solaris 10, AIX) do it like this.
+echo 'include confinc.mk # ignored' > confmf.GNU
+_am_result=no
+for s in GNU BSD; do
+ { echo "$as_me:$LINENO: ${MAKE-make} -f confmf.$s && cat confinc.out" >&5
+ (${MAKE-make} -f confmf.$s && cat confinc.out) >&5 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+ case $?:`cat confinc.out 2>/dev/null` in #(
+ '0:this is the am__doit target') :
+ case $s in #(
+ BSD) :
+ am__include='.include' am__quote='"' ;; #(
+ *) :
+ am__include='include' am__quote='' ;;
+esac ;; #(
+ *) :
;;
- esac
-fi
-
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $_am_result" >&5
-$as_echo "$_am_result" >&6; }
-rm -f confinc confmf
+esac
+ if test "$am__include" != "#"; then
+ _am_result="yes ($s style)"
+ break
+ fi
+done
+rm -f confinc.* confmf.*
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: ${_am_result}" >&5
+$as_echo "${_am_result}" >&6; }
# Check whether --enable-dependency-tracking was given.
if test "${enable_dependency_tracking+set}" = set; then :
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 11438 "configure"
+#line 11432 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 11544 "configure"
+#line 11538 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
HSA_RUNTIME_LDFLAGS=-L$HSA_RUNTIME_LIB
fi
-PLUGIN_HSA=0
-PLUGIN_HSA_CPPFLAGS=
-PLUGIN_HSA_LDFLAGS=
-PLUGIN_HSA_LIBS=
-
-
-
-
-
PLUGIN_GCN=0
PLUGIN_GCN_CPPFLAGS=
PLUGIN_GCN_LDFLAGS=
;;
esac
;;
- hsa*)
- case "${target}" in
- x86_64-*-*)
- case " ${CC} ${CFLAGS} " in
- *" -m32 "*|*" -mx32 "*)
- PLUGIN_HSA=0
- ;;
- *)
- tgt_plugin=hsa
- PLUGIN_HSA=$tgt
- PLUGIN_HSA_CPPFLAGS=$HSA_RUNTIME_CPPFLAGS
- PLUGIN_HSA_LDFLAGS="$HSA_RUNTIME_LDFLAGS"
- PLUGIN_HSA_LIBS="-ldl"
-
- PLUGIN_HSA_save_CPPFLAGS=$CPPFLAGS
- CPPFLAGS="$PLUGIN_HSA_CPPFLAGS $CPPFLAGS"
- PLUGIN_HSA_save_LDFLAGS=$LDFLAGS
- LDFLAGS="$PLUGIN_HSA_LDFLAGS $LDFLAGS"
- PLUGIN_HSA_save_LIBS=$LIBS
- LIBS="$PLUGIN_HSA_LIBS $LIBS"
-
- PLUGIN_HSA=1
- CPPFLAGS=$PLUGIN_HSA_save_CPPFLAGS
- LDFLAGS=$PLUGIN_HSA_save_LDFLAGS
- LIBS=$PLUGIN_HSA_save_LIBS
- case $PLUGIN_HSA in
- hsa*)
- HSA_PLUGIN=0
- as_fn_error $? "HSA run-time package required for HSA support" "$LINENO" 5
- ;;
- esac
- ;;
- esac
- ;;
- *-*-*)
- PLUGIN_HSA=0
- ;;
- esac
- ;;
amdgcn*)
case "${target}" in
offload_targets=$offload_targets,$tgt
fi
# Configure additional search paths.
- if test "$tgt_plugin" = hsa; then
- # Offloading compilation is all handled by the target compiler.
- :
- elif test x"$tgt_dir" != x; then
+ if test x"$tgt_dir" != x; then
offload_additional_options="$offload_additional_options -B$tgt_dir/libexec/gcc/\$(target_alias)/\$(gcc_version) -B$tgt_dir/bin"
offload_additional_lib_paths="$offload_additional_lib_paths:$tgt_dir/lib64:$tgt_dir/lib:$tgt_dir/lib32"
else
cat >>confdefs.h <<_ACEOF
#define PLUGIN_NVPTX_DYNAMIC $PLUGIN_NVPTX_DYNAMIC
-_ACEOF
-
- if test $PLUGIN_HSA = 1; then
- PLUGIN_HSA_TRUE=
- PLUGIN_HSA_FALSE='#'
-else
- PLUGIN_HSA_TRUE='#'
- PLUGIN_HSA_FALSE=
-fi
-
-
-cat >>confdefs.h <<_ACEOF
-#define PLUGIN_HSA $PLUGIN_HSA
_ACEOF
if test $PLUGIN_GCN = 1; then
case "$enable_cet" in
auto)
# Check if target supports multi-byte NOPs
- # and if assembler supports CET insn.
+ # and if compiler and assembler support CET insn.
cet_save_CFLAGS="$CFLAGS"
CFLAGS="$CFLAGS -fcf-protection"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
as_fn_error $? "conditional \"PLUGIN_NVPTX\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
-if test -z "${PLUGIN_HSA_TRUE}" && test -z "${PLUGIN_HSA_FALSE}"; then
- as_fn_error $? "conditional \"PLUGIN_HSA\" was never defined.
-Usually this means the macro was only invoked conditionally." "$LINENO" 5
-fi
if test -z "${PLUGIN_GCN_TRUE}" && test -z "${PLUGIN_GCN_FALSE}"; then
as_fn_error $? "conditional \"PLUGIN_GCN\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
CXX="$CXX"
GFORTRAN="$GFORTRAN"
GDC="$GDC"
-AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"
+AMDEP_TRUE="$AMDEP_TRUE" MAKE="${MAKE-make}"
# The HP-UX ksh and POSIX shell print the target directory to stdout
# Older Autoconf quotes --file arguments for eval, but not when files
# are listed without --file. Let's play safe and only enable the eval
# if we detect the quoting.
- case $CONFIG_FILES in
- *\'*) eval set x "$CONFIG_FILES" ;;
- *) set x $CONFIG_FILES ;;
- esac
+ # TODO: see whether this extra hack can be removed once we start
+ # requiring Autoconf 2.70 or later.
+ case $CONFIG_FILES in #(
+ *\'*) :
+ eval set x "$CONFIG_FILES" ;; #(
+ *) :
+ set x $CONFIG_FILES ;; #(
+ *) :
+ ;;
+esac
shift
- for mf
+ # Used to flag and report bootstrapping failures.
+ am_rc=0
+ for am_mf
do
# Strip MF so we end up with the name of the file.
- mf=`echo "$mf" | sed -e 's/:.*$//'`
- # Check whether this is an Automake generated Makefile or not.
- # We used to match only the files named 'Makefile.in', but
- # some people rename them; so instead we look at the file content.
- # Grep'ing the first line is not enough: some people post-process
- # each Makefile.in and add a new line on top of each file to say so.
- # Grep'ing the whole file is not good either: AIX grep has a line
+ am_mf=`$as_echo "$am_mf" | sed -e 's/:.*$//'`
+ # Check whether this is an Automake generated Makefile which includes
+ # dependency-tracking related rules and includes.
+ # Grep'ing the whole file directly is not great: AIX grep has a line
# limit of 2048, but all sed's we know have understand at least 4000.
- if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then
- dirpart=`$as_dirname -- "$mf" ||
-$as_expr X"$mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$mf" : 'X\(//\)[^/]' \| \
- X"$mf" : 'X\(//\)$' \| \
- X"$mf" : 'X\(/\)' \| . 2>/dev/null ||
-$as_echo X"$mf" |
+ sed -n 's,^am--depfiles:.*,X,p' "$am_mf" | grep X >/dev/null 2>&1 \
+ || continue
+ am_dirpart=`$as_dirname -- "$am_mf" ||
+$as_expr X"$am_mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$am_mf" : 'X\(//\)[^/]' \| \
+ X"$am_mf" : 'X\(//\)$' \| \
+ X"$am_mf" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X"$am_mf" |
sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
s//\1/
q
q
}
s/.*/./; q'`
- else
- continue
- fi
- # Extract the definition of DEPDIR, am__include, and am__quote
- # from the Makefile without running 'make'.
- DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"`
- test -z "$DEPDIR" && continue
- am__include=`sed -n 's/^am__include = //p' < "$mf"`
- test -z "$am__include" && continue
- am__quote=`sed -n 's/^am__quote = //p' < "$mf"`
- # Find all dependency output files, they are included files with
- # $(DEPDIR) in their names. We invoke sed twice because it is the
- # simplest approach to changing $(DEPDIR) to its actual value in the
- # expansion.
- for file in `sed -n "
- s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \
- sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g'`; do
- # Make sure the directory exists.
- test -f "$dirpart/$file" && continue
- fdir=`$as_dirname -- "$file" ||
-$as_expr X"$file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$file" : 'X\(//\)[^/]' \| \
- X"$file" : 'X\(//\)$' \| \
- X"$file" : 'X\(/\)' \| . 2>/dev/null ||
-$as_echo X"$file" |
- sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
- s//\1/
- q
- }
- /^X\(\/\/\)[^/].*/{
+ am_filepart=`$as_basename -- "$am_mf" ||
+$as_expr X/"$am_mf" : '.*/\([^/][^/]*\)/*$' \| \
+ X"$am_mf" : 'X\(//\)$' \| \
+ X"$am_mf" : 'X\(/\)' \| . 2>/dev/null ||
+$as_echo X/"$am_mf" |
+ sed '/^.*\/\([^/][^/]*\)\/*$/{
s//\1/
q
}
- /^X\(\/\/\)$/{
+ /^X\/\(\/\/\)$/{
s//\1/
q
}
- /^X\(\/\).*/{
+ /^X\/\(\/\).*/{
s//\1/
q
}
s/.*/./; q'`
- as_dir=$dirpart/$fdir; as_fn_mkdir_p
- # echo "creating $dirpart/$file"
- echo '# dummy' > "$dirpart/$file"
- done
+ { echo "$as_me:$LINENO: cd "$am_dirpart" \
+ && sed -e '/# am--include-marker/d' "$am_filepart" \
+ | $MAKE -f - am--depfiles" >&5
+ (cd "$am_dirpart" \
+ && sed -e '/# am--include-marker/d' "$am_filepart" \
+ | $MAKE -f - am--depfiles) >&5 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } || am_rc=$?
done
+ if test $am_rc -ne 0; then
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "Something went wrong bootstrapping makefile fragments
+ for automatic dependency tracking. Try re-running configure with the
+ '--disable-dependency-tracking' option to at least be able to build
+ the package (albeit without support for automatic dependency tracking).
+See \`config.log' for more details" "$LINENO" 5; }
+ fi
+ { am_dirpart=; unset am_dirpart;}
+ { am_filepart=; unset am_filepart;}
+ { am_mf=; unset am_mf;}
+ { am_rc=; unset am_rc;}
+ rm -f conftest-deps.mk
}
;;
"libtool":C)
libgomp_plugin_nvptx_la_LIBTOOLFLAGS = --tag=disable-static
endif
-if PLUGIN_HSA
-# Heterogenous Systems Architecture plugin
-libgomp_plugin_hsa_version_info = -version-info $(libtool_VERSION)
-toolexeclib_LTLIBRARIES += libgomp-plugin-hsa.la
-libgomp_plugin_hsa_la_SOURCES = plugin/plugin-hsa.c
-libgomp_plugin_hsa_la_CPPFLAGS = $(AM_CPPFLAGS) $(PLUGIN_HSA_CPPFLAGS) \
- -D_GNU_SOURCE
-libgomp_plugin_hsa_la_LDFLAGS = $(libgomp_plugin_hsa_version_info) \
- $(lt_host_flags)
-libgomp_plugin_hsa_la_LDFLAGS += $(PLUGIN_HSA_LDFLAGS)
-libgomp_plugin_hsa_la_LIBADD = libgomp.la $(PLUGIN_HSA_LIBS)
-libgomp_plugin_hsa_la_LIBTOOLFLAGS = --tag=disable-static
-endif
-
if PLUGIN_GCN
# AMD GCN plugin
libgomp_plugin_gcn_version_info = -version-info $(libtool_VERSION)
HSA_RUNTIME_LDFLAGS=-L$HSA_RUNTIME_LIB
fi
-PLUGIN_HSA=0
-PLUGIN_HSA_CPPFLAGS=
-PLUGIN_HSA_LDFLAGS=
-PLUGIN_HSA_LIBS=
-AC_SUBST(PLUGIN_HSA)
-AC_SUBST(PLUGIN_HSA_CPPFLAGS)
-AC_SUBST(PLUGIN_HSA_LDFLAGS)
-AC_SUBST(PLUGIN_HSA_LIBS)
-
PLUGIN_GCN=0
PLUGIN_GCN_CPPFLAGS=
PLUGIN_GCN_LDFLAGS=
;;
esac
;;
- hsa*)
- case "${target}" in
- x86_64-*-*)
- case " ${CC} ${CFLAGS} " in
- *" -m32 "*|*" -mx32 "*)
- PLUGIN_HSA=0
- ;;
- *)
- tgt_plugin=hsa
- PLUGIN_HSA=$tgt
- PLUGIN_HSA_CPPFLAGS=$HSA_RUNTIME_CPPFLAGS
- PLUGIN_HSA_LDFLAGS="$HSA_RUNTIME_LDFLAGS"
- PLUGIN_HSA_LIBS="-ldl"
-
- PLUGIN_HSA_save_CPPFLAGS=$CPPFLAGS
- CPPFLAGS="$PLUGIN_HSA_CPPFLAGS $CPPFLAGS"
- PLUGIN_HSA_save_LDFLAGS=$LDFLAGS
- LDFLAGS="$PLUGIN_HSA_LDFLAGS $LDFLAGS"
- PLUGIN_HSA_save_LIBS=$LIBS
- LIBS="$PLUGIN_HSA_LIBS $LIBS"
-
- PLUGIN_HSA=1
- CPPFLAGS=$PLUGIN_HSA_save_CPPFLAGS
- LDFLAGS=$PLUGIN_HSA_save_LDFLAGS
- LIBS=$PLUGIN_HSA_save_LIBS
- case $PLUGIN_HSA in
- hsa*)
- HSA_PLUGIN=0
- AC_MSG_ERROR([HSA run-time package required for HSA support])
- ;;
- esac
- ;;
- esac
- ;;
- *-*-*)
- PLUGIN_HSA=0
- ;;
- esac
- ;;
amdgcn*)
case "${target}" in
offload_targets=$offload_targets,$tgt
fi
# Configure additional search paths.
- if test "$tgt_plugin" = hsa; then
- # Offloading compilation is all handled by the target compiler.
- :
- elif test x"$tgt_dir" != x; then
+ if test x"$tgt_dir" != x; then
offload_additional_options="$offload_additional_options -B$tgt_dir/libexec/gcc/\$(target_alias)/\$(gcc_version) -B$tgt_dir/bin"
offload_additional_lib_paths="$offload_additional_lib_paths:$tgt_dir/lib64:$tgt_dir/lib:$tgt_dir/lib32"
else
[Define to 1 if the NVIDIA plugin is built, 0 if not.])
AC_DEFINE_UNQUOTED([PLUGIN_NVPTX_DYNAMIC], [$PLUGIN_NVPTX_DYNAMIC],
[Define to 1 if the NVIDIA plugin should dlopen libcuda.so.1, 0 if it should be linked against it.])
-AM_CONDITIONAL([PLUGIN_HSA], [test $PLUGIN_HSA = 1])
-AC_DEFINE_UNQUOTED([PLUGIN_HSA], [$PLUGIN_HSA],
- [Define to 1 if the HSA plugin is built, 0 if not.])
AM_CONDITIONAL([PLUGIN_GCN], [test $PLUGIN_GCN = 1])
AC_DEFINE_UNQUOTED([PLUGIN_GCN], [$PLUGIN_GCN],
[Define to 1 if the GCN plugin is built, 0 if not.])
+++ /dev/null
-/* HSA Extensions API 1.0.1 representation description.
- Copyright (C) 2016-2020 Free Software Foundation, Inc.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
-<http://www.gnu.org/licenses/>.
-
-The contents of the file was created by extracting data structures, enum,
-typedef and other definitions from HSA Runtime Programmer’s Reference Manual
-Version 1.0 (http://www.hsafoundation.com/standards/).
-
-HTML version is provided on the following link:
-http://www.hsafoundation.com/html/Content/Runtime/Topics/Runtime_title_page.htm
-*/
-
-
-#ifndef _HSA_EXT_FINALIZE_H
-#define _HSA_EXT_FINALIZE_H 1
-
-struct BrigModuleHeader;
-typedef struct BrigModuleHeader *BrigModule_t;
-
-typedef enum {
- HSA_EXT_IMAGE_GEOMETRY_1D = 0,
- HSA_EXT_IMAGE_GEOMETRY_2D = 1,
- HSA_EXT_IMAGE_GEOMETRY_3D = 2,
- HSA_EXT_IMAGE_GEOMETRY_1DA = 3,
- HSA_EXT_IMAGE_GEOMETRY_2DA = 4,
- HSA_EXT_IMAGE_GEOMETRY_1DB = 5,
- HSA_EXT_IMAGE_GEOMETRY_2DDEPTH = 6,
- HSA_EXT_IMAGE_GEOMETRY_2DADEPTH = 7
-} hsa_ext_image_geometry_t;
-
-typedef enum {
- HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8 = 0,
- HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16 = 1,
- HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8 = 2,
- HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT16 = 3,
- HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT24 = 4,
- HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 = 5,
- HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 = 6,
- HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_101010 = 7,
- HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8 = 8,
- HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16 = 9,
- HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32 = 10,
- HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 = 11,
- HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 = 12,
- HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 = 13,
- HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT = 14,
- HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT = 15
-} hsa_ext_image_channel_type_t;
-
-typedef enum {
- HSA_EXT_IMAGE_CHANNEL_ORDER_A = 0,
- HSA_EXT_IMAGE_CHANNEL_ORDER_R = 1,
- HSA_EXT_IMAGE_CHANNEL_ORDER_RX = 2,
- HSA_EXT_IMAGE_CHANNEL_ORDER_RG = 3,
- HSA_EXT_IMAGE_CHANNEL_ORDER_RGX = 4,
- HSA_EXT_IMAGE_CHANNEL_ORDER_RA = 5,
- HSA_EXT_IMAGE_CHANNEL_ORDER_RGB = 6,
- HSA_EXT_IMAGE_CHANNEL_ORDER_RGBX = 7,
- HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA = 8,
- HSA_EXT_IMAGE_CHANNEL_ORDER_BGRA = 9,
- HSA_EXT_IMAGE_CHANNEL_ORDER_ARGB = 10,
- HSA_EXT_IMAGE_CHANNEL_ORDER_ABGR = 11,
- HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB = 12,
- HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX = 13,
- HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA = 14,
- HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA = 15,
- HSA_EXT_IMAGE_CHANNEL_ORDER_INTENSITY = 16,
- HSA_EXT_IMAGE_CHANNEL_ORDER_LUMINANCE = 17,
- HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH = 18,
- HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL = 19
-} hsa_ext_image_channel_order_t;
-
-typedef struct hsa_ext_image_format_s
-{
- hsa_ext_image_channel_type_t channel_type;
- hsa_ext_image_channel_order_t channel_order;
-} hsa_ext_image_format_t;
-
-typedef struct hsa_ext_sampler_s
-{
- uint64_t handle;
-} hsa_ext_sampler_t;
-typedef struct hsa_ext_image_data_info_s
-{
- size_t size;
- size_t alignment;
-} hsa_ext_image_data_info_t;
-typedef enum {
- HSA_EXT_SAMPLER_ADDRESSING_MODE_UNDEFINED = 0,
- HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE = 1,
- HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_BORDER = 2,
- HSA_EXT_SAMPLER_ADDRESSING_MODE_REPEAT = 3,
- HSA_EXT_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT = 4
-} hsa_ext_sampler_addressing_mode_t;
-typedef struct hsa_ext_image_s
-{
- uint64_t handle;
-} hsa_ext_image_t;
-typedef enum {
- HSA_EXT_IMAGE_CAPABILITY_NOT_SUPPORTED = 0x0,
- HSA_EXT_IMAGE_CAPABILITY_READ_ONLY = 0x1,
- HSA_EXT_IMAGE_CAPABILITY_WRITE_ONLY = 0x2,
- HSA_EXT_IMAGE_CAPABILITY_READ_WRITE = 0x4,
- HSA_EXT_IMAGE_CAPABILITY_READ_MODIFY_WRITE = 0x8,
- HSA_EXT_IMAGE_CAPABILITY_ACCESS_INVARIANT_DATA_LAYOUT = 0x10
-} hsa_ext_image_capability_t;
-typedef struct hsa_ext_control_directives_s
-{
- uint64_t control_directives_mask;
- uint16_t break_exceptions_mask;
- uint16_t detect_exceptions_mask;
- uint32_t max_dynamic_group_size;
- uint64_t max_flat_grid_size;
- uint32_t max_flat_workgroup_size;
- uint32_t reserved1;
- uint64_t required_grid_size[3];
- hsa_dim3_t required_workgroup_size;
- uint8_t required_dim;
- uint8_t reserved2[75];
-} hsa_ext_control_directives_t;
-typedef enum {
- HSA_EXT_SAMPLER_FILTER_MODE_NEAREST = 0,
- HSA_EXT_SAMPLER_FILTER_MODE_LINEAR = 1
-} hsa_ext_sampler_filter_mode_t;
-
-typedef enum {
- HSA_EXT_SAMPLER_COORDINATE_MODE_UNNORMALIZED = 0,
- HSA_EXT_SAMPLER_COORDINATE_MODE_NORMALIZED = 1
-} hsa_ext_sampler_coordinate_mode_t;
-typedef enum {
- HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO = -1
-} hsa_ext_finalizer_call_convention_t;
-typedef struct hsa_ext_program_s
-{
- uint64_t handle;
-} hsa_ext_program_t;
-typedef struct hsa_ext_image_descriptor_s
-{
- hsa_ext_image_geometry_t geometry;
- size_t width;
- size_t height;
- size_t depth;
- size_t array_size;
- hsa_ext_image_format_t format;
-} hsa_ext_image_descriptor_t;
-typedef enum {
- HSA_EXT_PROGRAM_INFO_MACHINE_MODEL = 0,
- HSA_EXT_PROGRAM_INFO_PROFILE = 1,
- HSA_EXT_PROGRAM_INFO_DEFAULT_FLOAT_ROUNDING_MODE = 2
-} hsa_ext_program_info_t;
-typedef BrigModule_t hsa_ext_module_t;
-typedef struct hsa_ext_sampler_descriptor_s
-{
- hsa_ext_sampler_coordinate_mode_t coordinate_mode;
- hsa_ext_sampler_filter_mode_t filter_mode;
- hsa_ext_sampler_addressing_mode_t address_mode;
-} hsa_ext_sampler_descriptor_t;
-
-typedef struct hsa_ext_image_region_s
-{
- hsa_dim3_t offset;
- hsa_dim3_t range;
-} hsa_ext_image_region_t;
-hsa_status_t hsa_ext_image_export (hsa_agent_t agent, hsa_ext_image_t src_image,
- void *dst_memory, size_t dst_row_pitch,
- size_t dst_slice_pitch,
- const hsa_ext_image_region_t *image_region);
-hsa_status_t hsa_ext_program_add_module (hsa_ext_program_t program,
- hsa_ext_module_t module);
-hsa_status_t hsa_ext_program_iterate_modules (
- hsa_ext_program_t program,
- hsa_status_t (*callback) (hsa_ext_program_t program, hsa_ext_module_t module,
- void *data),
- void *data);
-hsa_status_t hsa_ext_program_create (
- hsa_machine_model_t machine_model, hsa_profile_t profile,
- hsa_default_float_rounding_mode_t default_float_rounding_mode,
- const char *options, hsa_ext_program_t *program);
-hsa_status_t
-hsa_ext_image_data_get_info (hsa_agent_t agent,
- const hsa_ext_image_descriptor_t *image_descriptor,
- hsa_access_permission_t access_permission,
- hsa_ext_image_data_info_t *image_data_info);
-
-hsa_status_t hsa_ext_image_import (hsa_agent_t agent, const void *src_memory,
- size_t src_row_pitch, size_t src_slice_pitch,
- hsa_ext_image_t dst_image,
- const hsa_ext_image_region_t *image_region);
-hsa_status_t hsa_ext_program_get_info (hsa_ext_program_t program,
- hsa_ext_program_info_t attribute,
- void *value);
-enum
-{
- HSA_EXT_STATUS_ERROR_IMAGE_FORMAT_UNSUPPORTED = 0x3000,
- HSA_EXT_STATUS_ERROR_IMAGE_SIZE_UNSUPPORTED = 0x3001
-};
-hsa_status_t hsa_ext_image_destroy (hsa_agent_t agent, hsa_ext_image_t image);
-hsa_status_t hsa_ext_image_get_capability (
- hsa_agent_t agent, hsa_ext_image_geometry_t geometry,
- const hsa_ext_image_format_t *image_format, uint32_t *capability_mask);
-enum
-{
- HSA_EXT_STATUS_ERROR_INVALID_PROGRAM = 0x2000,
- HSA_EXT_STATUS_ERROR_INVALID_MODULE = 0x2001,
- HSA_EXT_STATUS_ERROR_INCOMPATIBLE_MODULE = 0x2002,
- HSA_EXT_STATUS_ERROR_MODULE_ALREADY_INCLUDED = 0x2003,
- HSA_EXT_STATUS_ERROR_SYMBOL_MISMATCH = 0x2004,
- HSA_EXT_STATUS_ERROR_FINALIZATION_FAILED = 0x2005,
- HSA_EXT_STATUS_ERROR_DIRECTIVE_MISMATCH = 0x2006
-};
-hsa_status_t hsa_ext_sampler_destroy (hsa_agent_t agent,
- hsa_ext_sampler_t sampler);
-hsa_status_t hsa_ext_program_finalize (
- hsa_ext_program_t program, hsa_isa_t isa, int32_t call_convention,
- hsa_ext_control_directives_t control_directives, const char *options,
- hsa_code_object_type_t code_object_type, hsa_code_object_t *code_object);
-hsa_status_t hsa_ext_image_create (
- hsa_agent_t agent, const hsa_ext_image_descriptor_t *image_descriptor,
- const void *image_data, hsa_access_permission_t access_permission,
- hsa_ext_image_t *image);
-hsa_status_t hsa_ext_program_destroy (hsa_ext_program_t program);
-hsa_status_t hsa_ext_image_copy (hsa_agent_t agent, hsa_ext_image_t src_image,
- const hsa_dim3_t *src_offset,
- hsa_ext_image_t dst_image,
- const hsa_dim3_t *dst_offset,
- const hsa_dim3_t *range);
-hsa_status_t hsa_ext_image_clear (hsa_agent_t agent, hsa_ext_image_t image,
- const void *data,
- const hsa_ext_image_region_t *image_region);
-enum
-{
- HSA_EXT_AGENT_INFO_IMAGE_1D_MAX_ELEMENTS = 0x3000,
- HSA_EXT_AGENT_INFO_IMAGE_1DA_MAX_ELEMENTS = 0x3001,
- HSA_EXT_AGENT_INFO_IMAGE_1DB_MAX_ELEMENTS = 0x3002,
- HSA_EXT_AGENT_INFO_IMAGE_2D_MAX_ELEMENTS = 0x3003,
- HSA_EXT_AGENT_INFO_IMAGE_2DA_MAX_ELEMENTS = 0x3004,
- HSA_EXT_AGENT_INFO_IMAGE_2DDEPTH_MAX_ELEMENTS = 0x3005,
- HSA_EXT_AGENT_INFO_IMAGE_2DADEPTH_MAX_ELEMENTS = 0x3006,
- HSA_EXT_AGENT_INFO_IMAGE_3D_MAX_ELEMENTS = 0x3007,
- HSA_EXT_AGENT_INFO_IMAGE_ARRAY_MAX_LAYERS = 0x3008,
- HSA_EXT_AGENT_INFO_MAX_IMAGE_RD_HANDLES = 0x3009,
- HSA_EXT_AGENT_INFO_MAX_IMAGE_RORW_HANDLES = 0x300A,
- HSA_EXT_AGENT_INFO_MAX_SAMPLER_HANDLERS = 0x300B
-};
-hsa_status_t
-hsa_ext_sampler_create (hsa_agent_t agent,
- const hsa_ext_sampler_descriptor_t *sampler_descriptor,
- hsa_ext_sampler_t *sampler);
-
-#endif /* _HSA_EXT_FINALIZE_H */
+++ /dev/null
-/* Plugin for HSAIL execution.
-
- Copyright (C) 2013-2020 Free Software Foundation, Inc.
-
- Contributed by Martin Jambor <mjambor@suse.cz> and
- Martin Liska <mliska@suse.cz>.
-
- This file is part of the GNU Offloading and Multi Processing Library
- (libgomp).
-
- Libgomp is free software; you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
-
- Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
- WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
-
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
-
-#include "config.h"
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <pthread.h>
-#ifdef HAVE_INTTYPES_H
-#include <inttypes.h>
-#endif
-#include <stdbool.h>
-#include <hsa.h>
-#include <plugin/hsa_ext_finalize.h>
-#include <dlfcn.h>
-#include "libgomp-plugin.h"
-#include "gomp-constants.h"
-#include "secure_getenv.h"
-
-#ifdef HAVE_INTTYPES_H
-typedef uint64_t print_uint64_t;
-#else
-#define PRIu64 "lu"
-typedef unsigned long print_uint64_t;
-#endif
-
-/* As an HSA runtime is dlopened, following structure defines function
- pointers utilized by the HSA plug-in. */
-
-struct hsa_runtime_fn_info
-{
- /* HSA runtime. */
- hsa_status_t (*hsa_status_string_fn) (hsa_status_t status,
- const char **status_string);
- hsa_status_t (*hsa_agent_get_info_fn) (hsa_agent_t agent,
- hsa_agent_info_t attribute,
- void *value);
- hsa_status_t (*hsa_init_fn) (void);
- hsa_status_t (*hsa_iterate_agents_fn)
- (hsa_status_t (*callback)(hsa_agent_t agent, void *data), void *data);
- hsa_status_t (*hsa_region_get_info_fn) (hsa_region_t region,
- hsa_region_info_t attribute,
- void *value);
- hsa_status_t (*hsa_queue_create_fn)
- (hsa_agent_t agent, uint32_t size, hsa_queue_type_t type,
- void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data),
- void *data, uint32_t private_segment_size,
- uint32_t group_segment_size, hsa_queue_t **queue);
- hsa_status_t (*hsa_agent_iterate_regions_fn)
- (hsa_agent_t agent,
- hsa_status_t (*callback)(hsa_region_t region, void *data), void *data);
- hsa_status_t (*hsa_executable_destroy_fn) (hsa_executable_t executable);
- hsa_status_t (*hsa_executable_create_fn)
- (hsa_profile_t profile, hsa_executable_state_t executable_state,
- const char *options, hsa_executable_t *executable);
- hsa_status_t (*hsa_executable_global_variable_define_fn)
- (hsa_executable_t executable, const char *variable_name, void *address);
- hsa_status_t (*hsa_executable_load_code_object_fn)
- (hsa_executable_t executable, hsa_agent_t agent,
- hsa_code_object_t code_object, const char *options);
- hsa_status_t (*hsa_executable_freeze_fn)(hsa_executable_t executable,
- const char *options);
- hsa_status_t (*hsa_signal_create_fn) (hsa_signal_value_t initial_value,
- uint32_t num_consumers,
- const hsa_agent_t *consumers,
- hsa_signal_t *signal);
- hsa_status_t (*hsa_memory_allocate_fn) (hsa_region_t region, size_t size,
- void **ptr);
- hsa_status_t (*hsa_memory_free_fn) (void *ptr);
- hsa_status_t (*hsa_signal_destroy_fn) (hsa_signal_t signal);
- hsa_status_t (*hsa_executable_get_symbol_fn)
- (hsa_executable_t executable, const char *module_name,
- const char *symbol_name, hsa_agent_t agent, int32_t call_convention,
- hsa_executable_symbol_t *symbol);
- hsa_status_t (*hsa_executable_symbol_get_info_fn)
- (hsa_executable_symbol_t executable_symbol,
- hsa_executable_symbol_info_t attribute, void *value);
- uint64_t (*hsa_queue_add_write_index_release_fn) (const hsa_queue_t *queue,
- uint64_t value);
- uint64_t (*hsa_queue_load_read_index_acquire_fn) (const hsa_queue_t *queue);
- void (*hsa_signal_store_relaxed_fn) (hsa_signal_t signal,
- hsa_signal_value_t value);
- void (*hsa_signal_store_release_fn) (hsa_signal_t signal,
- hsa_signal_value_t value);
- hsa_signal_value_t (*hsa_signal_wait_acquire_fn)
- (hsa_signal_t signal, hsa_signal_condition_t condition,
- hsa_signal_value_t compare_value, uint64_t timeout_hint,
- hsa_wait_state_t wait_state_hint);
- hsa_signal_value_t (*hsa_signal_load_acquire_fn) (hsa_signal_t signal);
- hsa_status_t (*hsa_queue_destroy_fn) (hsa_queue_t *queue);
-
- /* HSA finalizer. */
- hsa_status_t (*hsa_ext_program_add_module_fn) (hsa_ext_program_t program,
- hsa_ext_module_t module);
- hsa_status_t (*hsa_ext_program_create_fn)
- (hsa_machine_model_t machine_model, hsa_profile_t profile,
- hsa_default_float_rounding_mode_t default_float_rounding_mode,
- const char *options, hsa_ext_program_t *program);
- hsa_status_t (*hsa_ext_program_destroy_fn) (hsa_ext_program_t program);
- hsa_status_t (*hsa_ext_program_finalize_fn)
- (hsa_ext_program_t program,hsa_isa_t isa,
- int32_t call_convention, hsa_ext_control_directives_t control_directives,
- const char *options, hsa_code_object_type_t code_object_type,
- hsa_code_object_t *code_object);
-};
-
-/* HSA runtime functions that are initialized in init_hsa_context. */
-
-static struct hsa_runtime_fn_info hsa_fns;
-
-/* Keep the following GOMP prefixed structures in sync with respective parts of
- the compiler. */
-
-/* Structure describing the run-time and grid properties of an HSA kernel
- lauch. */
-
-struct GOMP_kernel_launch_attributes
-{
- /* Number of dimensions the workload has. Maximum number is 3. */
- uint32_t ndim;
- /* Size of the grid in the three respective dimensions. */
- uint32_t gdims[3];
- /* Size of work-groups in the respective dimensions. */
- uint32_t wdims[3];
-};
-
-/* Collection of information needed for a dispatch of a kernel from a
- kernel. */
-
-struct GOMP_hsa_kernel_dispatch
-{
- /* Pointer to a command queue associated with a kernel dispatch agent. */
- void *queue;
- /* Pointer to reserved memory for OMP data struct copying. */
- void *omp_data_memory;
- /* Pointer to a memory space used for kernel arguments passing. */
- void *kernarg_address;
- /* Kernel object. */
- uint64_t object;
- /* Synchronization signal used for dispatch synchronization. */
- uint64_t signal;
- /* Private segment size. */
- uint32_t private_segment_size;
- /* Group segment size. */
- uint32_t group_segment_size;
- /* Number of children kernel dispatches. */
- uint64_t kernel_dispatch_count;
- /* Debug purpose argument. */
- uint64_t debug;
- /* Levels-var ICV. */
- uint64_t omp_level;
- /* Kernel dispatch structures created for children kernel dispatches. */
- struct GOMP_hsa_kernel_dispatch **children_dispatches;
- /* Number of threads. */
- uint32_t omp_num_threads;
-};
-
-/* Part of the libgomp plugin interface. Return the name of the accelerator,
- which is "hsa". */
-
-const char *
-GOMP_OFFLOAD_get_name (void)
-{
- return "hsa";
-}
-
-/* Part of the libgomp plugin interface. Return the specific capabilities the
- HSA accelerator have. */
-
-unsigned int
-GOMP_OFFLOAD_get_caps (void)
-{
- return GOMP_OFFLOAD_CAP_SHARED_MEM | GOMP_OFFLOAD_CAP_OPENMP_400;
-}
-
-/* Part of the libgomp plugin interface. Identify as HSA accelerator. */
-
-int
-GOMP_OFFLOAD_get_type (void)
-{
- return OFFLOAD_TARGET_TYPE_HSA;
-}
-
-/* Return the libgomp version number we're compatible with. There is
- no requirement for cross-version compatibility. */
-
-unsigned
-GOMP_OFFLOAD_version (void)
-{
- return GOMP_VERSION;
-}
-
-/* Flag to decide whether print to stderr information about what is going on.
- Set in init_debug depending on environment variables. */
-
-static bool debug;
-
-/* Flag to decide if the runtime should suppress a possible fallback to host
- execution. */
-
-static bool suppress_host_fallback;
-
-/* Flag to locate HSA runtime shared library that is dlopened
- by this plug-in. */
-
-static const char *hsa_runtime_lib;
-
-/* Flag to decide if the runtime should support also CPU devices (can be
- a simulator). */
-
-static bool support_cpu_devices;
-
-/* Initialize debug and suppress_host_fallback according to the environment. */
-
-static void
-init_enviroment_variables (void)
-{
- if (secure_getenv ("HSA_DEBUG"))
- debug = true;
- else
- debug = false;
-
- if (secure_getenv ("HSA_SUPPRESS_HOST_FALLBACK"))
- suppress_host_fallback = true;
- else
- suppress_host_fallback = false;
-
- hsa_runtime_lib = secure_getenv ("HSA_RUNTIME_LIB");
- if (hsa_runtime_lib == NULL)
- hsa_runtime_lib = HSA_RUNTIME_LIB "libhsa-runtime64.so";
-
- support_cpu_devices = secure_getenv ("HSA_SUPPORT_CPU_DEVICES");
-}
-
-/* Print a logging message with PREFIX to stderr if HSA_DEBUG value
- is set to true. */
-
-#define HSA_LOG(prefix, ...) \
- do \
- { \
- if (debug) \
- { \
- fprintf (stderr, prefix); \
- fprintf (stderr, __VA_ARGS__); \
- } \
- } \
- while (false)
-
-/* Print a debugging message to stderr. */
-
-#define HSA_DEBUG(...) HSA_LOG ("HSA debug: ", __VA_ARGS__)
-
-/* Print a warning message to stderr. */
-
-#define HSA_WARNING(...) HSA_LOG ("HSA warning: ", __VA_ARGS__)
-
-/* Print HSA warning STR with an HSA STATUS code. */
-
-static void
-hsa_warn (const char *str, hsa_status_t status)
-{
- if (!debug)
- return;
-
- const char *hsa_error_msg = "[unknown]";
- hsa_fns.hsa_status_string_fn (status, &hsa_error_msg);
-
- fprintf (stderr, "HSA warning: %s\nRuntime message: %s", str, hsa_error_msg);
-}
-
-/* Report a fatal error STR together with the HSA error corresponding to STATUS
- and terminate execution of the current process. */
-
-static void
-hsa_fatal (const char *str, hsa_status_t status)
-{
- const char *hsa_error_msg = "[unknown]";
- hsa_fns.hsa_status_string_fn (status, &hsa_error_msg);
- GOMP_PLUGIN_fatal ("HSA fatal error: %s\nRuntime message: %s", str,
- hsa_error_msg);
-}
-
-/* Like hsa_fatal, except only report error message, and return FALSE
- for propagating error processing to outside of plugin. */
-
-static bool
-hsa_error (const char *str, hsa_status_t status)
-{
- const char *hsa_error_msg = "[unknown]";
- hsa_fns.hsa_status_string_fn (status, &hsa_error_msg);
- GOMP_PLUGIN_error ("HSA fatal error: %s\nRuntime message: %s", str,
- hsa_error_msg);
- return false;
-}
-
-struct hsa_kernel_description
-{
- const char *name;
- unsigned omp_data_size;
- bool gridified_kernel_p;
- unsigned kernel_dependencies_count;
- const char **kernel_dependencies;
-};
-
-struct global_var_info
-{
- const char *name;
- void *address;
-};
-
-/* Data passed by the static initializer of a compilation unit containing BRIG
- to GOMP_offload_register. */
-
-struct brig_image_desc
-{
- hsa_ext_module_t brig_module;
- const unsigned kernel_count;
- struct hsa_kernel_description *kernel_infos;
- const unsigned global_variable_count;
- struct global_var_info *global_variables;
-};
-
-struct agent_info;
-
-/* Information required to identify, finalize and run any given kernel. */
-
-struct kernel_info
-{
- /* Name of the kernel, required to locate it within the brig module. */
- const char *name;
- /* Size of memory space for OMP data. */
- unsigned omp_data_size;
- /* The specific agent the kernel has been or will be finalized for and run
- on. */
- struct agent_info *agent;
- /* The specific module where the kernel takes place. */
- struct module_info *module;
- /* Mutex enforcing that at most once thread ever initializes a kernel for
- use. A thread should have locked agent->modules_rwlock for reading before
- acquiring it. */
- pthread_mutex_t init_mutex;
- /* Flag indicating whether the kernel has been initialized and all fields
- below it contain valid data. */
- bool initialized;
- /* Flag indicating that the kernel has a problem that blocks an execution. */
- bool initialization_failed;
- /* The object to be put into the dispatch queue. */
- uint64_t object;
- /* Required size of kernel arguments. */
- uint32_t kernarg_segment_size;
- /* Required size of group segment. */
- uint32_t group_segment_size;
- /* Required size of private segment. */
- uint32_t private_segment_size;
- /* List of all kernel dependencies. */
- const char **dependencies;
- /* Number of dependencies. */
- unsigned dependencies_count;
- /* Maximum OMP data size necessary for kernel from kernel dispatches. */
- unsigned max_omp_data_size;
- /* True if the kernel is gridified. */
- bool gridified_kernel_p;
-};
-
-/* Information about a particular brig module, its image and kernels. */
-
-struct module_info
-{
- /* The next and previous module in the linked list of modules of an agent. */
- struct module_info *next, *prev;
- /* The description with which the program has registered the image. */
- struct brig_image_desc *image_desc;
-
- /* Number of kernels in this module. */
- int kernel_count;
- /* An array of kernel_info structures describing each kernel in this
- module. */
- struct kernel_info kernels[];
-};
-
-/* Information about shared brig library. */
-
-struct brig_library_info
-{
- char *file_name;
- hsa_ext_module_t image;
-};
-
-/* Description of an HSA GPU agent and the program associated with it. */
-
-struct agent_info
-{
- /* The HSA ID of the agent. Assigned when hsa_context is initialized. */
- hsa_agent_t id;
- /* Whether the agent has been initialized. The fields below are usable only
- if it has been. */
- bool initialized;
- /* The HSA ISA of this agent. */
- hsa_isa_t isa;
- /* Command queue of the agent. */
- hsa_queue_t *command_q;
- /* Kernel from kernel dispatch command queue. */
- hsa_queue_t *kernel_dispatch_command_q;
- /* The HSA memory region from which to allocate kernel arguments. */
- hsa_region_t kernarg_region;
-
- /* Read-write lock that protects kernels which are running or about to be run
- from interference with loading and unloading of images. Needs to be
- locked for reading while a kernel is being run, and for writing if the
- list of modules is manipulated (and thus the HSA program invalidated). */
- pthread_rwlock_t modules_rwlock;
- /* The first module in a linked list of modules associated with this
- kernel. */
- struct module_info *first_module;
-
- /* Mutex enforcing that only one thread will finalize the HSA program. A
- thread should have locked agent->modules_rwlock for reading before
- acquiring it. */
- pthread_mutex_t prog_mutex;
- /* Flag whether the HSA program that consists of all the modules has been
- finalized. */
- bool prog_finalized;
- /* Flag whether the program was finalized but with a failure. */
- bool prog_finalized_error;
- /* HSA executable - the finalized program that is used to locate kernels. */
- hsa_executable_t executable;
- /* List of BRIG libraries. */
- struct brig_library_info **brig_libraries;
- /* Number of loaded shared BRIG libraries. */
- unsigned brig_libraries_count;
-};
-
-/* Information about the whole HSA environment and all of its agents. */
-
-struct hsa_context_info
-{
- /* Whether the structure has been initialized. */
- bool initialized;
- /* Number of usable GPU HSA agents in the system. */
- int agent_count;
- /* Array of agent_info structures describing the individual HSA agents. */
- struct agent_info *agents;
-};
-
-/* Information about the whole HSA environment and all of its agents. */
-
-static struct hsa_context_info hsa_context;
-
-#define DLSYM_FN(function) \
- hsa_fns.function##_fn = dlsym (handle, #function); \
- if (hsa_fns.function##_fn == NULL) \
- goto dl_fail;
-
-static bool
-init_hsa_runtime_functions (void)
-{
- void *handle = dlopen (hsa_runtime_lib, RTLD_LAZY);
- if (handle == NULL)
- goto dl_fail;
-
- DLSYM_FN (hsa_status_string)
- DLSYM_FN (hsa_agent_get_info)
- DLSYM_FN (hsa_init)
- DLSYM_FN (hsa_iterate_agents)
- DLSYM_FN (hsa_region_get_info)
- DLSYM_FN (hsa_queue_create)
- DLSYM_FN (hsa_agent_iterate_regions)
- DLSYM_FN (hsa_executable_destroy)
- DLSYM_FN (hsa_executable_create)
- DLSYM_FN (hsa_executable_global_variable_define)
- DLSYM_FN (hsa_executable_load_code_object)
- DLSYM_FN (hsa_executable_freeze)
- DLSYM_FN (hsa_signal_create)
- DLSYM_FN (hsa_memory_allocate)
- DLSYM_FN (hsa_memory_free)
- DLSYM_FN (hsa_signal_destroy)
- DLSYM_FN (hsa_executable_get_symbol)
- DLSYM_FN (hsa_executable_symbol_get_info)
- DLSYM_FN (hsa_queue_add_write_index_release)
- DLSYM_FN (hsa_queue_load_read_index_acquire)
- DLSYM_FN (hsa_signal_wait_acquire)
- DLSYM_FN (hsa_signal_store_relaxed)
- DLSYM_FN (hsa_signal_store_release)
- DLSYM_FN (hsa_signal_load_acquire)
- DLSYM_FN (hsa_queue_destroy)
- DLSYM_FN (hsa_ext_program_add_module)
- DLSYM_FN (hsa_ext_program_create)
- DLSYM_FN (hsa_ext_program_destroy)
- DLSYM_FN (hsa_ext_program_finalize)
- return true;
-
- dl_fail:
- HSA_DEBUG ("while loading %s: %s\n", hsa_runtime_lib, dlerror ());
- return false;
-}
-
-/* Find kernel for an AGENT by name provided in KERNEL_NAME. */
-
-static struct kernel_info *
-get_kernel_for_agent (struct agent_info *agent, const char *kernel_name)
-{
- struct module_info *module = agent->first_module;
-
- while (module)
- {
- for (unsigned i = 0; i < module->kernel_count; i++)
- if (strcmp (module->kernels[i].name, kernel_name) == 0)
- return &module->kernels[i];
-
- module = module->next;
- }
-
- return NULL;
-}
-
-/* Return true if the agent is a GPU and acceptable of concurrent submissions
- from different threads. */
-
-static bool
-suitable_hsa_agent_p (hsa_agent_t agent)
-{
- hsa_device_type_t device_type;
- hsa_status_t status
- = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_DEVICE,
- &device_type);
- if (status != HSA_STATUS_SUCCESS)
- return false;
-
- switch (device_type)
- {
- case HSA_DEVICE_TYPE_GPU:
- break;
- case HSA_DEVICE_TYPE_CPU:
- if (!support_cpu_devices)
- return false;
- break;
- default:
- return false;
- }
-
- uint32_t features = 0;
- status = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_FEATURE,
- &features);
- if (status != HSA_STATUS_SUCCESS
- || !(features & HSA_AGENT_FEATURE_KERNEL_DISPATCH))
- return false;
- hsa_queue_type_t queue_type;
- status = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_QUEUE_TYPE,
- &queue_type);
- if (status != HSA_STATUS_SUCCESS
- || (queue_type != HSA_QUEUE_TYPE_MULTI))
- return false;
-
- return true;
-}
-
-/* Callback of hsa_iterate_agents, if AGENT is a GPU device, increment
- agent_count in hsa_context. */
-
-static hsa_status_t
-count_gpu_agents (hsa_agent_t agent, void *data __attribute__ ((unused)))
-{
- if (suitable_hsa_agent_p (agent))
- hsa_context.agent_count++;
- return HSA_STATUS_SUCCESS;
-}
-
-/* Callback of hsa_iterate_agents, if AGENT is a GPU device, assign the agent
- id to the describing structure in the hsa context. The index of the
- structure is pointed to by DATA, increment it afterwards. */
-
-static hsa_status_t
-assign_agent_ids (hsa_agent_t agent, void *data)
-{
- if (suitable_hsa_agent_p (agent))
- {
- int *agent_index = (int *) data;
- hsa_context.agents[*agent_index].id = agent;
- ++*agent_index;
- }
- return HSA_STATUS_SUCCESS;
-}
-
-/* Initialize hsa_context if it has not already been done.
- Return TRUE on success. */
-
-static bool
-init_hsa_context (void)
-{
- hsa_status_t status;
- int agent_index = 0;
-
- if (hsa_context.initialized)
- return true;
- init_enviroment_variables ();
- if (!init_hsa_runtime_functions ())
- {
- HSA_DEBUG ("Run-time could not be dynamically opened\n");
- return false;
- }
- status = hsa_fns.hsa_init_fn ();
- if (status != HSA_STATUS_SUCCESS)
- return hsa_error ("Run-time could not be initialized", status);
- HSA_DEBUG ("HSA run-time initialized\n");
- status = hsa_fns.hsa_iterate_agents_fn (count_gpu_agents, NULL);
- if (status != HSA_STATUS_SUCCESS)
- return hsa_error ("HSA GPU devices could not be enumerated", status);
- HSA_DEBUG ("There are %i HSA GPU devices.\n", hsa_context.agent_count);
-
- hsa_context.agents
- = GOMP_PLUGIN_malloc_cleared (hsa_context.agent_count
- * sizeof (struct agent_info));
- status = hsa_fns.hsa_iterate_agents_fn (assign_agent_ids, &agent_index);
- if (agent_index != hsa_context.agent_count)
- {
- GOMP_PLUGIN_error ("Failed to assign IDs to all HSA agents");
- return false;
- }
- hsa_context.initialized = true;
- return true;
-}
-
-/* Callback of dispatch queues to report errors. */
-
-static void
-queue_callback (hsa_status_t status,
- hsa_queue_t *queue __attribute__ ((unused)),
- void *data __attribute__ ((unused)))
-{
- hsa_fatal ("Asynchronous queue error", status);
-}
-
-/* Callback of hsa_agent_iterate_regions. Determine if a memory REGION can be
- used for kernarg allocations and if so write it to the memory pointed to by
- DATA and break the query. */
-
-static hsa_status_t
-get_kernarg_memory_region (hsa_region_t region, void *data)
-{
- hsa_status_t status;
- hsa_region_segment_t segment;
-
- status = hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_SEGMENT,
- &segment);
- if (status != HSA_STATUS_SUCCESS)
- return status;
- if (segment != HSA_REGION_SEGMENT_GLOBAL)
- return HSA_STATUS_SUCCESS;
-
- uint32_t flags;
- status = hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_GLOBAL_FLAGS,
- &flags);
- if (status != HSA_STATUS_SUCCESS)
- return status;
- if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG)
- {
- hsa_region_t *ret = (hsa_region_t *) data;
- *ret = region;
- return HSA_STATUS_INFO_BREAK;
- }
- return HSA_STATUS_SUCCESS;
-}
-
-/* Part of the libgomp plugin interface. Return the number of HSA devices on
- the system. */
-
-int
-GOMP_OFFLOAD_get_num_devices (void)
-{
- if (!init_hsa_context ())
- return 0;
- return hsa_context.agent_count;
-}
-
-/* Part of the libgomp plugin interface. Initialize agent number N so that it
- can be used for computation. Return TRUE on success. */
-
-bool
-GOMP_OFFLOAD_init_device (int n)
-{
- if (!init_hsa_context ())
- return false;
- if (n >= hsa_context.agent_count)
- {
- GOMP_PLUGIN_error ("Request to initialize non-existing HSA device %i", n);
- return false;
- }
- struct agent_info *agent = &hsa_context.agents[n];
-
- if (agent->initialized)
- return true;
-
- if (pthread_rwlock_init (&agent->modules_rwlock, NULL))
- {
- GOMP_PLUGIN_error ("Failed to initialize an HSA agent rwlock");
- return false;
- }
- if (pthread_mutex_init (&agent->prog_mutex, NULL))
- {
- GOMP_PLUGIN_error ("Failed to initialize an HSA agent program mutex");
- return false;
- }
-
- uint32_t queue_size;
- hsa_status_t status;
- status = hsa_fns.hsa_agent_get_info_fn (agent->id,
- HSA_AGENT_INFO_QUEUE_MAX_SIZE,
- &queue_size);
- if (status != HSA_STATUS_SUCCESS)
- return hsa_error ("Error requesting maximum queue size of the HSA agent",
- status);
- status = hsa_fns.hsa_agent_get_info_fn (agent->id, HSA_AGENT_INFO_ISA,
- &agent->isa);
- if (status != HSA_STATUS_SUCCESS)
- return hsa_error ("Error querying the ISA of the agent", status);
- status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size,
- HSA_QUEUE_TYPE_MULTI,
- queue_callback, NULL, UINT32_MAX,
- UINT32_MAX,
- &agent->command_q);
- if (status != HSA_STATUS_SUCCESS)
- return hsa_error ("Error creating command queue", status);
-
- status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size,
- HSA_QUEUE_TYPE_MULTI,
- queue_callback, NULL, UINT32_MAX,
- UINT32_MAX,
- &agent->kernel_dispatch_command_q);
- if (status != HSA_STATUS_SUCCESS)
- return hsa_error ("Error creating kernel dispatch command queue", status);
-
- agent->kernarg_region.handle = (uint64_t) -1;
- status = hsa_fns.hsa_agent_iterate_regions_fn (agent->id,
- get_kernarg_memory_region,
- &agent->kernarg_region);
- if (agent->kernarg_region.handle == (uint64_t) -1)
- {
- GOMP_PLUGIN_error ("Could not find suitable memory region for kernel "
- "arguments");
- return false;
- }
- HSA_DEBUG ("HSA agent initialized, queue has id %llu\n",
- (long long unsigned) agent->command_q->id);
- HSA_DEBUG ("HSA agent initialized, kernel dispatch queue has id %llu\n",
- (long long unsigned) agent->kernel_dispatch_command_q->id);
- agent->initialized = true;
- return true;
-}
-
-/* Verify that hsa_context has already been initialized and return the
- agent_info structure describing device number N. Return NULL on error. */
-
-static struct agent_info *
-get_agent_info (int n)
-{
- if (!hsa_context.initialized)
- {
- GOMP_PLUGIN_error ("Attempt to use uninitialized HSA context.");
- return NULL;
- }
- if (n >= hsa_context.agent_count)
- {
- GOMP_PLUGIN_error ("Request to operate on anon-existing HSA device %i", n);
- return NULL;
- }
- if (!hsa_context.agents[n].initialized)
- {
- GOMP_PLUGIN_error ("Attempt to use an uninitialized HSA agent.");
- return NULL;
- }
- return &hsa_context.agents[n];
-}
-
-/* Insert MODULE to the linked list of modules of AGENT. */
-
-static void
-add_module_to_agent (struct agent_info *agent, struct module_info *module)
-{
- if (agent->first_module)
- agent->first_module->prev = module;
- module->next = agent->first_module;
- module->prev = NULL;
- agent->first_module = module;
-}
-
-/* Remove MODULE from the linked list of modules of AGENT. */
-
-static void
-remove_module_from_agent (struct agent_info *agent, struct module_info *module)
-{
- if (agent->first_module == module)
- agent->first_module = module->next;
- if (module->prev)
- module->prev->next = module->next;
- if (module->next)
- module->next->prev = module->prev;
-}
-
-/* Free the HSA program in agent and everything associated with it and set
- agent->prog_finalized and the initialized flags of all kernels to false.
- Return TRUE on success. */
-
-static bool
-destroy_hsa_program (struct agent_info *agent)
-{
- if (!agent->prog_finalized || agent->prog_finalized_error)
- return true;
-
- hsa_status_t status;
-
- HSA_DEBUG ("Destroying the current HSA program.\n");
-
- status = hsa_fns.hsa_executable_destroy_fn (agent->executable);
- if (status != HSA_STATUS_SUCCESS)
- return hsa_error ("Could not destroy HSA executable", status);
-
- struct module_info *module;
- for (module = agent->first_module; module; module = module->next)
- {
- int i;
- for (i = 0; i < module->kernel_count; i++)
- module->kernels[i].initialized = false;
- }
- agent->prog_finalized = false;
- return true;
-}
-
-/* Initialize KERNEL from D and other parameters. Return true on success. */
-
-static bool
-init_basic_kernel_info (struct kernel_info *kernel,
- struct hsa_kernel_description *d,
- struct agent_info *agent,
- struct module_info *module)
-{
- kernel->agent = agent;
- kernel->module = module;
- kernel->name = d->name;
- kernel->omp_data_size = d->omp_data_size;
- kernel->gridified_kernel_p = d->gridified_kernel_p;
- kernel->dependencies_count = d->kernel_dependencies_count;
- kernel->dependencies = d->kernel_dependencies;
- if (pthread_mutex_init (&kernel->init_mutex, NULL))
- {
- GOMP_PLUGIN_error ("Failed to initialize an HSA kernel mutex");
- return false;
- }
- return true;
-}
-
-/* Part of the libgomp plugin interface. Load BRIG module described by struct
- brig_image_desc in TARGET_DATA and return references to kernel descriptors
- in TARGET_TABLE. */
-
-int
-GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
- struct addr_pair **target_table)
-{
- if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA)
- {
- GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin"
- " (expected %u, received %u)",
- GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
- return -1;
- }
-
- struct brig_image_desc *image_desc = (struct brig_image_desc *) target_data;
- struct agent_info *agent;
- struct addr_pair *pair;
- struct module_info *module;
- struct kernel_info *kernel;
- int kernel_count = image_desc->kernel_count;
-
- agent = get_agent_info (ord);
- if (!agent)
- return -1;
-
- if (pthread_rwlock_wrlock (&agent->modules_rwlock))
- {
- GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock");
- return -1;
- }
- if (agent->prog_finalized
- && !destroy_hsa_program (agent))
- return -1;
-
- HSA_DEBUG ("Encountered %d kernels in an image\n", kernel_count);
- pair = GOMP_PLUGIN_malloc (kernel_count * sizeof (struct addr_pair));
- *target_table = pair;
- module = (struct module_info *)
- GOMP_PLUGIN_malloc_cleared (sizeof (struct module_info)
- + kernel_count * sizeof (struct kernel_info));
- module->image_desc = image_desc;
- module->kernel_count = kernel_count;
-
- kernel = &module->kernels[0];
-
- /* Allocate memory for kernel dependencies. */
- for (unsigned i = 0; i < kernel_count; i++)
- {
- pair->start = (uintptr_t) kernel;
- pair->end = (uintptr_t) (kernel + 1);
-
- struct hsa_kernel_description *d = &image_desc->kernel_infos[i];
- if (!init_basic_kernel_info (kernel, d, agent, module))
- return -1;
- kernel++;
- pair++;
- }
-
- add_module_to_agent (agent, module);
- if (pthread_rwlock_unlock (&agent->modules_rwlock))
- {
- GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock");
- return -1;
- }
- return kernel_count;
-}
-
-/* Add a shared BRIG library from a FILE_NAME to an AGENT. */
-
-static struct brig_library_info *
-add_shared_library (const char *file_name, struct agent_info *agent)
-{
- struct brig_library_info *library = NULL;
-
- void *f = dlopen (file_name, RTLD_NOW);
- void *start = dlsym (f, "__brig_start");
- void *end = dlsym (f, "__brig_end");
-
- if (start == NULL || end == NULL)
- return NULL;
-
- unsigned size = end - start;
- char *buf = (char *) GOMP_PLUGIN_malloc (size);
- memcpy (buf, start, size);
-
- library = GOMP_PLUGIN_malloc (sizeof (struct agent_info));
- library->file_name = (char *) GOMP_PLUGIN_malloc
- ((strlen (file_name) + 1));
- strcpy (library->file_name, file_name);
- library->image = (hsa_ext_module_t) buf;
-
- return library;
-}
-
-/* Release memory used for BRIG shared libraries that correspond
- to an AGENT. */
-
-static void
-release_agent_shared_libraries (struct agent_info *agent)
-{
- for (unsigned i = 0; i < agent->brig_libraries_count; i++)
- if (agent->brig_libraries[i])
- {
- free (agent->brig_libraries[i]->file_name);
- free (agent->brig_libraries[i]->image);
- free (agent->brig_libraries[i]);
- }
-
- free (agent->brig_libraries);
-}
-
-/* Create and finalize the program consisting of all loaded modules. */
-
-static void
-create_and_finalize_hsa_program (struct agent_info *agent)
-{
- hsa_status_t status;
- hsa_ext_program_t prog_handle;
- int mi = 0;
-
- if (pthread_mutex_lock (&agent->prog_mutex))
- GOMP_PLUGIN_fatal ("Could not lock an HSA agent program mutex");
- if (agent->prog_finalized)
- goto final;
-
- status = hsa_fns.hsa_ext_program_create_fn
- (HSA_MACHINE_MODEL_LARGE, HSA_PROFILE_FULL,
- HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
- NULL, &prog_handle);
- if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Could not create an HSA program", status);
-
- HSA_DEBUG ("Created a finalized program\n");
-
- struct module_info *module = agent->first_module;
- while (module)
- {
- status = hsa_fns.hsa_ext_program_add_module_fn
- (prog_handle, module->image_desc->brig_module);
- if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Could not add a module to the HSA program", status);
- module = module->next;
- mi++;
- }
-
- /* Load all shared libraries. */
- const char *libraries[] = { "libhsamath.so", "libhsastd.so" };
- const unsigned libraries_count = sizeof (libraries) / sizeof (const char *);
-
- agent->brig_libraries_count = libraries_count;
- agent->brig_libraries = GOMP_PLUGIN_malloc_cleared
- (sizeof (struct brig_library_info) * libraries_count);
-
- for (unsigned i = 0; i < libraries_count; i++)
- {
- struct brig_library_info *library = add_shared_library (libraries[i],
- agent);
- if (library == NULL)
- {
- HSA_WARNING ("Could not open a shared BRIG library: %s\n",
- libraries[i]);
- continue;
- }
-
- status = hsa_fns.hsa_ext_program_add_module_fn (prog_handle,
- library->image);
- if (status != HSA_STATUS_SUCCESS)
- hsa_warn ("Could not add a shared BRIG library the HSA program",
- status);
- else
- HSA_DEBUG ("a shared BRIG library has been added to a program: %s\n",
- libraries[i]);
- }
-
- hsa_ext_control_directives_t control_directives;
- memset (&control_directives, 0, sizeof (control_directives));
- hsa_code_object_t code_object;
- status = hsa_fns.hsa_ext_program_finalize_fn
- (prog_handle, agent->isa,HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
- control_directives, "", HSA_CODE_OBJECT_TYPE_PROGRAM, &code_object);
- if (status != HSA_STATUS_SUCCESS)
- {
- hsa_warn ("Finalization of the HSA program failed", status);
- goto failure;
- }
-
- HSA_DEBUG ("Finalization done\n");
- hsa_fns.hsa_ext_program_destroy_fn (prog_handle);
-
- status
- = hsa_fns.hsa_executable_create_fn (HSA_PROFILE_FULL,
- HSA_EXECUTABLE_STATE_UNFROZEN,
- "", &agent->executable);
- if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Could not create HSA executable", status);
-
- module = agent->first_module;
- while (module)
- {
- /* Initialize all global variables declared in the module. */
- for (unsigned i = 0; i < module->image_desc->global_variable_count; i++)
- {
- struct global_var_info *var;
- var = &module->image_desc->global_variables[i];
- status = hsa_fns.hsa_executable_global_variable_define_fn
- (agent->executable, var->name, var->address);
-
- HSA_DEBUG ("Defining global variable: %s, address: %p\n", var->name,
- var->address);
-
- if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Could not define a global variable in the HSA program",
- status);
- }
-
- module = module->next;
- }
-
- status = hsa_fns.hsa_executable_load_code_object_fn (agent->executable,
- agent->id,
- code_object, "");
- if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Could not add a code object to the HSA executable", status);
- status = hsa_fns.hsa_executable_freeze_fn (agent->executable, "");
- if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Could not freeze the HSA executable", status);
-
- HSA_DEBUG ("Froze HSA executable with the finalized code object\n");
-
- /* If all goes good, jump to final. */
- goto final;
-
-failure:
- agent->prog_finalized_error = true;
-
-final:
- agent->prog_finalized = true;
-
- if (pthread_mutex_unlock (&agent->prog_mutex))
- GOMP_PLUGIN_fatal ("Could not unlock an HSA agent program mutex");
-}
-
-/* Create kernel dispatch data structure for given KERNEL. */
-
-static struct GOMP_hsa_kernel_dispatch *
-create_single_kernel_dispatch (struct kernel_info *kernel,
- unsigned omp_data_size)
-{
- struct agent_info *agent = kernel->agent;
- struct GOMP_hsa_kernel_dispatch *shadow
- = GOMP_PLUGIN_malloc_cleared (sizeof (struct GOMP_hsa_kernel_dispatch));
-
- shadow->queue = agent->command_q;
- shadow->omp_data_memory
- = omp_data_size > 0 ? GOMP_PLUGIN_malloc (omp_data_size) : NULL;
- unsigned dispatch_count = kernel->dependencies_count;
- shadow->kernel_dispatch_count = dispatch_count;
-
- shadow->children_dispatches
- = GOMP_PLUGIN_malloc (dispatch_count * sizeof (shadow));
-
- shadow->object = kernel->object;
-
- hsa_signal_t sync_signal;
- hsa_status_t status = hsa_fns.hsa_signal_create_fn (1, 0, NULL, &sync_signal);
- if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Error creating the HSA sync signal", status);
-
- shadow->signal = sync_signal.handle;
- shadow->private_segment_size = kernel->private_segment_size;
- shadow->group_segment_size = kernel->group_segment_size;
-
- status
- = hsa_fns.hsa_memory_allocate_fn (agent->kernarg_region,
- kernel->kernarg_segment_size,
- &shadow->kernarg_address);
- if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Could not allocate memory for HSA kernel arguments", status);
-
- return shadow;
-}
-
-/* Release data structure created for a kernel dispatch in SHADOW argument. */
-
-static void
-release_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *shadow)
-{
- HSA_DEBUG ("Released kernel dispatch: %p has value: %" PRIu64 " (%p)\n",
- shadow, (print_uint64_t) shadow->debug,
- (void *) (uintptr_t) shadow->debug);
-
- hsa_fns.hsa_memory_free_fn (shadow->kernarg_address);
-
- hsa_signal_t s;
- s.handle = shadow->signal;
- hsa_fns.hsa_signal_destroy_fn (s);
-
- free (shadow->omp_data_memory);
-
- for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++)
- release_kernel_dispatch (shadow->children_dispatches[i]);
-
- free (shadow->children_dispatches);
- free (shadow);
-}
-
-/* Initialize a KERNEL without its dependencies. MAX_OMP_DATA_SIZE is used
- to calculate maximum necessary memory for OMP data allocation. */
-
-static void
-init_single_kernel (struct kernel_info *kernel, unsigned *max_omp_data_size)
-{
- hsa_status_t status;
- struct agent_info *agent = kernel->agent;
- hsa_executable_symbol_t kernel_symbol;
- status = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL,
- kernel->name, agent->id,
- 0, &kernel_symbol);
- if (status != HSA_STATUS_SUCCESS)
- {
- hsa_warn ("Could not find symbol for kernel in the code object", status);
- goto failure;
- }
- HSA_DEBUG ("Located kernel %s\n", kernel->name);
- status = hsa_fns.hsa_executable_symbol_get_info_fn
- (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kernel->object);
- if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Could not extract a kernel object from its symbol", status);
- status = hsa_fns.hsa_executable_symbol_get_info_fn
- (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
- &kernel->kernarg_segment_size);
- if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Could not get info about kernel argument size", status);
- status = hsa_fns.hsa_executable_symbol_get_info_fn
- (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
- &kernel->group_segment_size);
- if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Could not get info about kernel group segment size", status);
- status = hsa_fns.hsa_executable_symbol_get_info_fn
- (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
- &kernel->private_segment_size);
- if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Could not get info about kernel private segment size",
- status);
-
- HSA_DEBUG ("Kernel structure for %s fully initialized with "
- "following segment sizes: \n", kernel->name);
- HSA_DEBUG (" group_segment_size: %u\n",
- (unsigned) kernel->group_segment_size);
- HSA_DEBUG (" private_segment_size: %u\n",
- (unsigned) kernel->private_segment_size);
- HSA_DEBUG (" kernarg_segment_size: %u\n",
- (unsigned) kernel->kernarg_segment_size);
- HSA_DEBUG (" omp_data_size: %u\n", kernel->omp_data_size);
- HSA_DEBUG (" gridified_kernel_p: %u\n", kernel->gridified_kernel_p);
-
- if (kernel->omp_data_size > *max_omp_data_size)
- *max_omp_data_size = kernel->omp_data_size;
-
- for (unsigned i = 0; i < kernel->dependencies_count; i++)
- {
- struct kernel_info *dependency
- = get_kernel_for_agent (agent, kernel->dependencies[i]);
-
- if (dependency == NULL)
- {
- HSA_DEBUG ("Could not find a dependency for a kernel: %s, "
- "dependency name: %s\n", kernel->name,
- kernel->dependencies[i]);
- goto failure;
- }
-
- if (dependency->dependencies_count > 0)
- {
- HSA_DEBUG ("HSA does not allow kernel dispatching code with "
- "a depth bigger than one\n");
- goto failure;
- }
-
- init_single_kernel (dependency, max_omp_data_size);
- }
-
- return;
-
-failure:
- kernel->initialization_failed = true;
-}
-
-/* Indent stream F by INDENT spaces. */
-
-static void
-indent_stream (FILE *f, unsigned indent)
-{
- fprintf (f, "%*s", indent, "");
-}
-
-/* Dump kernel DISPATCH data structure and indent it by INDENT spaces. */
-
-static void
-print_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *dispatch, unsigned indent)
-{
- indent_stream (stderr, indent);
- fprintf (stderr, "this: %p\n", dispatch);
- indent_stream (stderr, indent);
- fprintf (stderr, "queue: %p\n", dispatch->queue);
- indent_stream (stderr, indent);
- fprintf (stderr, "omp_data_memory: %p\n", dispatch->omp_data_memory);
- indent_stream (stderr, indent);
- fprintf (stderr, "kernarg_address: %p\n", dispatch->kernarg_address);
- indent_stream (stderr, indent);
- fprintf (stderr, "object: %" PRIu64 "\n", (print_uint64_t) dispatch->object);
- indent_stream (stderr, indent);
- fprintf (stderr, "signal: %" PRIu64 "\n", (print_uint64_t) dispatch->signal);
- indent_stream (stderr, indent);
- fprintf (stderr, "private_segment_size: %u\n",
- dispatch->private_segment_size);
- indent_stream (stderr, indent);
- fprintf (stderr, "group_segment_size: %u\n",
- dispatch->group_segment_size);
- indent_stream (stderr, indent);
- fprintf (stderr, "children dispatches: %" PRIu64 "\n",
- (print_uint64_t) dispatch->kernel_dispatch_count);
- indent_stream (stderr, indent);
- fprintf (stderr, "omp_num_threads: %u\n",
- dispatch->omp_num_threads);
- fprintf (stderr, "\n");
-
- for (unsigned i = 0; i < dispatch->kernel_dispatch_count; i++)
- print_kernel_dispatch (dispatch->children_dispatches[i], indent + 2);
-}
-
-/* Create kernel dispatch data structure for a KERNEL and all its
- dependencies. */
-
-static struct GOMP_hsa_kernel_dispatch *
-create_kernel_dispatch (struct kernel_info *kernel, unsigned omp_data_size)
-{
- struct GOMP_hsa_kernel_dispatch *shadow
- = create_single_kernel_dispatch (kernel, omp_data_size);
- shadow->omp_num_threads = 64;
- shadow->debug = 0;
- shadow->omp_level = kernel->gridified_kernel_p ? 1 : 0;
-
- /* Create kernel dispatch data structures. We do not allow to have
- a kernel dispatch with depth bigger than one. */
- for (unsigned i = 0; i < kernel->dependencies_count; i++)
- {
- struct kernel_info *dependency
- = get_kernel_for_agent (kernel->agent, kernel->dependencies[i]);
- shadow->children_dispatches[i]
- = create_single_kernel_dispatch (dependency, omp_data_size);
- shadow->children_dispatches[i]->queue
- = kernel->agent->kernel_dispatch_command_q;
- shadow->children_dispatches[i]->omp_level = 1;
- }
-
- return shadow;
-}
-
-/* Do all the work that is necessary before running KERNEL for the first time.
- The function assumes the program has been created, finalized and frozen by
- create_and_finalize_hsa_program. */
-
-static void
-init_kernel (struct kernel_info *kernel)
-{
- if (pthread_mutex_lock (&kernel->init_mutex))
- GOMP_PLUGIN_fatal ("Could not lock an HSA kernel initialization mutex");
- if (kernel->initialized)
- {
- if (pthread_mutex_unlock (&kernel->init_mutex))
- GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization "
- "mutex");
-
- return;
- }
-
- /* Precomputed maximum size of OMP data necessary for a kernel from kernel
- dispatch operation. */
- init_single_kernel (kernel, &kernel->max_omp_data_size);
-
- if (!kernel->initialization_failed)
- HSA_DEBUG ("\n");
-
- kernel->initialized = true;
- if (pthread_mutex_unlock (&kernel->init_mutex))
- GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization "
- "mutex");
-}
-
-/* Parse the target attributes INPUT provided by the compiler and return true
- if we should run anything all. If INPUT is NULL, fill DEF with default
- values, then store INPUT or DEF into *RESULT. */
-
-static bool
-parse_target_attributes (void **input,
- struct GOMP_kernel_launch_attributes *def,
- struct GOMP_kernel_launch_attributes **result)
-{
- if (!input)
- GOMP_PLUGIN_fatal ("No target arguments provided");
-
- bool attrs_found = false;
- while (*input)
- {
- uintptr_t id = (uintptr_t) *input;
- if ((id & GOMP_TARGET_ARG_DEVICE_MASK) == GOMP_DEVICE_HSA
- && ((id & GOMP_TARGET_ARG_ID_MASK)
- == GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES))
- {
- input++;
- attrs_found = true;
- break;
- }
-
- if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
- input++;
- input++;
- }
-
- if (!attrs_found)
- {
- def->ndim = 1;
- def->gdims[0] = 1;
- def->gdims[1] = 1;
- def->gdims[2] = 1;
- def->wdims[0] = 1;
- def->wdims[1] = 1;
- def->wdims[2] = 1;
- *result = def;
- HSA_DEBUG ("GOMP_OFFLOAD_run called with no launch attributes\n");
- return true;
- }
-
- struct GOMP_kernel_launch_attributes *kla;
- kla = (struct GOMP_kernel_launch_attributes *) *input;
- *result = kla;
- if (kla->ndim == 0 || kla->ndim > 3)
- GOMP_PLUGIN_fatal ("Invalid number of dimensions (%u)", kla->ndim);
-
- HSA_DEBUG ("GOMP_OFFLOAD_run called with %u dimensions:\n", kla->ndim);
- unsigned i;
- for (i = 0; i < kla->ndim; i++)
- {
- HSA_DEBUG (" Dimension %u: grid size %u and group size %u\n", i,
- kla->gdims[i], kla->wdims[i]);
- if (kla->gdims[i] == 0)
- return false;
- }
- return true;
-}
-
-/* Return the group size given the requested GROUP size, GRID size and number
- of grid dimensions NDIM. */
-
-static uint32_t
-get_group_size (uint32_t ndim, uint32_t grid, uint32_t group)
-{
- if (group == 0)
- {
- /* TODO: Provide a default via environment or device characteristics. */
- if (ndim == 1)
- group = 64;
- else if (ndim == 2)
- group = 8;
- else
- group = 4;
- }
-
- if (group > grid)
- group = grid;
- return group;
-}
-
-/* Return true if the HSA runtime can run function FN_PTR. */
-
-bool
-GOMP_OFFLOAD_can_run (void *fn_ptr)
-{
- struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
- struct agent_info *agent = kernel->agent;
- create_and_finalize_hsa_program (agent);
-
- if (agent->prog_finalized_error)
- goto failure;
-
- init_kernel (kernel);
- if (kernel->initialization_failed)
- goto failure;
-
- return true;
-
-failure:
- if (suppress_host_fallback)
- GOMP_PLUGIN_fatal ("HSA host fallback has been suppressed");
- HSA_DEBUG ("HSA target cannot be launched, doing a host fallback\n");
- return false;
-}
-
-/* Atomically store pair of uint16_t values (HEADER and REST) to a PACKET. */
-
-void
-packet_store_release (uint32_t* packet, uint16_t header, uint16_t rest)
-{
- __atomic_store_n (packet, header | (rest << 16), __ATOMIC_RELEASE);
-}
-
-/* Run KERNEL on its agent, pass VARS to it as arguments and take
- launchattributes from KLA. */
-
-void
-run_kernel (struct kernel_info *kernel, void *vars,
- struct GOMP_kernel_launch_attributes *kla)
-{
- struct agent_info *agent = kernel->agent;
- if (pthread_rwlock_rdlock (&agent->modules_rwlock))
- GOMP_PLUGIN_fatal ("Unable to read-lock an HSA agent rwlock");
-
- if (!agent->initialized)
- GOMP_PLUGIN_fatal ("Agent must be initialized");
-
- if (!kernel->initialized)
- GOMP_PLUGIN_fatal ("Called kernel must be initialized");
-
- struct GOMP_hsa_kernel_dispatch *shadow
- = create_kernel_dispatch (kernel, kernel->max_omp_data_size);
-
- if (debug)
- {
- fprintf (stderr, "\nKernel has following dependencies:\n");
- print_kernel_dispatch (shadow, 2);
- }
-
- uint64_t index
- = hsa_fns.hsa_queue_add_write_index_release_fn (agent->command_q, 1);
- HSA_DEBUG ("Got AQL index %llu\n", (long long int) index);
-
- /* Wait until the queue is not full before writing the packet. */
- while (index - hsa_fns.hsa_queue_load_read_index_acquire_fn (agent->command_q)
- >= agent->command_q->size)
- ;
-
- hsa_kernel_dispatch_packet_t *packet;
- packet = ((hsa_kernel_dispatch_packet_t *) agent->command_q->base_address)
- + index % agent->command_q->size;
-
- memset (((uint8_t *) packet) + 4, 0, sizeof (*packet) - 4);
- packet->grid_size_x = kla->gdims[0];
- packet->workgroup_size_x = get_group_size (kla->ndim, kla->gdims[0],
- kla->wdims[0]);
-
- if (kla->ndim >= 2)
- {
- packet->grid_size_y = kla->gdims[1];
- packet->workgroup_size_y = get_group_size (kla->ndim, kla->gdims[1],
- kla->wdims[1]);
- }
- else
- {
- packet->grid_size_y = 1;
- packet->workgroup_size_y = 1;
- }
-
- if (kla->ndim == 3)
- {
- packet->grid_size_z = kla->gdims[2];
- packet->workgroup_size_z = get_group_size (kla->ndim, kla->gdims[2],
- kla->wdims[2]);
- }
- else
- {
- packet->grid_size_z = 1;
- packet->workgroup_size_z = 1;
- }
-
- packet->private_segment_size = kernel->private_segment_size;
- packet->group_segment_size = kernel->group_segment_size;
- packet->kernel_object = kernel->object;
- packet->kernarg_address = shadow->kernarg_address;
- hsa_signal_t s;
- s.handle = shadow->signal;
- packet->completion_signal = s;
- hsa_fns.hsa_signal_store_relaxed_fn (s, 1);
- memcpy (shadow->kernarg_address, &vars, sizeof (vars));
-
- /* PR hsa/70337. */
- size_t vars_size = sizeof (vars);
- if (kernel->kernarg_segment_size > vars_size)
- {
- if (kernel->kernarg_segment_size != vars_size
- + sizeof (struct hsa_kernel_runtime *))
- GOMP_PLUGIN_fatal ("Kernel segment size has an unexpected value");
- memcpy (packet->kernarg_address + vars_size, &shadow,
- sizeof (struct hsa_kernel_runtime *));
- }
-
- HSA_DEBUG ("Copying kernel runtime pointer to kernarg_address\n");
-
- uint16_t header;
- header = HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
- header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
- header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
-
- HSA_DEBUG ("Going to dispatch kernel %s\n", kernel->name);
-
- packet_store_release ((uint32_t *) packet, header,
- (uint16_t) kla->ndim << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS);
-
- hsa_fns.hsa_signal_store_release_fn (agent->command_q->doorbell_signal,
- index);
-
- /* TODO: GPU agents in Carrizo APUs cannot properly update L2 cache for
- signal wait and signal load operations on their own and we need to
- periodically call the hsa_signal_load_acquire on completion signals of
- children kernels in the CPU to make that happen. As soon the
- limitation will be resolved, this workaround can be removed. */
-
- HSA_DEBUG ("Kernel dispatched, waiting for completion\n");
-
- /* Root signal waits with 1ms timeout. */
- while (hsa_fns.hsa_signal_wait_acquire_fn (s, HSA_SIGNAL_CONDITION_LT, 1,
- 1000 * 1000,
- HSA_WAIT_STATE_BLOCKED) != 0)
- for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++)
- {
- hsa_signal_t child_s;
- child_s.handle = shadow->children_dispatches[i]->signal;
-
- HSA_DEBUG ("Waiting for children completion signal: %" PRIu64 "\n",
- (print_uint64_t) shadow->children_dispatches[i]->signal);
- hsa_fns.hsa_signal_load_acquire_fn (child_s);
- }
-
- release_kernel_dispatch (shadow);
-
- if (pthread_rwlock_unlock (&agent->modules_rwlock))
- GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock");
-}
-
-/* Part of the libgomp plugin interface. Run a kernel on device N (the number
- is actually ignored, we assume the FN_PTR has been mapped using the correct
- device) and pass it an array of pointers in VARS as a parameter. The kernel
- is identified by FN_PTR which must point to a kernel_info structure. */
-
-void
-GOMP_OFFLOAD_run (int n __attribute__((unused)),
- void *fn_ptr, void *vars, void **args)
-{
- struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
- struct GOMP_kernel_launch_attributes def;
- struct GOMP_kernel_launch_attributes *kla;
- if (!parse_target_attributes (args, &def, &kla))
- {
- HSA_DEBUG ("Will not run HSA kernel because the grid size is zero\n");
- return;
- }
- run_kernel (kernel, vars, kla);
-}
-
-/* Information to be passed to a thread running a kernel asycnronously. */
-
-struct async_run_info
-{
- int device;
- void *tgt_fn;
- void *tgt_vars;
- void **args;
- void *async_data;
-};
-
-/* Thread routine to run a kernel asynchronously. */
-
-static void *
-run_kernel_asynchronously (void *thread_arg)
-{
- struct async_run_info *info = (struct async_run_info *) thread_arg;
- int device = info->device;
- void *tgt_fn = info->tgt_fn;
- void *tgt_vars = info->tgt_vars;
- void **args = info->args;
- void *async_data = info->async_data;
-
- free (info);
- GOMP_OFFLOAD_run (device, tgt_fn, tgt_vars, args);
- GOMP_PLUGIN_target_task_completion (async_data);
- return NULL;
-}
-
-/* Part of the libgomp plugin interface. Run a kernel like GOMP_OFFLOAD_run
- does, but asynchronously and call GOMP_PLUGIN_target_task_completion when it
- has finished. */
-
-void
-GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
- void **args, void *async_data)
-{
- pthread_t pt;
- struct async_run_info *info;
- HSA_DEBUG ("GOMP_OFFLOAD_async_run invoked\n");
- info = GOMP_PLUGIN_malloc (sizeof (struct async_run_info));
-
- info->device = device;
- info->tgt_fn = tgt_fn;
- info->tgt_vars = tgt_vars;
- info->args = args;
- info->async_data = async_data;
-
- int err = pthread_create (&pt, NULL, &run_kernel_asynchronously, info);
- if (err != 0)
- GOMP_PLUGIN_fatal ("HSA asynchronous thread creation failed: %s",
- strerror (err));
- err = pthread_detach (pt);
- if (err != 0)
- GOMP_PLUGIN_fatal ("Failed to detach a thread to run HSA kernel "
- "asynchronously: %s", strerror (err));
-}
-
-/* Deinitialize all information associated with MODULE and kernels within
- it. Return TRUE on success. */
-
-static bool
-destroy_module (struct module_info *module)
-{
- int i;
- for (i = 0; i < module->kernel_count; i++)
- if (pthread_mutex_destroy (&module->kernels[i].init_mutex))
- {
- GOMP_PLUGIN_error ("Failed to destroy an HSA kernel initialization "
- "mutex");
- return false;
- }
- return true;
-}
-
-/* Part of the libgomp plugin interface. Unload BRIG module described by
- struct brig_image_desc in TARGET_DATA from agent number N. Return
- TRUE on success. */
-
-bool
-GOMP_OFFLOAD_unload_image (int n, unsigned version, const void *target_data)
-{
- if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA)
- {
- GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin"
- " (expected %u, received %u)",
- GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
- return false;
- }
-
- struct agent_info *agent;
- agent = get_agent_info (n);
- if (!agent)
- return false;
-
- if (pthread_rwlock_wrlock (&agent->modules_rwlock))
- {
- GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock");
- return false;
- }
- struct module_info *module = agent->first_module;
- while (module)
- {
- if (module->image_desc == target_data)
- break;
- module = module->next;
- }
- if (!module)
- {
- GOMP_PLUGIN_error ("Attempt to unload an image that has never been "
- "loaded before");
- return false;
- }
-
- remove_module_from_agent (agent, module);
- if (!destroy_module (module))
- return false;
- free (module);
- if (!destroy_hsa_program (agent))
- return false;
- if (pthread_rwlock_unlock (&agent->modules_rwlock))
- {
- GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock");
- return false;
- }
- return true;
-}
-
-/* Part of the libgomp plugin interface. Deinitialize all information and
- status associated with agent number N. We do not attempt any
- synchronization, assuming the user and libgomp will not attempt
- deinitialization of a device that is in any way being used at the same
- time. Return TRUE on success. */
-
-bool
-GOMP_OFFLOAD_fini_device (int n)
-{
- struct agent_info *agent = get_agent_info (n);
- if (!agent)
- return false;
-
- if (!agent->initialized)
- return true;
-
- struct module_info *next_module = agent->first_module;
- while (next_module)
- {
- struct module_info *module = next_module;
- next_module = module->next;
- if (!destroy_module (module))
- return false;
- free (module);
- }
- agent->first_module = NULL;
- if (!destroy_hsa_program (agent))
- return false;
-
- release_agent_shared_libraries (agent);
-
- hsa_status_t status = hsa_fns.hsa_queue_destroy_fn (agent->command_q);
- if (status != HSA_STATUS_SUCCESS)
- return hsa_error ("Error destroying command queue", status);
- status = hsa_fns.hsa_queue_destroy_fn (agent->kernel_dispatch_command_q);
- if (status != HSA_STATUS_SUCCESS)
- return hsa_error ("Error destroying kernel dispatch command queue", status);
- if (pthread_mutex_destroy (&agent->prog_mutex))
- {
- GOMP_PLUGIN_error ("Failed to destroy an HSA agent program mutex");
- return false;
- }
- if (pthread_rwlock_destroy (&agent->modules_rwlock))
- {
- GOMP_PLUGIN_error ("Failed to destroy an HSA agent rwlock");
- return false;
- }
- agent->initialized = false;
- return true;
-}
-
-/* Part of the libgomp plugin interface. Not implemented as it is not required
- for HSA. */
-
-void *
-GOMP_OFFLOAD_alloc (int ord, size_t size)
-{
- GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_alloc is not implemented because "
- "it should never be called");
- return NULL;
-}
-
-/* Part of the libgomp plugin interface. Not implemented as it is not required
- for HSA. */
-
-bool
-GOMP_OFFLOAD_free (int ord, void *ptr)
-{
- GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_free is not implemented because "
- "it should never be called");
- return false;
-}
-
-/* Part of the libgomp plugin interface. Not implemented as it is not required
- for HSA. */
-
-bool
-GOMP_OFFLOAD_dev2host (int ord, void *dst, const void *src, size_t n)
-{
- GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2host is not implemented because "
- "it should never be called");
- return false;
-}
-
-/* Part of the libgomp plugin interface. Not implemented as it is not required
- for HSA. */
-
-bool
-GOMP_OFFLOAD_host2dev (int ord, void *dst, const void *src, size_t n)
-{
- GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_host2dev is not implemented because "
- "it should never be called");
- return false;
-}
-
-/* Part of the libgomp plugin interface. Not implemented as it is not required
- for HSA. */
-
-bool
-GOMP_OFFLOAD_dev2dev (int ord, void *dst, const void *src, size_t n)
-{
- GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2dev is not implemented because "
- "it should never be called");
- return false;
-}
-# Makefile.in generated by automake 1.15.1 from Makefile.am.
+# Makefile.in generated by automake 1.16.1 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2017 Free Software Foundation, Inc.
+# Copyright (C) 1994-2018 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
PLUGIN_GCN_CPPFLAGS = @PLUGIN_GCN_CPPFLAGS@
PLUGIN_GCN_LDFLAGS = @PLUGIN_GCN_LDFLAGS@
PLUGIN_GCN_LIBS = @PLUGIN_GCN_LIBS@
-PLUGIN_HSA = @PLUGIN_HSA@
-PLUGIN_HSA_CPPFLAGS = @PLUGIN_HSA_CPPFLAGS@
-PLUGIN_HSA_LDFLAGS = @PLUGIN_HSA_LDFLAGS@
-PLUGIN_HSA_LIBS = @PLUGIN_HSA_LIBS@
PLUGIN_NVPTX = @PLUGIN_NVPTX@
PLUGIN_NVPTX_CPPFLAGS = @PLUGIN_NVPTX_CPPFLAGS@
PLUGIN_NVPTX_LDFLAGS = @PLUGIN_NVPTX_LDFLAGS@
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \
esac;
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
# Disable caret
lappend ALWAYS_CFLAGS "additional_flags=-fno-diagnostics-show-caret"
- # Disable HSA warnings by default.
- lappend ALWAYS_CFLAGS "additional_flags=-Wno-hsa"
-
# Disable color diagnostics
lappend ALWAYS_CFLAGS "additional_flags=-fdiagnostics-color=never"
disable {
return "host"
}
- hsa* {
- return ""
- }
*-intelmic* {
return ""
}
return [string match "host" $openacc_device_type]
}
-# Return 1 if the selected OMP device is actually a HSA device
-
-proc check_effective_target_hsa_offloading_selected_nocache {} {
- global tool
-
- set src {
- int main () {
- int v = 1;
- #pragma omp target map(from:v)
- v = 0;
- return v;
- }
- }
-
- set result [check_compile hsa_offloading_src executable $src]
- set lines [lindex $result 0]
- set exe [lindex $result 1]
-
- set ok 0
- if { [string match "" $lines] } {
- # No error messages, let us switch on HSA debugging output and run it
- set prev_HSA_DEBUG [getenv HSA_DEBUG]
- setenv HSA_DEBUG "1"
- set result [remote_load target "./$exe"]
- if { [string match "" $prev_HSA_DEBUG] } {
- unsetenv HSA_DEBUG
- } else {
- setenv HSA_DEBUG $prev_HSA_DEBUG
- }
- set status [lindex $result 0]
- if { $status != "pass" } {
- remote_file build delete $exe
- verbose "HSA availability test failed"
- return 0
- }
- set output [lindex $result 1]
- if { [string match "*HSA debug: Going to dispatch kernel*" $output] } {
- verbose "HSA availability detected"
- set ok 1
- }
- }
- remote_file build delete $exe
- return $ok
-}
-
-# Return 1 if the selected OMP device is actually a HSA device and
-# cache the result
-
-proc check_effective_target_hsa_offloading_selected {} {
- return [check_cached_effective_target hsa_offloading_selected {
- check_effective_target_hsa_offloading_selected_nocache
- }]
-}
-
# Return 1 if at least one AMD GPU is accessible.
proc check_effective_target_openacc_radeon_accel_present { } {
+++ /dev/null
-#define size 10
-int i, j, k;
-
-int
-main ()
-{
- char *s = __builtin_malloc (size + 1);
-
-#pragma omp target teams
- {
-#pragma omp distribute parallel for default(none) private(i) shared(s)
- for (i = 0; i < size; ++i)
- {
- char *buffer = __builtin_alloca (10);
- buffer[5] = 97 + i;
- s[i] = buffer[5];
- }
- }
-
- for (i = 0; i < size; ++i)
- if (s[i] != 97 + i)
- __builtin_abort ();
-
- return 0;
-}
+++ /dev/null
-#include <assert.h>
-
-#define ASSIGN_SX(N) \
- s##N.a1 = 1; \
- s##N.a2 = 2; \
- s##N.a3 = 3; \
- s##N.a4 = 4; \
- s##N.a5 = 5; \
- s##N.a6 = 6; \
- s##N.a7 = 7; \
- s##N.a8 = 8; \
- s##N.a9 = 9; \
- s##N.a10 = 10;
-
-#define ASSERT_SX(N) \
- assert (s##N.a1 == 1); \
- assert (s##N.a2 == 2); \
- assert (s##N.a3 == 3); \
- assert (s##N.a4 == 4); \
- assert (s##N.a5 == 5); \
- assert (s##N.a6 == 6); \
- assert (s##N.a7 == 7); \
- assert (s##N.a8 == 8); \
- assert (s##N.a9 == 9); \
- assert (s##N.a10 == 10);
-
-struct S1
-{
- unsigned a : 10;
- unsigned b : 20;
-};
-
-struct S2
-{
- unsigned a1 : 10;
- unsigned a2 : 10;
- unsigned a3 : 10;
- unsigned a4 : 10;
- unsigned a5 : 10;
- unsigned a6 : 10;
- unsigned a7 : 10;
- unsigned a8 : 10;
- unsigned a9 : 10;
- unsigned a10 : 10;
-};
-
-struct S3
-{
- unsigned a1 : 10;
- unsigned a2 : 9;
- unsigned a3 : 8;
- unsigned a4 : 7;
- unsigned a5 : 6;
- unsigned a6 : 5;
- unsigned a7 : 6;
- unsigned a8 : 7;
- unsigned a9 : 8;
- unsigned a10 : 9;
-};
-
-struct S4
-{
- unsigned a1 : 10;
- int a2 : 9;
- unsigned a3 : 8;
- int a4 : 7;
- unsigned a5 : 6;
- int a6 : 5;
- unsigned a7 : 6;
- int a8 : 7;
- unsigned a9 : 8;
- int a10 : 9;
-};
-
-struct S5
-{
- unsigned a1 : 31;
- int a2 : 9;
- unsigned a3 : 17;
- int a4 : 7;
- unsigned a5 : 6;
- int a6 : 5;
- unsigned long a7 : 55;
- int a8 : 7;
- unsigned a9 : 8;
- int a10 : 9;
-};
-
-int
-main ()
-{
- struct S1 s1;
-
-#pragma omp target map(to: s1)
- {
- s1.a = 2;
- s1.b = 3;
- }
-
- assert (s1.a == 2);
- assert (s1.b == 3);
-
- struct S2 s2;
-
-#pragma omp target map(to: s2)
- {
- ASSIGN_SX (2)
- }
-
- ASSERT_SX (2)
-
- struct S3 s3;
-
-#pragma omp target map(to: s3)
- {
- ASSIGN_SX (3)
- }
-
- ASSERT_SX (3)
-
- struct S4 s4;
-
-#pragma omp target map(to: s4)
- {
- ASSIGN_SX (4)
- }
-
- ASSERT_SX (4)
-
- struct S4 s5;
-
- s5.a1 = 0;
- s5.a2 = 1;
- s5.a3 = 2;
- s5.a4 = 3;
- s5.a5 = 4;
- s5.a6 = 5;
- s5.a7 = 6;
- s5.a8 = 7;
- s5.a9 = 8;
- s5.a10 = 9;
-
-#pragma omp target map(to: s5)
- {
- s5.a1++;
- s5.a2++;
- s5.a3++;
- s5.a4++;
- s5.a5++;
- s5.a6++;
- s5.a7++;
- s5.a8++;
- s5.a9++;
- s5.a10++;
- }
-
- ASSERT_SX (5)
-
- return 0;
-}
+++ /dev/null
-#include <math.h>
-
-#define N 12
-
-int main()
-{
- unsigned int arguments[N] = {0u, 1u, 2u, 3u, 111u, 333u, 444u, 0x80000000u, 0x0000ffffu, 0xf0000000u, 0xff000000u, 0xffffffffu};
- int clrsb[N] = {};
- int clz[N] = {};
- int ctz[N] = {};
- int ffs[N] = {};
- int parity[N] = {};
- int popcount[N] = {};
-
- int ref_clrsb[N] = {};
- int ref_clz[N] = {};
- int ref_ctz[N] = {};
- int ref_ffs[N] = {};
- int ref_parity[N] = {};
- int ref_popcount[N] = {};
-
- for (unsigned i = 0; i < N; i++)
- {
- ref_clrsb[i] = __builtin_clrsb (arguments[i]);
- ref_clz[i] = __builtin_clz (arguments[i]);
- ref_ctz[i] = __builtin_ctz (arguments[i]);
- ref_ffs[i] = __builtin_ffs (arguments[i]);
- ref_parity[i] = __builtin_parity (arguments[i]);
- ref_popcount[i] = __builtin_popcount (arguments[i]);
- }
-
- #pragma omp target map(from:clz, ctz, ffs, parity, popcount)
- {
- for (unsigned i = 0; i < N; i++)
- {
- clrsb[i] = __builtin_clrsb (arguments[i]);
- clz[i] = __builtin_clz (arguments[i]);
- ctz[i] = __builtin_ctz (arguments[i]);
- ffs[i] = __builtin_ffs (arguments[i]);
- parity[i] = __builtin_parity (arguments[i]);
- popcount[i] = __builtin_popcount (arguments[i]);
- }
- }
-
- for (unsigned i = 0; i < N; i++)
- if (ref_clrsb[i] != clrsb[i])
- __builtin_abort ();
-
- /* CLZ of zero is undefined for zero. */
- for (unsigned i = 1; i < N; i++)
- if (ref_clz[i] != clz[i])
- __builtin_abort ();
-
- /* Likewise for ctz */
- for (unsigned i = 1; i < N; i++)
- if (ref_ctz[i] != ctz[i])
- __builtin_abort ();
-
- for (unsigned i = 0; i < N; i++)
- if (ref_ffs[i] != ffs[i])
- __builtin_abort ();
-
- for (unsigned i = 0; i < N; i++)
- if (ref_parity[i] != parity[i])
- __builtin_abort ();
-
- for (unsigned i = 0; i < N; i++)
- if (ref_popcount[i] != popcount[i])
- __builtin_abort ();
-
- return 0;
-}
-
+++ /dev/null
-/* { dg-additional-options "-ffast-math" } */
-
-#include <assert.h>
-#include <math.h>
-
-#define N 10
-#define N2 14
-
-#define c1 1.2345f
-#define c2 1.2345
-
-#define DELTA 0.001
-
-#define TEST_BIT_BUILTINS(T, S, S2) \
- { \
- T arguments[N2] \
- = {0##S, 1##S, 2##S, 3##S, \
- 111##S, 333##S, 444##S, 0x80000000##S, \
- 0x0000ffff##S, 0xf0000000##S, 0xff000000##S, 0xffffffff##S}; \
- int clrsb[N2] = {}; \
- int clz[N2] = {}; \
- int ctz[N2] = {}; \
- int ffs[N2] = {}; \
- int parity[N2] = {}; \
- int popcount[N2] = {}; \
- \
- _Pragma ("omp target map(to:clz[:N2], ctz[:N2], ffs[:N2], parity[:N2], popcount[:N2])") \
- { \
- for (unsigned i = 0; i < N2; i++) \
- { \
- clrsb[i] = __builtin_clrsb##S2 (arguments[i]); \
- clz[i] = __builtin_clz##S2 (arguments[i]); \
- ctz[i] = __builtin_ctz##S2 (arguments[i]); \
- ffs[i] = __builtin_ffs##S2 (arguments[i]); \
- parity[i] = __builtin_parity##S2 (arguments[i]); \
- popcount[i] = __builtin_popcount##S2 (arguments[i]); \
- } \
- } \
- \
- for (unsigned i = 0; i < N2; i++) \
- { \
- assert (clrsb[i] == __builtin_clrsb##S2 (arguments[i])); \
- if (arguments[0] != 0) \
- { \
- assert (clz[i] == __builtin_clz##S2 (arguments[i])); \
- assert (ctz[i] == __builtin_ctz##S2 (arguments[i])); \
- } \
- assert (ffs[i] == __builtin_ffs##S2 (arguments[i])); \
- assert (parity[i] == __builtin_parity##S2 (arguments[i])); \
- assert (popcount[i] == __builtin_popcount##S2 (arguments[i])); \
- } \
- }
-
-#define ASSERT(v1, v2) assert (fabs (v1 - v2) < DELTA)
-
-int
-main ()
-{
- float f[N] = {};
- float d[N] = {};
-
-/* 1) test direct mapping to HSA insns. */
-
-#pragma omp target map(to: f[ : N], d[ : N])
- {
- f[0] = sinf (c1);
- f[1] = cosf (c1);
- f[2] = exp2f (c1);
- f[3] = log2f (c1);
- f[4] = truncf (c1);
- f[5] = sqrtf (c1);
-
- d[0] = trunc (c2);
- d[1] = sqrt (c2);
- }
-
- ASSERT (f[0], sinf (c1));
- ASSERT (f[1], cosf (c1));
- ASSERT (f[2], exp2f (c1));
- ASSERT (f[3], log2f (c1));
- ASSERT (f[4], truncf (c1));
- ASSERT (f[5], sqrtf (c1));
-
- ASSERT (d[0], trunc (c2));
- ASSERT (d[1], sqrt (c2));
-
- /* 2) test bit builtins for unsigned int. */
- TEST_BIT_BUILTINS (int, , );
-
- /* 3) test bit builtins for unsigned long int. */
- TEST_BIT_BUILTINS (long, l, l);
-
- /* 4) test bit builtins for unsigned long long int. */
- TEST_BIT_BUILTINS (long long, ll, ll);
-
- return 0;
-}
+++ /dev/null
-if [info exists lang_library_path] then {
- unset lang_library_path
- unset lang_link_flags
-}
-if [info exists lang_test_file] then {
- unset lang_test_file
-}
-if [info exists lang_include_flags] then {
- unset lang_include_flags
-}
-
-load_lib libgomp-dg.exp
-load_gcc_lib gcc-dg.exp
-
-# Initialize dg.
-dg-init
-
-# Turn on OpenMP.
-lappend ALWAYS_CFLAGS "additional_flags=-fopenmp"
-
-set ld_library_path $always_ld_library_path
-append ld_library_path [gcc-set-multilib-library-path $GCC_UNDER_TEST]
-set_ld_library_path_env_vars
-
-global DEFAULT_CFLAGS
-if [info exists DEFAULT_CFLAGS] then {
- set CFLAGS_list [list "-O0" $DEFAULT_CFLAGS]
-} else {
- set CFLAGS_list [list "-O0" "-O2"]
-}
-
-if [check_effective_target_hsa_offloading_selected] {
- foreach USE_CFLAGS $CFLAGS_list {
- # Gather a list of all tests.
- set tests [lsort [find $srcdir/$subdir *.c]]
- # Main loop.
- dg-runtest $tests "" [concat $USE_CFLAGS "-Whsa"]
- }
-}
-
-# All done.
-dg-finish
+++ /dev/null
-#include <assert.h>
-#include <complex.h>
-#include <math.h>
-
-#define uchar unsigned char
-#define C 123
-
-#define TEST(type) \
- type foo_##type (void) \
- { \
- _Complex type a = C + 45I; \
- return __real__ a; \
- }
-
-#pragma omp declare target
-TEST (char)
-TEST (uchar)
-TEST (short)
-TEST (int)
-
-float
-bar (float a, float b)
-{
- _Complex float c = a + b * I;
-
- c += 11.f + 12.f * I;
-
- _Complex float d = 2.f + 4.44f * I;
-
- return __real__(crealf (c + d) + cimag (d) * I);
-}
-
-#pragma omp end declare target
-
-int
-main (void)
-{
- int v = 0;
- float v2 = 0.0f;
-
-#pragma omp target map(to: v)
- v = foo_char ();
-
- assert (v == C);
-
-#pragma omp target map(to: v)
- v = foo_uchar ();
-
- assert (v == C);
-
-#pragma omp target map(to: v)
- v = foo_short ();
-
- assert (v == C);
-
-#pragma omp target map(to: v)
- v = foo_int ();
-
- assert (v == C);
-
-#pragma omp target map(to: v2)
- v2 = bar (1.12f, 4.44f);
-
- assert (fabs (v2 - 14.12) < 0.0001f);
-}
+++ /dev/null
-#pragma omp declare target
- _Complex int *g;
-#pragma omp end declare target
-
-
-
-_Complex float f(void);
-
-int
-main ()
-{
- _Complex int y;
-#pragma omp target map(from:y)
- {
- _Complex int x;
- g = &x;
- __imag__ x = 1;
- __real__ x = 2;
- y = x;
- }
-
- if ((__imag__ y != 1)
- || (__real__ y != 2))
- __builtin_abort ();
- return 0;
-}
-
+++ /dev/null
-#include <assert.h>
-
-struct Cube
-{
- int x;
- int y;
- int z;
-};
-
-#pragma omp declare target
-int
-foo (short a)
-{
- switch (a)
- {
- case 1:
- return 11;
- break;
- case 33:
- return 333;
- break;
- case 55:
- return 55;
- break;
- default:
- return -1;
- }
-}
-
-int
-bar (int a)
-{
- int *ptr = &a;
-
- *ptr = 100;
- return a + *ptr;
-}
-
-struct Cube
-baz (struct Cube c)
-{
- c.x = 11;
- return c;
-}
-
-#pragma omp end declare target
-
-#define s 100
-
-int
-main (int argc)
-{
- /* Test 1: argument types: char to short. */
-
- int array[s];
-#pragma omp target map(tofrom : array[ : s])
- {
- for (char i = 0; i < s; i++)
- array[i] = foo (i);
- }
-
- for (int i = 0; i < s; i++)
- assert (array[i] == foo (i));
-
- /* Test 2: argument address is taken. */
- int v = 2;
-
-#pragma omp target map(tofrom : v)
- v = bar (v);
-
- assert (v == 200);
-
- /* Test 3: passing a structure as a function argument. */
- struct Cube r;
- struct Cube c = {.x = 1, .y = 2, .z = 3};
-
-#pragma omp target map(to : r) map(from : c)
- r = baz (c);
-
- assert (r.x == 11);
- assert (r.y == c.y);
- assert (r.z == c.z);
-}
+++ /dev/null
-#define size 8
-
-#pragma omp declare target
-int
-identity (int x)
-{
- return x;
-}
-
-int
-expx (int x, int n)
-{
- for (int i = 0; i < n - 1; i++)
- x *= x;
-
- return x;
-}
-
-float
-init (int x, int y)
-{
- int x1 = identity (identity (identity (identity (x))));
- int y1 = identity (identity (identity (identity (y))));
-
- int x2 = expx (x1, 2);
- int y2 = expx (y1, 2);
-
- return (x2 + y2);
-}
-#pragma omp end declare target
-
-int
-main ()
-{
- int i, j;
- int a[size][size];
-
-#pragma omp target teams map(to:a[:size][:size])
-#pragma omp distribute parallel for default(none) private(i, j) shared(a)
- for (i = 0; i < size; ++i)
- for (j = 0; j < size; ++j)
- a[i][j] = init (i, j);
-
- for (i = 0; i < size; ++i)
- for (j = 0; j < size; ++j)
- if (i * i + j * j != a[i][j])
- __builtin_abort ();
-
- return 0;
-}
+++ /dev/null
-#include <omp.h>
-
-int
-main ()
-{
- int i;
- int level = -1;
-
-#pragma omp target map(tofrom : level)
- {
- level = omp_get_level ();
- }
-
- if (level != 0)
- __builtin_abort ();
-
-#pragma omp target teams map(tofrom : level)
-#pragma omp distribute parallel for default(none) private(i) shared(level)
- for (i = 0; i < 1; ++i)
- level += omp_get_level ();
-
- if (level != 1)
- __builtin_abort ();
-
- return 0;
-}
+++ /dev/null
-void __attribute__((noinline, noclone))
-foo (int n, int *a, int workgroup_size)
-{
- int i;
-#pragma omp target
-#pragma omp teams thread_limit(workgroup_size)
-#pragma omp distribute parallel for shared(a) firstprivate(n) private(i)
- for (i = 0; i < n; i++)
- a[i]++;
-}
-
-int main (int argc, char **argv)
-{
- int n = 32;
- int *a = __builtin_malloc (sizeof (int) * n);
- int i;
-
- __builtin_memset (a, 0, sizeof (int) * n);
- foo (n, a, 32);
- for (i = 0; i < n; i ++)
- {
- if (a[i] != 1)
- __builtin_abort ();
- }
- return 0;
-}
+++ /dev/null
-void __attribute__((noinline, noclone))
-foo (int j, int n, int *a)
-{
- int i;
-#pragma omp target
-#pragma omp teams
-#pragma omp distribute parallel for shared(a) firstprivate(n) private(i) firstprivate(j)
- for (i = j + 1; i < n; i++)
- a[i] = i;
-}
-
-int main (int argc, char **argv)
-{
- int n = 32;
- int *a = __builtin_malloc (sizeof (int) * n);
- int i, j = 4;
-
- __builtin_memset (a, 0, sizeof (int) * n);
- foo (j, n, a);
- for (i = j + 1; i < n; i ++)
- {
- if (a[i] != i)
- __builtin_abort ();
- }
- return 0;
-}
+++ /dev/null
-#define THE_LOOP \
- for (i = j + 1; i < n; i += 3) \
- a[i] = i
-
-void __attribute__((noinline, noclone))
-foo (int j, int n, int *a)
-{
- int i;
-#pragma omp target
-#pragma omp teams
-#pragma omp distribute parallel for shared(a) firstprivate(n) private(i) firstprivate(j)
- THE_LOOP;
-}
-
-void __attribute__((noinline, noclone))
-bar (int j, int n, int *a)
-{
- int i;
- THE_LOOP;
-}
-
-int main (int argc, char **argv)
-{
- int n = 32;
- int *a = __builtin_malloc (sizeof (int) * n);
- int *ref = __builtin_malloc (sizeof (int) * n);
- int i, j = 4;
-
- __builtin_memset (a, 0, sizeof (int) * n);
- __builtin_memset (ref, 0, sizeof (int) * n);
- bar (j, n, ref);
- foo (j, n, a);
- for (i = 0; i < n; i ++)
- {
- if (a[i] != ref[i])
- __builtin_abort ();
- }
- return 0;
-}
+++ /dev/null
-#define THE_LOOP \
- for (i = j + 1; i < n; i += 3) \
- a[i] = i
-
-void __attribute__((noinline, noclone))
-foo (int j, int n, int *a)
-{
-#pragma omp parallel
- {
- #pragma omp single
- {
- int i;
-#pragma omp target
-#pragma omp teams
-#pragma omp distribute parallel for shared(a) firstprivate(n) private(i) firstprivate(j)
- THE_LOOP;
- }
- }
-}
-
-void __attribute__((noinline, noclone))
-bar (int j, int n, int *a)
-{
- int i;
- THE_LOOP;
-}
-
-int main (int argc, char **argv)
-{
- int n = 32;
- int *a = __builtin_malloc (sizeof (int) * n);
- int *ref = __builtin_malloc (sizeof (int) * n);
- int i, j = 4;
-
- __builtin_memset (a, 0, sizeof (int) * n);
- __builtin_memset (ref, 0, sizeof (int) * n);
- bar (j, n, ref);
- foo (j, n, a);
- for (i = 0; i < n; i ++)
- {
- if (a[i] != ref[i])
- __builtin_abort ();
- }
- return 0;
-}
+++ /dev/null
-#include <assert.h>
-
-#define C 55
-
-int i, j, k;
-
-static void
-test_bzero (unsigned size)
-{
- unsigned bsize = size * sizeof (int);
- int *x = __builtin_malloc (bsize);
- __builtin_memset (x, C, bsize);
-
-#pragma omp target map(tofrom: x[:size]) map(from: bsize)
- {
- __builtin_bzero (x, bsize);
- }
-
- char *buffer = (char *) x;
- for (unsigned i = 0; i < bsize; ++i)
- assert (buffer[i] == 0);
-}
-
-static void
-test_memcpy (unsigned size)
-{
- unsigned bsize = size * sizeof (int);
- int *x = __builtin_malloc (bsize);
- __builtin_memset (x, C, bsize);
- int *y = __builtin_malloc (bsize);
-
-#pragma omp target map(tofrom: x[:size], y[:size]) map(from: bsize)
- {
- __builtin_memcpy (y, x, bsize);
- }
-
- char *buffer = (char *) y;
- for (unsigned i = 0; i < bsize; ++i)
- assert (buffer[i] == C);
-}
-
-static void
-test_mempcpy (unsigned size)
-{
- unsigned bsize = size * sizeof (int);
- int *x = __builtin_malloc (bsize);
- __builtin_memset (x, C, bsize);
- int *y = __builtin_malloc (bsize);
- int *ptr = 0;
-
-#pragma omp target map(tofrom :x[:size], y[:size], ptr) map(from: bsize)
- {
- ptr = __builtin_mempcpy (y, x, bsize);
- }
-
- char *buffer = (char *) y;
- for (unsigned i = 0; i < bsize; ++i)
- assert (buffer[i] == C);
-
- assert (ptr == y + size);
-}
-
-static void
-test_memset (unsigned size)
-{
- unsigned bsize = size * sizeof (int);
- int *x = __builtin_malloc (bsize);
- __builtin_bzero (x, bsize);
-
-#pragma omp target map(tofrom : x[:size]) map(from: bsize)
- {
- __builtin_memset (x, C, bsize);
- }
-
- char *buffer = (char *) x;
- for (unsigned i = 0; i < bsize; ++i)
- assert (buffer[i] == C);
-}
-
-int
-main (void)
-{
- unsigned tests[] = {1, 2, 3, 4, 5, 8, 15, 17, 23, 33, 0};
-
- for (unsigned i = 0; tests[i]; i++)
- {
- test_bzero (tests[i]);
- test_memset (tests[i]);
- test_memcpy (tests[i]);
- test_mempcpy (tests[i]);
- }
-}
+++ /dev/null
-/* PR hsa/69568 */
-
-typedef float float2 __attribute__ ((vector_size (8)));
-float2 *output;
-
-void __attribute__((noinline, noclone))
-foo (int n, float2 *a, int workgroup_size)
-{
- int i;
-#pragma omp target map(from:a[:n]) firstprivate(n, workgroup_size)
-#pragma omp teams thread_limit(workgroup_size)
-#pragma omp distribute parallel for shared(a) firstprivate(n) private(i)
- for (i = 0; i < n; i++)
- { float2 v;
- v[0] = i;
- v[1] = 1+i;
- a[i] = v;
- }
-}
-
-int main (int argc, char **argv)
-{
- int n = 32;
- float2 *a = __builtin_malloc (sizeof (float2) * n);
- int i;
-
- __builtin_memset (a, 0, sizeof (float2) * n);
- foo (n, a, 32);
- for (i = 0; i < n; i++)
- {
- float2 v = a[i];
- if (__builtin_abs (v[0] - i) > 0.1
- || __builtin_abs (v[1] - i - 1) > 0.1)
- {
- __builtin_abort ();
- return 1;
- }
- }
- return 0;
-}
-
+++ /dev/null
-char __attribute__ ((noipa))
-toup (char X)
-{
- if (X >= 97 && X <= 122)
- return X - 32;
- else
- return X;
-}
-
-char
-target_toup_1 (char X)
-{
- char r;
-#pragma omp target map(to:X) map(from:r)
- {
- if (X >= 97 && X <= 122)
- r = X - 32;
- else
- r = X;
- }
- return r;
-}
-
-char __attribute__ ((noipa))
-target_toup (char X)
-{
- return target_toup_1 (X);
-}
-
-int main (int argc, char **argv)
-{
- char a = 'a';
- if (toup (a) != target_toup (a))
- __builtin_abort ();
- a = 'Z';
- if (toup (a) != target_toup (a))
- __builtin_abort ();
- a = 5;
- if (toup (a) != target_toup (a))
- __builtin_abort ();
-
- return 0;
-}
+++ /dev/null
-#include <assert.h>
-#include <limits.h>
-
-#define T unsigned int
-#define BITSIZE CHAR_BIT * sizeof (T)
-
-#define C1 123u
-
-#pragma omp declare target
-T
-rotate (T value, T shift)
-{
- T r = (value << shift) | (value >> (BITSIZE - shift));
- return (r >> shift) | (r << (BITSIZE - shift));
-}
-#pragma omp end declare target
-
-int
-main (int argc)
-{
- T v1, v2, v3, v4, v5;
-
-#pragma omp target map(to: v1, v2, v3, v4, v5)
- {
- v1 = rotate (C1, 10);
- v2 = rotate (C1, 2);
- v3 = rotate (C1, 5);
- v4 = rotate (C1, 16);
- v5 = rotate (C1, 32);
- }
-
- assert (v1 == C1);
- assert (v2 == C1);
- assert (v3 == C1);
- assert (v4 == C1);
- assert (v5 == C1);
-
- return 0;
-}
+++ /dev/null
-extern void abort (void);
-
-#pragma omp declare target
-int
-foo (void)
-{
- static int s;
- return ++s;
-}
-#pragma omp end declare target
-
-int
-main ()
-{
- int r;
- #pragma omp target map(from:r)
- {
- r = foo ();
- }
- if (r != 1)
- abort ();
- return 0;
-}
+++ /dev/null
-#include <assert.h>
-
-#define s 100
-
-#pragma omp declare target
-int
-switch1 (int a)
-{
- switch (a)
- {
- case 1:
- return 11;
- case 33:
- return 333;
- case 55:
- return 55;
- default:
- return -1;
- }
-}
-
-int
-switch2 (int a)
-{
- switch (a)
- {
- case 1 ... 11:
- return 11;
- break;
- case 33:
- return 333;
- break;
- case 55:
- return 55;
- break;
- default:
- return -1;
- }
-}
-
-int
-switch3 (int a)
-{
- switch (a)
- {
- case 1 ... 11:
- return 11;
- case 12 ... 22:
- return 22;
- case 23 ... 33:
- return 33;
- case 34 ... 44:
- return 44;
- default:
- return 44;
- }
-}
-
-int
-switch4 (int a, int b)
-{
- switch (a)
- {
- case 1 ... 11:
- return a;
- case 12 ... 22:
- return b;
- case 23 ... 33:
- return a;
- case 34 ... 44:
- return b;
- default:
- return 12345;
- }
-}
-
-int
-switch5 (int a, int b)
-{
- switch (a)
- {
- case 1 ... 2:
- return 1;
- case 3 ... 4:
- return 2;
- case 5 ... 6:
- return 3;
- case 7 ... 11:
- return 4;
- }
-
- return -1;
-}
-#pragma omp end declare target
-
-int
-main (int argc)
-{
- int array[s];
-
-#pragma omp target map(tofrom : array[:s])
- {
- for (int i = 0; i < s; i++)
- array[i] = switch1 (i);
- }
-
- for (int i = 0; i < s; i++)
- assert (array[i] == switch1 (i));
-
-#pragma omp target map(tofrom : array[:s])
- {
- for (int i = 0; i < s; i++)
- array[i] = switch2 (i);
- }
-
- for (int i = 0; i < s; i++)
- assert (array[i] == switch2 (i));
-
-#pragma omp target map(tofrom : array[:s])
- {
- for (int i = 0; i < s; i++)
- array[i] = switch3 (i);
- }
-
- for (int i = 0; i < s; i++)
- assert (array[i] == switch3 (i));
-
-#pragma omp target map(tofrom : array[:s])
- {
- for (int i = 0; i < s; i++)
- array[i] = switch4 (i, i + 1);
- }
-
- for (int i = 0; i < s; i++)
- assert (array[i] == switch4 (i, i + 1));
-
-#pragma omp target map(tofrom : array[:s])
- {
- for (int i = 0; i < s; i++)
- array[i] = switch5 (i, i + 1);
- }
-
- for (int i = 0; i < s; i++)
- assert (array[i] == switch5 (i, i + 1));
-}
+++ /dev/null
-#include <assert.h>
-
-#define s 100
-
-#pragma omp declare target
-int
-switch1 (unsigned a)
-{
- switch (a)
- {
- case 1 ... 11:
- return 11;
- case 12 ... 13:
- return 22;
- default:
- return 44;
- }
-}
-
-int
-switch2 (unsigned a)
-{
- switch (a)
- {
- case 1 ... 5:
- return 1;
- case 9 ... 11:
- return a + 3;
- case 12 ... 13:
- return a + 3;
- default:
- return 44;
- }
-}
-
-#define OFFSET 12
-
-int
-switch3 (unsigned a)
-{
- switch (a)
- {
- case (OFFSET + 0):
- return 1;
- case (OFFSET + 1)...(OFFSET + 11):
- return 11;
- case (OFFSET + 12)...(OFFSET + 13):
- return (OFFSET + 22);
- default:
- return (OFFSET + 44);
- }
-}
-
-int
-switch4 (unsigned a)
-{
- switch (a)
- {
- case -2:
- return 1;
- case -1:
- return a + 3;
- case 3:
- return a + 3;
- default:
- return 44;
- }
-}
-#pragma omp end declare target
-
-#define low -33
-#define high 55
-
-int
-main (int argc)
-{
- int array[s];
-
-#pragma omp target map(tofrom : array[:s])
- {
- for (int i = low; i < high; i++)
- array[i - low] = switch1 (i);
- }
-
- for (int i = low; i < high; i++)
- assert (array[i - low] == switch1 (i));
-
-#pragma omp target map(tofrom : array[:s])
- {
- for (int i = low; i < high; i++)
- array[i - low] = switch2 (i);
- }
-
- for (int i = low; i < high; i++)
- assert (array[i - low] == switch2 (i));
-
-#pragma omp target map(tofrom : array[:s])
- {
- for (int i = low; i < high; i++)
- array[i - low] = switch3 (i);
- }
-
- for (int i = low; i < high; i++)
- assert (array[i - low] == switch3 (i));
-
-#pragma omp target map(tofrom : array[:s])
- {
- for (int i = low; i < high; i++)
- array[i - low] = switch4 (i);
- }
-
- for (int i = low; i < high; i++)
- assert (array[i - low] == switch4 (i));
-
- return 0;
-}
+++ /dev/null
-/* { dg-additional-options "-fno-tree-switch-conversion" } */
-
-#pragma omp declare target
-int
-foo (unsigned a)
-{
- switch (a)
- {
- case 1 ... 5:
- return 1;
- case 9 ... 11:
- return a + 3;
- case 12 ... 13:
- return a + 3;
- default:
- return 44;
- }
-}
-#pragma omp end declare target
-
-#define s 100
-
-void __attribute__((noinline, noclone))
-verify(int *a)
-{
- if (a[0] != 44)
- __builtin_abort ();
-
- for (int i = 1; i <= 5; i++)
- if (a[i] != 1)
- __builtin_abort ();
-
- for (int i = 6; i <= 8; i++)
- if (a[i] != 44)
- __builtin_abort ();
-
- for (int i = 9; i <= 13; i++)
- if (a[i] != i + 3)
- __builtin_abort ();
-
- for (int i = 14; i < s; i++)
- if (a[i] != 44)
- __builtin_abort ();
-}
-
-int main(int argc)
-{
- int array[s];
-#pragma omp target
- {
- for (int i = 0; i < s; i++)
- {
- int v = foo (i);
- array[i] = v;
- }
- }
- verify (array);
- return 0;
-}
+++ /dev/null
-/*
-
- matmul.c : Matrix Multiplication with tiling for openmp4 example
-
-*/
-
-#include <stdlib.h>
-#include <math.h>
-
-#define BLOCK_SIZE 16
-/*
- #define BLOCK_SIZE 32
-*/
-#define NSECPERSEC 1000000000L
-
-typedef struct {
- int width;
- int height;
- int stride;
- int hpad;
- float* elements;
-} Matrix;
-
-/* Correctly extract the number of nanoseconds from the two time structures */
-long int get_nanosecs( struct timespec start_time, struct timespec end_time) {
- long int nanosecs;
- if ((end_time.tv_nsec-start_time.tv_nsec)<0) nanosecs =
- ((((long int) end_time.tv_sec- (long int) start_time.tv_sec )-1)*NSECPERSEC ) +
- ( NSECPERSEC + (long int) end_time.tv_nsec - (long int) start_time.tv_nsec) ;
- else nanosecs =
- (((long int) end_time.tv_sec- (long int) start_time.tv_sec )*NSECPERSEC ) +
- ( (long int) end_time.tv_nsec - (long int) start_time.tv_nsec );
- return nanosecs;
-}
-
-void simple_sgemm_tt(const int M,const int N,const int K,const float alpha, const float* A,const int LDA,
- const float* B,const int LDB, const float beta,float* C, const int LDC) ;
-void simple_sgemm_tn(const int M,const int N,const int K,const float alpha, const float* A,const int LDA,
- const float* B,const int LDB, const float beta,float* C, const int LDC) ;
-void tiled_sgemm_tt(const int M,const int N,const int K,const float alpha, const float*A, const int LDA,
- const float* B,const int LDB, const float beta,float* C, const int LDC) ;
-
-int verify(float* v_res, float* v_ref, int len) {
- int passed = 1;
- int i;
- for (i = 0; i < len; ++i) {
- if (fabs(v_res[i] - v_ref[i]) > 0.001*v_ref[i]) {
- __builtin_abort ();
- }
- }
- return passed;
-}
-
-
-int main(int argc, char* argv[]){
-
- Matrix A,B,Bt,C,Cref;
- int a1,a2,a3,i,j;
- struct timespec start_time1, end_time1;
- struct timespec start_time2, end_time2;
- long int nanosecs,total_ops;
- float gflopsTiled,gflopsCPU;
-
- a1 = 35;
- a2 = 28;
- a3 = 47;
-
- A.height = a1;
- A.width = a2;
- A.stride = (((A.width-1)/BLOCK_SIZE)+1) * BLOCK_SIZE;
- A.hpad = (((A.height-1)/BLOCK_SIZE)+1) * BLOCK_SIZE;
- A.elements = (float*)malloc(A.stride * A.hpad* sizeof(float));
-
- B.height = a2;
- B.width = a3;
- B.stride = (((B.width-1)/BLOCK_SIZE)+1) * BLOCK_SIZE;
- B.hpad = (((B.height-1)/BLOCK_SIZE)+1) * BLOCK_SIZE;
- B.elements = (float*)malloc(B.stride * B.hpad * sizeof(float));
-
- /* Bt is same as B but stored in column-major order */
- Bt.height = B.height;
- Bt.width = B.width;
- Bt.stride = B.stride;
- Bt.hpad = B.hpad;
- Bt.elements = (float*)malloc(Bt.stride * Bt.hpad * sizeof(float));
-
- C.height = a1;
- C.width = a3;
- C.stride = (((C.width-1)/BLOCK_SIZE)+1) * BLOCK_SIZE;
- C.hpad = (((C.height-1)/BLOCK_SIZE)+1) * BLOCK_SIZE;
- C.elements = (float*)malloc(C.stride * C.hpad * sizeof(float));
-
- Cref.height = a1;
- Cref.width = a3;
- Cref.stride = (((Cref.width-1)/BLOCK_SIZE)+1) * BLOCK_SIZE;
- Cref.hpad = (((Cref.height-1)/BLOCK_SIZE)+1) * BLOCK_SIZE;
- Cref.elements = (float*)malloc(Cref.stride * Cref.hpad * sizeof(float));
-
- for(i = 0; i < A.hpad ; i++)
- for(j = 0; j < A.stride; j++) {
- if (( j<A.width ) && (i<A.height)) {
- A.elements[i*A.stride + j] = (i % 3);
- } else {
- A.elements[i*A.stride + j] = 0.0;
- }
- }
-
- /* Initialize B and Bt */
- for(i = 0; i < B.hpad ; i++)
- for(j = 0; j < B.stride; j++) {
- if (( j<B.width ) && (i<B.height)) {
- B.elements[i*B.stride+j] = (j % 2);
- Bt.elements[j*Bt.stride+i] = B.elements[i*B.stride+j] ;
- } else {
- B.elements[i*B.stride+j] = 0.0;
- Bt.elements[j*Bt.stride+i] = 0.0;
- }
- }
-
- /* zero C, and Cref */
- for(i = 0; i < C.hpad; i++)
- for(j = 0; j < C.stride; j++) {
- C.elements[i*C.stride+j] = 0.0;
- Cref.elements[i*Cref.stride+j] = 0.0;
- }
-
- simple_sgemm_tt(A.height,B.width,B.height,1.0,A.elements,A.stride,B.elements,B.stride,1.0,Cref.elements,Cref.stride);
- tiled_sgemm_tt(A.height,B.width,B.height,1.0,A.elements,A.stride,B.elements,B.stride,1.0,C.elements,C.stride);
-
- verify(C.elements, Cref.elements, C.height * C.stride);
- return 0;
-}
-
-void simple_sgemm_tt(const int M,const int N,const int K,const float alpha, const float* A,const int LDA,
-const float* B,const int LDB, const float beta,float* C, const int LDC) {
- /* A,B, and C are in row-major order */
- int c_row,c_col,inner;
- float sum;
- for (c_col = 0 ; c_col<N; c_col++ ) {
- for (c_row = 0 ; c_row<M; c_row++ ) {
- sum = 0.0 ;
- for (inner = 0 ; inner<K; inner++ ) {
- sum += A[c_row*LDA + inner] * B[inner*LDB + c_col] ;
- }
- C[c_row*LDC + c_col] = alpha*sum + beta*C[ c_row*LDC + c_col] ;
- }
- }
-}
-
-/***************************
-
- tiled_sgemm_tt: Tiled matrix multiplication:
-
-***************************/
-
-void tiled_sgemm_tt(const int M, const int N, const int K, const float alpha, const float*A, const int LDA,
- const float*B, const int LDB, const float beta, float*C, const int LDC){
-
-#pragma omp target teams map(to:A[M*K],B[K*N]) map(from:C[M*N])
-#pragma omp distribute collapse(2)
- for (int C_row_start=0 ; C_row_start < M ; C_row_start+=BLOCK_SIZE)
- for (int C_col_start=0 ; C_col_start < N ; C_col_start+=BLOCK_SIZE)
- {
-// Each team has a local copy of these mini matrices
- float As[BLOCK_SIZE][BLOCK_SIZE];
- float Bs[BLOCK_SIZE][BLOCK_SIZE];
-#pragma omp parallel
- {
- int C_row, C_col;
- float Cval = 0.0;
-
- for (int kblock = 0; kblock < K ; kblock += BLOCK_SIZE )
- {
-#pragma omp for collapse(2)
- for (int row=0 ; row < BLOCK_SIZE ; row++)
- for (int col=0 ; col < BLOCK_SIZE ; col++)
- {
- C_row = C_row_start + row;
- C_col = C_col_start + col;
- if ((C_row < M) && (kblock + col < K))
- As[row][col] = A[(C_row*LDA)+ kblock + col];
- else
- As[row][col] = 0;
- if ((kblock + row < K) && C_col < N)
- Bs[row][col] = B[((kblock+row)*LDB)+ C_col];
- else
- Bs[row][col] = 0;
- }
-
-#pragma omp for collapse(2)
- for (int row=0 ; row < BLOCK_SIZE ; row++)
- for (int col=0 ; col < BLOCK_SIZE ; col++)
- {
- for (int e = 0; e < BLOCK_SIZE; ++e)
- Cval += As[row][e] * Bs[e][col];
- }
- } /* End for kblock .. */
-
-
-#pragma omp for collapse(2)
- for (int row=0 ; row < BLOCK_SIZE ; row++)
- for (int col=0 ; col < BLOCK_SIZE ; col++)
- {
- C_row = C_row_start + row;
- C_col = C_col_start + col;
- if ((C_row < M) && (C_col < N))
- C[(C_row*LDC)+C_col] = alpha*Cval + beta*C[(C_row*LDC)+C_col];
-
- }
- } /* end parallel */
- } /* end target teams distribute */
-}
+++ /dev/null
-/*
-
- matmul.c : Matrix Multiplication with tiling for openmp4 example
-
-*/
-
-#include <stdlib.h>
-#include <math.h>
-
-#define BLOCK_SIZE 16
-/*
- #define BLOCK_SIZE 32
-*/
-#define NSECPERSEC 1000000000L
-
-typedef struct {
- int width;
- int height;
- int stride;
- int hpad;
- float* elements;
-} Matrix;
-
-/* Correctly extract the number of nanoseconds from the two time structures */
-long int get_nanosecs( struct timespec start_time, struct timespec end_time) {
- long int nanosecs;
- if ((end_time.tv_nsec-start_time.tv_nsec)<0) nanosecs =
- ((((long int) end_time.tv_sec- (long int) start_time.tv_sec )-1)*NSECPERSEC ) +
- ( NSECPERSEC + (long int) end_time.tv_nsec - (long int) start_time.tv_nsec) ;
- else nanosecs =
- (((long int) end_time.tv_sec- (long int) start_time.tv_sec )*NSECPERSEC ) +
- ( (long int) end_time.tv_nsec - (long int) start_time.tv_nsec );
- return nanosecs;
-}
-
-void simple_sgemm_tt(const int M,const int N,const int K,const float alpha, const float* A,const int LDA,
- const float* B,const int LDB, const float beta,float* C, const int LDC) ;
-void simple_sgemm_tn(const int M,const int N,const int K,const float alpha, const float* A,const int LDA,
- const float* B,const int LDB, const float beta,float* C, const int LDC) ;
-void tiled_sgemm_tt(const int M,const int N,const int K,const float alpha, const float*A, const int LDA,
- const float* B,const int LDB, const float beta,float* C, const int LDC) ;
-
-int verify(float* v_res, float* v_ref, int len) {
- int passed = 1;
- int i;
- for (i = 0; i < len; ++i) {
- if (fabs(v_res[i] - v_ref[i]) > 0.001*v_ref[i]) {
- __builtin_abort ();
- }
- }
- return passed;
-}
-
-
-int main(int argc, char* argv[]){
-
- Matrix A,B,Bt,C,Cref;
- int a1,a2,a3,i,j;
- struct timespec start_time1, end_time1;
- struct timespec start_time2, end_time2;
- long int nanosecs,total_ops;
- float gflopsTiled,gflopsCPU;
-
- a1 = 35;
- a2 = 28;
- a3 = 47;
-
- A.height = a1;
- A.width = a2;
- A.stride = (((A.width-1)/BLOCK_SIZE)+1) * BLOCK_SIZE;
- A.hpad = (((A.height-1)/BLOCK_SIZE)+1) * BLOCK_SIZE;
- A.elements = (float*)malloc(A.stride * A.hpad* sizeof(float));
-
- B.height = a2;
- B.width = a3;
- B.stride = (((B.width-1)/BLOCK_SIZE)+1) * BLOCK_SIZE;
- B.hpad = (((B.height-1)/BLOCK_SIZE)+1) * BLOCK_SIZE;
- B.elements = (float*)malloc(B.stride * B.hpad * sizeof(float));
-
- /* Bt is same as B but stored in column-major order */
- Bt.height = B.height;
- Bt.width = B.width;
- Bt.stride = B.stride;
- Bt.hpad = B.hpad;
- Bt.elements = (float*)malloc(Bt.stride * Bt.hpad * sizeof(float));
-
- C.height = a1;
- C.width = a3;
- C.stride = (((C.width-1)/BLOCK_SIZE)+1) * BLOCK_SIZE;
- C.hpad = (((C.height-1)/BLOCK_SIZE)+1) * BLOCK_SIZE;
- C.elements = (float*)malloc(C.stride * C.hpad * sizeof(float));
-
- Cref.height = a1;
- Cref.width = a3;
- Cref.stride = (((Cref.width-1)/BLOCK_SIZE)+1) * BLOCK_SIZE;
- Cref.hpad = (((Cref.height-1)/BLOCK_SIZE)+1) * BLOCK_SIZE;
- Cref.elements = (float*)malloc(Cref.stride * Cref.hpad * sizeof(float));
-
- for(i = 0; i < A.hpad ; i++)
- for(j = 0; j < A.stride; j++) {
- if (( j<A.width ) && (i<A.height)) {
- A.elements[i*A.stride + j] = (i % 3);
- } else {
- A.elements[i*A.stride + j] = 0.0;
- }
- }
-
- /* Initialize B and Bt */
- for(i = 0; i < B.hpad ; i++)
- for(j = 0; j < B.stride; j++) {
- if (( j<B.width ) && (i<B.height)) {
- B.elements[i*B.stride+j] = (j % 2);
- Bt.elements[j*Bt.stride+i] = B.elements[i*B.stride+j] ;
- } else {
- B.elements[i*B.stride+j] = 0.0;
- Bt.elements[j*Bt.stride+i] = 0.0;
- }
- }
-
- /* zero C, and Cref */
- for(i = 0; i < C.hpad; i++)
- for(j = 0; j < C.stride; j++) {
- C.elements[i*C.stride+j] = 0.0;
- Cref.elements[i*Cref.stride+j] = 0.0;
- }
-
- simple_sgemm_tt(A.height,B.width,B.height,1.0,A.elements,A.stride,B.elements,B.stride,1.0,Cref.elements,Cref.stride);
- tiled_sgemm_tt(A.height,B.width,B.height,1.0,A.elements,A.stride,B.elements,B.stride,1.0,C.elements,C.stride);
-
- verify(C.elements, Cref.elements, C.height * C.stride);
- return 0;
-}
-
-void simple_sgemm_tt(const int M,const int N,const int K,const float alpha, const float* A,const int LDA,
-const float* B,const int LDB, const float beta,float* C, const int LDC) {
- /* A,B, and C are in row-major order */
- int c_row,c_col,inner;
- float sum;
- for (c_col = 0 ; c_col<N; c_col++ ) {
- for (c_row = 0 ; c_row<M; c_row++ ) {
- sum = 0.0 ;
- for (inner = 0 ; inner<K; inner++ ) {
- sum += A[c_row*LDA + inner] * B[inner*LDB + c_col] ;
- }
- C[c_row*LDC + c_col] = alpha*sum + beta*C[ c_row*LDC + c_col] ;
- }
- }
-}
-
-/***************************
-
- tiled_sgemm_tt: Tiled matrix multiplication:
-
-***************************/
-
-void tiled_sgemm_tt(const int M, const int N, const int K, const float alpha, const float*A, const int LDA,
- const float*B, const int LDB, const float beta, float*C, const int LDC){
-
-#pragma omp target teams map(to:A[M*K],B[K*N]) map(from:C[M*N])
-#pragma omp distribute collapse(2)
- for (int C_row_start=0 ; C_row_start < M ; C_row_start+=BLOCK_SIZE) {
- for (int C_col_start=0 ; C_col_start < N ; C_col_start+=BLOCK_SIZE) {
-
-// We now have M/BLOCK_SIZE * N/BLOCK_SIZE teams = (M*N)/(BLOCK_SIZE*BLOCK_SIZE)
-// The grid global dimensions are M,N,1
-// The grid local dimensions are BLOCK_SIZE,BLOCK_SIZE,1
-
-// -------------------------------------------------------------------
-// The rest of this code forms the HSAIL kernel with the
-// pairs of "parallel for collapse(2)" loops replaced with a barrier.
-// The kernel initializes these values
-// C_row_start = get_group_id(0) * BLOCK_SIZE
-// C_col_start = get_group_id(1) * BLOCK_SIZE
-// row=get_local_id(0)
-// col=get_local_id(1)
-// -------------------------------------------------------------------
-
-// Each team has a local copy of these mini matrices
- float As[BLOCK_SIZE][BLOCK_SIZE];
- float Bs[BLOCK_SIZE][BLOCK_SIZE];
- float Cs[BLOCK_SIZE][BLOCK_SIZE];
- int C_row, C_col;
-
- /* Zero Cs for this BLOCK */
-// - - - - - - - - - - - - - - - - - - - -
-// REPLACE NEXT THREE LINES WITH A BARRIER
-#pragma omp parallel for collapse(2)
- for (int row=0 ; row < BLOCK_SIZE ; row++) {
- for (int col=0 ; col < BLOCK_SIZE ; col++) {
-// END BARRIER
-// - - - - - - - - - - - - - - - - - - - -
- Cs[row][col] = 0.0;
- }
- }
-
- // This kblock loop is run on the master thread of each team
- for (int kblock = 0; kblock < K ; kblock += BLOCK_SIZE ) {
-
- // Copy global memory values to local memory
-// - - - - - - - - - - - - - - - - - - - -
-// REPLACE NEXT THREE LINES WITH A BARRIER
-#pragma omp parallel for collapse(2)
- for (int row=0 ; row < BLOCK_SIZE ; row++) {
- for (int col=0 ; col < BLOCK_SIZE ; col++) {
-// END BARRIER
-// - - - - - - - - - - - - - - - - - - - -
- C_row = C_row_start + row;
- C_col = C_col_start + col;
- if ((C_row < M) && (kblock + col < K))
- As[row][col] = A[(C_row*LDA)+ kblock + col];
- else
- As[row][col] = 0;
- if ((kblock + row < K) && C_col < N)
- Bs[row][col] = B[((kblock+row)*LDB)+ C_col];
- else
- Bs[row][col] = 0;
- }
- }
-
- // Calculate Cs <- Sum(As X Bs) across all kblocks
-// - - - - - - - - - - - - - - - - - - - -
-// REPLACE NEXT THREE LINES WITH A BARRIER
-#pragma omp parallel for collapse(2)
- for (int row=0 ; row < BLOCK_SIZE ; row++) {
- for (int col=0 ; col < BLOCK_SIZE ; col++) {
-// END BARRIER
-// - - - - - - - - - - - - - - - - - - - -
- for (int e = 0; e < BLOCK_SIZE; ++e)
- Cs[row][col] += As[row][e] * Bs[e][col];
- }
- }
-
- } /* End for kblock .. */
-
-
- // Scale Update actual C from Cs
-// - - - - - - - - - - - - - - - - - - - -
-// REPLACE NEXT THREE LINES WITH A BARRIER
-#pragma omp parallel for collapse(2)
- for (int row=0 ; row < BLOCK_SIZE ; row++) {
- for (int col=0 ; col < BLOCK_SIZE ; col++) {
-// END BARRIER
-// - - - - - - - - - - - - - - - - - - - -
- C_row = C_row_start + row;
- C_col = C_col_start + col;
- if ((C_row < M) && (C_col < N)) {
- C[(C_row*LDC)+C_col] = alpha*Cs[row][col] + beta*C[(C_row*LDC)+C_col];
- }
- }
- }
-
-// -------------------------------------------------------------------
-// This is the end of the kernel
-
- }
- }
-
-}