the untyped HSAIL regs.
Instead of always representing HSAIL's untyped registers as
unsigned int, gccbrig now pre-analyzes the BRIG code and builds
each register variable with the type used most often when storing
data to or reading data from that register. This reduces the total
number of conversions, which cannot always be optimized away.
From-SVN: r254837
+2017-11-16 Henry Linjamäki <henry.linjamaki@parmance.com>
+
+ Change internal representation of HSA registers. Instead of
+ representing HSA's untyped registers as unsigned int, gccbrig
+ now analyzes the BRIG code and builds each register variable with
+ the type it is most often used as in tree expressions. This gives
+ a better chance to optimize VIEW_CONVERT_EXPRs away.
+ * brigfrontend/brig-code-entry-handler.cc: Add analysis method for
+ register type usage. Handle any-typed register variables.
+ * brigfrontend/brig-code-entry-handler.h: New declarations for the
+ above.
+ * brigfrontend/brig-copy-move-inst-handler.cc: Handle any-typed
+ register variables.
+ * brigfrontend/brig-cvt-inst-handler.cc: Likewise.
+ * brigfrontend/brig-function.cc: Build register variables as a
+ type based on results of analysis phase.
+ * brigfrontend/brig-function.h: Move HSA register count defines to
+ brig-utils.h.
+ * brigfrontend/brig-to-generic.cc: New analysis handler. Analyze
+ HSA register usage.
+ * brigfrontend/brig-to-generic.h: New declarations.
+ * brigfrontend/brig-util.cc: New utility functions.
+ * brigfrontend/brig-util.h: New declarations for the above.
+
2017-11-16 Pekka Jääskeläinen <pekka.jaaskelainen@parmance.com>
* gccbrig.texi: Added some documentation.
tree and_mask_vec = build_constructor (vec_type, and_mask_vals);
tree perm = build3 (VEC_PERM_EXPR, vec_type,
- build_reinterpret_cast (vec_type, operands[0]),
- build_reinterpret_cast (vec_type, operands[0]), mask_vec);
+ build_resize_convert_view (vec_type, operands[0]),
+ build_resize_convert_view (vec_type, operands[0]),
+ mask_vec);
tree cleared = build2 (BIT_AND_EXPR, vec_type, perm, and_mask_vec);
size_t s = int_size_in_bytes (TREE_TYPE (cleared)) * BITS_PER_UNIT;
tree raw_type = build_nonstandard_integer_type (s, true);
- tree as_int = build_reinterpret_cast (raw_type, cleared);
+ tree as_int = build_resize_convert_view (raw_type, cleared);
if (int_size_in_bytes (src_element_type) < 4)
{
size_t vecsize = int_size_in_bytes (TREE_TYPE (operands[0])) * BITS_PER_UNIT;
tree wide_type = build_nonstandard_integer_type (vecsize, 1);
- tree src_vect = build_reinterpret_cast (wide_type, operands[0]);
+ tree src_vect = build_resize_convert_view (wide_type, operands[0]);
src_vect = add_temp_var ("src_vect", src_vect);
tree scalar = operands[1];
if (is_fp16_operation)
old_value = build_h2f_conversion
- (build_reinterpret_cast (half_storage_type, operands[0]));
+ (build_resize_convert_view (half_storage_type, operands[0]));
else
old_value
- = build_reinterpret_cast (TREE_TYPE (instr_expr), operands[0]);
+ = build_resize_convert_view (TREE_TYPE (instr_expr), operands[0]);
size_t esize = is_fp16_operation ? 32 : element_size_bits;
correct size here so we don't need a separate unpack/pack for it.
fp16-fp32 conversion is done in build_operands (). */
if (is_input && TREE_TYPE (element) != operand_type)
- {
- if (int_size_in_bytes (TREE_TYPE (element))
- == int_size_in_bytes (operand_type)
- && !INTEGRAL_TYPE_P (operand_type))
- element = build1 (VIEW_CONVERT_EXPR, operand_type, element);
- else
- element = convert (operand_type, element);
- }
+ element = build_resize_convert_view (operand_type, element);
CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, element);
++operand_ptr;
to the array object. */
if (POINTER_TYPE_P (TREE_TYPE (arg_var_decl)))
- symbol_base = build_reinterpret_cast (ptype, arg_var_decl);
+ symbol_base = build_resize_convert_view (ptype, arg_var_decl);
else
{
/* In case we are referring to an array (the argument in
= (const BrigOperandRegister *) m_parent.get_brig_operand_entry
(addr_operand.reg);
tree base_reg_var = m_parent.m_cf->get_m_var_declfor_reg (mem_base_reg);
- var_offset = convert_to_pointer (ptr_type_node, base_reg_var);
+ tree as_uint = build_reinterpret_to_uint (base_reg_var);
+ var_offset = convert_to_pointer (ptr_type_node, as_uint);
gcc_assert (var_offset != NULL_TREE);
}
= ((const uint32_t *) &operand_entries->bytes)[operand_index];
const BrigBase *operand_data
= m_parent.get_brig_operand_entry (operand_offset);
- return build_tree_operand (*brig_inst, *operand_data, operand_type);
+
+ bool inputp = !gccbrig_hsa_opcode_op_output_p (brig_inst->opcode,
+ operand_index);
+ return build_tree_operand (*brig_inst, *operand_data, operand_type, inputp);
}
/* Builds a single (scalar) constant initialized element of type
call_operands.resize (4, NULL_TREE);
operand_types.resize (4, NULL_TREE);
for (size_t i = 0; i < operand_count; ++i)
- call_operands.at (i) = build_reinterpret_cast (operand_types.at (i),
- call_operands.at (i));
+ call_operands.at (i) = build_resize_convert_view (operand_types.at (i),
+ call_operands.at (i));
tree fnptr = build_fold_addr_expr (built_in);
return build_call_array (TREE_TYPE (TREE_TYPE (built_in)), fnptr,
tree_stl_vec
brig_code_entry_handler::build_operands (const BrigInstBase &brig_inst)
+{
+  /* Build mode: delegate to the shared implementation with analysis
+     switched off and hand its operand list back to the caller.  */
+  const bool analyze_only = false;
+  return build_or_analyze_operands (brig_inst, analyze_only);
+}
+
+void
+brig_code_entry_handler::analyze_operands (const BrigInstBase &brig_inst)
+{
+  /* Analysis mode: run the shared implementation with analysis switched
+     on; the list it returns is not needed here and is dropped.  */
+  const bool analyze_only = true;
+  build_or_analyze_operands (brig_inst, analyze_only);
+}
+
+/* Implements both the build_operands () and analyze_operands () calls
+ so that changes to them go in tandem. Performs build_operands ()
+ when ANALYZE is false. Otherwise, only analyzes the operands and
+ returns an empty list.
+
+ When analyzing, records each HSA register operand together with the
+ corresponding resolved operand tree type in
+ brig_to_generic::m_fn_regs_use_index. */
+
+tree_stl_vec
+brig_code_entry_handler::
+build_or_analyze_operands (const BrigInstBase &brig_inst, bool analyze)
{
/* Flush to zero. */
bool ftz = false;
/* Treat the operands as the storage type at this point. */
operand_type = half_storage_type;
+ if (analyze)
+ {
+ if (operand_data->kind == BRIG_KIND_OPERAND_REGISTER)
+ {
+ const BrigOperandRegister &brig_reg
+ = (const BrigOperandRegister &) *operand_data;
+ m_parent.add_reg_used_as_type (brig_reg, operand_type);
+ }
+ continue;
+ }
+
tree operand = build_tree_operand (brig_inst, *operand_data, operand_type,
!is_output);
-
gcc_assert (operand);
/* Cast/convert the inputs to correct types as expected by the GENERIC
{
if (half_to_float)
operand = build_h2f_conversion
- (build_reinterpret_cast (half_storage_type, operand));
+ (build_resize_convert_view (half_storage_type, operand));
else if (TREE_CODE (operand) != LABEL_DECL
&& TREE_CODE (operand) != TREE_VEC
&& operand_data->kind != BRIG_KIND_OPERAND_ADDRESS
- && !VECTOR_TYPE_P (TREE_TYPE (operand)))
+ && operand_data->kind != BRIG_KIND_OPERAND_OPERAND_LIST)
{
- size_t reg_width = int_size_in_bytes (TREE_TYPE (operand));
- size_t instr_width = int_size_in_bytes (operand_type);
- if (reg_width == instr_width)
- operand = build_reinterpret_cast (operand_type, operand);
- else if (reg_width > instr_width)
- {
- /* Clip the operand because the instruction's bitwidth
- is smaller than the HSAIL reg width. */
- if (INTEGRAL_TYPE_P (operand_type))
- operand
- = convert_to_integer (signed_or_unsigned_type_for
- (TYPE_UNSIGNED (operand_type),
- operand_type), operand);
- else
- operand = build_reinterpret_cast (operand_type, operand);
- }
- else if (reg_width < instr_width)
- /* At least shift amount operands can be read from smaller
- registers than the data operands. */
- operand = convert (operand_type, operand);
+ operand = build_resize_convert_view (operand_type, operand);
}
else if (brig_inst.opcode == BRIG_OPCODE_SHUFFLE)
/* Force the operand type to be treated as the raw type. */
- operand = build_reinterpret_cast (operand_type, operand);
+ operand = build_resize_convert_view (operand_type, operand);
if (brig_inst.opcode == BRIG_OPCODE_CMOV && i == 1)
{
brig_code_entry_handler::build_output_assignment (const BrigInstBase &brig_inst,
tree output, tree inst_expr)
{
- /* The destination type might be different from the output register
- variable type (which is always an unsigned integer type). */
+ /* The result/input type might be different from the output register
+ variable type (can be any type; see get_m_var_declfor_reg @
+ brig-function.cc). */
tree output_type = TREE_TYPE (output);
tree input_type = TREE_TYPE (inst_expr);
bool is_fp16 = (brig_inst.type & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16
{
inst_expr = add_temp_var ("before_f2h", inst_expr);
tree f2h_output = build_f2h_conversion (inst_expr);
- tree conv_int = convert_to_integer (output_type, f2h_output);
- tree assign = build2 (MODIFY_EXPR, output_type, output, conv_int);
+ tree conv = build_resize_convert_view (output_type, f2h_output);
+ tree assign = build2 (MODIFY_EXPR, output_type, output, conv);
m_parent.m_cf->append_statement (assign);
return assign;
}
- else if (VECTOR_TYPE_P (TREE_TYPE (output)))
+ else if (VECTOR_TYPE_P (output_type) && TREE_CODE (output) == CONSTRUCTOR)
{
/* Expand/unpack the input value to the given vector elements. */
size_t i;
bitwidths. */
size_t src_width = int_size_in_bytes (input_type);
size_t dst_width = int_size_in_bytes (output_type);
-
- if (src_width == dst_width)
- {
- /* A simple bitcast should do. */
- tree bitcast = build_reinterpret_cast (output_type, inst_expr);
- tree assign = build2 (MODIFY_EXPR, output_type, output, bitcast);
- m_parent.m_cf->append_statement (assign);
- return assign;
- }
- else
+ tree input = inst_expr;
+ /* Integer results are extended to the target register width, using
+ the same sign as the inst_expr. */
+ if (INTEGRAL_TYPE_P (TREE_TYPE (input)) && src_width != dst_width)
{
- tree conv_int = convert_to_integer (output_type, inst_expr);
- tree assign = build2 (MODIFY_EXPR, output_type, output, conv_int);
- m_parent.m_cf->append_statement (assign);
- return assign;
+ bool unsigned_p = TYPE_UNSIGNED (TREE_TYPE (input));
+ tree resized_type
+ = build_nonstandard_integer_type (dst_width * BITS_PER_UNIT,
+ unsigned_p);
+ input = convert_to_integer (resized_type, input);
}
+ input = build_resize_convert_view (output_type, input);
+ tree assign = build2 (MODIFY_EXPR, output_type, output, input);
+ m_parent.m_cf->append_statement (assign);
+ return assign;
}
return NULL_TREE;
}
{
tree built_in = builtin_decl_explicit (BUILT_IN_HSAIL_F32_TO_F16);
- tree casted_operand = build_reinterpret_cast (uint32_type_node, operand);
+ tree casted_operand = build_resize_convert_view (uint32_type_node, operand);
tree call = call_builtin (built_in, 1, uint16_type_node, uint32_type_node,
casted_operand);
tree output = create_tmp_var (const_fp32_type, "fp32out");
tree casted_result
- = build_reinterpret_cast (brig_to_generic::s_fp32_type, call);
+ = build_resize_convert_view (brig_to_generic::s_fp32_type, call);
tree assign = build2 (MODIFY_EXPR, TREE_TYPE (output), output, casted_result);
tree build_h2f_conversion (tree source);
tree_stl_vec build_operands (const BrigInstBase &brig_inst);
+ void analyze_operands (const BrigInstBase &brig_inst);
tree build_output_assignment (const BrigInstBase &brig_inst, tree output,
tree inst_expr);
/* HSAIL-specific builtin functions not yet integrated to gcc. */
static builtin_map s_custom_builtins;
+
+private:
+
+ tree_stl_vec build_or_analyze_operands (const BrigInstBase &brig_inst,
+ bool analyze);
};
/* Implement the Visitor software pattern for performing various actions on
tree input = build_tree_operand_from_brig (brig_inst, source_type, 1);
tree output = build_tree_operand_from_brig (brig_inst, dest_type, 0);
+
if (brig_inst->opcode == BRIG_OPCODE_COMBINE)
{
/* For combine, a simple reinterpret cast from the array constructor
works. */
-
- tree casted = build_reinterpret_cast (dest_type, input);
+ tree casted = build_resize_convert_view (TREE_TYPE (output), input);
tree assign = build2 (MODIFY_EXPR, TREE_TYPE (output), output, casted);
m_parent.m_cf->append_statement (assign);
}
/* Flush the float operand to zero if indicated with 'ftz'. */
if (FTZ && SCALAR_FLOAT_TYPE_P (src_type))
{
- tree casted_input = build_reinterpret_cast (src_type, input);
+ tree casted_input = build_resize_convert_view (src_type, input);
input = flush_to_zero (src_is_fp16) (*this, casted_input);
}
}
else
gcc_unreachable ();
- tree casted_input = build_reinterpret_cast (unsigned_int_type, input);
+ tree casted_input = build_resize_convert_view (unsigned_int_type,
+ input);
tree masked_input
= build2 (BIT_AND_EXPR, unsigned_int_type, casted_input, and_mask);
conversion_result
}
else if (dest_is_fp16)
{
- tree casted_input = build_reinterpret_cast (src_type, input);
+ tree casted_input = build_resize_convert_view (src_type, input);
conversion_result
= convert_to_real (brig_to_generic::s_fp32_type, casted_input);
if (FTZ)
}
else if (SCALAR_FLOAT_TYPE_P (dest_type))
{
- tree casted_input = build_reinterpret_cast (src_type, input);
+ tree casted_input = build_resize_convert_view (src_type, input);
conversion_result = convert_to_real (dest_type, casted_input);
}
else if (INTEGRAL_TYPE_P (dest_type) && INTEGRAL_TYPE_P (src_type))
#include "brig-builtins.def"
gcc_unreachable ();
- tree casted_input = build_reinterpret_cast (src_type, input);
+ tree casted_input = build_resize_convert_view (src_type, input);
conversion_result
= call_builtin (builtin, 1, dest_type, src_type, casted_input);
}
else
{
- tree casted_input = build_reinterpret_cast (src_type, input);
+ tree casted_input = build_resize_convert_view (src_type, input);
- /* Perform the int to float conversion. */
+ /* Perform the float to int conversion. */
conversion_result = convert_to_integer (dest_type, casted_input);
}
- /* The converted result is finally extended to the target register
- width, using the same sign as the destination. */
- conversion_result
- = convert_to_integer (TREE_TYPE (output), conversion_result);
}
else
{
/* Just use CONVERT_EXPR and hope for the best. */
- tree casted_input = build_reinterpret_cast (dest_type, input);
+ tree casted_input = build_resize_convert_view (dest_type, input);
conversion_result = build1 (CONVERT_EXPR, dest_type, casted_input);
}
size_t dst_reg_size = int_size_in_bytes (TREE_TYPE (output));
- tree assign = NULL_TREE;
/* The output register can be of different type&size than the
- conversion output size. Cast it to the register variable type. */
- if (dst_reg_size > conv_dst_size)
- {
- tree casted_output
- = build1 (CONVERT_EXPR, TREE_TYPE (output), conversion_result);
- assign = build2 (MODIFY_EXPR, TREE_TYPE (output), output, casted_output);
- }
- else
+ conversion output size. Only need to handle signed integers, rest
+ is handled by reinterpret_cast. */
+ tree casted_output = conversion_result;
+ if (dst_reg_size > conv_dst_size &&
+ INTEGRAL_TYPE_P (TREE_TYPE (casted_output)))
{
- tree casted_output
- = build_reinterpret_cast (TREE_TYPE (output), conversion_result);
- assign = build2 (MODIFY_EXPR, TREE_TYPE (output), output, casted_output);
+ gcc_assert (!VECTOR_TYPE_P (casted_output));
+
+ bool unsignedp = TYPE_UNSIGNED (TREE_TYPE (casted_output));
+ tree resized_int_type
+ = build_nonstandard_integer_type (dst_reg_size * BITS_PER_UNIT,
+ unsignedp);
+ casted_output = build1 (CONVERT_EXPR, resized_int_type, casted_output);
}
+
+ casted_output
+ = build_resize_convert_view (TREE_TYPE (output), casted_output);
+ tree assign = build2 (MODIFY_EXPR, TREE_TYPE (output), output, casted_output);
+
m_parent.m_cf->append_statement (assign);
return base->byteCount;
return variable;
}
+/* Return tree type for an HSA register.
+
+ The tree type can be anything (scalar, vector, int, float, etc.)
+ but its size is guaranteed to match the HSA register size.
+
+ HSA registers are untyped but we select a type based on their use
+ to reduce (sometimes unoptimizable) VIEW_CONVERT_EXPR nodes (seems
+ to occur when use or def reaches over current BB). */
+
+tree
+brig_function::get_tree_type_for_hsa_reg (const BrigOperandRegister *reg) const
+{
+  const size_t reg_bits = gccbrig_reg_size (reg);
+
+  /* Fallback used whenever the analysis recorded nothing usable for this
+     register: an unsigned integer as wide as the register.  */
+  tree best_type = build_nonstandard_integer_type (reg_bits, true);
+
+  /* Look up the usage statistics of the current function.  */
+  typedef std::map<std::string, regs_use_index> fn_index_map;
+  const fn_index_map &fn_index = m_parent->m_fn_regs_use_index;
+  fn_index_map::const_iterator fn_it = fn_index.find (m_name);
+  if (fn_it == fn_index.end ())
+    return best_type;
+
+  /* ...and within those, the statistics of this particular register.  */
+  const regs_use_index &reg_index = fn_it->second;
+  regs_use_index::const_iterator reg_it
+    = reg_index.find (gccbrig_hsa_reg_id (*reg));
+  if (reg_it == reg_index.end ())
+    return best_type;
+
+  /* Pick the most referenced type among those exactly as wide as the
+     register.  The comparison is strict, so on a tie the entry recorded
+     first wins.  */
+  const std::vector<std::pair<tree, size_t> > &refs
+    = reg_it->second.m_type_refs;
+  size_t best_count = 0;
+  for (size_t i = 0; i < refs.size (); ++i)
+    {
+      size_t type_bits = int_size_in_bytes (refs[i].first) * BITS_PER_UNIT;
+      if (type_bits == reg_bits && refs[i].second > best_count)
+        {
+          best_type = refs[i].first;
+          best_count = refs[i].second;
+        }
+    }
+
+  return best_type;
+}
+
/* Returns a DECL_VAR for the given HSAIL operand register.
If it has not been created yet for the function being generated,
- creates it as an unsigned int variable. */
+ creates it as a type determined by analysis phase. */
tree
brig_function::get_m_var_declfor_reg (const BrigOperandRegister *reg)
{
- size_t offset = reg->regNum;
- switch (reg->regKind)
- {
- case BRIG_REGISTER_KIND_QUAD:
- offset
- += BRIG_2_TREE_HSAIL_D_REG_COUNT + BRIG_2_TREE_HSAIL_S_REG_COUNT +
- BRIG_2_TREE_HSAIL_C_REG_COUNT;
- break;
- case BRIG_REGISTER_KIND_DOUBLE:
- offset += BRIG_2_TREE_HSAIL_S_REG_COUNT + BRIG_2_TREE_HSAIL_C_REG_COUNT;
- break;
- case BRIG_REGISTER_KIND_SINGLE:
- offset += BRIG_2_TREE_HSAIL_C_REG_COUNT;
- case BRIG_REGISTER_KIND_CONTROL:
- break;
- default:
- gcc_unreachable ();
- break;
- }
+ size_t offset = gccbrig_hsa_reg_id (*reg);
reg_decl_index_entry *regEntry = m_regs[offset];
if (regEntry == NULL)
size_t reg_size = gccbrig_reg_size (reg);
tree type;
if (reg_size > 1)
- type = build_nonstandard_integer_type (reg_size, true);
+ type = get_tree_type_for_hsa_reg (reg);
else
type = boolean_type_node;
typedef std::map<const BrigDirectiveVariable *, tree> variable_index;
typedef std::vector<tree> tree_stl_vec;
-/* There are 128 c regs and 2048 s/d/q regs each in the HSAIL. */
-#define BRIG_2_TREE_HSAIL_C_REG_COUNT (128)
-#define BRIG_2_TREE_HSAIL_S_REG_COUNT (2048)
-#define BRIG_2_TREE_HSAIL_D_REG_COUNT (2048)
-#define BRIG_2_TREE_HSAIL_Q_REG_COUNT (2048)
-#define BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT \
- (BRIG_2_TREE_HSAIL_C_REG_COUNT + BRIG_2_TREE_HSAIL_S_REG_COUNT \
- + BRIG_2_TREE_HSAIL_D_REG_COUNT + BRIG_2_TREE_HSAIL_Q_REG_COUNT)
-
/* Holds data for the currently built GENERIC function. */
class brig_function
phsa_descriptor m_descriptor;
private:
+
+ tree get_tree_type_for_hsa_reg (const BrigOperandRegister *reg) const;
+
/* Bookkeeping for the different HSA registers and their tree declarations
for the currently generated function. */
reg_decl_index_entry *m_regs[BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT];
tree instr_type = gccbrig_tree_type_for_hsa_type (brig_inst->type);
- if (VECTOR_TYPE_P (TREE_TYPE (data)))
+ /* In case of {ld,st}_v{2,4}. Note: since 'register' variables may
+ be any type, even a vector type, we distinguish the registers
+ from operand lists by checking for constructor nodes (which
+ operand lists are represented as). */
+ if (VECTOR_TYPE_P (TREE_TYPE (data)) && TREE_CODE (data) == CONSTRUCTOR)
instr_type = TREE_TYPE (data);
tree ptype = build_pointer_type (instr_type);
address_base, ptr_offset);
if (is_store && TREE_TYPE (data) != instr_type)
- {
- if (int_size_in_bytes (TREE_TYPE (data))
- == int_size_in_bytes (instr_type)
- && !INTEGRAL_TYPE_P (instr_type))
- data = build1 (VIEW_CONVERT_EXPR, instr_type, data);
- else
- data = convert (instr_type, data);
- }
+ data = build_resize_convert_view (instr_type, data);
build_mem_access (brig_inst, address, data);
}
};
+/* Code entry handler of the register use analysis: passes every
+   instruction it is invoked on to analyze_operands ().  */
+
+class brig_reg_use_analyzer : public brig_code_entry_handler
+{
+public:
+  brig_reg_use_analyzer (brig_to_generic &parent)
+    : brig_code_entry_handler (parent)
+  {
+  }
+
+  /* Analyzes the operands of the instruction entry at BASE and returns
+     BASE->byteCount.  */
+  size_t
+  operator () (const BrigBase *base)
+  {
+    const BrigInstBase &inst = *(const BrigInstBase *) base;
+    analyze_operands (inst);
+    return base->byteCount;
+  }
+};
+
/* Helper struct for pairing a BrigKind and a BrigCodeEntryHandler that
should handle its data. */
brig_directive_variable_handler var_handler (*this);
brig_directive_fbarrier_handler fbar_handler (*this);
brig_directive_function_handler func_handler (*this);
+ brig_reg_use_analyzer reg_use_analyzer (*this);
/* Need this for grabbing the module names for mangling the
group variable names. */
const BrigSectionHeader *csection_header = (const BrigSectionHeader *) m_code;
code_entry_handler_info handlers[]
- = {{BRIG_KIND_DIRECTIVE_VARIABLE, &var_handler},
+ = {{BRIG_KIND_INST_BASIC, ®_use_analyzer},
+ {BRIG_KIND_INST_MOD, ®_use_analyzer},
+ {BRIG_KIND_INST_CMP, ®_use_analyzer},
+ {BRIG_KIND_INST_MEM, ®_use_analyzer},
+ {BRIG_KIND_INST_CVT, ®_use_analyzer},
+ {BRIG_KIND_INST_SEG_CVT, ®_use_analyzer},
+ {BRIG_KIND_INST_SEG, ®_use_analyzer},
+ {BRIG_KIND_INST_ADDR, ®_use_analyzer},
+ {BRIG_KIND_INST_SOURCE_TYPE, ®_use_analyzer},
+ {BRIG_KIND_INST_ATOMIC, ®_use_analyzer},
+ {BRIG_KIND_INST_SIGNAL, ®_use_analyzer},
+ {BRIG_KIND_INST_BR, ®_use_analyzer},
+ {BRIG_KIND_INST_LANE, ®_use_analyzer},
+ {BRIG_KIND_INST_QUEUE, ®_use_analyzer},
+ {BRIG_KIND_DIRECTIVE_VARIABLE, &var_handler},
{BRIG_KIND_DIRECTIVE_FBARRIER, &fbar_handler},
{BRIG_KIND_DIRECTIVE_KERNEL, &func_handler},
{BRIG_KIND_DIRECTIVE_MODULE, &module_handler},
than the created reg var type in order to select correct instruction type
later on. This function creates the necessary reinterpret type cast from
a source variable to the destination type. In case no cast is needed to
- the same type, SOURCE is returned directly. */
+ the same type, SOURCE is returned directly.
+
+ In case of mismatched type sizes, casting:
+ - to narrower type the upper bits are clipped and
+ - to wider type the source value is zero extended. */
tree
-build_reinterpret_cast (tree destination_type, tree source)
+build_resize_convert_view (tree destination_type, tree source)
{
gcc_assert (source && destination_type && TREE_TYPE (source) != NULL_TREE
size_t dst_size = int_size_in_bytes (destination_type);
if (src_size == dst_size)
return build1 (VIEW_CONVERT_EXPR, destination_type, source);
- else if (src_size < dst_size)
+ else /* src_size != dst_size */
{
/* The src_size can be smaller at least with f16 scalars which are
stored to 32b register variables. First convert to an equivalent
size unsigned type, then extend to an unsigned type of the
target width, after which VIEW_CONVERT_EXPR can be used to
force to the target type. */
- tree unsigned_temp = build1 (VIEW_CONVERT_EXPR,
- get_unsigned_int_type (source_type),
- source);
- return build1 (VIEW_CONVERT_EXPR, destination_type,
- convert (get_unsigned_int_type (destination_type),
- unsigned_temp));
+ tree resized = convert (get_scalar_unsigned_int_type (destination_type),
+ build_reinterpret_to_uint (source));
+ gcc_assert ((size_t)int_size_in_bytes (TREE_TYPE (resized)) == dst_size);
+ return build_resize_convert_view (destination_type, resized);
}
- else
- gcc_unreachable ();
- return NULL_TREE;
+}
+
+/* Returns SOURCE viewed as a scalar unsigned integer of the same size
+   as its original type.  SOURCE is returned as is when its type is
+   already an unsigned integral type.  */
+
+tree
+build_reinterpret_to_uint (tree source)
+{
+  tree source_type = TREE_TYPE (source);
+  bool already_uint
+    = INTEGRAL_TYPE_P (source_type) && TYPE_UNSIGNED (source_type);
+  if (!already_uint)
+    source = build1 (VIEW_CONVERT_EXPR,
+                     get_scalar_unsigned_int_type (source_type), source);
+  return source;
+}
/* Returns the finished brig_function for the given generic FUNC_DECL,
{
types[i] = va_arg (ap, tree);
tree arg = va_arg (ap, tree);
- args[i] = build_reinterpret_cast (types[i], arg);
+ args[i] = build_resize_convert_view (types[i], arg);
if (types[i] == error_mark_node || args[i] == error_mark_node)
{
delete[] types;
true);
}
+/* Returns an unsigned integer type whose bit width equals the size of
+   ORIGINAL_TYPE.  */
+
+tree
+get_scalar_unsigned_int_type (tree original_type)
+{
+  HOST_WIDE_INT size_in_bits
+    = int_size_in_bytes (original_type) * BITS_PER_UNIT;
+  return build_nonstandard_integer_type (size_in_bits, true);
+}
+
void
dump_function (FILE *dump_file, brig_function *f)
{
fprintf (dump_file, "\n");
}
}
+
+/* Records use of the BRIG_REG as a TYPE in the current function. */
+
+void
+brig_to_generic::add_reg_used_as_type (const BrigOperandRegister &brig_reg,
+                                       tree type)
+{
+  gcc_assert (m_cf);
+
+  /* Statistics of BRIG_REG within the current function; the entries are
+     created on first access.  */
+  regs_use_index &fn_regs = m_fn_regs_use_index[m_cf->m_name];
+  reg_use_info &info = fn_regs[gccbrig_hsa_reg_id (brig_reg)];
+
+  std::map<tree, size_t>::iterator slot = info.m_type_refs_lookup.find (type);
+  if (slot != info.m_type_refs_lookup.end ())
+    {
+      /* TYPE was seen before: bump its reference count.  */
+      ++info.m_type_refs[slot->second].second;
+      return;
+    }
+
+  /* First use as TYPE: remember the index the new entry will get, then
+     append it with a count of one.  */
+  info.m_type_refs_lookup[type] = info.m_type_refs.size ();
+  info.m_type_refs.push_back (std::make_pair (type, 1));
+}
void add_group_variable (const std::string &name, size_t size,
size_t alignment, bool function_scope);
+ void add_reg_used_as_type (const BrigOperandRegister &brig_reg,
+ tree operand_type);
+
static tree s_fp16_type;
static tree s_fp32_type;
static tree s_fp64_type;
/* Accumulates the total group segment usage. */
size_t m_total_group_segment_usage;
+ /* Statistics about register uses per function. */
+ std::map<std::string, regs_use_index> m_fn_regs_use_index;
+
private:
void find_brig_sections ();
tree call_builtin (tree pdecl, int nargs, tree rettype, ...);
-tree build_reinterpret_cast (tree destination_type, tree source);
+tree build_resize_convert_view (tree destination_type, tree source);
+tree build_reinterpret_to_uint (tree source);
tree build_stmt (enum tree_code code, ...);
tree get_unsigned_int_type (tree type);
+tree get_scalar_unsigned_int_type (tree type);
+
void dump_function (FILE *dump_file, brig_function *f);
#endif
#include "brig-util.h"
#include "errors.h"
#include "diagnostic-core.h"
+#include "print-tree.h"
bool
group_variable_offset_index::has_variable (const std::string &name) const
/* Drop const qualifiers. */
return tree_type;
}
+
+/* Calculates a numeric identifier for the HSA register REG.
+
+   The register banks are laid out one after another in the order
+   c, s, d, q: the identifier is REG.regNum plus the sizes of all the
+   banks preceding REG's own bank.  For a register number that is valid
+   within its bank the result therefore lies in
+   [0, BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT).  REG.regNum itself is not
+   range checked here.  */
+
+size_t
+gccbrig_hsa_reg_id (const BrigOperandRegister &reg)
+{
+  size_t offset = reg.regNum;
+  switch (reg.regKind)
+    {
+    case BRIG_REGISTER_KIND_QUAD:
+      offset
+        += BRIG_2_TREE_HSAIL_D_REG_COUNT + BRIG_2_TREE_HSAIL_S_REG_COUNT
+        + BRIG_2_TREE_HSAIL_C_REG_COUNT;
+      break;
+    case BRIG_REGISTER_KIND_DOUBLE:
+      offset += BRIG_2_TREE_HSAIL_S_REG_COUNT + BRIG_2_TREE_HSAIL_C_REG_COUNT;
+      break;
+    case BRIG_REGISTER_KIND_SINGLE:
+      offset += BRIG_2_TREE_HSAIL_C_REG_COUNT;
+      break;
+    case BRIG_REGISTER_KIND_CONTROL:
+      /* The control registers come first; no bank precedes them.  */
+      break;
+    default:
+      gcc_unreachable ();
+      break;
+    }
+  return offset;
+}
+
+/* Returns the HSAIL name (for example "$s3") of the register identified
+   by REG_HASH, an identifier as computed by gccbrig_hsa_reg_id ().
+   REG_HASH must be smaller than the combined size of the four register
+   banks.  */
+
+std::string
+gccbrig_hsa_reg_name_from_id (size_t reg_hash)
+{
+  /* The register banks in the order gccbrig_hsa_reg_id () lays them
+     out, each with the letter used in its register names.  */
+  static const struct
+  {
+    char letter;
+    size_t count;
+  } banks[] = {{'c', BRIG_2_TREE_HSAIL_C_REG_COUNT},
+               {'s', BRIG_2_TREE_HSAIL_S_REG_COUNT},
+               {'d', BRIG_2_TREE_HSAIL_D_REG_COUNT},
+               {'q', BRIG_2_TREE_HSAIL_Q_REG_COUNT}};
+
+  for (size_t i = 0; i < sizeof (banks) / sizeof (banks[0]); ++i)
+    {
+      if (reg_hash < banks[i].count)
+        {
+          char reg_name[32];
+          /* size_t is not necessarily unsigned long, so convert
+             explicitly to match the %lu conversion.  */
+          snprintf (reg_name, sizeof (reg_name), "$%c%lu", banks[i].letter,
+                    (unsigned long) reg_hash);
+          return reg_name;
+        }
+      /* Not in this bank: make the id relative to the next one.  */
+      reg_hash -= banks[i].count;
+    }
+
+  gcc_unreachable ();
+  return "$??";
+}
+
+/* Prints the register usage statistics in INFO to DUMP: for each
+   register its HSAIL name, followed by one line per recorded type
+   consisting of the reference count in parentheses and a brief print of
+   the type.  */
+
+void
+gccbrig_print_reg_use_info (FILE *dump, const regs_use_index &info)
+{
+  typedef std::vector<std::pair<tree, size_t> >::const_iterator reg_use_it;
+
+  for (regs_use_index::const_iterator it = info.begin (); it != info.end ();
+       ++it)
+    {
+      std::string hsa_reg = gccbrig_hsa_reg_name_from_id (it->first);
+      /* The heading goes to DUMP like the rest of the output, so that it
+         stays together with the lines it introduces.  */
+      fprintf (dump, "%s:\n", hsa_reg.c_str ());
+
+      const reg_use_info &reg_info = it->second;
+      for (reg_use_it use = reg_info.m_type_refs.begin ();
+           use != reg_info.m_type_refs.end (); ++use)
+        {
+          /* size_t is not necessarily unsigned long, so convert
+             explicitly to match the %lu conversion.  */
+          fprintf (dump, "(%lu) ", (unsigned long) use->second);
+          print_node_brief (dump, "", use->first, 0);
+          fprintf (dump, "\n");
+        }
+    }
+}
#define GCC_BRIG_UTIL_H
#include <map>
+#include <vector>
#include "config.h"
#include "system.h"
#include "opts.h"
#include "tree.h"
+/* There are 128 c regs and 2048 s/d/q regs each in the HSAIL. */
+#define BRIG_2_TREE_HSAIL_C_REG_COUNT (128)
+#define BRIG_2_TREE_HSAIL_S_REG_COUNT (2048)
+#define BRIG_2_TREE_HSAIL_D_REG_COUNT (2048)
+#define BRIG_2_TREE_HSAIL_Q_REG_COUNT (2048)
+#define BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT \
+ (BRIG_2_TREE_HSAIL_C_REG_COUNT + BRIG_2_TREE_HSAIL_S_REG_COUNT \
+ + BRIG_2_TREE_HSAIL_D_REG_COUNT + BRIG_2_TREE_HSAIL_Q_REG_COUNT)
+
/* Helper class for keeping book of group variable offsets. */
class group_variable_offset_index
/* From hsa.h. */
bool hsa_type_packed_p (BrigType16_t type);
+struct reg_use_info
+{
+ /* Counts how many times an HSAIL register is used as each tree type
+ in generic expressions, stored as (type, count) pairs. The counts
+ are used to select the type of the 'register' variables in order
+ to reduce the emission of VIEW_CONVERT_EXPR nodes. The pairs are
+ kept in a vector (insertion order) for determinism in case there
+ is a tie between the counts. */
+ std::vector<std::pair<tree, size_t> > m_type_refs;
+ /* Lookup for the above vector: maps a tree type to the index of its
+ entry in m_type_refs. */
+ std::map<tree, size_t> m_type_refs_lookup;
+};
+
+/* Register usage statistics keyed by the register identifier computed
+   by gccbrig_hsa_reg_id ().  */
+typedef std::map<size_t, reg_use_info> regs_use_index;
+
+/* Numeric identifier of the HSA register REG.  */
+size_t gccbrig_hsa_reg_id (const BrigOperandRegister &reg);
+
+/* HSAIL register name for an identifier computed by
+   gccbrig_hsa_reg_id ().  */
+std::string gccbrig_hsa_reg_name_from_id (size_t reg_hash);
+
+/* Prints the register usage statistics in INFO.  */
+void gccbrig_print_reg_use_info (FILE *dump, const regs_use_index &info);
+
#endif
+2017-11-16 Henry Linjamäki <henry.linjamaki@parmance.com>
+
+ * brig.dg/test/gimple/vector.hsail: Update for HSA registers' tree
+ representation changes in brig1.
+ * brig.dg/test/gimple/packed.hsail: Likewise.
+ * brig.dg/test/gimple/internal-reg-var-casts.hsail: New.
+
2017-11-16 Jan Hubicka <hubicka@ucw.cz>
* gcc.dg/ipa/ipcp-2.c: Lower threshold.
--- /dev/null
+module &module:1:0:$full:$large:$default;
+
+/* Test for casting from/to representation of HSA registers. */
+
+/* HSA registers are untyped, but in gccbrig they are represented as */
+/* variables with a type selected by analysis. Currently, each */
+/* register variable, per function, gets the type it is used as */
+/* most often. Therefore, a register variable can be nearly any type. */
+/* This test makes sure the generic/tree expressions have the right */
+/* casts from/to the register variables. */
+
+
+/* { dg-do compile } */
+/* { dg-options "-fdump-tree-original" } */
+
+prog kernel &Kernel(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr)
+{
+ private_u64 %foo;
+ private_u64 %bar;
+ private_b128 %baz;
+
+ ld_kernarg_u64 $d0, [%input_ptr];
+ ld_global_u32 $s0, [$d0];
+
+ /* Trick gccbrig into selecting the wanted type for the registers. */
+
+/* $s0 is selected as float... */
+/* { dg-final { scan-tree-dump "<float:32> s0;" "original"} } */
+/* ..., therefore, there should not be any casts. */
+/* { dg-final { scan-tree-dump "s10 = s0 \\\+ s0;" "original"} } */
+
+ add_f32 $s10, $s0, $s0;
+ add_f32 $s10, $s0, $s0;
+ add_f32 $s10, $s0, $s0;
+ add_f32 $s10, $s0, $s0;
+ add_f32 $s10, $s0, $s0;
+
+/* Expression with other type, a cast is needed. */
+/* { dg-final { scan-tree-dump "s1 = VIEW_CONVERT_EXPR<unsigned int>.s0. \\\+ 123;" "original"} } */
+
+ add_u32 $s1, $s0, 123;
+
+/* { dg-final { scan-tree-dump "unsigned int s1;" "original"} } */
+
+ add_u32 $s10, $s1, 0;
+ add_u32 $s10, $s1, 0;
+ add_u32 $s10, $s1, 0;
+ add_u32 $s10, $s1, 0;
+ add_u32 $s10, $s1, 0;
+
+/* { dg-final { scan-tree-dump "s0 = VIEW_CONVERT_EXPR<<float:32>>.s1.;" "original"} } */
+
+ mov_b32 $s0, $s1;
+
+/* Rig the election for $d0 to be double. */
+/* { dg-final { scan-tree-dump "<float:64> d0;" "original"} } */
+/* { dg-final { scan-tree-dump "d10 = d0 \\\+ d0;" "original"} } */
+
+ add_f64 $d10, $d0, $d0;
+ add_f64 $d10, $d0, $d0;
+ add_f64 $d10, $d0, $d0;
+ add_f64 $d10, $d0, $d0;
+ add_f64 $d10, $d0, $d0;
+
+/* Make $s2 a vector type. */
+/* { dg-final { scan-tree-dump "vector.4. unsigned char s2;" "original"} } */
+/* { dg-final { scan-tree-dump "s2 = VIEW_CONVERT_EXPR<vector.4. unsigned char>\\\(s1\\\) \\\+ VIEW_CONVERT_EXPR<vector.4. unsigned char>\\\(s1\\\);" "original"} } */
+
+ add_pp_u8x4 $s2, $s1, $s1;
+
+/* { dg-final { scan-tree-dump "s20 = s2 \\\+ s2;" "original"} } */
+
+ add_pp_u8x4 $s20, $s2, $s2;
+ add_pp_u8x4 $s20, $s2, $s2;
+ add_pp_u8x4 $s20, $s2, $s2;
+ add_pp_u8x4 $s20, $s2, $s2;
+
+/* { dg-final { scan-tree-dump "d0 = VIEW_CONVERT_EXPR<<float:64>>.{VIEW_CONVERT_EXPR<unsigned int>.s0., VIEW_CONVERT_EXPR<unsigned int>.s2.}.;" "original"} } */
+
+ combine_v2_b64_b32 $d0, ($s0, $s2);
+
+/* { dg-final { scan-tree-dump "s2 = VIEW_CONVERT_EXPR<vector.4. unsigned char>.BIT_FIELD_REF <d0, 32, 0>.;" "original"} } */
+/* { dg-final { scan-tree-dump "s1 = BIT_FIELD_REF <d0, 32, 32>;" "original"} } */
+
+ expand_v2_b32_b64 ($s2, $s1), $d0;
+
+/* { dg-final { scan-tree-dump "s0 = VIEW_CONVERT_EXPR<<float:32>>\\\(.*VIEW_CONVERT_EXPR<unsigned int>.s0\[\)\]*;" "original"} } */
+
+ cvt_s16_s8 $s0, $s0;
+
+/* { dg-final { scan-tree-dump "c0 = .*VIEW_CONVERT_EXPR<<float:32>>.s2..* != 0;" "original"} } */
+
+ cvt_b1_f32 $c0, $s2;
+
+/* { dg-final { scan-tree-dump ".*__private_base_addr.* = .*\\\(unsigned char\\\) VIEW_CONVERT_EXPR<unsigned int>\\\(s0\\\)\[\)\]*;" "original"} } */
+
+ st_private_u8 $s0, [%foo];
+
+/* { dg-final { scan-tree-dump ".*__private_base_addr.* = .*\\\(unsigned short\\\) VIEW_CONVERT_EXPR<unsigned int>\\\(s2\\\)\[\)\]*;" "original"} } */
+
+ st_private_u16 $s2, [%bar];
+
+/* { dg-final { scan-tree-dump "mem_read.\[0-9\]* = \\\*\\\(signed char \\\*\\\) \\\(__private_base_addr .*\\\);\[ \n\]*s2 = VIEW_CONVERT_EXPR<vector.4. unsigned char>\\\(\\\(signed int\\\) mem_read.\[0-9\]*\\\);" "original"} } */
+
+ ld_private_s8 $s2, [%foo];
+
+/* { dg-final { scan-tree-dump "mem_read.\[0-9\]* = \\\*\\\(signed short \\\*\\\) \\\(__private_base_addr .*\\\);\[ \n\]*s0 = VIEW_CONVERT_EXPR<<float:32>>\\\(\\\(signed int\\\) mem_read.\[0-9\]*\\\);" "original"} } */
+
+ ld_private_s16 $s0, [%bar];
+
+/* { dg-final { scan-tree-dump "\\\*\\\(<float:32> \\\*\\\) \\\(__private_base_addr.*\\\) \\\+ 0 = s0;" "original"} } */
+/* { dg-final { scan-tree-dump "\\\*\\\(<float:32> \\\*\\\) \\\(__private_base_addr.*\\\) \\\+ 4 = VIEW_CONVERT_EXPR<<float:32>>\\\(s1\\\);" "original"} } */
+/* { dg-final { scan-tree-dump "\\\*\\\(<float:32> \\\*\\\) \\\(__private_base_addr.*\\\) \\\+ 8 = VIEW_CONVERT_EXPR<<float:32>>\\\(s2\\\);" "original"} } */
+
+ st_v3_private_f32 ($s0, $s1, $s2), [%baz];
+
+/* { dg-final { scan-tree-dump "mem_read.\[0-9\]* = \\\*\\\(signed short \\\*\\\) \\\(__private_base_addr.*\\\) \\\+ 0;\[ \n\]*s0 = VIEW_CONVERT_EXPR<<float:32>>\\\(\\\(signed int\\\) mem_read.\[0-9\]*\\\);" "original"} } */
+/* { dg-final { scan-tree-dump "mem_read.\[0-9\]* = \\\*\\\(signed short \\\*\\\) \\\(__private_base_addr.*\\\) \\\+ 2;\[ \n\]*s1 = VIEW_CONVERT_EXPR<unsigned int>\\\(\\\(signed int\\\) mem_read.\[0-9\]*\\\);" "original"} } */
+/* { dg-final { scan-tree-dump "mem_read.\[0-9\]* = \\\*\\\(signed short \\\*\\\) \\\(__private_base_addr.*\\\) \\\+ 4;\[ \n\]*s2 = VIEW_CONVERT_EXPR<vector.4. unsigned char>\\\(\\\(signed int\\\) mem_read.\[0-9\]*\\\);" "original"} } */
+
+ ld_v3_private_s16 ($s0, $s1, $s2), [%baz];
+
+/* { dg-final { scan-tree-dump "s5 = .*VIEW_CONVERT_EXPR<unsigned int>\\\(s0\\\) == VIEW_CONVERT_EXPR<unsigned int>\\\(s2\\\)\\\) .*;" "original"} } */
+
+ cmp_eq_s32_u32 $s5, $s0, $s2;
+
+/* { dg-final { scan-tree-dump "s6 = VIEW_CONVERT_EXPR<<float:32>>\\\(.*VIEW_CONVERT_EXPR<vector\\\(2\\\) unsigned short>\\\(s0\\\).*VIEW_CONVERT_EXPR<vector\\\(2\\\) unsigned short>\\\(s2\\\).*;" "original"} } */
+
+ cmp_eq_pp_u16x2_u16x2 $s6, $s0, $s2;
+
+/* { dg-final { scan-tree-dump "<float:32> s60;" "original"} } */
+
+ add_f32 $s60, $s6, $s6;
+ add_f32 $s60, $s6, $s6;
+ add_f32 $s60, $s6, $s6;
+ add_f32 $s60, $s6, $s6;
+
+ ld_kernarg_u64 $d0, [%output_ptr];
+ st_global_u32 $s0, [$d0];
+
+ ret;
+};
+
+
+
+
ret;
};
-/* The b128 load is done using uint128_t*.
+/* The b128 load is done using uint128_t*. */
/* { dg-final { scan-tree-dump "q0 = VIEW_CONVERT_EXPR<uint128_t>\\\(mem_read.\[0-9\]+\\\);" "original"} } */
/* Before arithmetics, the uint128_t is casted to a vector datatype. */
/* in comparison to the HSAIL syntax. */
/* { dg-final { scan-tree-dump "\\\+ { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }" "original"} } */
-/* After arithmetics, the vector DT is casted back to a uint128_t. */
-/* { dg-final { scan-tree-dump "q1 = VIEW_CONVERT_EXPR<uint128_t>" "original"} } */
-
/* Broadcasted the constant vector's lowest element and summed it up in the next line. */
-/* { dg-final { scan-tree-dump "= { 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15 };\[\n \]+_\[0-9\]+ = _\[0-9\]+ \\\+ _\[0-9\]+;" "gimple"} } */
+/* { dg-final { scan-tree-dump "= { 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15 };\[\n \]+\[a-z0-9_\]+ = \[a-z0-9_\]+ \\\+ \[a-z0-9_\]+;" "gimple"} } */
/* Broadcasted the registers lowest element via a VEC_PERM_EXPR that has an all-zeros mask. */
-/* { dg-final { scan-tree-dump "VEC_PERM_EXPR <_\[0-9\]+, _\[0-9\]+, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }>;" "gimple" } } */
+/* { dg-final { scan-tree-dump "VEC_PERM_EXPR <\[a-z0-9_\]+, \[a-z0-9_\]+, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }>;" "gimple" } } */
/* For the add_ss we assume performing the computation over the whole vector is cheaper than */
/* extracting the scalar and performing a scalar operation. This aims to stay in the vector
/* datapath as long as possible. */
-/* { dg-final { scan-tree-dump "_\[0-9\]+ = VIEW_CONVERT_EXPR<vector\\\(16\\\) unsigned char>\\\(q2\\\);\[\n \]+_\[0-9\]+ = VIEW_CONVERT_EXPR<vector\\\(16\\\) unsigned char>\\\(q3\\\);\[\n \]+_\[0-9\]+ = _\[0-9\]+ \\\+ _\[0-9\]+;" "gimple" } } */
+/* { dg-final { scan-tree-dump "_\[0-9\]+ = q2 \\\+ q3;" "gimple" } } */
/* Insert the lowest element of the result to the lowest element of the result register. */
-/* { dg-final { scan-tree-dump "= VEC_PERM_EXPR <_\[0-9\]+, new_output.\[0-9\]+_\[0-9\]+, { 16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }>;" "gimple" } } */
+/* { dg-final { scan-tree-dump "= VEC_PERM_EXPR <\[a-z0-9_\]+, new_output.\[0-9\]+_\[0-9\]+, { 16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }>;" "gimple" } } */
-/* { dg-final { scan-tree-dump "q4 = VIEW_CONVERT_EXPR<uint128_t>\\\(s_output.\[0-9\]+_\[0-9\]+\\\);" "gimple" } } */
+/* FIXME */
+/* { dg-final { scan-tree-dump "q4 = \(VIEW_CONVERT_EXPR<uint128_t>\\\()?s_output.\[0-9\]+\(_\[0-9\]+\)*\\\)?;" "gimple" } } */
/* The saturating arithmetics are (curently) implemented using scalar builtin calls. */
/* { dg-final { scan-tree-dump-times "= __builtin___hsail_sat_add_u8" 64 "gimple" } } */
/* A single operand vector instr (neg.) */
-/* { dg-final { scan-tree-dump " = VIEW_CONVERT_EXPR<vector\\\(8\\\) signed short>\\\(q8\\\);\[\n \]+_\[0-9\]+ = -_\[0-9\]+;\[\n \]+" "gimple" } } */
+/* { dg-final { scan-tree-dump "= VIEW_CONVERT_EXPR<vector\\\(8\\\) signed short>\\\(\(s_output.\[0-9\]+_\[0-9\]+|q8\)\\\);\[\n \]+q9 = -_\[0-9\]+;\[\n \]+" "gimple" } } */
/* { dg-final { scan-tree-dump " = MEM\\\[\\\(vector\\\(2\\\) <float:32> \\\*\\\)" "original"} } */
/* The v3 load is scalarized (at the moment) due to gcc requiring 2's exponent wide vectors. */
-/* { dg-final { scan-tree-dump "s0 = VIEW_CONVERT_EXPR<unsigned int>\\\(BIT_FIELD_REF <mem_read.\[0-9\]+, 32, 0>\\\);\[\n ]+s1 = VIEW_CONVERT_EXPR<unsigned int>\\\(BIT_FIELD_REF <mem_read.\[0-9\]+, 32, 32>\\\);" "original"} } */
+/* { dg-final { scan-tree-dump "s0 = .*BIT_FIELD_REF <mem_read.\[0-9\]+, 32, 0>\\\)?;\[\n ]+s1 = .*BIT_FIELD_REF <mem_read.\[0-9\]+, 32, 32>\\\)?;" "original"} } */
/* The v4 load is done via casting to a vector datatype ptr. */
/* { dg-final { scan-tree-dump " = MEM\\\[\\\(vector\\\(4\\\) <float:32> \\\*\\\)" "original"} } */
/* The combines are generated to vector constructors. */
-/* { dg-final { scan-tree-dump "{s1, s0}" "original"} } */
-/* { dg-final { scan-tree-dump "{s2, s3}" "original"} } */
+/* { dg-final { scan-tree-dump "{.*s1\\\)?, .*s0\\\)?}" "original"} } */
+/* { dg-final { scan-tree-dump "{.*s2\\\)?, .*s3\\\)?}" "original"} } */
/* Expands to BIT_FIELD_REFs. */
-/* { dg-final { scan-tree-dump "s0 = BIT_FIELD_REF <d4, 32, 0>;" "original"} } */
-/* { dg-final { scan-tree-dump "s3 = BIT_FIELD_REF <d4, 32, 32>;" "original"} } */
+/* { dg-final { scan-tree-dump "s0 = \(VIEW_CONVERT_EXPR.*\\\(\)?BIT_FIELD_REF <d4, 32, 0>\\\)?;" "original"} } */
+/* { dg-final { scan-tree-dump "s3 = \(VIEW_CONVERT_EXPR.*\\\(\)?BIT_FIELD_REF <d4, 32, 32>\\\)?;" "original"} } */
/* The v1 store is done via casting to a vector datatype ptr and constructing a vector from the inputs. */
/* { dg-final { scan-tree-dump "MEM\\\[\\\(vector\\\(2\\\) <float:32> \\\*\\\)\\\(<float:32> \\\*\\\) d1\\\] = " "original"} } */