segment variables.
The PRM spec defines function and module scope group segment variables
as an experimental feature. However, the PRM test suite uses them and
hcc relies on them. In addition, hcc assumes a certain group variable
layout in its dynamic group segment allocation code.
We cannot have global group memory offsets if we want to
both have kernel-specific group segment size and multiple kernels
calling the same functions that use function scope group memory
variables.
Now the group segment is handled by separate bookkeeping of module
scope and function (kernel) offsets. Each function has a "frame"
in the group segment, the offset to which is given as an argument.
From-SVN: r253233
-2017-05-13 Pekka Jääskeläinen <pekka.jaaskelainen@parmance.com>
+2017-09-27 Pekka Jääskeläinen <pekka.jaaskelainen@parmance.com>
+
+ * brig-lang.c: Improved support for function and module scope
+ group segment variables. The PRM spec defines function and module
+ scope group segment variables as an experimental feature. However,
+ the PRM test suite uses them and hcc relies on them. In addition, hcc
+ assumes a certain group variable layout in its dynamic group segment
+ allocation code. We cannot have global group memory offsets if we
+ want to both have kernel-specific group segment size and multiple
+ kernels calling the same functions that use function scope group memory
+ variables. Now the group segment is handled by separate bookkeeping of
+ module scope and function (kernel) offsets. Each function has a "frame"
+ in the group segment, the offset to which is given as an argument.
+ * brigfrontend/brig-branch-inst-handler.cc: See above.
+ * brigfrontend/brig-code-entry-handler.cc: See above.
+ * brigfrontend/brig-fbarrier-handler.cc: See above.
+ * brigfrontend/brig-function-handler.cc: See above.
+ * brigfrontend/brig-function.cc: See above.
+ * brigfrontend/brig-function.h: See above.
+ * brigfrontend/brig-to-generic.cc: See above.
+ * brigfrontend/brig-to-generic.h: See above.
+ * brigfrontend/brig-util.cc: See above.
+ * brigfrontend/brig-util.h: See above.
+ * brigfrontend/brig-variable-handler.cc: See above.
+
+2017-09-25 Pekka Jääskeläinen <pekka.jaaskelainen@parmance.com>
* brigfrontend/brig-to-generic.cc: Ensure per WI copies of
private variables are aligned too.
flag_excess_precision_cmdline = EXCESS_PRECISION_STANDARD;
/* gccbrig casts pointers around like crazy, TBAA produces
- broken code if not force disabling it. */
+ broken code if not force disabling it. */
flag_strict_aliasing = 0;
/* Returning false means that the backend should be used. */
{
brig_to_generic brig_to_gen;
+ std::vector <char*> brig_blobs;
+
for (unsigned int i = 0; i < num_in_fnames; ++i)
{
error ("could not read the BRIG file");
exit (1);
}
- brig_to_gen.parse (brig_blob);
fclose (f);
+
+ brig_to_gen.analyze (brig_blob);
+ brig_blobs.push_back (brig_blob);
+ }
+
+ for (size_t i = 0; i < brig_blobs.size(); ++i)
+ {
+ char *brig_blob = brig_blobs.at(i);
+ brig_to_gen.parse (brig_blob);
}
brig_to_gen.write_globals ();
+
+ for (size_t i = 0; i < brig_blobs.size (); ++i)
+ delete brig_blobs[i];
}
static tree
they might call builtins that need them or access group/private
memory. */
+ tree group_local_offset
+ = add_temp_var ("group_local_offset",
+ build_int_cst
+ (uint32_type_node,
+ m_parent.m_cf->m_local_group_variables.size()));
+
+ /* TODO: ensure the callee's frame is aligned! */
+
vec_safe_push (in_args, m_parent.m_cf->m_context_arg);
vec_safe_push (in_args, m_parent.m_cf->m_group_base_arg);
+ vec_safe_push (in_args, group_local_offset);
vec_safe_push (in_args, m_parent.m_cf->m_private_base_arg);
tree call = build_call_vec (ret_val_type, build_fold_addr_expr (func_ref),
{
const BrigDirectiveFbarrier* fbar = (const BrigDirectiveFbarrier*)&ref;
- uint64_t offset = m_parent.group_variable_segment_offset
- (m_parent.get_mangled_name (fbar));
-
- return build_int_cst (uint32_type_node, offset);
+ std::string var_name = m_parent.get_mangled_name (fbar);
+ uint64_t offset
+ = m_parent.m_cf->group_variable_segment_offset (var_name);
+
+ tree local_offset = build_int_cst (uint32_type_node, offset);
+ if (m_parent.m_cf->m_local_group_variables.has_variable (var_name))
+ local_offset
+ = build2 (PLUS_EXPR, uint64_type_node, local_offset,
+ convert (uint64_type_node,
+ m_parent.m_cf->m_group_local_offset_arg));
+ return local_offset;
}
else
gcc_unreachable ();
}
else if (segment == BRIG_SEGMENT_GROUP)
{
-
- uint64_t offset = m_parent.group_variable_segment_offset (var_name);
+ uint64_t offset
+ = m_parent.m_cf->group_variable_segment_offset (var_name);
const_offset = build_int_cst (size_type_node, offset);
+
+ /* If it's a local group variable reference, add the function's local
+ group segment offset to get the offset from the group base ptr. */
+ if (m_parent.m_cf->m_local_group_variables.has_variable (var_name))
+ const_offset
+ = build2 (PLUS_EXPR, uint64_type_node, const_offset,
+ convert (uint64_type_node,
+ m_parent.m_cf->m_group_local_offset_arg));
+
}
else if (segment == BRIG_SEGMENT_PRIVATE || segment == BRIG_SEGMENT_SPILL)
{
if (m_parent.m_cf != NULL)
m_parent.m_cf->m_function_scope_vars.insert (base);
std::string var_name = m_parent.get_mangled_name (fbar);
- m_parent.append_group_variable (var_name, FBARRIER_STRUCT_SIZE, 1);
+ m_parent.add_group_variable (var_name, FBARRIER_STRUCT_SIZE, 1,
+ m_parent.m_cf != NULL);
return base->byteCount;
}
size_t
brig_directive_function_handler::operator () (const BrigBase *base)
{
- m_parent.finish_function ();
+ if (!m_parent.m_analyzing)
+ m_parent.finish_function ();
size_t bytes_consumed = base->byteCount;
if (is_kernel && !is_definition)
return bytes_consumed;
+ std::string func_name = m_parent.get_mangled_name (exec);
+ if (is_kernel)
+ /* The generated kernel function is not the one that should be
+ called by the host. */
+ func_name = std::string ("_") + func_name;
+
m_parent.m_cf = new brig_function (exec, &m_parent);
+ m_parent.m_cf->m_name = func_name;
+ m_parent.m_cf->m_is_kernel = is_kernel;
- std::string func_name = m_parent.get_mangled_name (exec);
+ /* During the analyze step, the above information is all we need per
+ function. */
+ if (m_parent.m_analyzing)
+ return bytes_consumed;
tree fndecl;
tree ret_value = NULL_TREE;
if (is_kernel)
{
- /* The generated kernel function is not the one that should be
- called by the host. */
- func_name = std::string ("_") + func_name;
-
tree name_identifier
= get_identifier_with_length (func_name.c_str (), func_name.size ());
DECL_ARTIFICIAL (group_base_arg) = 1;
TREE_READONLY (group_base_arg) = 1;
TREE_USED (group_base_arg) = 1;
+ m_parent.m_cf->m_group_base_arg = group_base_arg;
+
+ /* To implement call stack and (non-kernel) function scope group variables,
+ we need to pass an offset which describes how far we are from
+ group_base_ptr.
+ It is added to any function local group variable offsets to
+ get the address relative to the bottom of the group memory chunk. */
+ tree group_local_offset_arg
+ = build_decl (UNKNOWN_LOCATION, PARM_DECL,
+ get_identifier ("__group_local_offset"), uint32_type_node);
+ chainon (DECL_ARGUMENTS (fndecl), group_local_offset_arg);
+ DECL_ARG_TYPE (group_local_offset_arg) = uint32_type_node;
+ DECL_CONTEXT (group_local_offset_arg) = fndecl;
+ DECL_ARTIFICIAL (group_local_offset_arg) = 1;
+ TREE_READONLY (group_local_offset_arg) = 1;
+ TREE_USED (group_local_offset_arg) = 1;
+ m_parent.m_cf->m_group_local_offset_arg = group_local_offset_arg;
/* Same for private. */
tree private_base_arg
m_parent.start_function (fndecl);
- m_parent.m_cf->m_name = func_name;
m_parent.m_cf->m_func_decl = fndecl;
m_parent.m_cf->m_current_bind_expr = bind_expr;
- m_parent.m_cf->m_is_kernel = is_kernel;
m_parent.m_cf->m_context_arg = context_arg;
- m_parent.m_cf->m_group_base_arg = group_base_arg;
m_parent.m_cf->m_private_base_arg = private_base_arg;
if (ret_value != NULL_TREE && TREE_TYPE (ret_value) != void_type_node)
m_context_arg (NULL_TREE), m_group_base_arg (NULL_TREE),
m_private_base_arg (NULL_TREE), m_ret_value (NULL_TREE),
m_next_kernarg_offset (0), m_kernarg_max_align (0),
- m_ret_value_brig_var (NULL), m_has_barriers (false),
- m_has_allocas (false), m_has_function_calls_with_barriers (false),
- m_calls_analyzed (false), m_is_wg_function (false),
- m_has_unexpanded_dp_builtins (false), m_generating_arg_block (false),
- m_parent (parent)
+ m_ret_value_brig_var (NULL), m_has_barriers (false), m_has_allocas (false),
+ m_has_function_calls_with_barriers (false), m_calls_analyzed (false),
+ m_is_wg_function (false), m_has_unexpanded_dp_builtins (false),
+ m_generating_arg_block (false), m_parent (parent)
{
memset (m_regs, 0,
BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT * sizeof (BrigOperandRegister *));
tree phsail_launch_kernel_call;
+ /* Compute the local group segment frame start pointer. */
+ tree group_local_offset_temp
+ = create_tmp_var (uint32_type_node, "group_local_offset");
+ tree group_local_offset_arg
+ = build2 (MODIFY_EXPR, uint32_type_node,
+ group_local_offset_temp,
+ build_int_cst (uint32_type_node,
+ m_parent->m_module_group_variables.size()));
+
/* Emit a launcher depending whether we converted the kernel function to
a work group function or not. */
if (m_is_wg_function)
phsail_launch_kernel_call
= call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_LAUNCH_WG_FUNC),
- 3, void_type_node,
+ 4, void_type_node,
ptr_type_node, kernel_func_ptr, ptr_type_node,
- context_arg, ptr_type_node, group_base_addr_arg);
+ context_arg, ptr_type_node, group_base_addr_arg,
+ uint32_type_node, group_local_offset_arg);
else
phsail_launch_kernel_call
= call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_LAUNCH_KERNEL),
- 3, void_type_node,
+ 4, void_type_node,
ptr_type_node, kernel_func_ptr, ptr_type_node,
- context_arg, ptr_type_node, group_base_addr_arg);
+ context_arg, ptr_type_node, group_base_addr_arg,
+ uint32_type_node, group_local_offset_arg);
append_to_statement_list_force (phsail_launch_kernel_call, &stmt_list);
{
return m_function_scope_vars.find (var) != m_function_scope_vars.end ();
}
+
+size_t
+brig_function::group_variable_segment_offset (const std::string &name) const
+{
+ if (m_local_group_variables.has_variable (name))
+ return m_local_group_variables.segment_offset (name);
+
+ gcc_assert (m_parent->m_module_group_variables.has_variable (name));
+ return m_parent->m_module_group_variables.segment_offset (name);
+}
#include "tree.h"
#include "tree-iterator.h"
#include "hsa-brig-format.h"
-
-class brig_to_generic;
+#include "brig-util.h"
#include <map>
#include <string>
#include "phsa.h"
+class brig_to_generic;
+
typedef std::map<std::string, tree> label_index;
typedef std::map<const BrigDirectiveVariable *, tree> variable_index;
typedef std::vector<tree> tree_stl_vec;
tree add_local_variable (std::string name, tree type);
+ size_t group_variable_segment_offset (const std::string &name) const;
+
+ bool has_group_variable (const std::string &name) const;
+
+ size_t group_segment_size () const;
+
tree get_m_var_declfor_reg (const BrigOperandRegister *reg);
bool convert_to_wg_function ();
/* The __context function argument. */
tree m_context_arg;
+
/* The __group_base_ptr argument in the current function.
- Points to the start of the group segment for the kernel
- instance. */
+ Points to the start of the group segment for the work-group. */
tree m_group_base_arg;
+
+ /* The __group_local_offset argument in the current function. It
+ contains the offset relative to the group_base_ptr where the function's
+ local area for group variables resides. */
+ tree m_group_local_offset_arg;
+
/* The __private_base_ptr argument in the current function.
Points to the start of the private segment. */
tree m_private_base_arg;
/* True if the function has at least one alloca instruction. */
bool m_has_allocas;
- /* If the kernel containts at least one function call that _may_
+ /* If the kernel contains at least one function call that _may_
contain a barrier call, this is set to true. */
bool m_has_function_calls_with_barriers;
/* The functions called by this function. */
std::vector<tree> m_called_functions;
+ /* Stores the kernel scope group variable offsets if the function is
+ a kernel. */
+ group_variable_offset_index m_local_group_variables;
+
brig_to_generic *m_parent;
/* The metadata of the function that should be stored with the binary and
passed to the HSA runtime: */
tree brig_to_generic::s_fp64_type;
brig_to_generic::brig_to_generic ()
- : m_cf (NULL), m_brig (NULL), m_next_group_offset (0),
- m_next_private_offset (0)
+ : m_cf (NULL), m_analyzing (true), m_total_group_segment_usage (0),
+ m_brig (NULL), m_next_private_offset (0)
{
m_globals = NULL_TREE;
}
};
-/* Parses the given BRIG blob. */
+/* Helper struct for pairing a BrigKind and a BrigCodeEntryHandler that
+ should handle its data. */
-void
-brig_to_generic::parse (const char *brig_blob)
+struct code_entry_handler_info
{
- m_brig = brig_blob;
- m_brig_blobs.push_back (brig_blob);
+ BrigKind kind;
+ brig_code_entry_handler *handler;
+};
- const BrigModuleHeader *mheader = (const BrigModuleHeader *) brig_blob;
- if (strncmp (mheader->identification, "HSA BRIG", 8) != 0)
- fatal_error (UNKNOWN_LOCATION, PHSA_ERROR_PREFIX_INCOMPATIBLE_MODULE
- "Unrecognized file format.");
- if (mheader->brigMajor != 1 || mheader->brigMinor != 0)
- fatal_error (UNKNOWN_LOCATION, PHSA_ERROR_PREFIX_INCOMPATIBLE_MODULE
- "BRIG version not supported. BRIG 1.0 required.");
+/* Finds the BRIG file sections in the currently processed file. */
+void
+brig_to_generic::find_brig_sections ()
+{
m_data = m_code = m_operand = NULL;
+ const BrigModuleHeader *mheader = (const BrigModuleHeader *) m_brig;
/* Find the positions of the different sections. */
for (uint32_t sec = 0; sec < mheader->sectionCount; ++sec)
{
uint64_t offset
- = ((const uint64_t *) (brig_blob + mheader->sectionIndex))[sec];
+ = ((const uint64_t *) (m_brig + mheader->sectionIndex))[sec];
const BrigSectionHeader *section_header
- = (const BrigSectionHeader *) (brig_blob + offset);
+ = (const BrigSectionHeader *) (m_brig + offset);
std::string name ((const char *) (&section_header->name),
section_header->nameLength);
if (m_operand == NULL)
gcc_unreachable ();
+}
+
+/* Does a first pass over the given BRIG to collect data needed for the
+ actual parsing. Currently this includes only collecting the
+ group segment variable usage to support the experimental HSA PRM feature
+ where group variables can be declared also in module and function scope
+ (in addition to kernel scope).
+*/
+
+void
+brig_to_generic::analyze (const char *brig_blob)
+{
+ const BrigModuleHeader *mheader = (const BrigModuleHeader *) brig_blob;
+
+ if (strncmp (mheader->identification, "HSA BRIG", 8) != 0)
+ fatal_error (UNKNOWN_LOCATION, PHSA_ERROR_PREFIX_INCOMPATIBLE_MODULE
+ "Unrecognized file format.");
+ if (mheader->brigMajor != 1 || mheader->brigMinor != 0)
+ fatal_error (UNKNOWN_LOCATION, PHSA_ERROR_PREFIX_INCOMPATIBLE_MODULE
+ "BRIG version not supported. BRIG 1.0 required.");
+
+ m_brig = brig_blob;
+
+ find_brig_sections ();
+
+ brig_directive_variable_handler var_handler (*this);
+ brig_directive_fbarrier_handler fbar_handler (*this);
+ brig_directive_function_handler func_handler (*this);
+
+ /* Need this for grabbing the module names for mangling the
+ group variable names. */
+ brig_directive_module_handler module_handler (*this);
+ skipped_entry_handler skipped_handler (*this);
+
+ const BrigSectionHeader *csection_header = (const BrigSectionHeader *) m_code;
+
+ code_entry_handler_info handlers[]
+ = {{BRIG_KIND_DIRECTIVE_VARIABLE, &var_handler},
+ {BRIG_KIND_DIRECTIVE_FBARRIER, &fbar_handler},
+ {BRIG_KIND_DIRECTIVE_KERNEL, &func_handler},
+ {BRIG_KIND_DIRECTIVE_MODULE, &module_handler},
+ {BRIG_KIND_DIRECTIVE_FUNCTION, &func_handler}};
+
+ m_analyzing = true;
+ for (size_t b = csection_header->headerByteCount; b < m_code_size;)
+ {
+ const BrigBase *entry = (const BrigBase *) (m_code + b);
+
+ brig_code_entry_handler *handler = &skipped_handler;
+
+ if (m_cf != NULL && b >= m_cf->m_brig_def->nextModuleEntry)
+ {
+ /* The function definition ended. We can just discard the place
+ holder function. */
+ m_total_group_segment_usage += m_cf->m_local_group_variables.size ();
+ delete m_cf;
+ m_cf = NULL;
+ }
+
+ /* Find a handler. */
+ for (size_t i = 0;
+ i < sizeof (handlers) / sizeof (code_entry_handler_info); ++i)
+ {
+ if (handlers[i].kind == entry->kind)
+ handler = handlers[i].handler;
+ }
+ b += (*handler) (entry);
+ }
+
+ if (m_cf != NULL)
+ {
+ m_total_group_segment_usage += m_cf->m_local_group_variables.size ();
+ delete m_cf;
+ m_cf = NULL;
+ }
+
+ m_total_group_segment_usage += m_module_group_variables.size ();
+ m_analyzing = false;
+}
+
+/* Parses the given BRIG blob. */
+
+void
+brig_to_generic::parse (const char *brig_blob)
+{
+ m_brig = brig_blob;
+ find_brig_sections ();
+
brig_basic_inst_handler inst_handler (*this);
brig_branch_inst_handler branch_inst_handler (*this);
brig_cvt_inst_handler cvt_inst_handler (*this);
handler = handlers[i].handler;
}
b += (*handler) (entry);
- continue;
}
finish_function ();
return NULL;
}
+/* Adds a group variable to the correct bookkeeping structure depending
+ on its scope (module or function). */
+
+void
+brig_to_generic::add_group_variable (const std::string &name, size_t size,
+ size_t alignment, bool function_scope)
+{
+ /* Module and function scope group region variables are an experimental
+ feature. We implement module scope group variables with a separate
+ book keeping inside brig_to_generic which is populated in the 'analyze()'
+ prepass. This is to ensure we know the group segment offsets when
+ processing the functions that might refer to them. */
+ if (!function_scope)
+ {
+ if (!m_module_group_variables.has_variable (name))
+ m_module_group_variables.add (name, size, alignment);
+ return;
+ }
+
+ if (!m_cf->m_local_group_variables.has_variable (name))
+ m_cf->m_local_group_variables.add (name, size, alignment);
+}
+
/* Finalizes the currently handled function. Should be called before
setting a new function. */
m_cf->m_func_decl = f;
}
-/* Appends a new group variable (or an fbarrier) to the current kernel's
- group segment. */
-
-void
-brig_to_generic::append_group_variable (const std::string &name, size_t size,
- size_t alignment)
-{
- size_t align_padding = m_next_group_offset % alignment == 0 ?
- 0 : (alignment - m_next_group_offset % alignment);
- m_next_group_offset += align_padding;
- m_group_offsets[name] = m_next_group_offset;
- m_next_group_offset += size;
-}
-
-size_t
-brig_to_generic::group_variable_segment_offset (const std::string &name) const
-{
- var_offset_table::const_iterator i = m_group_offsets.find (name);
- gcc_assert (i != m_group_offsets.end ());
- return (*i).second;
-}
-
-/* The size of the group and private segments required by the currently
- processed kernel. Private segment size must be multiplied by the
- number of work-items in the launch, in case of a work-group function. */
-
-size_t
-brig_to_generic::group_segment_size () const
-{
- return m_next_group_offset;
-}
-
/* Appends a new variable to the current kernel's private segment. */
void
return i != m_private_data_sizes.end ();
}
-bool
-brig_to_generic::has_group_variable (const std::string &name) const
-{
- var_offset_table::const_iterator i = m_group_offsets.find (name);
- return i != m_group_offsets.end ();
-}
-
size_t
brig_to_generic::private_variable_size (const std::string &name) const
{
return (*i).second;
}
+
+/* The size of private segment required by a single work-item executing
+ the currently processed kernel. */
+
size_t
brig_to_generic::private_segment_size () const
{
cgraph_node::finalize_function (f->m_func_decl, true);
f->m_descriptor.is_kernel = 1;
- /* TODO: analyze the kernel's actual group and private segment usage
- using a call graph. Now the private and group mem sizes are overly
- pessimistic in case of multiple kernels in the same module. */
- f->m_descriptor.group_segment_size = group_segment_size ();
+ /* TODO: analyze the kernel's actual private and group segment usage
+ using call graph. Now the mem size is overly
+ pessimistic in case of multiple kernels in the same module.
+ */
+ f->m_descriptor.group_segment_size = m_total_group_segment_usage;
f->m_descriptor.private_segment_size = private_segment_size ();
/* The kernarg size is rounded up to a multiple of 16 according to
delete[] vec;
- for (size_t i = 0; i < m_brig_blobs.size (); ++i)
- delete m_brig_blobs[i];
}
/* Returns an type with unsigned int elements corresponding to the
#include "hsa-brig-format.h"
#include "brig-function.h"
-
struct reg_decl_index_entry;
/* Converts an HSAIL BRIG input to GENERIC. This class holds global state
public:
brig_to_generic ();
+ void analyze (const char *brig_blob);
void parse (const char *brig_blob);
void write_globals ();
void start_function (tree f);
void finish_function ();
- void append_group_variable (const std::string &name, size_t size,
- size_t alignment);
-
void append_private_variable (const std::string &name, size_t size,
size_t alignment);
- size_t group_variable_segment_offset (const std::string &name) const;
-
- bool
- has_group_variable (const std::string &name) const;
-
size_t
private_variable_segment_offset (const std::string &name) const;
{ return get_mangled_name_tmpl (var); }
std::string get_mangled_name (const BrigDirectiveExecutable *func) const;
- size_t group_segment_size () const;
size_t private_segment_size () const;
brig_function *get_finished_function (tree func_decl);
+ void add_group_variable (const std::string &name, size_t size,
+ size_t alignment, bool function_scope);
+
static tree s_fp16_type;
static tree s_fp32_type;
static tree s_fp64_type;
/* The currently built function. */
brig_function *m_cf;
+ /* Stores the module scope group variable offsets. */
+ group_variable_offset_index m_module_group_variables;
+
/* The name of the currently handled BRIG module. */
std::string m_module_name;
+ /* Set to true if the compilation is in 'analyze' phase. */
+ bool m_analyzing;
+
+ /* Accumulates the total group segment usage. */
+ size_t m_total_group_segment_usage;
+
private:
+
+ void find_brig_sections ();
/* The BRIG blob and its different sections of the file currently being
parsed. */
const char *m_brig;
/* The size of each private variable, including the alignment padding. */
std::map<std::string, size_t> m_private_data_sizes;
- /* The same for group variables. */
- size_t m_next_group_offset;
- var_offset_table m_group_offsets;
-
/* And private. */
size_t m_next_private_offset;
var_offset_table m_private_offsets;
for some interprocedural analysis. */
std::map<std::string, brig_function *> m_finished_functions;
- /* The parsed BRIG blobs. Owned and will be deleted after use. */
- std::vector<const char *> m_brig_blobs;
-
/* The original dump file. */
FILE *m_dump_file;
#include "errors.h"
#include "diagnostic-core.h"
+bool
+group_variable_offset_index::has_variable (const std::string &name) const
+{
+ varname_offset_table::const_iterator i = m_group_offsets.find (name);
+ return i != m_group_offsets.end ();
+}
+
+/* Adds a new group segment variable. */
+
+void
+group_variable_offset_index::add (const std::string &name, size_t size,
+ size_t alignment)
+{
+ size_t align_padding = m_next_group_offset % alignment == 0 ?
+ 0 : (alignment - m_next_group_offset % alignment);
+ m_next_group_offset += align_padding;
+ m_group_offsets[name] = m_next_group_offset;
+ m_next_group_offset += size;
+}
+
+size_t
+group_variable_offset_index::segment_offset (const std::string &name) const
+{
+ varname_offset_table::const_iterator i = m_group_offsets.find (name);
+ gcc_assert (i != m_group_offsets.end ());
+ return (*i).second;
+}
+
/* Return true if operand number OPNUM of instruction with OPCODE is an output.
False if it is an input. Some code reused from Martin Jambor's gcc-hsa
tree. */
#ifndef GCC_BRIG_UTIL_H
#define GCC_BRIG_UTIL_H
-#include "brig-to-generic.h"
+#include <map>
+
+#include "config.h"
+#include "system.h"
+#include "ansidecl.h"
+#include "coretypes.h"
+#include "opts.h"
+#include "tree.h"
+
+/* Helper class for bookkeeping of group variable offsets. */
+
+class group_variable_offset_index
+{
+public:
+ group_variable_offset_index () : m_next_group_offset (0) {}
+
+ typedef std::map<std::string, size_t> varname_offset_table;
+
+ bool has_variable (const std::string &name) const;
+ void add (const std::string &name, size_t size, size_t alignment);
+ size_t segment_offset (const std::string &name) const;
+ size_t size () const { return m_next_group_offset; }
+
+private:
+ size_t m_next_group_offset;
+ varname_offset_table m_group_offsets;
+};
bool gccbrig_hsa_opcode_op_output_p (BrigOpcode16_t opcode, int opnum);
size_t alignment = get_brig_var_alignment (brigVar);
- if (m_parent.m_cf != NULL)
+ bool function_scope = m_parent.m_cf != NULL;
+
+ if (function_scope)
m_parent.m_cf->m_function_scope_vars.insert (base);
std::string var_name = m_parent.get_mangled_name (brigVar);
+ if (brigVar->segment == BRIG_SEGMENT_GROUP)
+ {
+ /* Non-kernel scope group variables have been added at the
+ 'analyze' stage. */
+ m_parent.add_group_variable (var_name, var_size, alignment,
+ function_scope);
+ return base->byteCount;
+ }
+
+ /* During analyze, handle only (module scope) group variables. */
+ if (m_parent.m_analyzing)
+ return base->byteCount;
+
if (brigVar->segment == BRIG_SEGMENT_KERNARG)
{
/* Do not create a real variable, but only a table of
m_parent.m_cf->append_kernel_arg (brigVar, var_size, alignment);
return base->byteCount;
}
- else if (brigVar->segment == BRIG_SEGMENT_GROUP)
- {
- /* Handle group region variables similarly as kernargs:
- assign offsets to the group region on the fly when
- a new module scope or function scope group variable is
- introduced. These offsets will be then added to the
- group_base hidden pointer passed to the kernel in order to
- get the flat address. */
- if (!m_parent.has_group_variable (var_name))
- m_parent.append_group_variable (var_name, var_size, alignment);
- return base->byteCount;
- }
else if (brigVar->segment == BRIG_SEGMENT_PRIVATE
|| brigVar->segment == BRIG_SEGMENT_SPILL)
{
+2017-09-27 Pekka Jääskeläinen <pekka.jaaskelainen@parmance.com>
+
+ * brig.dg/test/gimple/fbarrier.hsail: Fixed tests to match the
+ new group memory offsetting code in the BRIG frontend.
+ * brig.dg/test/gimple/function_calls.hsail: Likewise.
+ * brig.dg/test/gimple/smoke_test.hsail: Likewise.
+ * brig.dg/test/gimple/variables.hsail: Likewise.
+
2017-09-27 Jakub Jelinek <jakub@redhat.com>
PR c++/82159
/* { dg-final { scan-tree-dump "__hsail_waitfbar \\\(0, __context\\\);" "gimple"} } */
/* { dg-final { scan-tree-dump "__hsail_initfbar \\\(0, __context\\\);" "gimple"} } */
-/* { dg-final { scan-tree-dump "__hsail_initfbar \\\(32, __context\\\);" "gimple"} } */
+/* { dg-final { scan-tree-dump "__hsail_initfbar \\\(__group_local_offset, __context\\\);" "gimple"} } */
/* { dg-final { scan-tree-dump "__hsail_joinfbar \\\(0, __context\\\);" "gimple"} } */
-/* { dg-final { scan-tree-dump "@skip_init:\[\n ]+__builtin___hsail_barrier \\\(__context\\\);\[\n ]+__builtin___hsail_joinfbar \\\(32, __context\\\);" "gimple"} } */
+/* { dg-final { scan-tree-dump "@skip_init:\[\n ]+__builtin___hsail_barrier \\\(__context\\\);\[\n ]+__builtin___hsail_joinfbar \\\(__group_local_offset, __context\\\);" "gimple"} } */
-/* { dg-final { scan-tree-dump "__hsail_arrivefbar \\\(32, __context\\\);" "gimple"} } */
+/* { dg-final { scan-tree-dump "__hsail_arrivefbar \\\(__group_local_offset, __context\\\);" "gimple"} } */
-/* { dg-final { scan-tree-dump "__hsail_releasefbar \\\(0, __context\\\);\[\n ]+__builtin___hsail_releasefbar \\\(32, __context\\\);" "gimple"} } */
+/* { dg-final { scan-tree-dump "__hsail_releasefbar \\\(0, __context\\\);\[\n ]+__builtin___hsail_releasefbar \\\(__group_local_offset, __context\\\);" "gimple"} } */
/* The generated function call should have the incoming arguments and three hidden arguments. */
-/* { dg-final { scan-tree-dump "_\[0-9\]+ = subfunction \\\(_kernel.float_arg.\[_0-9\]+, _kernel.double_arg.\[_0-9\]+, _kernel.half_arg.\[_0-9\]+, __context, __group_base_addr, __private_base_addr\\\);" "gimple"} } */
+/* { dg-final { scan-tree-dump "_\[0-9\]+ = subfunction \\\(_kernel.float_arg.\[_0-9\]+, _kernel.double_arg.\[_0-9\]+, _kernel.half_arg.\[_0-9\]+, __context, __group_base_addr, group_local_offset.*, __private_base_addr\\\);" "gimple"} } */
/* The callee should refer directly to the scalar arguments when it reads them. */
/* { dg-final { scan-tree-dump "= float_arg;" "gimple"} } */
/* The kernel function itself should have a fingerprint as follows */
/* _Kernel (unsigned char * __args, void * __context, void * __group_base_addr, void * __private_base_addr) */
-/* { dg-final { scan-tree-dump "_Kernel \\\(unsigned char \\\* __args, void \\\* __context, void \\\* __group_base_addr, void \\\* __private_base_addr\\\)" "gimple"} } */
+/* { dg-final { scan-tree-dump "_Kernel \\\(unsigned char \\\* __args, void \\\* __context, void \\\* __group_base_addr, unsigned int __group_local_offset, void \\\* __private_base_addr\\\)" "gimple"} } */
/* ld_kernarg: mem_read.0 = MEM[(unsigned long *)__args]; */
/* { dg-final { scan-tree-dump "mem_read.\[0-9\] = MEM\\\[\\\(unsigned long \\\*\\\)__args\\\];" "gimple"} } */
/* The launcher should call __hsail_launch_wg_function in this case: */
/* Kernel (void * __context, void * __group_base_addr) */
/* { dg-final { scan-tree-dump "Kernel \\\(void \\\* __context, void \\\* __group_base_addr\\\)" "gimple"} } */
-/* { dg-final { scan-tree-dump "__hsail_launch_wg_function \\\(_Kernel, __context, __group_base_addr\\\);" "gimple"} }*/
+/* { dg-final { scan-tree-dump "__hsail_launch_wg_function \\\(_Kernel, __context, __group_base_addr, group_local_offset.*\\\);" "gimple"} }*/
/* The kernel should have the magic metadata section injected to the ELF. */
/* TODO: this should be disabled in case not outputting to an ELF. */
/* { dg-final { scan-tree-dump "s2 = s0 \\\+ s1;\[\n \]+__builtin___hsail_barrier \\\(__context\\\);\[\n \]+s3 = s0 \\\+ 4294967295;" "gimple"} } */
/* The kernel with the barrier call's launcher function should call the thread-spawning function. */
-/* { dg-final { scan-tree-dump "__hsail_launch_kernel \\\(_KernelWithBarrier, __context, __group_base_addr\\\);" "gimple" } } */
+/* { dg-final { scan-tree-dump "__hsail_launch_kernel \\\(_KernelWithBarrier, __context, __group_base_addr, group_local_offset.*\\\);" "gimple" } } */
/* Tests for different variable scopes and address spaces. */
/* { dg-do compile } */
-/* { dg-options "-fdump-tree-gimple" } */
+/* { dg-options "-fdump-tree-gimple -fdump-tree-original" } */
prog align(256) private_u32 &prog_private;
private_u32 &mod_private;
ld_private_u32 $s200, [%func_private];
st_private_u32 $s200, [&prog_private];
+/* { dg-final { scan-tree-dump "__group_base_addr \\\+ \\\(0 \\\+" "original" } } */
ld_group_u32 $s203, [%func_group];
+
+/* { dg-final { scan-tree-dump "__group_base_addr \\\+ 0" "original" } } */
st_group_u32 $s203, [&prog_group];
ld_global_u32 $s204, [%func_global];
kern_group @12 (3)
*/
-/* { dg-final { scan-tree-dump "\\\+ 8;.*\\\+ 12;.*\\\+ 4;" "gimple" } } */
-
/* The "mangling" of the global and readonly vars. */
/* { dg-final { scan-tree-dump "\[ \]*prog_global = s204;" "gimple" } } */
+2017-09-27 Pekka Jääskeläinen <pekka.jaaskelainen@parmance.com>
+
+ * include/internal/phsa-rt.h: Support for improved group segment
+ handling with a stack-like allocation scheme.
+ * include/internal/workitems.h: Likewise.
+ * rt/workitems.c: Likewise.
+
2017-09-25 Pekka Jääskeläinen <pekka.jaaskelainen@parmance.com>
* rt/workitems.c: Assume the host runtime allocates the work group
memory.
+
2017-05-03 Pekka Jääskeläinen <pekka.jaaskelainen@parmance.com>
* rt/workitems.c: Removed a leftover comment.
/* Pointer type for kernel functions produced by gccbrig from the HSAIL.
This is private from outside the device binary and only called by
the launcher. */
-typedef void (*gccbrigKernelFunc) (unsigned char *, void *, void *, void *);
+typedef void (*gccbrigKernelFunc) (unsigned char *, void *, void *, uint32_t,
+ void *);
/* Context data that is passed to the kernel function, initialized
by the runtime to the current launch information. The data is
to the work-group. */
void *group_base_ptr;
+ /* The offset in the group memory for the kernel local group variables.
+ To support module scope group variables, there might be a need to
+ preserve room for them in the beginning of the group segment. */
+ uint32_t initial_group_offset;
+
/* Similarly to the private segment that gets space allocated for all
WIs in the work-group. */
void *private_base_ptr;
&& wi->z < __hsail_currentworkgroupsize (2, wi))
{
l_data->kernel (l_data->kernarg_addr, wi, wg->group_base_ptr,
- wg->private_base_ptr);
+ wg->initial_group_offset, wg->private_base_ptr);
#ifdef DEBUG_PHSA_RT
printf ("done.\n");
#endif
static void
phsa_execute_wi_gang (PHSAKernelLaunchData *context, void *group_base_ptr,
- size_t wg_size_x, size_t wg_size_y, size_t wg_size_z)
+ uint32_t group_local_offset, size_t wg_size_x,
+ size_t wg_size_y, size_t wg_size_z)
{
PHSAWorkItem *wi_threads = NULL;
PHSAWorkGroup wg;
wg.alloca_stack_p = wg.private_segment_total_size;
wg.alloca_frame_p = wg.alloca_stack_p;
+ wg.initial_group_offset = group_local_offset;
#ifdef EXECUTE_WGS_BACKWARDS
wg.x = context->wg_max_x - 1;
them execute all the WGs, including a potential partial WG. */
static void
-phsa_spawn_work_items (PHSAKernelLaunchData *context, void *group_base_ptr)
+phsa_spawn_work_items (PHSAKernelLaunchData *context, void *group_base_ptr,
+ uint32_t group_local_offset)
{
hsa_kernel_dispatch_packet_t *dp = context->dp;
size_t x, y, z;
dp->grid_size_y, dp->grid_size_z);
#endif
- phsa_execute_wi_gang (context, group_base_ptr, sat_wg_size_x, sat_wg_size_y,
- sat_wg_size_z);
+ phsa_execute_wi_gang (context, group_base_ptr, group_local_offset,
+ sat_wg_size_x, sat_wg_size_y, sat_wg_size_z);
}
#endif
execute massive numbers of work-items in a non-SPMD machine than fibers
(easily 100x faster). */
static void
-phsa_execute_work_groups (PHSAKernelLaunchData *context, void *group_base_ptr)
+phsa_execute_work_groups (PHSAKernelLaunchData *context, void *group_base_ptr,
+ uint32_t group_local_offset)
{
hsa_kernel_dispatch_packet_t *dp = context->dp;
size_t x, y, z, wg_x, wg_y, wg_z;
wi.wg->z = wg_z;
context->kernel (context->kernarg_addr, &wi, group_base_ptr,
- private_base_ptr);
+ group_local_offset, private_base_ptr);
#if defined (BENCHMARK_PHSA_RT)
wg_count++;
void
__hsail_launch_kernel (gccbrigKernelFunc kernel, PHSAKernelLaunchData *context,
- void *group_base_ptr)
+ void *group_base_ptr, uint32_t group_local_offset)
{
context->kernel = kernel;
- phsa_spawn_work_items (context, group_base_ptr);
+ phsa_spawn_work_items (context, group_base_ptr, group_local_offset);
}
#endif
void
__hsail_launch_wg_function (gccbrigKernelFunc kernel,
- PHSAKernelLaunchData *context, void *group_base_ptr)
+ PHSAKernelLaunchData *context, void *group_base_ptr,
+ uint32_t group_local_offset)
{
context->kernel = kernel;
- phsa_execute_work_groups (context, group_base_ptr);
+ phsa_execute_work_groups (context, group_base_ptr, group_local_offset);
}
uint32_t