From: Pekka Jääskeläinen Date: Wed, 27 Sep 2017 15:40:24 +0000 (+0000) Subject: [BRIGFE] Improved support for function and module scope group X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=d4b7f2ee4bb69d5d3594413d63d077d2f3610141;p=gcc.git [BRIGFE] Improved support for function and module scope group segment variables. The PRM spec defines function and module scope group segment variables as an experimental feature. However, the PRM test suite uses them and hcc relies on them. In addition, hcc assumes a certain group variable layout in its dynamic group segment allocation code. We cannot have global group memory offsets if we want to both have kernel-specific group segment size and multiple kernels calling the same functions that use function scope group memory variables. Now the group segment is handled by separate bookkeeping of module scope and function (kernel) offsets. Each function has a "frame" in the group segment, the offset to which is given as an argument. From-SVN: r253233 --- diff --git a/gcc/brig/ChangeLog b/gcc/brig/ChangeLog index 69c57cbf80a..0225929c2ef 100644 --- a/gcc/brig/ChangeLog +++ b/gcc/brig/ChangeLog @@ -1,4 +1,29 @@ -2017-05-13 Pekka Jääskeläinen +2017-09-27 Pekka Jääskeläinen + + * brig-lang.c: Improved support for function and module scope + group segment variables. The PRM spec defines function and module + scope group segment variables as an experimental feature. However, + the PRM test suite uses them and hcc relies on them. In addition, hcc + assumes a certain group variable layout in its dynamic group segment + allocation code. We cannot have global group memory offsets if we + want to both have kernel-specific group segment size and multiple + kernels calling the same functions that use function scope group memory + variables. Now the group segment is handled by separate bookkeeping of + module scope and function (kernel) offsets. Each function has a "frame" + in the group segment, the offset to which is given as an argument. 
+ * brigfrontend/brig-branch-inst-handler.cc: See above. + * brigfrontend/brig-code-entry-handler.cc: See above. + * brigfrontend/brig-fbarrier-handler.cc: See above. + * brigfrontend/brig-function-handler.cc: See above. + * brigfrontend/brig-function.cc: See above. + * brigfrontend/brig-function.h: See above. + * brigfrontend/brig-to-generic.cc: See above. + * brigfrontend/brig-to-generic.h: See above. + * brigfrontend/brig-util.cc: See above. + * brigfrontend/brig-util.h: See above. + * brigfrontend/brig-variable-handler.cc: See above. + +2017-09-25 Pekka Jääskeläinen * brigfrontend/brig-to-generic.cc: Ensure per WI copies of private variables are aligned too. diff --git a/gcc/brig/brig-lang.c b/gcc/brig/brig-lang.c index 13e738e1937..a587c8b6091 100644 --- a/gcc/brig/brig-lang.c +++ b/gcc/brig/brig-lang.c @@ -160,7 +160,7 @@ brig_langhook_post_options (const char **pfilename ATTRIBUTE_UNUSED) flag_excess_precision_cmdline = EXCESS_PRECISION_STANDARD; /* gccbrig casts pointers around like crazy, TBAA produces - broken code if not force disabling it. */ + broken code if not force disabling it. */ flag_strict_aliasing = 0; /* Returning false means that the backend should be used. 
*/ @@ -182,6 +182,8 @@ brig_langhook_parse_file (void) { brig_to_generic brig_to_gen; + std::vector brig_blobs; + for (unsigned int i = 0; i < num_in_fnames; ++i) { @@ -194,11 +196,22 @@ brig_langhook_parse_file (void) error ("could not read the BRIG file"); exit (1); } - brig_to_gen.parse (brig_blob); fclose (f); + + brig_to_gen.analyze (brig_blob); + brig_blobs.push_back (brig_blob); + } + + for (size_t i = 0; i < brig_blobs.size(); ++i) + { + char *brig_blob = brig_blobs.at(i); + brig_to_gen.parse (brig_blob); } brig_to_gen.write_globals (); + + for (size_t i = 0; i < brig_blobs.size (); ++i) + delete brig_blobs[i]; } static tree diff --git a/gcc/brig/brigfrontend/brig-branch-inst-handler.cc b/gcc/brig/brigfrontend/brig-branch-inst-handler.cc index 9cec5b6d455..c8912dbccd7 100644 --- a/gcc/brig/brigfrontend/brig-branch-inst-handler.cc +++ b/gcc/brig/brigfrontend/brig-branch-inst-handler.cc @@ -117,8 +117,17 @@ brig_branch_inst_handler::operator () (const BrigBase *base) they might call builtins that need them or access group/private memory. */ + tree group_local_offset + = add_temp_var ("group_local_offset", + build_int_cst + (uint32_type_node, + m_parent.m_cf->m_local_group_variables.size())); + + /* TODO: ensure the callee's frame is aligned! 
*/ + vec_safe_push (in_args, m_parent.m_cf->m_context_arg); vec_safe_push (in_args, m_parent.m_cf->m_group_base_arg); + vec_safe_push (in_args, group_local_offset); vec_safe_push (in_args, m_parent.m_cf->m_private_base_arg); tree call = build_call_vec (ret_val_type, build_fold_addr_expr (func_ref), diff --git a/gcc/brig/brigfrontend/brig-code-entry-handler.cc b/gcc/brig/brigfrontend/brig-code-entry-handler.cc index 8f07d372796..a660739807e 100644 --- a/gcc/brig/brigfrontend/brig-code-entry-handler.cc +++ b/gcc/brig/brigfrontend/brig-code-entry-handler.cc @@ -88,10 +88,17 @@ brig_code_entry_handler::build_code_ref (const BrigBase &ref) { const BrigDirectiveFbarrier* fbar = (const BrigDirectiveFbarrier*)&ref; - uint64_t offset = m_parent.group_variable_segment_offset - (m_parent.get_mangled_name (fbar)); - - return build_int_cst (uint32_type_node, offset); + std::string var_name = m_parent.get_mangled_name (fbar); + uint64_t offset + = m_parent.m_cf->group_variable_segment_offset (var_name); + + tree local_offset = build_int_cst (uint32_type_node, offset); + if (m_parent.m_cf->m_local_group_variables.has_variable (var_name)) + local_offset + = build2 (PLUS_EXPR, uint64_type_node, local_offset, + convert (uint64_type_node, + m_parent.m_cf->m_group_local_offset_arg)); + return local_offset; } else gcc_unreachable (); @@ -264,9 +271,18 @@ brig_code_entry_handler::build_address_operand } else if (segment == BRIG_SEGMENT_GROUP) { - - uint64_t offset = m_parent.group_variable_segment_offset (var_name); + uint64_t offset + = m_parent.m_cf->group_variable_segment_offset (var_name); const_offset = build_int_cst (size_type_node, offset); + + /* If it's a local group variable reference, add the local + group segment offset to get the group base ptr offset. 
*/ + if (m_parent.m_cf->m_local_group_variables.has_variable (var_name)) + const_offset + = build2 (PLUS_EXPR, uint64_type_node, const_offset, + convert (uint64_type_node, + m_parent.m_cf->m_group_local_offset_arg)); + } else if (segment == BRIG_SEGMENT_PRIVATE || segment == BRIG_SEGMENT_SPILL) { diff --git a/gcc/brig/brigfrontend/brig-fbarrier-handler.cc b/gcc/brig/brigfrontend/brig-fbarrier-handler.cc index 802d51ce383..a033db6fc6d 100644 --- a/gcc/brig/brigfrontend/brig-fbarrier-handler.cc +++ b/gcc/brig/brigfrontend/brig-fbarrier-handler.cc @@ -39,6 +39,7 @@ brig_directive_fbarrier_handler::operator () (const BrigBase *base) if (m_parent.m_cf != NULL) m_parent.m_cf->m_function_scope_vars.insert (base); std::string var_name = m_parent.get_mangled_name (fbar); - m_parent.append_group_variable (var_name, FBARRIER_STRUCT_SIZE, 1); + m_parent.add_group_variable (var_name, FBARRIER_STRUCT_SIZE, 1, + m_parent.m_cf != NULL); return base->byteCount; } diff --git a/gcc/brig/brigfrontend/brig-function-handler.cc b/gcc/brig/brigfrontend/brig-function-handler.cc index ebfca3907c1..7896c4ac935 100644 --- a/gcc/brig/brigfrontend/brig-function-handler.cc +++ b/gcc/brig/brigfrontend/brig-function-handler.cc @@ -39,7 +39,8 @@ extern int gccbrig_verbose; size_t brig_directive_function_handler::operator () (const BrigBase *base) { - m_parent.finish_function (); + if (!m_parent.m_analyzing) + m_parent.finish_function (); size_t bytes_consumed = base->byteCount; @@ -64,9 +65,20 @@ brig_directive_function_handler::operator () (const BrigBase *base) if (is_kernel && !is_definition) return bytes_consumed; + std::string func_name = m_parent.get_mangled_name (exec); + if (is_kernel) + /* The generated kernel function is not the one that should be + called by the host. 
+ func_name = std::string ("_") + func_name; + m_parent.m_cf = new brig_function (exec, &m_parent); + m_parent.m_cf->m_name = func_name; + m_parent.m_cf->m_is_kernel = is_kernel; - std::string func_name = m_parent.get_mangled_name (exec); + /* During the analyze step, the above information is all we need per + function. */ + if (m_parent.m_analyzing) + return bytes_consumed; tree fndecl; tree ret_value = NULL_TREE; @@ -79,10 +91,6 @@ brig_directive_function_handler::operator () (const BrigBase *base) if (is_kernel) { - /* The generated kernel function is not the one that should be - called by the host. */ - func_name = std::string ("_") + func_name; - tree name_identifier = get_identifier_with_length (func_name.c_str (), func_name.size ()); @@ -256,6 +264,23 @@ brig_directive_function_handler::operator () (const BrigBase *base) DECL_ARTIFICIAL (group_base_arg) = 1; TREE_READONLY (group_base_arg) = 1; TREE_USED (group_base_arg) = 1; + m_parent.m_cf->m_group_base_arg = group_base_arg; + + /* To implement call stack and (non-kernel) function scope group variables, + we need to pass an offset which describes how far we are from + group_base_ptr. + That must be added to any function local group variable offsets to + get the address relative to the bottom of the group memory chunk. */ + tree group_local_offset_arg + = build_decl (UNKNOWN_LOCATION, PARM_DECL, + get_identifier ("__group_local_offset"), uint32_type_node); + chainon (DECL_ARGUMENTS (fndecl), group_local_offset_arg); + DECL_ARG_TYPE (group_local_offset_arg) = uint32_type_node; + DECL_CONTEXT (group_local_offset_arg) = fndecl; + DECL_ARTIFICIAL (group_local_offset_arg) = 1; + TREE_READONLY (group_local_offset_arg) = 1; + TREE_USED (group_local_offset_arg) = 1; + m_parent.m_cf->m_group_local_offset_arg = group_local_offset_arg; /* Same for private. 
*/ tree private_base_arg @@ -329,12 +354,9 @@ brig_directive_function_handler::operator () (const BrigBase *base) m_parent.start_function (fndecl); - m_parent.m_cf->m_name = func_name; m_parent.m_cf->m_func_decl = fndecl; m_parent.m_cf->m_current_bind_expr = bind_expr; - m_parent.m_cf->m_is_kernel = is_kernel; m_parent.m_cf->m_context_arg = context_arg; - m_parent.m_cf->m_group_base_arg = group_base_arg; m_parent.m_cf->m_private_base_arg = private_base_arg; if (ret_value != NULL_TREE && TREE_TYPE (ret_value) != void_type_node) diff --git a/gcc/brig/brigfrontend/brig-function.cc b/gcc/brig/brigfrontend/brig-function.cc index 0ca9ebe8b0e..f3c3895078a 100644 --- a/gcc/brig/brigfrontend/brig-function.cc +++ b/gcc/brig/brigfrontend/brig-function.cc @@ -52,11 +52,10 @@ brig_function::brig_function (const BrigDirectiveExecutable *exec, m_context_arg (NULL_TREE), m_group_base_arg (NULL_TREE), m_private_base_arg (NULL_TREE), m_ret_value (NULL_TREE), m_next_kernarg_offset (0), m_kernarg_max_align (0), - m_ret_value_brig_var (NULL), m_has_barriers (false), - m_has_allocas (false), m_has_function_calls_with_barriers (false), - m_calls_analyzed (false), m_is_wg_function (false), - m_has_unexpanded_dp_builtins (false), m_generating_arg_block (false), - m_parent (parent) + m_ret_value_brig_var (NULL), m_has_barriers (false), m_has_allocas (false), + m_has_function_calls_with_barriers (false), m_calls_analyzed (false), + m_is_wg_function (false), m_has_unexpanded_dp_builtins (false), + m_generating_arg_block (false), m_parent (parent) { memset (m_regs, 0, BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT * sizeof (BrigOperandRegister *)); @@ -577,20 +576,31 @@ brig_function::emit_launcher_and_metadata () tree phsail_launch_kernel_call; + /* Compute the local group segment frame start pointer. 
*/ + tree group_local_offset_temp + = create_tmp_var (uint32_type_node, "group_local_offset"); + tree group_local_offset_arg + = build2 (MODIFY_EXPR, uint32_type_node, + group_local_offset_temp, + build_int_cst (uint32_type_node, + m_parent->m_module_group_variables.size())); + /* Emit a launcher depending whether we converted the kernel function to a work group function or not. */ if (m_is_wg_function) phsail_launch_kernel_call = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_LAUNCH_WG_FUNC), - 3, void_type_node, + 4, void_type_node, ptr_type_node, kernel_func_ptr, ptr_type_node, - context_arg, ptr_type_node, group_base_addr_arg); + context_arg, ptr_type_node, group_base_addr_arg, + uint32_type_node, group_local_offset_arg); else phsail_launch_kernel_call = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_LAUNCH_KERNEL), - 3, void_type_node, + 4, void_type_node, ptr_type_node, kernel_func_ptr, ptr_type_node, - context_arg, ptr_type_node, group_base_addr_arg); + context_arg, ptr_type_node, group_base_addr_arg, + uint32_type_node, group_local_offset_arg); append_to_statement_list_force (phsail_launch_kernel_call, &stmt_list); @@ -722,3 +732,13 @@ brig_function::has_function_scope_var (const BrigBase* var) const { return m_function_scope_vars.find (var) != m_function_scope_vars.end (); } + +size_t +brig_function::group_variable_segment_offset (const std::string &name) const +{ + if (m_local_group_variables.has_variable (name)) + return m_local_group_variables.segment_offset (name); + + gcc_assert (m_parent->m_module_group_variables.has_variable (name)); + return m_parent->m_module_group_variables.segment_offset (name); +} diff --git a/gcc/brig/brigfrontend/brig-function.h b/gcc/brig/brigfrontend/brig-function.h index 71b5d3f996f..2a85f5e69fe 100644 --- a/gcc/brig/brigfrontend/brig-function.h +++ b/gcc/brig/brigfrontend/brig-function.h @@ -30,8 +30,7 @@ #include "tree.h" #include "tree-iterator.h" #include "hsa-brig-format.h" - -class brig_to_generic; 
+#include "brig-util.h" #include #include @@ -40,6 +39,8 @@ class brig_to_generic; #include "phsa.h" +class brig_to_generic; + typedef std::map label_index; typedef std::map variable_index; typedef std::vector tree_stl_vec; @@ -84,6 +85,12 @@ public: tree add_local_variable (std::string name, tree type); + size_t group_variable_segment_offset (const std::string &name) const; + + bool has_group_variable (const std::string &name) const; + + size_t group_segment_size () const; + tree get_m_var_declfor_reg (const BrigOperandRegister *reg); bool convert_to_wg_function (); @@ -119,10 +126,16 @@ public: /* The __context function argument. */ tree m_context_arg; + /* The __group_base_ptr argument in the current function. - Points to the start of the group segment for the kernel - instance. */ + Points to the start of the group segment for the work-group. */ tree m_group_base_arg; + + /* The __group_local_offset_ptr argument in the current function. It + contains the offset related to the group_base_ptr where the function's + local area for group variables resides. */ + tree m_group_local_offset_arg; + /* The __private_base_ptr argument in the current function. Points to the start of the private segment. */ tree m_private_base_arg; @@ -159,7 +172,7 @@ public: /* True if the function has at least one alloca instruction. */ bool m_has_allocas; - /* If the kernel containts at least one function call that _may_ + /* If the kernel contains at least one function call that _may_ contain a barrier call, this is set to true. */ bool m_has_function_calls_with_barriers; @@ -199,6 +212,10 @@ public: /* The functions called by this function. */ std::vector m_called_functions; + /* Stores the kernel scope group variable offsets if the function is + a kernel. 
*/ + group_variable_offset_index m_local_group_variables; + brig_to_generic *m_parent; /* The metadata of the function that should be stored with the binary and passed to the HSA runtime: */ diff --git a/gcc/brig/brigfrontend/brig-to-generic.cc b/gcc/brig/brigfrontend/brig-to-generic.cc index 2b1d94e0ff7..6459f9e1076 100644 --- a/gcc/brig/brigfrontend/brig-to-generic.cc +++ b/gcc/brig/brigfrontend/brig-to-generic.cc @@ -60,8 +60,8 @@ tree brig_to_generic::s_fp32_type; tree brig_to_generic::s_fp64_type; brig_to_generic::brig_to_generic () - : m_cf (NULL), m_brig (NULL), m_next_group_offset (0), - m_next_private_offset (0) + : m_cf (NULL), m_analyzing (true), m_total_group_segment_usage (0), + m_brig (NULL), m_next_private_offset (0) { m_globals = NULL_TREE; @@ -124,33 +124,32 @@ public: } }; -/* Parses the given BRIG blob. */ +/* Helper struct for pairing a BrigKind and a BrigCodeEntryHandler that + should handle its data. */ -void -brig_to_generic::parse (const char *brig_blob) +struct code_entry_handler_info { - m_brig = brig_blob; - m_brig_blobs.push_back (brig_blob); + BrigKind kind; + brig_code_entry_handler *handler; +}; - const BrigModuleHeader *mheader = (const BrigModuleHeader *) brig_blob; - if (strncmp (mheader->identification, "HSA BRIG", 8) != 0) - fatal_error (UNKNOWN_LOCATION, PHSA_ERROR_PREFIX_INCOMPATIBLE_MODULE - "Unrecognized file format."); - if (mheader->brigMajor != 1 || mheader->brigMinor != 0) - fatal_error (UNKNOWN_LOCATION, PHSA_ERROR_PREFIX_INCOMPATIBLE_MODULE - "BRIG version not supported. BRIG 1.0 required."); +/* Finds the BRIG file sections in the currently processed file. */ +void +brig_to_generic::find_brig_sections () +{ m_data = m_code = m_operand = NULL; + const BrigModuleHeader *mheader = (const BrigModuleHeader *) m_brig; /* Find the positions of the different sections. 
*/ for (uint32_t sec = 0; sec < mheader->sectionCount; ++sec) { uint64_t offset - = ((const uint64_t *) (brig_blob + mheader->sectionIndex))[sec]; + = ((const uint64_t *) (m_brig + mheader->sectionIndex))[sec]; const BrigSectionHeader *section_header - = (const BrigSectionHeader *) (brig_blob + offset); + = (const BrigSectionHeader *) (m_brig + offset); std::string name ((const char *) (&section_header->name), section_header->nameLength); @@ -183,6 +182,94 @@ brig_to_generic::parse (const char *brig_blob) if (m_operand == NULL) gcc_unreachable (); +} + +/* Does a first pass over the given BRIG to collect data needed for the + actual parsing. Currently this includes only collecting the + group segment variable usage to support the experimental HSA PRM feature + where group variables can be declared also in module and function scope + (in addition to kernel scope). +*/ + +void +brig_to_generic::analyze (const char *brig_blob) +{ + const BrigModuleHeader *mheader = (const BrigModuleHeader *) brig_blob; + + if (strncmp (mheader->identification, "HSA BRIG", 8) != 0) + fatal_error (UNKNOWN_LOCATION, PHSA_ERROR_PREFIX_INCOMPATIBLE_MODULE + "Unrecognized file format."); + if (mheader->brigMajor != 1 || mheader->brigMinor != 0) + fatal_error (UNKNOWN_LOCATION, PHSA_ERROR_PREFIX_INCOMPATIBLE_MODULE + "BRIG version not supported. BRIG 1.0 required."); + + m_brig = brig_blob; + + find_brig_sections (); + + brig_directive_variable_handler var_handler (*this); + brig_directive_fbarrier_handler fbar_handler (*this); + brig_directive_function_handler func_handler (*this); + + /* Need this for grabbing the module names for mangling the + group variable names. 
*/ + brig_directive_module_handler module_handler (*this); + skipped_entry_handler skipped_handler (*this); + + const BrigSectionHeader *csection_header = (const BrigSectionHeader *) m_code; + + code_entry_handler_info handlers[] + = {{BRIG_KIND_DIRECTIVE_VARIABLE, &var_handler}, + {BRIG_KIND_DIRECTIVE_FBARRIER, &fbar_handler}, + {BRIG_KIND_DIRECTIVE_KERNEL, &func_handler}, + {BRIG_KIND_DIRECTIVE_MODULE, &module_handler}, + {BRIG_KIND_DIRECTIVE_FUNCTION, &func_handler}}; + + m_analyzing = true; + for (size_t b = csection_header->headerByteCount; b < m_code_size;) + { + const BrigBase *entry = (const BrigBase *) (m_code + b); + + brig_code_entry_handler *handler = &skipped_handler; + + if (m_cf != NULL && b >= m_cf->m_brig_def->nextModuleEntry) + { + /* The function definition ended. We can just discard the place + holder function. */ + m_total_group_segment_usage += m_cf->m_local_group_variables.size (); + delete m_cf; + m_cf = NULL; + } + + /* Find a handler. */ + for (size_t i = 0; + i < sizeof (handlers) / sizeof (code_entry_handler_info); ++i) + { + if (handlers[i].kind == entry->kind) + handler = handlers[i].handler; + } + b += (*handler) (entry); + } + + if (m_cf != NULL) + { + m_total_group_segment_usage += m_cf->m_local_group_variables.size (); + delete m_cf; + m_cf = NULL; + } + + m_total_group_segment_usage += m_module_group_variables.size (); + m_analyzing = false; +} + +/* Parses the given BRIG blob. 
*/ + +void +brig_to_generic::parse (const char *brig_blob) +{ + m_brig = brig_blob; + find_brig_sections (); + brig_basic_inst_handler inst_handler (*this); brig_branch_inst_handler branch_inst_handler (*this); brig_cvt_inst_handler cvt_inst_handler (*this); @@ -269,7 +356,6 @@ brig_to_generic::parse (const char *brig_blob) handler = handlers[i].handler; } b += (*handler) (entry); - continue; } finish_function (); @@ -519,6 +605,29 @@ brig_to_generic::get_finished_function (tree func_decl) return NULL; } +/* Adds a group variable to a correct book keeping structure depending + on its segment. */ + +void +brig_to_generic::add_group_variable (const std::string &name, size_t size, + size_t alignment, bool function_scope) +{ + /* Module and function scope group region variables are an experimental + feature. We implement module scope group variables with a separate + book keeping inside brig_to_generic which is populated in the 'analyze()' + prepass. This is to ensure we know the group segment offsets when + processing the functions that might refer to them. */ + if (!function_scope) + { + if (!m_module_group_variables.has_variable (name)) + m_module_group_variables.add (name, size, alignment); + return; + } + + if (!m_cf->m_local_group_variables.has_variable (name)) + m_cf->m_local_group_variables.add (name, size, alignment); +} + /* Finalizes the currently handled function. Should be called before setting a new function. */ @@ -567,38 +676,6 @@ brig_to_generic::start_function (tree f) m_cf->m_func_decl = f; } -/* Appends a new group variable (or an fbarrier) to the current kernel's - group segment. */ - -void -brig_to_generic::append_group_variable (const std::string &name, size_t size, - size_t alignment) -{ - size_t align_padding = m_next_group_offset % alignment == 0 ? 
- 0 : (alignment - m_next_group_offset % alignment); - m_next_group_offset += align_padding; - m_group_offsets[name] = m_next_group_offset; - m_next_group_offset += size; -} - -size_t -brig_to_generic::group_variable_segment_offset (const std::string &name) const -{ - var_offset_table::const_iterator i = m_group_offsets.find (name); - gcc_assert (i != m_group_offsets.end ()); - return (*i).second; -} - -/* The size of the group and private segments required by the currently - processed kernel. Private segment size must be multiplied by the - number of work-items in the launch, in case of a work-group function. */ - -size_t -brig_to_generic::group_segment_size () const -{ - return m_next_group_offset; -} - /* Appends a new variable to the current kernel's private segment. */ void @@ -646,13 +723,6 @@ brig_to_generic::has_private_variable (const std::string &name) const return i != m_private_data_sizes.end (); } -bool -brig_to_generic::has_group_variable (const std::string &name) const -{ - var_offset_table::const_iterator i = m_group_offsets.find (name); - return i != m_group_offsets.end (); -} - size_t brig_to_generic::private_variable_size (const std::string &name) const { @@ -662,6 +732,10 @@ brig_to_generic::private_variable_size (const std::string &name) const return (*i).second; } + +/* The size of private segment required by a single work-item executing + the currently processed kernel. */ + size_t brig_to_generic::private_segment_size () const { @@ -735,10 +809,11 @@ brig_to_generic::write_globals () cgraph_node::finalize_function (f->m_func_decl, true); f->m_descriptor.is_kernel = 1; - /* TODO: analyze the kernel's actual group and private segment usage - using a call graph. Now the private and group mem sizes are overly - pessimistic in case of multiple kernels in the same module. */ - f->m_descriptor.group_segment_size = group_segment_size (); + /* TODO: analyze the kernel's actual private and group segment usage + using call graph. 
Now the mem size is overly + pessimistic in case of multiple kernels in the same module. + */ + f->m_descriptor.group_segment_size = m_total_group_segment_usage; f->m_descriptor.private_segment_size = private_segment_size (); /* The kernarg size is rounded up to a multiple of 16 according to @@ -774,8 +849,6 @@ brig_to_generic::write_globals () delete[] vec; - for (size_t i = 0; i < m_brig_blobs.size (); ++i) - delete m_brig_blobs[i]; } /* Returns an type with unsigned int elements corresponding to the diff --git a/gcc/brig/brigfrontend/brig-to-generic.h b/gcc/brig/brigfrontend/brig-to-generic.h index b94ff7cf57b..0070894dd26 100644 --- a/gcc/brig/brigfrontend/brig-to-generic.h +++ b/gcc/brig/brigfrontend/brig-to-generic.h @@ -36,7 +36,6 @@ #include "hsa-brig-format.h" #include "brig-function.h" - struct reg_decl_index_entry; /* Converts an HSAIL BRIG input to GENERIC. This class holds global state @@ -56,6 +55,7 @@ private: public: brig_to_generic (); + void analyze (const char *brig_blob); void parse (const char *brig_blob); void write_globals (); @@ -78,17 +78,9 @@ public: void start_function (tree f); void finish_function (); - void append_group_variable (const std::string &name, size_t size, - size_t alignment); - void append_private_variable (const std::string &name, size_t size, size_t alignment); - size_t group_variable_segment_offset (const std::string &name) const; - - bool - has_group_variable (const std::string &name) const; - size_t private_variable_segment_offset (const std::string &name) const; @@ -107,11 +99,13 @@ public: { return get_mangled_name_tmpl (var); } std::string get_mangled_name (const BrigDirectiveExecutable *func) const; - size_t group_segment_size () const; size_t private_segment_size () const; brig_function *get_finished_function (tree func_decl); + void add_group_variable (const std::string &name, size_t size, + size_t alignment, bool function_scope); + static tree s_fp16_type; static tree s_fp32_type; static tree s_fp64_type; @@ 
-123,10 +117,21 @@ public: /* The currently built function. */ brig_function *m_cf; + /* Stores the module and function scope group variable offsets. */ + group_variable_offset_index m_module_group_variables; + /* The name of the currently handled BRIG module. */ std::string m_module_name; + /* Set to true if the compilation is in 'analyze' phase. */ + bool m_analyzing; + + /* Accumulates the total group segment usage. */ + size_t m_total_group_segment_usage; + private: + + void find_brig_sections (); /* The BRIG blob and its different sections of the file currently being parsed. */ const char *m_brig; @@ -144,10 +149,6 @@ private: /* The size of each private variable, including the alignment padding. */ std::map m_private_data_sizes; - /* The same for group variables. */ - size_t m_next_group_offset; - var_offset_table m_group_offsets; - /* And private. */ size_t m_next_private_offset; var_offset_table m_private_offsets; @@ -162,9 +163,6 @@ private: for some interprocedural analysis. */ std::map m_finished_functions; - /* The parsed BRIG blobs. Owned and will be deleted after use. */ - std::vector m_brig_blobs; - /* The original dump file. */ FILE *m_dump_file; diff --git a/gcc/brig/brigfrontend/brig-util.cc b/gcc/brig/brigfrontend/brig-util.cc index f96ae6ab5e6..a8684de9131 100644 --- a/gcc/brig/brigfrontend/brig-util.cc +++ b/gcc/brig/brigfrontend/brig-util.cc @@ -27,6 +27,34 @@ along with GCC; see the file COPYING3. If not see #include "errors.h" #include "diagnostic-core.h" +bool +group_variable_offset_index::has_variable (const std::string &name) const +{ + varname_offset_table::const_iterator i = m_group_offsets.find (name); + return i != m_group_offsets.end (); +} + +/* Adds a new group segment variable. */ + +void +group_variable_offset_index::add (const std::string &name, size_t size, + size_t alignment) +{ + size_t align_padding = m_next_group_offset % alignment == 0 ? 
+ 0 : (alignment - m_next_group_offset % alignment); + m_next_group_offset += align_padding; + m_group_offsets[name] = m_next_group_offset; + m_next_group_offset += size; +} + +size_t +group_variable_offset_index::segment_offset (const std::string &name) const +{ + varname_offset_table::const_iterator i = m_group_offsets.find (name); + gcc_assert (i != m_group_offsets.end ()); + return (*i).second; +} + /* Return true if operand number OPNUM of instruction with OPCODE is an output. False if it is an input. Some code reused from Martin Jambor's gcc-hsa tree. */ diff --git a/gcc/brig/brigfrontend/brig-util.h b/gcc/brig/brigfrontend/brig-util.h index 3060f5b87f9..c90ff29d0fd 100644 --- a/gcc/brig/brigfrontend/brig-util.h +++ b/gcc/brig/brigfrontend/brig-util.h @@ -22,7 +22,33 @@ along with GCC; see the file COPYING3. If not see #ifndef GCC_BRIG_UTIL_H #define GCC_BRIG_UTIL_H -#include "brig-to-generic.h" +#include + +#include "config.h" +#include "system.h" +#include "ansidecl.h" +#include "coretypes.h" +#include "opts.h" +#include "tree.h" + +/* Helper class for keeping book of group variable offsets. 
*/ + +class group_variable_offset_index +{ +public: + group_variable_offset_index () : m_next_group_offset (0) {} + + typedef std::map<std::string, size_t> varname_offset_table; + + bool has_variable (const std::string &name) const; + void add (const std::string &name, size_t size, size_t alignment); + size_t segment_offset (const std::string &name) const; + size_t size () const { return m_next_group_offset; } + +private: + size_t m_next_group_offset; + varname_offset_table m_group_offsets; +}; bool gccbrig_hsa_opcode_op_output_p (BrigOpcode16_t opcode, int opnum); diff --git a/gcc/brig/brigfrontend/brig-variable-handler.cc b/gcc/brig/brigfrontend/brig-variable-handler.cc index b2e869b7ed5..cd0e98107f5 100644 --- a/gcc/brig/brigfrontend/brig-variable-handler.cc +++ b/gcc/brig/brigfrontend/brig-variable-handler.cc @@ -144,10 +144,25 @@ brig_directive_variable_handler::operator () (const BrigBase *base) size_t alignment = get_brig_var_alignment (brigVar); - if (m_parent.m_cf != NULL) + bool function_scope = m_parent.m_cf != NULL; + + if (function_scope) m_parent.m_cf->m_function_scope_vars.insert (base); std::string var_name = m_parent.get_mangled_name (brigVar); + if (brigVar->segment == BRIG_SEGMENT_GROUP) + { + /* Non-kernel scope group variables have been added at the + 'analyze' stage. */ + m_parent.add_group_variable (var_name, var_size, alignment, + function_scope); + return base->byteCount; + } + + /* During analyze, handle only (module scope) group variables. 
*/ + if (m_parent.m_analyzing) + return base->byteCount; + if (brigVar->segment == BRIG_SEGMENT_KERNARG) { /* Do not create a real variable, but only a table of @@ -158,18 +173,6 @@ brig_directive_variable_handler::operator () (const BrigBase *base) m_parent.m_cf->append_kernel_arg (brigVar, var_size, alignment); return base->byteCount; } - else if (brigVar->segment == BRIG_SEGMENT_GROUP) - { - /* Handle group region variables similarly as kernargs: - assign offsets to the group region on the fly when - a new module scope or function scope group variable is - introduced. These offsets will be then added to the - group_base hidden pointer passed to the kernel in order to - get the flat address. */ - if (!m_parent.has_group_variable (var_name)) - m_parent.append_group_variable (var_name, var_size, alignment); - return base->byteCount; - } else if (brigVar->segment == BRIG_SEGMENT_PRIVATE || brigVar->segment == BRIG_SEGMENT_SPILL) { diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 39682ce7259..9e67ac34e58 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,11 @@ +2017-09-27 Pekka Jääskeläinen + + * brig.dg/test/gimple/fbarrier.hsail: Fixed tests to match the + new group memory offsetting code in the BRIG frontend. + * brig.dg/test/gimple/function_calls.hsail: Likewise. + * brig.dg/test/gimple/smoke_test.hsail: Likewise. + * brig.dg/test/gimple/variables.hsail: Likewise. 
+ 2017-09-27 Jakub Jelinek PR c++/82159 diff --git a/gcc/testsuite/brig.dg/test/gimple/fbarrier.hsail b/gcc/testsuite/brig.dg/test/gimple/fbarrier.hsail index a58ca0964ad..9efe0271571 100644 --- a/gcc/testsuite/brig.dg/test/gimple/fbarrier.hsail +++ b/gcc/testsuite/brig.dg/test/gimple/fbarrier.hsail @@ -65,10 +65,10 @@ prog kernel &kernel(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr) /* { dg-final { scan-tree-dump "__hsail_waitfbar \\\(0, __context\\\);" "gimple"} } */ /* { dg-final { scan-tree-dump "__hsail_initfbar \\\(0, __context\\\);" "gimple"} } */ -/* { dg-final { scan-tree-dump "__hsail_initfbar \\\(32, __context\\\);" "gimple"} } */ +/* { dg-final { scan-tree-dump "__hsail_initfbar \\\(__group_local_offset, __context\\\);" "gimple"} } */ /* { dg-final { scan-tree-dump "__hsail_joinfbar \\\(0, __context\\\);" "gimple"} } */ -/* { dg-final { scan-tree-dump "@skip_init:\[\n ]+__builtin___hsail_barrier \\\(__context\\\);\[\n ]+__builtin___hsail_joinfbar \\\(32, __context\\\);" "gimple"} } */ +/* { dg-final { scan-tree-dump "@skip_init:\[\n ]+__builtin___hsail_barrier \\\(__context\\\);\[\n ]+__builtin___hsail_joinfbar \\\(__group_local_offset, __context\\\);" "gimple"} } */ -/* { dg-final { scan-tree-dump "__hsail_arrivefbar \\\(32, __context\\\);" "gimple"} } */ +/* { dg-final { scan-tree-dump "__hsail_arrivefbar \\\(__group_local_offset, __context\\\);" "gimple"} } */ -/* { dg-final { scan-tree-dump "__hsail_releasefbar \\\(0, __context\\\);\[\n ]+__builtin___hsail_releasefbar \\\(32, __context\\\);" "gimple"} } */ +/* { dg-final { scan-tree-dump "__hsail_releasefbar \\\(0, __context\\\);\[\n ]+__builtin___hsail_releasefbar \\\(__group_local_offset, __context\\\);" "gimple"} } */ diff --git a/gcc/testsuite/brig.dg/test/gimple/function_calls.hsail b/gcc/testsuite/brig.dg/test/gimple/function_calls.hsail index d3b690cd37a..50f79060b59 100644 --- a/gcc/testsuite/brig.dg/test/gimple/function_calls.hsail +++ 
b/gcc/testsuite/brig.dg/test/gimple/function_calls.hsail @@ -46,7 +46,7 @@ prog kernel &kernel(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr) /* The generated function call should have the incoming arguments and three hidden arguments. */ -/* { dg-final { scan-tree-dump "_\[0-9\]+ = subfunction \\\(_kernel.float_arg.\[_0-9\]+, _kernel.double_arg.\[_0-9\]+, _kernel.half_arg.\[_0-9\]+, __context, __group_base_addr, __private_base_addr\\\);" "gimple"} } */ +/* { dg-final { scan-tree-dump "_\[0-9\]+ = subfunction \\\(_kernel.float_arg.\[_0-9\]+, _kernel.double_arg.\[_0-9\]+, _kernel.half_arg.\[_0-9\]+, __context, __group_base_addr, group_local_offset.*, __private_base_addr\\\);" "gimple"} } */ /* The callee should refer directly to the scalar arguments when it reads them. */ /* { dg-final { scan-tree-dump "= float_arg;" "gimple"} } */ diff --git a/gcc/testsuite/brig.dg/test/gimple/smoke_test.hsail b/gcc/testsuite/brig.dg/test/gimple/smoke_test.hsail index 850aeeb6a84..1f36ddc4181 100644 --- a/gcc/testsuite/brig.dg/test/gimple/smoke_test.hsail +++ b/gcc/testsuite/brig.dg/test/gimple/smoke_test.hsail @@ -42,7 +42,7 @@ prog kernel &KernelWithBarrier(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr) /* The kernel function itself should have a fingerprint as follows */ /* _Kernel (unsigned char * __args, void * __context, void * __group_base_addr, void * __private_base_addr) */ -/* { dg-final { scan-tree-dump "_Kernel \\\(unsigned char \\\* __args, void \\\* __context, void \\\* __group_base_addr, void \\\* __private_base_addr\\\)" "gimple"} } */ +/* { dg-final { scan-tree-dump "_Kernel \\\(unsigned char \\\* __args, void \\\* __context, void \\\* __group_base_addr, unsigned int __group_local_offset, void \\\* __private_base_addr\\\)" "gimple"} } */ /* ld_kernarg: mem_read.0 = MEM[(unsigned long *)__args]; */ /* { dg-final { scan-tree-dump "mem_read.\[0-9\] = MEM\\\[\\\(unsigned long \\\*\\\)__args\\\];" "gimple"} } */ @@ -73,7 +73,7 @@ prog kernel 
&KernelWithBarrier(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr) /* The launcher should call __hsail_launch_wg_function in this case: */ /* Kernel (void * __context, void * __group_base_addr) */ /* { dg-final { scan-tree-dump "Kernel \\\(void \\\* __context, void \\\* __group_base_addr\\\)" "gimple"} } */ -/* { dg-final { scan-tree-dump "__hsail_launch_wg_function \\\(_Kernel, __context, __group_base_addr\\\);" "gimple"} }*/ +/* { dg-final { scan-tree-dump "__hsail_launch_wg_function \\\(_Kernel, __context, __group_base_addr, group_local_offset.*\\\);" "gimple"} }*/ /* The kernel should have the magic metadata section injected to the ELF. */ /* TODO: this should be disabled in case not outputting to an ELF. */ @@ -85,7 +85,7 @@ prog kernel &KernelWithBarrier(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr) /* { dg-final { scan-tree-dump "s2 = s0 \\\+ s1;\[\n \]+__builtin___hsail_barrier \\\(__context\\\);\[\n \]+s3 = s0 \\\+ 4294967295;" "gimple"} } */ /* The kernel with the barrier call's launcher function should call the thread-spawning function. */ -/* { dg-final { scan-tree-dump "__hsail_launch_kernel \\\(_KernelWithBarrier, __context, __group_base_addr\\\);" "gimple" } } */ +/* { dg-final { scan-tree-dump "__hsail_launch_kernel \\\(_KernelWithBarrier, __context, __group_base_addr, group_local_offset.*\\\);" "gimple" } } */ diff --git a/gcc/testsuite/brig.dg/test/gimple/variables.hsail b/gcc/testsuite/brig.dg/test/gimple/variables.hsail index c76ea606575..5fd96c1c7bd 100644 --- a/gcc/testsuite/brig.dg/test/gimple/variables.hsail +++ b/gcc/testsuite/brig.dg/test/gimple/variables.hsail @@ -3,7 +3,7 @@ module &module:1:0:$full:$large:$default; /* Tests for different variable scopes and address spaces. 
*/ /* { dg-do compile } */ -/* { dg-options "-fdump-tree-gimple" } */ +/* { dg-options "-fdump-tree-gimple -fdump-tree-original" } */ prog align(256) private_u32 &prog_private; private_u32 &mod_private; @@ -29,7 +29,10 @@ prog function &subfunction(arg_u32 %return_value)(arg_u32 %arg) { ld_private_u32 $s200, [%func_private]; st_private_u32 $s200, [&prog_private]; +/* { dg-final { scan-tree-dump "__group_base_addr \\\+ \\\(0 \\\+" "original" } } */ ld_group_u32 $s203, [%func_group]; + +/* { dg-final { scan-tree-dump "__group_base_addr \\\+ 0" "original" } } */ st_group_u32 $s203, [&prog_group]; ld_global_u32 $s204, [%func_global]; @@ -104,8 +107,6 @@ prog kernel &kernel(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr) kern_group @12 (3) */ -/* { dg-final { scan-tree-dump "\\\+ 8;.*\\\+ 12;.*\\\+ 4;" "gimple" } } */ - /* The "mangling" of the global and readonly vars. */ /* { dg-final { scan-tree-dump "\[ \]*prog_global = s204;" "gimple" } } */ diff --git a/libhsail-rt/ChangeLog b/libhsail-rt/ChangeLog index bf86278584d..53d3634a640 100644 --- a/libhsail-rt/ChangeLog +++ b/libhsail-rt/ChangeLog @@ -1,7 +1,15 @@ +2017-09-27 Pekka Jääskeläinen + + * include/internal/phsa-rt.h: Support for improved group segment + handling with a stack-like allocation scheme. + * include/internal/workitems.h: Likewise. + * rt/workitems.c: Likewise. + 2017-09-25 Pekka Jääskeläinen * rt/workitems.c: Assume the host runtime allocates the work group memory. + 2017-05-03 Pekka Jääskeläinen * rt/workitems.c: Removed a leftover comment. diff --git a/libhsail-rt/include/internal/phsa-rt.h b/libhsail-rt/include/internal/phsa-rt.h index d47cbfcd3b6..13349e7fdbc 100644 --- a/libhsail-rt/include/internal/phsa-rt.h +++ b/libhsail-rt/include/internal/phsa-rt.h @@ -42,7 +42,8 @@ typedef void (*gccbrigKernelLauncherFunc) (void *context, void *); /* Pointer type for kernel functions produced by gccbrig from the HSAIL. This is private from outside the device binary and only called by the launcher. 
*/ -typedef void (*gccbrigKernelFunc) (unsigned char *, void *, void *, void *); +typedef void (*gccbrigKernelFunc) (unsigned char *, void *, void *, uint32_t, + void *); /* Context data that is passed to the kernel function, initialized by the runtime to the current launch information. The data is diff --git a/libhsail-rt/include/internal/workitems.h b/libhsail-rt/include/internal/workitems.h index e7d386d32ec..2abfc61d867 100644 --- a/libhsail-rt/include/internal/workitems.h +++ b/libhsail-rt/include/internal/workitems.h @@ -63,6 +63,11 @@ typedef struct to the work-group. */ void *group_base_ptr; + /* The offset in the group memory for the kernel local group variables. + To support module scope group variables, there might be a need to preserve + room for them in the beginning of the group segment. */ + uint32_t initial_group_offset; + /* Similarly to the private segment that gets space allocated for all WIs in the work-group. */ void *private_base_ptr; diff --git a/libhsail-rt/rt/workitems.c b/libhsail-rt/rt/workitems.c index ed1185a5780..b24fc108357 100644 --- a/libhsail-rt/rt/workitems.c +++ b/libhsail-rt/rt/workitems.c @@ -113,7 +113,7 @@ phsa_work_item_thread (int arg0, int arg1) && wi->z < __hsail_currentworkgroupsize (2, wi)) { l_data->kernel (l_data->kernarg_addr, wi, wg->group_base_ptr, - wg->private_base_ptr); + wg->initial_group_offset, wg->private_base_ptr); #ifdef DEBUG_PHSA_RT printf ("done.\n"); #endif @@ -221,7 +221,8 @@ phsa_work_item_thread (int arg0, int arg1) static void phsa_execute_wi_gang (PHSAKernelLaunchData *context, void *group_base_ptr, - size_t wg_size_x, size_t wg_size_y, size_t wg_size_z) + uint32_t group_local_offset, size_t wg_size_x, + size_t wg_size_y, size_t wg_size_z) { PHSAWorkItem *wi_threads = NULL; PHSAWorkGroup wg; @@ -247,6 +248,7 @@ phsa_execute_wi_gang (PHSAKernelLaunchData *context, void *group_base_ptr, wg.alloca_stack_p = wg.private_segment_total_size; wg.alloca_frame_p = wg.alloca_stack_p; + wg.initial_group_offset = 
group_local_offset; #ifdef EXECUTE_WGS_BACKWARDS wg.x = context->wg_max_x - 1; @@ -313,7 +315,8 @@ phsa_execute_wi_gang (PHSAKernelLaunchData *context, void *group_base_ptr, them execute all the WGs, including a potential partial WG. */ static void -phsa_spawn_work_items (PHSAKernelLaunchData *context, void *group_base_ptr) +phsa_spawn_work_items (PHSAKernelLaunchData *context, void *group_base_ptr, + uint32_t group_local_offset) { hsa_kernel_dispatch_packet_t *dp = context->dp; size_t x, y, z; @@ -361,8 +364,8 @@ phsa_spawn_work_items (PHSAKernelLaunchData *context, void *group_base_ptr) dp->grid_size_y, dp->grid_size_z); #endif - phsa_execute_wi_gang (context, group_base_ptr, sat_wg_size_x, sat_wg_size_y, - sat_wg_size_z); + phsa_execute_wi_gang (context, group_base_ptr, group_local_offset, + sat_wg_size_x, sat_wg_size_y, sat_wg_size_z); } #endif @@ -374,7 +377,8 @@ phsa_spawn_work_items (PHSAKernelLaunchData *context, void *group_base_ptr) execute massive numbers of work-items in a non-SPMD machine than fibers (easily 100x faster). 
*/ static void -phsa_execute_work_groups (PHSAKernelLaunchData *context, void *group_base_ptr) +phsa_execute_work_groups (PHSAKernelLaunchData *context, void *group_base_ptr, + uint32_t group_local_offset) { hsa_kernel_dispatch_packet_t *dp = context->dp; size_t x, y, z, wg_x, wg_y, wg_z; @@ -462,7 +466,7 @@ phsa_execute_work_groups (PHSAKernelLaunchData *context, void *group_base_ptr) wi.wg->z = wg_z; context->kernel (context->kernarg_addr, &wi, group_base_ptr, - private_base_ptr); + group_local_offset, private_base_ptr); #if defined (BENCHMARK_PHSA_RT) wg_count++; @@ -527,19 +531,20 @@ phsa_execute_work_groups (PHSAKernelLaunchData *context, void *group_base_ptr) void __hsail_launch_kernel (gccbrigKernelFunc kernel, PHSAKernelLaunchData *context, - void *group_base_ptr) + void *group_base_ptr, uint32_t group_local_offset) { context->kernel = kernel; - phsa_spawn_work_items (context, group_base_ptr); + phsa_spawn_work_items (context, group_base_ptr, group_local_offset); } #endif void __hsail_launch_wg_function (gccbrigKernelFunc kernel, - PHSAKernelLaunchData *context, void *group_base_ptr) + PHSAKernelLaunchData *context, void *group_base_ptr, + uint32_t group_local_offset) { context->kernel = kernel; - phsa_execute_work_groups (context, group_base_ptr); + phsa_execute_work_groups (context, group_base_ptr, group_local_offset); } uint32_t