From 637f3cdec3168c4acf42b067cf511e0aecfcfcae Mon Sep 17 00:00:00 2001 From: =?utf8?q?Pekka=20J=C3=A4=C3=A4skel=C3=A4inen?= Date: Fri, 4 May 2018 16:44:02 +0000 Subject: [PATCH] [BRIGFE] Enable whole program optimizations HSA assumes all program scope HSAIL symbols can be queried from the host runtime API, thus they cannot be removed by the IPA. Getting some inlining happening in the finalized binary required: * explicitly marking the 'prog' scope functions and the launcher function "externally_visible" to avoid the inliner removing them * also setting the host_def ptr externally visible, otherwise IPA assumes it's never set * adding the 'inline' keyword to functions to enable inlining, otherwise GCC defaults to replaceable functions (one can link over the previous one) which cannot be inlined * replacing all calls to declarations with calls to definitions to enable the inliner to find the definition * fixing missing hidden argument types in the generated functions. These were ignored silently until GCC started to be able to inline calls to such functions. * not gimplifying before fixing the call targets. Otherwise the calls get detached and the definitions are not found. The reason why this happens is not clear, but gimplifying only after call target decl->def conversion fixes this. 
From-SVN: r259943 --- gcc/brig/ChangeLog | 11 +++ gcc/brig/brig-lang.c | 4 +- .../brigfrontend/brig-branch-inst-handler.cc | 2 + .../brigfrontend/brig-function-handler.cc | 30 +++++-- gcc/brig/brigfrontend/brig-function.cc | 4 +- gcc/brig/brigfrontend/brig-to-generic.cc | 82 ++++++++++++++++++- gcc/brig/brigfrontend/brig-to-generic.h | 8 ++ .../brigfrontend/brig-variable-handler.cc | 3 + 8 files changed, 130 insertions(+), 14 deletions(-) diff --git a/gcc/brig/ChangeLog b/gcc/brig/ChangeLog index 7805b99e1b7..57d44b71e36 100644 --- a/gcc/brig/ChangeLog +++ b/gcc/brig/ChangeLog @@ -1,3 +1,14 @@ +2018-05-04 Pekka Jääskeläinen + + * brig/brig-lang.c: Add support for whole program + optimizations by marking the kernels externally visible. + * brig/brigfrontend/brig-branch-inst-handler.cc: See above. + * brig/brigfrontend/brig-function-handler.cc: See above. + * brig/brigfrontend/brig-function.cc: See above. + * brig/brigfrontend/brig-to-generic.cc: See above. + * brig/brigfrontend/brig-to-generic.h: See above. + * brig/brigfrontend/brig-variable-handler.cc: See above. + 2018-01-03 Richard Sandiford Alan Hayward David Sherwood diff --git a/gcc/brig/brig-lang.c b/gcc/brig/brig-lang.c index 997dad4191c..030d76a745e 100644 --- a/gcc/brig/brig-lang.c +++ b/gcc/brig/brig-lang.c @@ -57,7 +57,7 @@ static tree handle_pure_attribute (tree *, tree, tree, int, bool *); static tree handle_nothrow_attribute (tree *, tree, tree, int, bool *); static tree handle_returns_twice_attribute (tree *, tree, tree, int, bool *); -/* This file is based on Go frontent'd go-lang.c and gogo-tree.cc. */ +/* This file is based on Go frontend's go-lang.c and gogo-tree.cc. */ /* If -v set. */ @@ -123,7 +123,7 @@ brig_langhook_init_options_struct (struct gcc_options *opts) /* If we set this to one, the whole program optimizations internalize all global variables, making them invisible to the dyn loader (and thus the HSA runtime implementation). 
*/ - opts->x_flag_whole_program = 0; + opts->x_flag_whole_program = 1; /* The builtin math functions should not set errno. */ opts->x_flag_errno_math = 0; diff --git a/gcc/brig/brigfrontend/brig-branch-inst-handler.cc b/gcc/brig/brigfrontend/brig-branch-inst-handler.cc index a766a997200..1340b74dd35 100644 --- a/gcc/brig/brigfrontend/brig-branch-inst-handler.cc +++ b/gcc/brig/brigfrontend/brig-branch-inst-handler.cc @@ -150,6 +150,8 @@ brig_branch_inst_handler::operator () (const BrigBase *base) } m_parent.m_cf->m_called_functions.push_back (func_ref); + if (DECL_EXTERNAL (func_ref)) + m_parent.add_decl_call (call); return base->byteCount; } diff --git a/gcc/brig/brigfrontend/brig-function-handler.cc b/gcc/brig/brigfrontend/brig-function-handler.cc index f0c995fa0f2..c524dbe092a 100644 --- a/gcc/brig/brigfrontend/brig-function-handler.cc +++ b/gcc/brig/brigfrontend/brig-function-handler.cc @@ -132,6 +132,14 @@ brig_directive_function_handler::operator () (const BrigBase *base) DECL_RESULT (fndecl) = resdecl; DECL_CONTEXT (resdecl) = fndecl; DECL_EXTERNAL (fndecl) = 0; + + /* Aggressive inlining to the kernel function is usually a good + idea with offlined functionality to enhance SIMD execution on + GPUs and vector units. */ + + DECL_ATTRIBUTES (fndecl) + = tree_cons (get_identifier ("flatten"), NULL, + DECL_ATTRIBUTES (fndecl)); } else { @@ -228,6 +236,8 @@ brig_directive_function_handler::operator () (const BrigBase *base) vec_safe_push (args, ptr_type_node); vec_safe_push (args, ptr_type_node); + vec_safe_push (args, ptr_type_node); + vec_safe_push (args, ptr_type_node); fndecl = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL, name_identifier, build_function_type_vec (ret_type, args)); @@ -295,21 +305,21 @@ brig_directive_function_handler::operator () (const BrigBase *base) DECL_SAVED_TREE (fndecl) = bind_expr; - /* Try to preserve the functions across IPA. 
*/ - DECL_PRESERVE_P (fndecl) = 1; - TREE_SIDE_EFFECTS (fndecl) = 1; - - TREE_ADDRESSABLE (fndecl) = 1; + set_externally_visible (fndecl); if (base->kind == BRIG_KIND_DIRECTIVE_FUNCTION) { - TREE_STATIC (fndecl) = 1; + TREE_STATIC (fndecl) = 0; TREE_PUBLIC (fndecl) = 1; + DECL_EXTERNAL (fndecl) = 0; + DECL_DECLARED_INLINE_P (fndecl) = 1; } else if (base->kind == BRIG_KIND_DIRECTIVE_KERNEL) { - TREE_STATIC (fndecl) = 1; + TREE_STATIC (fndecl) = 0; TREE_PUBLIC (fndecl) = 1; + DECL_EXTERNAL (fndecl) = 0; + set_externally_visible (fndecl); } else if (base->kind == BRIG_KIND_DIRECTIVE_SIGNATURE) { @@ -349,8 +359,12 @@ brig_directive_function_handler::operator () (const BrigBase *base) m_parent.add_function_decl (func_name, fndecl); m_parent.append_global (fndecl); + if (!is_definition) - return bytes_consumed; + { + DECL_EXTERNAL (fndecl) = 1; + return bytes_consumed; + } m_parent.start_function (fndecl); diff --git a/gcc/brig/brigfrontend/brig-function.cc b/gcc/brig/brigfrontend/brig-function.cc index 14ca32cdb78..e1a14da8b72 100644 --- a/gcc/brig/brigfrontend/brig-function.cc +++ b/gcc/brig/brigfrontend/brig-function.cc @@ -589,7 +589,7 @@ brig_function::emit_launcher_and_metadata () tree bind_expr = build3 (BIND_EXPR, void_type_node, NULL, stmt_list, NULL); - TREE_STATIC (launcher) = 0; + TREE_STATIC (launcher) = 1; TREE_PUBLIC (launcher) = 1; DECL_SAVED_TREE (launcher) = bind_expr; @@ -633,6 +633,8 @@ brig_function::emit_launcher_and_metadata () emit_metadata (stmt_list); + set_externally_visible (launcher); + return launcher; } diff --git a/gcc/brig/brigfrontend/brig-to-generic.cc b/gcc/brig/brigfrontend/brig-to-generic.cc index f644db81ecb..ee212b1f7b4 100644 --- a/gcc/brig/brigfrontend/brig-to-generic.cc +++ b/gcc/brig/brigfrontend/brig-to-generic.cc @@ -52,6 +52,7 @@ #include "cgraph.h" #include "dumpfile.h" #include "tree-pretty-print.h" +#include "attribs.h" extern int gccbrig_verbose; @@ -487,7 +488,9 @@ brig_to_generic::add_global_variable (const std::string 
&name, tree var_decl) tree var_addr = build1 (ADDR_EXPR, ptype, var_decl); DECL_INITIAL (host_def_var) = var_addr; - TREE_PUBLIC (host_def_var) = 0; + TREE_PUBLIC (host_def_var) = 1; + + set_externally_visible (host_def_var); } /* Adds an indirection pointer for a potential host-defined program scope @@ -510,10 +513,18 @@ brig_to_generic::add_host_def_var_ptr (const std::string &name, tree var_decl) TREE_ADDRESSABLE (ptr_var) = 1; TREE_STATIC (ptr_var) = 1; + set_externally_visible (ptr_var); + append_global (ptr_var); m_global_variables[var_name] = ptr_var; } +void +brig_to_generic::add_decl_call (tree call) +{ + m_decl_call.push_back (call); +} + /* Produce a "mangled name" for the given brig function or kernel. The mangling is used to make unique global symbol name in case of module scope functions. Program scope functions are not mangled @@ -701,8 +712,6 @@ brig_to_generic::finish_function () m_cf->finish (); m_cf->emit_metadata (stmts); dump_function (m_dump_file, m_cf); - gimplify_function_tree (m_cf->m_func_decl); - cgraph_node::finalize_function (m_cf->m_func_decl, true); } else /* Emit the kernel only at the very end so we can analyze the total @@ -846,6 +855,43 @@ call_builtin (tree pdecl, int nargs, tree rettype, ...) void brig_to_generic::write_globals () { + + /* Replace calls to declarations with calls to definitions. Otherwise + inlining will fail to find the definition to inline from. 
*/ + + for (size_t i = 0; i < m_decl_call.size(); ++i) + { + tree decl_call = m_decl_call.at(i); + tree func_decl = get_callee_fndecl (decl_call); + brig_function *brig_function = get_finished_function (func_decl); + + if (brig_function && brig_function->m_func_decl + && DECL_EXTERNAL (brig_function->m_func_decl) == 0 + && brig_function->m_func_decl != func_decl) + { + + decl_call = CALL_EXPR_FN (decl_call); + STRIP_NOPS (decl_call); + if (TREE_CODE (decl_call) == ADDR_EXPR + && TREE_CODE (TREE_OPERAND (decl_call, 0)) == FUNCTION_DECL) + TREE_OPERAND (decl_call, 0) = brig_function->m_func_decl; + } + } + + for (std::map::iterator i + = m_finished_functions.begin(), e = m_finished_functions.end(); + i != e; ++i) + { + brig_function *brig_f = (*i).second; + if (brig_f->m_is_kernel) + continue; + + /* Finalize only at this point to allow the cgraph analysis to + see definitions to calls to later functions. */ + gimplify_function_tree (brig_f->m_func_decl); + cgraph_node::finalize_function (brig_f->m_func_decl, true); + } + /* Now that the whole BRIG module has been processed, build a launcher and a metadata section for each built kernel. */ for (size_t i = 0; i < m_kernels.size (); ++i) @@ -880,6 +926,17 @@ brig_to_generic::write_globals () append_global (launcher); + if (m_dump_file) + { + std::string kern_name = f->m_name.substr (1); + fprintf (m_dump_file, "\n;; Function %s", kern_name.c_str()); + fprintf (m_dump_file, "\n;; enabled by -%s\n\n", + dump_flag_name (TDI_original)); + print_generic_decl (m_dump_file, launcher, 0); + print_generic_expr (m_dump_file, DECL_SAVED_TREE (launcher), 0); + fprintf (m_dump_file, "\n"); + } + gimplify_function_tree (launcher); cgraph_node::finalize_function (launcher, true); pop_cfun (); @@ -933,6 +990,25 @@ get_scalar_unsigned_int_type (tree original_type) * BITS_PER_UNIT, true); } +/* Set the declaration externally visible so it won't get removed by + whole program optimizations. 
*/ + +void +set_externally_visible (tree decl) +{ + if (!lookup_attribute ("externally_visible", DECL_ATTRIBUTES (decl))) + DECL_ATTRIBUTES (decl) = tree_cons (get_identifier ("externally_visible"), + NULL, DECL_ATTRIBUTES (decl)); +} + +void +set_inline (tree decl) +{ + if (!lookup_attribute ("inline", DECL_ATTRIBUTES (decl))) + DECL_ATTRIBUTES (decl) = tree_cons (get_identifier ("inline"), + NULL, DECL_ATTRIBUTES (decl)); +} + void dump_function (FILE *dump_file, brig_function *f) { diff --git a/gcc/brig/brigfrontend/brig-to-generic.h b/gcc/brig/brigfrontend/brig-to-generic.h index 384e3bd56e4..a3eb4329a82 100644 --- a/gcc/brig/brigfrontend/brig-to-generic.h +++ b/gcc/brig/brigfrontend/brig-to-generic.h @@ -74,6 +74,7 @@ public: tree global_variable (const std::string &name) const; void add_global_variable (const std::string &name, tree var_decl); void add_host_def_var_ptr (const std::string &name, tree var_decl); + void add_decl_call (tree call); void start_function (tree f); void finish_function (); @@ -152,6 +153,10 @@ private: label_index m_global_variables; + /* Calls to declarations to be fixed in the end of processing to call + defs instead. */ + std::vector m_decl_call; + /* The size of each private variable, including the alignment padding. 
*/ std::map m_private_data_sizes; @@ -226,6 +231,9 @@ tree build_stmt (enum tree_code code, ...); tree get_unsigned_int_type (tree type); tree get_scalar_unsigned_int_type (tree type); +void set_externally_visible (tree decl); + +void set_inline (tree decl); void dump_function (FILE *dump_file, brig_function *f); diff --git a/gcc/brig/brigfrontend/brig-variable-handler.cc b/gcc/brig/brigfrontend/brig-variable-handler.cc index 3edb786a1dd..39263086c22 100644 --- a/gcc/brig/brigfrontend/brig-variable-handler.cc +++ b/gcc/brig/brigfrontend/brig-variable-handler.cc @@ -27,6 +27,7 @@ #include "brig-util.h" #include "print-tree.h" #include "diagnostic-core.h" +#include "brig-to-generic.h" tree brig_directive_variable_handler::build_variable @@ -206,6 +207,8 @@ brig_directive_variable_handler::operator () (const BrigBase *base) so we can get their address from the Runtime API. */ DECL_CONTEXT (var_decl) = NULL_TREE; TREE_STATIC (var_decl) = 1; + TREE_PUBLIC (var_decl) = 1; + set_externally_visible (var_decl); m_parent.add_global_variable (var_name, var_decl); } } -- 2.30.2