From be3c16c474629e69c6c4cd6465ffa4b572cc6460 Mon Sep 17 00:00:00 2001 From: Dehao Chen Date: Tue, 21 Oct 2014 17:59:30 +0000 Subject: [PATCH] Add AutoFDO. gcc/ChangeLog: 2014-10-21 Dehao Chen * auto-profile.c: New file. * auto-profile.h: New file. * basic-block.h (maybe_hot_count_p): New export func. (add_working_set): New export func. * gcov-io.h (GCOV_TAG_AFDO_FILE_NAMES): New tag. (GCOV_TAG_AFDO_FUNCTION): Likewise. (GCOV_TAG_AFDO_WORKING_SET): Likewise. * opts.c (enable_fdo_optimizations): New func. (common_handle_option): Handle -fauto-profile flag. * ipa-inline.c (want_early_inline_function_p): Iterative-einline. (class pass_early_inline): Export early_inliner. (early_inliner): Likewise. (pass_early_inline::execute): Likewise. * ipa-inline.h (early_inliner): Likewise. * predict.c (maybe_hot_count_p): New export func. (counts_to_freqs): AutoFDO logic. (rebuild_frequencies): Likewise. * tree-profile.c (pass_ipa_tree_profile::gate): Likewise. * profile.c (add_working_set): New func. * Makefile.in (auto-profile.o): New object file. * passes.def (pass_ipa_auto_profile): New pass. * tree-ssa-live.c (remove_unused_scope_block_p): AutoFDO logic. * tree-pass.h (make_pass_ipa_auto_profile): New pass. * toplev.c (compile_file): AutoFDO logic. * doc/invoke.texi (-fauto-profile): New doc. * coverage.c (coverage_init): AutoFDO logic. * common.opt (-fauto-profile): New flag. * timevar.def (TV_IPA_AUTOFDO): New tag. * value-prof.c (gimple_alloc_histogram_value): New export func. (check_ic_target): Likewise. * value-prof.h (gimple_alloc_histogram_value): Likewise. (check_ic_target): Likewise. 
From-SVN: r216523 --- gcc/ChangeLog | 35 + gcc/Makefile.in | 1 + gcc/auto-profile.c | 1687 +++++++++++++++++++++++++++++++++++++++++++ gcc/auto-profile.h | 31 + gcc/basic-block.h | 2 + gcc/common.opt | 10 + gcc/coverage.c | 5 +- gcc/doc/invoke.texi | 38 +- gcc/gcov-io.h | 3 + gcc/ipa-inline.c | 79 +- gcc/ipa-inline.h | 1 + gcc/opts.c | 100 ++- gcc/passes.def | 1 + gcc/predict.c | 7 +- gcc/profile.c | 8 + gcc/timevar.def | 1 + gcc/toplev.c | 5 + gcc/tree-pass.h | 1 + gcc/tree-profile.c | 6 +- gcc/tree-ssa-live.c | 2 +- gcc/value-prof.c | 4 +- gcc/value-prof.h | 3 + 22 files changed, 1950 insertions(+), 80 deletions(-) create mode 100644 gcc/auto-profile.c create mode 100644 gcc/auto-profile.h diff --git a/gcc/ChangeLog b/gcc/ChangeLog index bce5f7010eb..931ec2e5253 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,38 @@ +2014-10-21 Dehao Chen + + * auto-profile.c: New file. + * auto-profile.h: New file. + * basic-block.h (maybe_hot_count_p): New export func. + (add_working_set): New export func. + * gcov-io.h (GCOV_TAG_AFDO_FILE_NAMES): New tag. + (GCOV_TAG_AFDO_FUNCTION): Likewise. + (GCOV_TAG_AFDO_WORKING_SET): Likewise. + * opts.c (enable_fdo_optimizations): New func. + (common_handle_option): Handle -fauto-profile flag. + * ipa-inline.c (want_early_inline_function_p): Iterative-einline. + (class pass_early_inline): Export early_inliner. + (early_inliner): Likewise. + (pass_early_inline::execute): Likewise. + * ipa-inline.h (early_inliner): Likewise. + * predict.c (maybe_hot_count_p): New export func. + (counts_to_freqs): AutoFDO logic. + (rebuild_frequencies): Likewise. + * tree-profile.c (pass_ipa_tree_profile::gate): Likewise. + * profile.c (add_working_set): New func. + * Makefile.in (auto-profile.o): New object file. + * passes.def (pass_ipa_auto_profile): New pass. + * tree-ssa-live.c (remove_unused_scope_block_p): AutoFDO logic. + * tree-pass.h (make_pass_ipa_auto_profile): New pass. + * toplev.c (compile_file): AutoFDO logic. 
+ * doc/invoke.texi (-fauto-profile): New doc. + * coverage.c (coverage_init): AutoFDO logic. + * common.opt (-fauto-profile): New flag. + * timevar.def (TV_IPA_AUTOFDO): New tag. + * value-prof.c (gimple_alloc_histogram_value): New export func. + (check_ic_target): Likewise. + * value-prof.h (gimple_alloc_histogram_value): Likewise. + (check_ic_target): Likewise. + 2014-10-21 David Malcolm * cgraph.c (cgraph_c_finalize): New function. diff --git a/gcc/Makefile.in b/gcc/Makefile.in index de2adc761e4..1fd7915814c 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1153,6 +1153,7 @@ OBJS = \ alias.o \ alloc-pool.o \ auto-inc-dec.o \ + auto-profile.o \ bb-reorder.o \ bitmap.o \ bt-load.o \ diff --git a/gcc/auto-profile.c b/gcc/auto-profile.c new file mode 100644 index 00000000000..24d7126a07a --- /dev/null +++ b/gcc/auto-profile.c @@ -0,0 +1,1687 @@ +/* Read and annotate call graph profile from the auto profile data file. + Copyright (C) 2014. Free Software Foundation, Inc. + Contributed by Dehao Chen (dehao@google.com) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +#include <string.h> +#include <map> +#include <set> + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tree.h" +#include "tree-pass.h" +#include "flags.h" +#include "basic-block.h" +#include "diagnostic-core.h" +#include "gcov-io.h" +#include "input.h" +#include "profile.h" +#include "langhooks.h" +#include "opts.h" +#include "tree-pass.h" +#include "cfgloop.h" +#include "tree-ssa-alias.h" +#include "tree-cfg.h" +#include "tree-cfgcleanup.h" +#include "tree-ssa-operands.h" +#include "tree-into-ssa.h" +#include "internal-fn.h" +#include "is-a.h" +#include "gimple-expr.h" +#include "gimple.h" +#include "gimple-iterator.h" +#include "gimple-ssa.h" +#include "cgraph.h" +#include "value-prof.h" +#include "coverage.h" +#include "params.h" +#include "ipa-inline.h" +#include "tree-inline.h" +#include "stringpool.h" +#include "auto-profile.h" +#include "vec.h" + +/* The following routines implement AutoFDO optimization. + + This optimization uses sampling profiles to annotate basic block counts + and uses heuristics to estimate branch probabilities. + + There are three phases in AutoFDO: + + Phase 1: Read profile from the profile data file. + The following info is read from the profile datafile: + * string_table: a map between function name and its index. + * autofdo_source_profile: a map from function_instance name to + function_instance. This is represented as a forest of + function_instances. + * WorkingSet: a histogram of how many instructions are covered for a + given percentage of total cycles. This is describing the binary + level information (not source level). This info is used to help + decide if we want aggressive optimizations that could increase + code footprint (e.g. loop unroll etc.) + A function instance is an instance of function that could either be a + standalone symbol, or a clone of a function that is inlined into another + function. + + Phase 2: Early inline + value profile transformation.
+ Early inline uses autofdo_source_profile to find if a callsite is: + * inlined in the profiled binary. + * callee body is hot in the profiling run. + If both condition satisfies, early inline will inline the callsite + regardless of the code growth. + Phase 2 is an iterative process. During each iteration, we also check + if an indirect callsite is promoted and inlined in the profiling run. + If yes, vpt will happen to force promote it and in the next iteration, + einline will inline the promoted callsite in the next iteration. + + Phase 3: Annotate control flow graph. + AutoFDO uses a separate pass to: + * Annotate basic block count + * Estimate branch probability + + After the above 3 phases, all profile is readily annotated on the GCC IR. + AutoFDO tries to reuse all FDO infrastructure as much as possible to make + use of the profile. E.g. it uses existing mechanism to calculate the basic + block/edge frequency, as well as the cgraph node/edge count. +*/ + +#define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo" +#define AUTO_PROFILE_VERSION 1 + +namespace autofdo +{ + +/* Represent a source location: (function_decl, lineno). */ +typedef std::pair decl_lineno; + +/* Represent an inline stack. vector[0] is the leaf node. */ +typedef auto_vec inline_stack; + +/* String array that stores function names. */ +typedef auto_vec string_vector; + +/* Map from function name's index in string_table to target's + execution count. */ +typedef std::map icall_target_map; + +/* Set of gimple stmts. Used to track if the stmt has already been promoted + to direct call. */ +typedef std::set stmt_set; + +/* Represent count info of an inline stack. */ +struct count_info +{ + /* Sampled count of the inline stack. */ + gcov_type count; + + /* Map from indirect call target to its sample count. */ + icall_target_map targets; + + /* Whether this inline stack is already used in annotation. + + Each inline stack should only be used to annotate IR once. 
+ This will be enforced when instruction-level discriminator + is supported. */ + bool annotated; +}; + +/* operator< for "const char *". */ +struct string_compare +{ + bool operator()(const char *a, const char *b) const + { + return strcmp (a, b) < 0; + } +}; + +/* Store a string array, indexed by string position in the array. */ +class string_table +{ +public: + string_table () + {} + + ~string_table (); + + /* For a given string, returns its index. */ + int get_index (const char *name) const; + + /* For a given decl, returns the index of the decl name. */ + int get_index_by_decl (tree decl) const; + + /* For a given index, returns the string. */ + const char *get_name (int index) const; + + /* Read profile, return TRUE on success. */ + bool read (); + +private: + typedef std::map string_index_map; + string_vector vector_; + string_index_map map_; +}; + +/* Profile of a function instance: + 1. total_count of the function. + 2. head_count (entry basic block count) of the function (only valid when + function is a top-level function_instance, i.e. it is the original copy + instead of the inlined copy). + 3. map from source location (decl_lineno) to profile (count_info). + 4. map from callsite to callee function_instance. */ +class function_instance +{ +public: + typedef auto_vec function_instance_stack; + + /* Read the profile and return a function_instance with head count as + HEAD_COUNT. Recursively read callsites to create nested function_instances + too. STACK is used to track the recursive creation process. */ + static function_instance * + read_function_instance (function_instance_stack *stack, + gcov_type head_count); + + /* Recursively deallocate all callsites (nested function_instances). */ + ~function_instance (); + + /* Accessors. 
*/ + int + name () const + { + return name_; + } + gcov_type + total_count () const + { + return total_count_; + } + gcov_type + head_count () const + { + return head_count_; + } + + /* Traverse callsites of the current function_instance to find one at the + location of LINENO and callee name represented in DECL. */ + function_instance *get_function_instance_by_decl (unsigned lineno, + tree decl) const; + + /* Store the profile info for LOC in INFO. Return TRUE if profile info + is found. */ + bool get_count_info (location_t loc, count_info *info) const; + + /* Read the inlined indirect call target profile for STMT and store it in + MAP, return the total count for all inlined indirect calls. */ + gcov_type find_icall_target_map (gimple stmt, icall_target_map *map) const; + + /* Sum of counts that is used during annotation. */ + gcov_type total_annotated_count () const; + + /* Mark LOC as annotated. */ + void mark_annotated (location_t loc); + +private: + /* Callsite, represented as (decl_lineno, callee_function_name_index). */ + typedef std::pair callsite; + + /* Map from callsite to callee function_instance. */ + typedef std::map callsite_map; + + function_instance (unsigned name, gcov_type head_count) + : name_ (name), total_count_ (0), head_count_ (head_count) + { + } + + /* Map from source location (decl_lineno) to profile (count_info). */ + typedef std::map position_count_map; + + /* function_instance name index in the string_table. */ + unsigned name_; + + /* Total sample count. */ + gcov_type total_count_; + + /* Entry BB's sample count. */ + gcov_type head_count_; + + /* Map from callsite location to callee function_instance. */ + callsite_map callsites; + + /* Map from source location to count_info. */ + position_count_map pos_counts; +}; + +/* Profile for all functions. 
*/
+class autofdo_source_profile
+{
+public:
+  /* Allocate a profile object and fill it through read ().  When reading
+     fails the object is deleted again and NULL is returned.  */
+  static autofdo_source_profile *
+  create ()
+  {
+    autofdo_source_profile *map = new autofdo_source_profile ();
+
+    if (map->read ())
+      return map;
+    delete map;
+    return NULL;
+  }
+
+  ~autofdo_source_profile ();
+
+  /* For a given DECL, returns the top-level function_instance.  */
+  function_instance *get_function_instance_by_decl (tree decl) const;
+
+  /* Find count_info for a given gimple STMT.  If found, store the count_info
+     in INFO and return true; otherwise return false.  */
+  bool get_count_info (gimple stmt, count_info *info) const;
+
+  /* Find total count of the callee of EDGE.  */
+  gcov_type get_callsite_total_count (struct cgraph_edge *edge) const;
+
+  /* Update value profile INFO for STMT from the inlined indirect callsite.
+     Return true if INFO is updated.  */
+  bool update_inlined_ind_target (gimple stmt, count_info *info);
+
+  /* Mark LOC as annotated.  */
+  void mark_annotated (location_t loc);
+
+private:
+  /* Map from function_instance name index (in string_table) to
+     function_instance.  */
+  typedef std::map<unsigned, function_instance *> name_function_instance_map;
+
+  /* Construction goes through create ().  */
+  autofdo_source_profile () {}
+
+  /* Read AutoFDO profile and returns TRUE on success.  */
+  bool read ();
+
+  /* Return the function_instance in the profile that correspond to the
+     inline STACK.  */
+  function_instance *
+  get_function_instance_by_inline_stack (const inline_stack &stack) const;
+
+  name_function_instance_map map_;
+};
+
+/* Store the strings read from the profile data file.  */
+static string_table *afdo_string_table;
+
+/* Store the AutoFDO source profile.  */
+static autofdo_source_profile *afdo_source_profile;
+
+/* gcov_ctr_summary structure to store the profile_info.  */
+static struct gcov_ctr_summary *afdo_profile_info;
+
+/* Helper functions.  */
+
+/* Return the original name of NAME: strip the suffix that starts
+   with '.'  Caller is responsible for freeing RET.
*/
+
+static char *
+get_original_name (const char *name)
+{
+  /* Work on a private copy so NAME itself is never modified.  */
+  char *ret = xstrdup (name);
+  /* Truncate at the first '.', if there is one.  */
+  char *find = strchr (ret, '.');
+  if (find != NULL)
+    *find = 0;
+  return ret;
+}
+
+/* Return the combined location, which is a 32bit integer in which
+   higher 16 bits stores the line offset of LOC to the start lineno
+   of DECL.  The lower 16 bits are reserved for the discriminator; this
+   function always leaves them zero.  A warning is issued when the line
+   offset does not fit in 16 bits.  */
+
+static unsigned
+get_combined_location (location_t loc, tree decl)
+{
+  /* TODO: allow more bits for line and less bits for discriminator.  */
+  if (LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl) >= (1<<16))
+    warning_at (loc, OPT_Woverflow, "Offset exceeds 16 bits.");
+  return ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16);
+}
+
+/* Return the function decl of a given lexical BLOCK, or NULL_TREE when
+   BLOCK has no known source location.  */
+
+static tree
+get_function_decl_from_block (tree block)
+{
+  tree decl;
+
+  /* Compare the locus of the block's location with UNKNOWN_LOCATION.  The
+     closing parenthesis of LOCATION_LOCUS used to be misplaced, so the
+     macro was applied to the result of the comparison instead.  */
+  if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) == UNKNOWN_LOCATION)
+    return NULL_TREE;
+
+  /* Follow the abstract-origin chain for as long as it yields BLOCKs; the
+     first non-BLOCK node (or NULL) reached is the result.  */
+  for (decl = BLOCK_ABSTRACT_ORIGIN (block);
+       decl && (TREE_CODE (decl) == BLOCK);
+       decl = BLOCK_ABSTRACT_ORIGIN (decl))
+    if (TREE_CODE (decl) == FUNCTION_DECL)
+      break;
+  return decl;
+}
+
+/* Store inline stack for LOCUS in STACK.
*/
+
+static void
+get_inline_stack (location_t locus, inline_stack *stack)
+{
+  if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
+    return;
+
+  tree block = LOCATION_BLOCK (locus);
+  if (block && TREE_CODE (block) == BLOCK)
+    {
+      /* Walk outwards through the enclosing BLOCKs.  Each one that carries
+         a known source location contributes one stack entry: the decl
+         found for that block paired with the current LOCUS relative to it.
+         LOCUS then becomes the block's own location for the next level.
+         NOTE(review): get_function_decl_from_block can return NULL_TREE,
+         which is passed on to get_combined_location unchecked.  */
+      for (block = BLOCK_SUPERCONTEXT (block);
+           block && (TREE_CODE (block) == BLOCK);
+           block = BLOCK_SUPERCONTEXT (block))
+        {
+          location_t tmp_locus = BLOCK_SOURCE_LOCATION (block);
+          if (LOCATION_LOCUS (tmp_locus) == UNKNOWN_LOCATION)
+            continue;
+
+          tree decl = get_function_decl_from_block (block);
+          stack->safe_push (
+              std::make_pair (decl, get_combined_location (locus, decl)));
+          locus = tmp_locus;
+        }
+    }
+  /* The outermost entry always describes the function being compiled.  */
+  stack->safe_push (
+      std::make_pair (current_function_decl,
+                      get_combined_location (locus, current_function_decl)));
+}
+
+/* Return STMT's combined location (see get_combined_location), computed
+   relative to the decl of the innermost enclosing block that has a known
+   source location, or relative to current_function_decl when there is no
+   such block.  Returns UNKNOWN_LOCATION if STMT has no location.  */
+
+static unsigned
+get_relative_location_for_stmt (gimple stmt)
+{
+  location_t locus = gimple_location (stmt);
+  if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
+    return UNKNOWN_LOCATION;
+
+  for (tree block = gimple_block (stmt); block && (TREE_CODE (block) == BLOCK);
+       block = BLOCK_SUPERCONTEXT (block))
+    if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) != UNKNOWN_LOCATION)
+      return get_combined_location (locus,
+                                    get_function_decl_from_block (block));
+  return get_combined_location (locus, current_function_decl);
+}
+
+/* Return true if BB contains indirect call.
*/
+
+static bool
+has_indirect_call (basic_block bb)
+{
+  for (gimple_stmt_iterator it = gsi_start_bb (bb); !gsi_end_p (it);
+       gsi_next (&it))
+    {
+      gimple call = gsi_stmt (it);
+      /* Only real (non-internal) calls are of interest.  */
+      if (gimple_code (call) != GIMPLE_CALL || gimple_call_internal_p (call))
+        continue;
+      /* A call whose callee expression is missing or is not a
+         FUNCTION_DECL counts as indirect.  */
+      tree fn = gimple_call_fn (call);
+      if (fn == NULL || TREE_CODE (fn) != FUNCTION_DECL)
+        return true;
+    }
+  return false;
+}
+
+/* Member functions for string_table.  */
+
+/* Destructor: free every string stored in the table.  */
+
+string_table::~string_table ()
+{
+  const unsigned count = vector_.length ();
+  for (unsigned idx = 0; idx != count; ++idx)
+    free (vector_[idx]);
+}
+
+
+/* Return the index of a given function NAME.  Return -1 if NAME is NULL
+   or is not found in string table.  */
+
+int
+string_table::get_index (const char *name) const
+{
+  if (name == NULL)
+    return -1;
+  string_index_map::const_iterator pos = map_.find (name);
+  return pos == map_.end () ? -1 : (int) pos->second;
+}
+
+/* Return the index of a given function DECL.  The lookup tries, in order,
+   the assembler name with any '.' suffix stripped, the dwarf name, and
+   finally DECL's abstract origin.  Return -1 if none is found in string
+   table.  */
+
+int
+string_table::get_index_by_decl (tree decl) const
+{
+  char *stripped
+    = get_original_name (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
+  int idx = get_index (stripped);
+  free (stripped);
+  if (idx == -1)
+    idx = get_index (lang_hooks.dwarf_name (decl, 0));
+  if (idx == -1 && DECL_ABSTRACT_ORIGIN (decl))
+    idx = get_index_by_decl (DECL_ABSTRACT_ORIGIN (decl));
+  return idx;
+}
+
+/* Return the function name of a given INDEX.  INDEX must be positive and
+   less than the number of strings in the table.  */
+
+const char *
+string_table::get_name (int index) const
+{
+  const int count = (int)vector_.length ();
+  gcc_assert (index > 0 && index < count);
+  return vector_[index];
+}
+
+/* Read the string table.  Return TRUE if reading is successful.  */
+
+bool
+string_table::read ()
+{
+  if (gcov_read_unsigned () != GCOV_TAG_AFDO_FILE_NAMES)
+    return false;
+  /* Skip the length of the section.  */
+  gcov_read_unsigned ();
+  /* Read in the file name table.  Each name is stored with its '.' suffix
+     stripped and is mapped back to its position.  */
+  const unsigned count = gcov_read_unsigned ();
+  for (unsigned idx = 0; idx < count; ++idx)
+    {
+      char *entry = get_original_name (gcov_read_string ());
+      vector_.safe_push (entry);
+      map_[entry] = idx;
+    }
+  return true;
+}
+
+/* Member functions for function_instance.  */
+
+function_instance::~function_instance ()
+{
+  callsite_map::iterator it = callsites.begin ();
+  while (it != callsites.end ())
+    {
+      delete it->second;
+      ++it;
+    }
+}
+
+/* Traverse callsites of the current function_instance to find one at the
+   location of LINENO and callee name represented in DECL.  */
+
+function_instance *
+function_instance::get_function_instance_by_decl (unsigned lineno,
+                                                  tree decl) const
+{
+  /* First try the index found for DECL itself, then the index of its
+     dwarf name.  The second lookup is only made when the first attempt
+     did not produce a callsite.  */
+  for (int attempt = 0; attempt < 2; attempt++)
+    {
+      int idx;
+      if (attempt == 0)
+        idx = afdo_string_table->get_index_by_decl (decl);
+      else
+        idx = afdo_string_table->get_index (lang_hooks.dwarf_name (decl, 0));
+      if (idx == -1)
+        continue;
+      callsite_map::const_iterator hit
+        = callsites.find (std::make_pair (lineno, idx));
+      if (hit != callsites.end ())
+        return hit->second;
+    }
+  /* Fall back to the abstract origin, when there is one.  */
+  if (!DECL_ABSTRACT_ORIGIN (decl))
+    return NULL;
+  return get_function_instance_by_decl (lineno, DECL_ABSTRACT_ORIGIN (decl));
+}
+
+/* Store the profile info for LOC in INFO.  Return TRUE if profile info
+   is found; INFO is left untouched otherwise.  */
+
+bool
+function_instance::get_count_info (location_t loc, count_info *info) const
+{
+  position_count_map::const_iterator pos = pos_counts.find (loc);
+  const bool found = pos != pos_counts.end ();
+  if (found)
+    *info = pos->second;
+  return found;
+}
+
+/* Mark LOC as annotated.
*/
+
+void
+function_instance::mark_annotated (location_t loc)
+{
+  position_count_map::iterator pos = pos_counts.find (loc);
+  if (pos != pos_counts.end ())
+    pos->second.annotated = true;
+}
+
+/* Read the inlined indirect call target profile for STMT and store it in
+   MAP, return the total count for all inlined indirect calls.  */
+
+gcov_type
+function_instance::find_icall_target_map (gimple stmt,
+                                          icall_target_map *map) const
+{
+  gcov_type sum = 0;
+  const unsigned stmt_offset = get_relative_location_for_stmt (stmt);
+
+  for (callsite_map::const_iterator it = callsites.begin ();
+       it != callsites.end (); ++it)
+    {
+      /* Check if callsite location match the stmt.  */
+      if (it->first.first != stmt_offset)
+        continue;
+
+      function_instance *callee_inst = it->second;
+      unsigned callee = callee_inst->name ();
+      /* The callee must have a cgraph node under its profile name and be
+         accepted by check_ic_target for STMT.  */
+      struct cgraph_node *node = cgraph_node::get_for_asmname (
+          get_identifier (afdo_string_table->get_name (callee)));
+      if (node == NULL || !check_ic_target (stmt, node))
+        continue;
+
+      gcov_type callee_total = callee_inst->total_count ();
+      (*map)[callee] = callee_total;
+      sum += callee_total;
+    }
+  return sum;
+}
+
+/* Read the profile and create a function_instance with head count as
+   HEAD_COUNT.  Recursively read callsites to create nested function_instances
+   too.  STACK is used to track the recursive creation process.  */
+
+/* function instance profile format:
+
+   ENTRY_COUNT: 8 bytes
+   NAME_INDEX: 4 bytes
+   NUM_POS_COUNTS: 4 bytes
+   NUM_CALLSITES: 4 bytes
+   POS_COUNT_1:
+     POS_1_OFFSET: 4 bytes
+     NUM_TARGETS: 4 bytes
+     COUNT: 8 bytes
+     TARGET_1:
+       VALUE_PROFILE_TYPE: 4 bytes
+       TARGET_IDX: 8 bytes
+       COUNT: 8 bytes
+     TARGET_2
+     ...
+     TARGET_n
+   POS_COUNT_2
+   ...
+   POS_COUNT_N
+   CALLSITE_1:
+     CALLSITE_1_OFFSET: 4 bytes
+     FUNCTION_INSTANCE_PROFILE (nested)
+   CALLSITE_2
+   ...
+   CALLSITE_n.
*/
+
+function_instance *
+function_instance::read_function_instance (function_instance_stack *stack,
+                                           gcov_type head_count)
+{
+  /* Fixed-size header of the record; the reads must stay in this order.  */
+  const unsigned name = gcov_read_unsigned ();
+  const unsigned num_pos_counts = gcov_read_unsigned ();
+  const unsigned num_callsites = gcov_read_unsigned ();
+
+  function_instance *inst = new function_instance (name, head_count);
+  stack->safe_push (inst);
+
+  for (unsigned p = 0; p < num_pos_counts; p++)
+    {
+      /* Only the upper 16 bits of the stored offset are kept.  */
+      unsigned offset = gcov_read_unsigned () & 0xffff0000;
+      unsigned num_targets = gcov_read_unsigned ();
+      gcov_type count = gcov_read_counter ();
+
+      count_info &entry = inst->pos_counts[offset];
+      entry.count = count;
+      /* Samples count towards this instance and every enclosing one.  */
+      for (unsigned depth = 0; depth < stack->length (); depth++)
+        (*stack)[depth]->total_count_ += count;
+      for (unsigned t = 0; t < num_targets; t++)
+        {
+          /* Only indirect call target histogram is supported now, so the
+             value profile type is read and discarded.  */
+          gcov_read_unsigned ();
+          gcov_type target_idx = gcov_read_counter ();
+          entry.targets[target_idx] = gcov_read_counter ();
+        }
+    }
+
+  for (unsigned c = 0; c < num_callsites; c++)
+    {
+      unsigned offset = gcov_read_unsigned ();
+      /* Nested instances are read with a head count of zero.  */
+      function_instance *callee = read_function_instance (stack, 0);
+      inst->callsites[std::make_pair (offset, callee->name ())] = callee;
+    }
+
+  stack->pop ();
+  return inst;
+}
+
+/* Sum of counts that is used during annotation: the counts of all
+   annotated positions of this instance plus those of its callsites,
+   recursively.  */
+
+gcov_type
+function_instance::total_annotated_count () const
+{
+  gcov_type sum = 0;
+
+  position_count_map::const_iterator pos = pos_counts.begin ();
+  for (; pos != pos_counts.end (); ++pos)
+    {
+      if (!pos->second.annotated)
+        continue;
+      sum += pos->second.count;
+    }
+
+  callsite_map::const_iterator site = callsites.begin ();
+  for (; site != callsites.end (); ++site)
+    sum += site->second->total_annotated_count ();
+
+  return sum;
+}
+
+/* Member functions for autofdo_source_profile.
*/
+
+autofdo_source_profile::~autofdo_source_profile ()
+{
+  /* The map owns its top-level function_instances.  */
+  name_function_instance_map::const_iterator it = map_.begin ();
+  while (it != map_.end ())
+    {
+      delete it->second;
+      ++it;
+    }
+}
+
+/* For a given DECL, returns the top-level function_instance, or NULL when
+   DECL is not in the string table or has no profile.  */
+
+function_instance *
+autofdo_source_profile::get_function_instance_by_decl (tree decl) const
+{
+  const int index = afdo_string_table->get_index_by_decl (decl);
+  if (index == -1)
+    return NULL;
+  name_function_instance_map::const_iterator hit = map_.find (index);
+  if (hit == map_.end ())
+    return NULL;
+  return hit->second;
+}
+
+/* Find count_info for a given gimple STMT.  If found, store the count_info
+   in INFO and return true; otherwise return false.  */
+
+bool
+autofdo_source_profile::get_count_info (gimple stmt, count_info *info) const
+{
+  const location_t loc = gimple_location (stmt);
+  /* Statements located at the function end locus are never looked up.  */
+  if (LOCATION_LOCUS (loc) == cfun->function_end_locus)
+    return false;
+
+  inline_stack stack;
+  get_inline_stack (loc, &stack);
+  if (stack.length () == 0)
+    return false;
+
+  function_instance *inst = get_function_instance_by_inline_stack (stack);
+  /* stack[0] is the leaf entry; its combined location is the lookup key.  */
+  return inst != NULL && inst->get_count_info (stack[0].second, info);
+}
+
+/* Mark LOC as annotated.  */
+
+void
+autofdo_source_profile::mark_annotated (location_t loc)
+{
+  inline_stack stack;
+  get_inline_stack (loc, &stack);
+  if (stack.length () == 0)
+    return;
+
+  function_instance *inst = get_function_instance_by_inline_stack (stack);
+  if (inst != NULL)
+    inst->mark_annotated (stack[0].second);
+}
+
+/* Update value profile INFO for STMT from the inlined indirect callsite.
+   Return true if INFO is updated.
*/
+
+bool
+autofdo_source_profile::update_inlined_ind_target (gimple stmt,
+                                                   count_info *info)
+{
+  if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
+    return false;
+
+  /* Add up the counts of the targets recorded for STMT itself, i.e. the
+     ones that were not promoted.  */
+  count_info old_info;
+  get_count_info (stmt, &old_info);
+  gcov_type unpromoted_total = 0;
+  icall_target_map::const_iterator it = old_info.targets.begin ();
+  while (it != old_info.targets.end ())
+    {
+      unpromoted_total += it->second;
+      ++it;
+    }
+
+  /* Program behavior changed, original promoted (and inlined) target is not
+     hot any more.  Will avoid promote the original target.
+
+     To check if original promoted target is still hot, we check the total
+     count of the unpromoted targets (stored in old_info).  If it is no less
+     than half of the callsite count (stored in INFO), the original promoted
+     target is considered not hot any more.  */
+  if (unpromoted_total >= info->count / 2)
+    return false;
+
+  inline_stack stack;
+  get_inline_stack (gimple_location (stmt), &stack);
+  if (stack.length () == 0)
+    return false;
+
+  function_instance *inst = get_function_instance_by_inline_stack (stack);
+  if (inst == NULL)
+    return false;
+
+  icall_target_map inlined;
+  if (inst->find_icall_target_map (stmt, &inlined) == 0)
+    return false;
+
+  /* Merge the inlined targets into INFO, overwriting existing entries.  */
+  for (it = inlined.begin (); it != inlined.end (); ++it)
+    info->targets[it->first] = it->second;
+  return true;
+}
+
+/* Find total count of the callee of EDGE.  Returns 0 when no matching
+   function_instance exists or when its name differs from the assembler
+   name of EDGE's callee.  */
+
+gcov_type
+autofdo_source_profile::get_callsite_total_count (
+    struct cgraph_edge *edge) const
+{
+  /* The callee is pushed as the leaf entry in front of the inline stack
+     of the call statement.  */
+  inline_stack stack;
+  stack.safe_push (std::make_pair (edge->callee->decl, 0));
+  get_inline_stack (gimple_location (edge->call_stmt), &stack);
+
+  function_instance *inst = get_function_instance_by_inline_stack (stack);
+  if (inst == NULL)
+    return 0;
+
+  const char *asm_name
+    = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (edge->callee->decl));
+  if (afdo_string_table->get_index (asm_name) != inst->name ())
+    return 0;
+  return inst->total_count ();
+}
+
+/* Read AutoFDO profile and returns TRUE on success.
*/
+
+/* source profile format:
+
+   GCOV_TAG_AFDO_FUNCTION: 4 bytes
+   LENGTH: 4 bytes
+   NUM_FUNCTIONS: 4 bytes
+   FUNCTION_INSTANCE_1
+   FUNCTION_INSTANCE_2
+   ...
+   FUNCTION_INSTANCE_N.  */
+
+bool
+autofdo_source_profile::read ()
+{
+  const unsigned tag = gcov_read_unsigned ();
+  if (tag != GCOV_TAG_AFDO_FUNCTION)
+    {
+      inform (0, "Not expected TAG.");
+      return false;
+    }
+
+  /* Skip the length of the section.  */
+  gcov_read_unsigned ();
+
+  /* Read in the function/callsite profile, and store it in local
+     data structure.  Each record starts with the head count.  */
+  const unsigned function_num = gcov_read_unsigned ();
+  for (unsigned n = 0; n < function_num; n++)
+    {
+      function_instance::function_instance_stack stack;
+      gcov_type head = gcov_read_counter ();
+      function_instance *inst
+        = function_instance::read_function_instance (&stack, head);
+      afdo_profile_info->sum_all += inst->total_count ();
+      map_[inst->name ()] = inst;
+    }
+  return true;
+}
+
+/* Return the function_instance in the profile that correspond to the
+   inline STACK.  STACK must not be empty: its last entry names the
+   top-level function.  */
+
+function_instance *
+autofdo_source_profile::get_function_instance_by_inline_stack (
+    const inline_stack &stack) const
+{
+  const unsigned outermost = stack.length () - 1;
+  name_function_instance_map::const_iterator top = map_.find (
+      afdo_string_table->get_index_by_decl (stack[outermost].first));
+  if (top == map_.end ())
+    return NULL;
+
+  /* Descend from the top-level instance towards the leaf: at each level
+     the callsite is the location recorded at that level together with the
+     decl of the next inner entry.  */
+  function_instance *inst = top->second;
+  for (unsigned level = outermost; level > 0; --level)
+    {
+      inst = inst->get_function_instance_by_decl (stack[level].second,
+                                                  stack[level - 1].first);
+      if (inst == NULL)
+        break;
+    }
+  return inst;
+}
+
+/* Module profile is only used by LIPO.  Here we simply ignore it.  */
+
+static void
+fake_read_autofdo_module_profile ()
+{
+  /* Read in the module info.  */
+  gcov_read_unsigned ();
+
+  /* Skip the length of the section.  */
+  gcov_read_unsigned ();
+
+  /* Read in the file name table; it is required to be empty.  */
+  const unsigned total_module_num = gcov_read_unsigned ();
+  gcc_assert (total_module_num == 0);
+}
+
+/* Read data from profile data file.
*/
+
+static void
+read_profile (void)
+{
+  /* Every failure below reports an error and stops reading: continuing
+     would only parse data that is already known to be unusable.  */
+  if (gcov_open (auto_profile_file, 1) == 0)
+    {
+      error ("Cannot open profile file %s.", auto_profile_file);
+      return;
+    }
+
+  if (gcov_read_unsigned () != GCOV_DATA_MAGIC)
+    {
+      error ("AutoFDO profile magic number does not match.");
+      return;
+    }
+
+  /* Check the version number.  */
+  unsigned version = gcov_read_unsigned ();
+  if (version != AUTO_PROFILE_VERSION)
+    {
+      error ("AutoFDO profile version %u does not match %u.",
+             version, AUTO_PROFILE_VERSION);
+      return;
+    }
+
+  /* Skip the empty integer.  */
+  gcov_read_unsigned ();
+
+  /* string_table.  */
+  afdo_string_table = new string_table ();
+  if (!afdo_string_table->read())
+    {
+      error ("Cannot read string table from %s.", auto_profile_file);
+      return;
+    }
+
+  /* autofdo_source_profile.  */
+  afdo_source_profile = autofdo_source_profile::create ();
+  if (afdo_source_profile == NULL)
+    {
+      error ("Cannot read function profile from %s.", auto_profile_file);
+      return;
+    }
+
+  /* autofdo_module_profile.  */
+  fake_read_autofdo_module_profile ();
+
+  /* Read in the working set.  */
+  if (gcov_read_unsigned () != GCOV_TAG_AFDO_WORKING_SET)
+    {
+      error ("Cannot read working set from %s.", auto_profile_file);
+      return;
+    }
+
+  /* Skip the length of the section.  */
+  gcov_read_unsigned ();
+  /* The working set consists of 128 (num_counters, min_counter) pairs.  */
+  gcov_working_set_t set[128];
+  for (unsigned i = 0; i < 128; i++)
+    {
+      set[i].num_counters = gcov_read_unsigned ();
+      set[i].min_counter = gcov_read_counter ();
+    }
+  add_working_set (set);
+}
+
+/* From AutoFDO profiles, find values inside STMT for that we want to measure
+   histograms for indirect-call optimization.
+
+   This function actually serves 2 purposes:
+     * before annotation, we need to mark histogram, promote and inline
+     * after annotation, we just need to mark, and let follow-up logic to
+       decide if it needs to promote and inline.
*/
+
+static void
+afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map,
+                    bool transform)
+{ /* GSI points at the candidate statement.  MAP gives the sample count of
+     each recorded target, keyed by string table index.  TRANSFORM selects
+     whether the call is also promoted and inlined here.  */
+  gimple stmt = gsi_stmt (*gsi);
+  tree callee;
+
+  if (map.size () == 0 || gimple_code (stmt) != GIMPLE_CALL
+      || gimple_call_fndecl (stmt) != NULL_TREE)
+    return; /* No targets recorded, not a call, or the call already has a
+       known function decl.  */
+
+  callee = gimple_call_fn (stmt);
+
+  histogram_value hist = gimple_alloc_histogram_value (
+      cfun, HIST_TYPE_INDIR_CALL, stmt, callee);
+  hist->n_counters = 3; /* Slots: [0] target name, [1] its count, [2] total.  */
+  hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters);
+  gimple_add_histogram_value (cfun, stmt, hist);
+
+  gcov_type total = 0;
+  icall_target_map::const_iterator max_iter = map.end ();
+
+  for (icall_target_map::const_iterator iter = map.begin ();
+       iter != map.end (); ++iter)
+    { /* Sum all target counts and remember the target with the largest
+         count; on a tie the earlier map entry is kept.  */
+      total += iter->second;
+      if (max_iter == map.end () || max_iter->second < iter->second)
+        max_iter = iter;
+    }
+
+  hist->hvalue.counters[0] /* The name pointer is stored as an integer.  */
+      = (unsigned long long)afdo_string_table->get_name (max_iter->first);
+  hist->hvalue.counters[1] = max_iter->second;
+  hist->hvalue.counters[2] = total;
+
+  if (!transform)
+    return; /* Mark only: the histogram stays attached to STMT.  */
+
+  struct cgraph_edge *indirect_edge
+      = cgraph_node::get (current_function_decl)->get_edge (stmt);
+  struct cgraph_node *direct_call = cgraph_node::get_for_asmname (
+      get_identifier ((const char *) hist->hvalue.counters[0]));
+
+  if (direct_call == NULL || !check_ic_target (stmt, direct_call))
+    return; /* No cgraph node for the name, or check_ic_target rejects it.  */
+  if (DECL_STRUCT_FUNCTION (direct_call->decl) == NULL)
+    return; /* The target has no struct function.  */
+  struct cgraph_edge *new_edge /* NOTE(review): indirect_edge is used
+       without a NULL check -- confirm get_edge cannot fail here.  */
+      = indirect_edge->make_speculative (direct_call, 0, 0);
+  new_edge->redirect_call_stmt_to_callee ();
+  gimple_remove_histogram_value (cfun, stmt, hist);
+  inline_call (new_edge, true, NULL, NULL, false);
+}
+
+/* From AutoFDO profiles, attach value-profile information to the statement
+   at GSI.  MAP and TRANSFORM are passed through to afdo_indirect_call.
*/
+
+static void
+afdo_vpt (gimple_stmt_iterator *gsi, const icall_target_map &map,
+          bool transform)
+{
+  /* Indirect calls are the only kind of value profile handled here.  */
+  afdo_indirect_call (gsi, map, transform);
+}
+
+/* Sets of basic blocks and edges whose counts have been annotated.  */
+typedef std::set<basic_block> bb_set;
+typedef std::set<edge> edge_set;
+
+/* Return TRUE if BB is a member of ANNOTATED.  */
+
+static bool
+is_bb_annotated (const basic_block bb, const bb_set &annotated)
+{
+  return annotated.find (bb) != annotated.end ();
+}
+
+/* Add BB to ANNOTATED.  */
+
+static void
+set_bb_annotated (basic_block bb, bb_set *annotated)
+{
+  annotated->insert (bb);
+}
+
+/* Return TRUE if E is a member of ANNOTATED.  */
+
+static bool
+is_edge_annotated (const edge e, const edge_set &annotated)
+{
+  return annotated.find (e) != annotated.end ();
+}
+
+/* Add E to ANNOTATED.  */
+
+static void
+set_edge_annotated (edge e, edge_set *annotated)
+{
+  annotated->insert (e);
+}
+
+/* For a given BB, set its execution count.  Attach value profile if a stmt
+   is not in PROMOTED, because we only want to promote an indirect call once.
+   Return TRUE if BB is annotated.  */
+
+static bool
+afdo_set_bb_count (basic_block bb, const stmt_set &promoted)
+{
+  gimple_stmt_iterator gsi;
+  edge e;
+  edge_iterator ei;
+  gcov_type max_count = 0;
+  bool has_annotated = false;
+
+  /* The block count is the largest count of any of its statements that
+     has profile data; clobbers and debug statements are ignored.  */
+  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      count_info info;
+      gimple stmt = gsi_stmt (gsi);
+      if (gimple_clobber_p (stmt) || is_gimple_debug (stmt))
+        continue;
+      if (afdo_source_profile->get_count_info (stmt, &info))
+        {
+          if (info.count > max_count)
+            max_count = info.count;
+          has_annotated = true;
+          if (info.targets.size () > 0
+              && promoted.find (stmt) == promoted.end ())
+            afdo_vpt (&gsi, info.targets, false);
+        }
+    }
+
+  if (!has_annotated)
+    return false;
+
+  /* Mark every location used by BB (statements, PHI arguments and
+     successor edges) as annotated in the source profile.  */
+  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+    afdo_source_profile->mark_annotated (gimple_location (gsi_stmt (gsi)));
+  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      gimple phi = gsi_stmt (gsi);
+      size_t i;
+      for (i = 0; i < gimple_phi_num_args (phi); i++)
+        afdo_source_profile->mark_annotated (gimple_phi_arg_location (phi, i));
+    }
+  FOR_EACH_EDGE (e, ei, bb->succs)
+    afdo_source_profile->mark_annotated (e->goto_locus);
+
+  bb->count = max_count;
+  return true;
+}
+
+/* BB1 and BB2 are in an equivalent class iff:
+   1. BB1 dominates BB2.
+   2. BB2 post-dominates BB1.
+   3. BB1 and BB2 are in the same loop nest.
+   This function finds the equivalent class for each basic block, and
+   stores a pointer to the first BB in its equivalent class.  Meanwhile,
+   set bb counts for the same equivalent class to be identical.  Update
+   ANNOTATED_BB for the first BB in its equivalent class.  */
+
+static void
+afdo_find_equiv_class (bb_set *annotated_bb)
+{
+  basic_block bb;
+
+  FOR_ALL_BB_FN (bb, cfun)
+    bb->aux = NULL;
+
+  FOR_ALL_BB_FN (bb, cfun)
+    {
+      vec<basic_block> dom_bbs;
+      basic_block bb1;
+      int i;
+
+      /* BB already belongs to the class of an earlier block.  */
+      if (bb->aux != NULL)
+        continue;
+      bb->aux = bb;
+
+      /* Unclassified blocks that BB dominates, that post-dominate BB and
+         that share its loop join BB's class; an annotated member with a
+         larger count raises the count of BB.  */
+      dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
+      FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
+        if (bb1->aux == NULL && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1)
+            && bb1->loop_father == bb->loop_father)
+          {
+            bb1->aux = bb;
+            if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
+              {
+                bb->count = MAX (bb->count, bb1->count);
+                set_bb_annotated (bb, annotated_bb);
+              }
+          }
+      /* The vector returned by get_dominated_by is owned by the caller;
+         release it before the variable is reused below.  */
+      dom_bbs.release ();
+
+      /* Same again with the roles of dominance and post-dominance
+         swapped.  */
+      dom_bbs = get_dominated_by (CDI_POST_DOMINATORS, bb);
+      FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
+        if (bb1->aux == NULL && dominated_by_p (CDI_DOMINATORS, bb, bb1)
+            && bb1->loop_father == bb->loop_father)
+          {
+            bb1->aux = bb;
+            if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
+              {
+                bb->count = MAX (bb->count, bb1->count);
+                set_bb_annotated (bb, annotated_bb);
+              }
+          }
+      dom_bbs.release ();
+    }
+}
+
+/* If a basic block's count is known, and only one of its in/out edges' count
+   is unknown, its count can be calculated.  Meanwhile, if all of the in/out
+   edges' counts are known, then the basic block's unknown count can also be
+   calculated.
+   IS_SUCC is true if out edges of a basic blocks are examined.
+   Update ANNOTATED_BB and ANNOTATED_EDGE accordingly.
+   Return TRUE if any basic block/edge count is changed.
*/
+
+static bool
+afdo_propagate_edge (bool is_succ, bb_set *annotated_bb,
+                     edge_set *annotated_edge)
+{
+  bool any_change = false;
+  basic_block bb;
+
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      edge_iterator it;
+      edge cur;
+      edge missing = NULL;
+      int n_missing = 0;
+      gcov_type known_sum = 0;
+
+      /* Split the examined edges into annotated ones, whose counts are
+         summed, and unannotated ones, of which the last is remembered.  */
+      FOR_EACH_EDGE (cur, it, is_succ ? bb->succs : bb->preds)
+        {
+          if (is_edge_annotated (cur, *annotated_edge))
+            known_sum += cur->count;
+          else
+            {
+              missing = cur;
+              n_missing++;
+            }
+        }
+
+      if (n_missing == 0)
+        {
+          /* Every edge is known: raise the block count to the edge sum if
+             it is smaller, and make sure the block is marked annotated.  */
+          if (bb->count < known_sum)
+            {
+              bb->count = known_sum;
+              any_change = true;
+            }
+          if (!is_bb_annotated (bb, *annotated_bb))
+            {
+              set_bb_annotated (bb, annotated_bb);
+              any_change = true;
+            }
+          continue;
+        }
+
+      if (n_missing != 1 || !is_bb_annotated (bb, *annotated_bb))
+        continue;
+
+      /* Exactly one edge is unknown and the block is annotated: the edge
+         receives the remainder of the block count, clamped at zero.  */
+      missing->count = bb->count >= known_sum ? bb->count - known_sum : 0;
+      set_edge_annotated (missing, annotated_edge);
+      any_change = true;
+    }
+
+  return any_change;
+}
+
+/* Special propagation for circuit expressions, because GCC translates
+   control flow into data flow for circuit expressions.  E.g.
+     BB1:
+       if (a && b)
+         BB2
+       else
+         BB3
+
+   will be translated into:
+
+     BB1:
+       if (a)
+         goto BB.t1
+       else
+         goto BB.t3
+     BB.t1:
+       if (b)
+         goto BB.t2
+       else
+         goto BB.t3
+     BB.t2:
+       goto BB.t3
+     BB.t3:
+       tmp = PHI (0 (BB1), 0 (BB.t1), 1 (BB.t2))
+       if (tmp)
+         goto BB2
+       else
+         goto BB3
+
+   In this case, we need to propagate through PHI to determine the edge
+   count of BB1->BB.t1, BB.t1->BB.t2.
+   Update ANNOTATED_EDGE accordingly.
*/
+
+static void
+afdo_propagate_circuit (const bb_set &annotated_bb, edge_set *annotated_edge)
+{
+  basic_block bb;
+  FOR_ALL_BB_FN (bb, cfun)
+    {
+      gimple phi_stmt;
+      tree cmp_rhs, cmp_lhs;
+      gimple cmp_stmt = last_stmt (bb);
+      edge e;
+      edge_iterator ei;
+      /* Only annotated blocks that end in a GIMPLE_COND comparing an
+         SSA name against the constant 0 or 1 are handled.  */
+      if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
+        continue;
+      cmp_rhs = gimple_cond_rhs (cmp_stmt);
+      cmp_lhs = gimple_cond_lhs (cmp_stmt);
+      if (!TREE_CONSTANT (cmp_rhs)
+          || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
+        continue;
+      if (TREE_CODE (cmp_lhs) != SSA_NAME)
+        continue;
+      if (!is_bb_annotated (bb, annotated_bb))
+        continue;
+      phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs); /* Then look through plain
+         SSA-name copies until some other kind of definition is found;
+         only a PHI is of interest.  */
+      while (phi_stmt && gimple_code (phi_stmt) == GIMPLE_ASSIGN
+             && gimple_assign_single_p (phi_stmt)
+             && TREE_CODE (gimple_assign_rhs1 (phi_stmt)) == SSA_NAME)
+        phi_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (phi_stmt));
+      if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI)
+        continue;
+      FOR_EACH_EDGE (e, ei, bb->succs)
+        {
+          unsigned i, total = 0;
+          edge only_one; /* Set whenever TOTAL is bumped; read only when
+                            TOTAL == 1.  */
+          bool check_value_one = (((integer_onep (cmp_rhs))
+                                   ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
+                                  ^ ((e->flags & EDGE_TRUE_VALUE) != 0)); /*
+             TRUE if the PHI arguments equal to 1 are the ones that lead
+             to edge E, FALSE if those equal to 0 do.  Any condition code
+             other than EQ_EXPR is treated like an inequality here.  */
+          if (!is_edge_annotated (e, *annotated_edge))
+            continue;
+          for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
+            {
+              tree val = gimple_phi_arg_def (phi_stmt, i);
+              edge ep = gimple_phi_arg_edge (phi_stmt, i);
+              /* Skip arguments that are not the constant 0 or 1, and
+                 those whose value does not select edge E.  */
+              if (!TREE_CONSTANT (val)
+                  || !(integer_zerop (val) || integer_onep (val)))
+                continue;
+              if (check_value_one ^ integer_onep (val))
+                continue;
+              total++;
+              only_one = ep;
+              if (e->probability == 0 && !is_edge_annotated (ep, *annotated_edge))
+                {
+                  ep->probability = 0;
+                  ep->count = 0;
+                  set_edge_annotated (ep, annotated_edge);
+                }
+            } /* If exactly one PHI argument selects E, its incoming edge
+                 inherits the probability and count of E.  */
+          if (total == 1 && !is_edge_annotated (only_one, *annotated_edge))
+            {
+              only_one->probability = e->probability;
+              only_one->count = e->count;
+              set_edge_annotated (only_one, annotated_edge);
+            }
+        }
+    }
+}
+
+/* Propagate the basic block count and edge count on the
control flow
+   graph. We do the propagation iteratively until stablize. */
+
+static void
+afdo_propagate (bb_set *annotated_bb, edge_set *annotated_edge)
+{
+  basic_block bb;
+  bool changed = true;
+  int i = 0;
+  /* Give every block the count of its class representative (bb->aux, as
+     set up by afdo_find_equiv_class) and mark it annotated when the
+     representative is.  */
+  FOR_ALL_BB_FN (bb, cfun)
+    {
+      bb->count = ((basic_block)bb->aux)->count;
+      if (is_bb_annotated ((const basic_block)bb->aux, *annotated_bb))
+        set_bb_annotated (bb, annotated_bb);
+    }
+  /* Alternate successor-side, predecessor-side and circuit propagation
+     until neither edge pass reports a change, for at most 10 rounds.  */
+  while (changed && i++ < 10)
+    {
+      changed = false;
+
+      if (afdo_propagate_edge (true, annotated_bb, annotated_edge))
+        changed = true;
+      if (afdo_propagate_edge (false, annotated_bb, annotated_edge))
+        changed = true;
+      afdo_propagate_circuit (*annotated_bb, annotated_edge);
+    }
+}
+
+/* Propagate counts on control flow graph and calculate branch
+   probabilities.  ANNOTATED_BB and ANNOTATED_EDGE are the sets of blocks
+   and edges whose counts are already known; both are updated.  */
+
+static void
+afdo_calculate_branch_prob (bb_set *annotated_bb, edge_set *annotated_edge)
+{
+  basic_block bb;
+  bool has_sample = false;
+  /* Bail out if no block has a positive count.  */
+  FOR_EACH_BB_FN (bb, cfun)
+    if (bb->count > 0)
+      has_sample = true;
+
+  if (!has_sample)
+    return;
+  /* Set up dominance and loop information for the equivalence-class
+     computation; it is torn down again at the end of this function.  */
+  calculate_dominance_info (CDI_POST_DOMINATORS);
+  calculate_dominance_info (CDI_DOMINATORS);
+  loop_optimizer_init (0);
+
+  afdo_find_equiv_class (annotated_bb);
+  afdo_propagate (annotated_bb, annotated_edge);
+  /* Where all successor edges of a block are annotated and their counts
+     sum to a positive value, each edge probability becomes its share of
+     that sum, scaled to REG_BR_PROB_BASE.  */
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      edge e;
+      edge_iterator ei;
+      int num_unknown_succ = 0;
+      gcov_type total_count = 0;
+
+      FOR_EACH_EDGE (e, ei, bb->succs)
+        {
+          if (!is_edge_annotated (e, *annotated_edge))
+            num_unknown_succ++;
+          else
+            total_count += e->count;
+        }
+      if (num_unknown_succ == 0 && total_count > 0)
+        {
+          FOR_EACH_EDGE (e, ei, bb->succs)
+            e->probability = (double)e->count * REG_BR_PROB_BASE / total_count;
+        }
+    }
+  FOR_ALL_BB_FN (bb, cfun)
+    {
+      edge e;
+      edge_iterator ei;
+      /* Recompute every successor edge count from the block count and the
+         edge probability, and clear the aux pointer.  */
+      FOR_EACH_EDGE (e, ei, bb->succs)
+        e->count = (double)bb->count * e->probability / REG_BR_PROB_BASE;
+      bb->aux = NULL;
+    }
+
+  loop_optimizer_finalize ();
+  free_dominance_info (CDI_DOMINATORS);
+  free_dominance_info (CDI_POST_DOMINATORS);
+}
+
+/* Perform value
profile transformation using AutoFDO profile. Add the
+ promoted stmts to PROMOTED_STMTS. Return TRUE if there is any
+ indirect call promoted. */
+
+static bool
+afdo_vpt_for_early_inline (stmt_set *promoted_stmts)
+{
+  basic_block bb;
+  if (afdo_source_profile->get_function_instance_by_decl (
+          current_function_decl) == NULL)
+    return false;
+
+  compute_inline_parameters (cgraph_node::get (current_function_decl), true);
+
+  bool has_vpt = false;
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      if (!has_indirect_call (bb))
+        continue;
+      gimple_stmt_iterator gsi;
+      /* Use the largest profile count of any statement in BB as the
+         count of the block.  */
+      gcov_type bb_count = 0;
+      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+        {
+          count_info info;
+          gimple stmt = gsi_stmt (gsi);
+          if (afdo_source_profile->get_count_info (stmt, &info))
+            bb_count = MAX (bb_count, info.count);
+        }
+
+      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+        {
+          gimple stmt = gsi_stmt (gsi);
+          /* IC promotion and early inlining are done in multiple
+             iterations.  There is no need to promote the stmt if it is in
+             promoted_stmts (meaning it has already been promoted in a
+             previous iteration).  Non-calls and calls whose callee is a
+             FUNCTION_DECL are skipped as well.  */
+          if (gimple_code (stmt) != GIMPLE_CALL || gimple_call_fn (stmt) == NULL
+              || TREE_CODE (gimple_call_fn (stmt)) == FUNCTION_DECL
+              || promoted_stmts->find (stmt) != promoted_stmts->end ())
+            continue;
+          /* INFO is filled from the profile (the result is not checked)
+             and its count is then overridden with the block count.  */
+          count_info info;
+          afdo_source_profile->get_count_info (stmt, &info);
+          info.count = bb_count;
+          if (afdo_source_profile->update_inlined_ind_target (stmt, &info))
+            {
+              /* Promote the indirect call and update the promoted_stmts.  */
+              promoted_stmts->insert (stmt);
+              afdo_vpt (&gsi, info.targets, true);
+              has_vpt = true;
+            }
+        }
+    }
+  if (has_vpt)
+    {
+      optimize_inline_calls (current_function_decl);
+      return true;
+    }
+  else
+    return false;
+}
+
+/* Annotate auto profile to the control flow graph.  Do not annotate value
+   profile for stmts in PROMOTED_STMTS.
*/
+
+static void
+afdo_annotate_cfg (const stmt_set &promoted_stmts)
+{
+  basic_block bb;
+  bb_set annotated_bb;
+  edge_set annotated_edge;
+  const function_instance *s
+      = afdo_source_profile->get_function_instance_by_decl (
+          current_function_decl);
+  /* Without a profile instance for this function there is nothing to
+     annotate.  */
+  if (s == NULL)
+    return;
+  cgraph_node::get (current_function_decl)->count = s->head_count ();
+  ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = s->head_count ();
+  gcov_type max_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
+  /* Reset every block and successor-edge count, then let
+     afdo_set_bb_count assign block counts from the profile.  MAX_COUNT
+     tracks the largest count seen, starting from the entry count.  */
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      edge e;
+      edge_iterator ei;
+
+      bb->count = 0;
+      FOR_EACH_EDGE (e, ei, bb->succs)
+        e->count = 0;
+
+      if (afdo_set_bb_count (bb, promoted_stmts))
+        set_bb_annotated (bb, &annotated_bb);
+      if (bb->count > max_count)
+        max_count = bb->count;
+    } /* The block following the entry block and the block preceding the
+         exit block are raised to the entry count when they are below it,
+         and are then treated as annotated.  */
+  if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
+      > ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count)
+    {
+      ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count
+          = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
+      set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb, &annotated_bb);
+    }
+  if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
+      > EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count)
+    {
+      EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count
+          = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
+      set_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb, &annotated_bb);
+    }
+  afdo_source_profile->mark_annotated (
+      DECL_SOURCE_LOCATION (current_function_decl));
+  afdo_source_profile->mark_annotated (cfun->function_start_locus);
+  afdo_source_profile->mark_annotated (cfun->function_end_locus);
+  if (max_count > 0) /* Only propagate when some count is positive.  */
+    {
+      afdo_calculate_branch_prob (&annotated_bb, &annotated_edge);
+      counts_to_freqs ();
+      profile_status_for_fn (cfun) = PROFILE_READ;
+    }
+  if (flag_value_profile_transformations)
+    gimple_value_profile_transformations ();
+}
+
+/* Wrapper function to invoke early inliner.
*/
+/* Recomputes the inline parameters of the current function, runs
+   early_inliner on cfun and updates SSA form if the returned todo flags
+   request it.  */
+static void
+early_inline ()
+{
+  compute_inline_parameters (cgraph_node::get (current_function_decl), true);
+  unsigned todo = early_inliner (cfun);
+  if (todo & TODO_update_ssa_any)
+    update_ssa (TODO_update_ssa);
+}
+
+/* Use AutoFDO profile to annotate the control flow graph.
+   Return the todo flag.  */
+
+static unsigned int
+auto_profile (void)
+{
+  struct cgraph_node *node;
+  /* Nothing to do once the symbol table is in the FINISHED state.  */
+  if (symtab->state == FINISHED)
+    return 0;
+
+  init_node_map (true);
+  profile_info = autofdo::afdo_profile_info;
+  /* Process every function that has a gimple body.  */
+  FOR_EACH_FUNCTION (node)
+  {
+    if (!gimple_has_body_p (node->decl))
+      continue;
+
+    /* Don't profile functions produced for builtin stuff.  */
+    if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
+      continue;
+
+    push_cfun (DECL_STRUCT_FUNCTION (node->decl));
+
+    /* First do indirect call promotion and early inline to make the
+       IR match the profiled binary before actual annotation.
+
+       This is needed because an indirect call might have been promoted
+       and inlined in the profiled binary.  If we do not promote and
+       inline these indirect calls before annotation, the profile for
+       these promoted functions will be lost.
+
+       e.g. foo() --indirect_call--> bar()
+       In profiled binary, the callsite is promoted and inlined, making
+       the profile look like:
+
+       foo: {
+         loc_foo_1: count_1
+         bar@loc_foo_2: {
+           loc_bar_1: count_2
+           loc_bar_2: count_3
+         }
+       }
+
+       Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined.
+       If we perform annotation on it, the profile inside bar@loc_foo2
+       will be wasted.
+
+       To avoid this, we promote loc_foo_2 and inline the promoted bar
+       function before annotation, so the profile inside bar@loc_foo2
+       will be useful.
*/
+    autofdo::stmt_set promoted_stmts;
+    for (int i = 0; i < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS); i++)
+      {
+        if (!flag_value_profile_transformations
+            || !autofdo::afdo_vpt_for_early_inline (&promoted_stmts))
+          break;
+        early_inline ();
+      }
+    /* Run the early inliner once more unconditionally, then annotate the
+       CFG, skipping value profiles for the statements promoted above.  */
+    early_inline ();
+    autofdo::afdo_annotate_cfg (promoted_stmts);
+    compute_function_frequency ();
+    update_ssa (TODO_update_ssa);
+
+    /* Local pure-const may imply need to fixup the cfg.  */
+    if (execute_fixup_cfg () & TODO_cleanup_cfg)
+      cleanup_tree_cfg ();
+
+    free_dominance_info (CDI_DOMINATORS);
+    free_dominance_info (CDI_POST_DOMINATORS);
+    cgraph_edge::rebuild_edges ();
+    pop_cfun ();
+  }
+
+  return TODO_rebuild_cgraph_edges;
+}
+} /* namespace autofdo.  */
+
+/* Read the profile from the profile data file.  Falls back to
+   DEFAULT_AUTO_PROFILE_FILE when no file name has been set.  */
+
+void
+read_autofdo_file (void)
+{
+  if (auto_profile_file == NULL)
+    auto_profile_file = DEFAULT_AUTO_PROFILE_FILE;
+  /* Start from a zero-initialized summary that records a single run.  */
+  autofdo::afdo_profile_info = (struct gcov_ctr_summary *)xcalloc (
+      1, sizeof (struct gcov_ctr_summary));
+  autofdo::afdo_profile_info->runs = 1;
+  autofdo::afdo_profile_info->sum_max = 0;
+  autofdo::afdo_profile_info->sum_all = 0;
+
+  /* Read the profile from the profile file.  */
+  autofdo::read_profile ();
+}
+
+/* Free the resources: delete the source profile and the string table and
+   reset profile_info.  */
+
+void
+end_auto_profile (void)
+{
+  delete autofdo::afdo_source_profile;
+  delete autofdo::afdo_string_table;
+  profile_info = NULL;
+}
+
+/* Returns TRUE if EDGE is hot enough to be inlined early.  An edge whose
+   total callsite count in the profile is not positive is never hot.  */
+
+bool
+afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge)
+{
+  gcov_type count
+      = autofdo::afdo_source_profile->get_callsite_total_count (edge);
+  if (count > 0)
+    {
+      bool is_hot;
+      const struct gcov_ctr_summary *saved_profile_info = profile_info;
+      /* At early inline stage, profile_info is not set yet.  We need to
+         temporarily set it to afdo_profile_info to calculate hotness.
*/ + profile_info = autofdo::afdo_profile_info; + is_hot = maybe_hot_count_p (NULL, count); + profile_info = saved_profile_info; + return is_hot; + } + else + return false; +} + +namespace +{ + +const pass_data pass_data_ipa_auto_profile = { + SIMPLE_IPA_PASS, "afdo", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_IPA_AUTOFDO, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_ipa_auto_profile : public simple_ipa_opt_pass +{ +public: + pass_ipa_auto_profile (gcc::context *ctxt) + : simple_ipa_opt_pass (pass_data_ipa_auto_profile, ctxt) + { + } + + /* opt_pass methods: */ + virtual bool + gate (function *) + { + return flag_auto_profile; + } + virtual unsigned int + execute (function *) + { + return autofdo::auto_profile (); + } +}; // class pass_ipa_auto_profile + +} // anon namespace + +simple_ipa_opt_pass * +make_pass_ipa_auto_profile (gcc::context *ctxt) +{ + return new pass_ipa_auto_profile (ctxt); +} diff --git a/gcc/auto-profile.h b/gcc/auto-profile.h new file mode 100644 index 00000000000..7b4f2449cb7 --- /dev/null +++ b/gcc/auto-profile.h @@ -0,0 +1,31 @@ +/* auto-profile.h - Defines data exported from auto-profile.c + Copyright (C) 2014. Free Software Foundation, Inc. + Contributed by Dehao Chen (dehao@google.com) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>.
*/ + +#ifndef AUTO_PROFILE_H +#define AUTO_PROFILE_H + +/* Read, process, finalize AutoFDO data structures. */ +extern void read_autofdo_file (void); +extern void end_auto_profile (void); + +/* Returns TRUE if EDGE is hot enough to be inlined early. */ +extern bool afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *); + +#endif /* AUTO_PROFILE_H */ diff --git a/gcc/basic-block.h b/gcc/basic-block.h index b1055c00fa3..a82db077619 100644 --- a/gcc/basic-block.h +++ b/gcc/basic-block.h @@ -652,6 +652,7 @@ ei_cond (edge_iterator ei, edge *p) extern void dump_flow_info (FILE *, int); /* In predict.c */ +extern bool maybe_hot_count_p (struct function *, gcov_type); extern bool maybe_hot_bb_p (struct function *, const_basic_block); extern bool maybe_hot_edge_p (edge); extern bool probably_never_executed_bb_p (struct function *, const_basic_block); @@ -805,6 +806,7 @@ extern void default_rtl_profile (void); /* In profile.c. */ typedef struct gcov_working_set_info gcov_working_set_t; extern gcov_working_set_t *find_working_set (unsigned pct_times_10); +extern void add_working_set (gcov_working_set_t *); /* Check tha probability is sane. */ diff --git a/gcc/common.opt b/gcc/common.opt index 5db5e1ee6ad..817ac87bf12 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -895,6 +895,16 @@ fauto-inc-dec Common Report Var(flag_auto_inc_dec) Init(1) Generate auto-inc/dec instructions +fauto-profile +Common Report Var(flag_auto_profile) Optimization +Use sample profile information for call graph node weights. The default +profile file is fbdata.afdo in 'pwd'. + +fauto-profile= +Common Joined RejectNegative Var(auto_profile_file) +Use sample profile information for call graph node weights. The profile +file is specified in the argument. + ; -fcheck-bounds causes gcc to generate array bounds checks. ; For C, C++ and ObjC: defaults off. ; For Java: defaults to on. 
diff --git a/gcc/coverage.c b/gcc/coverage.c index 66217ed862d..c02b18c1144 100644 --- a/gcc/coverage.c +++ b/gcc/coverage.c @@ -61,6 +61,7 @@ along with GCC; see the file COPYING3. If not see #include "filenames.h" #include "target.h" #include "params.h" +#include "auto-profile.h" #include "gcov-io.h" #include "gcov-io.c" @@ -1214,7 +1215,9 @@ coverage_init (const char *filename) bbg_file_stamp = local_tick; - if (flag_branch_probabilities) + if (flag_auto_profile) + read_autofdo_file (); + else if (flag_branch_probabilities) read_counts_file (); /* Name of bbg file. */ diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 23f272f4a1f..0b26976c520 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -365,7 +365,8 @@ Objective-C and Objective-C++ Dialects}. @gccoptlist{-faggressive-loop-optimizations -falign-functions[=@var{n}] @gol -falign-jumps[=@var{n}] @gol -falign-labels[=@var{n}] -falign-loops[=@var{n}] @gol --fassociative-math -fauto-inc-dec -fbranch-probabilities @gol +-fassociative-math -fauto-profile -fauto-profile[=@var{path}] @gol +-fauto-inc-dec -fbranch-probabilities @gol -fbranch-target-load-optimize -fbranch-target-load-optimize2 @gol -fbtr-bb-exclusive -fcaller-saves @gol -fcheck-data-deps -fcombine-stack-adjustments -fconserve-stack @gol @@ -9205,6 +9206,41 @@ code. If @var{path} is specified, GCC looks at the @var{path} to find the profile feedback data files. See @option{-fprofile-dir}. + +@item -fauto-profile +@itemx -fauto-profile=@var{path} +@opindex fauto-profile +Enable sampling based feedback directed optimizations, and optimizations +generally profitable only with profile feedback available. 
+ +The following options are enabled: @code{-fbranch-probabilities}, @code{-fvpt}, +@code{-funroll-loops}, @code{-fpeel-loops}, @code{-ftracer}, @code{-ftree-vectorize}, +@code{-finline-functions}, @code{-fipa-cp}, @code{-fipa-cp-clone}, +@code{-fpredictive-commoning}, @code{-funswitch-loops}, +@code{-fgcse-after-reload}, @code{-ftree-loop-distribute-patterns}. + +If @var{path} is specified, GCC looks at the @var{path} to find +the profile feedback data files. + +In order to collect an AutoFDO profile, you need to have: + +1. A Linux system with Linux perf support +2. (optional) An Intel processor with last branch record (LBR) support. This is + to guarantee accurate instruction level profile, which is important for + AutoFDO performance. + +To collect the profile, first use Linux perf to collect a raw profile +(see @uref{https://perf.wiki.kernel.org/}). + +E.g. +@code{perf record -e br_inst_retired:near_taken -b -o perf.data -- your_program} + +Then use the create_gcov tool, which takes the raw profile and unstripped binary to +generate an AutoFDO profile that can be used by GCC +(see @uref{https://github.com/google/autofdo}). + +E.g. +@code{create_gcov --binary=your_program.unstripped --profile=perf.data --gcov=profile.afdo} @end table The following options control compiler behavior regarding floating-point diff --git a/gcc/gcov-io.h b/gcc/gcov-io.h index 6fc126851b4..3c2543705d5 100644 --- a/gcc/gcov-io.h +++ b/gcc/gcov-io.h @@ -244,6 +244,9 @@ typedef uint64_t gcov_type_unsigned; #define GCOV_TAG_PROGRAM_SUMMARY ((gcov_unsigned_t)0xa3000000) #define GCOV_TAG_SUMMARY_LENGTH(NUM) \ (1 + GCOV_COUNTERS_SUMMABLE * (10 + 3 * 2) + (NUM) * 5) +#define GCOV_TAG_AFDO_FILE_NAMES ((gcov_unsigned_t)0xaa000000) +#define GCOV_TAG_AFDO_FUNCTION ((gcov_unsigned_t)0xac000000) +#define GCOV_TAG_AFDO_WORKING_SET ((gcov_unsigned_t)0xaf000000) /* Counters that are collected.
*/ diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c index e79a4dd8e21..f0177dfc60d 100644 --- a/gcc/ipa-inline.c +++ b/gcc/ipa-inline.c @@ -122,6 +122,7 @@ along with GCC; see the file COPYING3. If not see #include "ipa-inline.h" #include "ipa-utils.h" #include "sreal.h" +#include "auto-profile.h" #include "cilk.h" #include "builtins.h" @@ -443,6 +444,14 @@ want_early_inline_function_p (struct cgraph_edge *e) if (DECL_DISREGARD_INLINE_LIMITS (callee->decl)) ; + /* For AutoFDO, we need to make sure that before profile annotation, all + hot paths' IR look exactly the same as profiled binary. As a result, + in einliner, we will disregard size limit and inline those callsites + that are: + * inlined in the profiled binary, and + * the cloned callee has enough samples to be considered "hot". */ + else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e)) + ; else if (!DECL_DECLARED_INLINE_P (callee->decl) && !flag_inline_small_functions) { @@ -2360,39 +2369,8 @@ early_inline_small_functions (struct cgraph_node *node) return inlined; } -/* Do inlining of small functions. Doing so early helps profiling and other - passes to be somewhat more effective and avoids some code duplication in - later real inlining pass for testcases with very many function calls. 
*/ - -namespace { - -const pass_data pass_data_early_inline = -{ - GIMPLE_PASS, /* type */ - "einline", /* name */ - OPTGROUP_INLINE, /* optinfo_flags */ - TV_EARLY_INLINING, /* tv_id */ - PROP_ssa, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - 0, /* todo_flags_finish */ -}; - -class pass_early_inline : public gimple_opt_pass -{ -public: - pass_early_inline (gcc::context *ctxt) - : gimple_opt_pass (pass_data_early_inline, ctxt) - {} - - /* opt_pass methods: */ - virtual unsigned int execute (function *); - -}; // class pass_early_inline - unsigned int -pass_early_inline::execute (function *fun) +early_inliner (function *fun) { struct cgraph_node *node = cgraph_node::get (current_function_decl); struct cgraph_edge *edge; @@ -2493,6 +2471,43 @@ pass_early_inline::execute (function *fun) return todo; } +/* Do inlining of small functions. Doing so early helps profiling and other + passes to be somewhat more effective and avoids some code duplication in + later real inlining pass for testcases with very many function calls. 
*/ + +namespace { + +const pass_data pass_data_early_inline = +{ + GIMPLE_PASS, /* type */ + "einline", /* name */ + OPTGROUP_INLINE, /* optinfo_flags */ + TV_EARLY_INLINING, /* tv_id */ + PROP_ssa, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_early_inline : public gimple_opt_pass +{ +public: + pass_early_inline (gcc::context *ctxt) + : gimple_opt_pass (pass_data_early_inline, ctxt) + {} + + /* opt_pass methods: */ + virtual unsigned int execute (function *); + +}; // class pass_early_inline + +unsigned int +pass_early_inline::execute (function *fun) +{ + return early_inliner (fun); +} + } // anon namespace gimple_opt_pass * diff --git a/gcc/ipa-inline.h b/gcc/ipa-inline.h index 75973230bc5..8edf49621ad 100644 --- a/gcc/ipa-inline.h +++ b/gcc/ipa-inline.h @@ -238,6 +238,7 @@ void initialize_growth_caches (void); void free_growth_caches (void); void compute_inline_parameters (struct cgraph_node *, bool); bool speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining); +unsigned int early_inliner (function *fun); /* In ipa-inline-transform.c */ bool inline_call (struct cgraph_edge *, bool, vec *, int *, bool, diff --git a/gcc/opts.c b/gcc/opts.c index 30541962018..de066bcce07 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -1280,6 +1280,50 @@ print_specific_help (unsigned int include_flags, opts->x_help_columns, opts, lang_mask); } +/* Enable FDO-related flags. 
*/ + +static void +enable_fdo_optimizations (struct gcc_options *opts, + struct gcc_options *opts_set, + int value) +{ + if (!opts_set->x_flag_branch_probabilities) + opts->x_flag_branch_probabilities = value; + if (!opts_set->x_flag_profile_values) + opts->x_flag_profile_values = value; + if (!opts_set->x_flag_unroll_loops) + opts->x_flag_unroll_loops = value; + if (!opts_set->x_flag_peel_loops) + opts->x_flag_peel_loops = value; + if (!opts_set->x_flag_tracer) + opts->x_flag_tracer = value; + if (!opts_set->x_flag_value_profile_transformations) + opts->x_flag_value_profile_transformations = value; + if (!opts_set->x_flag_inline_functions) + opts->x_flag_inline_functions = value; + if (!opts_set->x_flag_ipa_cp) + opts->x_flag_ipa_cp = value; + if (!opts_set->x_flag_ipa_cp_clone + && value && opts->x_flag_ipa_cp) + opts->x_flag_ipa_cp_clone = value; + if (!opts_set->x_flag_predictive_commoning) + opts->x_flag_predictive_commoning = value; + if (!opts_set->x_flag_unswitch_loops) + opts->x_flag_unswitch_loops = value; + if (!opts_set->x_flag_gcse_after_reload) + opts->x_flag_gcse_after_reload = value; + if (!opts_set->x_flag_tree_loop_vectorize + && !opts_set->x_flag_tree_vectorize) + opts->x_flag_tree_loop_vectorize = value; + if (!opts_set->x_flag_tree_slp_vectorize + && !opts_set->x_flag_tree_vectorize) + opts->x_flag_tree_slp_vectorize = value; + if (!opts_set->x_flag_vect_cost_model) + opts->x_flag_vect_cost_model = VECT_COST_MODEL_DYNAMIC; + if (!opts_set->x_flag_tree_loop_distribute_patterns) + opts->x_flag_tree_loop_distribute_patterns = value; +} + /* Handle target- and language-independent options. Return zero to generate an "unknown option" message. Only options that need extra handling need to be listed here; if you simply want @@ -1749,50 +1793,30 @@ common_handle_option (struct gcc_options *opts, value = true; /* No break here - do -fprofile-use processing. 
*/ case OPT_fprofile_use: - if (!opts_set->x_flag_branch_probabilities) - opts->x_flag_branch_probabilities = value; - if (!opts_set->x_flag_profile_values) - opts->x_flag_profile_values = value; - if (!opts_set->x_flag_unroll_loops) - opts->x_flag_unroll_loops = value; - if (!opts_set->x_flag_peel_loops) - opts->x_flag_peel_loops = value; - if (!opts_set->x_flag_tracer) - opts->x_flag_tracer = value; - if (!opts_set->x_flag_value_profile_transformations) - opts->x_flag_value_profile_transformations = value; - if (!opts_set->x_flag_inline_functions) - opts->x_flag_inline_functions = value; - if (!opts_set->x_flag_ipa_cp) - opts->x_flag_ipa_cp = value; - if (!opts_set->x_flag_ipa_cp_clone - && value && opts->x_flag_ipa_cp) - opts->x_flag_ipa_cp_clone = value; - if (!opts_set->x_flag_predictive_commoning) - opts->x_flag_predictive_commoning = value; - if (!opts_set->x_flag_unswitch_loops) - opts->x_flag_unswitch_loops = value; - if (!opts_set->x_flag_gcse_after_reload) - opts->x_flag_gcse_after_reload = value; - if (!opts_set->x_flag_tree_loop_vectorize - && !opts_set->x_flag_tree_vectorize) - opts->x_flag_tree_loop_vectorize = value; - if (!opts_set->x_flag_tree_slp_vectorize - && !opts_set->x_flag_tree_vectorize) - opts->x_flag_tree_slp_vectorize = value; - if (!opts_set->x_flag_vect_cost_model) - opts->x_flag_vect_cost_model = VECT_COST_MODEL_DYNAMIC; - if (!opts_set->x_flag_tree_loop_distribute_patterns) - opts->x_flag_tree_loop_distribute_patterns = value; + enable_fdo_optimizations (opts, opts_set, value); if (!opts_set->x_flag_profile_reorder_functions) - opts->x_flag_profile_reorder_functions = value; - /* Indirect call profiling should do all useful transformations - speculative devirtualization does. */ + opts->x_flag_profile_reorder_functions = value; + /* Indirect call profiling should do all useful transformations + speculative devirtualization does. 
*/ if (!opts_set->x_flag_devirtualize_speculatively && opts->x_flag_value_profile_transformations) opts->x_flag_devirtualize_speculatively = false; break; + case OPT_fauto_profile_: + opts->x_auto_profile_file = xstrdup (arg); + opts->x_flag_auto_profile = true; + value = true; + /* No break here - do -fauto-profile processing. */ + case OPT_fauto_profile: + enable_fdo_optimizations (opts, opts_set, value); + if (!opts_set->x_flag_profile_correction) + opts->x_flag_profile_correction = value; + maybe_set_param_value ( + PARAM_EARLY_INLINER_MAX_ITERATIONS, 10, + opts->x_param_values, opts_set->x_param_values); + break; + case OPT_fprofile_generate_: opts->x_profile_data_prefix = xstrdup (arg); value = true; diff --git a/gcc/passes.def b/gcc/passes.def index 57c2c1343b1..9c2542d5352 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -90,6 +90,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_rebuild_cgraph_edges); NEXT_PASS (pass_inline_parameters); POP_INSERT_PASSES () + NEXT_PASS (pass_ipa_auto_profile); NEXT_PASS (pass_ipa_free_inline_summary); NEXT_PASS (pass_ipa_tree_profile); PUSH_INSERT_PASSES_WITHIN (pass_ipa_tree_profile) diff --git a/gcc/predict.c b/gcc/predict.c index 5f5d4a55c9c..96c7661aebe 100644 --- a/gcc/predict.c +++ b/gcc/predict.c @@ -167,7 +167,7 @@ set_hot_bb_threshold (gcov_type min) /* Return TRUE if frequency FREQ is considered to be hot. */ -static inline bool +bool maybe_hot_count_p (struct function *fun, gcov_type count) { if (fun && profile_status_for_fn (fun) != PROFILE_READ) @@ -2859,7 +2859,7 @@ counts_to_freqs (void) /* Don't overwrite the estimated frequencies when the profile for the function is missing. We may drop this function PROFILE_GUESSED later in drop_profile (). 
*/ - if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count) + if (!flag_auto_profile && !ENTRY_BLOCK_PTR_FOR_FN (cfun)->count) return 0; FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) @@ -3230,7 +3230,8 @@ rebuild_frequencies (void) count_max = MAX (bb->count, count_max); if (profile_status_for_fn (cfun) == PROFILE_GUESSED - || (profile_status_for_fn (cfun) == PROFILE_READ && count_max < REG_BR_PROB_BASE/10)) + || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ + && count_max < REG_BR_PROB_BASE/10)) { loop_optimizer_init (0); add_noreturn_fake_exit_edges (); diff --git a/gcc/profile.c b/gcc/profile.c index bcff4110522..71b8cbdc97f 100644 --- a/gcc/profile.c +++ b/gcc/profile.c @@ -112,6 +112,14 @@ static int total_num_times_called; static int total_hist_br_prob[20]; static int total_num_branches; +/* Helper function to update gcov_working_sets. */ + +void add_working_set (gcov_working_set_t *set) { + int i = 0; + for (; i < NUM_GCOV_WORKING_SETS; i++) + gcov_working_sets[i] = set[i]; +} + /* Forward declarations. */ static void find_spanning_tree (struct edge_list *); diff --git a/gcc/timevar.def b/gcc/timevar.def index 55a230b39c8..96b606951d6 100644 --- a/gcc/timevar.def +++ b/gcc/timevar.def @@ -89,6 +89,7 @@ DEFTIMEVAR (TV_WHOPR_PARTITIONING , "whopr partitioning") DEFTIMEVAR (TV_WHOPR_LTRANS , "whopr ltrans") DEFTIMEVAR (TV_IPA_REFERENCE , "ipa reference") DEFTIMEVAR (TV_IPA_PROFILE , "ipa profile") +DEFTIMEVAR (TV_IPA_AUTOFDO , "auto profile") DEFTIMEVAR (TV_IPA_PURE_CONST , "ipa pure const") DEFTIMEVAR (TV_IPA_ICF , "ipa icf") DEFTIMEVAR (TV_IPA_PTA , "ipa points-to") diff --git a/gcc/toplev.c b/gcc/toplev.c index a87ff9a2ff8..adfae0bf93b 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -83,6 +83,7 @@ along with GCC; see the file COPYING3. 
If not see #include "diagnostic-color.h" #include "context.h" #include "pass_manager.h" +#include "auto-profile.h" #include "dwarf2out.h" #include "ipa-reference.h" #include "ipa-prop.h" @@ -671,6 +672,10 @@ compile_file (void) targetm.asm_out.output_ident (ident_str); } + /* Auto profile finalization. */ + if (flag_auto_profile) + end_auto_profile (); + /* Invoke registered plugin callbacks. */ invoke_plugin_callbacks (PLUGIN_FINISH_UNIT, NULL); diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 2ff52cf5243..3db1a08b0dd 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -449,6 +449,7 @@ extern simple_ipa_opt_pass *make_pass_ipa_lower_emutls (gcc::context *ctxt); extern simple_ipa_opt_pass *make_pass_ipa_function_and_variable_visibility (gcc::context *ctxt); extern simple_ipa_opt_pass *make_pass_ipa_tree_profile (gcc::context *ctxt); +extern simple_ipa_opt_pass *make_pass_ipa_auto_profile (gcc::context *ctxt); extern simple_ipa_opt_pass *make_pass_early_local_passes (gcc::context *ctxt); diff --git a/gcc/tree-profile.c b/gcc/tree-profile.c index c25626c60aa..ceb616968ef 100644 --- a/gcc/tree-profile.c +++ b/gcc/tree-profile.c @@ -719,8 +719,10 @@ public: bool pass_ipa_tree_profile::gate (function *) { - /* When profile instrumentation, use or test coverage shall be performed. */ - return (!in_lto_p + /* When profile instrumentation, use or test coverage shall be performed. + But for AutoFDO, there is no instrumentation, thus this pass is + disabled. */ + return (!in_lto_p && !flag_auto_profile && (flag_branch_probabilities || flag_test_coverage || profile_arc_flag)); } diff --git a/gcc/tree-ssa-live.c b/gcc/tree-ssa-live.c index 23eaf262c64..87f5bf84c59 100644 --- a/gcc/tree-ssa-live.c +++ b/gcc/tree-ssa-live.c @@ -605,7 +605,7 @@ remove_unused_scope_block_p (tree scope) ; /* When not generating debug info we can eliminate info on unused variables. 
*/ - else if (debug_info_level == DINFO_LEVEL_NONE) + else if (!flag_auto_profile && debug_info_level == DINFO_LEVEL_NONE) { /* Even for -g0 don't prune outer scopes from artificial functions, otherwise diagnostics using tree_nonartificial_location diff --git a/gcc/value-prof.c b/gcc/value-prof.c index 37710ca6da6..8ed6433ce7f 100644 --- a/gcc/value-prof.c +++ b/gcc/value-prof.c @@ -139,7 +139,7 @@ static bool gimple_ic_transform (gimple_stmt_iterator *); /* Allocate histogram value. */ -static histogram_value +histogram_value gimple_alloc_histogram_value (struct function *fun ATTRIBUTE_UNUSED, enum hist_type type, gimple stmt, tree value) { @@ -1342,7 +1342,7 @@ find_func_by_profile_id (int profile_id) may ICE. Here we only do very minimal sanity check just to make compiler happy. Returns true if TARGET is considered ok for call CALL_STMT. */ -static bool +bool check_ic_target (gimple call_stmt, struct cgraph_node *target) { location_t locus; diff --git a/gcc/value-prof.h b/gcc/value-prof.h index 00a89fab2a4..f4db3d4bb66 100644 --- a/gcc/value-prof.h +++ b/gcc/value-prof.h @@ -77,6 +77,8 @@ typedef vec histogram_values; extern void gimple_find_values_to_profile (histogram_values *); extern bool gimple_value_profile_transformations (void); +histogram_value gimple_alloc_histogram_value (struct function *, enum hist_type, + gimple stmt, tree); histogram_value gimple_histogram_value (struct function *, gimple); histogram_value gimple_histogram_value_of_type (struct function *, gimple, enum hist_type); @@ -91,6 +93,7 @@ void verify_histograms (void); void free_histograms (void); void stringop_block_profile (gimple, unsigned int *, HOST_WIDE_INT *); gimple gimple_ic (gimple, struct cgraph_node *, int, gcov_type, gcov_type); +bool check_ic_target (gimple, struct cgraph_node *); /* In tree-profile.c. */ -- 2.30.2