From 0fca07e31dc85bea3b1d669d7cc1d51d22192335 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Fri, 14 Dec 2018 20:37:38 +0100 Subject: [PATCH] re PR c++/82294 (Array of objects with constexpr constructors initialized from space-inefficient memory image) PR c++/82294 PR c++/87436 * expr.h (categorize_ctor_elements): Add p_unique_nz_elts argument. * expr.c (categorize_ctor_elements_1): Likewise. Compute it like p_nz_elts, except don't multiply it by mult. Adjust recursive call. Fix up COMPLEX_CST handling. (categorize_ctor_elements): Add p_unique_nz_elts argument, initialize it and pass it through to categorize_ctor_elements_1. (mostly_zeros_p, all_zeros_p): Adjust categorize_ctor_elements callers. * gimplify.c (gimplify_init_constructor): Likewise. Don't force ctor into readonly data section if num_unique_nonzero_elements is smaller or equal to 1/8 of num_nonzero_elements and size is >= 64 bytes. * g++.dg/tree-ssa/pr82294.C: New test. * g++.dg/tree-ssa/pr87436.C: New test. From-SVN: r267143 --- gcc/ChangeLog | 16 +++++++ gcc/expr.c | 58 ++++++++++++++++++------- gcc/expr.h | 3 +- gcc/gimplify.c | 26 ++++++++++- gcc/testsuite/ChangeLog | 7 +++ gcc/testsuite/g++.dg/tree-ssa/pr82294.C | 13 ++++++ gcc/testsuite/g++.dg/tree-ssa/pr87436.C | 25 +++++++++++ 7 files changed, 131 insertions(+), 17 deletions(-) create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr82294.C create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr87436.C diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 3cacc25f273..527164c4f9e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,19 @@ +2018-12-14 Jakub Jelinek + + PR c++/82294 + PR c++/87436 + * expr.h (categorize_ctor_elements): Add p_unique_nz_elts argument. + * expr.c (categorize_ctor_elements_1): Likewise. Compute it like + p_nz_elts, except don't multiply it by mult. Adjust recursive call. + Fix up COMPLEX_CST handling. + (categorize_ctor_elements): Add p_unique_nz_elts argument, initialize + it and pass it through to categorize_ctor_elements_1. + (mostly_zeros_p, all_zeros_p): Adjust categorize_ctor_elements callers. + * gimplify.c (gimplify_init_constructor): Likewise. Don't force + ctor into readonly data section if num_unique_nonzero_elements is + smaller or equal to 1/8 of num_nonzero_elements and size is >= 64 + bytes. + 2018-12-14 Thomas Preud'homme * config/arm/arm.md (arm_movdi): Split if -mslow-flash-data and diff --git a/gcc/expr.c b/gcc/expr.c index 85b7847431b..b4a2133ebce 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -5945,10 +5945,11 @@ count_type_elements (const_tree type, bool for_ctor_p) static bool categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts, + HOST_WIDE_INT *p_unique_nz_elts, HOST_WIDE_INT *p_init_elts, bool *p_complete) { unsigned HOST_WIDE_INT idx; - HOST_WIDE_INT nz_elts, init_elts, num_fields; + HOST_WIDE_INT nz_elts, unique_nz_elts, init_elts, num_fields; tree value, purpose, elt_type; /* Whether CTOR is a valid constant initializer, in accordance with what @@ -5958,6 +5959,7 @@ categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts, bool const_p = const_from_elts_p ? true : TREE_STATIC (ctor); nz_elts = 0; + unique_nz_elts = 0; init_elts = 0; num_fields = 0; elt_type = NULL_TREE; @@ -5982,12 +5984,13 @@ categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts, { case CONSTRUCTOR: { - HOST_WIDE_INT nz = 0, ic = 0; + HOST_WIDE_INT nz = 0, unz = 0, ic = 0; - bool const_elt_p = categorize_ctor_elements_1 (value, &nz, &ic, - p_complete); + bool const_elt_p = categorize_ctor_elements_1 (value, &nz, &unz, + &ic, p_complete); nz_elts += mult * nz; + unique_nz_elts += unz; init_elts += mult * ic; if (const_from_elts_p && const_p) @@ -5999,21 +6002,31 @@ categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts, case REAL_CST: case FIXED_CST: if (!initializer_zerop (value)) - nz_elts += mult; + { + nz_elts += mult; + unique_nz_elts++; + } init_elts += mult; break; case STRING_CST: nz_elts += mult * TREE_STRING_LENGTH (value); + unique_nz_elts += TREE_STRING_LENGTH (value); init_elts += mult * TREE_STRING_LENGTH (value); break; case COMPLEX_CST: if (!initializer_zerop (TREE_REALPART (value))) - nz_elts += mult; + { + nz_elts += mult; + unique_nz_elts++; + } if (!initializer_zerop (TREE_IMAGPART (value))) - nz_elts += mult; - init_elts += mult; + { + nz_elts += mult; + unique_nz_elts++; + } + init_elts += 2 * mult; break; case VECTOR_CST: @@ -6025,7 +6038,10 @@ categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts, { tree v = VECTOR_CST_ELT (value, i); if (!initializer_zerop (v)) - nz_elts += mult; + { + nz_elts += mult; + unique_nz_elts++; + } init_elts += mult; } } @@ -6035,6 +6051,7 @@ categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts, { HOST_WIDE_INT tc = count_type_elements (elt_type, false); nz_elts += mult * tc; + unique_nz_elts += tc; init_elts += mult * tc; if (const_from_elts_p && const_p) @@ -6054,6 +6071,7 @@ categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts, *p_complete = false; *p_nz_elts += nz_elts; + *p_unique_nz_elts += unique_nz_elts; *p_init_elts += init_elts; return const_p; @@ -6062,6 +6080,11 @@ categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts, /* Examine CTOR to discover: * how many scalar fields are set to nonzero values, and place it in *P_NZ_ELTS; + * the same, but counting RANGE_EXPRs as multiplier of 1 instead of + high - low + 1 (this can be useful for callers to determine ctors + that could be cheaply initialized with - perhaps nested - loops + compared to copied from huge read-only data), + and place it in *P_UNIQUE_NZ_ELTS; * how many scalar fields in total are in CTOR, and place it in *P_ELT_COUNT. * whether the constructor is complete -- in the sense that every @@ -6073,13 +6096,16 @@ categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts, bool categorize_ctor_elements (const_tree ctor, HOST_WIDE_INT *p_nz_elts, + HOST_WIDE_INT *p_unique_nz_elts, HOST_WIDE_INT *p_init_elts, bool *p_complete) { *p_nz_elts = 0; + *p_unique_nz_elts = 0; *p_init_elts = 0; *p_complete = true; - return categorize_ctor_elements_1 (ctor, p_nz_elts, p_init_elts, p_complete); + return categorize_ctor_elements_1 (ctor, p_nz_elts, p_unique_nz_elts, + p_init_elts, p_complete); } /* TYPE is initialized by a constructor with NUM_ELTS elements, the last @@ -6110,17 +6136,18 @@ complete_ctor_at_level_p (const_tree type, HOST_WIDE_INT num_elts, return count_type_elements (type, true) == num_elts; } -/* Return 1 if EXP contains mostly (3/4) zeros. */ +/* Return 1 if EXP contains mostly (3/4) zeros. */ static int mostly_zeros_p (const_tree exp) { if (TREE_CODE (exp) == CONSTRUCTOR) { - HOST_WIDE_INT nz_elts, init_elts; + HOST_WIDE_INT nz_elts, unz_elts, init_elts; bool complete_p; - categorize_ctor_elements (exp, &nz_elts, &init_elts, &complete_p); + categorize_ctor_elements (exp, &nz_elts, &unz_elts, &init_elts, + &complete_p); return !complete_p || nz_elts < init_elts / 4; } @@ -6134,10 +6161,11 @@ all_zeros_p (const_tree exp) { if (TREE_CODE (exp) == CONSTRUCTOR) { - HOST_WIDE_INT nz_elts, init_elts; + HOST_WIDE_INT nz_elts, unz_elts, init_elts; bool complete_p; - categorize_ctor_elements (exp, &nz_elts, &init_elts, &complete_p); + categorize_ctor_elements (exp, &nz_elts, &unz_elts, &init_elts, + &complete_p); return nz_elts == 0; } diff --git a/gcc/expr.h b/gcc/expr.h index 6ae343d81f0..4469652e443 100644 --- a/gcc/expr.h +++ b/gcc/expr.h @@ -309,7 +309,8 @@ extern bool can_move_by_pieces (unsigned HOST_WIDE_INT, unsigned int); extern unsigned HOST_WIDE_INT highest_pow2_factor (const_tree); extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *, - HOST_WIDE_INT *, bool *); + HOST_WIDE_INT *, HOST_WIDE_INT *, + bool *); extern void expand_operands (tree, tree, rtx, rtx*, rtx*, enum expand_modifier); diff --git a/gcc/gimplify.c b/gcc/gimplify.c index 9022ef8b2ee..465d138abbe 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -4778,7 +4778,15 @@ gimplify_init_constructor (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, { struct gimplify_init_ctor_preeval_data preeval_data; HOST_WIDE_INT num_ctor_elements, num_nonzero_elements; + HOST_WIDE_INT num_unique_nonzero_elements; bool cleared, complete_p, valid_const_initializer; + /* Use readonly data for initializers of this or smaller size + regardless of the num_nonzero_elements / num_unique_nonzero_elements + ratio. */ + const HOST_WIDE_INT min_unique_size = 64; + /* If num_nonzero_elements / num_unique_nonzero_elements ratio + is smaller than this, use readonly data. */ + const int unique_nonzero_ratio = 8; /* Aggregate types must lower constructors to initialization of individual elements. The exception is that a CONSTRUCTOR node @@ -4795,6 +4803,7 @@ gimplify_init_constructor (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, can only do so if it known to be a valid constant initializer. */ valid_const_initializer = categorize_ctor_elements (ctor, &num_nonzero_elements, + &num_unique_nonzero_elements, &num_ctor_elements, &complete_p); /* If a const aggregate variable is being initialized, then it @@ -4803,7 +4812,15 @@ gimplify_init_constructor (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, && num_nonzero_elements > 1 && TREE_READONLY (object) && VAR_P (object) - && (flag_merge_constants >= 2 || !TREE_ADDRESSABLE (object))) + && (flag_merge_constants >= 2 || !TREE_ADDRESSABLE (object)) + /* For ctors that have many repeated nonzero elements + represented through RANGE_EXPRs, prefer initializing + those through runtime loops over copies of large amounts + of data from readonly data section. */ + && (num_unique_nonzero_elements + > num_nonzero_elements / unique_nonzero_ratio + || ((unsigned HOST_WIDE_INT) int_size_in_bytes (type) + <= (unsigned HOST_WIDE_INT) min_unique_size))) { if (notify_temp_creation) return GS_ERROR; @@ -4896,6 +4913,13 @@ gimplify_init_constructor (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, is so large as to make individual moves inefficient. */ if (size > 0 && num_nonzero_elements > 1 + /* For ctors that have many repeated nonzero elements + represented through RANGE_EXPRs, prefer initializing + those through runtime loops over copies of large amounts + of data from readonly data section. */ + && (num_unique_nonzero_elements + > num_nonzero_elements / unique_nonzero_ratio + || size <= min_unique_size) && (size < num_nonzero_elements || !can_move_by_pieces (size, align))) { diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 3717a5c7ed2..1374be13f0e 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2018-12-14 Jakub Jelinek + + PR c++/82294 + PR c++/87436 + * g++.dg/tree-ssa/pr82294.C: New test. + * g++.dg/tree-ssa/pr87436.C: New test. + 2018-11-14 Thomas Preud'homme * gcc.target/arm/thumb2-slow-flash-data-2.c: Require arm_fp_ok diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr82294.C b/gcc/testsuite/g++.dg/tree-ssa/pr82294.C new file mode 100644 index 00000000000..25aaba7d016 --- /dev/null +++ b/gcc/testsuite/g++.dg/tree-ssa/pr82294.C @@ -0,0 +1,13 @@ +// PR c++/82294 +// { dg-do compile { target c++11 } } +// { dg-options "-O2 -fdump-tree-gimple" } + +// Verify we don't "optimize" the ctor as copying a 1KB .rodata +// object into the variable. It is better to initialize it through +// a loop. +// { dg-final { scan-tree-dump-not "this->arr = " "gimple" } } + +struct S { int x; explicit constexpr S (); }; +constexpr S::S () : x{7} {} +struct T { S arr[256]; explicit T (); }; +T::T () {} diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr87436.C b/gcc/testsuite/g++.dg/tree-ssa/pr87436.C new file mode 100644 index 00000000000..6e35bf8afff --- /dev/null +++ b/gcc/testsuite/g++.dg/tree-ssa/pr87436.C @@ -0,0 +1,25 @@ +// PR c++/87436 +// { dg-do compile { target { c++11 && size32plus } } } +// { dg-options "-O2 -fdump-tree-gimple" } + +// Verify we don't "optimize" the ctor as copying a 384MB .rodata +// object into the variable. It is better to initialize it through +// two nested loops. +// { dg-final { scan-tree-dump-not "this->arr = " "gimple" } } + +struct S { + int a = -1; + short b = 3; + int x = 0; + int y = 1; + int z = 42; + float f = 0.123f; +}; + +struct T { S arr[4096][4096]; }; + +T * +foo () +{ + return new T; +} -- 2.30.2