From: Andrew Pinski
Date: Mon, 20 Nov 2006 20:29:10 +0000 (+0000)
Subject: re PR target/25500 (SSE2 vectorized code is slower on 4.x.x than previous)
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=903ff2758bdad932eee6ae84539361c18e6c45e9;p=gcc.git

re PR target/25500 (SSE2 vectorized code is slower on 4.x.x than previous)

2006-11-20 Andrew Pinski

        PR tree-opt/25500
        * tree-sra.c (single_scalar_field_in_record_p): New function.
        (decide_block_copy): Use it.

2006-11-20 Andrew Pinski

        PR tree-opt/25500
        * gcc.dg/tree-ssa/sra-4.c: New testcase.

From-SVN: r119026
---

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b9c85589226..2a6a7e276cc 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2006-11-20 Andrew Pinski
+
+        PR tree-opt/25500
+        * tree-sra.c (single_scalar_field_in_record_p): New function.
+        (decide_block_copy): Use it.
+
 2006-11-20 David Daney
 
         * config/mips/linux-unwind.h (mips_fallback_frame_state): Adjust
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 046b230c798..57ce15b1a88 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2006-11-20 Andrew Pinski
+
+        PR tree-opt/25500
+        * gcc.dg/tree-ssa/sra-4.c: New testcase.
+
 2006-11-20 Tobias Burnus
 
         * gfortran.dg/volatile3.f90: Add conflict test.
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-4.c b/gcc/testsuite/gcc.dg/tree-ssa/sra-4.c
new file mode 100644
index 00000000000..6fdf37ffb34
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-4.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-optimized -w" } */
+/* Check that SRA does non block copies for structs that just contain vectors. */
+
+#define vector __attribute__((vector_size(16)))
+
+struct vt
+{
+  vector int t;
+};
+
+
+vector int f(vector int t1, vector int t2)
+{
+  struct vt st1, st2, st3;
+  st1.t = t1;
+  st2 = st1;
+  st2.t += t2;
+  st3 = st2;
+  return st3.t;
+}
+
+/* There should be no references to st as SRA should not have done block copy.
*/
+/* { dg-final { scan-tree-dump-times "st" 0 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
+
diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index 80c4ca744e0..a5ed161ed3e 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -1351,6 +1351,32 @@ instantiate_missing_elements (struct sra_elt *elt)
     }
 }
 
+/* Return true if there is only one non aggregate field in the record, TYPE.
+   Return false otherwise. */
+
+static bool
+single_scalar_field_in_record_p (tree type)
+{
+  int num_fields = 0;
+  tree field;
+  if (TREE_CODE (type) != RECORD_TYPE)
+    return false;
+
+  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+    if (TREE_CODE (field) == FIELD_DECL)
+      {
+        num_fields++;
+
+        if (num_fields == 2)
+          return false;
+
+        if (AGGREGATE_TYPE_P (TREE_TYPE (field)))
+          return false;
+      }
+
+  return true;
+}
+
 /* Make one pass across an element tree deciding whether to perform block
    or element copies. If we decide on element copies, instantiate all
    elements. Return true if there are any instantiated sub-elements. */
@@ -1430,6 +1456,10 @@ decide_block_copy (struct sra_elt *elt)
           full_count = count_type_elements (elt->type, false);
           inst_count = sum_instantiated_sizes (elt, &inst_size);
 
+          /* If there is only one scalar field in the record, don't block copy. */
+          if (single_scalar_field_in_record_p (elt->type))
+            use_block_copy = false;
+
           /* ??? What to do here. If there are two fields, and we've only
              instantiated one, then instantiating the other is clearly a win.
              If there are a large number of fields then the size of the copy