From: Richard Biener Date: Thu, 15 Oct 2020 07:10:40 +0000 (+0200) Subject: tree-optimization/97482 - fix split_constant_offset of nop-conversions X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=28290cb50c7dbf87458befeb3e295b5cb13560b5;p=gcc.git tree-optimization/97482 - fix split_constant_offset of nop-conversions split_constant_offset is confused by a nop-conversion from unsigned long to sizetype and tries to prove that the inner operation does not overflow. Since the conversion could have been elided entirely, make sure split_constant_offset handles this case properly. This change also makes sure that convert_to_ptrofftype does not introduce unnecessary conversions, which in this case is the source of the unnecessary conversion. 2020-10-15 Richard Biener PR tree-optimization/97482 * tree-data-ref.c (split_constant_offset_1): Handle trivial conversions better. * fold-const.c (convert_to_ptrofftype_loc): Elide conversion if the offset is already ptrofftype_p. * gcc.dg/vect/pr97428.c: New testcase. 
--- diff --git a/gcc/fold-const.c b/gcc/fold-const.c index 0cc80adf632..ebd32bb2e04 100644 --- a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -15489,6 +15489,8 @@ ptr_difference_const (tree e1, tree e2, poly_int64_pod *diff) tree convert_to_ptrofftype_loc (location_t loc, tree off) { + if (ptrofftype_p (TREE_TYPE (off))) + return off; return fold_convert_loc (loc, sizetype, off); } diff --git a/gcc/testsuite/gcc.dg/vect/pr97428.c b/gcc/testsuite/gcc.dg/vect/pr97428.c new file mode 100644 index 00000000000..b5b02dca9de --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr97428.c @@ -0,0 +1,43 @@ +/* { dg-do compile } */ + +typedef struct { double re, im; } dcmlx_t; +typedef struct { double re[4], im[4]; } dcmlx4_t; + +void foo_i2(dcmlx4_t dst[], const dcmlx_t src[], int n) +{ + for (int i = 0; i < n; ++i) { + dcmlx_t s00 = src[i*4+0]; + dcmlx_t s01 = src[i*4+1]; + dcmlx_t s02 = src[i*4+2]; + dcmlx_t s03 = src[i*4+3]; + + dcmlx_t s10 = src[i*4+0+n]; + dcmlx_t s11 = src[i*4+1+n]; + dcmlx_t s12 = src[i*4+2+n]; + dcmlx_t s13 = src[i*4+3+n]; + + dst[i*2+0].re[0] = s00.re; + dst[i*2+0].re[1] = s01.re; + dst[i*2+0].re[2] = s02.re; + dst[i*2+0].re[3] = s03.re; + dst[i*2+0].im[0] = s00.im; + dst[i*2+0].im[1] = s01.im; + dst[i*2+0].im[2] = s02.im; + dst[i*2+0].im[3] = s03.im; + + dst[i*2+1].re[0] = s10.re; + dst[i*2+1].re[1] = s11.re; + dst[i*2+1].re[2] = s12.re; + dst[i*2+1].re[3] = s13.re; + dst[i*2+1].im[0] = s10.im; + dst[i*2+1].im[1] = s11.im; + dst[i*2+1].im[2] = s12.im; + dst[i*2+1].im[3] = s13.im; + } +} + +/* The first step to produce optimal code is to appropriately detect the + load and store groups. 
*/ +/* { dg-final { scan-tree-dump "Detected interleaving load of size 8" "vect" } } */ +/* { dg-final { scan-tree-dump "Detected interleaving store of size 16" "vect" } } */ +/* { dg-final { scan-tree-dump-not "gap of 6 elements" "vect" } } */ diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index 5505ba46778..3bf460cccfd 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -754,7 +754,9 @@ split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1, && TYPE_PRECISION (type) >= TYPE_PRECISION (itype) && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type))) { - if (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_WRAPS (itype)) + if (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_WRAPS (itype) + && (TYPE_PRECISION (type) > TYPE_PRECISION (itype) + || TYPE_UNSIGNED (itype) != TYPE_UNSIGNED (type))) { /* Split the unconverted operand and try to prove that wrapping isn't a problem. */