the accumulator. If REUSE_EXPANSION is 0 reuse
the original accumulator. Else use
var_expansions[REUSE_EXPANSION - 1]. */
+ unsigned accum_pos; /* The position in which the accumulator is placed in
+ the insn src. For example in x = x + something
+ accum_pos is 0 while in x = something + x accum_pos
+ is 1. */
};
/* Information about optimization applied in
static struct var_to_expand *
analyze_insn_to_expand_var (struct loop *loop, rtx insn)
{
- rtx set, dest, src, op1;
+ rtx set, dest, src, op1, op2, something;
struct var_to_expand *ves;
enum machine_mode mode1, mode2;
-
+ unsigned accum_pos;
+
set = single_set (insn);
if (!set)
return NULL;
if (!have_insn_for (GET_CODE (src), GET_MODE (src)))
return NULL;
- if (!XEXP (src, 0))
- return NULL;
-
op1 = XEXP (src, 0);
+ op2 = XEXP (src, 1);
if (!REG_P (dest)
&& !(GET_CODE (dest) == SUBREG
&& REG_P (SUBREG_REG (dest))))
return NULL;
- if (!rtx_equal_p (dest, op1))
- return NULL;
-
+ if (rtx_equal_p (dest, op1))
+ accum_pos = 0;
+ else if (rtx_equal_p (dest, op2))
+ accum_pos = 1;
+ else
+ return NULL;
+
+ /* The method of expansion that we are using, which includes
+ the initialization of the expansions with zero and the summation of
+ the expansions at the end of the computation, will yield wrong results
+ for (x = something - x); thus avoid using it in that case. */
+ if (accum_pos == 1
+ && GET_CODE (src) == MINUS)
+ return NULL;
+
+ something = (accum_pos == 0)? op2 : op1;
+
if (!referenced_in_one_insn_in_loop_p (loop, dest))
return NULL;
- if (rtx_referenced_p (dest, XEXP (src, 1)))
+ if (rtx_referenced_p (dest, something))
return NULL;
mode1 = GET_MODE (dest);
- mode2 = GET_MODE (XEXP (src, 1));
+ mode2 = GET_MODE (something);
if ((FLOAT_MODE_P (mode1)
|| FLOAT_MODE_P (mode2))
&& !flag_unsafe_math_optimizations)
ves->op = GET_CODE (src);
ves->expansion_count = 0;
ves->reuse_expansion = 0;
+ ves->accum_pos = accum_pos;
return ves;
}
new_reg = get_expansion (ve);
validate_change (insn, &SET_DEST (set), new_reg, 1);
- validate_change (insn, &XEXP (SET_SRC (set), 0), new_reg, 1);
+ validate_change (insn, &XEXP (SET_SRC (set), ve->accum_pos), new_reg, 1);
if (apply_change_group ())
if (really_new_expansion)
--- /dev/null
+/* { dg-do run { target { powerpc*-*-* && powerpc_altivec_ok } } } */
+/* { dg-options "-O2 -funroll-loops -ffast-math -fvariable-expansion-in-unroller -maltivec -dL" } */
+
+#include "altivec.h"
+extern void abort (void);
+extern void exit (int);
+
+#define N 256
+
+float in1[N] __attribute__ ((__aligned__ (16))) = {0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51, 54, 57};
+float in2[N] __attribute__ ((__aligned__ (16))) = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
+/* Multiply-accumulate N vector pairs loaded from in1 and in2 into a
+   vector accumulator, fold that accumulator and store one element of
+   the result into the scalar ACCUM.  Returns ACCUM when it equals
+   1518, and 0 otherwise.  */
+float
+foo (int n)
+{
+ unsigned i;
+ vector float vp1, vp2, vp3, vaccum, vzero, vtmp;
+ float accum = 1.0; /* Written again by the vec_ste call below.  */
+
+ vzero = (vector float){0, 0, 0, 0};
+
+ vaccum = vzero; /* Accumulation starts from an all-zero vector.  */
+
+ for (i = 0; i < n; i++)
+ {
+ vp1 = vec_ld (i * 16, in1); /* i * 16 is presumably a byte offset, one 16-byte vector per iteration -- confirm.  */
+ vp2 = vec_ld (i * 16, in2);
+
+ vaccum = vec_madd (vp1, vp2, vaccum); /* vaccum is both the destination and the last operand.  */
+
+ }
+ vtmp = vec_sld (vaccum, vaccum, 8); /* NOTE(review): this and the next three statements look like a rotate-and-add reduction over the vector elements -- confirm against vec_sld semantics.  */
+ vp1 = vec_add (vaccum, vtmp);
+ vtmp = vec_sld (vp1, vp1, 4);
+ vp2 = vec_add (vp1, vtmp);
+
+ vec_ste (vp2, 0, &accum); /* Store an element of the folded vector into accum.  */
+ if (accum != 1518) /* Presumably the sum of in1[k] * in2[k] for k in 0..11, matching the foo (3) call in main -- verify.  */
+ return 0;
+
+ return accum;
+}
+/* Test driver: call foo with a loop count of 3; abort if it returns 0,
+   otherwise exit with status 0.  */
+int
+main (void)
+{
+ if (!foo (3)) /* foo returns 0 when its result check fails.  */
+ abort ();
+
+ exit (0);
+}
+
+/* { dg-final { scan-rtl-dump "Expanding Accumulator" "loop2_unroll" } } */
+/* { dg-final { cleanup-rtl-dump "loop*" } } */
+
+
+
+