+2019-08-15 Richard Sandiford <richard.sandiford@arm.com>
+
+ * config/aarch64/aarch64.c (aarch64_add_offset): In the fallback
+ multiplication case, try to compute VG * (lowest set bit) directly
+ rather than always basing the multiplication on VG. Use
+ expand_mult for the multiplication if we can.
+
2019-08-15 Richard Sandiford <richard.sandiford@arm.com>
* config/aarch64/aarch64-protos.h
#include "selftest-rtl.h"
#include "rtx-vector-builder.h"
#include "intl.h"
+#include "expmed.h"
/* This file should be included last. */
#include "target-def.h"
}
else
{
- /* Use CNTD, then multiply it by FACTOR. */
- val = gen_int_mode (poly_int64 (2, 2), mode);
+ /* Base the factor on LOW_BIT if we can calculate VG * LOW_BIT
+ directly, since that should increase the chances of being
+ able to use a shift-and-add sequence. If LOW_BIT itself
+ is out of range (above 16 * 8), just use CNTD. */
+ if (low_bit <= 16 * 8)
+ factor /= low_bit;
+ else
+ low_bit = 1;
+
+ val = gen_int_mode (poly_int64 (low_bit * 2, low_bit * 2), mode);
val = aarch64_force_temporary (mode, temp1, val);
- /* Go back to using a negative multiplication factor if we have
- no register from which to subtract. */
- if (code == MINUS && src == const0_rtx)
+ if (can_create_pseudo_p ())
{
- factor = -factor;
- code = PLUS;
+ rtx coeff1 = gen_int_mode (factor, mode);
+ val = expand_mult (mode, val, coeff1, NULL_RTX, false, true);
+ }
+ else
+ {
+ /* Go back to using a negative multiplication factor if we have
+ no register from which to subtract. */
+ if (code == MINUS && src == const0_rtx)
+ {
+ factor = -factor;
+ code = PLUS;
+ }
+ rtx coeff1 = gen_int_mode (factor, mode);
+ coeff1 = aarch64_force_temporary (mode, temp2, coeff1);
+ val = gen_rtx_MULT (mode, val, coeff1);
}
- rtx coeff1 = gen_int_mode (factor, mode);
- coeff1 = aarch64_force_temporary (mode, temp2, coeff1);
- val = gen_rtx_MULT (mode, val, coeff1);
}
if (shift > 0)
+2019-08-15 Richard Sandiford <richard.sandiford@arm.com>
+
+ * gcc.target/aarch64/sve/loop_add_4.c: Expect 10 INCWs and
+ INCDs rather than 8.
+
2019-08-15 Richard Sandiford <richard.sandiford@arm.com>
* gcc.target/aarch64/sve/revb_1.c: Restrict to little-endian targets.
/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, w[0-9]+, w[0-9]+\n} 3 } } */
/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]} 8 } } */
/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7]+, \[x[0-9]+, x[0-9]+, lsl 2\]} 8 } } */
-/* { dg-final { scan-assembler-times {\tincw\tx[0-9]+\n} 8 } } */
+/* 2 of the INCWs are for the calculations of -17 and 17. */
+/* { dg-final { scan-assembler-times {\tincw\tx[0-9]+\n} 10 } } */
/* { dg-final { scan-assembler-times {\tdecw\tz[0-9]+\.s, all, mul #16\n} 1 } } */
/* { dg-final { scan-assembler-times {\tdecw\tz[0-9]+\.s, all, mul #15\n} 1 } } */
/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, x[0-9]+, x[0-9]+\n} 3 } } */
/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]+/z, \[x[0-9]+, x[0-9]+, lsl 3\]} 8 } } */
/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7]+, \[x[0-9]+, x[0-9]+, lsl 3\]} 8 } } */
-/* { dg-final { scan-assembler-times {\tincd\tx[0-9]+\n} 8 } } */
+/* 2 of the INCDs are for the calculations of -17 and 17. */
+/* { dg-final { scan-assembler-times {\tincd\tx[0-9]+\n} 10 } } */
/* { dg-final { scan-assembler-times {\tdecd\tz[0-9]+\.d, all, mul #16\n} 1 } } */
/* { dg-final { scan-assembler-times {\tdecd\tz[0-9]+\.d, all, mul #15\n} 1 } } */