+2018-01-08 Vidya Praveen <vidyapraveen@arm.com>
+
+ PR target/83663 - Revert r255946
+ * config/aarch64/aarch64.c (aarch64_expand_vector_init): Revert the
+ code generation change for cases where splatting a value is not useful;
+ always create a duplicate of the most common element again.
+ * simplify-rtx.c (simplify_ternary_operation): Revert the simplification
+ of a vec_merge across a vec_duplicate and a paradoxical subreg forming
+ a vector mode to a vec_concat.
+
2018-01-08 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* config/arm/t-aprofile (MULTILIB_MATCHES): Add mapping rules for
maxv = matches[i][1];
}
- /* Create a duplicate of the most common element, unless all elements
- are equally useless to us, in which case just immediately set the
- vector register using the first element. */
-
- if (maxv == 1)
- {
- /* For vectors of two 64-bit elements, we can do even better. */
- if (n_elts == 2
- && (inner_mode == E_DImode
- || inner_mode == E_DFmode))
-
- {
- rtx x0 = XVECEXP (vals, 0, 0);
- rtx x1 = XVECEXP (vals, 0, 1);
- /* Combine can pick up this case, but handling it directly
- here leaves clearer RTL.
-
- This is load_pair_lanes<mode>, and also gives us a clean-up
- for store_pair_lanes<mode>. */
- if (memory_operand (x0, inner_mode)
- && memory_operand (x1, inner_mode)
- && !STRICT_ALIGNMENT
- && rtx_equal_p (XEXP (x1, 0),
- plus_constant (Pmode,
- XEXP (x0, 0),
- GET_MODE_SIZE (inner_mode))))
- {
- rtx t;
- if (inner_mode == DFmode)
- t = gen_load_pair_lanesdf (target, x0, x1);
- else
- t = gen_load_pair_lanesdi (target, x0, x1);
- emit_insn (t);
- return;
- }
- }
- rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
- aarch64_emit_move (target, lowpart_subreg (mode, x, inner_mode));
- maxelement = 0;
- }
- else
- {
- rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
- aarch64_emit_move (target, gen_vec_duplicate (mode, x));
- }
+ /* Create a duplicate of the most common element. */
+ rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
+ aarch64_emit_move (target, gen_vec_duplicate (mode, x));
/* Insert the rest. */
for (int i = 0; i < n_elts; i++)
return simplify_gen_binary (VEC_CONCAT, mode, newop0, newop1);
}
- /* Replace:
-
- (vec_merge:outer (vec_duplicate:outer x:inner)
- (subreg:outer y:inner 0)
- (const_int N))
-
- with (vec_concat:outer x:inner y:inner) if N == 1,
- or (vec_concat:outer y:inner x:inner) if N == 2.
- We assume that degenrate cases (N == 0 or N == 3), which
- represent taking all elements from either input, are handled
- elsewhere.
-
- Implicitly, this means we have a paradoxical subreg, but such
- a check is cheap, so make it anyway.
-
- Only applies for vectors of two elements. */
-
- if ((GET_CODE (op0) == VEC_DUPLICATE
- || GET_CODE (op1) == VEC_DUPLICATE)
- && GET_MODE (op0) == GET_MODE (op1)
- && known_eq (GET_MODE_NUNITS (GET_MODE (op0)), 2)
- && known_eq (GET_MODE_NUNITS (GET_MODE (op1)), 2)
- && IN_RANGE (sel, 1, 2))
- {
- rtx newop0 = op0, newop1 = op1;
-
- /* Canonicalize locally such that the VEC_DUPLICATE is always
- the first operand. */
- if (GET_CODE (newop1) == VEC_DUPLICATE)
- {
- std::swap (newop0, newop1);
- /* If we swap the operand order, we also need to swap
- the selector mask. */
- sel = sel == 1 ? 2 : 1;
- }
-
- if (GET_CODE (newop1) == SUBREG
- && paradoxical_subreg_p (newop1)
- && subreg_lowpart_p (newop1)
- && GET_MODE (SUBREG_REG (newop1))
- == GET_MODE (XEXP (newop0, 0)))
- {
- newop0 = XEXP (newop0, 0);
- newop1 = SUBREG_REG (newop1);
- if (sel == 2)
- std::swap (newop0, newop1);
- return simplify_gen_binary (VEC_CONCAT, mode,
- newop0, newop1);
- }
- }
-
/* Replace (vec_merge (vec_duplicate x) (vec_duplicate y)
(const_int n))
with (vec_concat x y) or (vec_concat y x) depending on value
+2018-01-08 Vidya Praveen <vidyapraveen@arm.com>
+
+ PR target/83663 - Revert r255946
+ * gcc.target/aarch64/vect-slp-dup.c: Remove.
+
2018-01-08 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* gcc.target/arm/multilib.exp: Add fp16, dotprod and armv8.3-a
+++ /dev/null
-/* { dg-do compile } */
-
-/* { dg-options "-O3 -ftree-vectorize -fno-vect-cost-model" } */
-
-void bar (double);
-
-void
-foo (double *restrict in, double *restrict in2,
- double *restrict out1, double *restrict out2)
-{
- for (int i = 0; i < 1024; i++)
- {
- out1[i] = in[i] + 2.0 * in[i+128];
- out1[i+1] = in[i+1] + 2.0 * in2[i];
- bar (in[i]);
- }
-}
-
-/* { dg-final { scan-assembler-not "dup\tv\[0-9\]+.2d, v\[0-9\]+" } } */
-