"TARGET_SIMD && reload_completed
&& GP_REGNUM_P (REGNO (operands[0]))
&& GP_REGNUM_P (REGNO (operands[1]))"
- [(set (match_dup 0) (match_dup 1))
- (set (match_dup 2) (match_dup 3))]
+ [(const_int 0)]
{
- int rdest = REGNO (operands[0]);
- int rsrc = REGNO (operands[1]);
- rtx dest[2], src[2];
-
- dest[0] = gen_rtx_REG (DImode, rdest);
- src[0] = gen_rtx_REG (DImode, rsrc);
- dest[1] = gen_rtx_REG (DImode, rdest + 1);
- src[1] = gen_rtx_REG (DImode, rsrc + 1);
-
- aarch64_simd_disambiguate_copy (operands, dest, src, 2);
+ aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
+ DONE;
})
(define_split
;; Reload patterns for AdvSIMD register list operands.
(define_expand "mov<mode>"
- [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "")
- (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" ""))]
+ [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
+ (match_operand:VSTRUCT 1 "general_operand" ""))]
"TARGET_SIMD"
{
if (can_create_pseudo_p ())
(define_insn "*aarch64_mov<mode>"
[(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
- (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
- "TARGET_SIMD
+ (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
+ "TARGET_SIMD && !BYTES_BIG_ENDIAN
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
-
-{
- switch (which_alternative)
- {
- case 0: return "#";
- case 1: return "st1\\t{%S1.16b - %<Vendreg>1.16b}, %0";
- case 2: return "ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1";
- default: gcc_unreachable ();
- }
-}
- [(set_attr "type" "neon_move,neon_store<nregs>_<nregs>reg_q,\
- neon_load<nregs>_<nregs>reg_q")
+ "@
+ #
+ st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
+ ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
+ [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
+ neon_load<nregs>_<nregs>reg_q")
(set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]
)
[(set_attr "type" "neon_store1_1reg<q>")]
)
+(define_insn "*aarch64_be_movoi"
+ [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
+ (match_operand:OI 1 "general_operand" " w,w,m"))]
+ "TARGET_SIMD && BYTES_BIG_ENDIAN
+ && (register_operand (operands[0], OImode)
+ || register_operand (operands[1], OImode))"
+ "@
+ #
+ stp\\t%q1, %R1, %0
+ ldp\\t%q0, %R0, %1"
+ [(set_attr "type" "multiple,neon_store2_2reg_q,neon_load2_2reg_q")
+ (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]
+)
+
+(define_insn "*aarch64_be_movci"
+ [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
+ (match_operand:CI 1 "general_operand" " w,w,o"))]
+ "TARGET_SIMD && BYTES_BIG_ENDIAN
+ && (register_operand (operands[0], CImode)
+ || register_operand (operands[1], CImode))"
+ "#"
+ [(set_attr "type" "multiple")
+ (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]
+)
+
+(define_insn "*aarch64_be_movxi"
+ [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
+ (match_operand:XI 1 "general_operand" " w,w,o"))]
+ "TARGET_SIMD && BYTES_BIG_ENDIAN
+ && (register_operand (operands[0], XImode)
+ || register_operand (operands[1], XImode))"
+ "#"
+ [(set_attr "type" "multiple")
+ (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]
+)
+
(define_split
- [(set (match_operand:OI 0 "register_operand" "")
- (match_operand:OI 1 "register_operand" ""))]
+ [(set (match_operand:OI 0 "register_operand")
+ (match_operand:OI 1 "register_operand"))]
"TARGET_SIMD && reload_completed"
- [(set (match_dup 0) (match_dup 1))
- (set (match_dup 2) (match_dup 3))]
+ [(const_int 0)]
{
- int rdest = REGNO (operands[0]);
- int rsrc = REGNO (operands[1]);
- rtx dest[2], src[2];
-
- dest[0] = gen_rtx_REG (TFmode, rdest);
- src[0] = gen_rtx_REG (TFmode, rsrc);
- dest[1] = gen_rtx_REG (TFmode, rdest + 1);
- src[1] = gen_rtx_REG (TFmode, rsrc + 1);
-
- aarch64_simd_disambiguate_copy (operands, dest, src, 2);
+ aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
+ DONE;
})
(define_split
- [(set (match_operand:CI 0 "register_operand" "")
- (match_operand:CI 1 "register_operand" ""))]
+ [(set (match_operand:CI 0 "nonimmediate_operand")
+ (match_operand:CI 1 "general_operand"))]
"TARGET_SIMD && reload_completed"
- [(set (match_dup 0) (match_dup 1))
- (set (match_dup 2) (match_dup 3))
- (set (match_dup 4) (match_dup 5))]
+ [(const_int 0)]
{
- int rdest = REGNO (operands[0]);
- int rsrc = REGNO (operands[1]);
- rtx dest[3], src[3];
-
- dest[0] = gen_rtx_REG (TFmode, rdest);
- src[0] = gen_rtx_REG (TFmode, rsrc);
- dest[1] = gen_rtx_REG (TFmode, rdest + 1);
- src[1] = gen_rtx_REG (TFmode, rsrc + 1);
- dest[2] = gen_rtx_REG (TFmode, rdest + 2);
- src[2] = gen_rtx_REG (TFmode, rsrc + 2);
-
- aarch64_simd_disambiguate_copy (operands, dest, src, 3);
+ if (register_operand (operands[0], CImode)
+ && register_operand (operands[1], CImode))
+ {
+ aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
+ DONE;
+ }
+ else if (BYTES_BIG_ENDIAN)
+ {
+ emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
+ simplify_gen_subreg (OImode, operands[1], CImode, 0));
+ emit_move_insn (gen_lowpart (V16QImode,
+ simplify_gen_subreg (TImode, operands[0],
+ CImode, 32)),
+ gen_lowpart (V16QImode,
+ simplify_gen_subreg (TImode, operands[1],
+ CImode, 32)));
+ DONE;
+ }
+ else
+ FAIL;
})
(define_split
- [(set (match_operand:XI 0 "register_operand" "")
- (match_operand:XI 1 "register_operand" ""))]
+ [(set (match_operand:XI 0 "nonimmediate_operand")
+ (match_operand:XI 1 "general_operand"))]
"TARGET_SIMD && reload_completed"
- [(set (match_dup 0) (match_dup 1))
- (set (match_dup 2) (match_dup 3))
- (set (match_dup 4) (match_dup 5))
- (set (match_dup 6) (match_dup 7))]
+ [(const_int 0)]
{
- int rdest = REGNO (operands[0]);
- int rsrc = REGNO (operands[1]);
- rtx dest[4], src[4];
-
- dest[0] = gen_rtx_REG (TFmode, rdest);
- src[0] = gen_rtx_REG (TFmode, rsrc);
- dest[1] = gen_rtx_REG (TFmode, rdest + 1);
- src[1] = gen_rtx_REG (TFmode, rsrc + 1);
- dest[2] = gen_rtx_REG (TFmode, rdest + 2);
- src[2] = gen_rtx_REG (TFmode, rsrc + 2);
- dest[3] = gen_rtx_REG (TFmode, rdest + 3);
- src[3] = gen_rtx_REG (TFmode, rsrc + 3);
-
- aarch64_simd_disambiguate_copy (operands, dest, src, 4);
+ if (register_operand (operands[0], XImode)
+ && register_operand (operands[1], XImode))
+ {
+ aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
+ DONE;
+ }
+ else if (BYTES_BIG_ENDIAN)
+ {
+ emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
+ simplify_gen_subreg (OImode, operands[1], XImode, 0));
+ emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
+ simplify_gen_subreg (OImode, operands[1], XImode, 32));
+ DONE;
+ }
+ else
+ FAIL;
})
(define_expand "aarch64_ld2r<mode>"
{
enum rtx_code code = GET_CODE (x);
rtx op0, op1;
+
+ /* On BE, we use load/store pair for all large int mode load/stores. */
+ bool load_store_pair_p = (outer_code == PARALLEL
+ || (BYTES_BIG_ENDIAN
+ && aarch64_vect_struct_mode_p (mode)));
+
bool allow_reg_index_p =
- outer_code != PARALLEL && (GET_MODE_SIZE (mode) != 16
- || aarch64_vector_mode_supported_p (mode));
- /* Don't support anything other than POST_INC or REG addressing for
- AdvSIMD. */
- if (aarch64_vect_struct_mode_p (mode)
+ !load_store_pair_p
+ && (GET_MODE_SIZE (mode) != 16 || aarch64_vector_mode_supported_p (mode))
+ && !aarch64_vect_struct_mode_p (mode);
+
+ /* On LE, for AdvSIMD, don't support anything other than POST_INC or
+ REG addressing. */
+ if (aarch64_vect_struct_mode_p (mode) && !BYTES_BIG_ENDIAN
&& (code != POST_INC && code != REG))
return false;
return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
&& offset_9bit_signed_unscaled_p (mode, offset));
- if (outer_code == PARALLEL)
+ /* A 7-bit offset check because OImode will emit a ldp/stp
+ instruction (only big endian will get here).
+ For ldp/stp instructions, the offset is scaled for the size of a
+ single element of the pair. */
+ if (mode == OImode)
+ return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
+
+ /* Three 9/12-bit offset checks because CImode will emit three
+ ldr/str instructions (only big endian will get here). */
+ if (mode == CImode)
+ return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
+ && (offset_9bit_signed_unscaled_p (V16QImode, offset + 32)
+ || offset_12bit_unsigned_scaled_p (V16QImode,
+ offset + 32)));
+
+ /* Two 7-bit offset checks because XImode will emit two ldp/stp
+ instructions (only big endian will get here). */
+ if (mode == XImode)
+ return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
+ && aarch64_offset_7bit_signed_scaled_p (TImode,
+ offset + 32));
+
+ if (load_store_pair_p)
return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
&& aarch64_offset_7bit_signed_scaled_p (mode, offset));
else
return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
&& offset_9bit_signed_unscaled_p (mode, offset));
- if (outer_code == PARALLEL)
+ if (load_store_pair_p)
return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
&& aarch64_offset_7bit_signed_scaled_p (mode, offset));
else
/* load literal: pc-relative constant pool entry. Only supported
for SI mode or larger. */
info->type = ADDRESS_SYMBOLIC;
- if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
+
+ if (!load_store_pair_p && GET_MODE_SIZE (mode) >= 4)
{
rtx sym, addend;
asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
break;
+ case 'R':
+ /* Print a scalar FP/SIMD register name + 1. */
+ if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
+ {
+ output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
+ return;
+ }
+ asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
+ break;
+
case 'X':
/* Print bottom 16 bits of integer constant in hex. */
if (!CONST_INT_P (x))
|| REG_P (XEXP (op, 0)));
}
-/* Set up OPERANDS for a register copy from SRC to DEST, taking care
- not to early-clobber SRC registers in the process.
+/* Emit a register copy from operand to operand, taking care not to
+ early-clobber source registers in the process.
- We assume that the operands described by SRC and DEST represent a
- decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
- number of components into which the copy has been decomposed. */
+ COUNT is the number of components into which the copy needs to be
+ decomposed. */
void
-aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
- rtx *src, unsigned int count)
+aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode,
+ unsigned int count)
{
unsigned int i;
+ int rdest = REGNO (operands[0]);
+ int rsrc = REGNO (operands[1]);
if (!reg_overlap_mentioned_p (operands[0], operands[1])
- || REGNO (operands[0]) < REGNO (operands[1]))
- {
- for (i = 0; i < count; i++)
- {
- operands[2 * i] = dest[i];
- operands[2 * i + 1] = src[i];
- }
- }
+ || rdest < rsrc)
+ for (i = 0; i < count; i++)
+ emit_move_insn (gen_rtx_REG (mode, rdest + i),
+ gen_rtx_REG (mode, rsrc + i));
else
- {
- for (i = 0; i < count; i++)
- {
- operands[2 * i] = dest[count - i - 1];
- operands[2 * i + 1] = src[count - i - 1];
- }
- }
+ for (i = 0; i < count; i++)
+ emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
+ gen_rtx_REG (mode, rsrc + count - i - 1));
}
/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is