From 2d8c6dc1d9cc67b535e70c6ace871d1a8f15805a Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 21 Jan 2015 17:53:31 +0000 Subject: [PATCH] aarch64-protos.h (aarch64_simd_emit_reg_reg_move): Declare. gcc/ * config/aarch64/aarch64-protos.h (aarch64_simd_emit_reg_reg_move): Declare. * config/aarch64/aarch64.c (aarch64_classify_address): Allow extra addressing modes for BE. (aarch64_print_operand): Add 'R' specifier. (aarch64_simd_disambiguate_copy): Delete. (aarch64_simd_emit_reg_reg_move): New function. * config/aarch64/aarch64-simd.md: Use aarch64_simd_emit_reg_reg_move in define_splits for structural moves. (mov<mode>): Use less restrictive predicates. (*aarch64_mov<mode>): Simplify and only allow for LE. (*aarch64_be_movoi, *aarch64_be_movci, *aarch64_be_movxi): New. From-SVN: r219958 --- gcc/ChangeLog | 15 +++ gcc/config/aarch64/aarch64-protos.h | 2 +- gcc/config/aarch64/aarch64-simd.md | 175 +++++++++++++++------------- gcc/config/aarch64/aarch64.c | 94 ++++++++++----- 4 files changed, 176 insertions(+), 110 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 40422a38d54..988097b3523 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +2015-01-21 Richard Sandiford + + * config/aarch64/aarch64-protos.h (aarch64_simd_emit_reg_reg_move): + Declare. + * config/aarch64/aarch64.c (aarch64_classify_address): Allow extra + addressing modes for BE. + (aarch64_print_operand): Add 'R' specifier. + (aarch64_simd_disambiguate_copy): Delete. + (aarch64_simd_emit_reg_reg_move): New function. + * config/aarch64/aarch64-simd.md: Use aarch64_simd_emit_reg_reg_move + in define_splits for structural moves. + (mov<mode>): Use less restrictive predicates. + (*aarch64_mov<mode>): Simplify and only allow for LE. + (*aarch64_be_movoi, *aarch64_be_movci, *aarch64_be_movxi): New. 
+ 2015-01-21 Alan Hayward * rtlanal.c (subreg_get_info): Exit early for simple and common diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 8f286b3ab76..165aa6bac50 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -263,7 +263,7 @@ void aarch64_emit_call_insn (rtx); /* Initialize builtins for SIMD intrinsics. */ void init_aarch64_simd_builtins (void); -void aarch64_simd_disambiguate_copy (rtx *, rtx *, rtx *, unsigned int); +void aarch64_simd_emit_reg_reg_move (rtx *, enum machine_mode, unsigned int); /* Emit code to place a AdvSIMD pair result in memory locations (with equal registers). */ diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index d239884e70c..870054dee4b 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -158,19 +158,10 @@ "TARGET_SIMD && reload_completed && GP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1]))" - [(set (match_dup 0) (match_dup 1)) - (set (match_dup 2) (match_dup 3))] + [(const_int 0)] { - int rdest = REGNO (operands[0]); - int rsrc = REGNO (operands[1]); - rtx dest[2], src[2]; - - dest[0] = gen_rtx_REG (DImode, rdest); - src[0] = gen_rtx_REG (DImode, rsrc); - dest[1] = gen_rtx_REG (DImode, rdest + 1); - src[1] = gen_rtx_REG (DImode, rsrc + 1); - - aarch64_simd_disambiguate_copy (operands, dest, src, 2); + aarch64_simd_emit_reg_reg_move (operands, DImode, 2); + DONE; }) (define_split @@ -4051,8 +4042,8 @@ ;; Reload patterns for AdvSIMD register list operands. 
(define_expand "mov" - [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "") - (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" ""))] + [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "") + (match_operand:VSTRUCT 1 "general_operand" ""))] "TARGET_SIMD" { if (can_create_pseudo_p ()) @@ -4064,22 +4055,16 @@ (define_insn "*aarch64_mov" [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w") - (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))] - "TARGET_SIMD + (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))] + "TARGET_SIMD && !BYTES_BIG_ENDIAN && (register_operand (operands[0], mode) || register_operand (operands[1], mode))" - -{ - switch (which_alternative) - { - case 0: return "#"; - case 1: return "st1\\t{%S1.16b - %1.16b}, %0"; - case 2: return "ld1\\t{%S0.16b - %0.16b}, %1"; - default: gcc_unreachable (); - } -} - [(set_attr "type" "neon_move,neon_store_reg_q,\ - neon_load_reg_q") + "@ + # + st1\\t{%S1.16b - %1.16b}, %0 + ld1\\t{%S0.16b - %0.16b}, %1" + [(set_attr "type" "multiple,neon_store_reg_q,\ + neon_load_reg_q") (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))] ) @@ -4101,70 +4086,102 @@ [(set_attr "type" "neon_store1_1reg")] ) +(define_insn "*aarch64_be_movoi" + [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w") + (match_operand:OI 1 "general_operand" " w,w,m"))] + "TARGET_SIMD && BYTES_BIG_ENDIAN + && (register_operand (operands[0], OImode) + || register_operand (operands[1], OImode))" + "@ + # + stp\\t%q1, %R1, %0 + ldp\\t%q0, %R0, %1" + [(set_attr "type" "multiple,neon_store2_2reg_q,neon_load2_2reg_q") + (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))] +) + +(define_insn "*aarch64_be_movci" + [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w") + (match_operand:CI 1 "general_operand" " w,w,o"))] + "TARGET_SIMD && BYTES_BIG_ENDIAN + && (register_operand (operands[0], CImode) + || register_operand 
(operands[1], CImode))" + "#" + [(set_attr "type" "multiple") + (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))] +) + +(define_insn "*aarch64_be_movxi" + [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w") + (match_operand:XI 1 "general_operand" " w,w,o"))] + "TARGET_SIMD && BYTES_BIG_ENDIAN + && (register_operand (operands[0], XImode) + || register_operand (operands[1], XImode))" + "#" + [(set_attr "type" "multiple") + (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))] +) + (define_split - [(set (match_operand:OI 0 "register_operand" "") - (match_operand:OI 1 "register_operand" ""))] + [(set (match_operand:OI 0 "register_operand") + (match_operand:OI 1 "register_operand"))] "TARGET_SIMD && reload_completed" - [(set (match_dup 0) (match_dup 1)) - (set (match_dup 2) (match_dup 3))] + [(const_int 0)] { - int rdest = REGNO (operands[0]); - int rsrc = REGNO (operands[1]); - rtx dest[2], src[2]; - - dest[0] = gen_rtx_REG (TFmode, rdest); - src[0] = gen_rtx_REG (TFmode, rsrc); - dest[1] = gen_rtx_REG (TFmode, rdest + 1); - src[1] = gen_rtx_REG (TFmode, rsrc + 1); - - aarch64_simd_disambiguate_copy (operands, dest, src, 2); + aarch64_simd_emit_reg_reg_move (operands, TImode, 2); + DONE; }) (define_split - [(set (match_operand:CI 0 "register_operand" "") - (match_operand:CI 1 "register_operand" ""))] + [(set (match_operand:CI 0 "nonimmediate_operand") + (match_operand:CI 1 "general_operand"))] "TARGET_SIMD && reload_completed" - [(set (match_dup 0) (match_dup 1)) - (set (match_dup 2) (match_dup 3)) - (set (match_dup 4) (match_dup 5))] + [(const_int 0)] { - int rdest = REGNO (operands[0]); - int rsrc = REGNO (operands[1]); - rtx dest[3], src[3]; - - dest[0] = gen_rtx_REG (TFmode, rdest); - src[0] = gen_rtx_REG (TFmode, rsrc); - dest[1] = gen_rtx_REG (TFmode, rdest + 1); - src[1] = gen_rtx_REG (TFmode, rsrc + 1); - dest[2] = gen_rtx_REG (TFmode, rdest + 2); - src[2] = gen_rtx_REG (TFmode, rsrc + 2); - - 
aarch64_simd_disambiguate_copy (operands, dest, src, 3); + if (register_operand (operands[0], CImode) + && register_operand (operands[1], CImode)) + { + aarch64_simd_emit_reg_reg_move (operands, TImode, 3); + DONE; + } + else if (BYTES_BIG_ENDIAN) + { + emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0), + simplify_gen_subreg (OImode, operands[1], CImode, 0)); + emit_move_insn (gen_lowpart (V16QImode, + simplify_gen_subreg (TImode, operands[0], + CImode, 32)), + gen_lowpart (V16QImode, + simplify_gen_subreg (TImode, operands[1], + CImode, 32))); + DONE; + } + else + FAIL; }) (define_split - [(set (match_operand:XI 0 "register_operand" "") - (match_operand:XI 1 "register_operand" ""))] + [(set (match_operand:XI 0 "nonimmediate_operand") + (match_operand:XI 1 "general_operand"))] "TARGET_SIMD && reload_completed" - [(set (match_dup 0) (match_dup 1)) - (set (match_dup 2) (match_dup 3)) - (set (match_dup 4) (match_dup 5)) - (set (match_dup 6) (match_dup 7))] + [(const_int 0)] { - int rdest = REGNO (operands[0]); - int rsrc = REGNO (operands[1]); - rtx dest[4], src[4]; - - dest[0] = gen_rtx_REG (TFmode, rdest); - src[0] = gen_rtx_REG (TFmode, rsrc); - dest[1] = gen_rtx_REG (TFmode, rdest + 1); - src[1] = gen_rtx_REG (TFmode, rsrc + 1); - dest[2] = gen_rtx_REG (TFmode, rdest + 2); - src[2] = gen_rtx_REG (TFmode, rsrc + 2); - dest[3] = gen_rtx_REG (TFmode, rdest + 3); - src[3] = gen_rtx_REG (TFmode, rsrc + 3); - - aarch64_simd_disambiguate_copy (operands, dest, src, 4); + if (register_operand (operands[0], XImode) + && register_operand (operands[1], XImode)) + { + aarch64_simd_emit_reg_reg_move (operands, TImode, 4); + DONE; + } + else if (BYTES_BIG_ENDIAN) + { + emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0), + simplify_gen_subreg (OImode, operands[1], XImode, 0)); + emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32), + simplify_gen_subreg (OImode, operands[1], XImode, 32)); + DONE; + } + else + FAIL; }) 
(define_expand "aarch64_ld2r" diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index a89bb417ede..d7310d95e34 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -3424,12 +3424,20 @@ aarch64_classify_address (struct aarch64_address_info *info, { enum rtx_code code = GET_CODE (x); rtx op0, op1; + + /* On BE, we use load/store pair for all large int mode load/stores. */ + bool load_store_pair_p = (outer_code == PARALLEL + || (BYTES_BIG_ENDIAN + && aarch64_vect_struct_mode_p (mode))); + bool allow_reg_index_p = - outer_code != PARALLEL && (GET_MODE_SIZE (mode) != 16 - || aarch64_vector_mode_supported_p (mode)); - /* Don't support anything other than POST_INC or REG addressing for - AdvSIMD. */ - if (aarch64_vect_struct_mode_p (mode) + !load_store_pair_p + && (GET_MODE_SIZE (mode) != 16 || aarch64_vector_mode_supported_p (mode)) + && !aarch64_vect_struct_mode_p (mode); + + /* On LE, for AdvSIMD, don't support anything other than POST_INC or + REG addressing. */ + if (aarch64_vect_struct_mode_p (mode) && !BYTES_BIG_ENDIAN && (code != POST_INC && code != REG)) return false; @@ -3481,7 +3489,29 @@ aarch64_classify_address (struct aarch64_address_info *info, return (aarch64_offset_7bit_signed_scaled_p (mode, offset) && offset_9bit_signed_unscaled_p (mode, offset)); - if (outer_code == PARALLEL) + /* A 7bit offset check because OImode will emit a ldp/stp + instruction (only big endian will get here). + For ldp/stp instructions, the offset is scaled for the size of a + single element of the pair. */ + if (mode == OImode) + return aarch64_offset_7bit_signed_scaled_p (TImode, offset); + + /* Three 9/12 bit offsets checks because CImode will emit three + ldr/str instructions (only big endian will get here). 
*/ + if (mode == CImode) + return (aarch64_offset_7bit_signed_scaled_p (TImode, offset) + && (offset_9bit_signed_unscaled_p (V16QImode, offset + 32) + || offset_12bit_unsigned_scaled_p (V16QImode, + offset + 32))); + + /* Two 7bit offsets checks because XImode will emit two ldp/stp + instructions (only big endian will get here). */ + if (mode == XImode) + return (aarch64_offset_7bit_signed_scaled_p (TImode, offset) + && aarch64_offset_7bit_signed_scaled_p (TImode, + offset + 32)); + + if (load_store_pair_p) return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8) && aarch64_offset_7bit_signed_scaled_p (mode, offset)); else @@ -3541,7 +3571,7 @@ aarch64_classify_address (struct aarch64_address_info *info, return (aarch64_offset_7bit_signed_scaled_p (mode, offset) && offset_9bit_signed_unscaled_p (mode, offset)); - if (outer_code == PARALLEL) + if (load_store_pair_p) return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8) && aarch64_offset_7bit_signed_scaled_p (mode, offset)); else @@ -3555,7 +3585,8 @@ aarch64_classify_address (struct aarch64_address_info *info, /* load literal: pc-relative constant pool entry. Only supported for SI mode or larger. */ info->type = ADDRESS_SYMBOLIC; - if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4) + + if (!load_store_pair_p && GET_MODE_SIZE (mode) >= 4) { rtx sym, addend; @@ -4208,6 +4239,16 @@ aarch64_print_operand (FILE *f, rtx x, char code) asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S')); break; + case 'R': + /* Print a scalar FP/SIMD register name + 1. */ + if (!REG_P (x) || !FP_REGNUM_P (REGNO (x))) + { + output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code); + return; + } + asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1); + break; + case 'X': /* Print bottom 16 bits of integer constant in hex. 
*/ if (!CONST_INT_P (x)) @@ -8595,35 +8636,28 @@ aarch64_simd_mem_operand_p (rtx op) || REG_P (XEXP (op, 0))); } -/* Set up OPERANDS for a register copy from SRC to DEST, taking care - not to early-clobber SRC registers in the process. +/* Emit a register copy from operand to operand, taking care not to + early-clobber source registers in the process. - We assume that the operands described by SRC and DEST represent a - decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the - number of components into which the copy has been decomposed. */ + COUNT is the number of components into which the copy needs to be + decomposed. */ void -aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest, - rtx *src, unsigned int count) +aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode, + unsigned int count) { unsigned int i; + int rdest = REGNO (operands[0]); + int rsrc = REGNO (operands[1]); if (!reg_overlap_mentioned_p (operands[0], operands[1]) - || REGNO (operands[0]) < REGNO (operands[1])) - { - for (i = 0; i < count; i++) - { - operands[2 * i] = dest[i]; - operands[2 * i + 1] = src[i]; - } - } + || rdest < rsrc) + for (i = 0; i < count; i++) + emit_move_insn (gen_rtx_REG (mode, rdest + i), + gen_rtx_REG (mode, rsrc + i)); else - { - for (i = 0; i < count; i++) - { - operands[2 * i] = dest[count - i - 1]; - operands[2 * i + 1] = src[count - i - 1]; - } - } + for (i = 0; i < count; i++) + emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1), + gen_rtx_REG (mode, rsrc + count - i - 1)); } /* Compute and return the length of aarch64_simd_mov, where is -- 2.30.2