From: Charles Baylis Date: Wed, 11 Nov 2015 01:08:43 +0000 (+0000) Subject: [ARM] PR63870 Mark lane indices of vldN/vstN with appropriate qualifier X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=22f9db641f1532b33d27104fcb00167172bfb8ec;p=gcc.git [ARM] PR63870 Mark lane indices of vldN/vstN with appropriate qualifier 2015-11-11 Charles Baylis PR target/63870 * config/arm/arm-builtins.c: (arm_load1_lane_qualifiers) Use qualifier_struct_load_store_lane_index. (arm_storestruct_lane_qualifiers) Likewise. * config/arm/neon.md: (neon_vld1_lane) Reverse lane numbers for big-endian. (neon_vst1_lane) Likewise. (neon_vld2_lane) Likewise. (neon_vst2_lane) Likewise. (neon_vld3_lane) Likewise. (neon_vst3_lane) Likewise. (neon_vld4_lane) Likewise. (neon_vst4_lane) Likewise. From-SVN: r230143 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 90ab5853c08..f0865eb5294 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,19 @@ +2015-11-11 Charles Baylis + + PR target/63870 + * config/arm/arm-builtins.c: (arm_load1_lane_qualifiers) Use + qualifier_struct_load_store_lane_index. + (arm_storestruct_lane_qualifiers) Likewise. + * config/arm/neon.md: (neon_vld1_lane) Reverse lane numbers for + big-endian. + (neon_vst1_lane) Likewise. + (neon_vld2_lane) Likewise. + (neon_vst2_lane) Likewise. + (neon_vld3_lane) Likewise. + (neon_vst3_lane) Likewise. + (neon_vld4_lane) Likewise. + (neon_vst4_lane) Likewise. 
+ 2015-11-11 Charles Baylis PR target/63870 diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c index d0bd7775704..f73afc269c3 100644 --- a/gcc/config/arm/arm-builtins.c +++ b/gcc/config/arm/arm-builtins.c @@ -152,7 +152,7 @@ arm_load1_qualifiers[SIMD_MAX_BUILTIN_ARGS] static enum arm_type_qualifiers arm_load1_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_none, qualifier_const_pointer_map_mode, - qualifier_none, qualifier_immediate }; + qualifier_none, qualifier_struct_load_store_lane_index }; #define LOAD1LANE_QUALIFIERS (arm_load1_lane_qualifiers) /* The first argument (return type) of a store should be void type, @@ -171,7 +171,7 @@ arm_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS] static enum arm_type_qualifiers arm_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_void, qualifier_pointer_map_mode, - qualifier_none, qualifier_immediate }; + qualifier_none, qualifier_struct_load_store_lane_index }; #define STORE1LANE_QUALIFIERS (arm_storestruct_lane_qualifiers) #define v8qi_UP V8QImode diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index e5a2b0f1c9a..c70e08a39ae 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -4253,6 +4253,9 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_load1_1reg")] ) +;; The lane numbers in the RTL are in GCC lane order, having been flipped +;; in arm_expand_neon_args. The lane numbers are restored to architectural +;; lane order here. 
(define_insn "neon_vld1_lane" [(set (match_operand:VDX 0 "s_register_operand" "=w") (unspec:VDX [(match_operand: 1 "neon_struct_operand" "Um") @@ -4261,8 +4264,9 @@ if (BYTES_BIG_ENDIAN) UNSPEC_VLD1_LANE))] "TARGET_NEON" { - HOST_WIDE_INT lane = INTVAL (operands[3]); + HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(mode, INTVAL (operands[3])); HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + operands[3] = GEN_INT (lane); if (lane < 0 || lane >= max) error ("lane out of range"); if (max == 1) @@ -4273,6 +4277,8 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_load1_one_lane")] ) +;; see comment on neon_vld1_lane for reason why the lane numbers are reversed +;; here on big endian targets. (define_insn "neon_vld1_lane" [(set (match_operand:VQX 0 "s_register_operand" "=w") (unspec:VQX [(match_operand: 1 "neon_struct_operand" "Um") @@ -4281,8 +4287,9 @@ if (BYTES_BIG_ENDIAN) UNSPEC_VLD1_LANE))] "TARGET_NEON" { - HOST_WIDE_INT lane = INTVAL (operands[3]); + HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(mode, INTVAL (operands[3])); HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + operands[3] = GEN_INT (lane); int regno = REGNO (operands[0]); if (lane < 0 || lane >= max) error ("lane out of range"); @@ -4359,6 +4366,8 @@ if (BYTES_BIG_ENDIAN) "vst1.\t%h1, %A0" [(set_attr "type" "neon_store1_1reg")]) +;; see comment on neon_vld1_lane for reason why the lane numbers are reversed +;; here on big endian targets. 
(define_insn "neon_vst1_lane" [(set (match_operand: 0 "neon_struct_operand" "=Um") (unspec: @@ -4367,8 +4376,9 @@ if (BYTES_BIG_ENDIAN) UNSPEC_VST1_LANE))] "TARGET_NEON" { - HOST_WIDE_INT lane = INTVAL (operands[2]); + HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(mode, INTVAL (operands[2])); HOST_WIDE_INT max = GET_MODE_NUNITS (mode); + operands[2] = GEN_INT (lane); if (lane < 0 || lane >= max) error ("lane out of range"); if (max == 1) @@ -4379,6 +4389,8 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_store1_one_lane")] ) +;; see comment on neon_vld1_lane for reason why the lane numbers are reversed +;; here on big endian targets. (define_insn "neon_vst1_lane" [(set (match_operand: 0 "neon_struct_operand" "=Um") (unspec: @@ -4387,7 +4399,7 @@ if (BYTES_BIG_ENDIAN) UNSPEC_VST1_LANE))] "TARGET_NEON" { - HOST_WIDE_INT lane = INTVAL (operands[2]); + HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(mode, INTVAL (operands[2])); HOST_WIDE_INT max = GET_MODE_NUNITS (mode); int regno = REGNO (operands[1]); if (lane < 0 || lane >= max) @@ -4396,8 +4408,8 @@ if (BYTES_BIG_ENDIAN) { lane -= max / 2; regno += 2; - operands[2] = GEN_INT (lane); } + operands[2] = GEN_INT (lane); operands[1] = gen_rtx_REG (mode, regno); if (max == 2) return "vst1.\t{%P1}, %A0"; @@ -4448,6 +4460,8 @@ if (BYTES_BIG_ENDIAN) "vld2.\t%h0, %A1" [(set_attr "type" "neon_load2_2reg_q")]) +;; see comment on neon_vld1_lane for reason why the lane numbers are reversed +;; here on big endian targets. 
(define_insn "neon_vld2_lane" [(set (match_operand:TI 0 "s_register_operand" "=w") (unspec:TI [(match_operand: 1 "neon_struct_operand" "Um") @@ -4457,7 +4471,7 @@ if (BYTES_BIG_ENDIAN) UNSPEC_VLD2_LANE))] "TARGET_NEON" { - HOST_WIDE_INT lane = INTVAL (operands[3]); + HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(mode, INTVAL (operands[3])); HOST_WIDE_INT max = GET_MODE_NUNITS (mode); int regno = REGNO (operands[0]); rtx ops[4]; @@ -4466,13 +4480,15 @@ if (BYTES_BIG_ENDIAN) ops[0] = gen_rtx_REG (DImode, regno); ops[1] = gen_rtx_REG (DImode, regno + 2); ops[2] = operands[1]; - ops[3] = operands[3]; + ops[3] = GEN_INT (lane); output_asm_insn ("vld2.\t{%P0[%c3], %P1[%c3]}, %A2", ops); return ""; } [(set_attr "type" "neon_load2_one_lane")] ) +;; see comment on neon_vld1_lane for reason why the lane numbers are reversed +;; here on big endian targets. (define_insn "neon_vld2_lane" [(set (match_operand:OI 0 "s_register_operand" "=w") (unspec:OI [(match_operand: 1 "neon_struct_operand" "Um") @@ -4482,7 +4498,7 @@ if (BYTES_BIG_ENDIAN) UNSPEC_VLD2_LANE))] "TARGET_NEON" { - HOST_WIDE_INT lane = INTVAL (operands[3]); + HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(mode, INTVAL (operands[3])); HOST_WIDE_INT max = GET_MODE_NUNITS (mode); int regno = REGNO (operands[0]); rtx ops[4]; @@ -4563,6 +4579,8 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_store2_4reg")] ) +;; see comment on neon_vld1_lane for reason why the lane numbers are reversed +;; here on big endian targets. 
(define_insn "neon_vst2_lane" [(set (match_operand: 0 "neon_struct_operand" "=Um") (unspec: @@ -4572,7 +4590,7 @@ if (BYTES_BIG_ENDIAN) UNSPEC_VST2_LANE))] "TARGET_NEON" { - HOST_WIDE_INT lane = INTVAL (operands[2]); + HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(mode, INTVAL (operands[2])); HOST_WIDE_INT max = GET_MODE_NUNITS (mode); int regno = REGNO (operands[1]); rtx ops[4]; @@ -4581,13 +4599,15 @@ if (BYTES_BIG_ENDIAN) ops[0] = operands[0]; ops[1] = gen_rtx_REG (DImode, regno); ops[2] = gen_rtx_REG (DImode, regno + 2); - ops[3] = operands[2]; + ops[3] = GEN_INT (lane); output_asm_insn ("vst2.\t{%P1[%c3], %P2[%c3]}, %A0", ops); return ""; } [(set_attr "type" "neon_store2_one_lane")] ) +;; see comment on neon_vld1_lane for reason why the lane numbers are reversed +;; here on big endian targets. (define_insn "neon_vst2_lane" [(set (match_operand: 0 "neon_struct_operand" "=Um") (unspec: @@ -4597,7 +4617,7 @@ if (BYTES_BIG_ENDIAN) UNSPEC_VST2_LANE))] "TARGET_NEON" { - HOST_WIDE_INT lane = INTVAL (operands[2]); + HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(mode, INTVAL (operands[2])); HOST_WIDE_INT max = GET_MODE_NUNITS (mode); int regno = REGNO (operands[1]); rtx ops[4]; @@ -4707,6 +4727,8 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_load3_3reg")] ) +;; see comment on neon_vld1_lane for reason why the lane numbers are reversed +;; here on big endian targets. 
(define_insn "neon_vld3_lane" [(set (match_operand:EI 0 "s_register_operand" "=w") (unspec:EI [(match_operand: 1 "neon_struct_operand" "Um") @@ -4716,7 +4738,7 @@ if (BYTES_BIG_ENDIAN) UNSPEC_VLD3_LANE))] "TARGET_NEON" { - HOST_WIDE_INT lane = INTVAL (operands[3]); + HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (mode, INTVAL (operands[3])); HOST_WIDE_INT max = GET_MODE_NUNITS (mode); int regno = REGNO (operands[0]); rtx ops[5]; @@ -4726,7 +4748,7 @@ if (BYTES_BIG_ENDIAN) ops[1] = gen_rtx_REG (DImode, regno + 2); ops[2] = gen_rtx_REG (DImode, regno + 4); ops[3] = operands[1]; - ops[4] = operands[3]; + ops[4] = GEN_INT (lane); output_asm_insn ("vld3.\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3", ops); return ""; @@ -4734,6 +4756,8 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_load3_one_lane")] ) +;; see comment on neon_vld1_lane for reason why the lane numbers are reversed +;; here on big endian targets. (define_insn "neon_vld3_lane" [(set (match_operand:CI 0 "s_register_operand" "=w") (unspec:CI [(match_operand: 1 "neon_struct_operand" "Um") @@ -4743,7 +4767,7 @@ if (BYTES_BIG_ENDIAN) UNSPEC_VLD3_LANE))] "TARGET_NEON" { - HOST_WIDE_INT lane = INTVAL (operands[3]); + HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(mode, INTVAL (operands[3])); HOST_WIDE_INT max = GET_MODE_NUNITS (mode); int regno = REGNO (operands[0]); rtx ops[5]; @@ -4879,6 +4903,8 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_store3_3reg")] ) +;; see comment on neon_vld1_lane for reason why the lane numbers are reversed +;; here on big endian targets. 
(define_insn "neon_vst3_lane" [(set (match_operand: 0 "neon_struct_operand" "=Um") (unspec: @@ -4888,7 +4914,7 @@ if (BYTES_BIG_ENDIAN) UNSPEC_VST3_LANE))] "TARGET_NEON" { - HOST_WIDE_INT lane = INTVAL (operands[2]); + HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(mode, INTVAL (operands[2])); HOST_WIDE_INT max = GET_MODE_NUNITS (mode); int regno = REGNO (operands[1]); rtx ops[5]; @@ -4898,7 +4924,7 @@ if (BYTES_BIG_ENDIAN) ops[1] = gen_rtx_REG (DImode, regno); ops[2] = gen_rtx_REG (DImode, regno + 2); ops[3] = gen_rtx_REG (DImode, regno + 4); - ops[4] = operands[2]; + ops[4] = GEN_INT (lane); output_asm_insn ("vst3.\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0", ops); return ""; @@ -4906,6 +4932,8 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_store3_one_lane")] ) +;; see comment on neon_vld1_lane for reason why the lane numbers are reversed +;; here on big endian targets. (define_insn "neon_vst3_lane" [(set (match_operand: 0 "neon_struct_operand" "=Um") (unspec: @@ -4915,7 +4943,7 @@ if (BYTES_BIG_ENDIAN) UNSPEC_VST3_LANE))] "TARGET_NEON" { - HOST_WIDE_INT lane = INTVAL (operands[2]); + HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(mode, INTVAL (operands[2])); HOST_WIDE_INT max = GET_MODE_NUNITS (mode); int regno = REGNO (operands[1]); rtx ops[5]; @@ -5029,6 +5057,8 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_load4_4reg")] ) +;; see comment on neon_vld1_lane for reason why the lane numbers are reversed +;; here on big endian targets. 
(define_insn "neon_vld4_lane" [(set (match_operand:OI 0 "s_register_operand" "=w") (unspec:OI [(match_operand: 1 "neon_struct_operand" "Um") @@ -5038,7 +5068,7 @@ if (BYTES_BIG_ENDIAN) UNSPEC_VLD4_LANE))] "TARGET_NEON" { - HOST_WIDE_INT lane = INTVAL (operands[3]); + HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(mode, INTVAL (operands[3])); HOST_WIDE_INT max = GET_MODE_NUNITS (mode); int regno = REGNO (operands[0]); rtx ops[6]; @@ -5049,7 +5079,7 @@ if (BYTES_BIG_ENDIAN) ops[2] = gen_rtx_REG (DImode, regno + 4); ops[3] = gen_rtx_REG (DImode, regno + 6); ops[4] = operands[1]; - ops[5] = operands[3]; + ops[5] = GEN_INT (lane); output_asm_insn ("vld4.\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4", ops); return ""; @@ -5057,6 +5087,8 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_load4_one_lane")] ) +;; see comment on neon_vld1_lane for reason why the lane numbers are reversed +;; here on big endian targets. (define_insn "neon_vld4_lane" [(set (match_operand:XI 0 "s_register_operand" "=w") (unspec:XI [(match_operand: 1 "neon_struct_operand" "Um") @@ -5066,7 +5098,7 @@ if (BYTES_BIG_ENDIAN) UNSPEC_VLD4_LANE))] "TARGET_NEON" { - HOST_WIDE_INT lane = INTVAL (operands[3]); + HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(mode, INTVAL (operands[3])); HOST_WIDE_INT max = GET_MODE_NUNITS (mode); int regno = REGNO (operands[0]); rtx ops[6]; @@ -5209,6 +5241,8 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_store4_4reg")] ) +;; see comment on neon_vld1_lane for reason why the lane numbers are reversed +;; here on big endian targets. 
(define_insn "neon_vst4_lane" [(set (match_operand: 0 "neon_struct_operand" "=Um") (unspec: @@ -5218,7 +5252,7 @@ if (BYTES_BIG_ENDIAN) UNSPEC_VST4_LANE))] "TARGET_NEON" { - HOST_WIDE_INT lane = INTVAL (operands[2]); + HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(mode, INTVAL (operands[2])); HOST_WIDE_INT max = GET_MODE_NUNITS (mode); int regno = REGNO (operands[1]); rtx ops[6]; @@ -5229,7 +5263,7 @@ if (BYTES_BIG_ENDIAN) ops[2] = gen_rtx_REG (DImode, regno + 2); ops[3] = gen_rtx_REG (DImode, regno + 4); ops[4] = gen_rtx_REG (DImode, regno + 6); - ops[5] = operands[2]; + ops[5] = GEN_INT (lane); output_asm_insn ("vst4.\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0", ops); return ""; @@ -5237,6 +5271,8 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_store4_one_lane")] ) +;; see comment on neon_vld1_lane for reason why the lane numbers are reversed +;; here on big endian targets. (define_insn "neon_vst4_lane" [(set (match_operand: 0 "neon_struct_operand" "=Um") (unspec: @@ -5246,7 +5282,7 @@ if (BYTES_BIG_ENDIAN) UNSPEC_VST4_LANE))] "TARGET_NEON" { - HOST_WIDE_INT lane = INTVAL (operands[2]); + HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(mode, INTVAL (operands[2])); HOST_WIDE_INT max = GET_MODE_NUNITS (mode); int regno = REGNO (operands[1]); rtx ops[6];