From ff825b8158394a01a43359efd91d0b6b8c4fa21b Mon Sep 17 00:00:00 2001 From: Srinath Parvathaneni Date: Thu, 2 Apr 2020 10:23:47 +0100 Subject: [PATCH] [ARM]: Fix for MVE ACLE intrinsics with writeback (PR94317). The following MVE ACLE intrinsics have an issue with writeback to the base address. vldrdq_gather_base_wb_s64, vldrdq_gather_base_wb_u64, vldrdq_gather_base_wb_z_s64, vldrdq_gather_base_wb_z_u64, vldrwq_gather_base_wb_s32, vldrwq_gather_base_wb_u32, vldrwq_gather_base_wb_z_s32, vldrwq_gather_base_wb_z_u32, vldrwq_gather_base_wb_f32, vldrwq_gather_base_wb_z_f32. This patch fixes the bug reported in PR94317 by adding separate builtin calls to update the result and writeback to the base address for the above intrinsics. 2020-04-02 Srinath Parvathaneni PR target/94317 * config/arm/arm-builtins.c (LDRGBWBXU_QUALIFIERS): Define. (LDRGBWBXU_Z_QUALIFIERS): Likewise. * config/arm/arm_mve.h (__arm_vldrdq_gather_base_wb_s64): Modify intrinsic definition by adding a new builtin call to writeback into base address. (__arm_vldrdq_gather_base_wb_u64): Likewise. (__arm_vldrdq_gather_base_wb_z_s64): Likewise. (__arm_vldrdq_gather_base_wb_z_u64): Likewise. (__arm_vldrwq_gather_base_wb_s32): Likewise. (__arm_vldrwq_gather_base_wb_u32): Likewise. (__arm_vldrwq_gather_base_wb_z_s32): Likewise. (__arm_vldrwq_gather_base_wb_z_u32): Likewise. (__arm_vldrwq_gather_base_wb_f32): Likewise. (__arm_vldrwq_gather_base_wb_z_f32): Likewise. * config/arm/arm_mve_builtins.def (vldrwq_gather_base_wb_z_u): Modify builtin's qualifier. (vldrdq_gather_base_wb_z_u): Likewise. (vldrwq_gather_base_wb_u): Likewise. (vldrdq_gather_base_wb_u): Likewise. (vldrwq_gather_base_wb_z_s): Likewise. (vldrwq_gather_base_wb_z_f): Likewise. (vldrdq_gather_base_wb_z_s): Likewise. (vldrwq_gather_base_wb_s): Likewise. (vldrwq_gather_base_wb_f): Likewise. (vldrdq_gather_base_wb_s): Likewise. (vldrwq_gather_base_nowb_z_u): Define builtin. (vldrdq_gather_base_nowb_z_u): Likewise. (vldrwq_gather_base_nowb_u): Likewise. 
(vldrdq_gather_base_nowb_u): Likewise. (vldrwq_gather_base_nowb_z_s): Likewise. (vldrwq_gather_base_nowb_z_f): Likewise. (vldrdq_gather_base_nowb_z_s): Likewise. (vldrwq_gather_base_nowb_s): Likewise. (vldrwq_gather_base_nowb_f): Likewise. (vldrdq_gather_base_nowb_s): Likewise. * config/arm/mve.md (mve_vldrwq_gather_base_nowb_v4si): Define RTL pattern. (mve_vldrwq_gather_base_wb_v4si): Modify RTL pattern. (mve_vldrwq_gather_base_nowb_z_v4si): Define RTL pattern. (mve_vldrwq_gather_base_wb_z_v4si): Modify RTL pattern. (mve_vldrwq_gather_base_wb_fv4sf): Modify RTL pattern. (mve_vldrwq_gather_base_nowb_fv4sf): Define RTL pattern. (mve_vldrwq_gather_base_wb_z_fv4sf): Modify RTL pattern. (mve_vldrwq_gather_base_nowb_z_fv4sf): Define RTL pattern. (mve_vldrdq_gather_base_nowb_v2di): Define RTL pattern. (mve_vldrdq_gather_base_wb_v2di): Modify RTL pattern. (mve_vldrdq_gather_base_nowb_z_v2di): Define RTL pattern. (mve_vldrdq_gather_base_wb_z_v2di): Modify RTL pattern. gcc/testsuite/ChangeLog: 2020-04-02 Srinath Parvathaneni PR target/94317 * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c: Modify. * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_s64.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_u64.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_u32.c: Likewise. 
--- gcc/ChangeLog | 52 ++++++++++ gcc/config/arm/arm-builtins.c | 11 +++ gcc/config/arm/arm_mve.h | 40 ++++---- gcc/config/arm/arm_mve_builtins.def | 30 ++++-- gcc/config/arm/mve.md | 96 ++++++++++++++++++- gcc/testsuite/ChangeLog | 14 +++ .../intrinsics/vldrdq_gather_base_wb_s64.c | 4 +- .../intrinsics/vldrdq_gather_base_wb_u64.c | 4 +- .../intrinsics/vldrdq_gather_base_wb_z_s64.c | 6 +- .../intrinsics/vldrdq_gather_base_wb_z_u64.c | 6 +- .../intrinsics/vldrwq_gather_base_wb_f32.c | 4 +- .../intrinsics/vldrwq_gather_base_wb_s32.c | 4 +- .../intrinsics/vldrwq_gather_base_wb_u32.c | 4 +- .../intrinsics/vldrwq_gather_base_wb_z_f32.c | 6 +- .../intrinsics/vldrwq_gather_base_wb_z_s32.c | 6 +- .../intrinsics/vldrwq_gather_base_wb_z_u32.c | 6 +- 16 files changed, 250 insertions(+), 43 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5a8a2c525c9..6df3a17a645 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,55 @@ +2020-04-02 Srinath Parvathaneni + + PR target/94317 + * config/arm/arm-builtins.c (LDRGBWBXU_QUALIFIERS): Define. + (LDRGBWBXU_Z_QUALIFIERS): Likewise. + * config/arm/arm_mve.h (__arm_vldrdq_gather_base_wb_s64): Modify + intrinsic definition by adding a new builtin call to writeback into base + address. + (__arm_vldrdq_gather_base_wb_u64): Likewise. + (__arm_vldrdq_gather_base_wb_z_s64): Likewise. + (__arm_vldrdq_gather_base_wb_z_u64): Likewise. + (__arm_vldrwq_gather_base_wb_s32): Likewise. + (__arm_vldrwq_gather_base_wb_u32): Likewise. + (__arm_vldrwq_gather_base_wb_z_s32): Likewise. + (__arm_vldrwq_gather_base_wb_z_u32): Likewise. + (__arm_vldrwq_gather_base_wb_f32): Likewise. + (__arm_vldrwq_gather_base_wb_z_f32): Likewise. + * config/arm/arm_mve_builtins.def (vldrwq_gather_base_wb_z_u): Modify + builtin's qualifier. + (vldrdq_gather_base_wb_z_u): Likewise. + (vldrwq_gather_base_wb_u): Likewise. + (vldrdq_gather_base_wb_u): Likewise. + (vldrwq_gather_base_wb_z_s): Likewise. + (vldrwq_gather_base_wb_z_f): Likewise. 
+ (vldrdq_gather_base_wb_z_s): Likewise. + (vldrwq_gather_base_wb_s): Likewise. + (vldrwq_gather_base_wb_f): Likewise. + (vldrdq_gather_base_wb_s): Likewise. + (vldrwq_gather_base_nowb_z_u): Define builtin. + (vldrdq_gather_base_nowb_z_u): Likewise. + (vldrwq_gather_base_nowb_u): Likewise. + (vldrdq_gather_base_nowb_u): Likewise. + (vldrwq_gather_base_nowb_z_s): Likewise. + (vldrwq_gather_base_nowb_z_f): Likewise. + (vldrdq_gather_base_nowb_z_s): Likewise. + (vldrwq_gather_base_nowb_s): Likewise. + (vldrwq_gather_base_nowb_f): Likewise. + (vldrdq_gather_base_nowb_s): Likewise. + * config/arm/mve.md (mve_vldrwq_gather_base_nowb_v4si): Define RTL + pattern. + (mve_vldrwq_gather_base_wb_v4si): Modify RTL pattern. + (mve_vldrwq_gather_base_nowb_z_v4si): Define RTL pattern. + (mve_vldrwq_gather_base_wb_z_v4si): Modify RTL pattern. + (mve_vldrwq_gather_base_wb_fv4sf): Modify RTL pattern. + (mve_vldrwq_gather_base_nowb_fv4sf): Define RTL pattern. + (mve_vldrwq_gather_base_wb_z_fv4sf): Modify RTL pattern. + (mve_vldrwq_gather_base_nowb_z_fv4sf): Define RTL pattern. + (mve_vldrdq_gather_base_nowb_v2di): Define RTL pattern. + (mve_vldrdq_gather_base_wb_v2di): Modify RTL pattern. + (mve_vldrdq_gather_base_nowb_z_v2di): Define RTL pattern. + (mve_vldrdq_gather_base_wb_z_v2di): Modify RTL pattern. 
+ 2020-04-02 Andreas Krebbel * config/s390/vector.md ("add3", "mul3") diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c index 56f0db21ea9..832b9107424 100644 --- a/gcc/config/arm/arm-builtins.c +++ b/gcc/config/arm/arm-builtins.c @@ -718,6 +718,17 @@ arm_quinop_unone_unone_unone_unone_imm_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] #define QUINOP_UNONE_UNONE_UNONE_UNONE_IMM_UNONE_QUALIFIERS \ (arm_quinop_unone_unone_unone_unone_imm_unone_qualifiers) +static enum arm_type_qualifiers +arm_ldrgbwbxu_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate}; +#define LDRGBWBXU_QUALIFIERS (arm_ldrgbwbxu_qualifiers) + +static enum arm_type_qualifiers +arm_ldrgbwbxu_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate, + qualifier_unsigned}; +#define LDRGBWBXU_Z_QUALIFIERS (arm_ldrgbwbxu_z_qualifiers) + static enum arm_type_qualifiers arm_ldrgbwbs_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_none, qualifier_unsigned, qualifier_immediate}; diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h index f1dcdc21532..47a6268e080 100644 --- a/gcc/config/arm/arm_mve.h +++ b/gcc/config/arm/arm_mve.h @@ -13903,8 +13903,8 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vldrdq_gather_base_wb_s64 (uint64x2_t * __addr, const int __offset) { int64x2_t - result = __builtin_mve_vldrdq_gather_base_wb_sv2di (*__addr, __offset); - __addr += __offset; + result = __builtin_mve_vldrdq_gather_base_nowb_sv2di (*__addr, __offset); + *__addr = __builtin_mve_vldrdq_gather_base_wb_sv2di (*__addr, __offset); return result; } @@ -13913,8 +13913,8 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vldrdq_gather_base_wb_u64 (uint64x2_t * __addr, const int __offset) { uint64x2_t - result = __builtin_mve_vldrdq_gather_base_wb_uv2di (*__addr, __offset); - __addr += __offset; + result = __builtin_mve_vldrdq_gather_base_nowb_uv2di 
(*__addr, __offset); + *__addr = __builtin_mve_vldrdq_gather_base_wb_uv2di (*__addr, __offset); return result; } @@ -13923,8 +13923,8 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vldrdq_gather_base_wb_z_s64 (uint64x2_t * __addr, const int __offset, mve_pred16_t __p) { int64x2_t - result = __builtin_mve_vldrdq_gather_base_wb_z_sv2di (*__addr, __offset, __p); - __addr += __offset; + result = __builtin_mve_vldrdq_gather_base_nowb_z_sv2di (*__addr, __offset, __p); + *__addr = __builtin_mve_vldrdq_gather_base_wb_z_sv2di (*__addr, __offset, __p); return result; } @@ -13933,8 +13933,8 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vldrdq_gather_base_wb_z_u64 (uint64x2_t * __addr, const int __offset, mve_pred16_t __p) { uint64x2_t - result = __builtin_mve_vldrdq_gather_base_wb_z_uv2di (*__addr, __offset, __p); - __addr += __offset; + result = __builtin_mve_vldrdq_gather_base_nowb_z_uv2di (*__addr, __offset, __p); + *__addr = __builtin_mve_vldrdq_gather_base_wb_z_uv2di (*__addr, __offset, __p); return result; } @@ -13943,8 +13943,8 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vldrwq_gather_base_wb_s32 (uint32x4_t * __addr, const int __offset) { int32x4_t - result = __builtin_mve_vldrwq_gather_base_wb_sv4si (*__addr, __offset); - __addr += __offset; + result = __builtin_mve_vldrwq_gather_base_nowb_sv4si (*__addr, __offset); + *__addr = __builtin_mve_vldrwq_gather_base_wb_sv4si (*__addr, __offset); return result; } @@ -13953,8 +13953,8 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vldrwq_gather_base_wb_u32 (uint32x4_t * __addr, const int __offset) { uint32x4_t - result = __builtin_mve_vldrwq_gather_base_wb_uv4si (*__addr, __offset); - __addr += __offset; + result = __builtin_mve_vldrwq_gather_base_nowb_uv4si (*__addr, __offset); + *__addr = __builtin_mve_vldrwq_gather_base_wb_uv4si (*__addr, __offset); return result; } @@ -13963,8 +13963,8 @@ 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vldrwq_gather_base_wb_z_s32 (uint32x4_t * __addr, const int __offset, mve_pred16_t __p) { int32x4_t - result = __builtin_mve_vldrwq_gather_base_wb_z_sv4si (*__addr, __offset, __p); - __addr += __offset; + result = __builtin_mve_vldrwq_gather_base_nowb_z_sv4si (*__addr, __offset, __p); + *__addr = __builtin_mve_vldrwq_gather_base_wb_z_sv4si (*__addr, __offset, __p); return result; } @@ -13973,8 +13973,8 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vldrwq_gather_base_wb_z_u32 (uint32x4_t * __addr, const int __offset, mve_pred16_t __p) { uint32x4_t - result = __builtin_mve_vldrwq_gather_base_wb_z_uv4si (*__addr, __offset, __p); - __addr += __offset; + result = __builtin_mve_vldrwq_gather_base_nowb_z_uv4si (*__addr, __offset, __p); + *__addr = __builtin_mve_vldrwq_gather_base_wb_z_uv4si (*__addr, __offset, __p); return result; } @@ -19372,8 +19372,8 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vldrwq_gather_base_wb_f32 (uint32x4_t * __addr, const int __offset) { float32x4_t - result = __builtin_mve_vldrwq_gather_base_wb_fv4sf (*__addr, __offset); - __addr += __offset; + result = __builtin_mve_vldrwq_gather_base_nowb_fv4sf (*__addr, __offset); + *__addr = __builtin_mve_vldrwq_gather_base_wb_fv4sf (*__addr, __offset); return result; } @@ -19382,8 +19382,8 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vldrwq_gather_base_wb_z_f32 (uint32x4_t * __addr, const int __offset, mve_pred16_t __p) { float32x4_t - result = __builtin_mve_vldrwq_gather_base_wb_z_fv4sf (*__addr, __offset, __p); - __addr += __offset; + result = __builtin_mve_vldrwq_gather_base_nowb_z_fv4sf (*__addr, __offset, __p); + *__addr = __builtin_mve_vldrwq_gather_base_wb_z_fv4sf (*__addr, __offset, __p); return result; } diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def index 2fb975944b9..753e40a951d 100644 
--- a/gcc/config/arm/arm_mve_builtins.def +++ b/gcc/config/arm/arm_mve_builtins.def @@ -847,16 +847,26 @@ VAR1 (STRSBWBS, vstrdq_scatter_base_wb_s, v2di) VAR1 (STRSBWBS_P, vstrwq_scatter_base_wb_p_s, v4si) VAR1 (STRSBWBS_P, vstrwq_scatter_base_wb_p_f, v4sf) VAR1 (STRSBWBS_P, vstrdq_scatter_base_wb_p_s, v2di) -VAR1 (LDRGBWBU_Z, vldrwq_gather_base_wb_z_u, v4si) -VAR1 (LDRGBWBU_Z, vldrdq_gather_base_wb_z_u, v2di) -VAR1 (LDRGBWBU, vldrwq_gather_base_wb_u, v4si) -VAR1 (LDRGBWBU, vldrdq_gather_base_wb_u, v2di) -VAR1 (LDRGBWBS_Z, vldrwq_gather_base_wb_z_s, v4si) -VAR1 (LDRGBWBS_Z, vldrwq_gather_base_wb_z_f, v4sf) -VAR1 (LDRGBWBS_Z, vldrdq_gather_base_wb_z_s, v2di) -VAR1 (LDRGBWBS, vldrwq_gather_base_wb_s, v4si) -VAR1 (LDRGBWBS, vldrwq_gather_base_wb_f, v4sf) -VAR1 (LDRGBWBS, vldrdq_gather_base_wb_s, v2di) +VAR1 (LDRGBWBU_Z, vldrwq_gather_base_nowb_z_u, v4si) +VAR1 (LDRGBWBU_Z, vldrdq_gather_base_nowb_z_u, v2di) +VAR1 (LDRGBWBU, vldrwq_gather_base_nowb_u, v4si) +VAR1 (LDRGBWBU, vldrdq_gather_base_nowb_u, v2di) +VAR1 (LDRGBWBS_Z, vldrwq_gather_base_nowb_z_s, v4si) +VAR1 (LDRGBWBS_Z, vldrwq_gather_base_nowb_z_f, v4sf) +VAR1 (LDRGBWBS_Z, vldrdq_gather_base_nowb_z_s, v2di) +VAR1 (LDRGBWBS, vldrwq_gather_base_nowb_s, v4si) +VAR1 (LDRGBWBS, vldrwq_gather_base_nowb_f, v4sf) +VAR1 (LDRGBWBS, vldrdq_gather_base_nowb_s, v2di) +VAR1 (LDRGBWBXU_Z, vldrdq_gather_base_wb_z_s, v2di) +VAR1 (LDRGBWBXU_Z, vldrdq_gather_base_wb_z_u, v2di) +VAR1 (LDRGBWBXU, vldrdq_gather_base_wb_s, v2di) +VAR1 (LDRGBWBXU, vldrdq_gather_base_wb_u, v2di) +VAR1 (LDRGBWBXU_Z, vldrwq_gather_base_wb_z_s, v4si) +VAR1 (LDRGBWBXU_Z, vldrwq_gather_base_wb_z_f, v4sf) +VAR1 (LDRGBWBXU_Z, vldrwq_gather_base_wb_z_u, v4si) +VAR1 (LDRGBWBXU, vldrwq_gather_base_wb_s, v4si) +VAR1 (LDRGBWBXU, vldrwq_gather_base_wb_f, v4sf) +VAR1 (LDRGBWBXU, vldrwq_gather_base_wb_u, v4si) VAR1 (BINOP_NONE_NONE_NONE, vadciq_s, v4si) VAR1 (BINOP_UNONE_UNONE_UNONE, vadciq_u, v4si) VAR1 (BINOP_NONE_NONE_NONE, vadcq_s, v4si) diff --git 
a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index df602b07840..d1028f4542b 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -10419,6 +10419,20 @@ (match_operand:SI 2 "mve_vldrd_immediate") (unspec:V4SI [(const_int 0)] VLDRWGBWBQ)] "TARGET_HAVE_MVE" +{ + rtx ignore_result = gen_reg_rtx (V4SImode); + emit_insn ( + gen_mve_vldrwq_gather_base_wb_v4si_insn (ignore_result, operands[0], + operands[1], operands[2])); + DONE; +}) + +(define_expand "mve_vldrwq_gather_base_nowb_v4si" + [(match_operand:V4SI 0 "s_register_operand") + (match_operand:V4SI 1 "s_register_operand") + (match_operand:SI 2 "mve_vldrd_immediate") + (unspec:V4SI [(const_int 0)] VLDRWGBWBQ)] + "TARGET_HAVE_MVE" { rtx ignore_wb = gen_reg_rtx (V4SImode); emit_insn ( @@ -10458,6 +10472,21 @@ (match_operand:HI 3 "vpr_register_operand") (unspec:V4SI [(const_int 0)] VLDRWGBWBQ)] "TARGET_HAVE_MVE" +{ + rtx ignore_result = gen_reg_rtx (V4SImode); + emit_insn ( + gen_mve_vldrwq_gather_base_wb_z_v4si_insn (ignore_result, operands[0], + operands[1], operands[2], + operands[3])); + DONE; +}) +(define_expand "mve_vldrwq_gather_base_nowb_z_v4si" + [(match_operand:V4SI 0 "s_register_operand") + (match_operand:V4SI 1 "s_register_operand") + (match_operand:SI 2 "mve_vldrd_immediate") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VLDRWGBWBQ)] + "TARGET_HAVE_MVE" { rtx ignore_wb = gen_reg_rtx (V4SImode); emit_insn ( @@ -10487,12 +10516,26 @@ ops[0] = operands[0]; ops[1] = operands[2]; ops[2] = operands[3]; - output_asm_insn ("vpst\;\tvldrwt.u32\t%q0, [%q1, %2]!",ops); + output_asm_insn ("vpst\;vldrwt.u32\t%q0, [%q1, %2]!",ops); return ""; } [(set_attr "length" "8")]) (define_expand "mve_vldrwq_gather_base_wb_fv4sf" + [(match_operand:V4SI 0 "s_register_operand") + (match_operand:V4SI 1 "s_register_operand") + (match_operand:SI 2 "mve_vldrd_immediate") + (unspec:V4SI [(const_int 0)] VLDRWQGBWB_F)] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ignore_result = 
gen_reg_rtx (V4SFmode); + emit_insn ( + gen_mve_vldrwq_gather_base_wb_fv4sf_insn (ignore_result, operands[0], + operands[1], operands[2])); + DONE; +}) + +(define_expand "mve_vldrwq_gather_base_nowb_fv4sf" [(match_operand:V4SF 0 "s_register_operand") (match_operand:V4SI 1 "s_register_operand") (match_operand:SI 2 "mve_vldrd_immediate") @@ -10531,6 +10574,22 @@ [(set_attr "length" "4")]) (define_expand "mve_vldrwq_gather_base_wb_z_fv4sf" + [(match_operand:V4SI 0 "s_register_operand") + (match_operand:V4SI 1 "s_register_operand") + (match_operand:SI 2 "mve_vldrd_immediate") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V4SI [(const_int 0)] VLDRWQGBWB_F)] + "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" +{ + rtx ignore_result = gen_reg_rtx (V4SFmode); + emit_insn ( + gen_mve_vldrwq_gather_base_wb_z_fv4sf_insn (ignore_result, operands[0], + operands[1], operands[2], + operands[3])); + DONE; +}) + +(define_expand "mve_vldrwq_gather_base_nowb_z_fv4sf" [(match_operand:V4SF 0 "s_register_operand") (match_operand:V4SI 1 "s_register_operand") (match_operand:SI 2 "mve_vldrd_immediate") @@ -10566,7 +10625,7 @@ ops[0] = operands[0]; ops[1] = operands[2]; ops[2] = operands[3]; - output_asm_insn ("vpst\;\tvldrwt.u32\t%q0, [%q1, %2]!",ops); + output_asm_insn ("vpst\;vldrwt.u32\t%q0, [%q1, %2]!",ops); return ""; } [(set_attr "length" "8")]) @@ -10577,6 +10636,20 @@ (match_operand:SI 2 "mve_vldrd_immediate") (unspec:V2DI [(const_int 0)] VLDRDGBWBQ)] "TARGET_HAVE_MVE" +{ + rtx ignore_result = gen_reg_rtx (V2DImode); + emit_insn ( + gen_mve_vldrdq_gather_base_wb_v2di_insn (ignore_result, operands[0], + operands[1], operands[2])); + DONE; +}) + +(define_expand "mve_vldrdq_gather_base_nowb_v2di" + [(match_operand:V2DI 0 "s_register_operand") + (match_operand:V2DI 1 "s_register_operand") + (match_operand:SI 2 "mve_vldrd_immediate") + (unspec:V2DI [(const_int 0)] VLDRDGBWBQ)] + "TARGET_HAVE_MVE" { rtx ignore_wb = gen_reg_rtx (V2DImode); emit_insn ( @@ -10585,6 +10658,7 @@ DONE; }) + 
;; ;; [vldrdq_gather_base_wb_s vldrdq_gather_base_wb_u] ;; @@ -10616,6 +10690,22 @@ (match_operand:HI 3 "vpr_register_operand") (unspec:V2DI [(const_int 0)] VLDRDGBWBQ)] "TARGET_HAVE_MVE" +{ + rtx ignore_result = gen_reg_rtx (V2DImode); + emit_insn ( + gen_mve_vldrdq_gather_base_wb_z_v2di_insn (ignore_result, operands[0], + operands[1], operands[2], + operands[3])); + DONE; +}) + +(define_expand "mve_vldrdq_gather_base_nowb_z_v2di" + [(match_operand:V2DI 0 "s_register_operand") + (match_operand:V2DI 1 "s_register_operand") + (match_operand:SI 2 "mve_vldrd_immediate") + (match_operand:HI 3 "vpr_register_operand") + (unspec:V2DI [(const_int 0)] VLDRDGBWBQ)] + "TARGET_HAVE_MVE" { rtx ignore_wb = gen_reg_rtx (V2DImode); emit_insn ( @@ -10660,7 +10750,7 @@ ops[0] = operands[0]; ops[1] = operands[2]; ops[2] = operands[3]; - output_asm_insn ("vpst\;\tvldrdt.u64\t%q0, [%q1, %2]!",ops); + output_asm_insn ("vpst\;vldrdt.u64\t%q0, [%q1, %2]!",ops); return ""; } [(set_attr "length" "8")]) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index a154849a837..1249f46ed88 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,17 @@ +2020-04-02 Srinath Parvathaneni + + PR target/94317 + * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c: Modify. + * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_s64.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_u64.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_s32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_u32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_s32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_u32.c: Likewise. 
+ 2020-04-02 Tobias Burnus PR fortran/93522 diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c index a5c5a61345c..0d1ee769ec6 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_s64.c @@ -10,4 +10,6 @@ foo (uint64x2_t * addr) return vldrdq_gather_base_wb_s64 (addr, 8); } -/* { dg-final { scan-assembler "vldrd.64" } } */ +/* { dg-final { scan-assembler "vldrb.8 q\[0-9\]+, \\\[r\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "vldrd.64\tq\[0-9\]+, \\\[q\[0-9\]+, #\[0-9\]+\\\]!" } } */ +/* { dg-final { scan-assembler "vstrb.8 q\[0-9\]+, \\\[r\[0-9\]+\\\]" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c index 442bca92a43..cb2a41bdcd3 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_u64.c @@ -10,4 +10,6 @@ foo (uint64x2_t * addr) return vldrdq_gather_base_wb_u64 (addr, 8); } -/* { dg-final { scan-assembler "vldrd.64" } } */ +/* { dg-final { scan-assembler "vldrb.8 q\[0-9\]+, \\\[r\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "vldrd.64\tq\[0-9\]+, \\\[q\[0-9\]+, #\[0-9\]+\\\]!" 
} } */ +/* { dg-final { scan-assembler "vstrb.8 q\[0-9\]+, \\\[r\[0-9\]+\\\]" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_s64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_s64.c index 1863d0835e1..243fbeacc34 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_s64.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_s64.c @@ -8,4 +8,8 @@ int64x2_t foo (uint64x2_t * addr, mve_pred16_t p) return vldrdq_gather_base_wb_z_s64 (addr, 1016, p); } -/* { dg-final { scan-assembler "vldrdt.u64" } } */ +/* { dg-final { scan-assembler "vldrb.8 q\[0-9\]+, \\\[r\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "vmsr\t P0, r\[0-9\]+.*$" } } */ +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vldrdt.u64\tq\[0-9\]+, \\\[q\[0-9\]+, #\[0-9\]+\\\]!" } } */ +/* { dg-final { scan-assembler "vstrb.8 q\[0-9\]+, \\\[r\[0-9\]+\\\]" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_u64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_u64.c index 7ba272a1126..10ba42405fe 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_u64.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrdq_gather_base_wb_z_u64.c @@ -8,4 +8,8 @@ uint64x2_t foo (uint64x2_t * addr, mve_pred16_t p) return vldrdq_gather_base_wb_z_u64 (addr, 8, p); } -/* { dg-final { scan-assembler "vldrdt.u64" } } */ +/* { dg-final { scan-assembler "vldrb.8 q\[0-9\]+, \\\[r\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "vmsr\t P0, r\[0-9\]+.*" } } */ +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vldrdt.u64\tq\[0-9\]+, \\\[q\[0-9\]+, #\[0-9\]+\\\]!" 
} } */ +/* { dg-final { scan-assembler "vstrb.8 q\[0-9\]+, \\\[r\[0-9\]+\\\]" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_f32.c index 6b496873f17..db8108e3732 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_f32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_f32.c @@ -10,4 +10,6 @@ foo (uint32x4_t * addr) return vldrwq_gather_base_wb_f32 (addr, 8); } -/* { dg-final { scan-assembler "vldrw.u32" } } */ +/* { dg-final { scan-assembler "vldrb.8 q\[0-9\]+, \\\[r\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "vldrw.u32\tq\[0-9\]+, \\\[q\[0-9\]+, #\[0-9\]+\\\]!" } } */ +/* { dg-final { scan-assembler "vstrb.8 q\[0-9\]+, \\\[r\[0-9\]+\\\]" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_s32.c index 9bbbd0d7015..3da64e218e2 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_s32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_s32.c @@ -10,4 +10,6 @@ foo (uint32x4_t * addr) return vldrwq_gather_base_wb_s32 (addr, 8); } -/* { dg-final { scan-assembler "vldrw.u32" } } */ +/* { dg-final { scan-assembler "vldrb.8 q\[0-9\]+, \\\[r\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "vldrw.u32\tq\[0-9\]+, \\\[q\[0-9\]+, #\[0-9\]+\\\]!" 
} } */ +/* { dg-final { scan-assembler "vstrb.8 q\[0-9\]+, \\\[r\[0-9\]+\\\]" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_u32.c index 774230b2903..2597ee11608 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_u32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_u32.c @@ -10,4 +10,6 @@ foo (uint32x4_t * addr) return vldrwq_gather_base_wb_u32 (addr, 8); } -/* { dg-final { scan-assembler "vldrw.u32" } } */ +/* { dg-final { scan-assembler "vldrb.8 q\[0-9\]+, \\\[r\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "vldrw.u32\tq\[0-9\]+, \\\[q\[0-9\]+, #\[0-9\]+\\\]!" } } */ +/* { dg-final { scan-assembler "vstrb.8 q\[0-9\]+, \\\[r\[0-9\]+\\\]" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_f32.c index 6400f014a88..f1ba63855be 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_f32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_f32.c @@ -10,4 +10,8 @@ foo (uint32x4_t * addr, mve_pred16_t p) return vldrwq_gather_base_wb_z_f32 (addr, 8, p); } -/* { dg-final { scan-assembler "vldrwt.u32" } } */ +/* { dg-final { scan-assembler "vldrb.8 q\[0-9\]+, \\\[r\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "vmsr\tP0, r\[0-9\]+.*" } } */ +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vldrwt.u32\tq\[0-9\]+, \\\[q\[0-9\]+, #\[0-9\]+\\\]!" 
} } */ +/* { dg-final { scan-assembler "vstrb.8 q\[0-9\]+, \\\[r\[0-9\]+\\\]" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_s32.c index de7006c51f1..56da5a46c64 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_s32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_s32.c @@ -10,4 +10,8 @@ foo (uint32x4_t * addr, mve_pred16_t p) return vldrwq_gather_base_wb_z_s32 (addr, 8, p); } -/* { dg-final { scan-assembler "vldrwt.u32" } } */ +/* { dg-final { scan-assembler "vldrb.8 q\[0-9\]+, \\\[r\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "vmsr\t P0, r\[0-9\]+.*" } } */ +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vldrwt.u32\tq\[0-9\]+, \\\[q\[0-9\]+, #\[0-9\]+\\\]!" } } */ +/* { dg-final { scan-assembler "vstrb.8 q\[0-9\]+, \\\[r\[0-9\]+\\\]" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_u32.c index 6c9608f07ba..63165d97c1a 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_u32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vldrwq_gather_base_wb_z_u32.c @@ -10,4 +10,8 @@ foo (uint32x4_t * addr, mve_pred16_t p) return vldrwq_gather_base_wb_z_u32 (addr, 8, p); } -/* { dg-final { scan-assembler "vldrwt.u32" } } */ +/* { dg-final { scan-assembler "vldrb.8 q\[0-9\]+, \\\[r\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "vmsr\t P0, r\[0-9\]+.*" } } */ +/* { dg-final { scan-assembler "vpst" } } */ +/* { dg-final { scan-assembler "vldrwt.u32\tq\[0-9\]+, \\\[q\[0-9\]+, #\[0-9\]+\\\]!" } } */ +/* { dg-final { scan-assembler "vstrb.8 q\[0-9\]+, \\\[r\[0-9\]+\\\]" } } */ -- 2.30.2