From 2a9234e81e7403f86d81f6401aab1460f44a432d Mon Sep 17 00:00:00 2001 From: Tamar Christina Date: Thu, 16 Aug 2018 10:39:13 +0000 Subject: [PATCH] Update fall through pattern for FP16 patterns in ARM. The original issue comes from the fact that the code does ... foo (... bar) { return bar; } The expansion of the return statement causes GCC to try to return the value in a register. GCC will try to emit the move then, from MEM to REG (due to the SSA temporary). It checks for a mov optab for this which isn't available and then tries to do the move in bits using emit_move_multi_word. emit_move_multi_word will split the move into sub parts, but then needs to get the sub parts and does this using subregs, but it's told it can't do subregs! The compiler is now stuck in an infinite loop. The way this is worked around in the back-end is that we have move patterns in neon.md that usually just force the register instead of checking with the back-end. This prevents emit_move_multi_word from being needed. However the patterns for V4HF and V8HF were guarded by TARGET_NEON && TARGET_FP16. I don't believe the TARGET_FP16 guard to be needed, because the pattern doesn't actually generate code and requires another pattern for that, and a reg to reg move should always be possible anyway. So allowing the force to register here is safe and it allows the compiler to generate a correct error instead of ICEing in an infinite loop. gcc/ 2018-08-16 Tamar Christina PR target/84711 * config/arm/arm.c (arm_can_change_mode_class): Disallow subreg. * config/arm/neon.md (movv4hf, movv8hf): Refactored to.. (mov<mode>): ..this and enable unconditionally. 
From-SVN: r263584 --- gcc/ChangeLog | 7 +++++++ gcc/config/arm/arm.c | 4 ++-- gcc/config/arm/neon.md | 35 ++++++++++++----------------------- 3 files changed, 21 insertions(+), 25 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 85bf69c7bc4..aaed5d54a4b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,10 @@ +2018-08-16 Tamar Christina + + PR target/84711 + * config/arm/arm.c (arm_can_change_mode_class): Disallow subreg. + * config/arm/neon.md (movv4hf, movv8hf): Refactored to.. + (mov<mode>): ..this and enable unconditionally. + 2018-08-16 Tamar Christina * config/arm/neon.md (*neon_mov<mode>): Remove reg-to-reg alternative. diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index f5eece4f152..1d97db50a28 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -31509,8 +31509,8 @@ arm_can_change_mode_class (machine_mode from, machine_mode to, { if (TARGET_BIG_END && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8) - && (GET_MODE_UNIT_SIZE (from) > UNITS_PER_WORD - || GET_MODE_UNIT_SIZE (to) > UNITS_PER_WORD) + && (GET_MODE_SIZE (from) > UNITS_PER_WORD + || GET_MODE_SIZE (to) > UNITS_PER_WORD) && reg_classes_intersect_p (VFP_REGS, rclass)) return false; return true; diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 97d88e686ab..5aeee4b08c1 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -113,6 +113,13 @@ (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*") (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")]) +/* We define these mov expanders to match the standard mov$a optab to prevent + the mid-end from trying to do a subreg for these modes which is the most + inefficient way to expand the move. Also big-endian subreg's aren't + allowed for a subset of modes, See TARGET_CAN_CHANGE_MODE_CLASS. + Without these RTL generation patterns the mid-end would attempt to take a + sub-reg and may ICE if it can't. 
*/ + (define_expand "movti" [(set (match_operand:TI 0 "nonimmediate_operand" "") (match_operand:TI 1 "general_operand" ""))] @@ -137,33 +144,15 @@ } }) -(define_expand "movv4hf" - [(set (match_operand:V4HF 0 "s_register_operand") - (match_operand:V4HF 1 "s_register_operand"))] - "TARGET_NEON && TARGET_FP16" +(define_expand "mov<mode>" + [(set (match_operand:VH 0 "s_register_operand") + (match_operand:VH 1 "s_register_operand"))] + "TARGET_NEON" { - /* We need to use force_reg to avoid TARGET_CAN_CHANGE_MODE_CLASS - causing an ICE on big-endian because it cannot extract subregs in - this case. */ - if (can_create_pseudo_p ()) - { - if (!REG_P (operands[0])) - operands[1] = force_reg (V4HFmode, operands[1]); - } -}) - -(define_expand "movv8hf" - [(set (match_operand:V8HF 0 "") - (match_operand:V8HF 1 ""))] - "TARGET_NEON && TARGET_FP16" -{ - /* We need to use force_reg to avoid TARGET_CAN_CHANGE_MODE_CLASS - causing an ICE on big-endian because it cannot extract subregs in - this case. */ if (can_create_pseudo_p ()) { if (!REG_P (operands[0])) - operands[1] = force_reg (V8HFmode, operands[1]); + operands[1] = force_reg (<MODE>mode, operands[1]); } }) -- 2.30.2