From 02d3ba0e000ad83dcb76ccccd1ea9882672d71b1 Mon Sep 17 00:00:00 2001
From: Richard Sandiford
Date: Thu, 27 Jul 2017 09:38:54 +0000
Subject: [PATCH] [rs6000] Avoid rotates of floating-point modes

The little-endian VSX code uses rotates to swap the two 64-bit halves
of 128-bit scalar modes.  This is fine for TImode and V1TImode, but it
isn't really valid to use RTL rotates on floating-point modes like
KFmode and TFmode, and doing that triggered an assert added by the SVE
series.  This patch uses bit-casts to V1TImode instead.

2017-07-27  Richard Sandiford

gcc/
	* config/rs6000/rs6000-protos.h (rs6000_emit_le_vsx_permute): Declare.
	* config/rs6000/rs6000.c (rs6000_gen_le_vsx_permute): Replace with...
	(rs6000_emit_le_vsx_permute): ...this.  Take the destination as input.
	Emit instructions rather than returning an expression.  Handle TFmode
	and KFmode by casting to TImode.
	(rs6000_emit_le_vsx_load): Update to use rs6000_emit_le_vsx_permute.
	(rs6000_emit_le_vsx_store): Likewise.
	* config/rs6000/vsx.md (VSX_TI): New iterator.
	(*vsx_le_permute_<mode>): Use it instead of VSX_LE_128.
	(*vsx_le_undo_permute_<mode>): Likewise.
	(*vsx_le_perm_load_<mode>): Use rs6000_emit_le_vsx_permute to
	emit the split sequence.
	(*vsx_le_perm_store_<mode>): Likewise.

From-SVN: r250615
---
 gcc/ChangeLog                     | 16 ++++++++
 gcc/config/rs6000/rs6000-protos.h |  1 +
 gcc/config/rs6000/rs6000.c        | 49 ++++++++++++-----------
 gcc/config/rs6000/vsx.md          | 64 +++++++++++++++----------------
 4 files changed, 74 insertions(+), 56 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 345034d1e1b..7444943bb80 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,19 @@
+2017-07-27  Richard Sandiford
+
+	* config/rs6000/rs6000-protos.h (rs6000_emit_le_vsx_permute): Declare.
+	* config/rs6000/rs6000.c (rs6000_gen_le_vsx_permute): Replace with...
+	(rs6000_emit_le_vsx_permute): ...this.  Take the destination as input.
+	Emit instructions rather than returning an expression.  Handle TFmode
+	and KFmode by casting to TImode.
+	(rs6000_emit_le_vsx_load): Update to use rs6000_emit_le_vsx_permute.
+	(rs6000_emit_le_vsx_store): Likewise.
+	* config/rs6000/vsx.md (VSX_TI): New iterator.
+	(*vsx_le_permute_<mode>): Use it instead of VSX_LE_128.
+	(*vsx_le_undo_permute_<mode>): Likewise.
+	(*vsx_le_perm_load_<mode>): Use rs6000_emit_le_vsx_permute to
+	emit the split sequence.
+	(*vsx_le_perm_store_<mode>): Likewise.
+
 2017-07-27  Jakub Jelinek
 
 	PR tree-optimization/81555
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index aeec9b2f1c2..1b4932eba1e 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -151,6 +151,7 @@ extern rtx rs6000_longcall_ref (rtx);
 extern void rs6000_fatal_bad_address (rtx);
 extern rtx create_TOC_reference (rtx, rtx);
 extern void rs6000_split_multireg_move (rtx, rtx);
+extern void rs6000_emit_le_vsx_permute (rtx, rtx, machine_mode);
 extern void rs6000_emit_le_vsx_move (rtx, rtx, machine_mode);
 extern bool valid_sf_si_move (rtx, rtx, machine_mode);
 extern void rs6000_emit_move (rtx, rtx, machine_mode);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index cb2a30b62ed..7461decd99c 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -10374,19 +10374,30 @@ rs6000_const_vec (machine_mode mode)
   return v;
 }
 
-/* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
-   for a VSX load or store operation.  */
-rtx
-rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
+/* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
+   store operation.  */
+void
+rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
 {
-  /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and
-     128-bit integers if they are allowed in VSX registers.  */
-  if (FLOAT128_VECTOR_P (mode) || mode == TImode || mode == V1TImode)
-    return gen_rtx_ROTATE (mode, source, GEN_INT (64));
+  /* Scalar permutations are easier to express in integer modes rather than
+     floating-point modes, so cast them here.  We use V1TImode instead
+     of TImode to ensure that the values don't go through GPRs.  */
+  if (FLOAT128_VECTOR_P (mode))
+    {
+      dest = gen_lowpart (V1TImode, dest);
+      source = gen_lowpart (V1TImode, source);
+      mode = V1TImode;
+    }
+
+  /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
+     scalar.  */
+  if (mode == TImode || mode == V1TImode)
+    emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
+                                                  GEN_INT (64))));
   else
     {
       rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
-      return gen_rtx_VEC_SELECT (mode, source, par);
+      emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
     }
 }
 
@@ -10396,8 +10407,6 @@
 void
 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
 {
-  rtx tmp, permute_mem, permute_reg;
-
   /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode,
      V1TImode).  */
   if (mode == TImode || mode == V1TImode)
@@ -10407,11 +10416,9 @@ rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
       source = adjust_address (source, V2DImode, 0);
     }
 
-  tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
-  permute_mem = rs6000_gen_le_vsx_permute (source, mode);
-  permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
-  emit_insn (gen_rtx_SET (tmp, permute_mem));
-  emit_insn (gen_rtx_SET (dest, permute_reg));
+  rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
+  rs6000_emit_le_vsx_permute (tmp, source, mode);
+  rs6000_emit_le_vsx_permute (dest, tmp, mode);
 }
 
 /* Emit a little-endian store to vector memory location DEST from VSX
@@ -10420,8 +10427,6 @@
 void
 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
 {
-  rtx tmp, permute_src, permute_tmp;
-
   /* This should never be called during or after reload, because it does
      not re-permute the source register.  It is intended only for use
      during expand.  */
@@ -10436,11 +10441,9 @@ rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
       source = gen_lowpart (V2DImode, source);
     }
 
-  tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
-  permute_src = rs6000_gen_le_vsx_permute (source, mode);
-  permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
-  emit_insn (gen_rtx_SET (tmp, permute_src));
-  emit_insn (gen_rtx_SET (dest, permute_tmp));
+  rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
+  rs6000_emit_le_vsx_permute (tmp, source, mode);
+  rs6000_emit_le_vsx_permute (dest, tmp, mode);
 }
 
 /* Emit a sequence representing a little-endian VSX load or store,
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index e6b98e0a335..b2b85c1e384 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -37,6 +37,9 @@
                              (TI "TARGET_VSX_TIMODE")
                              V1TI])
 
+;; Iterator for 128-bit integer types that go in a single vector register.
+(define_mode_iterator VSX_TI [(TI "TARGET_VSX_TIMODE") V1TI])
+
 ;; Iterator for the 2 32-bit vector types
 (define_mode_iterator VSX_W [V4SF V4SI])
 
@@ -756,9 +759,9 @@
 ;; special V1TI container class, which it is not appropriate to use vec_select
 ;; for the type.
 (define_insn "*vsx_le_permute_<mode>"
-  [(set (match_operand:VSX_LE_128 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z")
-        (rotate:VSX_LE_128
-         (match_operand:VSX_LE_128 1 "input_operand" "<VSa>,Z,<VSa>")
+  [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z")
+        (rotate:VSX_TI
+         (match_operand:VSX_TI 1 "input_operand" "<VSa>,Z,<VSa>")
          (const_int 64)))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   "@
@@ -769,10 +772,10 @@
   (set_attr "type" "vecperm,vecload,vecstore")])
 
 (define_insn_and_split "*vsx_le_undo_permute_<mode>"
-  [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,<VSa>")
-        (rotate:VSX_LE_128
-         (rotate:VSX_LE_128
-          (match_operand:VSX_LE_128 1 "vsx_register_operand" "0,<VSa>")
+  [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=<VSa>,<VSa>")
+        (rotate:VSX_TI
+         (rotate:VSX_TI
+          (match_operand:VSX_TI 1 "vsx_register_operand" "0,<VSa>")
           (const_int 64))
          (const_int 64)))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX"
@@ -797,16 +800,15 @@
   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   "#"
   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
-  [(set (match_dup 2)
-        (rotate:VSX_LE_128 (match_dup 1)
-                           (const_int 64)))
-   (set (match_dup 0)
-        (rotate:VSX_LE_128 (match_dup 2)
-                           (const_int 64)))]
+  [(const_int 0)]
   "
 {
-  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
-                                       : operands[0];
+  rtx tmp = (can_create_pseudo_p ()
+             ? gen_reg_rtx_and_attrs (operands[0])
+             : operands[0]);
+  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
+  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
+  DONE;
 }
   "
   [(set_attr "type" "vecload")
@@ -824,15 +826,14 @@
   [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
        (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
-  [(set (match_dup 2)
-        (rotate:VSX_LE_128 (match_dup 1)
-                           (const_int 64)))
-   (set (match_dup 0)
-        (rotate:VSX_LE_128 (match_dup 2)
-                           (const_int 64)))]
+  [(const_int 0)]
 {
-  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
-                                       : operands[0];
+  rtx tmp = (can_create_pseudo_p ()
+             ? gen_reg_rtx_and_attrs (operands[0])
+             : operands[0]);
+  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
+  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
+  DONE;
 })
 
 ;; Peephole to catch memory to memory transfers for TImode if TImode landed in
@@ -856,16 +857,13 @@
   [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
        (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
-  [(set (match_dup 1)
-        (rotate:VSX_LE_128 (match_dup 1)
-                           (const_int 64)))
-   (set (match_dup 0)
-        (rotate:VSX_LE_128 (match_dup 1)
-                           (const_int 64)))
-   (set (match_dup 1)
-        (rotate:VSX_LE_128 (match_dup 1)
-                           (const_int 64)))]
-  "")
+  [(const_int 0)]
+{
+  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
+  rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
+  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
+  DONE;
+})
 
 ;; Vector constants that can be generated with XXSPLTIB that was added in ISA
 ;; 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.
-- 
2.30.2
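
For context (not part of the patch, and only a sketch): the sort of source that
reaches this path is a copy of a 128-bit scalar on a powerpc64le target with VSX
enabled and without Power9 vector support, where expand uses the
lxvd2x/stxvd2x + xxpermdi swap sequence touched above.  Before the patch that
swap was written as a ROTATE of the floating-point mode itself (e.g. KFmode);
after it, the same two instructions are emitted but the rotate is expressed on a
V1TImode bit-cast.  The function names below are made up for illustration.

/* Hypothetical example, not from the patch: built with a powerpc64le
   GCC with VSX enabled (and -mfloat128 for the __float128 case), each
   function copies a 128-bit scalar through memory and so should go
   through rs6000_emit_le_vsx_load on little-endian, pre-Power9 VSX.  */

__float128
copy_kf (__float128 *src)      /* 128-bit IEEE float (KFmode).  */
{
  return *src;
}

unsigned __int128
copy_ti (unsigned __int128 *src)   /* 128-bit integer (TImode).  */
{
  return *src;
}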