+2017-07-27 Richard Sandiford <richard.sandiford@linaro.org>
+
+ * config/rs6000/rs6000-protos.h (rs6000_emit_le_vsx_permute): Declare.
+ * config/rs6000/rs6000.c (rs6000_gen_le_vsx_permute): Replace with...
+ (rs6000_emit_le_vsx_permute): ...this. Take the destination as input.
+ Emit instructions rather than returning an expression. Handle TFmode
+ and KFmode by casting to TImode.
+ (rs6000_emit_le_vsx_load): Update to use rs6000_emit_le_vsx_permute.
+ (rs6000_emit_le_vsx_store): Likewise.
+ * config/rs6000/vsx.md (VSX_TI): New iterator.
+ (*vsx_le_permute_<mode>): Use it instead of VSX_LE_128.
+ (*vsx_le_undo_permute_<mode>): Likewise.
+ (*vsx_le_perm_load_<mode>): Use rs6000_emit_le_vsx_permute to
+ emit the split sequence.
+ (*vsx_le_perm_store_<mode>): Likewise.
+
2017-07-27 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/81555
extern void rs6000_fatal_bad_address (rtx);
extern rtx create_TOC_reference (rtx, rtx);
extern void rs6000_split_multireg_move (rtx, rtx);
+extern void rs6000_emit_le_vsx_permute (rtx, rtx, machine_mode);
extern void rs6000_emit_le_vsx_move (rtx, rtx, machine_mode);
extern bool valid_sf_si_move (rtx, rtx, machine_mode);
extern void rs6000_emit_move (rtx, rtx, machine_mode);
return v;
}
-/* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
- for a VSX load or store operation. */
-rtx
-rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
+/* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
+ store operation. */
+void
+rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
{
- /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and
- 128-bit integers if they are allowed in VSX registers. */
- if (FLOAT128_VECTOR_P (mode) || mode == TImode || mode == V1TImode)
- return gen_rtx_ROTATE (mode, source, GEN_INT (64));
+ /* Scalar permutations are easier to express in integer modes rather than
+ floating-point modes, so cast them here. We use V1TImode instead
+ of TImode to ensure that the values don't go through GPRs. */
+ if (FLOAT128_VECTOR_P (mode))
+ {
+ dest = gen_lowpart (V1TImode, dest);
+ source = gen_lowpart (V1TImode, source);
+ mode = V1TImode;
+ }
+
+ /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
+ scalar. */
+ if (mode == TImode || mode == V1TImode)
+ emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
+ GEN_INT (64))));
else
{
rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
- return gen_rtx_VEC_SELECT (mode, source, par);
+ emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
}
}
void
rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
{
- rtx tmp, permute_mem, permute_reg;
-
/* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode,
V1TImode). */
if (mode == TImode || mode == V1TImode)
source = adjust_address (source, V2DImode, 0);
}
- tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
- permute_mem = rs6000_gen_le_vsx_permute (source, mode);
- permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
- emit_insn (gen_rtx_SET (tmp, permute_mem));
- emit_insn (gen_rtx_SET (dest, permute_reg));
+ rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
+ rs6000_emit_le_vsx_permute (tmp, source, mode);
+ rs6000_emit_le_vsx_permute (dest, tmp, mode);
}
/* Emit a little-endian store to vector memory location DEST from VSX
void
rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
{
- rtx tmp, permute_src, permute_tmp;
-
/* This should never be called during or after reload, because it does
not re-permute the source register. It is intended only for use
during expand. */
source = gen_lowpart (V2DImode, source);
}
- tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
- permute_src = rs6000_gen_le_vsx_permute (source, mode);
- permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
- emit_insn (gen_rtx_SET (tmp, permute_src));
- emit_insn (gen_rtx_SET (dest, permute_tmp));
+ rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
+ rs6000_emit_le_vsx_permute (tmp, source, mode);
+ rs6000_emit_le_vsx_permute (dest, tmp, mode);
}
/* Emit a sequence representing a little-endian VSX load or store,
(TI "TARGET_VSX_TIMODE")
V1TI])
+;; Iterator for 128-bit integer types that go in a single vector register.
+(define_mode_iterator VSX_TI [(TI "TARGET_VSX_TIMODE") V1TI])
+
;; Iterator for the 2 32-bit vector types
(define_mode_iterator VSX_W [V4SF V4SI])
;; special V1TI container class, which it is not appropriate to use vec_select
;; for the type.
(define_insn "*vsx_le_permute_<mode>"
- [(set (match_operand:VSX_LE_128 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z")
- (rotate:VSX_LE_128
- (match_operand:VSX_LE_128 1 "input_operand" "<VSa>,Z,<VSa>")
+ [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z")
+ (rotate:VSX_TI
+ (match_operand:VSX_TI 1 "input_operand" "<VSa>,Z,<VSa>")
(const_int 64)))]
"!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
"@
(set_attr "type" "vecperm,vecload,vecstore")])
(define_insn_and_split "*vsx_le_undo_permute_<mode>"
- [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,<VSa>")
- (rotate:VSX_LE_128
- (rotate:VSX_LE_128
- (match_operand:VSX_LE_128 1 "vsx_register_operand" "0,<VSa>")
+ [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=<VSa>,<VSa>")
+ (rotate:VSX_TI
+ (rotate:VSX_TI
+ (match_operand:VSX_TI 1 "vsx_register_operand" "0,<VSa>")
(const_int 64))
(const_int 64)))]
"!BYTES_BIG_ENDIAN && TARGET_VSX"
"!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
"#"
"!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
- [(set (match_dup 2)
- (rotate:VSX_LE_128 (match_dup 1)
- (const_int 64)))
- (set (match_dup 0)
- (rotate:VSX_LE_128 (match_dup 2)
- (const_int 64)))]
+ [(const_int 0)]
"
{
- operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
- : operands[0];
+ rtx tmp = (can_create_pseudo_p ()
+ ? gen_reg_rtx_and_attrs (operands[0])
+ : operands[0]);
+ rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
+ rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
+ DONE;
}
"
[(set_attr "type" "vecload")
[(set (match_operand:VSX_LE_128 0 "memory_operand" "")
(match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
"!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
- [(set (match_dup 2)
- (rotate:VSX_LE_128 (match_dup 1)
- (const_int 64)))
- (set (match_dup 0)
- (rotate:VSX_LE_128 (match_dup 2)
- (const_int 64)))]
+ [(const_int 0)]
{
- operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
- : operands[0];
+ rtx tmp = (can_create_pseudo_p ()
+ ? gen_reg_rtx_and_attrs (operands[0])
+ : operands[0]);
+ rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
+ rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
+ DONE;
})
;; Peephole to catch memory to memory transfers for TImode if TImode landed in
[(set (match_operand:VSX_LE_128 0 "memory_operand" "")
(match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
"!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
- [(set (match_dup 1)
- (rotate:VSX_LE_128 (match_dup 1)
- (const_int 64)))
- (set (match_dup 0)
- (rotate:VSX_LE_128 (match_dup 1)
- (const_int 64)))
- (set (match_dup 1)
- (rotate:VSX_LE_128 (match_dup 1)
- (const_int 64)))]
- "")
+ [(const_int 0)]
+{
+ rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
+ rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
+ rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
+ DONE;
+})
;; Vector constants that can be generated with XXSPLTIB that was added in ISA
;; 3.0. Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.