+2016-08-11 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
+
+ PR target/72863
+ * vsx.md (vsx_load_<mode>): For P8LE, emit swaps at expand time.
+ (vsx_store_<mode>): Likewise.
+
2015-08-11 H.J. Lu <hongjiu.lu@intel.com>
* config/i386/i386.c (timode_scalar_to_vector_candidate_p): Allow
[(set (match_operand:VSX_M 0 "vsx_register_operand" "")
(match_operand:VSX_M 1 "memory_operand" ""))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
- "")
+{
+ /* Expand to swaps if needed, prior to swap optimization. */
+ if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
+ {
+ rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
+ DONE;
+ }
+})
(define_expand "vsx_store_<mode>"
[(set (match_operand:VSX_M 0 "memory_operand" "")
(match_operand:VSX_M 1 "vsx_register_operand" ""))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
- "")
+{
+ /* Expand to swaps if needed, prior to swap optimization. */
+ if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
+ {
+ rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
+ DONE;
+ }
+})
;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
;; when you really want their element-reversing behavior.
+2016-08-11 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
+
+ PR target/72863
+ * gcc.target/powerpc/pr72863.c: New test.
+
2016-08-11 Uros Bizjak <ubizjak@gmail.com>
* g++.dg/cpp1z/constexpr-lambda6.C: Remove dg-do run.
--- /dev/null
+/* { dg-do compile { target { powerpc64le-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-mcpu=power8 -O3" } */
+/* { dg-final { scan-assembler "lxvd2x" } } */
+/* { dg-final { scan-assembler "stxvd2x" } } */
+/* { dg-final { scan-assembler-not "xxpermdi" } } */
+
+#include <altivec.h>
+
+extern unsigned char *src, *dst;
+
+void b(void)
+{
+ int i;
+
+ unsigned char *s8 = src;
+ unsigned char *d8 = dst;
+
+ for (i = 0; i < 100; i++) {
+ vector unsigned char vs = vec_vsx_ld(0, s8);
+ vector unsigned char vd = vec_vsx_ld(0, d8);
+ vector unsigned char vr = vec_xor(vs, vd);
+ vec_vsx_st(vr, 0, d8);
+ s8 += 16;
+ d8 += 16;
+ }
+}