r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
emit_move_insn (op0, r);
}
+ /* Normal *mov<mode>_internal pattern will handle
+ unaligned loads just fine if misaligned_operand
+ is true, and without the UNSPEC it can be combined
+ with arithmetic instructions. */
+ else if (misaligned_operand (op1, GET_MODE (op1)))
+ emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
else
emit_insn (load_unaligned (op0, op1));
}
void
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
- rtx op0, op1, m;
+ rtx op0, op1, orig_op0 = NULL_RTX, m;
rtx (*load_unaligned) (rtx, rtx);
rtx (*store_unaligned) (rtx, rtx);
{
case MODE_VECTOR_INT:
case MODE_INT:
- op0 = gen_lowpart (V16SImode, op0);
+ if (GET_MODE (op0) != V16SImode)
+ {
+ if (!MEM_P (op0))
+ {
+ orig_op0 = op0;
+ op0 = gen_reg_rtx (V16SImode);
+ }
+ else
+ op0 = gen_lowpart (V16SImode, op0);
+ }
op1 = gen_lowpart (V16SImode, op1);
/* FALLTHRU */
emit_insn (store_unaligned (op0, op1));
else
gcc_unreachable ();
+ if (orig_op0)
+ emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
break;
default:
{
case MODE_VECTOR_INT:
case MODE_INT:
- op0 = gen_lowpart (V32QImode, op0);
+ if (GET_MODE (op0) != V32QImode)
+ {
+ if (!MEM_P (op0))
+ {
+ orig_op0 = op0;
+ op0 = gen_reg_rtx (V32QImode);
+ }
+ else
+ op0 = gen_lowpart (V32QImode, op0);
+ }
op1 = gen_lowpart (V32QImode, op1);
/* FALLTHRU */
case MODE_VECTOR_FLOAT:
ix86_avx256_split_vector_move_misalign (op0, op1);
+ if (orig_op0)
+ emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
break;
default:
if (MEM_P (op1))
{
+ /* Normal *mov<mode>_internal pattern will handle
+ unaligned loads just fine if misaligned_operand
+ is true, and without the UNSPEC it can be combined
+ with arithmetic instructions. */
+ if (TARGET_AVX
+ && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+ || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
+ && misaligned_operand (op1, GET_MODE (op1)))
+ emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* ??? If we have typed data, then it would appear that using
movdqu is the only way to get unaligned data loaded with
integer type. */
- if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+ else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
{
- op0 = gen_lowpart (V16QImode, op0);
+ if (GET_MODE (op0) != V16QImode)
+ {
+ orig_op0 = op0;
+ op0 = gen_reg_rtx (V16QImode);
+ }
op1 = gen_lowpart (V16QImode, op1);
/* We will eventually emit movups based on insn attributes. */
emit_insn (gen_sse2_loaddquv16qi (op0, op1));
+ if (orig_op0)
+ emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
}
else if (TARGET_SSE2 && mode == V2DFmode)
{
|| TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
|| optimize_insn_for_size_p ())
{
- op0 = gen_lowpart (V4SFmode, op0);
+ if (GET_MODE (op0) != V4SFmode)
+ {
+ orig_op0 = op0;
+ op0 = gen_reg_rtx (V4SFmode);
+ }
op1 = gen_lowpart (V4SFmode, op1);
emit_insn (gen_sse_loadups (op0, op1));
+ if (orig_op0)
+ emit_move_insn (orig_op0,
+ gen_lowpart (GET_MODE (orig_op0), op0));
return;
}
c[i] = a[i] * b[i+3];
}
-/* { dg-final { scan-assembler-not "avx_loadups256" } } */
-/* { dg-final { scan-assembler "sse_loadups" } } */
+/* { dg-final { scan-assembler-not "(avx_loadups256|vmovups\[^\n\r]*movv8sf_internal)" } } */
+/* { dg-final { scan-assembler "(sse_loadups|movv4sf_internal)" } } */
/* { dg-final { scan-assembler "vinsertf128" } } */
*ap++ = *cp++;
}
-/* { dg-final { scan-assembler-not "avx_loaddqu256" } } */
-/* { dg-final { scan-assembler "sse2_loaddqu" } } */
+/* { dg-final { scan-assembler-not "(avx_loaddqu256|vmovdqu\[^\n\r]*movv32qi_internal)" } } */
+/* { dg-final { scan-assembler "(sse2_loaddqu|vmovdqu\[^\n\r]*movv16qi_internal)" } } */
/* { dg-final { scan-assembler "vinsert.128" } } */
c[i] = a[i] * b[i+3];
}
-/* { dg-final { scan-assembler-not "avx_loadupd256" } } */
-/* { dg-final { scan-assembler "sse2_loadupd" } } */
+/* { dg-final { scan-assembler-not "(avx_loadupd256|vmovupd\[^\n\r]*movv4df_internal)" } } */
+/* { dg-final { scan-assembler "(sse2_loadupd|vmovupd\[^\n\r]*movv2df_internal)" } } */
/* { dg-final { scan-assembler "vinsertf128" } } */
b[i] = a[i+3] * 2;
}
-/* { dg-final { scan-assembler "avx_loadups256" } } */
-/* { dg-final { scan-assembler-not "sse_loadups" } } */
+/* { dg-final { scan-assembler "(avx_loadups256|vmovups\[^\n\r]*movv8sf_internal)" } } */
+/* { dg-final { scan-assembler-not "(sse_loadups|vmovups\[^\n\r]*movv4sf_internal)" } } */
/* { dg-final { scan-assembler-not "vinsertf128" } } */
#include "l_fma_1.h"
-/* { dg-final { scan-assembler-times "vfmadd132pd" 4 } } */
-/* { dg-final { scan-assembler-times "vfmadd231pd" 4 } } */
-/* { dg-final { scan-assembler-times "vfmsub132pd" 4 } } */
-/* { dg-final { scan-assembler-times "vfmsub231pd" 4 } } */
-/* { dg-final { scan-assembler-times "vfnmadd132pd" 4 } } */
-/* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */
-/* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */
-/* { dg-final { scan-assembler-times "vfnmsub231pd" 4 } } */
-/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 56 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+pd" 8 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 8 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 8 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 8 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 56 } } */
/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 56 } } */
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 56 } } */
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 56 } } */
#include "l_fma_2.h"
-/* { dg-final { scan-assembler-times "vfmadd132pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 56 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+pd" 8 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 8 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 8 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 8 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 56 } } */
/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 56 } } */
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 56 } } */
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 56 } } */
#include "l_fma_3.h"
-/* { dg-final { scan-assembler-times "vfmadd132pd" 4 } } */
-/* { dg-final { scan-assembler-times "vfmadd231pd" 4 } } */
-/* { dg-final { scan-assembler-times "vfmsub132pd" 4 } } */
-/* { dg-final { scan-assembler-times "vfmsub231pd" 4 } } */
-/* { dg-final { scan-assembler-times "vfnmadd132pd" 4 } } */
-/* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */
-/* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */
-/* { dg-final { scan-assembler-times "vfnmsub231pd" 4 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+pd" 8 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 8 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 8 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 56 } } */
/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 56 } } */
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 56 } } */
#include "l_fma_4.h"
-/* { dg-final { scan-assembler-times "vfmadd132pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+pd" 8 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 8 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 8 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 56 } } */
/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 56 } } */
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 56 } } */
#include "l_fma_5.h"
-/* { dg-final { scan-assembler-times "vfmadd132pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+pd" 8 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 8 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 8 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 56 } } */
-/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 56 } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 56 } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 56 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 56 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 56 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 56 } } */
#include "l_fma_6.h"
-/* { dg-final { scan-assembler-times "vfmadd132pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+pd" 8 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 8 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 8 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 56 } } */
-/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 56 } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 56 } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 56 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 56 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 56 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 56 } } */
#include "l_fma_1.h"
-/* { dg-final { scan-assembler-times "vfmadd132ps" 4 } } */
-/* { dg-final { scan-assembler-times "vfmadd231ps" 4 } } */
-/* { dg-final { scan-assembler-times "vfmsub132ps" 4 } } */
-/* { dg-final { scan-assembler-times "vfmsub231ps" 4 } } */
-/* { dg-final { scan-assembler-times "vfnmadd132ps" 4 } } */
-/* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */
-/* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */
-/* { dg-final { scan-assembler-times "vfnmsub231ps" 4 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+ps" 8 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 8 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 8 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 120 } } */
/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 120 } } */
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 120 } } */
#include "l_fma_2.h"
-/* { dg-final { scan-assembler-times "vfmadd132ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 120 } } */
-/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 120 } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 120 } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 120 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+ps" 8 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 8 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 8 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 8 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 120 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 120 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 120 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 120 } } */
#include "l_fma_3.h"
-/* { dg-final { scan-assembler-times "vfmadd132ps" 4 } } */
-/* { dg-final { scan-assembler-times "vfmadd231ps" 4 } } */
-/* { dg-final { scan-assembler-times "vfmsub132ps" 4 } } */
-/* { dg-final { scan-assembler-times "vfmsub231ps" 4 } } */
-/* { dg-final { scan-assembler-times "vfnmadd132ps" 4 } } */
-/* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */
-/* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */
-/* { dg-final { scan-assembler-times "vfnmsub231ps" 4 } } */
-/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 120 } } */
-/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 120 } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 120 } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 120 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+ps" 8 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 8 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 8 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 8 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 120 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 120 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 120 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 120 } } */
#include "l_fma_4.h"
-/* { dg-final { scan-assembler-times "vfmadd132ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 120 } } */
-/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 120 } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 120 } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 120 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+ps" 8 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 8 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 8 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 8 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 120 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 120 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 120 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 120 } } */
#include "l_fma_5.h"
-/* { dg-final { scan-assembler-times "vfmadd132ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 120 } } */
-/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 120 } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 120 } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 120 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+ps" 8 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 8 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 8 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 8 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 120 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 120 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 120 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 120 } } */
#include "l_fma_6.h"
-/* { dg-final { scan-assembler-times "vfmadd132ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */
+/* { dg-final { scan-assembler-times "vfmadd\[123\]+ps" 8 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 8 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 8 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 120 } } */
-/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 120 } } */
-/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 120 } } */
-/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 120 } } */
+/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 120 } } */
+/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 120 } } */
+/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 120 } } */