From: H.J. Lu Date: Sun, 27 Mar 2011 18:56:00 +0000 (+0000) Subject: Split 32-byte AVX unaligned load/store. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=d253656a7be7461c2fc7bb638e93b9943a91de9b;p=gcc.git Split 32-byte AVX unaligned load/store. gcc/ 2011-03-27 H.J. Lu * config/i386/i386.c (flag_opts): Add -mavx256-split-unaligned-load and -mavx256-split-unaligned-store. (ix86_option_override_internal): Split 32-byte AVX unaligned load/store by default. (ix86_avx256_split_vector_move_misalign): New. (ix86_expand_vector_move_misalign): Use it. * config/i386/i386.opt: Add -mavx256-split-unaligned-load and -mavx256-split-unaligned-store. * config/i386/sse.md (*avx_mov_internal): Verify unaligned 256bit load/store. Generate unaligned store on misaligned memory operand. (*avx_movu): Verify unaligned 256bit load/store. (*avx_movdqu): Likewise. * doc/invoke.texi: Document -mavx256-split-unaligned-load and -mavx256-split-unaligned-store. gcc/testsuite/ 2011-03-27 H.J. Lu * gcc.target/i386/avx256-unaligned-load-1.c: New. * gcc.target/i386/avx256-unaligned-load-2.c: Likewise. * gcc.target/i386/avx256-unaligned-load-3.c: Likewise. * gcc.target/i386/avx256-unaligned-load-4.c: Likewise. * gcc.target/i386/avx256-unaligned-load-5.c: Likewise. * gcc.target/i386/avx256-unaligned-load-6.c: Likewise. * gcc.target/i386/avx256-unaligned-load-7.c: Likewise. * gcc.target/i386/avx256-unaligned-store-1.c: Likewise. * gcc.target/i386/avx256-unaligned-store-2.c: Likewise. * gcc.target/i386/avx256-unaligned-store-3.c: Likewise. * gcc.target/i386/avx256-unaligned-store-4.c: Likewise. * gcc.target/i386/avx256-unaligned-store-5.c: Likewise. * gcc.target/i386/avx256-unaligned-store-6.c: Likewise. * gcc.target/i386/avx256-unaligned-store-7.c: Likewise. From-SVN: r171578 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 41c0ef22801..ca0e3d69ef4 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,25 @@ +2011-03-27 H.J. 
Lu + + * config/i386/i386.c (flag_opts): Add -mavx256-split-unaligned-load + and -mavx256-split-unaligned-store. + (ix86_option_override_internal): Split 32-byte AVX unaligned + load/store by default. + (ix86_avx256_split_vector_move_misalign): New. + (ix86_expand_vector_move_misalign): Use it. + + * config/i386/i386.opt: Add -mavx256-split-unaligned-load and + -mavx256-split-unaligned-store. + + * config/i386/sse.md (*avx_mov_internal): Verify unaligned + 256bit load/store. Generate unaligned store on misaligned memory + operand. + (*avx_movu): Verify unaligned + 256bit load/store. + (*avx_movdqu): Likewise. + + * doc/invoke.texi: Document -mavx256-split-unaligned-load and + -mavx256-split-unaligned-store. + 2011-03-27 Richard Sandiford PR target/38598 diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 4e8ca698a44..a4ca762228c 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -3130,6 +3130,8 @@ ix86_target_string (int isa, int flags, const char *arch, const char *tune, { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS }, { "-m8bit-idiv", MASK_USE_8BIT_IDIV }, { "-mvzeroupper", MASK_VZEROUPPER }, + { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD}, + { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE}, }; const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2]; @@ -4274,11 +4276,18 @@ ix86_option_override_internal (bool main_args_p) if (TARGET_AVX) { /* When not optimize for size, enable vzeroupper optimization for - TARGET_AVX with -fexpensive-optimizations. */ - if (!optimize_size - && flag_expensive_optimizations - && !(target_flags_explicit & MASK_VZEROUPPER)) - target_flags |= MASK_VZEROUPPER; + TARGET_AVX with -fexpensive-optimizations and split 32-byte + AVX unaligned load/store. 
*/ + if (!optimize_size) + { + if (flag_expensive_optimizations + && !(target_flags_explicit & MASK_VZEROUPPER)) + target_flags |= MASK_VZEROUPPER; + if (!(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD)) + target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD; + if (!(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE)) + target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE; + } } else { @@ -15588,6 +15597,57 @@ ix86_expand_vector_move (enum machine_mode mode, rtx operands[]) emit_insn (gen_rtx_SET (VOIDmode, op0, op1)); } +/* Split 32-byte AVX unaligned load and store if needed. */ + +static void +ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1) +{ + rtx m; + rtx (*extract) (rtx, rtx, rtx); + rtx (*move_unaligned) (rtx, rtx); + enum machine_mode mode; + + switch (GET_MODE (op0)) + { + default: + gcc_unreachable (); + case V32QImode: + extract = gen_avx_vextractf128v32qi; + move_unaligned = gen_avx_movdqu256; + mode = V16QImode; + break; + case V8SFmode: + extract = gen_avx_vextractf128v8sf; + move_unaligned = gen_avx_movups256; + mode = V4SFmode; + break; + case V4DFmode: + extract = gen_avx_vextractf128v4df; + move_unaligned = gen_avx_movupd256; + mode = V2DFmode; + break; + } + + if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD) + { + rtx r = gen_reg_rtx (mode); + m = adjust_address (op1, mode, 0); + emit_move_insn (r, m); + m = adjust_address (op1, mode, 16); + r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m); + emit_move_insn (op0, r); + } + else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE) + { + m = adjust_address (op0, mode, 0); + emit_insn (extract (m, op1, const0_rtx)); + m = adjust_address (op0, mode, 16); + emit_insn (extract (m, op1, const1_rtx)); + } + else + emit_insn (move_unaligned (op0, op1)); +} + /* Implement the movmisalign patterns for SSE. Non-SSE modes go straight to ix86_expand_vector_move. 
*/ /* Code generation for scalar reg-reg moves of single and double precision data: @@ -15672,7 +15732,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) case 32: op0 = gen_lowpart (V32QImode, op0); op1 = gen_lowpart (V32QImode, op1); - emit_insn (gen_avx_movdqu256 (op0, op1)); + ix86_avx256_split_vector_move_misalign (op0, op1); break; default: gcc_unreachable (); @@ -15688,7 +15748,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) emit_insn (gen_avx_movups (op0, op1)); break; case V8SFmode: - emit_insn (gen_avx_movups256 (op0, op1)); + ix86_avx256_split_vector_move_misalign (op0, op1); break; case V2DFmode: if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) @@ -15701,7 +15761,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) emit_insn (gen_avx_movupd (op0, op1)); break; case V4DFmode: - emit_insn (gen_avx_movupd256 (op0, op1)); + ix86_avx256_split_vector_move_misalign (op0, op1); break; default: gcc_unreachable (); diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index e02d098fd19..f63a40629e1 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -420,3 +420,11 @@ Emit profiling counter call at function entry before prologue. 
m8bit-idiv Target Report Mask(USE_8BIT_IDIV) Save Expand 32bit/64bit integer divide into 8bit unsigned integer divide with run-time check + +mavx256-split-unaligned-load +Target Report Mask(AVX256_SPLIT_UNALIGNED_LOAD) Save +Split 32-byte AVX unaligned load + +mavx256-split-unaligned-store +Target Report Mask(AVX256_SPLIT_UNALIGNED_STORE) Save +Split 32-byte AVX unaligned store diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 70a0b344bdf..de11f7362ec 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -203,19 +203,35 @@ return standard_sse_constant_opcode (insn, operands[1]); case 1: case 2: + if (GET_MODE_ALIGNMENT (mode) == 256 + && ((TARGET_AVX256_SPLIT_UNALIGNED_STORE + && misaligned_operand (operands[0], mode)) + || (TARGET_AVX256_SPLIT_UNALIGNED_LOAD + && misaligned_operand (operands[1], mode)))) + gcc_unreachable (); switch (get_attr_mode (insn)) { case MODE_V8SF: case MODE_V4SF: - return "vmovaps\t{%1, %0|%0, %1}"; + if (misaligned_operand (operands[0], mode) + || misaligned_operand (operands[1], mode)) + return "vmovups\t{%1, %0|%0, %1}"; + else + return "vmovaps\t{%1, %0|%0, %1}"; case MODE_V4DF: case MODE_V2DF: - if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) + if (misaligned_operand (operands[0], mode) + || misaligned_operand (operands[1], mode)) + return "vmovupd\t{%1, %0|%0, %1}"; + else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) return "vmovaps\t{%1, %0|%0, %1}"; else return "vmovapd\t{%1, %0|%0, %1}"; default: - if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) + if (misaligned_operand (operands[0], mode) + || misaligned_operand (operands[1], mode)) + return "vmovdqu\t{%1, %0|%0, %1}"; + else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) return "vmovaps\t{%1, %0|%0, %1}"; else return "vmovdqa\t{%1, %0|%0, %1}"; @@ -400,7 +416,15 @@ UNSPEC_MOVU))] "AVX_VEC_FLOAT_MODE_P (mode) && !(MEM_P (operands[0]) && MEM_P (operands[1]))" - "vmovu\t{%1, %0|%0, %1}" +{ + if (GET_MODE_ALIGNMENT (mode) == 256 + && 
((TARGET_AVX256_SPLIT_UNALIGNED_STORE + && misaligned_operand (operands[0], mode)) + || (TARGET_AVX256_SPLIT_UNALIGNED_LOAD + && misaligned_operand (operands[1], mode)))) + gcc_unreachable (); + return "vmovu\t{%1, %0|%0, %1}"; +} [(set_attr "type" "ssemov") (set_attr "movu" "1") (set_attr "prefix" "vex") @@ -459,7 +483,15 @@ [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")] UNSPEC_MOVU))] "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" - "vmovdqu\t{%1, %0|%0, %1}" +{ + if (GET_MODE_ALIGNMENT (mode) == 256 + && ((TARGET_AVX256_SPLIT_UNALIGNED_STORE + && misaligned_operand (operands[0], mode)) + || (TARGET_AVX256_SPLIT_UNALIGNED_LOAD + && misaligned_operand (operands[1], mode)))) + gcc_unreachable (); + return "vmovdqu\t{%1, %0|%0, %1}"; +} [(set_attr "type" "ssemov") (set_attr "movu" "1") (set_attr "prefix" "vex") diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 925455d0e30..85bf2b4854e 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -602,7 +602,8 @@ Objective-C and Objective-C++ Dialects}. -momit-leaf-frame-pointer -mno-red-zone -mno-tls-direct-seg-refs @gol -mcmodel=@var{code-model} -mabi=@var{name} @gol -m32 -m64 -mlarge-data-threshold=@var{num} @gol --msse2avx -mfentry -m8bit-idiv} +-msse2avx -mfentry -m8bit-idiv @gol +-mavx256-split-unaligned-load -mavx256-split-unaligned-store} @emph{i386 and x86-64 Windows Options} @gccoptlist{-mconsole -mcygwin -mno-cygwin -mdll @gol @@ -12669,6 +12670,12 @@ runt-time check. If both dividend and divisor are within range of 0 to 255, 8bit unsigned integer divide will be used instead of 32bit/64bit integer divide. +@item -mavx256-split-unaligned-load +@item -mavx256-split-unaligned-store +@opindex avx256-split-unaligned-load +@opindex avx256-split-unaligned-store +Split 32-byte AVX unaligned load and store. 
+ @end table These @samp{-m} switches are supported in addition to the above diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 3cc61b079d5..fdcc95f91b4 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,20 @@ +2011-03-27 H.J. Lu + + * gcc.target/i386/avx256-unaligned-load-1.c: New. + * gcc.target/i386/avx256-unaligned-load-2.c: Likewise. + * gcc.target/i386/avx256-unaligned-load-3.c: Likewise. + * gcc.target/i386/avx256-unaligned-load-4.c: Likewise. + * gcc.target/i386/avx256-unaligned-load-5.c: Likewise. + * gcc.target/i386/avx256-unaligned-load-6.c: Likewise. + * gcc.target/i386/avx256-unaligned-load-7.c: Likewise. + * gcc.target/i386/avx256-unaligned-store-1.c: Likewise. + * gcc.target/i386/avx256-unaligned-store-2.c: Likewise. + * gcc.target/i386/avx256-unaligned-store-3.c: Likewise. + * gcc.target/i386/avx256-unaligned-store-4.c: Likewise. + * gcc.target/i386/avx256-unaligned-store-5.c: Likewise. + * gcc.target/i386/avx256-unaligned-store-6.c: Likewise. + * gcc.target/i386/avx256-unaligned-store-7.c: Likewise. 
+ 2011-03-27 Thomas Koenig PR fortran/47065 diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-1.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-1.c new file mode 100644 index 00000000000..023e859b6c1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-1.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-load" } */ + +#define N 1024 + +float a[N], b[N+3], c[N]; + +void +avx_test (void) +{ + int i; + + for (i = 0; i < N; i++) + c[i] = a[i] * b[i+3]; +} + +/* { dg-final { scan-assembler-not "\\*avx_movups256/1" } } */ +/* { dg-final { scan-assembler "\\*avx_movups/1" } } */ +/* { dg-final { scan-assembler "vinsertf128" } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-2.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-2.c new file mode 100644 index 00000000000..8394e27197b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-2.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-load" } */ + +#define N 1024 + +char **ep; +char **fp; + +void +avx_test (void) +{ + int i; + char **ap; + char **bp; + char **cp; + + ap = ep; + bp = fp; + for (i = 128; i >= 0; i--) + { + *ap++ = *cp++; + *bp++ = 0; + } +} + +/* { dg-final { scan-assembler-not "\\*avx_movdqu256/1" } } */ +/* { dg-final { scan-assembler "\\*avx_movdqu/1" } } */ +/* { dg-final { scan-assembler "vinsertf128" } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-3.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-3.c new file mode 100644 index 00000000000..ec7d59d53cc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-3.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-load" } */ + +#define N 1024 + +double a[N], b[N+3], c[N]; + +void +avx_test (void) +{ + int i; + + for (i = 0; i < N; 
i++) + c[i] = a[i] * b[i+3]; +} + +/* { dg-final { scan-assembler-not "\\*avx_movupd256/1" } } */ +/* { dg-final { scan-assembler "\\*avx_movupd/1" } } */ +/* { dg-final { scan-assembler "vinsertf128" } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-4.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-4.c new file mode 100644 index 00000000000..0d3ef333120 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-4.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -dp -mavx -mno-avx256-split-unaligned-load -mno-avx256-split-unaligned-store" } */ + +#define N 1024 + +float a[N], b[N+3]; + +void +avx_test (void) +{ + int i; + + for (i = 0; i < N; i++) + b[i] = a[i+3] * 2; +} + +/* { dg-final { scan-assembler "\\*avx_movups256/1" } } */ +/* { dg-final { scan-assembler-not "\\*avx_movups/1" } } */ +/* { dg-final { scan-assembler-not "vinsertf128" } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-5.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-5.c new file mode 100644 index 00000000000..153b66f8287 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-5.c @@ -0,0 +1,43 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx } */ +/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-load" } */ + +#include "avx-check.h" + +#define N 8 + +float a[N+3] = { -1, -1, -1, 24.43, 68.346, 43.35, + 546.46, 46.79, 82.78, 82.7, 9.4 }; +float b[N]; +float c[N]; + +void +foo (void) +{ + int i; + + for (i = 0; i < N; i++) + b[i] = a[i+3] * 2; +} + +__attribute__ ((noinline)) +float +bar (float x) +{ + return x * 2; +} + +void +avx_test (void) +{ + int i; + + foo (); + + for (i = 0; i < N; i++) + c[i] = bar (a[i+3]); + + for (i = 0; i < N; i++) + if (b[i] != c[i]) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-6.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-6.c new file mode 100644 index 00000000000..2fa984cc4a6 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-6.c @@ -0,0 +1,42 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx } */ +/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-load" } */ + +#include "avx-check.h" + +#define N 4 + +double a[N+3] = { -1, -1, -1, 24.43, 68.346, 43.35, 546.46 }; +double b[N]; +double c[N]; + +void +foo (void) +{ + int i; + + for (i = 0; i < N; i++) + b[i] = a[i+3] * 2; +} + +__attribute__ ((noinline)) +double +bar (double x) +{ + return x * 2; +} + +void +avx_test (void) +{ + int i; + + foo (); + + for (i = 0; i < N; i++) + c[i] = bar (a[i+3]); + + for (i = 0; i < N; i++) + if (b[i] != c[i]) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-7.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-7.c new file mode 100644 index 00000000000..ad16a5329bf --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-7.c @@ -0,0 +1,61 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx } */ +/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-load" } */ + +#include "avx-check.h" + +#define N 128 + +char **ep; +char **fp; +char **mp; +char **lp; + +/* Each buffer must hold the N + 1 pointers that avx_test accesses. */ +__attribute__ ((noinline)) +void +foo (void) +{ + mp = (char **) malloc ((N + 1) * sizeof (char *)); + lp = (char **) malloc ((N + 1) * sizeof (char *)); + ep = (char **) malloc ((N + 1) * sizeof (char *)); + fp = (char **) malloc ((N + 1) * sizeof (char *)); +} + +void +avx_test (void) +{ + int i; + char **ap, **bp, **cp, **dp; + char *str = "STR"; + + foo (); + + cp = mp; + dp = lp; + + for (i = N; i >= 0; i--) + { + *cp++ = str; + *dp++ = str; + } + + ap = ep; + bp = fp; + cp = mp; + dp = lp; + + for (i = N; i >= 0; i--) + { + *ap++ = *cp++; + *bp++ = *dp++; + } + + for (i = N; i >= 0; i--) + { + if (strcmp (*--ap, "STR") != 0) + abort (); + if (strcmp (*--bp, "STR") != 0) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-1.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-1.c new file mode 100644 index 00000000000..99db55c9d0a --- /dev/null +++ 
b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-1.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-store" } */ + +#define N 1024 + +float a[N], b[N+3], c[N], d[N]; + +void +avx_test (void) +{ + int i; + + for (i = 0; i < N; i++) + b[i+3] = a[i] * 10.0; + + for (i = 0; i < N; i++) + d[i] = c[i] * 20.0; +} + +/* { dg-final { scan-assembler-not "\\*avx_movups256/2" } } */ +/* { dg-final { scan-assembler "movups.*\\*avx_movv4sf_internal/3" } } */ +/* { dg-final { scan-assembler "vextractf128" } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c new file mode 100644 index 00000000000..38ee9e2a45c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-store" } */ + +#define N 1024 + +char **ep; +char **fp; + +void +avx_test (void) +{ + int i; + char **ap; + char **bp; + char **cp; + + ap = ep; + bp = fp; + for (i = 128; i >= 0; i--) + { + *ap++ = *cp++; + *bp++ = 0; + } +} + +/* { dg-final { scan-assembler-not "\\*avx_movdqu256/2" } } */ +/* { dg-final { scan-assembler "movdqu.*\\*avx_movv16qi_internal/3" } } */ +/* { dg-final { scan-assembler "vextractf128" } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c new file mode 100644 index 00000000000..eaab6fd775b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-store" } */ + +#define N 1024 + +double a[N], b[N+3], c[N], d[N]; + +void +avx_test (void) +{ + int i; + + for (i = 0; i < N; i++) + b[i+3] = a[i] * 10.0; + + for (i = 0; i < N; i++) + d[i] = c[i] * 20.0; +} + +/* { dg-final { scan-assembler-not 
"\\*avx_movupd256/2" } } */ +/* { dg-final { scan-assembler "movupd.*\\*avx_movv2df_internal/3" } } */ +/* { dg-final { scan-assembler "vextractf128" } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-4.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-4.c new file mode 100644 index 00000000000..96cca66ae9c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-4.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -dp -mavx -mno-avx256-split-unaligned-load -mno-avx256-split-unaligned-store" } */ + +#define N 1024 + +float a[N], b[N+3], c[N]; + +void +avx_test (void) +{ + int i; + + for (i = 0; i < N; i++) + b[i+3] = a[i] * c[i]; +} + +/* { dg-final { scan-assembler "\\*avx_movups256/2" } } */ +/* { dg-final { scan-assembler-not "\\*avx_movups/2" } } */ +/* { dg-final { scan-assembler-not "\\*avx_movv4sf_internal/3" } } */ +/* { dg-final { scan-assembler-not "vextractf128" } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-5.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-5.c new file mode 100644 index 00000000000..642da3cf0ee --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-5.c @@ -0,0 +1,42 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx } */ +/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-store" } */ + +#include "avx-check.h" + +#define N 8 + +float a[N] = { 24.43, 68.346, 43.35, 546.46, 46.79, 82.78, 82.7, 9.4 }; +float b[N+3]; +float c[N+3]; + +void +foo (void) +{ + int i; + + for (i = 0; i < N; i++) + b[i+3] = a[i] * 2; +} + +__attribute__ ((noinline)) +float +bar (float x) +{ + return x * 2; +} + +void +avx_test (void) +{ + int i; + + foo (); + + for (i = 0; i < N; i++) + c[i+3] = bar (a[i]); + + for (i = 0; i < N; i++) + if (b[i+3] != c[i+3]) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-6.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-6.c new file mode 100644 index 
00000000000..a0de7a56f63 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-6.c @@ -0,0 +1,42 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx } */ +/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-store" } */ + +#include "avx-check.h" + +#define N 4 + +double a[N] = { 24.43, 68.346, 43.35, 546.46 }; +double b[N+3]; +double c[N+3]; + +void +foo (void) +{ + int i; + + for (i = 0; i < N; i++) + b[i+3] = a[i] * 2; +} + +__attribute__ ((noinline)) +double +bar (double x) +{ + return x * 2; +} + +void +avx_test (void) +{ + int i; + + foo (); + + for (i = 0; i < N; i++) + c[i+3] = bar (a[i]); + + for (i = 0; i < N; i++) + if (b[i+3] != c[i+3]) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-7.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-7.c new file mode 100644 index 00000000000..4272dc3cd0d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-7.c @@ -0,0 +1,46 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx } */ +/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-store" } */ + +#include "avx-check.h" + +#define N 128 + +char **ep; +char **fp; + +/* Each buffer must hold the N + 1 pointers that avx_test accesses. */ +__attribute__ ((noinline)) +void +foo (void) +{ + ep = (char **) malloc ((N + 1) * sizeof (char *)); + fp = (char **) malloc ((N + 1) * sizeof (char *)); +} + +void +avx_test (void) +{ + int i; + char **ap, **bp; + char *str = "STR"; + + foo (); + + ap = ep; + bp = fp; + + for (i = N; i >= 0; i--) + { + *ap++ = str; + *bp++ = str; + } + + for (i = N; i >= 0; i--) + { + if (strcmp (*--ap, "STR") != 0) + abort (); + if (strcmp (*--bp, "STR") != 0) + abort (); + } +}