From 97afef00ba922009a05cff1293c1ff3f35093ad6 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Fri, 24 Apr 2015 15:05:50 +0200 Subject: [PATCH] i386-protos.h (ix86_operands_ok_for_move_multiple): New. * config/i386/i386-protos.h (ix86_operands_ok_for_move_multiple): New. * config/i386/i386.c (extract_base_offset_in_addr): New function. (ix86_operands_ok_for_move_multiple): Ditto. * config/i386/sse.md (movsd/movhpd to movupd peephole2): New pattern. (movlpd/movhpd to movupd peephole2): Ditto. testsuite/ChangeLog: * gcc.target/i386/sse2-load-multi.c: New test. * gcc.target/i386/sse2-store-multi.c: Ditto. Co-Authored-By: Wei Mi From-SVN: r222410 --- gcc/ChangeLog | 9 ++ gcc/config/i386/i386-protos.h | 2 + gcc/config/i386/i386.c | 86 +++++++++++++++++++ gcc/config/i386/sse.md | 29 +++++++ gcc/testsuite/ChangeLog | 6 ++ .../gcc.target/i386/sse2-load-multi.c | 24 ++++++ .../gcc.target/i386/sse2-store-multi.c | 20 +++++ 7 files changed, 176 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/sse2-load-multi.c create mode 100644 gcc/testsuite/gcc.target/i386/sse2-store-multi.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d489b4a7d85..952a1dd1a17 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2015-04-24 Uros Bizjak + Wei Mi + + * config/i386/i386-protos.h (ix86_operands_ok_for_move_multiple): New. + * config/i386/i386.c (extract_base_offset_in_addr): New function. + (ix86_operands_ok_for_move_multiple): Ditto. + * config/i386/sse.md (movsd/movhpd to movupd peephole2): New pattern. + (movlpd/movhpd to movupd peephole2): Ditto. 
+ 2015-04-24 Marek Polacek PR c/61534 diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 2c90c2c5ccd..0224c362e97 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -304,6 +304,8 @@ extern enum attr_cpu ix86_schedule; #endif extern const char * ix86_output_call_insn (rtx_insn *insn, rtx call_op); +extern bool ix86_operands_ok_for_move_multiple (rtx *operands, bool load, + enum machine_mode mode); #ifdef RTX_CODE /* Target data for multipass lookahead scheduling. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index bdc365274bb..77a6109b711 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -51726,6 +51726,92 @@ ix86_binds_local_p (const_tree exp) } #endif +/* If MEM is in the form of [base+offset], extract the two parts + of address and set to BASE and OFFSET, otherwise return false. */ + +static bool +extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset) +{ + rtx addr; + + gcc_assert (MEM_P (mem)); + + addr = XEXP (mem, 0); + + if (GET_CODE (addr) == CONST) + addr = XEXP (addr, 0); + + if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF) + { + *base = addr; + *offset = const0_rtx; + return true; + } + + if (GET_CODE (addr) == PLUS + && (REG_P (XEXP (addr, 0)) + || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF) + && CONST_INT_P (XEXP (addr, 1))) + { + *base = XEXP (addr, 0); + *offset = XEXP (addr, 1); + return true; + } + + return false; +} + +/* Given OPERANDS of consecutive load/store, check if we can merge + them into move multiple. LOAD is true if they are load instructions. + MODE is the mode of memory operands. 
*/ + +bool +ix86_operands_ok_for_move_multiple (rtx *operands, bool load, + enum machine_mode mode) +{ + HOST_WIDE_INT offval_1, offval_2, msize; + rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2; + + if (load) + { + mem_1 = operands[1]; + mem_2 = operands[3]; + reg_1 = operands[0]; + reg_2 = operands[2]; + } + else + { + mem_1 = operands[0]; + mem_2 = operands[2]; + reg_1 = operands[1]; + reg_2 = operands[3]; + } + + gcc_assert (REG_P (reg_1) && REG_P (reg_2)); + + if (REGNO (reg_1) != REGNO (reg_2)) + return false; + + /* Check if the addresses are in the form of [base+offset]. */ + if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1)) + return false; + if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2)) + return false; + + /* Check if the bases are the same. */ + if (!rtx_equal_p (base_1, base_2)) + return false; + + offval_1 = INTVAL (offset_1); + offval_2 = INTVAL (offset_2); + msize = GET_MODE_SIZE (mode); + /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */ + if (offval_1 + msize != offval_2) + return false; + + return true; +} + /* Initialize the GCC target structure. */ #undef TARGET_RETURN_IN_MEMORY #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 16db216f51b..9b7009ace6b 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1183,6 +1183,21 @@ ] (const_string "")))]) +;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets. 
+(define_peephole2 + [(set (match_operand:V2DF 0 "register_operand") + (vec_concat:V2DF (match_operand:DF 1 "memory_operand") + (match_operand:DF 4 "const0_operand"))) + (set (match_operand:V2DF 2 "register_operand") + (vec_concat:V2DF (vec_select:DF (match_dup 2) + (parallel [(const_int 0)])) + (match_operand:DF 3 "memory_operand")))] + "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL + && ix86_operands_ok_for_move_multiple (operands, true, DFmode)" + [(set (match_dup 2) + (unspec:V2DF [(match_dup 4)] UNSPEC_LOADU))] + "operands[4] = adjust_address (operands[1], V2DFmode, 0);") + (define_insn "_storeu" [(set (match_operand:VF 0 "memory_operand" "=m") (unspec:VF @@ -1242,6 +1257,20 @@ (set_attr "prefix" "evex") (set_attr "mode" "")]) +;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets. +(define_peephole2 + [(set (match_operand:DF 0 "memory_operand") + (vec_select:DF (match_operand:V2DF 1 "register_operand") + (parallel [(const_int 0)]))) + (set (match_operand:DF 2 "memory_operand") + (vec_select:DF (match_operand:V2DF 3 "register_operand") + (parallel [(const_int 1)])))] + "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL + && ix86_operands_ok_for_move_multiple (operands, false, DFmode)" + [(set (match_dup 4) + (unspec:V2DF [(match_dup 1)] UNSPEC_STOREU))] + "operands[4] = adjust_address (operands[0], V2DFmode, 0);") + /* For AVX, normal *mov_internal pattern will handle unaligned loads just fine if misaligned_operand is true, and without the UNSPEC it can be combined with arithmetic instructions. If misaligned_operand is diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 375ad1d5459..4b2f62bc32d 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2015-04-24 Uros Bizjak + Wei Mi + + * gcc.target/i386/sse2-load-multi.c: New test. + * gcc.target/i386/sse2-store-multi.c: Ditto. 
+ 2015-04-24 Marek Polacek PR c/65830 diff --git a/gcc/testsuite/gcc.target/i386/sse2-load-multi.c b/gcc/testsuite/gcc.target/i386/sse2-load-multi.c new file mode 100644 index 00000000000..927605460d8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse2-load-multi.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-march=corei7 -O2" } */ + +#include <emmintrin.h> + +double a[8]; + +__m128d load_1 () +{ + __m128d res; + res = _mm_load_sd (&a[1]); + res = _mm_loadh_pd (res, &a[2]); + return res; +} + +__m128d load_2 (double *a) +{ + __m128d res; + res = _mm_load_sd (&a[1]); + res = _mm_loadh_pd (res, &a[2]); + return res; +} + +/* { dg-final { scan-assembler-times "movup" 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/sse2-store-multi.c b/gcc/testsuite/gcc.target/i386/sse2-store-multi.c new file mode 100644 index 00000000000..203a00f69be --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse2-store-multi.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-march=corei7 -O2" } */ + +#include <emmintrin.h> + +double a[8]; + +void store_1 (__m128d val) +{ + _mm_store_sd (&a[1], val); + _mm_storeh_pd (&a[2], val); +} + +void store_2 (__m128d val, double *a) +{ + _mm_store_sd (&a[1], val); + _mm_storeh_pd (&a[2], val); +} + +/* { dg-final { scan-assembler-times "movup" 2 } } */ -- 2.30.2