* config/i386/i386-protos.h (ix86_operands_ok_for_move_multiple): New.
* config/i386/i386.c (extract_base_offset_in_addr): New function.
(ix86_operands_ok_for_move_multiple): Ditto.
* config/i386/sse.md (movsd/movhpd to movupd peephole2): New pattern.
(movlpd/movhpd to movupd peephole2): Ditto.
testsuite/ChangeLog:
* gcc.target/i386/sse2-load-multi.c: New test.
* gcc.target/i386/sse2-store-multi.c: Ditto.
Co-Authored-By: Wei Mi <wmi@google.com>
From-SVN: r222410
+2015-04-24 Uros Bizjak <ubizjak@gmail.com>
+ Wei Mi <wmi@google.com>
+
+ * config/i386/i386-protos.h (ix86_operands_ok_for_move_multiple): New.
+ * config/i386/i386.c (extract_base_offset_in_addr): New function.
+ (ix86_operands_ok_for_move_multiple): Ditto.
+ * config/i386/sse.md (movsd/movhpd to movupd peephole2): New pattern.
+ (movlpd/movhpd to movupd peephole2): Ditto.
+
2015-04-24 Marek Polacek <polacek@redhat.com>
PR c/61534
#endif
extern const char * ix86_output_call_insn (rtx_insn *insn, rtx call_op);
+extern bool ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
+                                                enum machine_mode mode);
#ifdef RTX_CODE
/* Target data for multipass lookahead scheduling.
}
#endif
+/* If MEM is in the form of [base+offset], extract the two parts
+   of the address and store them in *BASE and *OFFSET; otherwise
+   return false.  */
+
+static bool
+extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
+{
+  rtx addr;
+
+  gcc_assert (MEM_P (mem));
+
+  addr = XEXP (mem, 0);
+
+  if (GET_CODE (addr) == CONST)
+    addr = XEXP (addr, 0);
+
+  if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
+    {
+      *base = addr;
+      *offset = const0_rtx;
+      return true;
+    }
+
+  if (GET_CODE (addr) == PLUS
+      && (REG_P (XEXP (addr, 0))
+          || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
+      && CONST_INT_P (XEXP (addr, 1)))
+    {
+      *base = XEXP (addr, 0);
+      *offset = XEXP (addr, 1);
+      return true;
+    }
+
+  return false;
+}
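As an aside for readers less familiar with RTL: the helper above accepts exactly two address shapes, a bare register or symbol (offset 0) and a (plus base (const_int N)) sum, optionally wrapped in const. The following self-contained C sketch mirrors that decision structure over a toy address type; toy_rtx and toy_extract_base_offset are illustrative stand-ins invented for this note, not GCC's rtx API.

    #include <stdbool.h>
    #include <stdio.h>

    /* Toy stand-ins for the few RTL codes the helper inspects.
       These are NOT GCC's rtx types; they only model the address shapes.  */
    enum toy_code { TOY_REG, TOY_SYMBOL_REF, TOY_CONST_INT, TOY_PLUS };

    struct toy_rtx
    {
      enum toy_code code;
      long value;                       /* used when code == TOY_CONST_INT  */
      const struct toy_rtx *op0, *op1;  /* used when code == TOY_PLUS  */
    };

    /* Same decision structure as extract_base_offset_in_addr: accept a
       bare base and a base-plus-constant sum, reject everything else.  */
    static bool
    toy_extract_base_offset (const struct toy_rtx *addr,
                             const struct toy_rtx **base, long *offset)
    {
      if (addr->code == TOY_REG || addr->code == TOY_SYMBOL_REF)
        {
          *base = addr;
          *offset = 0;
          return true;
        }

      if (addr->code == TOY_PLUS
          && (addr->op0->code == TOY_REG || addr->op0->code == TOY_SYMBOL_REF)
          && addr->op1->code == TOY_CONST_INT)
        {
          *base = addr->op0;
          *offset = addr->op1->value;
          return true;
        }

      return false;
    }

    int
    main (void)
    {
      struct toy_rtx reg = { TOY_REG, 0, 0, 0 };
      struct toy_rtx cst = { TOY_CONST_INT, 8, 0, 0 };
      struct toy_rtx plus = { TOY_PLUS, 0, &reg, &cst };
      const struct toy_rtx *base;
      long offset;

      /* (plus (reg) (const_int 8)) decomposes to base = reg, offset = 8.  */
      if (toy_extract_base_offset (&plus, &base, &offset))
        printf ("offset = %ld\n", offset);
      return 0;
    }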
+
+/* Given OPERANDS of consecutive load/store instructions, check if we
+   can merge them into a single move-multiple instruction.  LOAD is
+   true if they are loads.  MODE is the mode of the memory operands.  */
+
+bool
+ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
+                                    enum machine_mode mode)
+{
+  HOST_WIDE_INT offval_1, offval_2, msize;
+  rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
+
+  if (load)
+    {
+      mem_1 = operands[1];
+      mem_2 = operands[3];
+      reg_1 = operands[0];
+      reg_2 = operands[2];
+    }
+  else
+    {
+      mem_1 = operands[0];
+      mem_2 = operands[2];
+      reg_1 = operands[1];
+      reg_2 = operands[3];
+    }
+
+  gcc_assert (REG_P (reg_1) && REG_P (reg_2));
+
+  if (REGNO (reg_1) != REGNO (reg_2))
+    return false;
+
+  /* Check if the addresses are in the form of [base+offset].  */
+  if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
+    return false;
+  if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
+    return false;
+
+  /* Check if the bases are the same.  */
+  if (!rtx_equal_p (base_1, base_2))
+    return false;
+
+  offval_1 = INTVAL (offset_1);
+  offval_2 = INTVAL (offset_2);
+  msize = GET_MODE_SIZE (mode);
+
+  /* Check if mem_1 is adjacent to mem_2 and mem_1 has the lower
+     address.  */
+  if (offval_1 + msize != offval_2)
+    return false;
+
+  return true;
+}
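To make the adjacency check concrete with numbers from the new tests: for DFmode, msize is 8, so loads from a+8 and a+16 satisfy offval_1 + msize == offval_2 (8 + 8 == 16) and merge, while a+8 and a+24 do not. A tiny standalone restatement of just that arithmetic (adjacent_p is a hypothetical name, not part of the patch):

    #include <assert.h>

    /* Stand-alone restatement of the adjacency test above: mem_1 must
       end exactly where mem_2 begins, so mem_1 has the lower address.  */
    static int
    adjacent_p (long offval_1, long offval_2, long msize)
    {
      return offval_1 + msize == offval_2;
    }

    int
    main (void)
    {
      assert (adjacent_p (8, 16, 8));   /* &a[1] then &a[2]: mergeable.  */
      assert (!adjacent_p (8, 24, 8));  /* &a[1] then &a[3]: a gap.  */
      assert (!adjacent_p (16, 8, 8));  /* reversed order: rejected.  */
      return 0;
    }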
+
/* Initialize the GCC target structure. */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
]
(const_string "<MODE>")))])
+;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets.
+(define_peephole2
+  [(set (match_operand:V2DF 0 "register_operand")
+        (vec_concat:V2DF (match_operand:DF 1 "memory_operand")
+                         (match_operand:DF 4 "const0_operand")))
+   (set (match_operand:V2DF 2 "register_operand")
+        (vec_concat:V2DF (vec_select:DF (match_dup 2)
+                                        (parallel [(const_int 0)]))
+                         (match_operand:DF 3 "memory_operand")))]
+  "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
+   && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
+  [(set (match_dup 2)
+        (unspec:V2DF [(match_dup 4)] UNSPEC_LOADU))]
+  "operands[4] = adjust_address (operands[1], V2DFmode, 0);")
+
(define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
[(set (match_operand:VF 0 "memory_operand" "=m")
(unspec:VF
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets.
+(define_peephole2
+  [(set (match_operand:DF 0 "memory_operand")
+        (vec_select:DF (match_operand:V2DF 1 "register_operand")
+                       (parallel [(const_int 0)])))
+   (set (match_operand:DF 2 "memory_operand")
+        (vec_select:DF (match_operand:V2DF 3 "register_operand")
+                       (parallel [(const_int 1)])))]
+  "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL
+   && ix86_operands_ok_for_move_multiple (operands, false, DFmode)"
+  [(set (match_dup 4)
+        (unspec:V2DF [(match_dup 1)] UNSPEC_STOREU))]
+  "operands[4] = adjust_address (operands[0], V2DFmode, 0);")
+
/* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
just fine if misaligned_operand is true, and without the UNSPEC it can
be combined with arithmetic instructions. If misaligned_operand is
+2015-04-24 Uros Bizjak <ubizjak@gmail.com>
+ Wei Mi <wmi@google.com>
+
+ * gcc.target/i386/sse2-load-multi.c: New test.
+ * gcc.target/i386/sse2-store-multi.c: Ditto.
+
2015-04-24 Marek Polacek <polacek@redhat.com>
PR c/65830
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=corei7 -O2" } */
+
+#include <emmintrin.h>
+
+double a[8];
+
+__m128d load_1 ()
+{
+  __m128d res;
+  res = _mm_load_sd (&a[1]);
+  res = _mm_loadh_pd (res, &a[2]);
+  return res;
+}
+
+__m128d load_2 (double *a)
+{
+  __m128d res;
+  res = _mm_load_sd (&a[1]);
+  res = _mm_loadh_pd (res, &a[2]);
+  return res;
+}
+
+/* { dg-final { scan-assembler-times "movup" 2 } } */
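Note that load_1 and load_2 intentionally differ in addressing form: load_1 reads the global array a (a symbol_ref base, possibly wrapped in const), while load_2 goes through a pointer parameter (a register base), so between them the test covers both shapes extract_base_offset_in_addr accepts. The store test below mirrors the same split.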
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=corei7 -O2" } */
+
+#include <emmintrin.h>
+
+double a[8];
+
+void store_1 (__m128d val)
+{
+  _mm_store_sd (&a[1], val);
+  _mm_storeh_pd (&a[2], val);
+}
+
+void store_2 (__m128d val, double *a)
+{
+  _mm_store_sd (&a[1], val);
+  _mm_storeh_pd (&a[2], val);
+}
+
+/* { dg-final { scan-assembler-times "movup" 2 } } */