From: Richard Henderson Date: Tue, 17 May 2005 04:33:42 +0000 (-0700) Subject: sse.md (mulv4si3): New. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=2b5bf0e27e0b77a48f0f8a046cb6a56280137054;p=gcc.git sse.md (mulv4si3): New. * config/i386/sse.md (mulv4si3): New. * lib/target-supports.exp (check_effective_target_vect_int_mul): Add i?86 and x86_64. From-SVN: r99811 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 28d0a13447d..f42032b6645 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,7 @@ +2005-05-16 Richard Henderson + + * config/i386/sse.md (mulv4si3): New. + 2005-05-17 Hans-Peter Nilsson * config/cris/cris.h (EXTRA_CONSTRAINT_T): Remove FIXME and diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index ee2e6147a15..76efe5f0586 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -2490,6 +2490,52 @@ [(set_attr "type" "sseiadd") (set_attr "mode" "TI")]) +(define_expand "mulv4si3" + [(set (match_operand:V4SI 0 "register_operand" "") + (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "") + (match_operand:V4SI 2 "nonimmediate_operand" "")))] + "TARGET_SSE2" +{ + rtx t1, t2, t3, t4, t5, t6, thirtytwo; + rtx op0, op1, op2; + + op0 = operands[0]; + op1 = operands[1]; + op2 = operands[2]; + t1 = gen_reg_rtx (V4SImode); + t2 = gen_reg_rtx (V4SImode); + t3 = gen_reg_rtx (V4SImode); + t4 = gen_reg_rtx (V4SImode); + t5 = gen_reg_rtx (V4SImode); + t6 = gen_reg_rtx (V4SImode); + thirtytwo = GEN_INT (32); + + /* Multiply elements 2 and 0. */ + emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2)); + + /* Shift both input vectors down one element, so that elements 3 and 1 + are now in the slots for elements 2 and 0. For K8, at least, this is + faster than using a shuffle. */ + emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2), + gen_lowpart (TImode, op1), thirtytwo)); + emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3), + gen_lowpart (TImode, op2), thirtytwo)); + + /* Multiply elements 3 and 1. 
*/
+ emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3));
+
+ /* Move the results in element 2 down to element 1; we don't care what
+ goes in elements 2 and 3. */
+ emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
+ const0_rtx, const0_rtx));
+ emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
+ const0_rtx, const0_rtx));
+
+ /* Merge the parts back together. */
+ emit_insn (gen_sse2_punpckldq (op0, t5, t6));
+ DONE;
+})
+
 (define_insn "ashr<mode>3"
 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
 (ashiftrt:SSEMODE24
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index b94841e9e9a..c9f0b74de21 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2005-05-16 Richard Henderson
+
+ * lib/target-supports.exp (check_effective_target_vect_int_mult): Add
+ i?86 and x86_64.
+
 2005-05-16 Mark Mitchell
 
 * gcc.dg/compat/generate-random.c (config.h): Do not include.
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 9306790530d..ac6dda5fd57 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -955,7 +955,9 @@ proc check_effective_target_vect_int_mult { } {
 verbose "check_effective_target_vect_int_mult: using cached result" 2
 } else {
 set et_vect_int_mult_saved 0
- if { [istarget powerpc*-*-*] } {
+ if { [istarget powerpc*-*-*]
+ || [istarget i?86-*-*]
+ || [istarget x86_64-*-*] } {
 set et_vect_int_mult_saved 1
 }
 }