From f228967232d972181d9c497ca7731d5d1db60881 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 13 Oct 2011 00:06:45 +0200 Subject: [PATCH] sse.md (vec_avx2): New mode_attr. * config/i386/sse.md (vec_avx2): New mode_attr. (mulv16qi3): Macroize to cover also mulv32qi3 for TARGET_AVX2 into ... (mul3): ... this. From-SVN: r179871 --- gcc/ChangeLog | 5 +++++ gcc/config/i386/sse.md | 45 +++++++++++++++++++++++++++--------------- 2 files changed, 34 insertions(+), 16 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e774b5855da..753677a5a15 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,10 @@ 2011-10-12 Jakub Jelinek + * config/i386/sse.md (vec_avx2): New mode_attr. + (mulv16qi3): Macroize to cover also mulv32qi3 for + TARGET_AVX2 into ... + (mul3): ... this. + * config/i386/i386.md (UNSPEC_VPERMDI): Remove. * config/i386/i386.c (ix86_expand_vec_perm): Handle V16QImode and V32QImode for TARGET_AVX2. diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 12331516cd6..e649b30fec6 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -163,6 +163,12 @@ (V4SI "avx2") (V2DI "avx2") (V8SI "avx2") (V4DI "avx2")]) +(define_mode_attr vec_avx2 + [(V16QI "vec") (V32QI "avx2") + (V8HI "vec") (V16HI "avx2") + (V4SI "vec") (V8SI "avx2") + (V2DI "vec") (V4DI "avx2")]) + ;; Mapping of logic-shift operators (define_code_iterator lshift [lshiftrt ashift]) @@ -4838,10 +4844,10 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) -(define_insn_and_split "mulv16qi3" - [(set (match_operand:V16QI 0 "register_operand" "") - (mult:V16QI (match_operand:V16QI 1 "register_operand" "") - (match_operand:V16QI 2 "register_operand" "")))] +(define_insn_and_split "mul3" + [(set (match_operand:VI1_AVX2 0 "register_operand" "") + (mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "") + (match_operand:VI1_AVX2 2 "register_operand" "")))] "TARGET_SSE2 && can_create_pseudo_p ()" "#" @@ -4850,34 +4856,41 @@ { rtx t[6]; int i; + enum machine_mode mulmode = mode; for (i = 0; i < 6; ++i) - t[i] = gen_reg_rtx (V16QImode); + t[i] = gen_reg_rtx (mode); /* Unpack data such that we've got a source byte in each low byte of each word. We don't care what goes into the high byte of each word. Rather than trying to get zero in there, most convenient is to let it be a copy of the low byte. */ - emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1])); - emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2])); - emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1])); - emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2])); + emit_insn (gen__interleave_high (t[0], operands[1], + operands[1])); + emit_insn (gen__interleave_high (t[1], operands[2], + operands[2])); + emit_insn (gen__interleave_low (t[2], operands[1], + operands[1])); + emit_insn (gen__interleave_low (t[3], operands[2], + operands[2])); /* Multiply words. The end-of-line annotations here give a picture of what the output of that instruction looks like. Dot means don't care; the letters are the bytes of the result with A being the most significant. */ - emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */ - gen_lowpart (V8HImode, t[0]), - gen_lowpart (V8HImode, t[1]))); - emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */ - gen_lowpart (V8HImode, t[2]), - gen_lowpart (V8HImode, t[3]))); + emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[4]), + gen_rtx_MULT (mulmode, /* .A.B.C.D.E.F.G.H */ + gen_lowpart (mulmode, t[0]), + gen_lowpart (mulmode, t[1])))); + emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[5]), + gen_rtx_MULT (mulmode, /* .I.J.K.L.M.N.O.P */ + gen_lowpart (mulmode, t[2]), + gen_lowpart (mulmode, t[3])))); /* Extract the even bytes and merge them back together. */ ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0); set_unique_reg_note (get_last_insn (), REG_EQUAL, - gen_rtx_MULT (V16QImode, operands[1], operands[2])); + gen_rtx_MULT (mode, operands[1], operands[2])); DONE; }) -- 2.30.2