From d0337ddca57bd43865679081a4a8d26502d4fd83 Mon Sep 17 00:00:00 2001 From: Alexander Ivchenko Date: Wed, 24 Sep 2014 08:02:30 +0000 Subject: [PATCH] AVX-512. Add insert insn patterns. gcc/ * config/i386/i386.c (CODE_FOR_avx2_extracti128): Rename to ... (CODE_FOR_avx_vextractf128v4di): this. (CODE_FOR_avx2_inserti128): Rename to ... (CODE_FOR_avx_vinsertf128v4di): this. (ix86_expand_args_builtin): Handle CODE_FOR_avx_vinsertf128v4di, CODE_FOR_avx_vextractf128v4di. (ix86_expand_args_builtin): Handle CODE_FOR_avx512dq_vinsertf32x8_mask, CODE_FOR_avx512dq_vinserti32x8_mask, CODE_FOR_avx512vl_vinsertv4df, CODE_FOR_avx512vl_vinsertv4di, CODE_FOR_avx512vl_vinsertv8sf, CODE_FOR_avx512vl_vinsertv8si. * config/i386/sse.md (define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"): Use AVX512_VEC mode iterator. (define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"): Ditto. (define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"): Use AVX512_VEC_2 mode iterator. (define_insn "vec_set_lo_<mode><mask_name>"): New. (define_insn "vec_set_hi_<mode><mask_name>"): Ditto. (define_expand "avx512vl_vinsert<mode>"): Ditto. (define_insn "avx2_vec_set_lo_v4di"): Delete. (define_insn "avx2_vec_set_hi_v4di"): Ditto. (define_insn "vec_set_lo_<mode><mask_name>"): Add masking. (define_insn "vec_set_hi_<mode><mask_name>"): Ditto. (define_insn "vec_set_lo_<mode><mask_name>"): Ditto. (define_insn "vec_set_hi_<mode><mask_name>"): Ditto. (define_expand "avx2_extracti128"): Delete. (define_expand "avx2_inserti128"): Ditto. 
Co-Authored-By: Andrey Turetskiy Co-Authored-By: Anna Tikhonova Co-Authored-By: Ilya Tocar Co-Authored-By: Ilya Verbin Co-Authored-By: Kirill Yukhin Co-Authored-By: Maxim Kuznetsov Co-Authored-By: Michael Zolotukhin From-SVN: r215542 --- gcc/ChangeLog | 42 ++++++ gcc/config/i386/i386.c | 14 +- gcc/config/i386/sse.md | 281 ++++++++++++++++++++--------------------- 3 files changed, 186 insertions(+), 151 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 05e14112421..a6da7a44e47 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,45 @@ +2014-09-24 Alexander Ivchenko + Maxim Kuznetsov + Anna Tikhonova + Ilya Tocar + Andrey Turetskiy + Ilya Verbin + Kirill Yukhin + Michael Zolotukhin + + * config/i386/i386.c + (CODE_FOR_avx2_extracti128): Rename to ... + (CODE_FOR_avx_vextractf128v4di): this. + (CODE_FOR_avx2_inserti128): Rename to ... + (CODE_FOR_avx_vinsertf128v4di): this. + (ix86_expand_args_builtin): Handle CODE_FOR_avx_vinsertf128v4di, + CODE_FOR_avx_vextractf128v4di. + (ix86_expand_args_builtin): Handle CODE_FOR_avx512dq_vinsertf32x8_mask, + CODE_FOR_avx512dq_vinserti32x8_mask, CODE_FOR_avx512vl_vinsertv4df, + CODE_FOR_avx512vl_vinsertv4di, CODE_FOR_avx512vl_vinsertv8sf, + CODE_FOR_avx512vl_vinsertv8si. + * config/i386/sse.md + (define_expand + "_vinsert_mask"): Use + AVX512_VEC mode iterator. + (define_insn + "_vinsert_1"): + Ditto. + (define_expand + "_vinsert_mask"): Use + AVX512_VEC_2 mode iterator. + (define_insn "vec_set_lo_"): New. + (define_insn "vec_set_hi_"): Ditto. + (define_expand "avx512vl_vinsert"): Ditto. + (define_insn "avx2_vec_set_lo_v4di"): Delete. + (define_insn "avx2_vec_set_hi_v4di"): Ditto. + (define_insn "vec_set_lo_"): Add masking. + (define_insn "vec_set_hi_"): Ditto. + (define_insn "vec_set_lo_"): Ditto. + (define_insn "vec_set_hi_"): Ditto. + (define_expand "avx2_extracti128"): Delete. + (define_expand "avx2_inserti128"): Ditto. 
+ 2014-09-24 Alexander Ivchenko Maxim Kuznetsov Anna Tikhonova diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index ed3d85eba5a..0843da35ada 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -29961,8 +29961,8 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT }, - { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT }, - { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI }, @@ -34053,8 +34053,8 @@ ix86_expand_args_builtin (const struct builtin_description *d, if (!match) switch (icode) { - case CODE_FOR_avx2_inserti128: - case CODE_FOR_avx2_extracti128: + case CODE_FOR_avx_vinsertf128v4di: + case 
CODE_FOR_avx_vextractf128v4di: error ("the last argument must be an 1-bit immediate"); return const0_rtx; @@ -34120,6 +34120,12 @@ ix86_expand_args_builtin (const struct builtin_description *d, case CODE_FOR_avx512f_vinserti64x4_mask: case CODE_FOR_avx512f_vextractf64x4_mask: case CODE_FOR_avx512f_vextracti64x4_mask: + case CODE_FOR_avx512dq_vinsertf32x8_mask: + case CODE_FOR_avx512dq_vinserti32x8_mask: + case CODE_FOR_avx512vl_vinsertv4df: + case CODE_FOR_avx512vl_vinsertv4di: + case CODE_FOR_avx512vl_vinsertv8sf: + case CODE_FOR_avx512vl_vinsertv8si: error ("the last argument must be a 1-bit immediate"); return const0_rtx; diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index a7cc5adfa88..2dd79d0296c 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -11199,80 +11199,64 @@ (set_attr "prefix" "orig,orig,vex,vex") (set_attr "mode" "TI")]) -(define_expand "avx512f_vinsert32x4_mask" - [(match_operand:V16FI 0 "register_operand") - (match_operand:V16FI 1 "register_operand") +(define_expand "_vinsert_mask" + [(match_operand:AVX512_VEC 0 "register_operand") + (match_operand:AVX512_VEC 1 "register_operand") (match_operand: 2 "nonimmediate_operand") (match_operand:SI 3 "const_0_to_3_operand") - (match_operand:V16FI 4 "register_operand") + (match_operand:AVX512_VEC 4 "register_operand") (match_operand: 5 "register_operand")] "TARGET_AVX512F" { - switch (INTVAL (operands[3])) - { - case 0: - emit_insn (gen_avx512f_vinsert32x4_1_mask (operands[0], - operands[1], operands[2], GEN_INT (0xFFF), operands[4], - operands[5])); - break; - case 1: - emit_insn (gen_avx512f_vinsert32x4_1_mask (operands[0], - operands[1], operands[2], GEN_INT (0xF0FF), operands[4], - operands[5])); - break; - case 2: - emit_insn (gen_avx512f_vinsert32x4_1_mask (operands[0], - operands[1], operands[2], GEN_INT (0xFF0F), operands[4], - operands[5])); - break; - case 3: - emit_insn (gen_avx512f_vinsert32x4_1_mask (operands[0], - operands[1], operands[2], GEN_INT (0xFFF0), 
operands[4], - operands[5])); - break; - default: - gcc_unreachable (); - } + int mask,selector; + mask = INTVAL (operands[3]); + selector = GET_MODE_SIZE (GET_MODE_INNER (mode)) == 4 ? + 0xFFFF ^ (0xF000 >> mask * 4) + : 0xFF ^ (0xC0 >> mask * 2); + emit_insn (gen__vinsert_1_mask + (operands[0], operands[1], operands[2], GEN_INT (selector), + operands[4], operands[5])); DONE; - }) -(define_insn "avx512f_vinsert32x4_1" - [(set (match_operand:V16FI 0 "register_operand" "=v") - (vec_merge:V16FI - (match_operand:V16FI 1 "register_operand" "v") - (vec_duplicate:V16FI +(define_insn "_vinsert_1" + [(set (match_operand:AVX512_VEC 0 "register_operand" "=v") + (vec_merge:AVX512_VEC + (match_operand:AVX512_VEC 1 "register_operand" "v") + (vec_duplicate:AVX512_VEC (match_operand: 2 "nonimmediate_operand" "vm")) (match_operand:SI 3 "const_int_operand" "n")))] "TARGET_AVX512F" { int mask; - if (INTVAL (operands[3]) == 0xFFF) - mask = 0; - else if ( INTVAL (operands[3]) == 0xF0FF) - mask = 1; - else if ( INTVAL (operands[3]) == 0xFF0F) - mask = 2; - else if ( INTVAL (operands[3]) == 0xFFF0) - mask = 3; + int selector = INTVAL (operands[3]); + + if (selector == 0xFFF || selector == 0x3F) + mask = 0; + else if ( selector == 0xF0FF || selector == 0xCF) + mask = 1; + else if ( selector == 0xFF0F || selector == 0xF3) + mask = 2; + else if ( selector == 0xFFF0 || selector == 0xFC) + mask = 3; else gcc_unreachable (); operands[3] = GEN_INT (mask); - return "vinsert32x4\t{%3, %2, %1, %0|%0, %1, %2, %3}"; + return "vinsert\t{%3, %2, %1, %0|%0, %1, %2, %3}"; } [(set_attr "type" "sselog") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") (set_attr "mode" "")]) -(define_expand "avx512f_vinsert64x4_mask" - [(match_operand:V8FI 0 "register_operand") - (match_operand:V8FI 1 "register_operand") +(define_expand "_vinsert_mask" + [(match_operand:AVX512_VEC_2 0 "register_operand") + (match_operand:AVX512_VEC_2 1 "register_operand") (match_operand: 2 "nonimmediate_operand") 
(match_operand:SI 3 "const_0_to_1_operand") - (match_operand:V8FI 4 "register_operand") + (match_operand:AVX512_VEC_2 4 "register_operand") (match_operand: 5 "register_operand")] "TARGET_AVX512F" { @@ -11288,6 +11272,40 @@ DONE; }) +(define_insn "vec_set_lo_" + [(set (match_operand:V16FI 0 "register_operand" "=v") + (vec_concat:V16FI + (match_operand: 2 "nonimmediate_operand" "vm") + (vec_select: + (match_operand:V16FI 1 "register_operand" "v") + (parallel [(const_int 8) (const_int 9) + (const_int 10) (const_int 11) + (const_int 12) (const_int 13) + (const_int 14) (const_int 15)]))))] + "TARGET_AVX512DQ" + "vinsert32x8\t{$0x0, %2, %1, %0|%0, %1, %2, $0x0}" + [(set_attr "type" "sselog") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +(define_insn "vec_set_hi_" + [(set (match_operand:V16FI 0 "register_operand" "=v") + (vec_concat:V16FI + (match_operand: 2 "nonimmediate_operand" "vm") + (vec_select: + (match_operand:V16FI 1 "register_operand" "v") + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)]))))] + "TARGET_AVX512DQ" + "vinsert32x8\t{$0x1, %2, %1, %0|%0, %1, %2, $0x1}" + [(set_attr "type" "sselog") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + (define_insn "vec_set_lo_" [(set (match_operand:V8FI 0 "register_operand" "=v") (vec_concat:V8FI @@ -16330,6 +16348,34 @@ (set_attr "length_immediate" "1") (set_attr "prefix" "orig,vex")]) +(define_expand "avx512vl_vinsert" + [(match_operand:VI48F_256 0 "register_operand") + (match_operand:VI48F_256 1 "register_operand") + (match_operand: 2 "nonimmediate_operand") + (match_operand:SI 3 "const_0_to_1_operand") + (match_operand:VI48F_256 4 "register_operand") + (match_operand: 5 "register_operand")] + "TARGET_AVX512VL" +{ + rtx (*insn)(rtx, rtx, rtx, rtx, rtx); + + switch (INTVAL (operands[3])) + { + case 0: + insn = gen_vec_set_lo__mask; + break; + case 1: 
+ insn = gen_vec_set_hi__mask; + break; + default: + gcc_unreachable (); + } + + emit_insn (insn (operands[0], operands[1], operands[2], operands[4], + operands[5])); + DONE; +}) + (define_expand "avx_vinsertf128" [(match_operand:V_256 0 "register_operand") (match_operand:V_256 1 "register_operand") @@ -16355,92 +16401,82 @@ DONE; }) -(define_insn "avx2_vec_set_lo_v4di" - [(set (match_operand:V4DI 0 "register_operand" "=x") - (vec_concat:V4DI - (match_operand:V2DI 2 "nonimmediate_operand" "xm") - (vec_select:V2DI - (match_operand:V4DI 1 "register_operand" "x") - (parallel [(const_int 2) (const_int 3)]))))] - "TARGET_AVX2" - "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" - [(set_attr "type" "sselog") - (set_attr "prefix_extra" "1") - (set_attr "length_immediate" "1") - (set_attr "prefix" "vex") - (set_attr "mode" "OI")]) - -(define_insn "avx2_vec_set_hi_v4di" - [(set (match_operand:V4DI 0 "register_operand" "=x") - (vec_concat:V4DI - (vec_select:V2DI - (match_operand:V4DI 1 "register_operand" "x") - (parallel [(const_int 0) (const_int 1)])) - (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] - "TARGET_AVX2" - "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" - [(set_attr "type" "sselog") - (set_attr "prefix_extra" "1") - (set_attr "length_immediate" "1") - (set_attr "prefix" "vex") - (set_attr "mode" "OI")]) - -(define_insn "vec_set_lo_" - [(set (match_operand:VI8F_256 0 "register_operand" "=x") +(define_insn "vec_set_lo_" + [(set (match_operand:VI8F_256 0 "register_operand" "=v") (vec_concat:VI8F_256 - (match_operand: 2 "nonimmediate_operand" "xm") + (match_operand: 2 "nonimmediate_operand" "vm") (vec_select: - (match_operand:VI8F_256 1 "register_operand" "x") + (match_operand:VI8F_256 1 "register_operand" "v") (parallel [(const_int 2) (const_int 3)]))))] "TARGET_AVX" - "vinsert\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" +{ + if (TARGET_AVX512VL) + return "vinsert64x2\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"; + else + return "vinsert\t{$0x0, %2, %1, %0|%0, %1, %2, 
0x0}"; +} [(set_attr "type" "sselog") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "vex") (set_attr "mode" "")]) -(define_insn "vec_set_hi_" - [(set (match_operand:VI8F_256 0 "register_operand" "=x") +(define_insn "vec_set_hi_" + [(set (match_operand:VI8F_256 0 "register_operand" "=v") (vec_concat:VI8F_256 (vec_select: - (match_operand:VI8F_256 1 "register_operand" "x") + (match_operand:VI8F_256 1 "register_operand" "v") (parallel [(const_int 0) (const_int 1)])) - (match_operand: 2 "nonimmediate_operand" "xm")))] + (match_operand: 2 "nonimmediate_operand" "vm")))] "TARGET_AVX" - "vinsert\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" +{ + if (TARGET_AVX512VL) + return "vinsert64x2\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"; + else + return "vinsert\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"; +} [(set_attr "type" "sselog") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "vex") (set_attr "mode" "")]) -(define_insn "vec_set_lo_" - [(set (match_operand:VI4F_256 0 "register_operand" "=x") +(define_insn "vec_set_lo_" + [(set (match_operand:VI4F_256 0 "register_operand" "=v") (vec_concat:VI4F_256 - (match_operand: 2 "nonimmediate_operand" "xm") + (match_operand: 2 "nonimmediate_operand" "vm") (vec_select: - (match_operand:VI4F_256 1 "register_operand" "x") + (match_operand:VI4F_256 1 "register_operand" "v") (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)]))))] "TARGET_AVX" - "vinsert\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" +{ + if (TARGET_AVX512VL) + return "vinsert32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"; + else + return "vinsert\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"; +} [(set_attr "type" "sselog") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "vex") (set_attr "mode" "")]) -(define_insn "vec_set_hi_" - [(set (match_operand:VI4F_256 0 "register_operand" "=x") +(define_insn "vec_set_hi_" + [(set (match_operand:VI4F_256 0 "register_operand" "=v") (vec_concat:VI4F_256 
(vec_select: - (match_operand:VI4F_256 1 "register_operand" "x") + (match_operand:VI4F_256 1 "register_operand" "v") (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])) - (match_operand: 2 "nonimmediate_operand" "xm")))] + (match_operand: 2 "nonimmediate_operand" "vm")))] "TARGET_AVX" - "vinsert\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" +{ + if (TARGET_AVX512VL) + return "vinsert32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"; + else + return "vinsert\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"; +} [(set_attr "type" "sselog") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") @@ -16611,55 +16647,6 @@ DONE; }) -(define_expand "avx2_extracti128" - [(match_operand:V2DI 0 "nonimmediate_operand") - (match_operand:V4DI 1 "register_operand") - (match_operand:SI 2 "const_0_to_1_operand")] - "TARGET_AVX2" -{ - rtx (*insn)(rtx, rtx); - - switch (INTVAL (operands[2])) - { - case 0: - insn = gen_vec_extract_lo_v4di; - break; - case 1: - insn = gen_vec_extract_hi_v4di; - break; - default: - gcc_unreachable (); - } - - emit_insn (insn (operands[0], operands[1])); - DONE; -}) - -(define_expand "avx2_inserti128" - [(match_operand:V4DI 0 "register_operand") - (match_operand:V4DI 1 "register_operand") - (match_operand:V2DI 2 "nonimmediate_operand") - (match_operand:SI 3 "const_0_to_1_operand")] - "TARGET_AVX2" -{ - rtx (*insn)(rtx, rtx, rtx); - - switch (INTVAL (operands[3])) - { - case 0: - insn = gen_avx2_vec_set_lo_v4di; - break; - case 1: - insn = gen_avx2_vec_set_hi_v4di; - break; - default: - gcc_unreachable (); - } - - emit_insn (insn (operands[0], operands[1], operands[2])); - DONE; -}) - (define_insn "_ashrv" [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v") (ashiftrt:VI48_AVX512F_AVX512VL -- 2.30.2