From: H.J. Lu Date: Wed, 15 May 2019 15:04:08 +0000 (+0000) Subject: i386: Emulate MMX packsswb/packssdw/packuswb with SSE2 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=b74ebb2a36adbb18da52f7eb25b54655b04c7be4;p=gcc.git i386: Emulate MMX packsswb/packssdw/packuswb with SSE2 Emulate MMX packsswb/packssdw/packuswb with SSE packsswb/packssdw/packuswb plus moving bits 64:95 to bits 32:63 in SSE register. Only SSE register source operand is allowed. PR target/89021 * config/i386/i386-expand.c (ix86_move_vector_high_sse_to_mmx): New function. (ix86_split_mmx_pack): Likewise. * config/i386/i386-protos.h (ix86_move_vector_high_sse_to_mmx): New prototype. (ix86_split_mmx_pack): Likewise. * config/i386/i386.md (mmx_isa): New. (enabled): Also check mmx_isa. * config/i386/mmx.md (any_s_truncate): New code iterator. (s_trunsuffix): New code attr. (mmx_packsswb): Removed. (mmx_packssdw): Likewise. (mmx_packuswb): Likewise. (mmx_pack<s_trunsuffix>swb): New define_insn_and_split to emulate MMX packsswb/packuswb with SSE2. (mmx_packssdw): Likewise. * config/i386/predicates.md (register_mmxmem_operand): New. Co-Authored-By: Uros Bizjak From-SVN: r271215 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index eb67d11d285..01783a9b773 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,25 @@ +2019-05-15 H.J. Lu + Uros Bizjak + + PR target/89021 + * config/i386/i386-expand.c (ix86_move_vector_high_sse_to_mmx): + New function. + (ix86_split_mmx_pack): Likewise. + * config/i386/i386-protos.h (ix86_move_vector_high_sse_to_mmx): + New prototype. + (ix86_split_mmx_pack): Likewise. + * config/i386/i386.md (mmx_isa): New. + (enabled): Also check mmx_isa. + * config/i386/mmx.md (any_s_truncate): New code iterator. + (s_trunsuffix): New code attr. + (mmx_packsswb): Removed. + (mmx_packssdw): Likewise. + (mmx_packuswb): Likewise. + (mmx_pack<s_trunsuffix>swb): New define_insn_and_split to emulate + MMX packsswb/packuswb with SSE2. + (mmx_packssdw): Likewise. 
+ * config/i386/predicates.md (register_mmxmem_operand): New. + 2019-05-15 H.J. Lu PR target/89021 diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index a55d4923be4..f1e05937f46 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -662,6 +662,60 @@ ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[]) gcc_unreachable (); } +/* Move bits 64:95 to bits 32:63. */ + +void +ix86_move_vector_high_sse_to_mmx (rtx op) +{ + rtx mask = gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (4, GEN_INT (0), GEN_INT (2), + GEN_INT (0), GEN_INT (0))); + rtx dest = lowpart_subreg (V4SImode, op, GET_MODE (op)); + op = gen_rtx_VEC_SELECT (V4SImode, dest, mask); + rtx insn = gen_rtx_SET (dest, op); + emit_insn (insn); +} + +/* Split MMX pack with signed/unsigned saturation with SSE/SSE2. */ + +void +ix86_split_mmx_pack (rtx operands[], enum rtx_code code) +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + + machine_mode dmode = GET_MODE (op0); + machine_mode smode = GET_MODE (op1); + machine_mode inner_dmode = GET_MODE_INNER (dmode); + machine_mode inner_smode = GET_MODE_INNER (smode); + + /* Get the corresponding SSE mode for destination. */ + int nunits = 16 / GET_MODE_SIZE (inner_dmode); + machine_mode sse_dmode = mode_for_vector (GET_MODE_INNER (dmode), + nunits).require (); + machine_mode sse_half_dmode = mode_for_vector (GET_MODE_INNER (dmode), + nunits / 2).require (); + + /* Get the corresponding SSE mode for source. */ + nunits = 16 / GET_MODE_SIZE (inner_smode); + machine_mode sse_smode = mode_for_vector (GET_MODE_INNER (smode), + nunits).require (); + + /* Generate SSE pack with signed/unsigned saturation. 
*/ + rtx dest = lowpart_subreg (sse_dmode, op0, GET_MODE (op0)); + op1 = lowpart_subreg (sse_smode, op1, GET_MODE (op1)); + op2 = lowpart_subreg (sse_smode, op2, GET_MODE (op2)); + + op1 = gen_rtx_fmt_e (code, sse_half_dmode, op1); + op2 = gen_rtx_fmt_e (code, sse_half_dmode, op2); + rtx insn = gen_rtx_SET (dest, gen_rtx_VEC_CONCAT (sse_dmode, + op1, op2)); + emit_insn (insn); + + ix86_move_vector_high_sse_to_mmx (op0); +} + /* Helper function of ix86_fixup_binary_operands to canonicalize operand order. Returns true if the operands should be swapped. */ diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 597af643eb8..760f530bf50 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -200,6 +200,9 @@ extern void ix86_expand_vecop_qihi (enum rtx_code, rtx, rtx, rtx); extern rtx ix86_split_stack_guard (void); +extern void ix86_move_vector_high_sse_to_mmx (rtx); +extern void ix86_split_mmx_pack (rtx[], enum rtx_code); + #ifdef TREE_CODE extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int); #endif /* TREE_CODE */ diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 05411221197..2ae4bb84fdf 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -796,6 +796,10 @@ avx512vl,noavx512vl,x64_avx512dq,x64_avx512bw" (const_string "base")) +;; Define instruction set of MMX instructions +(define_attr "mmx_isa" "base,native,x64,x64_noavx,x64_avx" + (const_string "base")) + (define_attr "enabled" "" (cond [(eq_attr "isa" "x64") (symbol_ref "TARGET_64BIT") (eq_attr "isa" "x64_sse2") @@ -834,6 +838,15 @@ (eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ") (eq_attr "isa" "avx512vl") (symbol_ref "TARGET_AVX512VL") (eq_attr "isa" "noavx512vl") (symbol_ref "!TARGET_AVX512VL") + + (eq_attr "mmx_isa" "native") + (symbol_ref "!TARGET_MMX_WITH_SSE") + (eq_attr "mmx_isa" "x64") + (symbol_ref "TARGET_MMX_WITH_SSE") + (eq_attr "mmx_isa" "x64_avx") + (symbol_ref 
"TARGET_MMX_WITH_SSE && TARGET_AVX") + (eq_attr "mmx_isa" "x64_noavx") + (symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX") ] (const_int 1))) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index b566cc80020..28c9aa744ed 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -1046,41 +1046,48 @@ ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(define_insn "mmx_packsswb" - [(set (match_operand:V8QI 0 "register_operand" "=y") +;; Used in signed and unsigned truncations with saturation. +(define_code_iterator any_s_truncate [ss_truncate us_truncate]) +;; Instruction suffix for truncations with saturation. +(define_code_attr s_trunsuffix [(ss_truncate "s") (us_truncate "u")]) + +(define_insn_and_split "mmx_pack<s_trunsuffix>swb" + [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv") (vec_concat:V8QI - (ss_truncate:V4QI - (match_operand:V4HI 1 "register_operand" "0")) - (ss_truncate:V4QI - (match_operand:V4HI 2 "nonimmediate_operand" "ym"))))] - "TARGET_MMX" - "packsswb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) + (any_s_truncate:V4QI + (match_operand:V4HI 1 "register_operand" "0,0,Yv")) + (any_s_truncate:V4QI + (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))))] + "TARGET_MMX || TARGET_MMX_WITH_SSE" + "@ + pack<s_trunsuffix>swb\t{%2, %0|%0, %2} + # + #" + "TARGET_MMX_WITH_SSE && reload_completed" + [(const_int 0)] + "ix86_split_mmx_pack (operands, <any_s_truncate:CODE>); DONE;" + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "mmxshft,sselog,sselog") + (set_attr "mode" "DI,TI,TI")]) -(define_insn "mmx_packssdw" - [(set (match_operand:V4HI 0 "register_operand" "=y") +(define_insn_and_split "mmx_packssdw" + [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv") (vec_concat:V4HI (ss_truncate:V2HI - (match_operand:V2SI 1 "register_operand" "0")) + (match_operand:V2SI 1 "register_operand" "0,0,Yv")) (ss_truncate:V2HI - (match_operand:V2SI 2 "nonimmediate_operand" "ym"))))] - "TARGET_MMX" - 
"packssdw\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) - -(define_insn "mmx_packuswb" - [(set (match_operand:V8QI 0 "register_operand" "=y") - (vec_concat:V8QI - (us_truncate:V4QI - (match_operand:V4HI 1 "register_operand" "0")) - (us_truncate:V4QI - (match_operand:V4HI 2 "nonimmediate_operand" "ym"))))] - "TARGET_MMX" - "packuswb\t{%2, %0|%0, %2}" - [(set_attr "type" "mmxshft") - (set_attr "mode" "DI")]) + (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv"))))] + "TARGET_MMX || TARGET_MMX_WITH_SSE" + "@ + packssdw\t{%2, %0|%0, %2} + # + #" + "TARGET_MMX_WITH_SSE && reload_completed" + [(const_int 0)] + "ix86_split_mmx_pack (operands, SS_TRUNCATE); DONE;" + [(set_attr "mmx_isa" "native,x64_noavx,x64_avx") + (set_attr "type" "mmxshft,sselog,sselog") + (set_attr "mode" "DI,TI,TI")]) (define_insn "mmx_punpckhbw" [(set (match_operand:V8QI 0 "register_operand" "=y") diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 865947debcc..29867fbeab0 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -49,6 +49,13 @@ (and (match_code "reg") (match_test "MMX_REGNO_P (REGNO (op))"))) +;; Match register operands, but include memory operands for +;; !TARGET_MMX_WITH_SSE. +(define_predicate "register_mmxmem_operand" + (ior (match_operand 0 "register_operand") + (and (not (match_test "TARGET_MMX_WITH_SSE")) + (match_operand 0 "memory_operand")))) + ;; True if the operand is an SSE register. (define_predicate "sse_reg_operand" (and (match_code "reg")