From adedd5c173388ae505470df152b9cb3947339566 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek
Date: Tue, 3 May 2016 13:37:25 +0200
Subject: [PATCH] re PR target/49244 (__sync or __atomic builtins will not emit 'lock bts/btr/btc')

	PR target/49244
	* tree-ssa-ccp.c: Include stor-layout.h and optabs-query.h.
	(optimize_atomic_bit_test_and): New function.
	(pass_fold_builtins::execute): Use it.
	* optabs.def (atomic_bit_test_and_set_optab,
	atomic_bit_test_and_complement_optab,
	atomic_bit_test_and_reset_optab): New optabs.
	* internal-fn.def (ATOMIC_BIT_TEST_AND_SET,
	ATOMIC_BIT_TEST_AND_COMPLEMENT, ATOMIC_BIT_TEST_AND_RESET): New ifns.
	* builtins.h (expand_ifn_atomic_bit_test_and): New prototype.
	* builtins.c (expand_ifn_atomic_bit_test_and): New function.
	* internal-fn.c (expand_ATOMIC_BIT_TEST_AND_SET,
	expand_ATOMIC_BIT_TEST_AND_COMPLEMENT,
	expand_ATOMIC_BIT_TEST_AND_RESET): New functions.
	* doc/md.texi (atomic_bit_test_and_set@var{mode},
	atomic_bit_test_and_complement@var{mode},
	atomic_bit_test_and_reset@var{mode}): Document.
	* config/i386/sync.md (atomic_bit_test_and_set,
	atomic_bit_test_and_complement,
	atomic_bit_test_and_reset): New expanders.
	(atomic_bit_test_and_set_1,
	atomic_bit_test_and_complement_1,
	atomic_bit_test_and_reset_1): New insns.

	* gcc.target/i386/pr49244-1.c: New test.
	* gcc.target/i386/pr49244-2.c: New test.

From-SVN: r235813
---
 gcc/ChangeLog                             |  26 ++
 gcc/builtins.c                            |  84 +++++++
 gcc/builtins.h                            |   1 +
 gcc/config/i386/sync.md                   | 111 ++++++++
 gcc/doc/md.texi                           |  27 ++
 gcc/internal-fn.c                         |  25 ++
 gcc/internal-fn.def                       |   5 +
 gcc/optabs.def                            |   3 +
 gcc/testsuite/ChangeLog                   |   6 +
 gcc/testsuite/gcc.target/i386/pr49244-1.c | 188 ++++++++++++++
 gcc/testsuite/gcc.target/i386/pr49244-2.c | 108 ++++++++
 gcc/tree-ssa-ccp.c                        | 292 ++++++++++++++++++++++
 12 files changed, 876 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr49244-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr49244-2.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index cc520e895ef..7122b6c2ba9 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,29 @@
+2016-05-03  Jakub Jelinek
+
+	PR target/49244
+	* tree-ssa-ccp.c: Include stor-layout.h and optabs-query.h.
+	(optimize_atomic_bit_test_and): New function.
+	(pass_fold_builtins::execute): Use it.
+	* optabs.def (atomic_bit_test_and_set_optab,
+	atomic_bit_test_and_complement_optab,
+	atomic_bit_test_and_reset_optab): New optabs.
+	* internal-fn.def (ATOMIC_BIT_TEST_AND_SET,
+	ATOMIC_BIT_TEST_AND_COMPLEMENT, ATOMIC_BIT_TEST_AND_RESET): New ifns.
+	* builtins.h (expand_ifn_atomic_bit_test_and): New prototype.
+	* builtins.c (expand_ifn_atomic_bit_test_and): New function.
+	* internal-fn.c (expand_ATOMIC_BIT_TEST_AND_SET,
+	expand_ATOMIC_BIT_TEST_AND_COMPLEMENT,
+	expand_ATOMIC_BIT_TEST_AND_RESET): New functions.
+	* doc/md.texi (atomic_bit_test_and_set@var{mode},
+	atomic_bit_test_and_complement@var{mode},
+	atomic_bit_test_and_reset@var{mode}): Document.
+	* config/i386/sync.md (atomic_bit_test_and_set,
+	atomic_bit_test_and_complement,
+	atomic_bit_test_and_reset): New expanders.
+	(atomic_bit_test_and_set_1,
+	atomic_bit_test_and_complement_1,
+	atomic_bit_test_and_reset_1): New insns.
+
 2016-05-03  Richard Sandiford
 
 	PR rtl-optimization/70687
diff --git a/gcc/builtins.c b/gcc/builtins.c
index 3d89bafe34a..7d876199bca 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -5310,6 +5310,90 @@ expand_builtin_atomic_fetch_op (machine_mode mode, tree exp, rtx target,
   return ret;
 }
 
+/* Expand IFN_ATOMIC_BIT_TEST_AND_* internal function.  */
+
+void
+expand_ifn_atomic_bit_test_and (gcall *call)
+{
+  tree ptr = gimple_call_arg (call, 0);
+  tree bit = gimple_call_arg (call, 1);
+  tree flag = gimple_call_arg (call, 2);
+  tree lhs = gimple_call_lhs (call);
+  enum memmodel model = MEMMODEL_SYNC_SEQ_CST;
+  machine_mode mode = TYPE_MODE (TREE_TYPE (flag));
+  enum rtx_code code;
+  optab optab;
+  struct expand_operand ops[5];
+
+  gcc_assert (flag_inline_atomics);
+
+  if (gimple_call_num_args (call) == 4)
+    model = get_memmodel (gimple_call_arg (call, 3));
+
+  rtx mem = get_builtin_sync_mem (ptr, mode);
+  rtx val = expand_expr_force_mode (bit, mode);
+
+  switch (gimple_call_internal_fn (call))
+    {
+    case IFN_ATOMIC_BIT_TEST_AND_SET:
+      code = IOR;
+      optab = atomic_bit_test_and_set_optab;
+      break;
+    case IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT:
+      code = XOR;
+      optab = atomic_bit_test_and_complement_optab;
+      break;
+    case IFN_ATOMIC_BIT_TEST_AND_RESET:
+      code = AND;
+      optab = atomic_bit_test_and_reset_optab;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  if (lhs == NULL_TREE)
+    {
+      val = expand_simple_binop (mode, ASHIFT, const1_rtx,
+                                 val, NULL_RTX, true, OPTAB_DIRECT);
+      if (code == AND)
+        val = expand_simple_unop (mode, NOT, val, NULL_RTX, true);
+      expand_atomic_fetch_op (const0_rtx, mem, val, code, model, false);
+      return;
+    }
+
+  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  enum insn_code icode = direct_optab_handler (optab, mode);
+  gcc_assert (icode != CODE_FOR_nothing);
+  create_output_operand (&ops[0], target, mode);
+  create_fixed_operand (&ops[1], mem);
+  create_convert_operand_to (&ops[2], val, mode, true);
+  create_integer_operand (&ops[3], model);
+  create_integer_operand (&ops[4], integer_onep (flag));
+  if (maybe_expand_insn (icode, 5, ops))
+    return;
+
+  rtx bitval = val;
+  val = expand_simple_binop (mode, ASHIFT, const1_rtx,
+                             val, NULL_RTX, true, OPTAB_DIRECT);
+  rtx maskval = val;
+  if (code == AND)
+    val = expand_simple_unop (mode, NOT, val, NULL_RTX, true);
+  rtx result = expand_atomic_fetch_op (gen_reg_rtx (mode), mem, val,
+                                       code, model, false);
+  if (integer_onep (flag))
+    {
+      result = expand_simple_binop (mode, ASHIFTRT, result, bitval,
+                                    NULL_RTX, true, OPTAB_DIRECT);
+      result = expand_simple_binop (mode, AND, result, const1_rtx, target,
+                                    true, OPTAB_DIRECT);
+    }
+  else
+    result = expand_simple_binop (mode, AND, result, maskval, target, true,
+                                  OPTAB_DIRECT);
+  if (result != target)
+    emit_move_insn (target, result);
+}
+
 /* Expand an atomic clear operation.
	void _atomic_clear (BOOL *obj, enum memmodel)
    EXP is the call expression.
*/ diff --git a/gcc/builtins.h b/gcc/builtins.h index b49def349d0..51e298cb76b 100644 --- a/gcc/builtins.h +++ b/gcc/builtins.h @@ -71,6 +71,7 @@ extern tree std_fn_abi_va_list (tree); extern tree std_canonical_va_list_type (tree); extern void std_expand_builtin_va_start (tree, rtx); extern void expand_builtin_trap (void); +extern void expand_ifn_atomic_bit_test_and (gcall *); extern rtx expand_builtin (tree, rtx, rtx, machine_mode, int); extern rtx expand_builtin_with_bounds (tree, rtx, rtx, machine_mode, int); extern enum built_in_function builtin_mathfn_code (const_tree); diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md index bc4fd34e6d5..8322676a7b0 100644 --- a/gcc/config/i386/sync.md +++ b/gcc/config/i386/sync.md @@ -605,3 +605,114 @@ (clobber (reg:CC FLAGS_REG))] "" "lock{%;} %K2{}\t{%1, %0|%0, %1}") + +(define_expand "atomic_bit_test_and_set" + [(match_operand:SWI248 0 "register_operand") + (match_operand:SWI248 1 "memory_operand") + (match_operand:SWI248 2 "nonmemory_operand") + (match_operand:SI 3 "const_int_operand") ;; model + (match_operand:SI 4 "const_int_operand")] + "" +{ + emit_insn (gen_atomic_bit_test_and_set_1 (operands[1], operands[2], + operands[3])); + rtx tem = gen_reg_rtx (QImode); + ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx); + rtx result = convert_modes (mode, QImode, tem, 1); + if (operands[4] == const0_rtx) + result = expand_simple_binop (mode, ASHIFT, result, + operands[2], operands[0], 0, OPTAB_DIRECT); + if (result != operands[0]) + emit_move_insn (operands[0], result); + DONE; +}) + +(define_insn "atomic_bit_test_and_set_1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (unspec_volatile:SWI248 + [(match_operand:SWI248 0 "memory_operand" "+m") + (match_operand:SI 2 "const_int_operand")] ;; model + UNSPECV_XCHG) + (const_int 0))) + (set (zero_extract:SWI248 (match_dup 0) + (const_int 1) + (match_operand:SWI248 1 "nonmemory_operand" "rN")) + (const_int 1))] + "" + "lock{%;} %K2bts{}\t{%1, %0|%0, %1}") + +(define_expand "atomic_bit_test_and_complement" + [(match_operand:SWI248 0 "register_operand") + (match_operand:SWI248 1 "memory_operand") + (match_operand:SWI248 2 "nonmemory_operand") + (match_operand:SI 3 "const_int_operand") ;; model + (match_operand:SI 4 "const_int_operand")] + "" +{ + emit_insn (gen_atomic_bit_test_and_complement_1 (operands[1], + operands[2], + operands[3])); + rtx tem = gen_reg_rtx (QImode); + ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx); + rtx result = convert_modes (mode, QImode, tem, 1); + if (operands[4] == const0_rtx) + result = expand_simple_binop (mode, ASHIFT, result, + operands[2], operands[0], 0, OPTAB_DIRECT); + if (result != operands[0]) + emit_move_insn (operands[0], result); + DONE; +}) + +(define_insn "atomic_bit_test_and_complement_1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (unspec_volatile:SWI248 + [(match_operand:SWI248 0 "memory_operand" "+m") + (match_operand:SI 2 "const_int_operand")] ;; model + UNSPECV_XCHG) + (const_int 0))) + (set (zero_extract:SWI248 (match_dup 0) + (const_int 1) + (match_operand:SWI248 1 "nonmemory_operand" "rN")) + (not:SWI248 (zero_extract:SWI248 (match_dup 0) + (const_int 1) + (match_dup 1))))] + "" + "lock{%;} %K2btc{}\t{%1, %0|%0, %1}") + +(define_expand "atomic_bit_test_and_reset" + [(match_operand:SWI248 0 "register_operand") + (match_operand:SWI248 1 "memory_operand") + (match_operand:SWI248 2 "nonmemory_operand") + (match_operand:SI 3 "const_int_operand") ;; model + (match_operand:SI 4 "const_int_operand")] + 
"" +{ + emit_insn (gen_atomic_bit_test_and_reset_1 (operands[1], operands[2], + operands[3])); + rtx tem = gen_reg_rtx (QImode); + ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx); + rtx result = convert_modes (mode, QImode, tem, 1); + if (operands[4] == const0_rtx) + result = expand_simple_binop (mode, ASHIFT, result, + operands[2], operands[0], 0, OPTAB_DIRECT); + if (result != operands[0]) + emit_move_insn (operands[0], result); + DONE; +}) + +(define_insn "atomic_bit_test_and_reset_1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (unspec_volatile:SWI248 + [(match_operand:SWI248 0 "memory_operand" "+m") + (match_operand:SI 2 "const_int_operand")] ;; model + UNSPECV_XCHG) + (const_int 0))) + (set (zero_extract:SWI248 (match_dup 0) + (const_int 1) + (match_operand:SWI248 1 "nonmemory_operand" "rN")) + (const_int 0))] + "" + "lock{%;} %K2btr{}\t{%1, %0|%0, %1}") diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 4c83719588a..afaecef4e54 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -6909,6 +6909,33 @@ The specific value that defines "set" is implementation defined, and is normally based on what is performed by the native atomic test and set instruction. +@cindex @code{atomic_bit_test_and_set@var{mode}} instruction pattern +@cindex @code{atomic_bit_test_and_complement@var{mode}} instruction pattern +@cindex @code{atomic_bit_test_and_reset@var{mode}} instruction pattern +@item @samp{atomic_bit_test_and_set@var{mode}} +@itemx @samp{atomic_bit_test_and_complement@var{mode}} +@itemx @samp{atomic_bit_test_and_reset@var{mode}} +These patterns emit code for an atomic bitwise operation on memory with memory +model semantics, and return the original value of the specified bit. +Operand 0 is an output operand which contains the value of the specified bit +from the memory location before the operation was performed. Operand 1 is the +memory on which the atomic operation is performed. Operand 2 is the bit within +the operand, starting with least significant bit. Operand 3 is the memory model +to be used by the operation. Operand 4 is a flag - it is @code{const1_rtx} +if operand 0 should contain the original value of the specified bit in the +least significant bit of the operand, and @code{const0_rtx} if the bit should +be in its original position in the operand. +@code{atomic_bit_test_and_set@var{mode}} atomically sets the specified bit after +remembering its original value, @code{atomic_bit_test_and_complement@var{mode}} +inverts the specified bit and @code{atomic_bit_test_and_reset@var{mode}} clears +the specified bit. + +If these patterns are not defined, attempts will be made to use +@code{atomic_fetch_or@var{mode}}, @code{atomic_fetch_xor@var{mode}} or +@code{atomic_fetch_and@var{mode}} instruction patterns, or their @code{sync} +counterparts. If none of these are available a compare-and-swap +loop will be used. + @cindex @code{mem_thread_fence@var{mode}} instruction pattern @item @samp{mem_thread_fence@var{mode}} This pattern emits code required to implement a thread fence with diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c index e70c73aba8a..c867ddc0ef7 100644 --- a/gcc/internal-fn.c +++ b/gcc/internal-fn.c @@ -39,6 +39,7 @@ along with GCC; see the file COPYING3. If not see #include "expr.h" #include "ubsan.h" #include "recog.h" +#include "builtins.h" /* The names of each internal function, indexed by function number. 
*/ const char *const internal_fn_name_array[] = { @@ -2118,6 +2119,30 @@ expand_SET_EDOM (internal_fn, gcall *) #endif } +/* Expand atomic bit test and set. */ + +static void +expand_ATOMIC_BIT_TEST_AND_SET (internal_fn, gcall *call) +{ + expand_ifn_atomic_bit_test_and (call); +} + +/* Expand atomic bit test and complement. */ + +static void +expand_ATOMIC_BIT_TEST_AND_COMPLEMENT (internal_fn, gcall *call) +{ + expand_ifn_atomic_bit_test_and (call); +} + +/* Expand atomic bit test and reset. */ + +static void +expand_ATOMIC_BIT_TEST_AND_RESET (internal_fn, gcall *call) +{ + expand_ifn_atomic_bit_test_and (call); +} + /* Expand a call to FN using the operands in STMT. FN has a single output operand and NARGS input operands. */ diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index a62f3e8034e..e729d852a13 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -189,6 +189,11 @@ DEF_INTERNAL_FN (GOACC_REDUCTION, ECF_NOTHROW | ECF_LEAF, NULL) current target. */ DEF_INTERNAL_FN (SET_EDOM, ECF_LEAF | ECF_NOTHROW, NULL) +/* Atomic functions. */ +DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_SET, ECF_LEAF | ECF_NOTHROW, NULL) +DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_COMPLEMENT, ECF_LEAF | ECF_NOTHROW, NULL) +DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_RESET, ECF_LEAF | ECF_NOTHROW, NULL) + #undef DEF_INTERNAL_INT_FN #undef DEF_INTERNAL_FLT_FN #undef DEF_INTERNAL_OPTAB_FN diff --git a/gcc/optabs.def b/gcc/optabs.def index c938b42ea4e..8875e30d416 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -337,6 +337,9 @@ OPTAB_D (atomic_add_fetch_optab, "atomic_add_fetch$I$a") OPTAB_D (atomic_add_optab, "atomic_add$I$a") OPTAB_D (atomic_and_fetch_optab, "atomic_and_fetch$I$a") OPTAB_D (atomic_and_optab, "atomic_and$I$a") +OPTAB_D (atomic_bit_test_and_set_optab, "atomic_bit_test_and_set$I$a") +OPTAB_D (atomic_bit_test_and_complement_optab, "atomic_bit_test_and_complement$I$a") +OPTAB_D (atomic_bit_test_and_reset_optab, "atomic_bit_test_and_reset$I$a") OPTAB_D (atomic_compare_and_swap_optab, "atomic_compare_and_swap$I$a") OPTAB_D (atomic_exchange_optab, "atomic_exchange$I$a") OPTAB_D (atomic_fetch_add_optab, "atomic_fetch_add$I$a") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index bc7c93f5a5d..b29f76334fe 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2016-05-03 Jakub Jelinek + + PR target/49244 + * gcc.target/i386/pr49244-1.c: New test. + * gcc.target/i386/pr49244-2.c: New test. 
+ 2016-05-03 Bernd Schmidt PR rtl-optimization/44281 diff --git a/gcc/testsuite/gcc.target/i386/pr49244-1.c b/gcc/testsuite/gcc.target/i386/pr49244-1.c new file mode 100644 index 00000000000..70ccf6e935a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr49244-1.c @@ -0,0 +1,188 @@ +/* PR target/49244 */ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +void bar (void); + +__attribute__((noinline, noclone)) int +f1 (int *a, int bit) +{ + unsigned int mask = (1u << bit); + return (__sync_fetch_and_or (a, mask) & mask) != 0; +} + +__attribute__((noinline, noclone)) int +f2 (int *a, int bit) +{ + unsigned int mask = (1u << bit); + unsigned int t1 = __atomic_fetch_or (a, mask, __ATOMIC_RELAXED); + unsigned int t2 = t1 & mask; + return t2 != 0; +} + +__attribute__((noinline, noclone)) long int +f3 (long int *a, int bit) +{ + unsigned long int mask = (1ul << bit); + return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) == 0; +} + +__attribute__((noinline, noclone)) int +f4 (int *a) +{ + unsigned int mask = (1u << 7); + return (__sync_fetch_and_or (a, mask) & mask) != 0; +} + +__attribute__((noinline, noclone)) int +f5 (int *a) +{ + unsigned int mask = (1u << 13); + return (__atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask) != 0; +} + +__attribute__((noinline, noclone)) int +f6 (int *a) +{ + unsigned int mask = (1u << 0); + return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0; +} + +__attribute__((noinline, noclone)) void +f7 (int *a, int bit) +{ + unsigned int mask = (1u << bit); + if ((__sync_fetch_and_xor (a, mask) & mask) != 0) + bar (); +} + +__attribute__((noinline, noclone)) void +f8 (int *a, int bit) +{ + unsigned int mask = (1u << bit); + if ((__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) == 0) + bar (); +} + +__attribute__((noinline, noclone)) int +f9 (int *a, int bit) +{ + unsigned int mask = (1u << bit); + return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0; +} + +__attribute__((noinline, noclone)) int +f10 (int *a) +{ + unsigned int mask = (1u << 7); + return (__sync_fetch_and_xor (a, mask) & mask) != 0; +} + +__attribute__((noinline, noclone)) int +f11 (int *a) +{ + unsigned int mask = (1u << 13); + return (__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) != 0; +} + +__attribute__((noinline, noclone)) int +f12 (int *a) +{ + unsigned int mask = (1u << 0); + return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0; +} + +__attribute__((noinline, noclone)) int +f13 (int *a, int bit) +{ + unsigned int mask = (1u << bit); + return (__sync_fetch_and_and (a, ~mask) & mask) != 0; +} + +__attribute__((noinline, noclone)) int +f14 (int *a, int bit) +{ + unsigned int mask = (1u << bit); + return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0; +} + +__attribute__((noinline, noclone)) int +f15 (int *a, int bit) +{ + unsigned int mask = (1u << bit); + return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0; +} + +__attribute__((noinline, noclone)) int +f16 (int *a) +{ + unsigned int mask = (1u << 7); + return (__sync_fetch_and_and (a, ~mask) & mask) != 0; +} + +__attribute__((noinline, noclone)) int +f17 (int *a) +{ + unsigned int mask = (1u << 13); + return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0; +} + +__attribute__((noinline, noclone)) int +f18 (int *a) +{ + unsigned int mask = (1u << 0); + return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0; +} + +__attribute__((noinline, noclone)) unsigned long int +f19 (unsigned long int *a, int bit) +{ + unsigned long 
int mask = (1ul << bit); + return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) != 0; +} + +__attribute__((noinline, noclone)) unsigned long int +f20 (unsigned long int *a) +{ + unsigned long int mask = (1ul << 7); + return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) == 0; +} + +__attribute__((noinline, noclone)) int +f21 (int *a, int bit) +{ + unsigned int mask = (1u << bit); + return (__sync_fetch_and_or (a, mask) & mask); +} + +__attribute__((noinline, noclone)) unsigned long int +f22 (unsigned long int *a) +{ + unsigned long int mask = (1ul << 7); + return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask); +} + +__attribute__((noinline, noclone)) unsigned long int +f23 (unsigned long int *a) +{ + unsigned long int mask = (1ul << 7); + return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask); +} + +__attribute__((noinline, noclone)) unsigned short int +f24 (unsigned short int *a) +{ + unsigned short int mask = (1u << 7); + return (__sync_fetch_and_or (a, mask) & mask) != 0; +} + +__attribute__((noinline, noclone)) unsigned short int +f25 (unsigned short int *a) +{ + unsigned short int mask = (1u << 7); + return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0; +} + +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 9 } } */ +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 10 } } */ +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 6 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr49244-2.c b/gcc/testsuite/gcc.target/i386/pr49244-2.c new file mode 100644 index 00000000000..847408e1a3f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr49244-2.c @@ -0,0 +1,108 @@ +/* PR target/49244 */ +/* { dg-do run } */ +/* { dg-options "-O2 -g" } */ + +int cnt; + +__attribute__((noinline, noclone)) void +bar (void) +{ + cnt++; +} + +#include "pr49244-1.c" + +int a; +long int b; +unsigned long int c; +unsigned short int d; + +int +main () +{ + __atomic_store_n (&a, 15, __ATOMIC_RELAXED); + if (f1 (&a, 2) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 15 + || f1 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31) + __builtin_abort (); + if (f2 (&a, 1) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31 + || f2 (&a, 5) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 63) + __builtin_abort (); + __atomic_store_n (&b, 24, __ATOMIC_RELAXED); + if (f3 (&b, 2) != 1 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28 + || f3 (&b, 3) != 0 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28) + __builtin_abort (); + __atomic_store_n (&a, 0, __ATOMIC_RELAXED); + if (f4 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128 + || f4 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128) + __builtin_abort (); + if (f5 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320 + || f5 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320) + __builtin_abort (); + if (f6 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321 + || f6 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321) + __builtin_abort (); + if (cnt != 0 + || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193 + || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321) + __builtin_abort (); + if ((f8 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193 + || (f8 (&a, 7), cnt) != 2 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321) + __builtin_abort (); + if (f9 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129 + || f9 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 
8321) + __builtin_abort (); + if (f10 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193 + || f10 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321) + __builtin_abort (); + if (f11 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129 + || f11 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321) + __builtin_abort (); + if (f12 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320 + || f12 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321) + __builtin_abort (); + if (f13 (&a, 7) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193 + || f13 (&a, 7) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193) + __builtin_abort (); + if (f14 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1 + || f14 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1) + __builtin_abort (); + if (f15 (&a, 0) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0 + || f15 (&a, 0) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0) + __builtin_abort (); + __atomic_store_n (&a, 8321, __ATOMIC_RELAXED); + if (f16 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193 + || f16 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193) + __builtin_abort (); + if (f17 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1 + || f17 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1) + __builtin_abort (); + if (f18 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0 + || f18 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0) + __builtin_abort (); + if (f19 (&c, 7) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128 + || f19 (&c, 7) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0) + __builtin_abort (); + if (f20 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128 + || f20 (&c) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0) + __builtin_abort (); + __atomic_store_n (&a, 128, __ATOMIC_RELAXED); + if (f21 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144 + || f21 (&a, 4) != 16 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144) + __builtin_abort (); + __atomic_store_n (&c, 1, __ATOMIC_RELAXED); + if (f22 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129 + || f22 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1) + __builtin_abort (); + if (f23 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129 + || f23 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1) + __builtin_abort (); + if (f24 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128 + || f24 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128) + __builtin_abort (); + __atomic_store_n (&d, 1, __ATOMIC_RELAXED); + if (f25 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129 + || f25 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129 + || cnt != 2) + __builtin_abort (); + return 0; +} diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c index baae03f8042..c4e27f1cfe9 100644 --- a/gcc/tree-ssa-ccp.c +++ b/gcc/tree-ssa-ccp.c @@ -140,6 +140,8 @@ along with GCC; see the file COPYING3. If not see #include "builtins.h" #include "tree-chkp.h" #include "cfgloop.h" +#include "stor-layout.h" +#include "optabs-query.h" /* Possible lattice values. 
*/ @@ -2697,6 +2699,224 @@ optimize_unreachable (gimple_stmt_iterator i) return ret; } +/* Optimize + mask_2 = 1 << cnt_1; + _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3); + _5 = _4 & mask_2; + to + _4 = ATOMIC_BIT_TEST_AND_SET (ptr_6, cnt_1, 0, _3); + _5 = _4; + If _5 is only used in _5 != 0 or _5 == 0 comparisons, 1 + is passed instead of 0, and the builtin just returns a zero + or 1 value instead of the actual bit. + Similarly for __sync_fetch_and_or_* (without the ", _3" part + in there), and/or if mask_2 is a power of 2 constant. + Similarly for xor instead of or, use ATOMIC_BIT_TEST_AND_COMPLEMENT + in that case. And similarly for and instead of or, except that + the second argument to the builtin needs to be one's complement + of the mask instead of mask. */ + +static void +optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip, + enum internal_fn fn, bool has_model_arg, + bool after) +{ + gimple *call = gsi_stmt (*gsip); + tree lhs = gimple_call_lhs (call); + use_operand_p use_p; + gimple *use_stmt; + tree mask, bit; + optab optab; + + if (!flag_inline_atomics + || optimize_debug + || !gimple_call_builtin_p (call, BUILT_IN_NORMAL) + || !lhs + || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs) + || !single_imm_use (lhs, &use_p, &use_stmt) + || !is_gimple_assign (use_stmt) + || gimple_assign_rhs_code (use_stmt) != BIT_AND_EXPR + || !gimple_vdef (call)) + return; + + switch (fn) + { + case IFN_ATOMIC_BIT_TEST_AND_SET: + optab = atomic_bit_test_and_set_optab; + break; + case IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT: + optab = atomic_bit_test_and_complement_optab; + break; + case IFN_ATOMIC_BIT_TEST_AND_RESET: + optab = atomic_bit_test_and_reset_optab; + break; + default: + return; + } + + if (optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs))) == CODE_FOR_nothing) + return; + + mask = gimple_call_arg (call, 1); + tree use_lhs = gimple_assign_lhs (use_stmt); + if (!use_lhs) + return; + + if (TREE_CODE (mask) == INTEGER_CST) + { + if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET) + mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask); + mask = fold_convert (TREE_TYPE (lhs), mask); + int ibit = tree_log2 (mask); + if (ibit < 0) + return; + bit = build_int_cst (TREE_TYPE (lhs), ibit); + } + else if (TREE_CODE (mask) == SSA_NAME) + { + gimple *g = SSA_NAME_DEF_STMT (mask); + if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET) + { + if (!is_gimple_assign (g) + || gimple_assign_rhs_code (g) != BIT_NOT_EXPR) + return; + mask = gimple_assign_rhs1 (g); + if (TREE_CODE (mask) != SSA_NAME) + return; + g = SSA_NAME_DEF_STMT (mask); + } + if (!is_gimple_assign (g) + || gimple_assign_rhs_code (g) != LSHIFT_EXPR + || !integer_onep (gimple_assign_rhs1 (g))) + return; + bit = gimple_assign_rhs2 (g); + } + else + return; + + if (gimple_assign_rhs1 (use_stmt) == lhs) + { + if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0)) + return; + } + else if (gimple_assign_rhs2 (use_stmt) != lhs + || !operand_equal_p (gimple_assign_rhs1 (use_stmt), mask, 0)) + return; + + bool use_bool = true; + bool has_debug_uses = false; + imm_use_iterator iter; + gimple *g; + + if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_lhs)) + use_bool = false; + FOR_EACH_IMM_USE_STMT (g, iter, use_lhs) + { + enum tree_code code = ERROR_MARK; + tree op0, op1; + if (is_gimple_debug (g)) + { + has_debug_uses = true; + continue; + } + else if (is_gimple_assign (g)) + switch (gimple_assign_rhs_code (g)) + { + case COND_EXPR: + op1 = gimple_assign_rhs1 (g); + code = TREE_CODE (op1); + op0 = TREE_OPERAND (op1, 0); + op1 = TREE_OPERAND (op1, 1); + break; + case EQ_EXPR: + 
case NE_EXPR: + code = gimple_assign_rhs_code (g); + op0 = gimple_assign_rhs1 (g); + op1 = gimple_assign_rhs2 (g); + break; + default: + break; + } + else if (gimple_code (g) == GIMPLE_COND) + { + code = gimple_cond_code (g); + op0 = gimple_cond_lhs (g); + op1 = gimple_cond_rhs (g); + } + + if ((code == EQ_EXPR || code == NE_EXPR) + && op0 == use_lhs + && integer_zerop (op1)) + { + use_operand_p use_p; + int n = 0; + FOR_EACH_IMM_USE_ON_STMT (use_p, iter) + n++; + if (n == 1) + continue; + } + + use_bool = false; + BREAK_FROM_IMM_USE_STMT (iter); + } + + tree new_lhs = make_ssa_name (TREE_TYPE (lhs)); + tree flag = build_int_cst (TREE_TYPE (lhs), use_bool); + if (has_model_arg) + g = gimple_build_call_internal (fn, 4, gimple_call_arg (call, 0), + bit, flag, gimple_call_arg (call, 2)); + else + g = gimple_build_call_internal (fn, 3, gimple_call_arg (call, 0), + bit, flag); + gimple_call_set_lhs (g, new_lhs); + gimple_set_location (g, gimple_location (call)); + gimple_set_vuse (g, gimple_vuse (call)); + gimple_set_vdef (g, gimple_vdef (call)); + SSA_NAME_DEF_STMT (gimple_vdef (call)) = g; + gimple_stmt_iterator gsi = *gsip; + gsi_insert_after (&gsi, g, GSI_NEW_STMT); + if (after) + { + /* The internal function returns the value of the specified bit + before the atomic operation. If we are interested in the value + of the specified bit after the atomic operation (makes only sense + for xor, otherwise the bit content is compile time known), + we need to invert the bit. */ + g = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)), + BIT_XOR_EXPR, new_lhs, + use_bool ? build_int_cst (TREE_TYPE (lhs), 1) + : mask); + new_lhs = gimple_assign_lhs (g); + gsi_insert_after (&gsi, g, GSI_NEW_STMT); + } + if (use_bool && has_debug_uses) + { + tree temp = make_node (DEBUG_EXPR_DECL); + DECL_ARTIFICIAL (temp) = 1; + TREE_TYPE (temp) = TREE_TYPE (lhs); + DECL_MODE (temp) = TYPE_MODE (TREE_TYPE (lhs)); + tree t = build2 (LSHIFT_EXPR, TREE_TYPE (lhs), new_lhs, bit); + g = gimple_build_debug_bind (temp, t, g); + gsi_insert_after (&gsi, g, GSI_NEW_STMT); + FOR_EACH_IMM_USE_STMT (g, iter, use_lhs) + if (is_gimple_debug (g)) + { + use_operand_p use_p; + FOR_EACH_IMM_USE_ON_STMT (use_p, iter) + SET_USE (use_p, temp); + update_stmt (g); + } + } + SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_lhs) + = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_lhs); + replace_uses_by (use_lhs, new_lhs); + gsi = gsi_for_stmt (use_stmt); + gsi_remove (&gsi, true); + release_defs (use_stmt); + gsi_remove (gsip, true); + release_ssa_name (lhs); +} + /* A simple pass that attempts to fold all builtin functions. This pass is run after we've propagated as many constants as we can. 
*/ @@ -2806,6 +3026,78 @@ pass_fold_builtins::execute (function *fun) cfg_changed = true; break; + case BUILT_IN_ATOMIC_FETCH_OR_1: + case BUILT_IN_ATOMIC_FETCH_OR_2: + case BUILT_IN_ATOMIC_FETCH_OR_4: + case BUILT_IN_ATOMIC_FETCH_OR_8: + case BUILT_IN_ATOMIC_FETCH_OR_16: + optimize_atomic_bit_test_and (&i, + IFN_ATOMIC_BIT_TEST_AND_SET, + true, false); + break; + case BUILT_IN_SYNC_FETCH_AND_OR_1: + case BUILT_IN_SYNC_FETCH_AND_OR_2: + case BUILT_IN_SYNC_FETCH_AND_OR_4: + case BUILT_IN_SYNC_FETCH_AND_OR_8: + case BUILT_IN_SYNC_FETCH_AND_OR_16: + optimize_atomic_bit_test_and (&i, + IFN_ATOMIC_BIT_TEST_AND_SET, + false, false); + break; + + case BUILT_IN_ATOMIC_FETCH_XOR_1: + case BUILT_IN_ATOMIC_FETCH_XOR_2: + case BUILT_IN_ATOMIC_FETCH_XOR_4: + case BUILT_IN_ATOMIC_FETCH_XOR_8: + case BUILT_IN_ATOMIC_FETCH_XOR_16: + optimize_atomic_bit_test_and + (&i, IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, true, false); + break; + case BUILT_IN_SYNC_FETCH_AND_XOR_1: + case BUILT_IN_SYNC_FETCH_AND_XOR_2: + case BUILT_IN_SYNC_FETCH_AND_XOR_4: + case BUILT_IN_SYNC_FETCH_AND_XOR_8: + case BUILT_IN_SYNC_FETCH_AND_XOR_16: + optimize_atomic_bit_test_and + (&i, IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, false, false); + break; + + case BUILT_IN_ATOMIC_XOR_FETCH_1: + case BUILT_IN_ATOMIC_XOR_FETCH_2: + case BUILT_IN_ATOMIC_XOR_FETCH_4: + case BUILT_IN_ATOMIC_XOR_FETCH_8: + case BUILT_IN_ATOMIC_XOR_FETCH_16: + optimize_atomic_bit_test_and + (&i, IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, true, true); + break; + case BUILT_IN_SYNC_XOR_AND_FETCH_1: + case BUILT_IN_SYNC_XOR_AND_FETCH_2: + case BUILT_IN_SYNC_XOR_AND_FETCH_4: + case BUILT_IN_SYNC_XOR_AND_FETCH_8: + case BUILT_IN_SYNC_XOR_AND_FETCH_16: + optimize_atomic_bit_test_and + (&i, IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, false, true); + break; + + case BUILT_IN_ATOMIC_FETCH_AND_1: + case BUILT_IN_ATOMIC_FETCH_AND_2: + case BUILT_IN_ATOMIC_FETCH_AND_4: + case BUILT_IN_ATOMIC_FETCH_AND_8: + case BUILT_IN_ATOMIC_FETCH_AND_16: + optimize_atomic_bit_test_and (&i, + IFN_ATOMIC_BIT_TEST_AND_RESET, + true, false); + break; + case BUILT_IN_SYNC_FETCH_AND_AND_1: + case BUILT_IN_SYNC_FETCH_AND_AND_2: + case BUILT_IN_SYNC_FETCH_AND_AND_4: + case BUILT_IN_SYNC_FETCH_AND_AND_8: + case BUILT_IN_SYNC_FETCH_AND_AND_16: + optimize_atomic_bit_test_and (&i, + IFN_ATOMIC_BIT_TEST_AND_RESET, + false, false); + break; + case BUILT_IN_VA_START: case BUILT_IN_VA_END: case BUILT_IN_VA_COPY: -- 2.30.2
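
A minimal usage sketch (illustrative only; the function below is hypothetical, the idiom mirrors the f1/f6 style tests in pr49244-1.c above): with this change, the classic atomic test-and-set idiom should compile at -O2 on x86 to a lock bts followed by reading the carry flag (setc), rather than a lock cmpxchg retry loop, because pass_fold_builtins now rewrites the fetch-or/mask test into IFN_ATOMIC_BIT_TEST_AND_SET:

int
try_set_bit (unsigned int *word, int bit)
{
  /* Bit number to mask, as in the tests above.  */
  unsigned int mask = 1u << bit;
  /* Nonzero if the bit was already set before the atomic OR.  */
  return (__atomic_fetch_or (word, mask, __ATOMIC_SEQ_CST) & mask) != 0;
}

The same shape with __atomic_fetch_xor maps to lock btc via IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, and __atomic_fetch_and with ~mask maps to lock btr via IFN_ATOMIC_BIT_TEST_AND_RESET; when the result is only compared against zero, the internal function is asked to return the bit in the least significant position (the use_bool case in optimize_atomic_bit_test_and).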