From: Richard Guenther Date: Fri, 20 May 2016 09:17:16 +0000 (+0000) Subject: re PR target/29756 (SSE intrinsics hard to use without redundant temporaries appearing) X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=483c642948802336899b14fa57b2e76c760c3c36;p=gcc.git re PR target/29756 (SSE intrinsics hard to use without redundant temporaries appearing) 2016-05-20 Richard Guenther PR tree-optimization/29756 * tree.def (BIT_INSERT_EXPR): New tcc_expression tree code. * expr.c (expand_expr_real_2): Handle BIT_INSERT_EXPR. * fold-const.c (operand_equal_p): Likewise. (fold_ternary_loc): Add constant folding of BIT_INSERT_EXPR. * gimplify.c (gimplify_expr): Handle BIT_INSERT_EXPR. * tree-inline.c (estimate_operator_cost): Likewise. * tree-pretty-print.c (dump_generic_node): Likewise. * tree-ssa-operands.c (get_expr_operands): Likewise. * cfgexpand.c (expand_debug_expr): Likewise. * gimple-pretty-print.c (dump_ternary_rhs): Likewise. * gimple.c (get_gimple_rhs_num_ops): Handle BIT_INSERT_EXPR. * tree-cfg.c (verify_gimple_assign_ternary): Verify BIT_INSERT_EXPR. * tree-ssa.c (non_rewritable_lvalue_p): We can rewrite vector inserts using BIT_FIELD_REF or MEM_REF on the lhs. (execute_update_addresses_taken): Do it. * gcc.dg/tree-ssa/vector-6.c: New testcase. From-SVN: r236501 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 83a6f3cc566..d40ec666567 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,22 @@ +2016-05-20 Richard Guenther + + PR tree-optimization/29756 + * tree.def (BIT_INSERT_EXPR): New tcc_expression tree code. + * expr.c (expand_expr_real_2): Handle BIT_INSERT_EXPR. + * fold-const.c (operand_equal_p): Likewise. + (fold_ternary_loc): Add constant folding of BIT_INSERT_EXPR. + * gimplify.c (gimplify_expr): Handle BIT_INSERT_EXPR. + * tree-inline.c (estimate_operator_cost): Likewise. + * tree-pretty-print.c (dump_generic_node): Likewise. + * tree-ssa-operands.c (get_expr_operands): Likewise. + * cfgexpand.c (expand_debug_expr): Likewise. + * gimple-pretty-print.c (dump_ternary_rhs): Likewise. + * gimple.c (get_gimple_rhs_num_ops): Handle BIT_INSERT_EXPR. + * tree-cfg.c (verify_gimple_assign_ternary): Verify BIT_INSERT_EXPR. + * tree-ssa.c (non_rewritable_lvalue_p): We can rewrite + vector inserts using BIT_FIELD_REF or MEM_REF on the lhs. + (execute_update_addresses_taken): Do it. + 2016-05-20 Richard Biener PR tree-optimization/71185 diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c index 77a1964d4c0..1461ad8b90b 100644 --- a/gcc/cfgexpand.c +++ b/gcc/cfgexpand.c @@ -5025,6 +5025,7 @@ expand_debug_expr (tree exp) case FIXED_CONVERT_EXPR: case OBJ_TYPE_REF: case WITH_SIZE_EXPR: + case BIT_INSERT_EXPR: return NULL; case DOT_PROD_EXPR: diff --git a/gcc/expr.c b/gcc/expr.c index 2f5a895015a..3c7e71f7130 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -9225,6 +9225,23 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode, target = expand_vec_cond_expr (type, treeop0, treeop1, treeop2, target); return target; + case BIT_INSERT_EXPR: + { + unsigned bitpos = tree_to_uhwi (treeop2); + unsigned bitsize; + if (INTEGRAL_TYPE_P (TREE_TYPE (treeop1))) + bitsize = TYPE_PRECISION (TREE_TYPE (treeop1)); + else + bitsize = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (treeop1))); + rtx op0 = expand_normal (treeop0); + rtx op1 = expand_normal (treeop1); + rtx dst = gen_reg_rtx (mode); + emit_move_insn (dst, op0); + store_bit_field (dst, bitsize, bitpos, 0, 0, + TYPE_MODE (TREE_TYPE (treeop1)), op1, false); + return dst; + } + default: gcc_unreachable (); } diff --git a/gcc/fold-const.c b/gcc/fold-const.c index 8a7c93e0459..556fc73a33d 100644 --- a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -3163,6 +3163,7 @@ operand_equal_p (const_tree arg0, const_tree arg1, unsigned int flags) case VEC_COND_EXPR: case DOT_PROD_EXPR: + case BIT_INSERT_EXPR: return OP_SAME (0) && OP_SAME (1) && OP_SAME (2); default: @@ -11860,6 +11861,46 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type, } return NULL_TREE; + case BIT_INSERT_EXPR: + /* Perform (partial) constant folding of BIT_INSERT_EXPR. */ + if (TREE_CODE (arg0) == INTEGER_CST + && TREE_CODE (arg1) == INTEGER_CST) + { + unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (op2); + unsigned bitsize = TYPE_PRECISION (TREE_TYPE (arg1)); + wide_int tem = wi::bit_and (arg0, + wi::shifted_mask (bitpos, bitsize, true, + TYPE_PRECISION (type))); + wide_int tem2 + = wi::lshift (wi::zext (wi::to_wide (arg1, TYPE_PRECISION (type)), + bitsize), bitpos); + return wide_int_to_tree (type, wi::bit_or (tem, tem2)); + } + else if (TREE_CODE (arg0) == VECTOR_CST + && CONSTANT_CLASS_P (arg1) + && types_compatible_p (TREE_TYPE (TREE_TYPE (arg0)), + TREE_TYPE (arg1))) + { + unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (op2); + unsigned HOST_WIDE_INT elsize + = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (arg1))); + if (bitpos % elsize == 0) + { + unsigned k = bitpos / elsize; + if (operand_equal_p (VECTOR_CST_ELT (arg0, k), arg1, 0)) + return arg0; + else + { + tree *elts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (type)); + memcpy (elts, VECTOR_CST_ELTS (arg0), + sizeof (tree) * TYPE_VECTOR_SUBPARTS (type)); + elts[k] = arg1; + return build_vector (type, elts); + } + } + } + return NULL_TREE; + default: return NULL_TREE; } /* switch (code) */ diff --git a/gcc/gimple-pretty-print.c b/gcc/gimple-pretty-print.c index 4b0dc7c9a98..f8642684ad9 100644 --- a/gcc/gimple-pretty-print.c +++ b/gcc/gimple-pretty-print.c @@ -478,6 +478,24 @@ dump_ternary_rhs (pretty_printer *buffer, gassign *gs, int spc, int flags) pp_greater (buffer); break; + case BIT_INSERT_EXPR: + pp_string (buffer, "BIT_INSERT_EXPR <"); + dump_generic_node (buffer, gimple_assign_rhs1 (gs), spc, flags, false); + pp_string (buffer, ", "); + dump_generic_node (buffer, gimple_assign_rhs2 (gs), spc, flags, false); + pp_string (buffer, ", "); + dump_generic_node (buffer, gimple_assign_rhs3 (gs), spc, flags, false); + pp_string (buffer, " ("); + if (INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs2 (gs)))) + pp_decimal_int (buffer, + TYPE_PRECISION (TREE_TYPE (gimple_assign_rhs2 (gs)))); + else + dump_generic_node (buffer, + TYPE_SIZE (TREE_TYPE (gimple_assign_rhs2 (gs))), + spc, flags, false); + pp_string (buffer, " bits)>"); + break; + default: gcc_unreachable (); } diff --git a/gcc/gimple.c b/gcc/gimple.c index d822fabcaa5..742c907d8bb 100644 --- a/gcc/gimple.c +++ b/gcc/gimple.c @@ -2043,6 +2043,7 @@ get_gimple_rhs_num_ops (enum tree_code code) || (SYM) == REALIGN_LOAD_EXPR \ || (SYM) == VEC_COND_EXPR \ || (SYM) == VEC_PERM_EXPR \ + || (SYM) == BIT_INSERT_EXPR \ || (SYM) == FMA_EXPR) ? GIMPLE_TERNARY_RHS \ : ((SYM) == CONSTRUCTOR \ || (SYM) == OBJ_TYPE_REF \ diff --git a/gcc/gimplify.c b/gcc/gimplify.c index c433a84a854..4a544e3c8ee 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -10931,6 +10931,10 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, /* Classified as tcc_expression. */ goto expr_3; + case BIT_INSERT_EXPR: + /* Argument 3 is a constant. */ + goto expr_2; + case POINTER_PLUS_EXPR: { enum gimplify_status r0, r1; diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 87588f02129..2d061d08819 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2016-05-20 Richard Guenther + + PR tree-optimization/29756 + * gcc.dg/tree-ssa/vector-6.c: New testcase. + 2016-05-20 Richard Biener PR tree-optimization/71185 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vector-6.c b/gcc/testsuite/gcc.dg/tree-ssa/vector-6.c new file mode 100644 index 00000000000..059ef4ec98c --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/vector-6.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-ccp1" } */ + +typedef int v4si __attribute__((vector_size (4 * sizeof (int)))); + +v4si test1 (v4si v, int i) +{ + ((int *)&v)[0] = i; + return v; +} + +v4si test2 (v4si v, int i) +{ + int *p = (int *)&v; + *p = i; + return v; +} + +v4si test3 (v4si v, int i) +{ + ((int *)&v)[3] = i; + return v; +} + +v4si test4 (v4si v, int i) +{ + int *p = (int *)&v; + p += 3; + *p = i; + return v; +} + +/* { dg-final { scan-tree-dump-times "Now a gimple register: v" 4 "ccp1" } } */ diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c index 65737028843..7c2ee78bdfb 100644 --- a/gcc/tree-cfg.c +++ b/gcc/tree-cfg.c @@ -4134,6 +4134,53 @@ verify_gimple_assign_ternary (gassign *stmt) return false; + case BIT_INSERT_EXPR: + if (! useless_type_conversion_p (lhs_type, rhs1_type)) + { + error ("type mismatch in BIT_INSERT_EXPR"); + debug_generic_expr (lhs_type); + debug_generic_expr (rhs1_type); + return true; + } + if (! ((INTEGRAL_TYPE_P (rhs1_type) + && INTEGRAL_TYPE_P (rhs2_type)) + || (VECTOR_TYPE_P (rhs1_type) + && types_compatible_p (TREE_TYPE (rhs1_type), rhs2_type)))) + { + error ("not allowed type combination in BIT_INSERT_EXPR"); + debug_generic_expr (rhs1_type); + debug_generic_expr (rhs2_type); + return true; + } + if (! tree_fits_uhwi_p (rhs3) + || ! tree_fits_uhwi_p (TYPE_SIZE (rhs2_type))) + { + error ("invalid position or size in BIT_INSERT_EXPR"); + return true; + } + if (INTEGRAL_TYPE_P (rhs1_type)) + { + unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (rhs3); + if (bitpos >= TYPE_PRECISION (rhs1_type) + || (bitpos + TYPE_PRECISION (rhs2_type) + > TYPE_PRECISION (rhs1_type))) + { + error ("insertion out of range in BIT_INSERT_EXPR"); + return true; + } + } + else if (VECTOR_TYPE_P (rhs1_type)) + { + unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (rhs3); + unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TYPE_SIZE (rhs2_type)); + if (bitpos % bitsize != 0) + { + error ("vector insertion not at element boundary"); + return true; + } + } + return false; + case DOT_PROD_EXPR: case REALIGN_LOAD_EXPR: /* FIXME. */ diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c index 4eb8d200411..07f6a83ff7c 100644 --- a/gcc/tree-inline.c +++ b/gcc/tree-inline.c @@ -3941,6 +3941,10 @@ estimate_operator_cost (enum tree_code code, eni_weights *weights, return weights->div_mod_cost; return 1; + /* Bit-field insertion needs several shift and mask operations. */ + case BIT_INSERT_EXPR: + return 3; + default: /* We expect a copy assignment with no operator. */ gcc_assert (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS); diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c index c393d34b5ec..0e7fdd1fec4 100644 --- a/gcc/tree-pretty-print.c +++ b/gcc/tree-pretty-print.c @@ -1876,6 +1876,23 @@ dump_generic_node (pretty_printer *pp, tree node, int spc, int flags, pp_greater (pp); break; + case BIT_INSERT_EXPR: + pp_string (pp, "BIT_INSERT_EXPR <"); + dump_generic_node (pp, TREE_OPERAND (node, 0), spc, flags, false); + pp_string (pp, ", "); + dump_generic_node (pp, TREE_OPERAND (node, 1), spc, flags, false); + pp_string (pp, ", "); + dump_generic_node (pp, TREE_OPERAND (node, 2), spc, flags, false); + pp_string (pp, " ("); + if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (node, 1)))) + pp_decimal_int (pp, + TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (node, 1)))); + else + dump_generic_node (pp, TYPE_SIZE (TREE_TYPE (TREE_OPERAND (node, 1))), + spc, flags, false); + pp_string (pp, " bits)>"); + break; + case ARRAY_REF: case ARRAY_RANGE_REF: op0 = TREE_OPERAND (node, 0); diff --git a/gcc/tree-ssa-operands.c b/gcc/tree-ssa-operands.c index 7d59bfda24b..eccea2f62b9 100644 --- a/gcc/tree-ssa-operands.c +++ b/gcc/tree-ssa-operands.c @@ -833,6 +833,7 @@ get_expr_operands (struct function *fn, gimple *stmt, tree *expr_p, int flags) get_expr_operands (fn, stmt, &TREE_OPERAND (expr, 0), flags); return; + case BIT_INSERT_EXPR: case COMPOUND_EXPR: case OBJ_TYPE_REF: case ASSERT_EXPR: diff --git a/gcc/tree-ssa.c b/gcc/tree-ssa.c index 0a59dd209ff..cf6e76405b3 100644 --- a/gcc/tree-ssa.c +++ b/gcc/tree-ssa.c @@ -1275,21 +1275,48 @@ non_rewritable_lvalue_p (tree lhs) && DECL_P (TREE_OPERAND (lhs, 0))) return false; - /* A decl that is wrapped inside a MEM-REF that covers - it full is also rewritable. - ??? The following could be relaxed allowing component + /* ??? The following could be relaxed allowing component references that do not change the access size. */ if (TREE_CODE (lhs) == MEM_REF - && TREE_CODE (TREE_OPERAND (lhs, 0)) == ADDR_EXPR - && integer_zerop (TREE_OPERAND (lhs, 1))) + && TREE_CODE (TREE_OPERAND (lhs, 0)) == ADDR_EXPR) { tree decl = TREE_OPERAND (TREE_OPERAND (lhs, 0), 0); - if (DECL_P (decl) + + /* A decl that is wrapped inside a MEM-REF that covers + it full is also rewritable. */ + if (integer_zerop (TREE_OPERAND (lhs, 1)) + && DECL_P (decl) && DECL_SIZE (decl) == TYPE_SIZE (TREE_TYPE (lhs)) && (TREE_THIS_VOLATILE (decl) == TREE_THIS_VOLATILE (lhs))) return false; + + /* A vector-insert using a MEM_REF or ARRAY_REF is rewritable + using a BIT_INSERT_EXPR. */ + if (DECL_P (decl) + && VECTOR_TYPE_P (TREE_TYPE (decl)) + && TYPE_MODE (TREE_TYPE (decl)) != BLKmode + && types_compatible_p (TREE_TYPE (lhs), + TREE_TYPE (TREE_TYPE (decl))) + && tree_fits_uhwi_p (TREE_OPERAND (lhs, 1)) + && tree_int_cst_lt (TREE_OPERAND (lhs, 1), + TYPE_SIZE_UNIT (TREE_TYPE (decl))) + && (tree_to_uhwi (TREE_OPERAND (lhs, 1)) + % tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (lhs)))) == 0) + return false; } + /* A vector-insert using a BIT_FIELD_REF is rewritable using + BIT_INSERT_EXPR. */ + if (TREE_CODE (lhs) == BIT_FIELD_REF + && DECL_P (TREE_OPERAND (lhs, 0)) + && VECTOR_TYPE_P (TREE_TYPE (TREE_OPERAND (lhs, 0))) + && TYPE_MODE (TREE_TYPE (TREE_OPERAND (lhs, 0))) != BLKmode + && types_compatible_p (TREE_TYPE (lhs), + TREE_TYPE (TREE_TYPE (TREE_OPERAND (lhs, 0)))) + && (tree_to_uhwi (TREE_OPERAND (lhs, 2)) + % tree_to_uhwi (TYPE_SIZE (TREE_TYPE (lhs)))) == 0) + return false; + return true; } @@ -1511,6 +1538,62 @@ execute_update_addresses_taken (void) continue; } + /* Rewrite a vector insert via a BIT_FIELD_REF on the LHS + into a BIT_INSERT_EXPR. */ + if (TREE_CODE (lhs) == BIT_FIELD_REF + && DECL_P (TREE_OPERAND (lhs, 0)) + && bitmap_bit_p (suitable_for_renaming, + DECL_UID (TREE_OPERAND (lhs, 0))) + && VECTOR_TYPE_P (TREE_TYPE (TREE_OPERAND (lhs, 0))) + && TYPE_MODE (TREE_TYPE (TREE_OPERAND (lhs, 0))) != BLKmode + && types_compatible_p (TREE_TYPE (lhs), + TREE_TYPE (TREE_TYPE + (TREE_OPERAND (lhs, 0)))) + && (tree_to_uhwi (TREE_OPERAND (lhs, 2)) + % tree_to_uhwi (TYPE_SIZE (TREE_TYPE (lhs))) == 0)) + { + tree var = TREE_OPERAND (lhs, 0); + tree val = gimple_assign_rhs1 (stmt); + tree bitpos = TREE_OPERAND (lhs, 2); + gimple_assign_set_lhs (stmt, var); + gimple_assign_set_rhs_with_ops + (&gsi, BIT_INSERT_EXPR, var, val, bitpos); + stmt = gsi_stmt (gsi); + unlink_stmt_vdef (stmt); + update_stmt (stmt); + continue; + } + + /* Rewrite a vector insert using a MEM_REF on the LHS + into a BIT_INSERT_EXPR. */ + if (TREE_CODE (lhs) == MEM_REF + && TREE_CODE (TREE_OPERAND (lhs, 0)) == ADDR_EXPR + && (sym = TREE_OPERAND (TREE_OPERAND (lhs, 0), 0)) + && DECL_P (sym) + && bitmap_bit_p (suitable_for_renaming, DECL_UID (sym)) + && VECTOR_TYPE_P (TREE_TYPE (sym)) + && TYPE_MODE (TREE_TYPE (sym)) != BLKmode + && types_compatible_p (TREE_TYPE (lhs), + TREE_TYPE (TREE_TYPE (sym))) + && tree_fits_uhwi_p (TREE_OPERAND (lhs, 1)) + && tree_int_cst_lt (TREE_OPERAND (lhs, 1), + TYPE_SIZE_UNIT (TREE_TYPE (sym))) + && (tree_to_uhwi (TREE_OPERAND (lhs, 1)) + % tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (lhs)))) == 0) + { + tree val = gimple_assign_rhs1 (stmt); + tree bitpos + = wide_int_to_tree (bitsizetype, + mem_ref_offset (lhs) * BITS_PER_UNIT); + gimple_assign_set_lhs (stmt, sym); + gimple_assign_set_rhs_with_ops + (&gsi, BIT_INSERT_EXPR, sym, val, bitpos); + stmt = gsi_stmt (gsi); + unlink_stmt_vdef (stmt); + update_stmt (stmt); + continue; + } + /* We shouldn't have any fancy wrapping of component-refs on the LHS, but look through VIEW_CONVERT_EXPRs as that is easy. */ diff --git a/gcc/tree.def b/gcc/tree.def index 44130d7c77f..d16575aee62 100644 --- a/gcc/tree.def +++ b/gcc/tree.def @@ -852,6 +852,21 @@ DEFTREECODE (ADDR_EXPR, "addr_expr", tcc_expression, 1) descriptor of type ptr_mode. */ DEFTREECODE (FDESC_EXPR, "fdesc_expr", tcc_expression, 2) +/* Given a container value, a replacement value and a bit position within + the container, produce the value that results from replacing the part of + the container starting at the bit position with the replacement value. + Operand 0 is a tree for the container value of integral or vector type; + Operand 1 is a tree for the replacement value of another integral or + the vector element type; + Operand 2 is a tree giving the constant bit position; + The number of bits replaced is given by the precision of the type of the + replacement value if it is integral or by its size if it is non-integral. + ??? The reason to make the size of the replacement implicit is to avoid + introducing a quaternary operation. + The replaced bits shall be fully inside the container. If the container + is of vector type, then these bits shall be aligned with its elements. */ +DEFTREECODE (BIT_INSERT_EXPR, "bit_field_insert", tcc_expression, 3) + /* Given two real or integer operands of the same type, returns a complex value of the corresponding complex type. */ DEFTREECODE (COMPLEX_EXPR, "complex_expr", tcc_binary, 2)