From: Will Schmidt Date: Mon, 25 Sep 2017 14:35:02 +0000 (+0000) Subject: rs6000.c (rs6000_gimple_fold_builtin): Add handling for early folding of vector store... X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=df5cc22c7e45d5cf1d68f24ee6db99c052fe7de1;p=gcc.git rs6000.c (rs6000_gimple_fold_builtin): Add handling for early folding of vector stores (ALTIVEC_BUILTIN_ST_*). [gcc] 2017-09-25 Will Schmidt * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add handling for early folding of vector stores (ALTIVEC_BUILTIN_ST_*). (rs6000_builtin_valid_without_lhs): New helper function. * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin): Remove obsoleted code for handling ALTIVEC_BUILTIN_VEC_ST. From-SVN: r253152 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b53da5390d5..ae61ed3f566 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2017-09-25 Will Schmidt + + * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add handling + for early folding of vector stores (ALTIVEC_BUILTIN_ST_*). + (rs6000_builtin_valid_without_lhs): New helper function. + * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin): + Remove obsoleted code for handling ALTIVEC_BUILTIN_VEC_ST. + 2017-09-25 Richard Sandiford * target.h (vec_perm_indices): Use unsigned short rather than diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index a49db972e91..4a363a12a8e 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -6472,78 +6472,6 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, return stmt; } - /* Expand vec_st into an expression that masks the address and - performs the store. We need to expand this early to allow - the best aliasing, as by the time we get into RTL we no longer - are able to honor __restrict__, for example. We may want to - consider this for all memory access built-ins. 
- - When -maltivec=be is specified, or the wrong number of arguments - is provided, simply punt to existing built-in processing. */ - - if (fcode == ALTIVEC_BUILTIN_VEC_ST - && (BYTES_BIG_ENDIAN || !VECTOR_ELT_ORDER_BIG) - && nargs == 3) - { - tree arg0 = (*arglist)[0]; - tree arg1 = (*arglist)[1]; - tree arg2 = (*arglist)[2]; - - /* Construct the masked address. Let existing error handling take - over if we don't have a constant offset. */ - arg1 = fold (arg1); - - if (TREE_CODE (arg1) == INTEGER_CST) - { - if (!ptrofftype_p (TREE_TYPE (arg1))) - arg1 = build1 (NOP_EXPR, sizetype, arg1); - - tree arg2_type = TREE_TYPE (arg2); - if (TREE_CODE (arg2_type) == ARRAY_TYPE && c_dialect_cxx ()) - { - /* Force array-to-pointer decay for C++. */ - arg2 = default_conversion (arg2); - arg2_type = TREE_TYPE (arg2); - } - - /* Find the built-in to make sure a compatible one exists; if not - we fall back to default handling to get the error message. */ - for (desc = altivec_overloaded_builtins; - desc->code && desc->code != fcode; desc++) - continue; - - for (; desc->code == fcode; desc++) - if (rs6000_builtin_type_compatible (TREE_TYPE (arg0), desc->op1) - && rs6000_builtin_type_compatible (TREE_TYPE (arg1), desc->op2) - && rs6000_builtin_type_compatible (TREE_TYPE (arg2), - desc->op3)) - { - tree addr = fold_build2_loc (loc, POINTER_PLUS_EXPR, arg2_type, - arg2, arg1); - tree aligned - = fold_build2_loc (loc, BIT_AND_EXPR, arg2_type, - addr, build_int_cst (arg2_type, -16)); - - tree arg0_type = TREE_TYPE (arg0); - if (TYPE_MODE (arg0_type) == V2DImode) - /* Type-based aliasing analysis thinks vector long - and vector long long are different and will put them - in distinct alias classes. Force our address type - to be a may-alias type to avoid this. 
*/ - arg0_type - = build_pointer_type_for_mode (arg0_type, Pmode, - true/*can_alias_all*/); - else - arg0_type = build_pointer_type (arg0_type); - aligned = build1 (NOP_EXPR, arg0_type, aligned); - tree stg = build_indirect_ref (loc, aligned, RO_NULL); - tree retval = build2 (MODIFY_EXPR, TREE_TYPE (stg), stg, - convert (TREE_TYPE (stg), arg0)); - return retval; - } - } - } - for (n = 0; !VOID_TYPE_P (TREE_VALUE (fnargs)) && n < nargs; fnargs = TREE_CHAIN (fnargs), n++) diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 9198e9ebeba..3212634f783 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -16157,6 +16157,25 @@ rs6000_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, #endif } +/* Helper function to sort out which built-ins may be valid without having + a LHS. */ +static bool +rs6000_builtin_valid_without_lhs (enum rs6000_builtins fn_code) +{ + switch (fn_code) + { + case ALTIVEC_BUILTIN_STVX_V16QI: + case ALTIVEC_BUILTIN_STVX_V8HI: + case ALTIVEC_BUILTIN_STVX_V4SI: + case ALTIVEC_BUILTIN_STVX_V4SF: + case ALTIVEC_BUILTIN_STVX_V2DI: + case ALTIVEC_BUILTIN_STVX_V2DF: + return true; + default: + return false; + } +} + /* Fold a machine-dependent built-in in GIMPLE. (For folding into a constant, use rs6000_fold_builtin.) */ @@ -16184,8 +16203,9 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) if (!rs6000_fold_gimple) return false; - /* Generic solution to prevent gimple folding of code without a LHS. */ - if (!gimple_call_lhs (stmt)) + /* Prevent gimple folding for code that does not have a LHS, unless it is + allowed per the rs6000_builtin_valid_without_lhs helper function. */ + if (!gimple_call_lhs (stmt) && !rs6000_builtin_valid_without_lhs (fn_code)) return false; switch (fn_code) @@ -16587,7 +16607,54 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) gsi_replace (gsi, g, true); return true; } - + /* Vector stores. 
*/ + case ALTIVEC_BUILTIN_STVX_V16QI: + case ALTIVEC_BUILTIN_STVX_V8HI: + case ALTIVEC_BUILTIN_STVX_V4SI: + case ALTIVEC_BUILTIN_STVX_V4SF: + case ALTIVEC_BUILTIN_STVX_V2DI: + case ALTIVEC_BUILTIN_STVX_V2DF: + { + /* Do not fold for -maltivec=be on LE targets. */ + if (VECTOR_ELT_ORDER_BIG && !BYTES_BIG_ENDIAN) + return false; + arg0 = gimple_call_arg (stmt, 0); /* Value to be stored. */ + arg1 = gimple_call_arg (stmt, 1); /* Offset. */ + tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address. */ + location_t loc = gimple_location (stmt); + tree arg0_type = TREE_TYPE (arg0); + /* Use ptr_type_node (no TBAA) for the arg2_type. + FIXME: (Richard) "A proper fix would be to transition this type as + seen from the frontend to GIMPLE, for example in a similar way we + do for MEM_REFs by piggy-backing that on an extra argument, a + constant zero pointer of the alias pointer type to use (which would + also serve as a type indicator of the store itself). I'd use a + target specific internal function for this (not sure if we can have + those target specific, but I guess if it's folded away then that's + fine) and get away with the overload set." + */ + tree arg2_type = ptr_type_node; + /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create + the tree using the value from arg1. The resulting type will match + the type of arg2. */ + gimple_seq stmts = NULL; + tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1); + tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR, + arg2_type, arg2, temp_offset); + /* Mask off any lower bits from the address. 
*/ + tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR, + arg2_type, temp_addr, + build_int_cst (arg2_type, -16)); + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); + /* The desired gimple result should be similar to: + MEM[(__vector floatD.1407 *)_1] = vf1D.2697; */ + gimple *g; + g = gimple_build_assign (build2 (MEM_REF, arg0_type, aligned_addr, + build_int_cst (arg2_type, 0)), arg0); + gimple_set_location (g, loc); + gsi_replace (gsi, g, true); + return true; + } default: if (TARGET_DEBUG_BUILTIN) fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",