From 3f029aea51a9b48b03a0671e445339a5ab1607eb Mon Sep 17 00:00:00 2001 From: Carl Love Date: Mon, 15 Jun 2020 17:44:19 -0500 Subject: [PATCH] rs6000, Add vector replace builtin support GCC maintainers: The following patch adds support for builtins vec_replace_elt and vec_replace_unaligned. The patch has been compiled and tested on powerpc64le-unknown-linux-gnu (Power 8 LE), powerpc64le-unknown-linux-gnu (Power 9 LE) and mambo with no regression errors. Please let me know if this patch is acceptable for the mainline branch. Thanks. Carl Love ------------------------------------------------------- gcc/ChangeLog 2020-08-04 Carl Love * config/rs6000/altivec.h: Add define for vec_replace_elt and vec_replace_unaligned. * config/rs6000/vsx.md (UNSPEC_REPLACE_ELT, UNSPEC_REPLACE_UN): New unspecs. (REPLACE_ELT): New mode iterator. (REPLACE_ELT_char, REPLACE_ELT_sh, REPLACE_ELT_max): New mode attributes. (vreplace_elt_<mode>, vreplace_un_<mode>, vreplace_elt_<mode>_inst): New. * config/rs6000/rs6000-builtin.def (VREPLACE_ELT_V4SI, VREPLACE_ELT_UV4SI, VREPLACE_ELT_V4SF, VREPLACE_ELT_UV2DI, VREPLACE_ELT_V2DF, VREPLACE_UN_V4SI, VREPLACE_UN_UV4SI, VREPLACE_UN_V4SF, VREPLACE_UN_V2DI, VREPLACE_UN_UV2DI, VREPLACE_UN_V2DF, REPLACE_ELT, REPLACE_UN, VREPLACE_ELT_V2DI): New builtin entries. * config/rs6000/rs6000-call.c (P10_BUILTIN_VEC_REPLACE_ELT, P10_BUILTIN_VEC_REPLACE_UN): New builtin argument definitions. (rs6000_expand_quaternop_builtin): Add 3rd argument checks for CODE_FOR_vreplace_elt_v4si, CODE_FOR_vreplace_elt_v4sf, CODE_FOR_vreplace_un_v4si, CODE_FOR_vreplace_un_v4sf. (builtin_function_type) [P10_BUILTIN_VREPLACE_ELT_UV4SI, P10_BUILTIN_VREPLACE_ELT_UV2DI, P10_BUILTIN_VREPLACE_UN_UV4SI, P10_BUILTIN_VREPLACE_UN_UV2DI]: New cases. * doc/extend.texi: Add description for vec_replace_elt and vec_replace_unaligned builtins. gcc/testsuite/ChangeLog 2020-08-04 Carl Love * gcc.target/powerpc/vec-replace-word-runnable.c: New test. 
--- gcc/config/rs6000/altivec.h | 2 + gcc/config/rs6000/rs6000-builtin.def | 16 + gcc/config/rs6000/rs6000-call.c | 61 ++++ gcc/config/rs6000/vsx.md | 60 ++++ gcc/doc/extend.texi | 50 +++ .../powerpc/vec-replace-word-runnable.c | 289 ++++++++++++++++++ 6 files changed, 478 insertions(+) create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-replace-word-runnable.c diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index 7ba28780923..62fe0bfc6fb 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -705,6 +705,8 @@ __altivec_scalar_pred(vec_any_nle, #define vec_extracth(a, b, c) __builtin_vec_extracth (a, b, c) #define vec_insertl(a, b, c) __builtin_vec_insertl (a, b, c) #define vec_inserth(a, b, c) __builtin_vec_inserth (a, b, c) +#define vec_replace_elt(a, b, c) __builtin_vec_replace_elt (a, b, c) +#define vec_replace_unaligned(a, b, c) __builtin_vec_replace_un (a, b, c) #define vec_gnb(a, b) __builtin_vec_gnb (a, b) #define vec_clrl(a, b) __builtin_vec_clrl (a, b) diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index 50a885cff10..6e11d38d1fa 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -2750,6 +2750,20 @@ BU_P10V_3 (VINSERTVPRBR, "vinsvubvrx", CONST, vinsertvr_v16qi) BU_P10V_3 (VINSERTVPRHR, "vinsvuhvrx", CONST, vinsertvr_v8hi) BU_P10V_3 (VINSERTVPRWR, "vinsvuwvrx", CONST, vinsertvr_v4si) +BU_P10V_3 (VREPLACE_ELT_V4SI, "vreplace_v4si", CONST, vreplace_elt_v4si) +BU_P10V_3 (VREPLACE_ELT_UV4SI, "vreplace_uv4si", CONST, vreplace_elt_v4si) +BU_P10V_3 (VREPLACE_ELT_V4SF, "vreplace_v4sf", CONST, vreplace_elt_v4sf) +BU_P10V_3 (VREPLACE_ELT_V2DI, "vreplace_v2di", CONST, vreplace_elt_v2di) +BU_P10V_3 (VREPLACE_ELT_UV2DI, "vreplace_uv2di", CONST, vreplace_elt_v2di) +BU_P10V_3 (VREPLACE_ELT_V2DF, "vreplace_v2df", CONST, vreplace_elt_v2df) + +BU_P10V_3 (VREPLACE_UN_V4SI, "vreplace_un_v4si", CONST, vreplace_un_v4si) +BU_P10V_3 
(VREPLACE_UN_UV4SI, "vreplace_un_uv4si", CONST, vreplace_un_v4si) +BU_P10V_3 (VREPLACE_UN_V4SF, "vreplace_un_v4sf", CONST, vreplace_un_v4sf) +BU_P10V_3 (VREPLACE_UN_V2DI, "vreplace_un_v2di", CONST, vreplace_un_v2di) +BU_P10V_3 (VREPLACE_UN_UV2DI, "vreplace_un_uv2di", CONST, vreplace_un_v2di) +BU_P10V_3 (VREPLACE_UN_V2DF, "vreplace_un_v2df", CONST, vreplace_un_v2df) + BU_P10V_1 (VSTRIBR, "vstribr", CONST, vstrir_v16qi) BU_P10V_1 (VSTRIHR, "vstrihr", CONST, vstrir_v8hi) BU_P10V_1 (VSTRIBL, "vstribl", CONST, vstril_v16qi) @@ -2794,6 +2808,8 @@ BU_P10_OVERLOAD_3 (EXTRACTL, "extractl") BU_P10_OVERLOAD_3 (EXTRACTH, "extracth") BU_P10_OVERLOAD_3 (INSERTL, "insertl") BU_P10_OVERLOAD_3 (INSERTH, "inserth") +BU_P10_OVERLOAD_3 (REPLACE_ELT, "replace_elt") +BU_P10_OVERLOAD_3 (REPLACE_UN, "replace_un") BU_P10_OVERLOAD_1 (VSTRIR, "strir") BU_P10_OVERLOAD_1 (VSTRIL, "stril") diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c index 574f7182936..5dd1f666feb 100644 --- a/gcc/config/rs6000/rs6000-call.c +++ b/gcc/config/rs6000/rs6000-call.c @@ -5639,6 +5639,36 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_UINTQI }, + { P10_BUILTIN_VEC_REPLACE_ELT, P10_BUILTIN_VREPLACE_ELT_UV4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_UINTSI, RS6000_BTI_UINTQI }, + { P10_BUILTIN_VEC_REPLACE_ELT, P10_BUILTIN_VREPLACE_ELT_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTQI }, + { P10_BUILTIN_VEC_REPLACE_ELT, P10_BUILTIN_VREPLACE_ELT_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_float, RS6000_BTI_INTQI }, + { P10_BUILTIN_VEC_REPLACE_ELT, P10_BUILTIN_VREPLACE_ELT_UV2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_UINTDI, RS6000_BTI_UINTQI }, + { P10_BUILTIN_VEC_REPLACE_ELT, P10_BUILTIN_VREPLACE_ELT_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_INTDI, RS6000_BTI_INTQI }, + { 
P10_BUILTIN_VEC_REPLACE_ELT, P10_BUILTIN_VREPLACE_ELT_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_double, RS6000_BTI_INTQI }, + + { P10_BUILTIN_VEC_REPLACE_UN, P10_BUILTIN_VREPLACE_UN_UV4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_UINTSI, RS6000_BTI_UINTQI }, + { P10_BUILTIN_VEC_REPLACE_UN, P10_BUILTIN_VREPLACE_UN_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_INTSI, RS6000_BTI_INTQI }, + { P10_BUILTIN_VEC_REPLACE_UN, P10_BUILTIN_VREPLACE_UN_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_float, RS6000_BTI_INTQI }, + { P10_BUILTIN_VEC_REPLACE_UN, P10_BUILTIN_VREPLACE_UN_UV2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_UINTDI, RS6000_BTI_UINTQI }, + { P10_BUILTIN_VEC_REPLACE_UN, P10_BUILTIN_VREPLACE_UN_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_INTDI, RS6000_BTI_INTQI }, + { P10_BUILTIN_VEC_REPLACE_UN, P10_BUILTIN_VREPLACE_UN_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_double, RS6000_BTI_INTQI }, + { P10_BUILTIN_VEC_VSTRIL, P10_BUILTIN_VSTRIBL, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, { P10_BUILTIN_VEC_VSTRIL, P10_BUILTIN_VSTRIBL, @@ -10066,6 +10096,33 @@ rs6000_expand_quaternop_builtin (enum insn_code icode, tree exp, rtx target) return CONST0_RTX (tmode); } } + else if (icode == CODE_FOR_vreplace_elt_v4si + || icode == CODE_FOR_vreplace_elt_v4sf) + { + /* Check whether the 3rd argument is an integer constant in the range + 0 to 3 inclusive. */ + STRIP_NOPS (arg2); + if (TREE_CODE (arg2) != INTEGER_CST + || !IN_RANGE (TREE_INT_CST_LOW (arg2), 0, 3)) + { + error ("argument 3 must be in the range 0 to 3"); + return CONST0_RTX (tmode); + } + } + + else if (icode == CODE_FOR_vreplace_un_v4si + || icode == CODE_FOR_vreplace_un_v4sf) + { + /* Check whether the 3rd argument is an integer constant in the range + 0 to 12 inclusive. 
*/ + STRIP_NOPS (arg2); + if (TREE_CODE (arg2) != INTEGER_CST + || !IN_RANGE(TREE_INT_CST_LOW (arg2), 0, 12)) + { + error ("argument 3 must be in the range 0 to 12"); + return CONST0_RTX (tmode); + } + } if (target == 0 || GET_MODE (target) != tmode @@ -13912,6 +13969,10 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0, case P10_BUILTIN_VINSERTVPRBL: case P10_BUILTIN_VINSERTVPRHL: case P10_BUILTIN_VINSERTVPRWL: + case P10_BUILTIN_VREPLACE_ELT_UV4SI: + case P10_BUILTIN_VREPLACE_ELT_UV2DI: + case P10_BUILTIN_VREPLACE_UN_UV4SI: + case P10_BUILTIN_VREPLACE_UN_UV2DI: h.uns_p[0] = 1; h.uns_p[1] = 1; h.uns_p[2] = 1; diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 626ba7825d4..dd750210758 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -359,6 +359,8 @@ UNSPEC_EXTRACTR UNSPEC_INSERTL UNSPEC_INSERTR + UNSPEC_REPLACE_ELT + UNSPEC_REPLACE_UN ]) (define_int_iterator XVCVBF16 [UNSPEC_VSX_XVCVSPBF16 @@ -370,6 +372,15 @@ ;; Like VI, defined in vector.md, but add ISA 2.07 integer vector ops (define_mode_iterator VI2 [V4SI V8HI V16QI V2DI]) +;; Vector replace_elt iterator/attr for 32-bit and 64-bit elements +(define_mode_iterator REPLACE_ELT [V4SI V4SF V2DI V2DF]) +(define_mode_attr REPLACE_ELT_char [(V4SI "w") (V4SF "w") + (V2DI "d") (V2DF "d")]) +(define_mode_attr REPLACE_ELT_sh [(V4SI "2") (V4SF "2") + (V2DI "3") (V2DF "3")]) +(define_mode_attr REPLACE_ELT_max [(V4SI "12") (V4SF "12") + (V2DI "8") (V2DF "8")]) + ;; VSX moves ;; The patterns for LE permuted loads and stores come before the general @@ -4022,6 +4033,55 @@ "vinsrx %0,%1,%2" [(set_attr "type" "vecsimple")]) +(define_expand "vreplace_elt_<mode>" + [(set (match_operand:REPLACE_ELT 0 "register_operand") + (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand") + (match_operand:<VS_scalar> 2 "register_operand") + (match_operand:QI 3 "const_0_to_3_operand")] + UNSPEC_REPLACE_ELT))] + "TARGET_POWER10" +{ + int index; + /* Immediate value is the word index, 
convert to byte index and adjust for + Endianness if needed. */ + if (BYTES_BIG_ENDIAN) + index = INTVAL (operands[3]) << <REPLACE_ELT_sh>; + + else + index = <REPLACE_ELT_max> - (INTVAL (operands[3]) << <REPLACE_ELT_sh>); + + emit_insn (gen_vreplace_elt_<mode>_inst (operands[0], operands[1], + operands[2], + GEN_INT (index))); + DONE; + } +[(set_attr "type" "vecsimple")]) + +(define_expand "vreplace_un_<mode>" + [(set (match_operand:REPLACE_ELT 0 "register_operand") + (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand") + (match_operand:<VS_scalar> 2 "register_operand") + (match_operand:QI 3 "const_0_to_12_operand")] + UNSPEC_REPLACE_UN))] + "TARGET_POWER10" +{ + /* Immediate value is the byte index in Big Endian numbering. */ + emit_insn (gen_vreplace_elt_<mode>_inst (operands[0], operands[1], + operands[2], operands[3])); + DONE; + } +[(set_attr "type" "vecsimple")]) + +(define_insn "vreplace_elt_<mode>_inst" + [(set (match_operand:REPLACE_ELT 0 "register_operand" "=v") + (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand" "0") + (match_operand:<VS_scalar> 2 "register_operand" "r") + (match_operand:QI 3 "const_0_to_12_operand" "n")] + UNSPEC_REPLACE_ELT))] + "TARGET_POWER10" + "vins<REPLACE_ELT_char> %0,%2,%3" + [(set_attr "type" "vecsimple")]) + ;; VSX_EXTRACT optimizations ;; Optimize double d = (double) vec_extract (vi, <n>) ;; Get the element into the top position and use XVCVSWDP/XVCVUWDP diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 035c38c1d77..f9b57e4d616 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -21090,6 +21090,56 @@ Note that some fairly anomalous results can be generated if the byte index is not aligned on an element boundary for the sort of element being inserted. 
@findex vec_inserth +Vector Replace Element +@smallexample +@exdent vector signed int vec_replace_elt (vector signed int, signed int, +const int); +@exdent vector unsigned int vec_replace_elt (vector unsigned int, +unsigned int, const int); +@exdent vector float vec_replace_elt (vector float, float, const int); +@exdent vector signed long long vec_replace_elt (vector signed long long, +signed long long, const int); +@exdent vector unsigned long long vec_replace_elt (vector unsigned long long, +unsigned long long, const int); +@exdent vector double vec_replace_elt (vector double, double, const int); +@end smallexample +The third argument (constrained to [0,3]) identifies the natural-endian +element number of the first argument that will be replaced by the second +argument to produce the result. The other elements of the first argument will +remain unchanged in the result. + +If it's desirable to insert a word at an unaligned position, use +vec_replace_unaligned instead. + +@findex vec_replace_elt + +Vector Replace Unaligned +@smallexample +@exdent vector unsigned char vec_replace_unaligned (vector unsigned char, +signed int, const int); +@exdent vector unsigned char vec_replace_unaligned (vector unsigned char, +unsigned int, const int); +@exdent vector unsigned char vec_replace_unaligned (vector unsigned char, +float, const int); +@exdent vector unsigned char vec_replace_unaligned (vector unsigned char, +signed long long, const int); +@exdent vector unsigned char vec_replace_unaligned (vector unsigned char, +unsigned long long, const int); +@exdent vector unsigned char vec_replace_unaligned (vector unsigned char, +double, const int); +@end smallexample + +The second argument replaces a portion of the first argument to produce the +result, with the rest of the first argument unchanged in the result. 
The +third argument identifies the byte index (using left-to-right, or big-endian +order) where the high-order byte of the second argument will be placed, with +the remaining bytes of the second argument placed naturally "to the right" +of the high-order byte. + +The programmer is responsible for understanding the endianness issues involved +with the first argument and the result. +@findex vec_replace_unaligned + @smallexample @exdent vector unsigned long long int @exdent vec_pext (vector unsigned long long int, vector unsigned long long int) diff --git a/gcc/testsuite/gcc.target/powerpc/vec-replace-word-runnable.c b/gcc/testsuite/gcc.target/powerpc/vec-replace-word-runnable.c new file mode 100644 index 00000000000..94af2106482 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vec-replace-word-runnable.c @@ -0,0 +1,289 @@ +/* { dg-do run } */ +/* { dg-require-effective-target power10_hw } */ +/* { dg-options "-mdejagnu-cpu=power10" } */ + +#include <altivec.h> + +#define DEBUG 0 + +#ifdef DEBUG +#include <stdio.h> +#endif + +extern void abort (void); + +int +main (int argc, char *argv []) +{ + int i; + unsigned char ch; + unsigned int index; + + vector unsigned int vresult_uint; + vector unsigned int expected_vresult_uint; + vector unsigned int src_va_uint; + vector unsigned int src_vb_uint; + unsigned int src_a_uint; + + vector int vresult_int; + vector int expected_vresult_int; + vector int src_va_int; + vector int src_vb_int; + int src_a_int; + + vector unsigned long long int vresult_ullint; + vector unsigned long long int expected_vresult_ullint; + vector unsigned long long int src_va_ullint; + vector unsigned long long int src_vb_ullint; + unsigned int long long src_a_ullint; + + vector long long int vresult_llint; + vector long long int expected_vresult_llint; + vector long long int src_va_llint; + vector long long int src_vb_llint; + long long int src_a_llint; + + vector float vresult_float; + vector float expected_vresult_float; + vector float src_va_float; + float 
src_a_float; + + vector double vresult_double; + vector double expected_vresult_double; + vector double src_va_double; + double src_a_double; + + /* Vector replace 32-bit element */ + src_a_uint = 345; + src_va_uint = (vector unsigned int) { 0, 1, 2, 3 }; + vresult_uint = (vector unsigned int) { 0, 0, 0, 0 }; + expected_vresult_uint = (vector unsigned int) { 0, 1, 345, 3 }; + + vresult_uint = vec_replace_elt (src_va_uint, src_a_uint, 2); + + if (!vec_all_eq (vresult_uint, expected_vresult_uint)) { +#if DEBUG + printf("ERROR, vec_replace_elt (src_vb_uint, src_va_uint, index)\n"); + for(i = 0; i < 4; i++) + printf(" vresult_uint[%d] = %d, expected_vresult_uint[%d] = %d\n", + i, vresult_uint[i], i, expected_vresult_uint[i]); +#else + abort(); +#endif + } + + src_a_int = 234; + src_va_int = (vector int) { 0, 1, 2, 3 }; + vresult_int = (vector int) { 0, 0, 0, 0 }; + expected_vresult_int = (vector int) { 0, 234, 2, 3 }; + + vresult_int = vec_replace_elt (src_va_int, src_a_int, 1); + + if (!vec_all_eq (vresult_int, expected_vresult_int)) { +#if DEBUG + printf("ERROR, vec_replace_elt (src_vb_int, src_va_int, index)\n"); + for(i = 0; i < 4; i++) + printf(" vresult_int[%d] = %d, expected_vresult_int[%d] = %d\n", + i, vresult_int[i], i, expected_vresult_int[i]); +#else + abort(); +#endif + } + + src_a_float = 34.0; + src_va_float = (vector float) { 0.0, 10.0, 20.0, 30.0 }; + vresult_float = (vector float) { 0.0, 0.0, 0.0, 0.0 }; + expected_vresult_float = (vector float) { 0.0, 34.0, 20.0, 30.0 }; + + vresult_float = vec_replace_elt (src_va_float, src_a_float, 1); + + if (!vec_all_eq (vresult_float, expected_vresult_float)) { +#if DEBUG + printf("ERROR, vec_replace_elt (src_vb_float, src_va_float, index)\n"); + for(i = 0; i < 4; i++) + printf(" vresult_float[%d] = %f, expected_vresult_float[%d] = %f\n", + i, vresult_float[i], i, expected_vresult_float[i]); +#else + abort(); +#endif + } + + /* Vector replace 64-bit element */ + src_a_ullint = 456; + src_va_ullint = (vector 
unsigned long long int) { 0, 1 }; + vresult_ullint = (vector unsigned long long int) { 0, 0 }; + expected_vresult_ullint = (vector unsigned long long int) { 0, 456 }; + + vresult_ullint = vec_replace_elt (src_va_ullint, src_a_ullint, 1); + + if (!vec_all_eq (vresult_ullint, expected_vresult_ullint)) { +#if DEBUG + printf("ERROR, vec_replace_elt (src_vb_ullint, src_va_ullint, index)\n"); + for(i = 0; i < 2; i++) + printf(" vresult_ullint[%d] = %d, expected_vresult_ullint[%d] = %d\n", + i, vresult_ullint[i], i, expected_vresult_ullint[i]); +#else + abort(); +#endif + } + + src_a_llint = 678; + src_va_llint = (vector long long int) { 0, 1 }; + vresult_llint = (vector long long int) { 0, 0 }; + expected_vresult_llint = (vector long long int) { 0, 678 }; + + vresult_llint = vec_replace_elt (src_va_llint, src_a_llint, 1); + + if (!vec_all_eq (vresult_llint, expected_vresult_llint)) { +#if DEBUG + printf("ERROR, vec_replace_elt (src_vb_llint, src_va_llint, index)\n"); + for(i = 0; i < 2; i++) + printf(" vresult_llint[%d] = %d, expected_vresult_llint[%d] = %d\n", + i, vresult_llint[i], i, expected_vresult_llint[i]); +#else + abort(); +#endif + } + + src_a_double = 678.0; + src_va_double = (vector double) { 0.0, 50.0 }; + vresult_double = (vector double) { 0.0, 0.0 }; + expected_vresult_double = (vector double) { 0.0, 678.0 }; + + vresult_double = vec_replace_elt (src_va_double, src_a_double, 1); + + if (!vec_all_eq (vresult_double, expected_vresult_double)) { +#if DEBUG + printf("ERROR, vec_replace_elt (src_vb_double, src_va_double, index)\n"); + for(i = 0; i < 2; i++) + printf(" vresult_double[%d] = %f, expected_vresult_double[%d] = %f\n", + i, vresult_double[i], i, expected_vresult_double[i]); +#else + abort(); +#endif + } + + + /* Vector replace 32-bit element, unaligned */ + src_a_uint = 345; + src_va_uint = (vector unsigned int) { 1, 2, 0, 0 }; + vresult_uint = (vector unsigned int) { 0, 0, 0, 0 }; + /* Byte index 7 will overwrite part of elements 2 and 3 */ + 
expected_vresult_uint = (vector unsigned int) { 1, 2, 345*256, 0 }; + + vresult_uint = vec_replace_unaligned (src_va_uint, src_a_uint, 3); + + if (!vec_all_eq (vresult_uint, expected_vresult_uint)) { +#if DEBUG + printf("ERROR, vec_replace_unaligned (src_vb_uint, src_va_uint, index)\n"); + for(i = 0; i < 4; i++) + printf(" vresult_uint[%d] = %d, expected_vresult_uint[%d] = %d\n", + i, vresult_uint[i], i, expected_vresult_uint[i]); +#else + abort(); +#endif + } + + src_a_int = 234; + src_va_int = (vector int) { 1, 0, 3, 4 }; + vresult_int = (vector int) { 0, 0, 0, 0 }; + /* Byte index 7 will over write part of elements 1 and 2 */ + expected_vresult_int = (vector int) { 1, 234*256, 0, 4 }; + + vresult_int = vec_replace_unaligned (src_va_int, src_a_int, 7); + + if (!vec_all_eq (vresult_int, expected_vresult_int)) { +#if DEBUG + printf("ERROR, vec_replace_unaligned (src_vb_int, src_va_int, index)\n"); + for(i = 0; i < 4; i++) + printf(" vresult_int[%d] = %d, expected_vresult_int[%d] = %d\n", + i, vresult_int[i], i, expected_vresult_int[i]); +#else + abort(); +#endif + } + + src_a_float = 34.0; + src_va_float = (vector float) { 0.0, 10.0, 20.0, 30.0 }; + vresult_float = (vector float) { 0.0, 0.0, 0.0, 0.0 }; + expected_vresult_float = (vector float) { 0.0, 34.0, 20.0, 30.0 }; + + vresult_float = vec_replace_unaligned (src_va_float, src_a_float, 8); + + if (!vec_all_eq (vresult_float, expected_vresult_float)) { +#if DEBUG + printf("ERROR, vec_replace_unaligned (src_vb_float, src_va_float, index)\n"); + for(i = 0; i < 4; i++) + printf(" vresult_float[%d] = %f, expected_vresult_float[%d] = %f\n", + i, vresult_float[i], i, expected_vresult_float[i]); +#else + abort(); +#endif + } + + /* Vector replace 64-bit element, unaligned */ + src_a_ullint = 456; + src_va_ullint = (vector unsigned long long int) { 0, 0x222 }; + vresult_ullint = (vector unsigned long long int) { 0, 0 }; + expected_vresult_ullint = (vector unsigned long long int) { 456*256, + 0x200 }; + + /* Byte index 7 
will over write least significant byte of element 0 */ + vresult_ullint = vec_replace_unaligned (src_va_ullint, src_a_ullint, 7); + + if (!vec_all_eq (vresult_ullint, expected_vresult_ullint)) { +#if DEBUG + printf("ERROR, vec_replace_unaligned (src_vb_ullint, src_va_ullint, index)\n"); + for(i = 0; i < 2; i++) + printf(" vresult_ullint[%d] = %d, expected_vresult_ullint[%d] = %d\n", + i, vresult_ullint[i], i, expected_vresult_ullint[i]); +#else + abort(); +#endif + } + + src_a_llint = 678; + src_va_llint = (vector long long int) { 0, 0x101 }; + vresult_llint = (vector long long int) { 0, 0 }; + /* Byte index 7 will over write least significant byte of element 0 */ + expected_vresult_llint = (vector long long int) { 678*256, 0x100 }; + + vresult_llint = vec_replace_unaligned (src_va_llint, src_a_llint, 7); + + if (!vec_all_eq (vresult_llint, expected_vresult_llint)) { +#if DEBUG + printf("ERROR, vec_replace_unaligned (src_vb_llint, src_va_llint, index)\n"); + for(i = 0; i < 2; i++) + printf(" vresult_llint[%d] = %d, expected_vresult_llint[%d] = %d\n", + i, vresult_llint[i], i, expected_vresult_llint[i]); +#else + abort(); +#endif + } + + src_a_double = 678.0; + src_va_double = (vector double) { 0.0, 50.0 }; + vresult_double = (vector double) { 0.0, 0.0 }; + expected_vresult_double = (vector double) { 0.0, 678.0 }; + + vresult_double = vec_replace_unaligned (src_va_double, src_a_double, 0); + + if (!vec_all_eq (vresult_double, expected_vresult_double)) { +#if DEBUG + printf("ERROR, vec_replace_unaligned (src_vb_double, src_va_double, index)\ +n"); + for(i = 0; i < 2; i++) + printf(" vresult_double[%d] = %f, expected_vresult_double[%d] = %f\n", + i, vresult_double[i], i, expected_vresult_double[i]); +#else + abort(); +#endif + } + + return 0; +} + +/* { dg-final { scan-assembler-times {\mvinsw\M} 6 } } */ +/* { dg-final { scan-assembler-times {\mvinsd\M} 6 } } */ + + -- 2.30.2