From: Carl Love Date: Mon, 6 Nov 2017 19:35:55 +0000 (+0000) Subject: rs6000-c.c (P8V_BUILTIN_VEC_REVB): Add power 8 definitions. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=fc504349a6a27a5ae3c70ae71d4c399db3d1f225;p=gcc.git rs6000-c.c (P8V_BUILTIN_VEC_REVB): Add power 8 definitions. gcc/ChangeLog: 2017-11-06 Carl Love * config/rs6000/rs6000-c.c (P8V_BUILTIN_VEC_REVB): Add power 8 definitions. (P9V_BUILTIN_VEC_REVB): Remove the power 9 instance definitions. * config/rs6000/altivec.h (vec_revb): Change the #define from power 9 to power 8. * config/rs6000/rs6000-protos.h (swap_endian_selector_for_mode): Add new extern declaration. * config/rs6000/rs6000.c (swap_endian_selector_for_mode): Add function. * config/rs6000/rs6000-builtin.def (BU_P8V_VSX_1, BU_P8V_OVERLOAD_1): Add power 8 macro expansions. (BU_P9V_OVERLOAD_1): Remove power 9 overload expansion. * config/rs6000/vsx.md (revb_<mode>): Add define_expand to generate power 8 instructions. (VSX_XXBR): Add iterator. gcc/testsuite/ChangeLog: 2017-11-06 Carl Love * gcc.target/powerpc/builtins-revb-runnable.c: New runnable test file. From-SVN: r254464 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d9a3a67c480..12ef83539f1 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,19 @@ +2017-11-06 Carl Love + + * config/rs6000/rs6000-c.c (P8V_BUILTIN_VEC_REVB): Add power 8 + definitions. + (P9V_BUILTIN_VEC_REVB): Remove the power 9 instance definitions. + * config/rs6000/altivec.h (vec_revb): Change the #define from power 9 + to power 8. + * config/rs6000/rs6000-protos.h (swap_endian_selector_for_mode): Add new + extern declaration. + * config/rs6000/rs6000.c (swap_endian_selector_for_mode): Add function. + * config/rs6000/rs6000-builtin.def (BU_P8V_VSX_1, BU_P8V_OVERLOAD_1): + Add power 8 macro expansions. + (BU_P9V_OVERLOAD_1): Remove power 9 overload expansion. + * config/rs6000/vsx.md (revb_<mode>): Add define_expand to generate + power 8 instructions. (VSX_XXBR): Add iterator. 
+ 2017-11-06 Wilco Dijkstra * config/arm/arm.md (predicable_short_it): Change default to "no", diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index 94a4db24a78..cec617a208a 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -415,6 +415,7 @@ #define vec_vsubuqm __builtin_vec_vsubuqm #define vec_vupkhsw __builtin_vec_vupkhsw #define vec_vupklsw __builtin_vec_vupklsw +#define vec_revb __builtin_vec_revb #endif #ifdef __POWER9_VECTOR__ @@ -478,8 +479,6 @@ #define vec_xlx __builtin_vec_vextulx #define vec_xrx __builtin_vec_vexturx - -#define vec_revb __builtin_vec_revb #endif /* Predicates. diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index 86857c7658c..9dddc11015d 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -1850,6 +1850,13 @@ BU_P6_64BIT_2 (CMPB, "cmpb", CONST, cmpbdi3) /* 1 argument VSX instructions added in ISA 2.07. */ BU_P8V_VSX_1 (XSCVSPDPN, "xscvspdpn", CONST, vsx_xscvspdpn) BU_P8V_VSX_1 (XSCVDPSPN, "xscvdpspn", CONST, vsx_xscvdpspn) +BU_P8V_VSX_1 (REVB_V1TI, "revb_v1ti", CONST, revb_v1ti) +BU_P8V_VSX_1 (REVB_V2DI, "revb_v2di", CONST, revb_v2di) +BU_P8V_VSX_1 (REVB_V4SI, "revb_v4si", CONST, revb_v4si) +BU_P8V_VSX_1 (REVB_V8HI, "revb_v8hi", CONST, revb_v8hi) +BU_P8V_VSX_1 (REVB_V16QI, "revb_v16qi", CONST, revb_v16qi) +BU_P8V_VSX_1 (REVB_V2DF, "revb_v2df", CONST, revb_v2df) +BU_P8V_VSX_1 (REVB_V4SF, "revb_v4sf", CONST, revb_v4sf) /* 1 argument altivec instructions added in ISA 2.07. */ BU_P8V_AV_1 (ABS_V2DI, "abs_v2di", CONST, absv2di2) @@ -1959,6 +1966,7 @@ BU_P8V_OVERLOAD_1 (VPOPCNTUH, "vpopcntuh") BU_P8V_OVERLOAD_1 (VPOPCNTUW, "vpopcntuw") BU_P8V_OVERLOAD_1 (VPOPCNTUD, "vpopcntud") BU_P8V_OVERLOAD_1 (VGBBD, "vgbbd") +BU_P8V_OVERLOAD_1 (REVB, "revb") /* ISA 2.07 vector overloaded 2 argument functions. 
*/ BU_P8V_OVERLOAD_2 (EQV, "eqv") @@ -2070,8 +2078,6 @@ BU_P9V_OVERLOAD_1 (VSTDCNQP, "scalar_test_neg_qp") BU_P9V_OVERLOAD_1 (VSTDCNDP, "scalar_test_neg_dp") BU_P9V_OVERLOAD_1 (VSTDCNSP, "scalar_test_neg_sp") -BU_P9V_OVERLOAD_1 (REVB, "revb") - BU_P9V_OVERLOAD_1 (VEXTRACT_FP_FROM_SHORTH, "vextract_fp_from_shorth") BU_P9V_OVERLOAD_1 (VEXTRACT_FP_FROM_SHORTL, "vextract_fp_from_shortl") diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index 1e7128102e4..0959c0b4871 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -5562,36 +5562,38 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 }, - { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRQ_V16QI, - RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, - { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRQ_V16QI, - RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, 0, 0 }, - { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRQ_V16QI, - RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, - { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRQ_V1TI, - RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, 0, 0 }, - { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRQ_V1TI, + { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0, 0 }, - { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRD_V2DI, + { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V1TI, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, 0, 0 }, + { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V2DI, + RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0, 0 }, + { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 }, - { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRD_V2DI, + { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, - { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRD_V2DF, - RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, - { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRW_V4SI, + { 
P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0, 0 }, - { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRW_V4SI, + { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 }, - { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRW_V4SI, + { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 }, - { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRW_V4SF, - RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, - { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRH_V8HI, + { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, 0, 0 }, - { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRH_V8HI, + { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 }, - { P9V_BUILTIN_VEC_REVB, P9V_BUILTIN_XXBRH_V8HI, + { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 }, + { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V16QI, + RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, + { P8V_BUILTIN_VEC_REVB, P8V_BUILTIN_REVB_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, { ALTIVEC_BUILTIN_VEC_VREVE, ALTIVEC_BUILTIN_VREVE_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 }, diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 721b906ee65..07288000705 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -136,6 +136,8 @@ extern int rs6000_emit_vector_cond_expr (rtx, rtx, rtx, rtx, rtx, rtx); extern void rs6000_emit_minmax (rtx, enum rtx_code, rtx, rtx); extern void rs6000_split_signbit (rtx, rtx); extern void rs6000_expand_atomic_compare_and_swap (rtx 
op[]); +extern rtx swap_endian_selector_for_mode (machine_mode mode); + extern void rs6000_expand_atomic_exchange (rtx op[]); extern void rs6000_expand_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx); extern void rs6000_emit_swdiv (rtx, rtx, rtx, bool); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index b4ab2ab53e3..c2712d9deb2 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -14303,6 +14303,77 @@ swap_selector_for_mode (machine_mode mode) return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm))); } +rtx +swap_endian_selector_for_mode (machine_mode mode) +{ + unsigned int le_swap1[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + unsigned int le_swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8}; + unsigned int le_swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12}; + unsigned int le_swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14}; + unsigned int le_swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + + unsigned int be_swap1[16] = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}; + unsigned int be_swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8}; + unsigned int be_swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12}; + unsigned int be_swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14}; + unsigned int be_swap16[16] = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}; + unsigned int *swaparray, i; + rtx perm[16]; + + if (VECTOR_ELT_ORDER_BIG) + switch (mode) + { + case E_V1TImode: + swaparray = le_swap1; + break; + case E_V2DFmode: + case E_V2DImode: + swaparray = le_swap2; + break; + case E_V4SFmode: + case E_V4SImode: + swaparray = le_swap4; + break; + case E_V8HImode: + swaparray = le_swap8; + break; + case E_V16QImode: + swaparray = le_swap16; + break; + default: + gcc_unreachable (); + } + else + switch (mode) + { + case E_V1TImode: + swaparray = be_swap1; + break; + case E_V2DFmode: + case E_V2DImode: + swaparray = be_swap2; + break; + case E_V4SFmode: + case E_V4SImode: + swaparray = 
be_swap4; + break; + case E_V8HImode: + swaparray = be_swap8; + break; + case E_V16QImode: + swaparray = be_swap16; + break; + default: + gcc_unreachable (); + } + + for (i = 0; i < 16; ++i) + perm[i] = GEN_INT (swaparray[i]); + + return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, + gen_rtvec_v (16, perm))); +} + /* Generate code for an "lvxl", or "lve*x" built-in for a little endian target with -maltivec=be specified. Issue the load followed by an element- reversing permute. */ diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 35be5dead64..901688ed101 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -73,6 +73,13 @@ (TF "FLOAT128_VECTOR_P (TFmode)") TI]) +(define_mode_attr VSX_XXBR [(V8HI "h") + (V4SI "w") + (V4SF "w") + (V2DF "d") + (V2DI "d") + (V1TI "q")]) + ;; Map into the appropriate load/store name based on the type (define_mode_attr VSm [(V16QI "vw4") (V8HI "vw4") @@ -273,6 +280,9 @@ (define_mode_iterator VSINT_84 [V4SI V2DI DI SI]) (define_mode_iterator VSINT_842 [V8HI V4SI V2DI]) +;; Vector reverse byte modes +(define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI]) + ;; Iterator for ISA 3.0 vector extract/insert of small integer vectors. ;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be ;; done on ISA 2.07 and not just ISA 3.0. @@ -4776,6 +4786,37 @@ "xxbrw %x0,%x1" [(set_attr "type" "vecperm")]) +;; Swap all bytes in each element of vector +(define_expand "revb_" + [(set (match_operand:VEC_REVB 0 "vsx_register_operand") + (bswap:VEC_REVB (match_operand:VEC_REVB 1 "vsx_register_operand")))] + "" +{ + if (TARGET_P9_VECTOR) + emit_insn (gen_p9_xxbr_ (operands[0], operands[1])); + else + { + /* Want to have the elements in reverse order relative + to the endian mode in use, i.e. in LE mode, put elements + in BE order. 
*/ + rtx sel = swap_endian_selector_for_mode(mode); + emit_insn (gen_altivec_vperm_ (operands[0], operands[1], + operands[1], sel)); + } + + DONE; +}) + +;; Reversing bytes in vector char is just a NOP. +(define_expand "revb_v16qi" + [(set (match_operand:V16QI 0 "vsx_register_operand") + (bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))] + "" +{ + emit_move_insn (operands[0], operands[1]); + DONE; +}) + ;; Swap all bytes in each 16-bit element (define_insn "p9_xxbrh_v8hi" [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 2ab7972410c..c3a197422e8 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2017-11-06 Carl Love + + * gcc.target/powerpc/builtins-revb-runnable.c: New runnable test file. + 2017-11-06 Michael Meissner PR target/82748 diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-revb-runnable.c b/gcc/testsuite/gcc.target/powerpc/builtins-revb-runnable.c new file mode 100644 index 00000000000..25bd4a2f729 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/builtins-revb-runnable.c @@ -0,0 +1,344 @@ +/* { dg-do run { target { powerpc*-*-* && p8vector_hw } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ +/* { dg-options "-mcpu=power8 -O3" } */ + +#include + +#ifdef DEBUG +#include +#endif + +void abort (void); + +/* Verify vec_revb builtins */ + +int +main() +{ + int i; + vector bool char arg_bc, result_bc, expected_bc; + vector unsigned char arg_uc, result_uc, expected_uc; + vector signed char arg_sc, result_sc, expected_sc; + + vector bool short int arg_bsi, result_bsi, expected_bsi; + vector unsigned short int arg_usi, result_usi, expected_usi; + vector short int arg_si, result_si, expected_si; + + vector bool int arg_bi, result_bi, expected_bi; + vector unsigned int arg_ui, result_ui, expected_ui; + vector int arg_int, result_int, expected_int; + + vector bool long long int 
arg_blli, result_blli, expected_blli; + vector unsigned long long int arg_ulli, result_ulli, expected_ulli; + vector long long int arg_lli, result_lli, expected_lli; + + vector __uint128_t arg_uint128, result_uint128, expected_uint128; + vector __int128_t arg_int128, result_int128, expected_int128; + + vector float arg_f, result_f, expected_f; + vector double arg_d, result_d, expected_d; + + /* 8-bit ints */ + /* The element is a byte. Reversing the byte in each byte element + gives the same value. */ + arg_bc = (vector bool char) {0x01, 0x23, 0x45, 0x67, + 0x7E, 0x7C, 0x7A, 0x78, + 0x02, 0x46, 0x7A, 0x7E, + 0x13, 0x57, 0x7B, 0x7F}; + expected_bc = arg_bc; + + result_bc = vec_revb (arg_bc); + + for (i = 0; i < 16; i++) { + if (result_bc[i] != expected_bc[i]) +#ifdef DEBUG + printf("arg_bc[%d] = 0x%x, result_bc[%d] = 0x%x, expected_bc[%d] = 0x%x\n", + i, arg_bc[i], i, result_bc[i], i, expected_bc[i]); +#else + abort(); +#endif + } + + arg_uc = (vector unsigned char) {0x01, 0x23, 0x45, 0x67, + 0x7E, 0x7C, 0x7A, 0x78, + 0x02, 0x46, 0x7A, 0x7E, + 0x13, 0x57, 0x7B, 0x7F}; + expected_uc = arg_uc; + + result_uc = vec_revb (arg_uc); + + for (i = 0; i < 16; i++) { + if (result_uc[i] != expected_uc[i]) +#ifdef DEBUG + printf("arg_uc[%d] = 0x%x, result_uc[%d] = 0x%x, expected_uc[%d] = 0x%x\n", + i, arg_uc[i], i, result_uc[i], i, expected_uc[i]); +#else + abort(); +#endif + } + + arg_sc = (vector signed char) {0x01, 0x23, 0x45, 0x67, + 0x7E, 0x7C, 0x7A, 0x78, + 0x02, 0x46, 0x7A, 0x7E, + 0x13, 0x57, 0x7B, 0x7F}; + expected_sc = arg_sc; + + result_sc = vec_revb (arg_sc); + + for (i = 0; i < 16; i++) { + if (result_sc[i] != expected_sc[i]) +#ifdef DEBUG + printf("arg_sc[%d] = 0x%x, result_sc[%d] = 0x%x, expected_sc[%d] = 0x%x\n", + i, arg_sc[i], i, result_sc[i], i, expected_sc[i]); +#else + abort(); +#endif + } + + /* 16-bit ints */ + arg_bsi = (vector bool short int) {0x0123, 0x4567, 0xFEDC, 0xBA98, 0x0246, + 0x8ACE, 0x1357, 0x9BDF}; + expected_bsi = (vector bool short int) 
{0x2301, 0x6745, 0xDCFE, 0x98BA, + 0x4602, 0xCE8A, 0x5713, 0xDF9B}; + + result_bsi = vec_revb (arg_bsi); + + for (i = 0; i < 8; i++) { + if (result_bsi[i] != expected_bsi[i]) +#ifdef DEBUG + printf("arg_bsi[%d] = 0x%x, result_bsi[%d] = 0x%x, expected_bsi[%d] = 0x%x\n", + i, arg_bsi[i], i, result_bsi[i], i, expected_bsi[i]); +#else + abort(); +#endif + } + + arg_usi = (vector unsigned short int) {0x0123, 0x4567, 0xFEDC, 0xBA98, + 0x0246, 0x8ACE, 0x1357, 0x9BDF}; + expected_usi = (vector unsigned short int) {0x2301, 0x6745, 0xDCFE, 0x98BA, + 0x4602, 0xCE8A, 0x5713, 0xDF9B}; + + result_usi = vec_revb (arg_usi); + + for (i = 0; i < 8; i++) { + if (result_usi[i] != expected_usi[i]) +#ifdef DEBUG + printf("arg_usi[%d] = 0x%x, result_usi[%d] = 0x%x, expected_usi[%d] = 0x%x\n", + i, arg_usi[i], i, result_usi[i], i, expected_usi[i]); +#else + abort(); +#endif + } + + arg_si = (vector short int) {0x0123, 0x4567, 0xFEDC, 0xBA98, 0x0246, 0x8ACE, + 0x1357, 0x9BDF}; + expected_si = (vector short int) {0x2301, 0x6745, 0xDCFE, 0x98BA, 0x4602, + 0xCE8A, 0x5713, 0xDF9B}; + + result_si = vec_revb (arg_si); + + for (i = 0; i < 8; i++) { + if (result_si[i] != expected_si[i]) +#ifdef DEBUG + printf("arg_si[%d] = 0x%x, result_si[%d] = 0x%x, expected_si[%d] = 0x%x\n", + i, arg_si[i], i, result_si[i], i, expected_si[i]); +#else + abort(); +#endif + } + + /* 32-bit ints */ + arg_bi = (vector bool int) {0x01234567, 0xFEDCBA98, 0x02468ACE, 0x13579BDF}; + expected_bi = (vector bool int) {0x67452301, 0x98BADCFE, 0xCE8A4602, + 0xDF9B5713}; + + result_bi = vec_revb (arg_bi); + + for (i = 0; i < 4; i++) { + if (result_bi[i] != expected_bi[i]) +#ifdef DEBUG + printf("arg_bi[%d] = 0x%x, result_bi[%d] = 0x%x, expected_bi[%d] = 0x%x\n", + i, arg_bi[i], i, result_bi[i], i, expected_bi[i]); +#else + abort(); +#endif + } + + arg_ui = (vector unsigned int) {0x01234567, 0xFEDCBA98, 0x02468ACE, + 0x13579BDF}; + expected_ui = (vector unsigned int) {0x67452301, 0x98BADCFE, 0xCE8A4602, + 0xDF9B5713}; + + 
result_ui = vec_revb (arg_ui); + + for (i = 0; i < 4; i++) { + if (result_ui[i] != expected_ui[i]) +#ifdef DEBUG + printf("arg_ui[%d] = 0x%x, result_ui[%d] = 0x%x, expected_ui[%d] = 0x%x\n", + i, arg_ui[i], i, result_ui[i], i, expected_ui[i]); +#else + abort(); +#endif + } + + arg_int = (vector int) {0x01234567, 0xFEDCBA98, 0x02468ACE, 0x13579BDF}; + expected_int = (vector int) {0x67452301, 0x98BADCFE, 0xCE8A4602, 0xDF9B5713}; + + result_int = vec_revb (arg_int); + + for (i = 0; i < 4; i++) { + if (result_int[i] != expected_int[i]) +#ifdef DEBUG + printf("arg_int[%d] = 0x%x, result_int[%d] = 0x%x, expected_int[%d] = 0x%x\n", + i, arg_int[i], i, result_int[i], i, expected_int[i]); +#else + abort(); +#endif + } + + /* 64-bit ints */ + arg_blli = (vector bool long long int) {0x01234567FEDCBA98, + 0x02468ACE13579BDF}; + expected_blli = (vector bool long long int) {0x98BADCFE67452301, + 0xDF9B5713CE8A4602}; + + result_blli = vec_revb (arg_blli); + + for (i = 0; i < 2; i++) { + if (result_blli[i] != expected_blli[i]) +#ifdef DEBUG + printf("arg_blli[%d] = 0x%x, result_blli[%d] = 0x%llx, expected_blli[%d] = 0x%llx\n", + i, arg_blli[i], i, result_blli[i], i, expected_blli[i]); +#else + abort(); +#endif + } + + arg_ulli = (vector unsigned long long int) {0x01234567FEDCBA98, + 0x02468ACE13579BDF}; + expected_ulli = (vector unsigned long long int) {0x98BADCFE67452301, + 0xDF9B5713CE8A4602}; + + result_ulli = vec_revb (arg_ulli); + + for (i = 0; i < 2; i++) { + if (result_ulli[i] != expected_ulli[i]) +#ifdef DEBUG + printf("arg_ulli[%d] = 0x%x, result_ulli[%d] = 0x%llx, expected_ulli[%d] = 0x%llx\n", + i, arg_ulli[i], i, result_ulli[i], i, expected_ulli[i]); +#else + abort(); +#endif + } + + arg_lli = (vector long long int) {0x01234567FEDCBA98, 0x02468ACE13579BDF}; + expected_lli = (vector long long int) {0x98BADCFE67452301, + 0xDF9B5713CE8A4602}; + + result_lli = vec_revb (arg_lli); + + for (i = 0; i < 2; i++) { + if (result_lli[i] != expected_lli[i]) +#ifdef DEBUG + 
printf("arg_lli[%d] = 0x%x, result_lli[%d] = 0x%llx, expected_lli[%d] = 0x%llx\n", + i, arg_lli[i], i, result_lli[i], i, expected_lli[i]); +#else + abort(); +#endif + } + + /* 128-bit ints */ + arg_uint128[0] = 0x1627384950617243; + arg_uint128[0] = arg_uint128[0] << 64; + arg_uint128[0] |= 0x9405182930415263; + expected_uint128[0] = 0x6352413029180594; + expected_uint128[0] = expected_uint128[0] << 64; + expected_uint128[0] |= 0x4372615049382716; + + result_uint128 = vec_revb (arg_uint128); + + if (result_uint128[0] != expected_uint128[0]) + { +#ifdef DEBUG + printf("result_uint128[0] doesn't match expected_u128[0]\n"); + printf("arg_uint128[0] = %llx ", arg_uint128[0] >> 64); + printf(" %llx\n", arg_uint128[0] & 0xFFFFFFFFFFFFFFFF); + + printf("result_uint128[0] = %llx ", result_uint128[0] >> 64); + printf(" %llx\n", result_uint128[0] & 0xFFFFFFFFFFFFFFFF); + + printf("expected_uint128[0] = %llx ", expected_uint128[0] >> 64); + printf(" %llx\n", expected_uint128[0] & 0xFFFFFFFFFFFFFFFF); +#else + abort(); +#endif + } + + arg_int128[0] = 0x1627384950617283; + arg_int128[0] = arg_int128[0] << 64; + arg_int128[0] |= 0x9405182930415263; + expected_int128[0] = 0x6352413029180594; + expected_int128[0] = expected_int128[0] << 64; + expected_int128[0] |= 0x8372615049382716;; + + result_int128 = vec_revb (arg_int128); + + if (result_int128[0] != expected_int128[0]) + { +#ifdef DEBUG + printf("result_int128[0] doesn't match expected128[0]\n"); + printf("arg_int128[0] = %llx ", arg_int128[0] >> 64); + printf(" %llx\n", arg_int128[0] & 0xFFFFFFFFFFFFFFFF); + + printf("result_int128[0] = %llx ", result_int128[0] >> 64); + printf(" %llx\n", result_int128[0] & 0xFFFFFFFFFFFFFFFF); + + printf("expected_int128[0] = %llx ", expected_int128[0] >> 64); + printf(" %llx\n", expected_int128[0] & 0xFFFFFFFFFFFFFFFF); +#else + abort(); +#endif + } + + /* 32-bit floats */ + /* 0x42f7224e, 0x43e471ec, 0x49712062, 0x4a0f2b38 */ + arg_f = (vector float) {123.567, 456.89, 987654.123456, 
2345678.0}; + /* 0x4e22F742, 0xec71e443, 0x62207149, 0x382b0f4a */ + expected_f = (vector float) {683528320.0, + -1169716232068291395011477504.0, + 739910526898278498304.0, + 0.0000407838160754181444644927978515625}; + + result_f = vec_revb (arg_f); + + for (i = 0; i < 4; i++) { + if (result_f[i] != expected_f[i]) + { +#ifdef DEBUG + printf(" arg_f[%d] = %f, result_f[%d] = %f, expected_f[%d] = %f\n", + i, arg_f[i], i, result_f[i], i, expected_f[i]); +#else + abort(); +#endif + } + } + + /* 64-bit floats */ + /* 0x419D6F34547E6B75 0x4194E5FEC781948B */ + arg_d = (vector double) {123456789.123456789, 87654321.87654321}; + /* 0x756B7E54346F9D41 0x8B9481C7FEE59441 */ + expected_d = (vector double) {4.12815412905659550518671402044E257, + -6.99269992046390236552018719554E-253}; + + result_d = vec_revb (arg_d); + + for (i = 0; i < 2; i++) { + if (result_d[i] != expected_d[i]) +#ifdef DEBUG + printf("arg_d[%d] = %f, result_d[%d] = %f, expected_d[%d] = %f\n", + i, arg_d[i], i, result_d[i], i, expected_d[i]); +#else + abort(); +#endif + } +}