+2018-02-16 Carl Love <cel@us.ibm.com>
+
+ * config/rs6000/altivec.h: Add builtin names vec_extract4b and
+ vec_insert4b.
+ * config/rs6000/rs6000-builtin.def: Add INSERT4B and EXTRACT4B
+ definitions.
+ * config/rs6000/rs6000-c.c: Add the definitions for
+ P9V_BUILTIN_VEC_EXTRACT4B and P9V_BUILTIN_VEC_INSERT4B.
+ * config/rs6000/rs6000.c (altivec_expand_builtin): Add
+ P9V_BUILTIN_VEC_EXTRACT4B and P9V_BUILTIN_VEC_INSERT4B case statements.
+ * config/rs6000/vsx.md: Add define_insn extract4b. Add define_expand
+ definition for insert4b and define_insn *insert4b_internal.
+ * doc/extend.texi: Add documentation for vec_extract4b and vec_insert4b.
+
2018-02-16 Nathan Sidwell <nathan@acm.org>
* doc/extend.texi (Backwards Compatibility): Mention friend
#define vec_vctzw __builtin_vec_vctzw
#define vec_vextract4b __builtin_vec_vextract4b
#define vec_vinsert4b __builtin_vec_vinsert4b
+#define vec_extract4b __builtin_vec_extract4b
+#define vec_insert4b __builtin_vec_insert4b
#define vec_vprtyb __builtin_vec_vprtyb
#define vec_vprtybd __builtin_vec_vprtybd
#define vec_vprtybw __builtin_vec_vprtybw
BU_P9V_VSX_2 (VEXTRACT4B, "vextract4b", CONST, vextract4b)
BU_P9V_VSX_3 (VINSERT4B, "vinsert4b", CONST, vinsert4b)
BU_P9V_VSX_3 (VINSERT4B_DI, "vinsert4b_di", CONST, vinsert4b_di)
+BU_P9V_VSX_3 (INSERT4B, "insert4b", CONST, insert4b)
+BU_P9V_VSX_2 (EXTRACT4B, "extract4b", CONST, extract4b)
/* Hardware IEEE 128-bit floating point round to odd instrucitons added in ISA
3.0 (power9). */
BU_P9V_OVERLOAD_2 (VEXTULX, "vextulx")
BU_P9V_OVERLOAD_2 (VEXTURX, "vexturx")
BU_P9V_OVERLOAD_2 (VEXTRACT4B, "vextract4b")
+BU_P9V_OVERLOAD_2 (EXTRACT4B, "extract4b")
/* ISA 3.0 Vector scalar overloaded 3 argument functions */
BU_P9V_OVERLOAD_3 (STXVL, "stxvl")
BU_P9V_OVERLOAD_3 (XST_LEN_R, "xst_len_r")
BU_P9V_OVERLOAD_3 (VINSERT4B, "vinsert4b")
+BU_P9V_OVERLOAD_3 (INSERT4B, "insert4b")
/* Overloaded CMPNE support was implemented prior to Power 9,
so is not mentioned here. */
RS6000_BTI_INTDI, RS6000_BTI_V16QI, RS6000_BTI_UINTSI, 0 },
{ P9V_BUILTIN_VEC_VEXTRACT4B, P9V_BUILTIN_VEXTRACT4B,
RS6000_BTI_INTDI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTSI, 0 },
+ { P9V_BUILTIN_VEC_EXTRACT4B, P9V_BUILTIN_EXTRACT4B,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, 0 },
{ P9V_BUILTIN_VEC_VEXTRACT_FP_FROM_SHORTH, P9V_BUILTIN_VEXTRACT_FP_FROM_SHORTH,
RS6000_BTI_V4SF, RS6000_BTI_unsigned_V8HI, 0, 0 },
{ P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+ { P9V_BUILTIN_VEC_INSERT4B, P9V_BUILTIN_INSERT4B,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_V4SI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI },
+ { P9V_BUILTIN_VEC_INSERT4B, P9V_BUILTIN_INSERT4B,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI },
{ P9V_BUILTIN_VEC_VINSERT4B, P9V_BUILTIN_VINSERT4B,
RS6000_BTI_V16QI, RS6000_BTI_V4SI,
RS6000_BTI_V16QI, RS6000_BTI_UINTSI },
case P9V_BUILTIN_VEXTRACT4B:
case P9V_BUILTIN_VEC_VEXTRACT4B:
+ case P9V_BUILTIN_VEC_EXTRACT4B:
arg1 = CALL_EXPR_ARG (exp, 1);
STRIP_NOPS (arg1);
case P9V_BUILTIN_VINSERT4B:
case P9V_BUILTIN_VINSERT4B_DI:
case P9V_BUILTIN_VEC_VINSERT4B:
+ case P9V_BUILTIN_VEC_INSERT4B:
arg2 = CALL_EXPR_ARG (exp, 2);
STRIP_NOPS (arg2);
;; Vector insert/extract word at arbitrary byte values. Note, the little
;; endian version needs to adjust the byte number, and the V4SI element in
;; vinsert4b.
+;; Extract a 4-byte word from the V16QI operand 1 into the V2DI operand 0
+;; with xxextractuw.  Operand 2 is the constant byte offset, restricted by
+;; const_0_to_12_operand.  The output operand carries an explicit "=wa"
+;; constraint and the insn a "type" attribute, matching *insert4b_internal.
+(define_insn "extract4b"
+  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
+        (unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+                      (match_operand:QI 2 "const_0_to_12_operand" "n")]
+                     UNSPEC_XXEXTRACTUW))]
+  "TARGET_P9_VECTOR"
+{
+  /* When the vector element order is not big endian, mirror the byte
+     offset before emitting the instruction.  */
+  if (!VECTOR_ELT_ORDER_BIG)
+    operands[2] = GEN_INT (12 - INTVAL (operands[2]));
+
+  return "xxextractuw %x0,%x1,%2";
+}
+  [(set_attr "type" "vecperm")])
+
+;; Expander for the insert4b builtin: builds an UNSPEC_XXINSERTW on V16QI
+;; from a V4SI source (operand 1), the V16QI vector being inserted into
+;; (operand 2) and a constant byte offset (operand 3).  The emitted pattern
+;; has the same shape as *insert4b_internal.
+(define_expand "insert4b"
+ [(set (match_operand:V16QI 0 "vsx_register_operand")
+ (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
+ (match_operand:V16QI 2 "vsx_register_operand")
+ (match_operand:QI 3 "const_0_to_12_operand")]
+ UNSPEC_XXINSERTW))]
+ "TARGET_P9_VECTOR"
+{
+ /* When the vector element order is not big endian, both the source
+ vector and the byte offset are adjusted before the insn is emitted.  */
+ if (!VECTOR_ELT_ORDER_BIG)
+ {
+ rtx op1 = operands[1];
+ rtx v4si_tmp = gen_reg_rtx (V4SImode);
+ /* Permute the source into a fresh register; presumably this swaps the
+ two doublewords of op1 -- TODO confirm against vsx_xxpermdi_v4si_be.  */
+ emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
+ operands[1] = v4si_tmp;
+ /* Mirror the byte offset within the 0..12 range.  */
+ operands[3] = GEN_INT (12 - INTVAL (operands[3]));
+ }
+})
+
+;; Insn with the same UNSPEC_XXINSERTW shape that the insert4b expander
+;; builds.  Operand 2 uses the matching constraint "0", so the vector being
+;; inserted into shares the output register; the template emits xxinsertw
+;; with the source register (operand 1) and the byte offset (operand 3).
+(define_insn "*insert4b_internal"
+ [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
+ (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
+ (match_operand:V16QI 2 "vsx_register_operand" "0")
+ (match_operand:QI 3 "const_0_to_12_operand" "n")]
+ UNSPEC_XXINSERTW))]
+ "TARGET_P9_VECTOR"
+ "xxinsertw %x0,%x1,%3"
+ [(set_attr "type" "vecperm")])
+
(define_expand "vextract4b"
[(set (match_operand:DI 0 "gpc_reg_operand")
(unspec:DI [(match_operand:V16QI 1 "vsx_register_operand")
vector unsigned int vec_vctzw (vector int);
long long vec_vextract4b (const vector signed char, const int);
+vector unsigned long long vec_extract4b (vector unsigned char,
+ const int);
+long long vec_extract4b (const vector signed char, const int);
long long vec_vextract4b (const vector unsigned char, const int);
+vector unsigned char vec_insert4b (vector signed int, vector unsigned char,
+ const int);
+vector unsigned char vec_insert4b (vector unsigned int, vector unsigned char,
+ const int);
vector signed char vec_insert4b (vector int, vector signed char, const int);
vector unsigned char vec_insert4b (vector unsigned int, vector unsigned char,
const int);
+2018-02-16 Carl Love <cel@us.ibm.com>
+
+ * gcc.target/powerpc/builtins-7-p9-runnable.c: New runnable test file
+ for the ABI definitions for vec_extract4b and vec_insert4b.
+
2018-02-16 Nathan Sidwell <nathan@acm.org>
* g++.old-deja/g++.jason/scoping15.C: Expect warnings.
--- /dev/null
+/* { dg-do run { target { powerpc*-*-* && p9vector_hw } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mcpu=power9 -O2" } */
+
+#include <altivec.h>
+#define TRUE 1
+#define FALSE 0
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+#define EXTRACT 0
+
+void abort (void);
+
+/* Compare two vector unsigned long long values element by element.
+   Return TRUE as soon as one of the two elements differs, FALSE when
+   both vectors hold the same values.  */
+int result_wrong_ull (vector unsigned long long vec_expected,
+                      vector unsigned long long vec_actual)
+{
+  int idx = 0;
+
+  while (idx < 2)
+    {
+      if (vec_actual[idx] != vec_expected[idx])
+        return TRUE;
+      idx++;
+    }
+
+  return FALSE;
+}
+
+/* Compare two vector unsigned char values element by element.
+   Return TRUE as soon as one of the sixteen elements differs, FALSE when
+   both vectors hold the same values.  */
+int result_wrong_uc (vector unsigned char vec_expected,
+                     vector unsigned char vec_actual)
+{
+  int idx = 0;
+
+  while (idx < 16)
+    {
+      if (vec_actual[idx] != vec_expected[idx])
+        return TRUE;
+      idx++;
+    }
+
+  return FALSE;
+}
+
+#ifdef DEBUG
+/* Debug helper: print the expected and the actual vector unsigned long
+   long values, one line of comma-separated elements each.  The elements
+   are unsigned, so they are printed with %llu.  */
+void print_ull (vector unsigned long long vec_expected,
+                vector unsigned long long vec_actual)
+{
+  int i;
+
+  printf("expected unsigned long long data\n");
+  for (i = 0; i < 2; i++)
+    printf(" %llu,", vec_expected[i]);
+
+  printf("\nactual unsigned long long data\n");
+  for (i = 0; i < 2; i++)
+    printf(" %llu,", vec_actual[i]);
+  printf("\n");
+}
+
+/* Debug helper: print the sixteen elements of the expected vector and
+   then of the actual vector, each list under its own heading.  */
+void print_uc (vector unsigned char vec_expected,
+               vector unsigned char vec_actual)
+{
+  int n;
+
+  printf("expected unsigned char data\n");
+  n = 0;
+  while (n < 16)
+    {
+      printf(" %d,", vec_expected[n]);
+      n++;
+    }
+
+  printf("\nactual unsigned char data\n");
+  n = 0;
+  while (n < 16)
+    {
+      printf(" %d,", vec_actual[n]);
+      n++;
+    }
+  printf("\n");
+}
+#endif
+
+#if EXTRACT
+/* Extract the word at byte offset 5 of *VC.  Only built when EXTRACT is
+   nonzero.  Uses vec_extract4b, the builtin this test covers, so the
+   function can build when it is enabled.  */
+vector unsigned long long
+vext (vector unsigned char *vc)
+{
+  return vec_extract4b (*vc, 5);
+}
+#endif
+
+/* Run the vec_insert4b and vec_extract4b checks.  Each check compares the
+   builtin's result with a hand-written expected vector; on a mismatch the
+   test aborts, or, when built with DEBUG, prints both vectors instead.
+   NOTE(review): the expected values look specific to little-endian element
+   order -- confirm what is wanted on big-endian targets.  */
+int main()
+{
+  vector signed int vsi_arg;
+  vector unsigned char vec_uc_arg, vec_uc_result, vec_uc_expected;
+  vector unsigned long long vec_ull_result, vec_ull_expected;
+
+  vec_uc_arg = (vector unsigned char){1, 2, 3, 4,
+                                      5, 6, 7, 8,
+                                      9, 10, 11, 12,
+                                      13, 14, 15, 16};
+
+  vsi_arg = (vector signed int){0xA, 0xB, 0xC, 0xD};
+
+  vec_uc_expected = (vector unsigned char){0xC, 0, 0, 0,
+                                           5, 6, 7, 8,
+                                           9, 10, 11, 12,
+                                           13, 14, 15, 16};
+  /* Test vec_insert4b() */
+  /* Insert into char 0 location */
+  vec_uc_result = vec_insert4b (vsi_arg, vec_uc_arg, 0);
+
+  if (result_wrong_uc(vec_uc_expected, vec_uc_result))
+    {
+#ifdef DEBUG
+      printf("Error: vec_insert4b pos 0, result does not match expected result\n");
+      print_uc (vec_uc_expected, vec_uc_result);
+#else
+      abort();
+#endif
+    }
+
+  /* Insert into char 4 location */
+  vec_uc_expected = (vector unsigned char){1, 2, 3, 4,
+                                           0xC, 0, 0, 0,
+                                           9, 10, 11, 12,
+                                           13, 14, 15, 16};
+  vec_uc_result = vec_insert4b (vsi_arg, vec_uc_arg, 4);
+
+  if (result_wrong_uc(vec_uc_expected, vec_uc_result))
+    {
+#ifdef DEBUG
+      printf("Error: vec_insert4b pos 4, result does not match expected result\n");
+      print_uc (vec_uc_expected, vec_uc_result);
+#else
+      abort();
+#endif
+    }
+
+  /* Test vec_extract4b() */
+  /* Extract 4b, from char 0 location */
+  vec_uc_arg = (vector unsigned char){10, 0, 0, 0,
+                                      20, 0, 0, 0,
+                                      30, 0, 0, 0,
+                                      40, 0, 0, 0};
+
+  vec_ull_expected = (vector unsigned long long){0, 10};
+  vec_ull_result = vec_extract4b(vec_uc_arg, 0);
+
+  if (result_wrong_ull(vec_ull_expected, vec_ull_result))
+    {
+#ifdef DEBUG
+      printf("Error: vec_extract4b pos 0, result does not match expected result\n");
+      print_ull (vec_ull_expected, vec_ull_result);
+#else
+      abort();
+#endif
+    }
+
+  /* Extract 4b, from char 12 location */
+  vec_uc_arg = (vector unsigned char){10, 0, 0, 0,
+                                      20, 0, 0, 0,
+                                      30, 0, 0, 0,
+                                      40, 0, 0, 0};
+
+  vec_ull_expected = (vector unsigned long long){0, 40};
+  vec_ull_result = vec_extract4b(vec_uc_arg, 12);
+
+  if (result_wrong_ull(vec_ull_expected, vec_ull_result))
+    {
+#ifdef DEBUG
+      printf("Error: vec_extract4b pos 12, result does not match expected result\n");
+      print_ull (vec_ull_expected, vec_ull_result);
+#else
+      abort();
+#endif
+    }
+
+  return 0;
+}