altivec.h: Add builtin names vec_extract4b vec_insert4b.
authorCarl Love <cel@us.ibm.com>
Fri, 16 Feb 2018 17:30:45 +0000 (17:30 +0000)
committerCarl Love <carll@gcc.gnu.org>
Fri, 16 Feb 2018 17:30:45 +0000 (17:30 +0000)
gcc/ChangeLog:

2018-02-16  Carl Love  <cel@us.ibm.com>

* config/rs6000/altivec.h: Add builtin names vec_extract4b
vec_insert4b.
* config/rs6000/rs6000-builtin.def: Add INSERT4B and EXTRACT4B
definitions.
* config/rs6000/rs6000-c.c: Add the definitions for
P9V_BUILTIN_VEC_EXTRACT4B and P9V_BUILTIN_VEC_INSERT4B.
* config/rs6000/rs6000.c (altivec_expand_builtin): Add
P9V_BUILTIN_EXTRACT4B and P9V_BUILTIN_INSERT4B case statements.
* config/rs6000/vsx.md: Add define_insn extract4b.  Add define_expand
definition for insert4b and define_insn *insert4b_internal.
* doc/extend.texi: Add documentation for vec_extract4b and vec_insert4b.

gcc/testsuite/ChangeLog:

2018-02-16  Carl Love  <cel@us.ibm.com>
* gcc.target/powerpc/builtins-7-p9-runnable.c: New runnable test file
for the ABI definitions for vec_extract4b and vec_insert4b.

From-SVN: r257747

gcc/ChangeLog
gcc/config/rs6000/altivec.h
gcc/config/rs6000/rs6000-builtin.def
gcc/config/rs6000/rs6000-c.c
gcc/config/rs6000/rs6000.c
gcc/config/rs6000/vsx.md
gcc/doc/extend.texi
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/powerpc/builtins-7-p9-runnable.c [new file with mode: 0644]

index d3c06450004abb810d8b09db1847b517989680b8..d1c94de2ef8643abb709f15a8ad54129eb2e3e51 100644 (file)
@@ -1,3 +1,17 @@
+2018-02-16  Carl Love  <cel@us.ibm.com>
+
+       * config/rs6000/altivec.h: Add builtin names vec_extract4b
+       vec_insert4b.
+       * config/rs6000/rs6000-builtin.def: Add INSERT4B and EXTRACT4B
+       definitions.
+       * config/rs6000/rs6000-c.c: Add the definitions for
+       P9V_BUILTIN_VEC_EXTRACT4B and P9V_BUILTIN_VEC_INSERT4B.
+       * config/rs6000/rs6000.c (altivec_expand_builtin): Add
+       P9V_BUILTIN_EXTRACT4B and P9V_BUILTIN_INSERT4B case statements.
+       * config/rs6000/vsx.md: Add define_insn extract4b.  Add define_expand
+       definition for insert4b and define_insn *insert4b_internal.
+       * doc/extend.texi: Add documentation for vec_extract4b and vec_insert4b.
+
 2018-02-16  Nathan Sidwell  <nathan@acm.org>
 
        * doc/extend.texi (Backwards Compatibility): Mention friend
index 684cb1990ef05713bda09cfd5327664800fc98e6..3bce2ae396d2f4f759b5cb1d607256002ffea5f5 100644 (file)
 #define vec_vctzw __builtin_vec_vctzw
 #define vec_vextract4b __builtin_vec_vextract4b
 #define vec_vinsert4b __builtin_vec_vinsert4b
+#define vec_extract4b __builtin_vec_extract4b
+#define vec_insert4b __builtin_vec_insert4b
 #define vec_vprtyb __builtin_vec_vprtyb
 #define vec_vprtybd __builtin_vec_vprtybd
 #define vec_vprtybw __builtin_vec_vprtybw
index 86604da46faf8607060e3bb66ec871bfda21264a..420d12e29010d87f053237e2a93dec1b11f7d582 100644 (file)
@@ -2229,6 +2229,8 @@ BU_P9V_AV_2 (VEXTUWRX, "vextuwrx",                CONST,  vextuwrx)
 BU_P9V_VSX_2 (VEXTRACT4B,   "vextract4b",      CONST,  vextract4b)
 BU_P9V_VSX_3 (VINSERT4B,    "vinsert4b",       CONST,  vinsert4b)
 BU_P9V_VSX_3 (VINSERT4B_DI, "vinsert4b_di",    CONST,  vinsert4b_di)
+BU_P9V_VSX_3 (INSERT4B,    "insert4b",         CONST,  insert4b)
+BU_P9V_VSX_2 (EXTRACT4B,   "extract4b",        CONST,  extract4b)
 
 /* Hardware IEEE 128-bit floating point round to odd instrucitons added in ISA
    3.0 (power9).  */
@@ -2291,11 +2293,13 @@ BU_P9V_OVERLOAD_2 (XL_LEN_R,    "xl_len_r")
 BU_P9V_OVERLOAD_2 (VEXTULX,    "vextulx")
 BU_P9V_OVERLOAD_2 (VEXTURX,    "vexturx")
 BU_P9V_OVERLOAD_2 (VEXTRACT4B, "vextract4b")
+BU_P9V_OVERLOAD_2 (EXTRACT4B,  "extract4b")
 
 /* ISA 3.0 Vector scalar overloaded 3 argument functions */
 BU_P9V_OVERLOAD_3 (STXVL,      "stxvl")
 BU_P9V_OVERLOAD_3 (XST_LEN_R,  "xst_len_r")
 BU_P9V_OVERLOAD_3 (VINSERT4B,  "vinsert4b")
+BU_P9V_OVERLOAD_3 (INSERT4B,    "insert4b")
 
 /* Overloaded CMPNE support was implemented prior to Power 9,
    so is not mentioned here.  */
index 843a3750b821ea5d7b943659d0e7b045b4182f93..e4eab59ca4770da9d080253ac95dd91d149cf867 100644 (file)
@@ -5435,6 +5435,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
     RS6000_BTI_INTDI, RS6000_BTI_V16QI, RS6000_BTI_UINTSI, 0 },
   { P9V_BUILTIN_VEC_VEXTRACT4B, P9V_BUILTIN_VEXTRACT4B,
     RS6000_BTI_INTDI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTSI, 0 },
+  { P9V_BUILTIN_VEC_EXTRACT4B, P9V_BUILTIN_EXTRACT4B,
+    RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, 0 },
 
   { P9V_BUILTIN_VEC_VEXTRACT_FP_FROM_SHORTH, P9V_BUILTIN_VEXTRACT_FP_FROM_SHORTH,
     RS6000_BTI_V4SF, RS6000_BTI_unsigned_V8HI, 0, 0 },
@@ -5494,6 +5496,12 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
   { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
     RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
 
+  { P9V_BUILTIN_VEC_INSERT4B, P9V_BUILTIN_INSERT4B,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_V4SI,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI },
+  { P9V_BUILTIN_VEC_INSERT4B, P9V_BUILTIN_INSERT4B,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V4SI,
+    RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI },
   { P9V_BUILTIN_VEC_VINSERT4B, P9V_BUILTIN_VINSERT4B,
     RS6000_BTI_V16QI, RS6000_BTI_V4SI,
     RS6000_BTI_V16QI, RS6000_BTI_UINTSI },
index 3bb8d2a9224a430a75bbf9e338024c3a70038ff6..5eca053a25d4894bc1d55cb3e8c044efe4dc6d1f 100644 (file)
@@ -15743,6 +15743,7 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
 
     case P9V_BUILTIN_VEXTRACT4B:
     case P9V_BUILTIN_VEC_VEXTRACT4B:
+    case P9V_BUILTIN_VEC_EXTRACT4B:
       arg1 = CALL_EXPR_ARG (exp, 1);
       STRIP_NOPS (arg1);
 
@@ -15760,6 +15761,7 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
     case P9V_BUILTIN_VINSERT4B:
     case P9V_BUILTIN_VINSERT4B_DI:
     case P9V_BUILTIN_VEC_VINSERT4B:
+    case P9V_BUILTIN_VEC_INSERT4B:
       arg2 = CALL_EXPR_ARG (exp, 2);
       STRIP_NOPS (arg2);
 
index 6f0bd09e9bb257bc70b2209aca385c9640e55286..2d36f05e464d3360e2b9df56de2dd58a010f6338 100644 (file)
 ;; Vector insert/extract word at arbitrary byte values.  Note, the little
 ;; endian version needs to adjust the byte number, and the V4SI element in
 ;; vinsert4b.
+;; Extract a word from the V16QI operand 1 at the constant byte position in
+;; operand 2 and place the result in the V2DI operand 0 using xxextractuw.
+;; When the vector element order is not big-endian the byte position is
+;; mirrored (12 - position) before the instruction is printed.
+;; Operand 0 carries an explicit "=wa" output constraint, matching the
+;; constraint style of *insert4b_internal, because the template prints it
+;; with %x0.
+(define_insn "extract4b"
+  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
+       (unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
+                     (match_operand:QI 2 "const_0_to_12_operand" "n")]
+                    UNSPEC_XXEXTRACTUW))]
+  "TARGET_P9_VECTOR"
+{
+  /* Little-endian element order: count the byte position from the other
+     end of the 16-byte vector.  */
+  if (!VECTOR_ELT_ORDER_BIG)
+    operands[2] = GEN_INT (12 - INTVAL (operands[2]));
+
+  return "xxextractuw %x0,%x1,%2";
+})
+
+;; Expander for the insert4b builtin.  Operand 0 (V16QI) is set to an
+;; UNSPEC_XXINSERTW of the V4SI operand 1, the V16QI operand 2 and the
+;; constant byte position in operand 3.  The preparation code contains no
+;; DONE, so after it runs the RTL template below is emitted with the
+;; (possibly rewritten) operands; *insert4b_internal has the same shape.
+(define_expand "insert4b"
+  [(set (match_operand:V16QI 0 "vsx_register_operand")
+       (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
+                      (match_operand:V16QI 2 "vsx_register_operand")
+                      (match_operand:QI 3 "const_0_to_12_operand")]
+                  UNSPEC_XXINSERTW))]
+  "TARGET_P9_VECTOR"
+{
+  /* Element order is not big-endian: adjust both the V4SI source and the
+     byte position (see the note on little endian above these patterns).  */
+  if (!VECTOR_ELT_ORDER_BIG)
+    {
+      rtx op1 = operands[1];
+      rtx v4si_tmp = gen_reg_rtx (V4SImode);
+      /* Permute operand 1 with itself (selector 1) into a fresh V4SI
+         temporary and use that temporary as the source from here on.  */
+      emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
+      operands[1] = v4si_tmp;
+      /* Mirror the byte position within the 16-byte vector.  */
+      operands[3] = GEN_INT (12 - INTVAL (operands[3]));
+    }
+})
+
+;; Insn with the same UNSPEC_XXINSERTW shape that the insert4b expander
+;; generates.  Operand 2 uses the matching constraint "0", so it is tied to
+;; the output register; the xxinsertw template therefore names only the
+;; destination (%x0), the V4SI source (%x1) and the byte position (%3).
+(define_insn "*insert4b_internal"
+  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
+       (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
+                      (match_operand:V16QI 2 "vsx_register_operand" "0")
+                      (match_operand:QI 3 "const_0_to_12_operand" "n")]
+                  UNSPEC_XXINSERTW))]
+  "TARGET_P9_VECTOR"
+  "xxinsertw %x0,%x1,%3"
+  [(set_attr "type" "vecperm")])
+
 (define_expand "vextract4b"
   [(set (match_operand:DI 0 "gpc_reg_operand")
        (unspec:DI [(match_operand:V16QI 1 "vsx_register_operand")
index ee37eee4a5a56a783c463d3b84794f4fc7cdc771..b7effef17745f3a3ce9afd2889f590d4e99d368b 100644 (file)
@@ -19056,8 +19056,15 @@ vector int vec_vctzw (vector int);
 vector unsigned int vec_vctzw (vector int);
 
 long long vec_vextract4b (const vector signed char, const int);
+vector unsigned long long vec_extract4b (vector unsigned char,
+                                         const int);
+long long vec_extract4b (const vector signed char, const int);
 long long vec_vextract4b (const vector unsigned char, const int);
 
+vector unsigned char vec_insert4b (vector signed int, vector unsigned char,
+                                   const int);
+vector unsigned char vec_insert4b (vector unsigned int, vector unsigned char,
+                                   const int);
 vector signed char vec_insert4b (vector int, vector signed char, const int);
 vector unsigned char vec_insert4b (vector unsigned int, vector unsigned char,
                                    const int);
index 0bed6c3f09db5cc7d604cde8ecab8e1aafc699bd..e6f1331d20f3f9261063d7878108ee8344d2b841 100644 (file)
@@ -1,3 +1,8 @@
+2018-02-16  Carl Love  <cel@us.ibm.com>
+
+       * gcc.target/powerpc/builtins-7-p9-runnable.c: New runnable test file
+       for the ABI definitions for vec_extract4b and vec_insert4b.
+
 2018-02-16  Nathan Sidwell  <nathan@acm.org>
 
        * g++.old-deja/g++.jason/scoping15.C: Expect warnings.
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-7-p9-runnable.c b/gcc/testsuite/gcc.target/powerpc/builtins-7-p9-runnable.c
new file mode 100644 (file)
index 0000000..137b46b
--- /dev/null
@@ -0,0 +1,169 @@
+/* { dg-do run { target { powerpc*-*-* && p9vector_hw } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mcpu=power9 -O2" } */
+
+#include <altivec.h>
+#define TRUE 1
+#define FALSE 0
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+#define EXTRACT 0
+
+void abort (void);
+
+/* Compare the two doubleword elements of VEC_EXPECTED and VEC_ACTUAL.
+   Returns TRUE if either element differs, FALSE if both match.  */
+int result_wrong_ull (vector unsigned long long vec_expected,
+                     vector unsigned long long vec_actual)
+{
+  int elt = 0;
+
+  while (elt < 2)
+    {
+      if (vec_expected[elt] != vec_actual[elt])
+        return TRUE;
+      elt++;
+    }
+
+  return FALSE;
+}
+
+/* Compare the sixteen byte elements of VEC_EXPECTED and VEC_ACTUAL.
+   Returns TRUE at the first mismatching byte, FALSE if all bytes match.  */
+int result_wrong_uc (vector unsigned char vec_expected,
+                    vector unsigned char vec_actual)
+{
+  int byte = 0;
+
+  while (byte < 16)
+    {
+      if (vec_expected[byte] != vec_actual[byte])
+        return TRUE;
+      byte++;
+    }
+
+  return FALSE;
+}
+
+#ifdef DEBUG
+/* Debug helper (only compiled when DEBUG is defined): print the two
+   doubleword elements of VEC_EXPECTED and then of VEC_ACTUAL, each list
+   preceded by a heading line.  The elements are unsigned long long, so
+   they are printed with %llu.  */
+void print_ull (vector unsigned long long vec_expected,
+               vector unsigned long long vec_actual)
+{
+  int i;
+
+  printf("expected unsigned long long data\n");
+  for (i = 0; i < 2; i++)
+    printf(" %llu,", vec_expected[i]);
+
+  printf("\nactual unsigned long long data\n");
+  for (i = 0; i < 2; i++)
+    printf(" %llu,", vec_actual[i]);
+  printf("\n");
+}
+
+/* Debug helper (only compiled when DEBUG is defined): dump the sixteen
+   byte elements of VEC_EXPECTED followed by those of VEC_ACTUAL as decimal
+   numbers, each list under its own heading.  */
+void print_uc (vector unsigned char vec_expected,
+              vector unsigned char vec_actual)
+{
+  int byte;
+
+  printf("expected unsigned char data\n");
+  byte = 0;
+  while (byte < 16)
+    {
+      printf(" %d,", vec_expected[byte]);
+      byte++;
+    }
+
+  printf("\nactual unsigned char data\n");
+  byte = 0;
+  while (byte < 16)
+    {
+      printf(" %d,", vec_actual[byte]);
+      byte++;
+    }
+  printf("\n");
+}
+#endif
+
+#if EXTRACT
+/* Optional helper, compiled only when EXTRACT is non-zero: extract the word
+   at byte position 5 of *VC through a pointer argument.
+   NOTE(review): the original body called vextract_si_vchar, which is not
+   declared anywhere in this test; presumably a leftover from an earlier
+   name of the builtin.  vec_extract4b has the matching signature.  */
+vector unsigned long long
+vext (vector unsigned char *vc)
+{
+  return vec_extract4b (*vc, 5);
+}
+#endif
+
+/* Exercise vec_insert4b at byte positions 0 and 4 and vec_extract4b at byte
+   positions 0 and 12, comparing each result with a hard-coded expected
+   vector.  On a mismatch the test calls abort (); when built with DEBUG
+   defined it prints the expected and actual vectors instead.
+   NOTE(review): the expected byte patterns look specific to little-endian
+   element order -- confirm the behaviour on big-endian targets.  */
+int main()
+{
+   vector signed int vsi_arg;
+   vector unsigned char vec_uc_arg, vec_uc_result, vec_uc_expected;
+   vector unsigned long long vec_ull_result, vec_ull_expected;
+
+   vec_uc_arg = (vector unsigned char){1, 2, 3, 4,
+                                       5, 6, 7, 8,
+                                       9, 10, 11, 12,
+                                       13, 14, 15, 16};
+
+   vsi_arg = (vector signed int){0xA, 0xB, 0xC, 0xD};
+
+   /* Test vec_insert4b() */
+   /* Insert into char 0 location */
+   vec_uc_expected = (vector unsigned char){0xC, 0, 0, 0,
+                                            5, 6, 7, 8,
+                                            9, 10, 11, 12,
+                                            13, 14, 15, 16};
+   vec_uc_result = vec_insert4b (vsi_arg, vec_uc_arg, 0);
+
+   if (result_wrong_uc(vec_uc_expected, vec_uc_result))
+     {
+#ifdef DEBUG
+       printf("Error: vec_insert4b pos 0, result does not match expected result\n");
+       print_uc (vec_uc_expected, vec_uc_result);
+#else
+       abort();
+#endif
+     }
+
+   /* insert into char 4 location */
+   vec_uc_expected = (vector unsigned char){1, 2, 3, 4,
+                                            0xC, 0, 0, 0,
+                                            9, 10, 11, 12,
+                                            13, 14, 15, 16};
+   vec_uc_result = vec_insert4b (vsi_arg, vec_uc_arg, 4);
+
+   if (result_wrong_uc(vec_uc_expected, vec_uc_result))
+     {
+#ifdef DEBUG
+       printf("Error: vec_insert4b pos 4, result does not match expected result\n");
+       print_uc (vec_uc_expected, vec_uc_result);
+#else
+       abort();
+#endif
+     }
+
+   /* Test vec_extract4b() */
+   /* Extract 4b, from char 0 location */
+   vec_uc_arg = (vector unsigned char){10, 0, 0, 0,
+                                       20, 0, 0, 0,
+                                       30, 0, 0, 0,
+                                       40, 0, 0, 0};
+
+   vec_ull_expected = (vector unsigned long long){0, 10};
+   vec_ull_result = vec_extract4b(vec_uc_arg, 0);
+
+   if (result_wrong_ull(vec_ull_expected, vec_ull_result))
+     {
+#ifdef DEBUG
+       printf("Error: vec_extract4b pos 0, result does not match expected result\n");
+       print_ull (vec_ull_expected, vec_ull_result);
+#else
+       abort();
+#endif
+     }
+
+   /* Extract 4b, from char 12 location */
+   vec_uc_arg = (vector unsigned char){10, 0, 0, 0,
+                                       20, 0, 0, 0,
+                                       30, 0, 0, 0,
+                                       40, 0, 0, 0};
+
+   vec_ull_expected = (vector unsigned long long){0, 40};
+   vec_ull_result = vec_extract4b(vec_uc_arg, 12);
+
+   if (result_wrong_ull(vec_ull_expected, vec_ull_result))
+     {
+#ifdef DEBUG
+       printf("Error: vec_extract4b pos 12, result does not match expected result\n");
+       print_ull (vec_ull_expected, vec_ull_result);
+#else
+       abort();
+#endif
+     }
+
+   return 0;
+}