+2017-01-17 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
+
+ * config/rs6000/altivec.h (vec_rlmi): New #define.
+ (vec_vrlnm): Likewise.
+ (vec_rlnm): Likewise.
+ * config/rs6000/altivec.md (UNSPEC_VRLMI): New UNSPEC enum value.
+ (UNSPEC_VRLNM): Likewise.
+ (VIlong): New mode iterator.
+ (altivec_vrl<VI_char>mi): New define_insn.
+ (altivec_vrl<VI_char>nm): Likewise.
+ * config/rs6000/rs6000-builtin.def (VRLWNM): New monomorphic
+ function entry.
+ (VRLDNM): Likewise.
+ (RLNM): New polymorphic function entry.
+ (VRLWMI): New monomorphic function entry.
+ (VRLDMI): Likewise.
+ (RLMI): New polymorphic function entry.
+ * config/rs6000/rs6000-c.c (altivec_overloaded_builtin_table): Add
+ new entries for P9V_BUILTIN_VEC_RLMI and P9V_BUILTIN_VEC_RLNM.
+ * doc/extend.texi: Add description of vec_rlmi, vec_rlnm, and
+ vec_vrlnm.
+
2017-01-17 Jakub Jelinek <jakub@redhat.com>
PR debug/78839
#define vec_re __builtin_vec_re
#define vec_round __builtin_vec_round
#define vec_recipdiv __builtin_vec_recipdiv
+#define vec_rlmi __builtin_vec_rlmi
+#define vec_vrlnm __builtin_vec_rlnm
+/* vec_rlnm (a, sh, mask): the underlying instruction takes a single
+   control vector with the mask in the higher-order bytes and the shift
+   count in the low-order byte of each element, so combine the operands
+   as (mask << 8) | shift.  */
+#define vec_rlnm(a,b,c) (__builtin_vec_rlnm((a),((c)<<8)|(b)))
#define vec_rsqrt __builtin_vec_rsqrt
#define vec_rsqrte __builtin_vec_rsqrte
#define vec_vsubfp __builtin_vec_vsubfp
UNSPEC_CMPRB
UNSPEC_CMPRB2
UNSPEC_CMPEQB
+ UNSPEC_VRLMI
+ UNSPEC_VRLNM
])
(define_c_enum "unspecv"
;; Like VI, defined in vector.md, but add ISA 2.07 integer vector ops
(define_mode_iterator VI2 [V4SI V8HI V16QI V2DI])
-;; Short vec in modes
+;; Short vec int modes
(define_mode_iterator VIshort [V8HI V16QI])
+;; Longer vec int modes for rotate/mask ops
+(define_mode_iterator VIlong [V2DI V4SI])
;; Vec float modes
(define_mode_iterator VF [V4SF])
;; Vec modes, pity mode iterators are not composable
"vrl<VI_char> %0,%1,%2"
[(set_attr "type" "vecsimple")])
+;; Rotate Left then Mask Insert (vrlwmi/vrldmi, ISA 3.0).  Each element of
+;; operand 2 is rotated left and inserted under mask into operand 1, which
+;; is tied to the output by the "0" constraint.  Operand 3 supplies the
+;; per-element mask begin, mask end, and shift count fields.
+(define_insn "altivec_vrl<VI_char>mi"
+  [(set (match_operand:VIlong 0 "register_operand" "=v")
+ (unspec:VIlong [(match_operand:VIlong 1 "register_operand" "0")
+ (match_operand:VIlong 2 "register_operand" "v")
+ (match_operand:VIlong 3 "register_operand" "v")]
+ UNSPEC_VRLMI))]
+ "TARGET_P9_VECTOR"
+ "vrl<VI_char>mi %0,%2,%3"
+ [(set_attr "type" "veclogical")])
+
+;; Rotate Left then AND with Mask (vrlwnm/vrldnm, ISA 3.0).  Each element
+;; of operand 1 is rotated left and ANDed with a mask; operand 2 supplies
+;; the per-element mask begin, mask end, and shift count fields.
+(define_insn "altivec_vrl<VI_char>nm"
+  [(set (match_operand:VIlong 0 "register_operand" "=v")
+ (unspec:VIlong [(match_operand:VIlong 1 "register_operand" "v")
+ (match_operand:VIlong 2 "register_operand" "v")]
+ UNSPEC_VRLNM))]
+ "TARGET_P9_VECTOR"
+ "vrl<VI_char>nm %0,%1,%2"
+ [(set_attr "type" "veclogical")])
+
(define_insn "altivec_vsl"
[(set (match_operand:V4SI 0 "register_operand" "=v")
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
BU_P9V_AV_2 (VADUB, "vadub", CONST, vaduv16qi3)
BU_P9V_AV_2 (VADUH, "vaduh", CONST, vaduv8hi3)
BU_P9V_AV_2 (VADUW, "vaduw", CONST, vaduv4si3)
+BU_P9V_AV_2 (VRLWNM, "vrlwnm", CONST, altivec_vrlwnm)
+BU_P9V_AV_2 (VRLDNM, "vrldnm", CONST, altivec_vrldnm)
/* ISA 3.0 vector overloaded 2 argument functions. */
BU_P9V_OVERLOAD_2 (VADU, "vadu")
BU_P9V_OVERLOAD_2 (VADUB, "vadub")
BU_P9V_OVERLOAD_2 (VADUH, "vaduh")
BU_P9V_OVERLOAD_2 (VADUW, "vaduw")
+BU_P9V_OVERLOAD_2 (RLNM, "rlnm")
+
+/* ISA 3.0 3-argument vector functions. */
+BU_P9V_AV_3 (VRLWMI, "vrlwmi", CONST, altivec_vrlwmi)
+BU_P9V_AV_3 (VRLDMI, "vrldmi", CONST, altivec_vrldmi)
+
+/* ISA 3.0 vector overloaded 3-argument functions. */
+BU_P9V_OVERLOAD_3 (RLMI, "rlmi")
/* 1 argument vsx scalar functions added in ISA 3.0 (power9). */
BU_P9V_64BIT_VSX_1 (VSEEDP, "scalar_extract_exp", CONST, xsxexpdp)
RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
{ ALTIVEC_BUILTIN_VEC_VRLB, ALTIVEC_BUILTIN_VRLB,
RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
+ { P9V_BUILTIN_VEC_RLMI, P9V_BUILTIN_VRLWMI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI },
+ { P9V_BUILTIN_VEC_RLMI, P9V_BUILTIN_VRLDMI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI },
+ { P9V_BUILTIN_VEC_RLNM, P9V_BUILTIN_VRLWNM,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+ { P9V_BUILTIN_VEC_RLNM, P9V_BUILTIN_VRLDNM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLB,
RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
{ ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLB,
in the result vector is -1. Otherwise (all of the enabled test
conditions are false), the corresponding entry of the result vector is 0.
+The following built-in functions are available for the PowerPC family
+of processors, starting with ISA 3.0 or later (@option{-mcpu=power9}):
+@smallexample
+vector unsigned int vec_rlmi (vector unsigned int, vector unsigned int,
+ vector unsigned int);
+vector unsigned long long vec_rlmi (vector unsigned long long,
+ vector unsigned long long,
+ vector unsigned long long);
+vector unsigned int vec_rlnm (vector unsigned int, vector unsigned int,
+ vector unsigned int);
+vector unsigned long long vec_rlnm (vector unsigned long long,
+ vector unsigned long long,
+ vector unsigned long long);
+vector unsigned int vec_vrlnm (vector unsigned int, vector unsigned int);
+vector unsigned long long vec_vrlnm (vector unsigned long long,
+ vector unsigned long long);
+@end smallexample
+
+The result of @code{vec_rlmi} is obtained by rotating each element of
+the second argument vector left and inserting it under mask into the
+first argument vector. The third argument vector contains the mask
+beginning in bits 11:15, the mask end in bits 19:23, and the shift
+count in bits 27:31, of each element.
+
+The result of @code{vec_rlnm} is obtained by rotating each element of
+the first argument vector left and ANDing it with a mask specified by
+the second and third argument vectors. The second argument vector
+contains the shift count for each element in the low-order byte. The
+third argument vector contains the mask end for each element in the
+low-order byte, with the mask begin in the next higher byte.
+
+The result of @code{vec_vrlnm} is obtained by rotating each element
+of the first argument vector left and ANDing it with a mask. The
+second argument vector contains the mask beginning in bits 11:15,
+the mask end in bits 19:23, and the shift count in bits 27:31,
+of each element.
+
If the cryptographic instructions are enabled (@option{-mcrypto} or
@option{-mcpu=power8}), the following builtins are enabled.
+2017-01-17 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
+
+ * vec-rlmi-rlnm.c: New file.
+
2017-01-17 Nathan Sidwell <nathan@acm.org>
PR c++/61636
--- /dev/null
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-O2 -mcpu=power9" } */
+
+#include <altivec.h>
+
+/* Rotate-left-then-mask-insert, word elements; expected to compile to a
+   single vrlwmi (see the scan-assembler checks below).  */
+vector unsigned int
+rlmi_test_1 (vector unsigned int x, vector unsigned int y,
+ vector unsigned int z)
+{
+ return vec_rlmi (x, y, z);
+}
+
+/* Rotate-left-then-mask-insert, doubleword elements; expected to compile
+   to a single vrldmi (see the scan-assembler checks below).  */
+vector unsigned long long
+rlmi_test_2 (vector unsigned long long x, vector unsigned long long y,
+ vector unsigned long long z)
+{
+ return vec_rlmi (x, y, z);
+}
+
+/* Rotate-left-then-AND-with-mask, word elements, with the mask/shift
+   control pre-combined in y; expected to compile to a single vrlwnm.  */
+vector unsigned int
+vrlnm_test_1 (vector unsigned int x, vector unsigned int y)
+{
+ return vec_vrlnm (x, y);
+}
+
+/* Rotate-left-then-AND-with-mask, doubleword elements, with the mask/shift
+   control pre-combined in y; expected to compile to a single vrldnm.  */
+vector unsigned long long
+vrlnm_test_2 (vector unsigned long long x, vector unsigned long long y)
+{
+ return vec_vrlnm (x, y);
+}
+
+/* vec_rlnm combines its separate shift (y) and mask (z) operands into one
+   control vector, so this expands to splat/shift/or feeding vrlwnm (see
+   the expected-codegen comment below).  */
+vector unsigned int
+rlnm_test_1 (vector unsigned int x, vector unsigned int y,
+ vector unsigned int z)
+{
+ return vec_rlnm (x, y, z);
+}
+
+/* Doubleword variant of rlnm_test_1; there is no splat-immediate
+   doubleword, so the expansion is longer (splat/extend/shift/or feeding
+   vrldnm — see the expected-codegen comment below).  */
+vector unsigned long long
+rlnm_test_2 (vector unsigned long long x, vector unsigned long long y,
+ vector unsigned long long z)
+{
+ return vec_rlnm (x, y, z);
+}
+
+/* Expected code generation for rlmi_test_1 is vrlwmi.
+ Expected code generation for rlmi_test_2 is vrldmi.
+ Expected code generation for vrlnm_test_1 is vrlwnm.
+ Expected code generation for vrlnm_test_2 is vrldnm.
+ Expected code generation for the others is more complex, because
+ the second and third arguments are combined by a shift and OR,
+ and because there is no splat-immediate doubleword.
+ - For rlnm_test_1: vspltisw, vslw, xxlor, vrlwnm.
+ - For rlnm_test_2: xxspltib, vextsb2d, vsld, xxlor, vrldnm.
+ There is a choice of splat instructions in both cases, so we
+ just check for "splt". */
+
+/* { dg-final { scan-assembler-times "vrlwmi" 1 } } */
+/* { dg-final { scan-assembler-times "vrldmi" 1 } } */
+/* { dg-final { scan-assembler-times "splt" 2 } } */
+/* { dg-final { scan-assembler-times "vextsb2d" 1 } } */
+/* { dg-final { scan-assembler-times "vslw" 1 } } */
+/* { dg-final { scan-assembler-times "vsld" 1 } } */
+/* { dg-final { scan-assembler-times "xxlor" 2 } } */
+/* { dg-final { scan-assembler-times "vrlwnm" 2 } } */
+/* { dg-final { scan-assembler-times "vrldnm" 2 } } */