+2008-03-30 H.J. Lu <hongjiu.lu@intel.com>
+
+ PR target/35757
+ * config/i386/i386.c (ix86_expand_sse_4_operands_builtin): Issue
+ proper error message for the third argument on blendpd and
+ blendps.
+
+ * config/i386/sse.md (blendbits): New.
+ (sse4_1_blendp<ssemodesuffixf2c>): Use it.
+
2008-03-30 Eric Botcazou <ebotcazou@adacore.com>
* fold-const.c (fold_binary) <BIT_IOR_EXPR>: Add missing conversions.
case CODE_FOR_sse4_1_roundsd:
case CODE_FOR_sse4_1_roundss:
+ case CODE_FOR_sse4_1_blendps:
error ("the third argument must be a 4-bit immediate");
return const0_rtx;
+ case CODE_FOR_sse4_1_blendpd:
+ error ("the third argument must be a 2-bit immediate");
+ return const0_rtx;
+
default:
error ("the third argument must be an 8-bit immediate");
return const0_rtx;
;; Mapping of vector modes back to the scalar modes
(define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")])
+;; Mapping of immediate bits for blend instructions
+(define_mode_attr blendbits [(V4SF "15") (V2DF "3")])
+
;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(vec_merge:SSEMODEF2P
(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
(match_operand:SSEMODEF2P 1 "register_operand" "0")
- (match_operand:SI 3 "const_0_to_3_operand" "n")))]
+ (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
"TARGET_SSE4_1"
"blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "type" "ssemov")
+2008-03-30 H.J. Lu <hongjiu.lu@intel.com>
+
+ PR target/35757
+ * gcc.target/i386/sse4_1-blendps-2.c: New.
+ * gcc.target/i386/sse4_1-pblendw-2.c: Likewise.
+
2008-03-30 Thomas Koenig <tkoenig@gcc.gnu.org>
* gfortran.dg/internal_pack_1.f90: Added complex to test case.
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.1" } */
+
+#include "sse4_1-check.h"
+
+#include <smmintrin.h>
+#include <string.h>
+
+#define NUM 20
+
+#undef MASK
+#define MASK 0xe
+
+static void
+init_blendps (float *src1, float *src2)
+{
+ int i, sign = 1;
+
+ for (i = 0; i < NUM * 4; i++)
+ {
+ src1[i] = i * i * sign;
+ src2[i] = (i + 20) * sign;
+ sign = -sign;
+ }
+}
+
+static int
+check_blendps (__m128 *dst, float *src1, float *src2)
+{
+ float tmp[4];
+ int j;
+
+ memcpy (&tmp[0], src1, sizeof (tmp));
+ for (j = 0; j < 4; j++)
+ if ((MASK & (1 << j)))
+ tmp[j] = src2[j];
+
+ return memcmp (dst, &tmp[0], sizeof (tmp));
+}
+
+static void
+sse4_1_test (void)
+{
+ __m128 x, y;
+ union
+ {
+ __m128 x[NUM];
+ float f[NUM * 4];
+ } dst, src1, src2;
+ union
+ {
+ __m128 x;
+ float f[4];
+ } src3;
+ int i;
+
+ init_blendps (src1.f, src2.f);
+
+ /* Check blendps imm8, m128, xmm */
+ for (i = 0; i < NUM; i++)
+ {
+ dst.x[i] = _mm_blend_ps (src1.x[i], src2.x[i], MASK);
+ if (check_blendps (&dst.x[i], &src1.f[i * 4], &src2.f[i * 4]))
+ abort ();
+ }
+
+ /* Check blendps imm8, xmm, xmm */
+ x = _mm_blend_ps (dst.x[2], src3.x, MASK);
+ y = _mm_blend_ps (src3.x, dst.x[2], MASK);
+
+ if (check_blendps (&x, &dst.f[8], &src3.f[0]))
+ abort ();
+
+ if (check_blendps (&y, &src3.f[0], &dst.f[8]))
+ abort ();
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.1" } */
+
+#include "sse4_1-check.h"
+
+#include <smmintrin.h>
+#include <string.h>
+
+#define NUM 20
+
+#undef MASK
+#define MASK 0xfe
+
+static void
+init_pblendw (short *src1, short *src2)
+{
+ int i, sign = 1;
+
+ for (i = 0; i < NUM * 8; i++)
+ {
+ src1[i] = i * i * sign;
+ src2[i] = (i + 20) * sign;
+ sign = -sign;
+ }
+}
+
+static int
+check_pblendw (__m128i *dst, short *src1, short *src2)
+{
+ short tmp[8];
+ int j;
+
+ memcpy (&tmp[0], src1, sizeof (tmp));
+ for (j = 0; j < 8; j++)
+ if ((MASK & (1 << j)))
+ tmp[j] = src2[j];
+
+ return memcmp (dst, &tmp[0], sizeof (tmp));
+}
+
+static void
+sse4_1_test (void)
+{
+ __m128i x, y;
+ union
+ {
+ __m128i x[NUM];
+ short s[NUM * 8];
+ } dst, src1, src2;
+ union
+ {
+ __m128i x;
+ short s[8];
+ } src3;
+ int i;
+
+ init_pblendw (src1.s, src2.s);
+
+ /* Check pblendw imm8, m128, xmm */
+ for (i = 0; i < NUM; i++)
+ {
+ dst.x[i] = _mm_blend_epi16 (src1.x[i], src2.x[i], MASK);
+ if (check_pblendw (&dst.x[i], &src1.s[i * 8], &src2.s[i * 8]))
+ abort ();
+ }
+
+ /* Check pblendw imm8, xmm, xmm */
+ src3.x = _mm_setzero_si128 ();
+
+ x = _mm_blend_epi16 (dst.x[2], src3.x, MASK);
+ y = _mm_blend_epi16 (src3.x, dst.x[2], MASK);
+
+ if (check_pblendw (&x, &dst.s[16], &src3.s[0]))
+ abort ();
+
+ if (check_pblendw (&y, &src3.s[0], &dst.s[16]))
+ abort ();
+}