optabs.c (expand_vector_binop): Don't store using a SUBREG smaller than UNITS_PER_WORD...

author J"orn Rennecke <joern.rennecke@superh.com>

Wed, 3 Jul 2002 09:49:46 +0000 (09:49 +0000)

committer Joern Rennecke <amylaar@gcc.gnu.org>

Wed, 3 Jul 2002 09:49:46 +0000 (10:49 +0100)
author J"orn Rennecke <joern.rennecke@superh.com>
Wed, 3 Jul 2002 09:49:46 +0000 (09:49 +0000)
committer Joern Rennecke <amylaar@gcc.gnu.org>
Wed, 3 Jul 2002 09:49:46 +0000 (10:49 +0100)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index 8aaa2244fceaeb9a860aa0308d9180cdabc065f5..9218d3fefb5cf2ff83a82aceecbbc4421cd0ef83 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,30 @@
+Wed Jul  3 10:24:16 2002  J"orn Rennecke <joern.rennecke@superh.com>
+
+       * optabs.c (expand_vector_binop): Don't store using a SUBREG smaller
+       than UNITS_PER_WORD, unless this is little endian and the first unit
+       in this word.  Let extract_bit_field decide how to load an element.
+       Force arguments to matching mode.
+       (expand_vector_unop): Likewise.
+
+       * simplify-rtx.c (simplify_subreg): Don't assume that all vectors
+       consist of word_mode elements.
+       * c-typeck.c (build_binary_op): Allow vector types for BIT_AND_EXPR,
+       BIT_ANDTC_EXPR, BIT_IOR_EXPR and BIT_XOR_EXPR.
+       (build_unary_op): Allow vector types for BIT_NOT_EXPR.
+       * emit-rtl.c (gen_lowpart_common): Use simplify_gen_subreg for
+       CONST_VECTOR.
+       * optabs.c (expand_vector_binop): Try to perform operation in
+       smaller vector modes with same inner size.  Add handling of AND, IOR
+       and XOR.  Reject expansion to inner-mode sized scalars when using
+       OPTAB_DIRECT.  Use simplify_gen_subreg on constants.
+       (expand_vector_unop): Try to perform operation in smaller vector
+       modes with same inner size.  Add handling of one's complement.
+       When there is no vector negate operation, try a vector subtract
+       operation.  Use simplify_gen_subreg on constants.
+       * simplify-rtx.c (simplify_subreg): Add capability to convert vector
+       constants into smaller vectors with same inner mode, and to
+       integer CONST_DOUBLEs.
+
  2002-07-02  Kaveh R. Ghazi  <ghazi@caip.rutgers.edu>
  
         * c-parse.in (parsing_iso_function_signature): New variable.
diff --git a/gcc/c-typeck.c b/gcc/c-typeck.c

index d26f87780f31f854c33f7e0614ebab3ad8d613a8..0a70363f406a731ef07b91ba4407b2ea918e61e2 100644 (file)
--- a/gcc/c-typeck.c
+++ b/gcc/c-typeck.c
@@ -2071,6 +2071,8 @@ build_binary_op (code, orig_op0, orig_op1, convert_p)
      case BIT_XOR_EXPR:
        if (code0 == INTEGER_TYPE && code1 == INTEGER_TYPE)
         shorten = -1;
+      else if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE)
+       common = 1;
        break;
  
      case TRUNC_MOD_EXPR:
@@ -2778,7 +2780,12 @@ build_unary_op (code, xarg, flag)
        break;
  
      case BIT_NOT_EXPR:
-      if (typecode == COMPLEX_TYPE)
+      if (typecode == INTEGER_TYPE || typecode == VECTOR_TYPE)
+       {
+         if (!noconvert)
+           arg = default_conversion (arg);
+       }
+      else if (typecode == COMPLEX_TYPE)
         {
           code = CONJ_EXPR;
           if (pedantic)
@@ -2786,13 +2793,11 @@ build_unary_op (code, xarg, flag)
           if (!noconvert)
             arg = default_conversion (arg);
         }
-      else if (typecode != INTEGER_TYPE)
+      else
         {
           error ("wrong type argument to bit-complement");
           return error_mark_node;
         }
-      else if (!noconvert)
-       arg = default_conversion (arg);
        break;
  
      case ABS_EXPR:
diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c

index 379de960dddc8c01d406c0a979dbd0b12f0c365e..0ee4f117f352f888c20f46db77d8b4033beb4143 100644 (file)
--- a/gcc/emit-rtl.c
+++ b/gcc/emit-rtl.c
@@ -991,7 +991,7 @@ gen_lowpart_common (mode, x)
         return gen_rtx_fmt_e (GET_CODE (x), mode, XEXP (x, 0));
      }
    else if (GET_CODE (x) == SUBREG || GET_CODE (x) == REG
-          || GET_CODE (x) == CONCAT)
+          || GET_CODE (x) == CONCAT || GET_CODE (x) == CONST_VECTOR)
      return simplify_gen_subreg (mode, x, GET_MODE (x), offset);
    /* If X is a CONST_INT or a CONST_DOUBLE, extract the appropriate bits
       from the low-order part of the constant.  */
diff --git a/gcc/optabs.c b/gcc/optabs.c

index d3568b2998e72c14f506de78981b8a0d10d3a882..c9230618e45a8a686cd369d74dbca77a20b862d2 100644 (file)
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -1923,40 +1923,86 @@ expand_vector_binop (mode, binoptab, op0, op1, target, unsignedp, methods)
       int unsignedp;
       enum optab_methods methods;
  {
-  enum machine_mode submode;
-  int elts, subsize, i;
+  enum machine_mode submode, tmode;
+  int size, elts, subsize, subbitsize, i;
    rtx t, a, b, res, seq;
    enum mode_class class;
  
    class = GET_MODE_CLASS (mode);
  
+  size =  GET_MODE_SIZE (mode);
    submode = GET_MODE_INNER (mode);
-  subsize = GET_MODE_UNIT_SIZE (mode);
-  elts = GET_MODE_NUNITS (mode);
-
-  if (!target)
-    target = gen_reg_rtx (mode);
-
-  start_sequence ();
  
-  /* FIXME: Optimally, we should try to do this in narrower vector
-     modes if available.  E.g. When trying V8SI, try V4SI, else
-     V2SI, else decay into SI.  */
+  /* Search for the widest vector mode with the same inner mode that is
+     still narrower than MODE and that allows this operator to be open-coded.
+     Note, if we find such a mode and the handler later decides it can't
+     do the expansion, we'll be called recursively with the narrower mode.  */
+  for (tmode = GET_CLASS_NARROWEST_MODE (class);
+       GET_MODE_SIZE (tmode) < GET_MODE_SIZE (mode);
+       tmode = GET_MODE_WIDER_MODE (tmode))
+    {
+      if (GET_MODE_INNER (tmode) == GET_MODE_INNER (mode)
+         && binoptab->handlers[(int) tmode].insn_code != CODE_FOR_nothing)
+       submode = tmode;
+    }
  
    switch (binoptab->code)
      {
+    case AND:
+    case IOR:
+    case XOR:
+      tmode = int_mode_for_mode (mode);
+      if (tmode != BLKmode)
+       submode = tmode;
      case PLUS:
      case MINUS:
      case MULT:
      case DIV:
+      subsize = GET_MODE_SIZE (submode);
+      subbitsize = GET_MODE_BITSIZE (submode);
+      elts = size / subsize;
+
+      /* If METHODS is OPTAB_DIRECT, we don't insist on the exact mode,
+        but that we operate on more than one element at a time.  */
+      if (subsize == GET_MODE_UNIT_SIZE (mode) && methods == OPTAB_DIRECT)
+       return 0;
+
+      start_sequence ();
+
+      /* Errors can leave us with a const0_rtx as operand.  */
+      if (GET_MODE (op0) != mode)
+       op0 = copy_to_mode_reg (mode, op0);
+      if (GET_MODE (op1) != mode)
+       op1 = copy_to_mode_reg (mode, op1);
+
+      if (!target)
+       target = gen_reg_rtx (mode);
+
        for (i = 0; i < elts; ++i)
         {
-         t = simplify_gen_subreg (submode, target, mode,
-                                  i * subsize);
-         a = simplify_gen_subreg (submode, op0, mode,
-                                  i * subsize);
-         b = simplify_gen_subreg (submode, op1, mode,
-                                  i * subsize);
+         /* If this is part of a register, and not the first item in the
+            word, we can't store using a SUBREG - that would clobber
+            previous results.
+            And storing with a SUBREG is only possible for the least
+            significant part, hence we can't do it for big endian
+            (unless we want to permute the evaluation order).  */
+         if (GET_CODE (target) == REG
+             && (BYTES_BIG_ENDIAN
+                 ? subsize < UNITS_PER_WORD
+                 : ((i * subsize) % UNITS_PER_WORD) != 0))
+           t = NULL_RTX;
+         else
+           t = simplify_gen_subreg (submode, target, mode, i * subsize);
+         if (CONSTANT_P (op0))
+           a = simplify_gen_subreg (submode, op0, mode, i * subsize);
+         else
+           a = extract_bit_field (op0, subbitsize, i * subbitsize, unsignedp,
+                                  NULL_RTX, submode, submode, size);
+         if (CONSTANT_P (op1))
+           b = simplify_gen_subreg (submode, op1, mode, i * subsize);
+         else
+           b = extract_bit_field (op1, subbitsize, i * subbitsize, unsignedp,
+                                  NULL_RTX, submode, submode, size);
  
           if (binoptab->code == DIV)
             {
@@ -1974,7 +2020,11 @@ expand_vector_binop (mode, binoptab, op0, op1, target, unsignedp, methods)
           if (res == 0)
             break;
  
-         emit_move_insn (t, res);
+         if (t)
+           emit_move_insn (t, res);
+         else
+           store_bit_field (target, subbitsize, i * subbitsize, submode, res,
+                            size);
         }
        break;
  
@@ -1999,31 +2049,83 @@ expand_vector_unop (mode, unoptab, op0, target, unsignedp)
       rtx target;
       int unsignedp;
  {
-  enum machine_mode submode;
-  int elts, subsize, i;
+  enum machine_mode submode, tmode;
+  int size, elts, subsize, subbitsize, i;
    rtx t, a, res, seq;
  
+  size =  GET_MODE_SIZE (mode);
    submode = GET_MODE_INNER (mode);
-  subsize = GET_MODE_UNIT_SIZE (mode);
-  elts = GET_MODE_NUNITS (mode);
+
+  /* Search for the widest vector mode with the same inner mode that is
+     still narrower than MODE and that allows this operator to be open-coded.
+     Note, if we find such a mode and the handler later decides it can't
+     do the expansion, we'll be called recursively with the narrower mode.  */
+  for (tmode = GET_CLASS_NARROWEST_MODE (GET_MODE_CLASS (mode));
+       GET_MODE_SIZE (tmode) < GET_MODE_SIZE (mode);
+       tmode = GET_MODE_WIDER_MODE (tmode))
+    {
+      if (GET_MODE_INNER (tmode) == GET_MODE_INNER (mode)
+         && unoptab->handlers[(int) tmode].insn_code != CODE_FOR_nothing)
+       submode = tmode;
+    }
+  /* If there is no negate operation, try doing a subtract from zero.  */
+  if (unoptab == neg_optab && GET_MODE_CLASS (submode) == MODE_INT)
+    {    
+      rtx temp;
+      temp = expand_binop (mode, sub_optab, CONST0_RTX (mode), op0,
+                           target, unsignedp, OPTAB_DIRECT);
+      if (temp)
+       return temp;
+    }
+
+  if (unoptab == one_cmpl_optab)
+    {
+      tmode = int_mode_for_mode (mode);
+      if (tmode != BLKmode)
+       submode = tmode;
+    }
+
+  subsize = GET_MODE_SIZE (submode);
+  subbitsize = GET_MODE_BITSIZE (submode);
+  elts = size / subsize;
+
+  /* Errors can leave us with a const0_rtx as operand.  */
+  if (GET_MODE (op0) != mode)
+    op0 = copy_to_mode_reg (mode, op0);
  
    if (!target)
      target = gen_reg_rtx (mode);
  
    start_sequence ();
  
-  /* FIXME: Optimally, we should try to do this in narrower vector
-     modes if available.  E.g. When trying V8SI, try V4SI, else
-     V2SI, else decay into SI.  */
-
    for (i = 0; i < elts; ++i)
      {
-      t = simplify_gen_subreg (submode, target, mode, i * subsize);
-      a = simplify_gen_subreg (submode, op0, mode, i * subsize);
+      /* If this is part of a register, and not the first item in the
+        word, we can't store using a SUBREG - that would clobber
+        previous results.
+        And storing with a SUBREG is only possible for the least
+        significant part, hence we can't do it for big endian
+        (unless we want to permute the evaluation order).  */
+      if (GET_CODE (target) == REG
+         && (BYTES_BIG_ENDIAN
+             ?  subsize < UNITS_PER_WORD
+             : ((i * subsize) % UNITS_PER_WORD) != 0))
+       t = NULL_RTX;
+      else
+       t = simplify_gen_subreg (submode, target, mode, i * subsize);
+      if (CONSTANT_P (op0))
+       a = simplify_gen_subreg (submode, op0, mode, i * subsize);
+      else
+       a = extract_bit_field (op0, subbitsize, i * subbitsize, unsignedp,
+                              t, submode, submode, size);
  
        res = expand_unop (submode, unoptab, a, t, unsignedp);
  
-      emit_move_insn (t, res);
+      if (t)
+       emit_move_insn (t, res);
+      else
+       store_bit_field (target, subbitsize, i * subbitsize, submode, res,
+                        size);
      }
  
    seq = get_insns ();
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c

index cdc604336546d121be0006a6ed300b21b8251059..63961dd5ada4c9a3155fcdb2110e967cdc788f15 100644 (file)
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -2271,19 +2271,57 @@ simplify_subreg (outermode, op, innermode, byte)
    /* Simplify subregs of vector constants.  */
    if (GET_CODE (op) == CONST_VECTOR)
      {
-      int offset = byte / UNITS_PER_WORD;
+      int elt_size = GET_MODE_SIZE (GET_MODE_INNER (innermode));
+      int offset = byte / elt_size;
        rtx elt;
  
-      /* This shouldn't happen, but let's not do anything stupid.  */
-      if (GET_MODE_INNER (innermode) != outermode)
-       return NULL_RTX;
-
-      elt = CONST_VECTOR_ELT (op, offset);
+      if (GET_MODE_INNER (innermode) == outermode)
+       {
+         elt = CONST_VECTOR_ELT (op, offset);
  
-      /* ?? We probably don't need this copy_rtx because constants
-        can be shared.  ?? */
+         /* ?? We probably don't need this copy_rtx because constants
+            can be shared.  ?? */
  
-      return copy_rtx (elt);
+         return copy_rtx (elt);
+       }
+      else if (GET_MODE_INNER (innermode) == GET_MODE_INNER (outermode)
+              && GET_MODE_SIZE (innermode) > GET_MODE_SIZE (outermode))
+       {
+         return (gen_rtx_CONST_VECTOR
+                 (outermode,
+                  gen_rtvec_v (GET_MODE_NUNITS (outermode),
+                               &CONST_VECTOR_ELT (op, offset))));
+       }
+      else if (GET_MODE_CLASS (outermode) == MODE_INT
+              && (GET_MODE_SIZE (outermode) % elt_size == 0))
+       {
+         /* This happens when the target register size is smaller than
+            the vector mode, and we synthesize operations with vectors
+            of elements that are smaller than the register size.  */
+         HOST_WIDE_INT sum = 0, high = 0;
+         unsigned n_elts = (GET_MODE_SIZE (outermode) / elt_size);
+         unsigned i = BYTES_BIG_ENDIAN ? offset : offset + n_elts - 1;
+         unsigned step = BYTES_BIG_ENDIAN ? 1 : -1;
+         int shift = BITS_PER_UNIT * elt_size;
+
+         for (; n_elts--; i += step)
+           {
+             elt = CONST_VECTOR_ELT (op, i);
+             if (GET_CODE (elt) != CONST_INT)
+               return NULL_RTX;
+             high = high << shift | sum >> (HOST_BITS_PER_WIDE_INT - shift);
+             sum = (sum << shift) + INTVAL (elt);
+           }
+         if (GET_MODE_BITSIZE (outermode) <= HOST_BITS_PER_WIDE_INT)
+           return GEN_INT (trunc_int_for_mode (sum, outermode));
+         else if (GET_MODE_BITSIZE (outermode) == 2* HOST_BITS_PER_WIDE_INT)
+           return immed_double_const (high, sum, outermode);
+         else
+           return NULL_RTX;
+       }
+      else
+        /* This shouldn't happen, but let's not do anything stupid.  */
+       return NULL_RTX;
      }
  
    /* Attempt to simplify constant to non-SUBREG expression.  */
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index 06b43e1bc75ded753a63713503b3e648bef6d6b6..9daae1a46b07f0f3897176a08f04695829bc6647 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+Wed Jul  3 10:25:41 2002  J"orn Rennecke <joern.rennecke@superh.com>
+
+       * gcc.c-torture/execute/simd-1.c (main): Also test &, |, ^, ~.
+       * gcc.c-torture/execute/simd-2.c (main): Likewise.
+
  2002-07-02  Kaveh R. Ghazi  <ghazi@caip.rutgers.edu>
  
         * gcc.dg/cpp/tr-warn2.c: Use traditional C style function definitions.
diff --git a/gcc/testsuite/gcc.c-torture/execute/simd-1.c b/gcc/testsuite/gcc.c-torture/execute/simd-1.c

index cb503e457d844e58231bcd494e5ebc79c75f308a..a93a6191cd13e98679a7ae33a258d1e4644708f0 100644 (file)
--- a/gcc/testsuite/gcc.c-torture/execute/simd-1.c
+++ b/gcc/testsuite/gcc.c-torture/execute/simd-1.c
@@ -45,10 +45,29 @@ main ()
  
    verify (res.i[0], res.i[1], res.i[2], res.i[3], 15, 7, 7, 6);
  
+  k = i & j;
+  res.v = k;
+
+  verify (res.i[0], res.i[1], res.i[2], res.i[3], 2, 4, 20, 8);
+
+  k = i | j;
+  res.v = k;
+
+  verify (res.i[0], res.i[1], res.i[2], res.i[3], 158, 109, 150, 222);
+
+  k = i ^ j;
+  res.v = k;
+
+  verify (res.i[0], res.i[1], res.i[2], res.i[3], 156, 105, 130, 214);
+
    k = -i;
    res.v = k;
    verify (res.i[0], res.i[1], res.i[2], res.i[3],
           -150, -100, -150, -200);
  
+  k = ~i;
+  res.v = k;
+  verify (res.i[0], res.i[1], res.i[2], res.i[3], -151, -101, -151, -201);
+
    exit (0);
  }
diff --git a/gcc/testsuite/gcc.c-torture/execute/simd-2.c b/gcc/testsuite/gcc.c-torture/execute/simd-2.c

index a49d9da589c848649469b9a2ddf6877bbad08a4f..2d1b92228f7cffce9b253ca83a557e40c73365c3 100644 (file)
--- a/gcc/testsuite/gcc.c-torture/execute/simd-2.c
+++ b/gcc/testsuite/gcc.c-torture/execute/simd-2.c
@@ -44,10 +44,29 @@ main ()
  
    verify (res.i[0], res.i[1], res.i[2], res.i[3], 15, 7, 7, 6);
  
+  k = i & j;
+  res.v = k;
+
+  verify (res.i[0], res.i[1], res.i[2], res.i[3], 2, 4, 20, 8);
+
+  k = i | j;
+  res.v = k;
+
+  verify (res.i[0], res.i[1], res.i[2], res.i[3], 158, 109, 150, 222);
+
+  k = i ^ j;
+  res.v = k;
+
+  verify (res.i[0], res.i[1], res.i[2], res.i[3], 156, 105, 130, 214);
+
    k = -i;
    res.v = k;
    verify (res.i[0], res.i[1], res.i[2], res.i[3],
           -150, -100, -150, -200);
  
+  k = ~i;
+  res.v = k;
+  verify (res.i[0], res.i[1], res.i[2], res.i[3], -151, -101, -151, -201);
+
    exit (0);
  }
author	J"orn Rennecke <joern.rennecke@superh.com>
	Wed, 3 Jul 2002 09:49:46 +0000 (09:49 +0000)
committer	Joern Rennecke <amylaar@gcc.gnu.org>
	Wed, 3 Jul 2002 09:49:46 +0000 (10:49 +0100)
gcc/ChangeLog		patch \| blob \| history
gcc/c-typeck.c		patch \| blob \| history
gcc/emit-rtl.c		patch \| blob \| history
gcc/optabs.c		patch \| blob \| history
gcc/simplify-rtx.c		patch \| blob \| history
gcc/testsuite/ChangeLog		patch \| blob \| history
gcc/testsuite/gcc.c-torture/execute/simd-1.c		patch \| blob \| history
gcc/testsuite/gcc.c-torture/execute/simd-2.c		patch \| blob \| history