CELL: add codegen for logic op, color mask

author Robert Ellison <papillo@tungstengraphics.com>

Fri, 19 Sep 2008 07:55:00 +0000 (01:55 -0600)

committer Robert Ellison <papillo@tungstengraphics.com>

Fri, 19 Sep 2008 07:55:00 +0000 (01:55 -0600)
author Robert Ellison <papillo@tungstengraphics.com>
Fri, 19 Sep 2008 07:55:00 +0000 (01:55 -0600)
committer Robert Ellison <papillo@tungstengraphics.com>
Fri, 19 Sep 2008 07:55:00 +0000 (01:55 -0600)
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c

index 12e0826fb9b3545338b3febc30a383c80827b30a..f60bfba3f51f0a399242e0304a45106b790d456e 100644 (file)
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c
@@ -592,11 +592,32 @@ spe_load_int(struct spe_function *p, unsigned rT, int i)
     }
  }
  
+void spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui)
+{
+   /* Emit the shortest sequence that loads the unsigned constant ui
+    * into rT.  If ui fits entirely in the low 18 bits, a single ila
+    * works (ila does not sign-extend).  Otherwise, if both halfwords
+    * of ui are identical, use ilh.  Otherwise use ilhu, then iohl.
+    */
+   if ((ui & 0x3ffff) == ui) {
+      spe_ila(p, rT, ui);
+   }
+   else if ((ui >> 16) == (ui & 0xffff)) {
+      spe_ilh(p, rT, ui & 0xffff);
+   }
+   else {
+      spe_ilhu(p, rT, ui >> 16);
+      if (ui & 0xffff)
+         spe_iohl(p, rT, ui & 0xffff);
+   }
+}
+
  
  void
  spe_splat(struct spe_function *p, unsigned rT, unsigned rA)
  {
-   spe_ila(p, rT, 66051);
+   /* Duplicate bytes 0, 1, 2, and 3 across the whole register */
+   spe_ila(p, rT, 0x00010203);
     spe_shufb(p, rT, rA, rA, rT);
  }
  
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h

index 4ef05ea27d1a10560a024c5fe6f4b548fc516d6f..09400b3fb2a27f4a5e44bc98cfa6961b86129b1d 100644 (file)
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h
@@ -302,6 +302,10 @@ spe_load_float(struct spe_function *p, unsigned rT, float x);
  extern void
  spe_load_int(struct spe_function *p, unsigned rT, int i);
  
+/** Load/splat immediate unsigned int into rT. */
+extern void
+spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui);
+
  /** Replicate word 0 of rA across rT. */
  extern void
  spe_splat(struct spe_function *p, unsigned rT, unsigned rA);
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c

index 9d25e820ad9a0cdc551d4e58bdab64219102a712..899d8423b241624d964e7eca2ebc1f3232f8f772 100644 (file)
--- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
@@ -902,8 +902,69 @@ gen_logicop(const struct pipe_blend_state *blend,
              struct spe_function *f,
              int fragRGBA_reg, int fbRGBA_reg)
  {
-   /* XXX to-do */
-   /* operate on 32-bit packed pixels, not float colors */
+   /* We've got four 32-bit RGBA packed pixels in each of
+    * fragRGBA_reg and fbRGBA_reg, not sets of floating-point
+    * reds, greens, blues, and alphas.
+    * */
+   ASSERT(blend->logicop_enable);
+
+   switch(blend->logicop_func) {
+      case PIPE_LOGICOP_CLEAR: /* 0 */
+         spe_zero(f, fragRGBA_reg);
+         break;
+      case PIPE_LOGICOP_NOR: /* ~(s | d) */
+         spe_nor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
+         break;
+      case PIPE_LOGICOP_AND_INVERTED: /* ~s & d */
+         /* andc R, A, B computes R = A & ~B */
+         spe_andc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg);
+         break;
+      case PIPE_LOGICOP_COPY_INVERTED: /* ~s */
+         spe_complement(f, fragRGBA_reg);
+         break;
+      case PIPE_LOGICOP_AND_REVERSE: /* s & ~d */
+         /* andc R, A, B computes R = A & ~B */
+         spe_andc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
+         break;
+      case PIPE_LOGICOP_INVERT: /* ~d */
+         /* Note that (A nor A) == ~(A|A) == ~A */
+         spe_nor(f, fragRGBA_reg, fbRGBA_reg, fbRGBA_reg);
+         break;
+      case PIPE_LOGICOP_XOR: /* s ^ d */
+         spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
+         break;
+      case PIPE_LOGICOP_NAND: /* ~(s & d) */
+         spe_nand(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
+         break;
+      case PIPE_LOGICOP_AND: /* s & d */
+         spe_and(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
+         break;
+      case PIPE_LOGICOP_EQUIV: /* ~(s ^ d) */
+         spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
+         spe_complement(f, fragRGBA_reg);
+         break;
+      case PIPE_LOGICOP_NOOP: /* d */
+         spe_move(f, fragRGBA_reg, fbRGBA_reg);
+         break;
+      case PIPE_LOGICOP_OR_INVERTED: /* ~s | d */
+         /* orc R, A, B computes R = A | ~B */
+         spe_orc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg);
+         break;
+      case PIPE_LOGICOP_COPY: /* s */
+         break;
+      case PIPE_LOGICOP_OR_REVERSE: /* s | ~d */
+         /* orc R, A, B computes R = A | ~B */
+         spe_orc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
+         break;
+      case PIPE_LOGICOP_OR: /* s | d */
+         spe_or(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
+         break;
+      case PIPE_LOGICOP_SET: /* 1 */
+         spe_load_int(f, fragRGBA_reg, 0xffffffff);
+         break;
+      default:
+         ASSERT(0);
+   }
  }
  
  
@@ -912,11 +973,81 @@ gen_colormask(uint colormask,
                struct spe_function *f,
                int fragRGBA_reg, int fbRGBA_reg)
  {
-   /* XXX to-do */
-   /* operate on 32-bit packed pixels, not float colors */
-}
+   /* We've got four 32-bit RGBA packed pixels in each of
+    * fragRGBA_reg and fbRGBA_reg, not sets of floating-point
+    * reds, greens, blues, and alphas.
+    * */
+
+   /* The color mask operation can prevent any set of color
+    * components in the incoming fragment from being written to the frame 
+    * buffer; we do this by replacing the masked components of the 
+    * fragment with the frame buffer values.
+    *
+    * There are only 16 possibilities, with a unique mask for
+    * each of the possibilities.  (Technically, there are only 15
+    * possibilities, since we shouldn't be called for the one mask
+    * that does nothing, but the complete implementation is here
+    * anyway to avoid confusion.)
+    *
+    * We implement this via a constant static array which we'll index 
+    * into to get the correct mask.
+    * 
+    * We're dependent on the mask values being low-order bits,
+    * with particular values for each bit; so we start with a
+    * few assertions, which will fail if any of the values were
+    * to change.
+    */
+   ASSERT(PIPE_MASK_R == 0x1);
+   ASSERT(PIPE_MASK_G == 0x2);
+   ASSERT(PIPE_MASK_B == 0x4);
+   ASSERT(PIPE_MASK_A == 0x8);
  
+   /* Here's the list of all possible colormasks, indexed by the
+    * value of the combined mask specifier.
+    */
+   static const unsigned int colormasks[16] = {
+      0x00000000, /* 0: all colors masked */
+      0xff000000, /* 1: PIPE_MASK_R */
+      0x00ff0000, /* 2: PIPE_MASK_G */
+      0xffff0000, /* 3: PIPE_MASK_R | PIPE_MASK_G */
+      0x0000ff00, /* 4: PIPE_MASK_B */
+      0xff00ff00, /* 5: PIPE_MASK_R | PIPE_MASK_B */
+      0x00ffff00, /* 6: PIPE_MASK_G | PIPE_MASK_B */
+      0xffffff00, /* 7: PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B */
+      0x000000ff, /* 8: PIPE_MASK_A */
+      0xff0000ff, /* 9: PIPE_MASK_R | PIPE_MASK_A */
+      0x00ff00ff, /* 10: PIPE_MASK_G | PIPE_MASK_A */
+      0xffff00ff, /* 11: PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_A */
+      0x0000ffff, /* 12: PIPE_MASK_B | PIPE_MASK_A */
+      0xff00ffff, /* 13: PIPE_MASK_R | PIPE_MASK_B | PIPE_MASK_A */
+      0x00ffffff, /* 14: PIPE_MASK_G | PIPE_MASK_B | PIPE_MASK_A */
+      0xffffffff  /* 15: PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B | PIPE_MASK_A */
+   };
+
+   /* Get a temporary register to hold the mask */
+   int colormask_reg = spe_allocate_available_register(f);
+
+   /* Look up the desired mask directly and load it into the mask register.
+    * This will load the same mask into each of the four words in the
+    * mask register.
+    */
+   spe_load_uint(f, colormask_reg, colormasks[colormask]);
+
+   /* Use the mask register to select between the fragment color
+    * values and the frame buffer color values.  Wherever the
+    * mask has a 0 bit, the current frame buffer color should override
+    * the fragment color.  Wherever the mask has a 1 bit, the 
+    * fragment color should be kept.  The Select Bits (selb rt, rA, rB, rM)
+    * instruction will select bits from its first operand rA wherever
+    * the mask bits rM are 0, and from its second operand rB wherever the
+    * mask bits rM are 1.  That means that the frame buffer color is the
+    * first operand, and the fragment color the second.
+    */
+    spe_selb(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg, colormask_reg);
  
+    /* Release the temporary register and we're done */
+    spe_release_register(f, colormask_reg);
+}
  
  /**
   * Generate code to pack a quad of float colors into a four 32-bit integers.
@@ -1223,7 +1354,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
           gen_logicop(blend, f, rgba_reg, fbRGBA_reg);
        }
  
-      if (blend->colormask != 0xf) {
+      if (blend->colormask != PIPE_MASK_RGBA) {
           gen_colormask(blend->colormask, f, rgba_reg, fbRGBA_reg);
        }
author	Robert Ellison <papillo@tungstengraphics.com>
	Fri, 19 Sep 2008 07:55:00 +0000 (01:55 -0600)
committer	Robert Ellison <papillo@tungstengraphics.com>
	Fri, 19 Sep 2008 07:55:00 +0000 (01:55 -0600)
src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c		patch \| blob \| history
src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h		patch \| blob \| history
src/gallium/drivers/cell/ppu/cell_gen_fragment.c		patch \| blob \| history