tgsi: use TGSI_INTERPOLATE_x arguments instead of zeros in ureg code

[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_lowering.c
diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.c b/src/gallium/auxiliary/tgsi/tgsi_lowering.c

index 795993145913e4d1f9ad480b8fb05a53e35c77a4..bfc3a6bfe3602e195d8eb496935b913c862f6afb 100644 (file)
--- a/src/gallium/auxiliary/tgsi/tgsi_lowering.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.c
@@ -258,153 +258,21 @@ transform_dst(struct tgsi_transform_context *tctx,
     }
  }
  
-/* XPD - Cross Product
- *   dst.x = src0.y \times src1.z - src1.y \times src0.z
- *   dst.y = src0.z \times src1.x - src1.z \times src0.x
- *   dst.z = src0.x \times src1.y - src1.x \times src0.y
- *   dst.w = 1.0
- *
- * ; needs: 2 tmp, imm{1.0}
- * MUL tmpA.xyz, src0.yzx, src1.zxy
- * MUL tmpB.xyz, src1.yzx, src0.zxy
- * SUB dst.xyz, tmpA.xyz, tmpB.xyz
- * MOV dst.w, imm{1.0}
- */
-#define XPD_GROW (NINST(2) + NINST(2) + NINST(2) + NINST(1) - OINST(2))
-#define XPD_TMP  2
-static void
-transform_xpd(struct tgsi_transform_context *tctx,
-              struct tgsi_full_instruction *inst)
-{
-   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
-   struct tgsi_full_dst_register *dst  = &inst->Dst[0];
-   struct tgsi_full_src_register *src0 = &inst->Src[0];
-   struct tgsi_full_src_register *src1 = &inst->Src[1];
-   struct tgsi_full_instruction new_inst;
-
-   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
-      /* MUL tmpA.xyz, src0.yzx, src1.zxy */
-      new_inst = tgsi_default_full_instruction();
-      new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
-      new_inst.Instruction.NumDstRegs = 1;
-      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ);
-      new_inst.Instruction.NumSrcRegs = 2;
-      reg_src(&new_inst.Src[0], src0, SWIZ(Y, Z, X, _));
-      reg_src(&new_inst.Src[1], src1, SWIZ(Z, X, Y, _));
-      tctx->emit_instruction(tctx, &new_inst);
-
-      /* MUL tmpB.xyz, src1.yzx, src0.zxy */
-      new_inst = tgsi_default_full_instruction();
-      new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
-      new_inst.Instruction.NumDstRegs = 1;
-      reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZ);
-      new_inst.Instruction.NumSrcRegs = 2;
-      reg_src(&new_inst.Src[0], src1, SWIZ(Y, Z, X, _));
-      reg_src(&new_inst.Src[1], src0, SWIZ(Z, X, Y, _));
-      tctx->emit_instruction(tctx, &new_inst);
-
-      /* SUB dst.xyz, tmpA.xyz, tmpB.xyz */
-      new_inst = tgsi_default_full_instruction();
-      new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
-      new_inst.Instruction.NumDstRegs = 1;
-      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZ);
-      new_inst.Instruction.NumSrcRegs = 2;
-      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, _));
-      reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, Y, Z, _));
-      tctx->emit_instruction(tctx, &new_inst);
-   }
-
-   if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
-      /* MOV dst.w, imm{1.0} */
-      new_inst = tgsi_default_full_instruction();
-      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
-      new_inst.Instruction.NumDstRegs = 1;
-      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
-      new_inst.Instruction.NumSrcRegs = 1;
-      reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
-      tctx->emit_instruction(tctx, &new_inst);
-   }
-}
-
-/* SCS - Sine Cosine
- *   dst.x = \cos{src.x}
- *   dst.y = \sin{src.x}
- *   dst.z = 0.0
- *   dst.w = 1.0
- *
- * ; needs: 1 tmp, imm{0.0, 1.0}
- * if (dst.x aliases src.x) {
- *   MOV tmpA.x, src.x
- *   src = tmpA
- * }
- * COS dst.x, src.x
- * SIN dst.y, src.x
- * MOV dst.zw, imm{0.0, 1.0}
- */
-#define SCS_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) - OINST(1))
-#define SCS_TMP  1
-static void
-transform_scs(struct tgsi_transform_context *tctx,
-              struct tgsi_full_instruction *inst)
-{
-   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
-   struct tgsi_full_dst_register *dst = &inst->Dst[0];
-   struct tgsi_full_src_register *src = &inst->Src[0];
-   struct tgsi_full_instruction new_inst;
-
-   if (aliases(dst, TGSI_WRITEMASK_X, src, TGSI_WRITEMASK_X)) {
-      create_mov(tctx, &ctx->tmp[A].dst, src, TGSI_WRITEMASK_X, 0);
-      src = &ctx->tmp[A].src;
-   }
-
-   if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
-      /* COS dst.x, src.x */
-      new_inst = tgsi_default_full_instruction();
-      new_inst.Instruction.Opcode = TGSI_OPCODE_COS;
-      new_inst.Instruction.NumDstRegs = 1;
-      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
-      new_inst.Instruction.NumSrcRegs = 1;
-      reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
-      tctx->emit_instruction(tctx, &new_inst);
-   }
-
-   if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
-      /* SIN dst.y, src.x */
-      new_inst = tgsi_default_full_instruction();
-      new_inst.Instruction.Opcode = TGSI_OPCODE_SIN;
-      new_inst.Instruction.NumDstRegs = 1;
-      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
-      new_inst.Instruction.NumSrcRegs = 1;
-      reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
-      tctx->emit_instruction(tctx, &new_inst);
-   }
-
-   if (dst->Register.WriteMask & TGSI_WRITEMASK_ZW) {
-      /* MOV dst.zw, imm{0.0, 1.0} */
-      new_inst = tgsi_default_full_instruction();
-      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
-      new_inst.Instruction.NumDstRegs = 1;
-      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_ZW);
-      new_inst.Instruction.NumSrcRegs = 1;
-      reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, X, Y));
-      tctx->emit_instruction(tctx, &new_inst);
-   }
-}
-
  /* LRP - Linear Interpolate
   *  dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
   *  dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
   *  dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z
   *  dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w
   *
- * ; needs: 2 tmp, imm{1.0}
- * MUL tmpA, src0, src1
- * SUB tmpB, imm{1.0}, src0
- * MUL tmpB, tmpB, src2
- * ADD dst, tmpA, tmpB
+ * This becomes: src0 \times src1 + src2 - src0 \times src2, which
+ * can then become: src0 \times src1 - (src0 \times src2 - src2)
+ *
+ * ; needs: 1 tmp
+ * MAD tmpA, src0, src2, -src2
+ * MAD dst, src0, src1, -tmpA
   */
-#define LRP_GROW (NINST(2) + NINST(2) + NINST(2) + NINST(2) - OINST(3))
-#define LRP_TMP  2
+#define LRP_GROW (NINST(3) + NINST(3) - OINST(3))
+#define LRP_TMP  1
  static void
  transform_lrp(struct tgsi_transform_context *tctx,
                struct tgsi_full_instruction *inst)
@@ -417,44 +285,28 @@ transform_lrp(struct tgsi_transform_context *tctx,
     struct tgsi_full_instruction new_inst;
  
     if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
-      /* MUL tmpA, src0, src1 */
+      /* MAD tmpA, src0, src2, -src2 */
        new_inst = tgsi_default_full_instruction();
-      new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
+      new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
        new_inst.Instruction.NumDstRegs = 1;
        reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
-      new_inst.Instruction.NumSrcRegs = 2;
+      new_inst.Instruction.NumSrcRegs = 3;
        reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
-      reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W));
-      tctx->emit_instruction(tctx, &new_inst);
-
-      /* SUB tmpB, imm{1.0}, src0 */
-      new_inst = tgsi_default_full_instruction();
-      new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
-      new_inst.Instruction.NumDstRegs = 1;
-      reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZW);
-      new_inst.Instruction.NumSrcRegs = 2;
-      reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, Y, Y, Y));
-      reg_src(&new_inst.Src[1], src0, SWIZ(X, Y, Z, W));
-      tctx->emit_instruction(tctx, &new_inst);
-
-      /* MUL tmpB, tmpB, src2 */
-      new_inst = tgsi_default_full_instruction();
-      new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
-      new_inst.Instruction.NumDstRegs = 1;
-      reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_XYZW);
-      new_inst.Instruction.NumSrcRegs = 2;
-      reg_src(&new_inst.Src[0], &ctx->tmp[B].src, SWIZ(X, Y, Z, W));
        reg_src(&new_inst.Src[1], src2, SWIZ(X, Y, Z, W));
+      reg_src(&new_inst.Src[2], src2, SWIZ(X, Y, Z, W));
+      new_inst.Src[2].Register.Negate = !new_inst.Src[2].Register.Negate;
        tctx->emit_instruction(tctx, &new_inst);
  
-      /* ADD dst, tmpA, tmpB */
+      /* MAD dst, src0, src1, -tmpA */
        new_inst = tgsi_default_full_instruction();
-      new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
+      new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
        new_inst.Instruction.NumDstRegs = 1;
        reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
-      new_inst.Instruction.NumSrcRegs = 2;
-      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
-      reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, Y, Z, W));
+      new_inst.Instruction.NumSrcRegs = 3;
+      reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
+      reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W));
+      reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
+      new_inst.Src[2].Register.Negate = true;
        tctx->emit_instruction(tctx, &new_inst);
     }
  }
@@ -492,12 +344,13 @@ transform_frc(struct tgsi_transform_context *tctx,
  
        /* ADD dst, src, -tmpA  (i.e. dst = src - tmpA) */
        new_inst = tgsi_default_full_instruction();
-      new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
+      new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
        new_inst.Instruction.NumDstRegs = 1;
        reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
        new_inst.Instruction.NumSrcRegs = 2;
        reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
        reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
+      new_inst.Src[1].Register.Negate = 1;
        tctx->emit_instruction(tctx, &new_inst);
     }
  }
@@ -595,16 +448,25 @@ transform_lit(struct tgsi_transform_context *tctx,
        reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X, X, _, _));
        tctx->emit_instruction(tctx, &new_inst);
  
-      /* CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0} */
+      /* MIN tmpA.z, src.w, imm{128.0} */
        new_inst = tgsi_default_full_instruction();
-      new_inst.Instruction.Opcode = TGSI_OPCODE_CLAMP;
+      new_inst.Instruction.Opcode = TGSI_OPCODE_MIN;
        new_inst.Instruction.NumDstRegs = 1;
        reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
-      new_inst.Instruction.NumSrcRegs = 3;
+      new_inst.Instruction.NumSrcRegs = 2;
        reg_src(&new_inst.Src[0], src, SWIZ(_, _, W, _));
        reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));
+      tctx->emit_instruction(tctx, &new_inst);
+
+      /* MAX tmpA.z, tmpA.z, -imm{128.0} */
+      new_inst = tgsi_default_full_instruction();
+      new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
+      new_inst.Instruction.NumDstRegs = 1;
+      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
+      new_inst.Instruction.NumSrcRegs = 2;
+      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Z, _));
+      reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));
        new_inst.Src[1].Register.Negate = true;
-      reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, _, Z, _));
        tctx->emit_instruction(tctx, &new_inst);
  
        /* LG2 tmpA.y, tmpA.y */
@@ -713,12 +575,13 @@ transform_exp(struct tgsi_transform_context *tctx,
  
           /* ADD tmpA.x, src.x, -tmpA.x  (i.e. tmpA.x = src.x - tmpA.x) */
           new_inst = tgsi_default_full_instruction();
-         new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
+         new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
           new_inst.Instruction.NumDstRegs = 1;
           reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
           new_inst.Instruction.NumSrcRegs = 2;
           reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
           reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
+         new_inst.Src[1].Register.Negate = 1;
           tctx->emit_instruction(tctx, &new_inst);
       } else {
           /* FLR tmpA.x, src.x */
@@ -746,12 +609,13 @@ transform_exp(struct tgsi_transform_context *tctx,
     if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
        /* ADD dst.y, src.x, -tmpA.x  (i.e. dst.y = src.x - tmpA.x) */
        new_inst = tgsi_default_full_instruction();
-      new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
+      new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
        new_inst.Instruction.NumDstRegs = 1;
        reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
        new_inst.Instruction.NumSrcRegs = 2;
        reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
        reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, X, _, _));
+      new_inst.Src[1].Register.Negate = 1;
        tctx->emit_instruction(tctx, &new_inst);
     }
  
@@ -846,12 +710,13 @@ transform_log(struct tgsi_transform_context *tctx,
  
           /* ADD tmpA.y, tmpA.x, -tmpA.y  (i.e. tmpA.y = tmpA.x - tmpA.y) */
           new_inst = tgsi_default_full_instruction();
-         new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
+         new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
           new_inst.Instruction.NumDstRegs = 1;
           reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
           new_inst.Instruction.NumSrcRegs = 2;
           reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
           reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
+         new_inst.Src[1].Register.Negate = 1;
           tctx->emit_instruction(tctx, &new_inst);
        } else {
           /* FLR tmpA.y, tmpA.x */
@@ -925,41 +790,29 @@ transform_log(struct tgsi_transform_context *tctx,
   * DP3 - 3-component Dot Product
   *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
   *
- * DPH - Homogeneous Dot Product
- *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
- *
   * DP2 - 2-component Dot Product
   *   dst = src0.x \times src1.x + src0.y \times src1.y
   *
- * DP2A - 2-component Dot Product And Add
- *   dst = src0.x \times src1.x + src0.y \times src1.y + src2.x
- *
   * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar
   * operations, which is what you'd prefer for a ISA that is natively
   * scalar.  Probably a native vector ISA would at least already have
   * DP4/DP3 instructions, but perhaps there is room for an alternative
- * translation for DPH/DP2/DP2A using vector instructions.
+ * translation for DP2 using vector instructions.
   *
   * ; needs: 1 tmp
   * MUL tmpA.x, src0.x, src1.x
   * MAD tmpA.x, src0.y, src1.y, tmpA.x
- * if (DPH || DP3 || DP4) {
+ * if (DP3 || DP4) {
   *   MAD tmpA.x, src0.z, src1.z, tmpA.x
- *   if (DPH) {
- *     ADD tmpA.x, src1.w, tmpA.x
- *   } else if (DP4) {
+ *   if (DP4) {
   *     MAD tmpA.x, src0.w, src1.w, tmpA.x
   *   }
- * } else if (DP2A) {
- *   ADD tmpA.x, src2.x, tmpA.x
   * }
   * ; fixup last instruction to replicate into dst
   */
  #define DP4_GROW  (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2))
  #define DP3_GROW  (NINST(2) + NINST(3) + NINST(3) - OINST(2))
-#define DPH_GROW  (NINST(2) + NINST(3) + NINST(3) + NINST(2) - OINST(2))
  #define DP2_GROW  (NINST(2) + NINST(3) - OINST(2))
-#define DP2A_GROW (NINST(2) + NINST(3) + NINST(2) - OINST(3))
  #define DOTP_TMP  1
  static void
  transform_dotp(struct tgsi_transform_context *tctx,
@@ -969,7 +822,6 @@ transform_dotp(struct tgsi_transform_context *tctx,
     struct tgsi_full_dst_register *dst  = &inst->Dst[0];
     struct tgsi_full_src_register *src0 = &inst->Src[0];
     struct tgsi_full_src_register *src1 = &inst->Src[1];
-   struct tgsi_full_src_register *src2 = &inst->Src[2]; /* only DP2A */
     struct tgsi_full_instruction new_inst;
     unsigned opcode = inst->Instruction.Opcode;
  
@@ -998,8 +850,7 @@ transform_dotp(struct tgsi_transform_context *tctx,
        reg_src(&new_inst.Src[1], src1, SWIZ(Y, Y, Y, Y));
        reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
  
-      if ((opcode == TGSI_OPCODE_DPH) ||
-          (opcode == TGSI_OPCODE_DP3) ||
+      if ((opcode == TGSI_OPCODE_DP3) ||
            (opcode == TGSI_OPCODE_DP4)) {
           tctx->emit_instruction(tctx, &new_inst);
  
@@ -1013,18 +864,7 @@ transform_dotp(struct tgsi_transform_context *tctx,
           reg_src(&new_inst.Src[1], src1, SWIZ(Z, Z, Z, Z));
           reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
  
-         if (opcode == TGSI_OPCODE_DPH) {
-            tctx->emit_instruction(tctx, &new_inst);
-
-            /* ADD tmpA.x, src1.w, tmpA.x */
-            new_inst = tgsi_default_full_instruction();
-            new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
-            new_inst.Instruction.NumDstRegs = 1;
-            reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
-            new_inst.Instruction.NumSrcRegs = 2;
-            reg_src(&new_inst.Src[0], src1, SWIZ(W, W, W, W));
-            reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, X, X, X));
-         } else if (opcode == TGSI_OPCODE_DP4) {
+         if (opcode == TGSI_OPCODE_DP4) {
              tctx->emit_instruction(tctx, &new_inst);
  
              /* MAD tmpA.x, src0.w, src1.w, tmpA.x */
@@ -1037,17 +877,6 @@ transform_dotp(struct tgsi_transform_context *tctx,
              reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W));
              reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
           }
-      } else if (opcode == TGSI_OPCODE_DP2A) {
-         tctx->emit_instruction(tctx, &new_inst);
-
-         /* ADD tmpA.x, src2.x, tmpA.x */
-         new_inst = tgsi_default_full_instruction();
-         new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
-         new_inst.Instruction.NumDstRegs = 1;
-         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
-         new_inst.Instruction.NumSrcRegs = 2;
-         reg_src(&new_inst.Src[0], src2, SWIZ(X, X, X, X));
-         reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, X, X, X));
        }
  
        /* fixup last instruction to write to dst: */
@@ -1096,15 +925,14 @@ transform_flr_ceil(struct tgsi_transform_context *tctx,
  
        /* FLR: ADD dst, src, -tmpA  CEIL: ADD dst, src, tmpA */
        new_inst = tgsi_default_full_instruction();
-      if (opcode == TGSI_OPCODE_CEIL)
-         new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
-      else
-         new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
+      new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
        new_inst.Instruction.NumDstRegs = 1;
        reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
        new_inst.Instruction.NumSrcRegs = 2;
        reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
        reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
+      if (opcode == TGSI_OPCODE_FLR)
+         new_inst.Src[1].Register.Negate = 1;
        tctx->emit_instruction(tctx, &new_inst);
     }
  }
@@ -1148,7 +976,7 @@ transform_trunc(struct tgsi_transform_context *tctx,
           tctx->emit_instruction(tctx, &new_inst);
  
           new_inst = tgsi_default_full_instruction();
-         new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
+         new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
           new_inst.Instruction.NumDstRegs = 1;
           reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
           new_inst.Instruction.NumSrcRegs = 2;
@@ -1156,6 +984,7 @@ transform_trunc(struct tgsi_transform_context *tctx,
           new_inst.Src[0].Register.Absolute = true;
           new_inst.Src[0].Register.Negate = false;
           reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
+         new_inst.Src[1].Register.Negate = 1;
           tctx->emit_instruction(tctx, &new_inst);
        } else {
           new_inst = tgsi_default_full_instruction();
@@ -1513,16 +1342,6 @@ transform_instr(struct tgsi_transform_context *tctx,
           goto skip;
        transform_dst(tctx, inst);
        break;
-   case TGSI_OPCODE_XPD:
-      if (!ctx->config->lower_XPD)
-         goto skip;
-      transform_xpd(tctx, inst);
-      break;
-   case TGSI_OPCODE_SCS:
-      if (!ctx->config->lower_SCS)
-         goto skip;
-      transform_scs(tctx, inst);
-      break;
     case TGSI_OPCODE_LRP:
        if (!ctx->config->lower_LRP)
           goto skip;
@@ -1563,21 +1382,11 @@ transform_instr(struct tgsi_transform_context *tctx,
           goto skip;
        transform_dotp(tctx, inst);
        break;
-   case TGSI_OPCODE_DPH:
-      if (!ctx->config->lower_DPH)
-         goto skip;
-      transform_dotp(tctx, inst);
-      break;
     case TGSI_OPCODE_DP2:
        if (!ctx->config->lower_DP2)
           goto skip;
        transform_dotp(tctx, inst);
        break;
-   case TGSI_OPCODE_DP2A:
-      if (!ctx->config->lower_DP2A)
-         goto skip;
-      transform_dotp(tctx, inst);
-      break;
     case TGSI_OPCODE_FLR:
        if (!ctx->config->lower_FLR)
           goto skip;
@@ -1639,7 +1448,7 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
      * color, then figure out the number of additional inputs we need
      * to create for BCOLOR's..
      */
-   if ((info->processor == TGSI_PROCESSOR_FRAGMENT) &&
+   if ((info->processor == PIPE_SHADER_FRAGMENT) &&
         config->color_two_side) {
        int i;
        ctx.face_idx = -1;
@@ -1656,8 +1465,6 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
  #define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
     /* if there are no instructions to lower, then we are done: */
     if (!(OPCS(DST) ||
-         OPCS(XPD) ||
-         OPCS(SCS) ||
           OPCS(LRP) ||
           OPCS(FRC) ||
           OPCS(POW) ||
@@ -1666,9 +1473,7 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
           OPCS(LOG) ||
           OPCS(DP4) ||
           OPCS(DP3) ||
-         OPCS(DPH) ||
           OPCS(DP2) ||
-         OPCS(DP2A) ||
           OPCS(FLR) ||
           OPCS(CEIL) ||
           OPCS(TRUNC) ||
@@ -1688,14 +1493,6 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
        newlen += DST_GROW * OPCS(DST);
        numtmp = MAX2(numtmp, DST_TMP);
     }
-   if (OPCS(XPD)) {
-      newlen += XPD_GROW * OPCS(XPD);
-      numtmp = MAX2(numtmp, XPD_TMP);
-   }
-   if (OPCS(SCS)) {
-      newlen += SCS_GROW * OPCS(SCS);
-      numtmp = MAX2(numtmp, SCS_TMP);
-   }
     if (OPCS(LRP)) {
        newlen += LRP_GROW * OPCS(LRP);
        numtmp = MAX2(numtmp, LRP_TMP);
@@ -1728,18 +1525,10 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
        newlen += DP3_GROW * OPCS(DP3);
        numtmp = MAX2(numtmp, DOTP_TMP);
     }
-   if (OPCS(DPH)) {
-      newlen += DPH_GROW * OPCS(DPH);
-      numtmp = MAX2(numtmp, DOTP_TMP);
-   }
     if (OPCS(DP2)) {
        newlen += DP2_GROW * OPCS(DP2);
        numtmp = MAX2(numtmp, DOTP_TMP);
     }
-   if (OPCS(DP2A)) {
-      newlen += DP2A_GROW * OPCS(DP2A);
-      numtmp = MAX2(numtmp, DOTP_TMP);
-   }
     if (OPCS(FLR)) {
        newlen += FLR_GROW * OPCS(FLR);
        numtmp = MAX2(numtmp, FLR_TMP);