i915g: Fix gl_FragCoord.
[mesa.git] / src / gallium / drivers / i915 / i915_fpc_translate.c
index 379d47e79a35d41d53b5199ef0809010b0a30edc..ec2c31685cb705757128cee1d1c9fa9ba69bea27 100644 (file)
@@ -133,7 +133,21 @@ i915_program_error(struct i915_fp_compile *p, const char *msg, ...)
    p->error = 1;
 }
 
-
+static uint get_mapping(struct i915_fragment_shader* fs, int unit) /* map 'unit' to a slot index in fs->generic_mapping, claiming a slot on first use */
+{
+   int i;
+   for (i = 0; i < I915_TEX_UNITS; i++) /* scan slots in order; the first free or matching slot wins */
+   {
+      if (fs->generic_mapping[i] == -1) { /* -1 marks a free slot: claim it for this unit */
+         fs->generic_mapping[i] = unit;
+         return i;
+      }
+      if (fs->generic_mapping[i] == unit) /* unit already owns this slot: reuse it */
+         return i;
+   }
+   debug_printf("Exceeded max generics\n"); /* every slot is taken by a different unit */
+   return 0; /* NOTE(review): falls back to slot 0 without setting an error, so the result aliases whatever unit owns slot 0 */
+}
 
 /**
  * Construct a ureg for the given source register.  Will emit
@@ -141,14 +155,15 @@ i915_program_error(struct i915_fp_compile *p, const char *msg, ...)
  */
 static uint
 src_vector(struct i915_fp_compile *p,
-           const struct tgsi_full_src_register *source)
+           const struct tgsi_full_src_register *source,
+           struct i915_fragment_shader* fs)
 {
-   uint index = source->SrcRegister.Index;
+   uint index = source->Register.Index;
    uint src = 0, sem_name, sem_ind;
 
-   switch (source->SrcRegister.File) {
+   switch (source->Register.File) {
    case TGSI_FILE_TEMPORARY:
-      if (source->SrcRegister.Index >= I915_MAX_TEMPORARY) {
+      if (source->Register.Index >= I915_MAX_TEMPORARY) {
          i915_program_error(p, "Exceeded max temporary reg");
          return 0;
       }
@@ -170,12 +185,12 @@ src_vector(struct i915_fp_compile *p,
 
       switch (sem_name) {
       case TGSI_SEMANTIC_POSITION:
-         debug_printf("SKIP SEM POS\n");
-         /*
-         assert(p->wpos_tex != -1);
-         src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL);
-         */
-         break;
+         {
+            /* for fragcoord */
+            int real_tex_unit = get_mapping(fs, I915_SEMANTIC_POS);
+            src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL);
+            break;
+         }
       case TGSI_SEMANTIC_COLOR:
          if (sem_ind == 0) {
             src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
@@ -192,9 +207,11 @@ src_vector(struct i915_fp_compile *p,
          src = swizzle(src, W, W, W, W);
          break;
       case TGSI_SEMANTIC_GENERIC:
-         /* usually a texcoord */
-         src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + sem_ind, D0_CHANNEL_ALL);
-         break;
+         {
+            int real_tex_unit = get_mapping(fs, sem_ind);
+            src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL);
+            break;
+         }
       default:
          i915_program_error(p, "Bad source->Index");
          return 0;
@@ -215,26 +232,25 @@ src_vector(struct i915_fp_compile *p,
    }
 
    src = swizzle(src,
-                source->SrcRegister.SwizzleX,
-                source->SrcRegister.SwizzleY,
-                source->SrcRegister.SwizzleZ,
-                source->SrcRegister.SwizzleW);
+                source->Register.SwizzleX,
+                source->Register.SwizzleY,
+                source->Register.SwizzleZ,
+                source->Register.SwizzleW);
 
 
    /* There's both negate-all-components and per-component negation.
     * Try to handle both here.
     */
    {
-      int n = source->SrcRegister.Negate;
+      int n = source->Register.Negate;
       src = negate(src, n, n, n, n);
    }
 
-   /* no abs() or post-abs negation */
+   /* no abs() */
 #if 0
    /* XXX assertions disabled to allow arbfplight.c to run */
    /* XXX enable these assertions, or fix things */
-   assert(!source->SrcRegisterExtMod.Absolute);
-   assert(!source->SrcRegisterExtMod.Negate);
+   assert(!source->Register.Absolute);
 #endif
    return src;
 }
@@ -247,10 +263,10 @@ static uint
 get_result_vector(struct i915_fp_compile *p,
                   const struct tgsi_full_dst_register *dest)
 {
-   switch (dest->DstRegister.File) {
+   switch (dest->Register.File) {
    case TGSI_FILE_OUTPUT:
       {
-         uint sem_name = p->shader->info.output_semantic_name[dest->DstRegister.Index];
+         uint sem_name = p->shader->info.output_semantic_name[dest->Register.Index];
          switch (sem_name) {
          case TGSI_SEMANTIC_POSITION:
             return UREG(REG_TYPE_OD, 0);
@@ -262,7 +278,7 @@ get_result_vector(struct i915_fp_compile *p,
          }
       }
    case TGSI_FILE_TEMPORARY:
-      return UREG(REG_TYPE_R, dest->DstRegister.Index);
+      return UREG(REG_TYPE_R, dest->Register.Index);
    default:
       i915_program_error(p, "Bad inst->DstReg.File");
       return 0;
@@ -277,7 +293,7 @@ static uint
 get_result_flags(const struct tgsi_full_instruction *inst)
 {
    const uint writeMask
-      = inst->FullDstRegisters[0].DstRegister.WriteMask;
+      = inst->Dst[0].Register.WriteMask;
    uint flags = 0x0;
 
    if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE)
@@ -337,16 +353,17 @@ translate_tex_src_target(struct i915_fp_compile *p, uint tex)
 static void
 emit_tex(struct i915_fp_compile *p,
          const struct tgsi_full_instruction *inst,
-         uint opcode)
+         uint opcode,
+         struct i915_fragment_shader* fs)
 {
-   uint texture = inst->InstructionExtTexture.Texture;
-   uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
+   uint texture = inst->Texture.Texture;
+   uint unit = inst->Src[1].Register.Index;
    uint tex = translate_tex_src_target( p, texture );
    uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex);
-   uint coord = src_vector( p, &inst->FullSrcRegisters[0]);
+   uint coord = src_vector( p, &inst->Src[0], fs);
 
    i915_emit_texld( p,
-                    get_result_vector( p, &inst->FullDstRegisters[0] ),
+                    get_result_vector( p, &inst->Dst[0] ),
                     get_result_flags( inst ),
                     sampler,
                     coord,
@@ -362,19 +379,20 @@ emit_tex(struct i915_fp_compile *p,
 static void
 emit_simple_arith(struct i915_fp_compile *p,
                   const struct tgsi_full_instruction *inst,
-                  uint opcode, uint numArgs)
+                  uint opcode, uint numArgs,
+                  struct i915_fragment_shader* fs)
 {
    uint arg1, arg2, arg3;
 
    assert(numArgs <= 3);
 
-   arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->FullSrcRegisters[0] );
-   arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->FullSrcRegisters[1] );
-   arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->FullSrcRegisters[2] );
+   arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->Src[0], fs );
+   arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->Src[1], fs );
+   arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->Src[2], fs );
 
    i915_emit_arith( p,
                     opcode,
-                    get_result_vector( p, &inst->FullDstRegisters[0]),
+                    get_result_vector( p, &inst->Dst[0]),
                     get_result_flags( inst ), 0,
                     arg1,
                     arg2,
@@ -386,7 +404,8 @@ emit_simple_arith(struct i915_fp_compile *p,
 static void
 emit_simple_arith_swap2(struct i915_fp_compile *p,
                         const struct tgsi_full_instruction *inst,
-                        uint opcode, uint numArgs)
+                        uint opcode, uint numArgs,
+                        struct i915_fragment_shader* fs)
 {
    struct tgsi_full_instruction inst2;
 
@@ -394,10 +413,10 @@ emit_simple_arith_swap2(struct i915_fp_compile *p,
 
    /* transpose first two registers */
    inst2 = *inst;
-   inst2.FullSrcRegisters[0] = inst->FullSrcRegisters[1];
-   inst2.FullSrcRegisters[1] = inst->FullSrcRegisters[0];
+   inst2.Src[0] = inst->Src[1];
+   inst2.Src[1] = inst->Src[0];
 
-   emit_simple_arith(p, &inst2, opcode, numArgs);
+   emit_simple_arith(p, &inst2, opcode, numArgs, fs);
 }
 
 
@@ -416,7 +435,8 @@ emit_simple_arith_swap2(struct i915_fp_compile *p,
  */ 
 static void
 i915_translate_instruction(struct i915_fp_compile *p,
-                           const struct tgsi_full_instruction *inst)
+                           const struct tgsi_full_instruction *inst,
+                           struct i915_fragment_shader *fs)
 {
    uint writemask;
    uint src0, src1, src2, flags;
@@ -424,30 +444,30 @@ i915_translate_instruction(struct i915_fp_compile *p,
 
    switch (inst->Instruction.Opcode) {
    case TGSI_OPCODE_ABS:
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      src0 = src_vector(p, &inst->Src[0], fs);
       i915_emit_arith(p,
                       A0_MAX,
-                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_vector(p, &inst->Dst[0]),
                       get_result_flags(inst), 0,
                       src0, negate(src0, 1, 1, 1, 1), 0);
       break;
 
    case TGSI_OPCODE_ADD:
-      emit_simple_arith(p, inst, A0_ADD, 2);
+      emit_simple_arith(p, inst, A0_ADD, 2, fs);
       break;
 
    case TGSI_OPCODE_CMP:
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
-      src1 = src_vector(p, &inst->FullSrcRegisters[1]);
-      src2 = src_vector(p, &inst->FullSrcRegisters[2]);
+      src0 = src_vector(p, &inst->Src[0], fs);
+      src1 = src_vector(p, &inst->Src[1], fs);
+      src2 = src_vector(p, &inst->Src[2], fs);
       i915_emit_arith(p, A0_CMP, 
-                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_vector(p, &inst->Dst[0]),
                       get_result_flags(inst), 
                       0, src0, src2, src1);   /* NOTE: order of src2, src1 */
       break;
 
    case TGSI_OPCODE_COS:
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      src0 = src_vector(p, &inst->Src[0], fs);
       tmp = i915_get_utemp(p);
 
       i915_emit_arith(p,
@@ -490,34 +510,45 @@ i915_translate_instruction(struct i915_fp_compile *p,
 
       i915_emit_arith(p,
                       A0_DP4,
-                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_vector(p, &inst->Dst[0]),
                       get_result_flags(inst), 0,
                       swizzle(tmp, ONE, Z, Y, X),
                       i915_emit_const4fv(p, cos_constants), 0);
       break;
 
+  case TGSI_OPCODE_DP2:
+      src0 = src_vector(p, &inst->Src[0], fs);
+      src1 = src_vector(p, &inst->Src[1], fs);
+
+      i915_emit_arith(p,
+                      A0_DP3,
+                      get_result_vector(p, &inst->Dst[0]),
+                      get_result_flags(inst), 0,
+                      swizzle(src0, X, Y, ZERO, ZERO), src1, 0);
+      break;
+
    case TGSI_OPCODE_DP3:
-      emit_simple_arith(p, inst, A0_DP3, 2);
+      emit_simple_arith(p, inst, A0_DP3, 2, fs);
       break;
 
    case TGSI_OPCODE_DP4:
-      emit_simple_arith(p, inst, A0_DP4, 2);
+      emit_simple_arith(p, inst, A0_DP4, 2, fs);
       break;
 
    case TGSI_OPCODE_DPH:
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
-      src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+      src0 = src_vector(p, &inst->Src[0], fs);
+      src1 = src_vector(p, &inst->Src[1], fs);
 
       i915_emit_arith(p,
                       A0_DP4,
-                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_vector(p, &inst->Dst[0]),
                       get_result_flags(inst), 0,
                       swizzle(src0, X, Y, Z, ONE), src1, 0);
       break;
 
    case TGSI_OPCODE_DST:
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
-      src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+      src0 = src_vector(p, &inst->Src[0], fs);
+      src1 = src_vector(p, &inst->Src[1], fs);
 
       /* result[0] = 1    * 1;
        * result[1] = a[1] * b[1];
@@ -526,7 +557,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
        */
       i915_emit_arith(p,
                       A0_MUL,
-                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_vector(p, &inst->Dst[0]),
                       get_result_flags(inst), 0,
                       swizzle(src0, ONE, Y, Z, ONE),
                       swizzle(src1, ONE, Y, ONE, W), 0);
@@ -537,26 +568,26 @@ i915_translate_instruction(struct i915_fp_compile *p,
       break;
 
    case TGSI_OPCODE_EX2:
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      src0 = src_vector(p, &inst->Src[0], fs);
 
       i915_emit_arith(p,
                       A0_EXP,
-                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_vector(p, &inst->Dst[0]),
                       get_result_flags(inst), 0,
                       swizzle(src0, X, X, X, X), 0, 0);
       break;
 
    case TGSI_OPCODE_FLR:
-      emit_simple_arith(p, inst, A0_FLR, 1);
+      emit_simple_arith(p, inst, A0_FLR, 1, fs);
       break;
 
    case TGSI_OPCODE_FRC:
-      emit_simple_arith(p, inst, A0_FRC, 1);
+      emit_simple_arith(p, inst, A0_FRC, 1, fs);
       break;
 
    case TGSI_OPCODE_KIL:
       /* kill if src[0].x < 0 || src[0].y < 0 ... */
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      src0 = src_vector(p, &inst->Src[0], fs);
       tmp = i915_get_utemp(p);
 
       i915_emit_texld(p,
@@ -572,17 +603,17 @@ i915_translate_instruction(struct i915_fp_compile *p,
       break;
 
    case TGSI_OPCODE_LG2:
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      src0 = src_vector(p, &inst->Src[0], fs);
 
       i915_emit_arith(p,
                       A0_LOG,
-                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_vector(p, &inst->Dst[0]),
                       get_result_flags(inst), 0,
                       swizzle(src0, X, X, X, X), 0, 0);
       break;
 
    case TGSI_OPCODE_LIT:
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      src0 = src_vector(p, &inst->Src[0], fs);
       tmp = i915_get_utemp(p);
 
       /* tmp = max( a.xyzw, a.00zw )
@@ -606,7 +637,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
                       swizzle(tmp, Y, Y, Y, Y), 0, 0);
 
       i915_emit_arith(p, A0_CMP,
-                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_vector(p, &inst->Dst[0]),
                       get_result_flags(inst), 0,
                       negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),
                       swizzle(tmp, ONE, X, ZERO, ONE),
@@ -615,9 +646,9 @@ i915_translate_instruction(struct i915_fp_compile *p,
       break;
 
    case TGSI_OPCODE_LRP:
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
-      src1 = src_vector(p, &inst->FullSrcRegisters[1]);
-      src2 = src_vector(p, &inst->FullSrcRegisters[2]);
+      src0 = src_vector(p, &inst->Src[0], fs);
+      src1 = src_vector(p, &inst->Src[1], fs);
+      src2 = src_vector(p, &inst->Src[2], fs);
       flags = get_result_flags(inst);
       tmp = i915_get_utemp(p);
 
@@ -632,21 +663,21 @@ i915_translate_instruction(struct i915_fp_compile *p,
                       flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2);
 
       i915_emit_arith(p, A0_MAD,
-                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_vector(p, &inst->Dst[0]),
                       flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp);
       break;
 
    case TGSI_OPCODE_MAD:
-      emit_simple_arith(p, inst, A0_MAD, 3);
+      emit_simple_arith(p, inst, A0_MAD, 3, fs);
       break;
 
    case TGSI_OPCODE_MAX:
-      emit_simple_arith(p, inst, A0_MAX, 2);
+      emit_simple_arith(p, inst, A0_MAX, 2, fs);
       break;
 
    case TGSI_OPCODE_MIN:
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
-      src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+      src0 = src_vector(p, &inst->Src[0], fs);
+      src1 = src_vector(p, &inst->Src[1], fs);
       tmp = i915_get_utemp(p);
       flags = get_result_flags(inst);
 
@@ -658,21 +689,21 @@ i915_translate_instruction(struct i915_fp_compile *p,
 
       i915_emit_arith(p,
                       A0_MOV,
-                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_vector(p, &inst->Dst[0]),
                       flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0);
       break;
 
    case TGSI_OPCODE_MOV:
-      emit_simple_arith(p, inst, A0_MOV, 1);
+      emit_simple_arith(p, inst, A0_MOV, 1, fs);
       break;
 
    case TGSI_OPCODE_MUL:
-      emit_simple_arith(p, inst, A0_MUL, 2);
+      emit_simple_arith(p, inst, A0_MUL, 2, fs);
       break;
 
    case TGSI_OPCODE_POW:
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
-      src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+      src0 = src_vector(p, &inst->Src[0], fs);
+      src1 = src_vector(p, &inst->Src[1], fs);
       tmp = i915_get_utemp(p);
       flags = get_result_flags(inst);
 
@@ -687,7 +718,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
 
       i915_emit_arith(p,
                       A0_EXP,
-                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_vector(p, &inst->Dst[0]),
                       flags, 0, swizzle(tmp, X, X, X, X), 0, 0);
       break;
       
@@ -696,27 +727,27 @@ i915_translate_instruction(struct i915_fp_compile *p,
       break;
       
    case TGSI_OPCODE_RCP:
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      src0 = src_vector(p, &inst->Src[0], fs);
 
       i915_emit_arith(p,
                       A0_RCP,
-                      get_result_vector(p, &inst->FullDstRegisters[0]),
-                         get_result_flags(inst), 0,
+                      get_result_vector(p, &inst->Dst[0]),
+                      get_result_flags(inst), 0,
                       swizzle(src0, X, X, X, X), 0, 0);
       break;
 
    case TGSI_OPCODE_RSQ:
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      src0 = src_vector(p, &inst->Src[0], fs);
 
       i915_emit_arith(p,
                       A0_RSQ,
-                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_vector(p, &inst->Dst[0]),
                       get_result_flags(inst), 0,
                       swizzle(src0, X, X, X, X), 0, 0);
       break;
 
    case TGSI_OPCODE_SCS:
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      src0 = src_vector(p, &inst->Src[0], fs);
       tmp = i915_get_utemp(p);
 
       /* 
@@ -739,7 +770,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
                       swizzle(tmp, X, Y, X, Y),
                       swizzle(tmp, X, X, ONE, ONE), 0);
 
-      writemask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+      writemask = inst->Dst[0].Register.WriteMask;
 
       if (writemask & TGSI_WRITEMASK_Y) {
          uint tmp1;
@@ -757,7 +788,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
 
          i915_emit_arith(p,
                          A0_DP4,
-                         get_result_vector(p, &inst->FullDstRegisters[0]),
+                         get_result_vector(p, &inst->Dst[0]),
                          A0_DEST_CHANNEL_Y, 0,
                          swizzle(tmp1, W, Z, Y, X),
                          i915_emit_const4fv(p, sin_constants), 0);
@@ -772,24 +803,47 @@ i915_translate_instruction(struct i915_fp_compile *p,
 
          i915_emit_arith(p,
                          A0_DP4,
-                         get_result_vector(p, &inst->FullDstRegisters[0]),
+                         get_result_vector(p, &inst->Dst[0]),
                          A0_DEST_CHANNEL_X, 0,
                          swizzle(tmp, ONE, Z, Y, X),
                          i915_emit_const4fv(p, cos_constants), 0);
       }
       break;
 
-   case TGSI_OPCODE_SGE:
-      emit_simple_arith(p, inst, A0_SGE, 2);
+   case TGSI_OPCODE_SEQ:
+      /* if we're both >= and <= then we're == */
+      src0 = src_vector(p, &inst->Src[0], fs);
+      src1 = src_vector(p, &inst->Src[1], fs);
+      tmp = i915_get_utemp(p);
+
+      i915_emit_arith(p,
+                      A0_SGE,
+                      tmp, A0_DEST_CHANNEL_ALL, 0,
+                      src0,
+                      src1, 0);
+
+      i915_emit_arith(p,
+                      A0_SGE,
+                      get_result_vector(p, &inst->Dst[0]),
+                      A0_DEST_CHANNEL_ALL, 0,
+                      src1,
+                      src0, 0);
+
+      i915_emit_arith(p,
+                      A0_MUL,
+                      get_result_vector(p, &inst->Dst[0]),
+                      A0_DEST_CHANNEL_ALL, 0,
+                      get_result_vector(p, &inst->Dst[0]),
+                      tmp, 0);
+
       break;
 
-   case TGSI_OPCODE_SLE:
-      /* like SGE, but swap reg0, reg1 */
-      emit_simple_arith_swap2(p, inst, A0_SGE, 2);
+   case TGSI_OPCODE_SGE:
+      emit_simple_arith(p, inst, A0_SGE, 2, fs);
       break;
 
    case TGSI_OPCODE_SIN:
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      src0 = src_vector(p, &inst->Src[0], fs);
       tmp = i915_get_utemp(p);
 
       i915_emit_arith(p,
@@ -832,42 +886,106 @@ i915_translate_instruction(struct i915_fp_compile *p,
 
       i915_emit_arith(p,
                       A0_DP4,
-                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_vector(p, &inst->Dst[0]),
                       get_result_flags(inst), 0,
                       swizzle(tmp, W, Z, Y, X),
                       i915_emit_const4fv(p, sin_constants), 0);
       break;
 
+   case TGSI_OPCODE_SLE:
+      /* like SGE, but swap reg0, reg1 */
+      emit_simple_arith_swap2(p, inst, A0_SGE, 2, fs);
+      break;
+
    case TGSI_OPCODE_SLT:
-      emit_simple_arith(p, inst, A0_SLT, 2);
+      emit_simple_arith(p, inst, A0_SLT, 2, fs);
       break;
 
    case TGSI_OPCODE_SGT:
       /* like SLT, but swap reg0, reg1 */
-      emit_simple_arith_swap2(p, inst, A0_SLT, 2);
+      emit_simple_arith_swap2(p, inst, A0_SLT, 2, fs);
+      break;
+
+   case TGSI_OPCODE_SNE:
+      /* if we're < or > then we're != */
+      src0 = src_vector(p, &inst->Src[0], fs);
+      src1 = src_vector(p, &inst->Src[1], fs);
+      tmp = i915_get_utemp(p);
+
+      i915_emit_arith(p,
+                      A0_SLT,
+                      tmp,
+                      A0_DEST_CHANNEL_ALL, 0,
+                      src0,
+                      src1, 0);
+
+      i915_emit_arith(p,
+                      A0_SLT,
+                      get_result_vector(p, &inst->Dst[0]),
+                      A0_DEST_CHANNEL_ALL, 0,
+                      src1,
+                      src0, 0);
+
+      i915_emit_arith(p,
+                      A0_ADD,
+                      get_result_vector(p, &inst->Dst[0]),
+                      A0_DEST_CHANNEL_ALL, 0,
+                      get_result_vector(p, &inst->Dst[0]),
+                      tmp, 0);
+      break;
+
+   case TGSI_OPCODE_SSG:
+      /* compute (src>0) - (src<0) */
+      src0 = src_vector(p, &inst->Src[0], fs);
+      tmp = i915_get_utemp(p);
+
+      i915_emit_arith(p,
+                      A0_SLT,
+                      tmp,
+                      A0_DEST_CHANNEL_ALL, 0,
+                      src0,
+                      swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0);
+
+      i915_emit_arith(p,
+                      A0_SLT,
+                      get_result_vector(p, &inst->Dst[0]),
+                      A0_DEST_CHANNEL_ALL, 0,
+                      swizzle(src0, ZERO, ZERO, ZERO, ZERO),
+                      src0, 0);
+
+      i915_emit_arith(p,
+                      A0_ADD,
+                      get_result_vector(p, &inst->Dst[0]),
+                      A0_DEST_CHANNEL_ALL, 0,
+                      get_result_vector(p, &inst->Dst[0]),
+                      negate(tmp, 1, 1, 1, 1), 0);
       break;
 
    case TGSI_OPCODE_SUB:
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
-      src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+      src0 = src_vector(p, &inst->Src[0], fs);
+      src1 = src_vector(p, &inst->Src[1], fs);
 
       i915_emit_arith(p,
                       A0_ADD,
-                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_vector(p, &inst->Dst[0]),
                       get_result_flags(inst), 0,
                       src0, negate(src1, 1, 1, 1, 1), 0);
       break;
 
    case TGSI_OPCODE_TEX:
-      emit_tex(p, inst, T0_TEXLD);
+      emit_tex(p, inst, T0_TEXLD, fs);
+      break;
+
+   case TGSI_OPCODE_TRUNC:
+      emit_simple_arith(p, inst, A0_TRC, 1, fs);
       break;
 
    case TGSI_OPCODE_TXB:
-      emit_tex(p, inst, T0_TEXLDB);
+      emit_tex(p, inst, T0_TEXLDB, fs);
       break;
 
    case TGSI_OPCODE_TXP:
-      emit_tex(p, inst, T0_TEXLDP);
+      emit_tex(p, inst, T0_TEXLDP, fs);
       break;
 
    case TGSI_OPCODE_XPD:
@@ -877,8 +995,8 @@ i915_translate_instruction(struct i915_fp_compile *p,
        *      result.z = src0.x * src1.y - src0.y * src1.x;
        *      result.w = undef;
        */
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
-      src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+      src0 = src_vector(p, &inst->Src[0], fs);
+      src1 = src_vector(p, &inst->Src[1], fs);
       tmp = i915_get_utemp(p);
 
       i915_emit_arith(p,
@@ -889,7 +1007,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
 
       i915_emit_arith(p,
                       A0_MAD,
-                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_vector(p, &inst->Dst[0]),
                       get_result_flags(inst), 0,
                       swizzle(src0, Y, Z, X, ONE),
                       swizzle(src1, Z, X, Y, ONE),
@@ -913,7 +1031,8 @@ i915_translate_instruction(struct i915_fp_compile *p,
  */
 static void
 i915_translate_instructions(struct i915_fp_compile *p,
-                            const struct tgsi_token *tokens)
+                            const struct tgsi_token *tokens,
+                            struct i915_fragment_shader *fs)
 {
    struct i915_fragment_shader *ifs = p->shader;
    struct tgsi_parse_context parse;
@@ -925,12 +1044,20 @@ i915_translate_instructions(struct i915_fp_compile *p,
       tgsi_parse_token( &parse );
 
       switch( parse.FullToken.Token.Type ) {
+      case TGSI_TOKEN_TYPE_PROPERTY:
+         /*
+          * We only support one cbuf, but we still need to ignore the property
+          * correctly so we don't hit the assert at the end of the switch case.
+          */
+         assert(parse.FullToken.FullProperty.Property.PropertyName ==
+                TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS);
+         break;
       case TGSI_TOKEN_TYPE_DECLARATION:
          if (parse.FullToken.FullDeclaration.Declaration.File
                   == TGSI_FILE_CONSTANT) {
             uint i;
-            for (i = parse.FullToken.FullDeclaration.DeclarationRange.First;
-                 i <= parse.FullToken.FullDeclaration.DeclarationRange.Last;
+            for (i = parse.FullToken.FullDeclaration.Range.First;
+                 i <= parse.FullToken.FullDeclaration.Range.Last;
                  i++) {
                assert(ifs->constant_flags[i] == 0x0);
                ifs->constant_flags[i] = I915_CONSTFLAG_USER;
@@ -940,8 +1067,8 @@ i915_translate_instructions(struct i915_fp_compile *p,
          else if (parse.FullToken.FullDeclaration.Declaration.File
                   == TGSI_FILE_TEMPORARY) {
             uint i;
-            for (i = parse.FullToken.FullDeclaration.DeclarationRange.First;
-                 i <= parse.FullToken.FullDeclaration.DeclarationRange.Last;
+            for (i = parse.FullToken.FullDeclaration.Range.First;
+                 i <= parse.FullToken.FullDeclaration.Range.Last;
                  i++) {
                assert(i < I915_MAX_TEMPORARY);
                /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */
@@ -986,7 +1113,7 @@ i915_translate_instructions(struct i915_fp_compile *p,
             p->first_instruction = FALSE;
          }
 
-         i915_translate_instruction(p, &parse.FullToken.FullInstruction);
+         i915_translate_instruction(p, &parse.FullToken.FullInstruction, fs);
          break;
 
       default:
@@ -1004,6 +1131,7 @@ i915_init_compile(struct i915_context *i915,
                   struct i915_fragment_shader *ifs)
 {
    struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile);
+   int i;
 
    p->shader = ifs;
 
@@ -1016,6 +1144,9 @@ i915_init_compile(struct i915_context *i915,
    ifs->num_constants = 0;
    memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags));
 
+   for (i = 0; i < I915_TEX_UNITS; i++)
+      ifs->generic_mapping[i] = -1;
+
    p->first_instruction = TRUE;
 
    p->nr_tex_indirect = 1;      /* correct? */
@@ -1030,8 +1161,6 @@ i915_init_compile(struct i915_context *i915,
    p->temp_flag = ~0x0 << I915_MAX_TEMPORARY;
    p->utemp_flag = ~0x7;
 
-   p->wpos_tex = -1;
-
    /* initialize the first program word */
    *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM;
 
@@ -1103,40 +1232,6 @@ i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
 }
 
 
-/**
- * Find an unused texture coordinate slot to use for fragment WPOS.
- * Update p->fp->wpos_tex with the result (-1 if no used texcoord slot is found).
- */
-static void
-i915_find_wpos_space(struct i915_fp_compile *p)
-{
-#if 0
-   const uint inputs
-      = p->shader->inputs_read | (1 << TGSI_ATTRIB_POS); /*XXX hack*/
-   uint i;
-
-   p->wpos_tex = -1;
-
-   if (inputs & (1 << TGSI_ATTRIB_POS)) {
-      for (i = 0; i < I915_TEX_UNITS; i++) {
-        if ((inputs & (1 << (TGSI_ATTRIB_TEX0 + i))) == 0) {
-           p->wpos_tex = i;
-           return;
-        }
-      }
-
-      i915_program_error(p, "No free texcoord for wpos value");
-   }
-#else
-   if (p->shader->info.input_semantic_name[0] == TGSI_SEMANTIC_POSITION) {
-      /* frag shader using the fragment position input */
-#if 0
-      assert(0);
-#endif
-   }
-#endif
-}
-
 
 
 
@@ -1167,16 +1262,24 @@ void
 i915_translate_fragment_program( struct i915_context *i915,
                                  struct i915_fragment_shader *fs)
 {
-   struct i915_fp_compile *p = i915_init_compile(i915, fs);
+   struct i915_fp_compile *p;
    const struct tgsi_token *tokens = fs->state.tokens;
 
-   i915_find_wpos_space(p);
-
 #if 0
    tgsi_dump(tokens, 0);
 #endif
 
-   i915_translate_instructions(p, tokens);
+   /* hw doesn't seem to like empty frag programs, even when the depth write
+    * fixup gets emitted below - maybe that one is fishy, too? */
+   if (fs->info.num_instructions == 1) {
+      i915_use_passthrough_shader(fs);
+
+      return;
+   }
+
+   p = i915_init_compile(i915, fs);
+
+   i915_translate_instructions(p, tokens, fs);
    i915_fixup_depth_write(p);
 
    i915_fini_compile(i915, p);