radeonsi: emit TA_BC_BASE_ADDR_HI for border color on CIK

[mesa.git] / src / gallium / drivers / svga / svga_tgsi_insn.c
diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c

index 64dfa55753948bc66f4ad377ad9627885e11d778..3add4c03d096b9171e9e19e356cad5087f66a361 100644 (file)
--- a/src/gallium/drivers/svga/svga_tgsi_insn.c
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@@ -38,16 +38,12 @@ static boolean emit_vs_postamble( struct svga_shader_emitter *emit );
  static boolean emit_ps_postamble( struct svga_shader_emitter *emit );
  
  
-
- 
  static unsigned
-translate_opcode(
-   uint opcode )
+translate_opcode(uint opcode)
  {
     switch (opcode) {
     case TGSI_OPCODE_ABS:        return SVGA3DOP_ABS;
     case TGSI_OPCODE_ADD:        return SVGA3DOP_ADD;
-   case TGSI_OPCODE_BREAKC:     return SVGA3DOP_BREAKC;
     case TGSI_OPCODE_DP2A:       return SVGA3DOP_DP2ADD;
     case TGSI_OPCODE_DP3:        return SVGA3DOP_DP3;
     case TGSI_OPCODE_DP4:        return SVGA3DOP_DP4;
@@ -67,7 +63,8 @@ translate_opcode(
  }
  
  
-static unsigned translate_file( unsigned file )
+static unsigned
+translate_file(unsigned file)
  {
     switch (file) {
     case TGSI_FILE_TEMPORARY: return SVGA3DREG_TEMP;
@@ -84,11 +81,7 @@ static unsigned translate_file( unsigned file )
  }
  
  
-
-
-
-
-static SVGA3dShaderDestToken 
+static SVGA3dShaderDestToken
  translate_dst_register( struct svga_shader_emitter *emit,
                          const struct tgsi_full_instruction *insn,
                          unsigned idx )
@@ -110,45 +103,58 @@ translate_dst_register( struct svga_shader_emitter *emit,
           assert(index < SVGA3D_TEMPREG_MAX);
           index = MIN2(index, SVGA3D_TEMPREG_MAX - 1);
           dest = dst_register(translate_file(reg->Register.File), index);
-      }                 
+      }
        break;
     }
  
     dest.mask = reg->Register.WriteMask;
     assert(dest.mask);
  
-   if (insn->Instruction.Saturate) 
+   if (insn->Instruction.Saturate)
        dest.dstMod = SVGA3DDSTMOD_SATURATE;
  
     return dest;
  }
  
  
-static struct src_register 
-swizzle( struct src_register src,
-         int x,
-         int y,
-         int z,
-         int w )
+/**
+ * Apply a swizzle to a src_register, returning a new src_register
+ * Ex: swizzle(SRC.ZZYY, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_X, SWIZZLE_Y)
+ * would return SRC.YYZZ
+ */
+static struct src_register
+swizzle(struct src_register src,
+        unsigned x, unsigned y, unsigned z, unsigned w)
  {
+   assert(x < 4);
+   assert(y < 4);
+   assert(z < 4);
+   assert(w < 4);
     x = (src.base.swizzle >> (x * 2)) & 0x3;
     y = (src.base.swizzle >> (y * 2)) & 0x3;
     z = (src.base.swizzle >> (z * 2)) & 0x3;
     w = (src.base.swizzle >> (w * 2)) & 0x3;
  
-   src.base.swizzle = TRANSLATE_SWIZZLE(x,y,z,w);
+   src.base.swizzle = TRANSLATE_SWIZZLE(x, y, z, w);
  
     return src;
  }
  
+
+/**
+ * Apply a "scalar" swizzle to a src_register returning a new
+ * src_register where all the swizzle terms are the same.
+ * Ex: scalar(SRC.WZYX, SWIZZLE_Y) would return SRC.ZZZZ
+ */
  static struct src_register
-scalar( struct src_register src,
-        int comp )
+scalar(struct src_register src, unsigned comp)
  {
+   assert(comp < 4);
     return swizzle( src, comp, comp, comp, comp );
  }
  
-static INLINE boolean
+
+static boolean
  svga_arl_needs_adjustment( const struct svga_shader_emitter *emit )
  {
     int i;
@@ -160,7 +166,8 @@ svga_arl_needs_adjustment( const struct svga_shader_emitter *emit )
     return FALSE;
  }
  
-static INLINE int
+
+static int
  svga_arl_adjustment( const struct svga_shader_emitter *emit )
  {
     int i;
@@ -172,7 +179,8 @@ svga_arl_adjustment( const struct svga_shader_emitter *emit )
     return 0;
  }
  
-static struct src_register 
+
+static struct src_register
  translate_src_register( const struct svga_shader_emitter *emit,
                          const struct tgsi_full_src_register *reg )
  {
@@ -185,20 +193,18 @@ translate_src_register( const struct svga_shader_emitter *emit,
         */
        src = emit->input_map[reg->Register.Index];
        break;
-       
+
     case TGSI_FILE_IMMEDIATE:
        /* Immediates are appended after TGSI constants in the D3D
         * constant buffer.
         */
        src = src_register( translate_file( reg->Register.File ),
-                          reg->Register.Index + 
-                          emit->imm_start );
+                          reg->Register.Index + emit->imm_start );
        break;
  
     default:
        src = src_register( translate_file( reg->Register.File ),
                            reg->Register.Index );
-
        break;
     }
  
@@ -266,7 +272,7 @@ translate_src_register( const struct svga_shader_emitter *emit,
   * Note: if we exceed the temporary register limit we just use
   * register SVGA3D_TEMPREG_MAX - 1.
   */
-static INLINE SVGA3dShaderDestToken 
+static SVGA3dShaderDestToken
  get_temp( struct svga_shader_emitter *emit )
  {
     int i = emit->nr_hw_temp + emit->internal_temp_count++;
@@ -275,11 +281,13 @@ get_temp( struct svga_shader_emitter *emit )
     return dst_register( SVGA3DREG_TEMP, i );
  }
  
-/* Release a single temp.  Currently only effective if it was the last
+
+/**
+ * Release a single temp.  Currently only effective if it was the last
   * allocated temp, otherwise release will be delayed until the next
   * call to reset_temp_regs().
   */
-static INLINE void 
+static void
  release_temp( struct svga_shader_emitter *emit,
                SVGA3dShaderDestToken temp )
  {
@@ -287,20 +295,152 @@ release_temp( struct svga_shader_emitter *emit,
        emit->internal_temp_count--;
  }
  
-static void reset_temp_regs( struct svga_shader_emitter *emit )
+
+static void
+reset_temp_regs(struct svga_shader_emitter *emit)
  {
     emit->internal_temp_count = 0;
  }
-   
+
+
+/** Emit bytecode for a src_register */
+static boolean
+emit_src(struct svga_shader_emitter *emit, const struct src_register src)
+{
+   if (src.base.relAddr) {
+      assert(src.base.reserved0);
+      assert(src.indirect.reserved0);
+      return (svga_shader_emit_dword( emit, src.base.value ) &&
+              svga_shader_emit_dword( emit, src.indirect.value ));
+   }
+   else {
+      assert(src.base.reserved0);
+      return svga_shader_emit_dword( emit, src.base.value );
+   }
+}
+
+
+/** Emit bytecode for a dst_register */
+static boolean
+emit_dst(struct svga_shader_emitter *emit, SVGA3dShaderDestToken dest)
+{
+   assert(dest.reserved0);
+   assert(dest.mask);
+   return svga_shader_emit_dword( emit, dest.value );
+}
+
+
+/** Emit bytecode for a 1-operand instruction */
+static boolean
+emit_op1(struct svga_shader_emitter *emit,
+         SVGA3dShaderInstToken inst,
+         SVGA3dShaderDestToken dest,
+         struct src_register src0)
+{
+   return (emit_instruction(emit, inst) &&
+           emit_dst(emit, dest) &&
+           emit_src(emit, src0));
+}
+
+
+/** Emit bytecode for a 2-operand instruction */
+static boolean
+emit_op2(struct svga_shader_emitter *emit,
+         SVGA3dShaderInstToken inst,
+         SVGA3dShaderDestToken dest,
+         struct src_register src0,
+         struct src_register src1)
+{
+   return (emit_instruction(emit, inst) &&
+           emit_dst(emit, dest) &&
+           emit_src(emit, src0) &&
+           emit_src(emit, src1));
+}
+
+
+/** Emit bytecode for a 3-operand instruction */
+static boolean
+emit_op3(struct svga_shader_emitter *emit,
+         SVGA3dShaderInstToken inst,
+         SVGA3dShaderDestToken dest,
+         struct src_register src0,
+         struct src_register src1,
+         struct src_register src2)
+{
+   return (emit_instruction(emit, inst) &&
+           emit_dst(emit, dest) &&
+           emit_src(emit, src0) &&
+           emit_src(emit, src1) &&
+           emit_src(emit, src2));
+}
+
+
+/** Emit bytecode for a 4-operand instruction */
+static boolean
+emit_op4(struct svga_shader_emitter *emit,
+         SVGA3dShaderInstToken inst,
+         SVGA3dShaderDestToken dest,
+         struct src_register src0,
+         struct src_register src1,
+         struct src_register src2,
+         struct src_register src3)
+{
+   return (emit_instruction(emit, inst) &&
+           emit_dst(emit, dest) &&
+           emit_src(emit, src0) &&
+           emit_src(emit, src1) &&
+           emit_src(emit, src2) &&
+           emit_src(emit, src3));
+}
+
+
+/**
+ * Apply the absolute value modifier to the given src_register, returning
+ * a new src_register.
+ */
+static struct src_register 
+absolute(struct src_register src)
+{
+   src.base.srcMod = SVGA3DSRCMOD_ABS;
+   return src;
+}
+
+
+/**
+ * Apply the negation modifier to the given src_register, returning
+ * a new src_register.
+ */
+static struct src_register 
+negate(struct src_register src)
+{
+   switch (src.base.srcMod) {
+   case SVGA3DSRCMOD_ABS:
+      src.base.srcMod = SVGA3DSRCMOD_ABSNEG;
+      break;
+   case SVGA3DSRCMOD_ABSNEG:
+      src.base.srcMod = SVGA3DSRCMOD_ABS;
+      break;
+   case SVGA3DSRCMOD_NEG:
+      src.base.srcMod = SVGA3DSRCMOD_NONE;
+      break;
+   case SVGA3DSRCMOD_NONE:
+      src.base.srcMod = SVGA3DSRCMOD_NEG;
+      break;
+   }
+   return src;
+}
+
+
  
  /* Replace the src with the temporary specified in the dst, but copying
   * only the necessary channels, and preserving the original swizzle (which is
   * important given that several opcodes have constraints in the allowed
   * swizzles).
   */
-static boolean emit_repl( struct svga_shader_emitter *emit,
-                          SVGA3dShaderDestToken dst,
-                          struct src_register *src0)
+static boolean
+emit_repl(struct svga_shader_emitter *emit,
+          SVGA3dShaderDestToken dst,
+          struct src_register *src0)
  {
     unsigned src0_swizzle;
     unsigned chan;
@@ -328,35 +468,40 @@ static boolean emit_repl( struct svga_shader_emitter *emit,
  }
  
  
-static boolean submit_op0( struct svga_shader_emitter *emit,
-                           SVGA3dShaderInstToken inst,
-                           SVGA3dShaderDestToken dest )
+static boolean
+submit_op0(struct svga_shader_emitter *emit,
+           SVGA3dShaderInstToken inst,
+           SVGA3dShaderDestToken dest)
  {
-   return (emit_instruction( emit, inst ) && 
+   return (emit_instruction( emit, inst ) &&
             emit_dst( emit, dest ));
  }
  
-static boolean submit_op1( struct svga_shader_emitter *emit,
-                           SVGA3dShaderInstToken inst,
-                           SVGA3dShaderDestToken dest,
-                           struct src_register src0 )
+
+static boolean
+submit_op1(struct svga_shader_emitter *emit,
+           SVGA3dShaderInstToken inst,
+           SVGA3dShaderDestToken dest,
+           struct src_register src0)
  {
     return emit_op1( emit, inst, dest, src0 );
  }
  
  
-/* SVGA shaders may not refer to >1 constant register in a single
+/**
+ * SVGA shaders may not refer to >1 constant register in a single
   * instruction.  This function checks for that usage and inserts a
   * move to temporary if detected.
   *
   * The same applies to input registers -- at most a single input
   * register may be read by any instruction.
   */
-static boolean submit_op2( struct svga_shader_emitter *emit,
-                           SVGA3dShaderInstToken inst,
-                           SVGA3dShaderDestToken dest,
-                           struct src_register src0,
-                           struct src_register src1 )
+static boolean
+submit_op2(struct svga_shader_emitter *emit,
+           SVGA3dShaderInstToken inst,
+           SVGA3dShaderDestToken dest,
+           struct src_register src0,
+           struct src_register src1)
  {
     SVGA3dShaderDestToken temp;
     SVGA3dShaderRegType type0, type1;
@@ -393,16 +538,18 @@ static boolean submit_op2( struct svga_shader_emitter *emit,
  }
  
  
-/* SVGA shaders may not refer to >1 constant register in a single
+/**
+ * SVGA shaders may not refer to >1 constant register in a single
   * instruction.  This function checks for that usage and inserts a
   * move to temporary if detected.
   */
-static boolean submit_op3( struct svga_shader_emitter *emit,
-                           SVGA3dShaderInstToken inst,
-                           SVGA3dShaderDestToken dest,
-                           struct src_register src0,
-                           struct src_register src1,
-                           struct src_register src2 )
+static boolean
+submit_op3(struct svga_shader_emitter *emit,
+           SVGA3dShaderInstToken inst,
+           SVGA3dShaderDestToken dest,
+           struct src_register src0,
+           struct src_register src1,
+           struct src_register src2)
  {
     SVGA3dShaderDestToken temp0;
     SVGA3dShaderDestToken temp1;
@@ -438,7 +585,7 @@ static boolean submit_op3( struct svga_shader_emitter *emit,
  
     if (need_temp0) {
        temp0 = get_temp( emit );
- 
+
        if (!emit_repl( emit, temp0, &src0 ))
           return FALSE;
     }
@@ -461,19 +608,19 @@ static boolean submit_op3( struct svga_shader_emitter *emit,
  }
  
  
-
-
-/* SVGA shaders may not refer to >1 constant register in a single
+/**
+ * SVGA shaders may not refer to >1 constant register in a single
   * instruction.  This function checks for that usage and inserts a
   * move to temporary if detected.
   */
-static boolean submit_op4( struct svga_shader_emitter *emit,
-                           SVGA3dShaderInstToken inst,
-                           SVGA3dShaderDestToken dest,
-                           struct src_register src0,
-                           struct src_register src1,
-                           struct src_register src2,
-                           struct src_register src3)
+static boolean
+submit_op4(struct svga_shader_emitter *emit,
+           SVGA3dShaderInstToken inst,
+           SVGA3dShaderDestToken dest,
+           struct src_register src0,
+           struct src_register src1,
+           struct src_register src2,
+           struct src_register src3)
  {
     SVGA3dShaderDestToken temp0;
     SVGA3dShaderDestToken temp3;
@@ -514,7 +661,7 @@ static boolean submit_op4( struct svga_shader_emitter *emit,
  
     if (need_temp0) {
        temp0 = get_temp( emit );
- 
+
        if (!emit_repl( emit, temp0, &src0 ))
           return FALSE;
     }
@@ -537,8 +684,12 @@ static boolean submit_op4( struct svga_shader_emitter *emit,
  }
  
  
-static boolean alias_src_dst( struct src_register src,
-                              SVGA3dShaderDestToken dst )
+/**
+ * Do the src and dest registers refer to the same register?
+ */
+static boolean
+alias_src_dst(struct src_register src,
+              SVGA3dShaderDestToken dst)
  {
     if (src.base.num != dst.num)
        return FALSE;
@@ -551,11 +702,12 @@ static boolean alias_src_dst( struct src_register src,
  }
  
  
-static boolean submit_lrp(struct svga_shader_emitter *emit,
-                          SVGA3dShaderDestToken dst,
-                          struct src_register src0,
-                          struct src_register src1,
-                          struct src_register src2)
+static boolean
+submit_lrp(struct svga_shader_emitter *emit,
+           SVGA3dShaderDestToken dst,
+           struct src_register src0,
+           struct src_register src1,
+           struct src_register src2)
  {
     SVGA3dShaderDestToken tmp;
     boolean need_dst_tmp = FALSE;
@@ -586,13 +738,10 @@ static boolean submit_lrp(struct svga_shader_emitter *emit,
  }
  
  
-static boolean emit_def_const( struct svga_shader_emitter *emit,
-                               SVGA3dShaderConstType type,
-                               unsigned idx,
-                               float a,
-                               float b,
-                               float c,
-                               float d )
+static boolean
+emit_def_const(struct svga_shader_emitter *emit,
+               SVGA3dShaderConstType type,
+               unsigned idx, float a, float b, float c, float d)
  {
     SVGA3DOpDefArgs def;
     SVGA3dShaderInstToken opcode;
@@ -627,16 +776,17 @@ static boolean emit_def_const( struct svga_shader_emitter *emit,
     return TRUE;
  }
  
-static INLINE boolean
+
+static boolean
  create_zero_immediate( struct svga_shader_emitter *emit )
  {
     unsigned idx = emit->nr_hw_float_const++;
  
-   /* Emit the constant (0, 0, -1, 1) and use swizzling to generate
+   /* Emit the constant (0, 0.5, -1, 1) and use swizzling to generate
      * other useful vectors.
      */
     if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
-                        idx, 0, 0, -1, 1 ))
+                        idx, 0, 0.5, -1, 1 ))
        return FALSE;
  
     emit->zero_immediate_idx = idx;
@@ -645,7 +795,8 @@ create_zero_immediate( struct svga_shader_emitter *emit )
     return TRUE;
  }
  
-static INLINE boolean
+
+static boolean
  create_loop_const( struct svga_shader_emitter *emit )
  {
     unsigned idx = emit->nr_hw_int_const++;
@@ -663,7 +814,7 @@ create_loop_const( struct svga_shader_emitter *emit )
     return TRUE;
  }
  
-static INLINE boolean
+static boolean
  create_arl_consts( struct svga_shader_emitter *emit )
  {
     int i;
@@ -673,7 +824,7 @@ create_arl_consts( struct svga_shader_emitter *emit )
        unsigned idx = emit->nr_hw_float_const++;
        float vals[4];
        for (j = 0; j < 4 && (j + i) < emit->num_arl_consts; ++j) {
-         vals[j] = emit->arl_consts[i + j].number;
+         vals[j] = (float) emit->arl_consts[i + j].number;
           emit->arl_consts[i + j].idx = idx;
           switch (j) {
           case 0:
@@ -702,16 +853,23 @@ create_arl_consts( struct svga_shader_emitter *emit )
     return TRUE;
  }
  
-static INLINE struct src_register
+
+/**
+ * Return the register which holds the pixel shaders front/back-
+ * facing value.
+ */
+static struct src_register
  get_vface( struct svga_shader_emitter *emit )
  {
     assert(emit->emitted_vface);
-   return src_register(SVGA3DREG_MISCTYPE, 
-                       SVGA3DMISCREG_FACE);
+   return src_register(SVGA3DREG_MISCTYPE, SVGA3DMISCREG_FACE);
  }
  
-/* returns {0, 0, 0, 1} immediate */
-static INLINE struct src_register
+
+/**
+ * returns {0, 0, 0, 1} immediate
+ */
+static struct src_register
  get_zero_immediate( struct svga_shader_emitter *emit )
  {
     assert(emit->created_zero_immediate);
@@ -721,8 +879,11 @@ get_zero_immediate( struct svga_shader_emitter *emit )
                    0, 0, 0, 3);
  }
  
-/* returns {1, 1, 1, -1} immediate */
-static INLINE struct src_register
+
+/**
+ * returns {1, 1, 1, -1} immediate
+ */
+static struct src_register
  get_pos_neg_one_immediate( struct svga_shader_emitter *emit )
  {
     assert(emit->created_zero_immediate);
@@ -732,8 +893,24 @@ get_pos_neg_one_immediate( struct svga_shader_emitter *emit )
                    3, 3, 3, 2);
  }
  
-/* returns the loop const */
-static INLINE struct src_register
+
+/**
+ * returns {0.5, 0.5, 0.5, 0.5} immediate
+ */
+static struct src_register
+get_half_immediate( struct svga_shader_emitter *emit )
+{
+   assert(emit->created_zero_immediate);
+   assert(emit->zero_immediate_idx >= 0);
+   return swizzle(src_register(SVGA3DREG_CONST, emit->zero_immediate_idx),
+                  1, 1, 1, 1);
+}
+
+
+/**
+ * returns the loop const
+ */
+static struct src_register
  get_loop_const( struct svga_shader_emitter *emit )
  {
     assert(emit->created_loop_const);
@@ -742,7 +919,8 @@ get_loop_const( struct svga_shader_emitter *emit )
                          emit->loop_const_idx );
  }
  
-static INLINE struct src_register
+
+static struct src_register
  get_fake_arl_const( struct svga_shader_emitter *emit )
  {
     struct src_register reg;
@@ -759,7 +937,12 @@ get_fake_arl_const( struct svga_shader_emitter *emit )
     return scalar(reg, swizzle);
  }
  
-static INLINE struct src_register
+
+/**
+ * Return the register which holds the current dimenions of the
+ * texture bound to the given sampler
+ */
+static struct src_register
  get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num )
  {
     int idx;
@@ -773,11 +956,13 @@ get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num )
     return reg;
  }
  
-static boolean emit_fake_arl(struct svga_shader_emitter *emit,
-                             const struct tgsi_full_instruction *insn)
+
+static boolean
+emit_fake_arl(struct svga_shader_emitter *emit,
+              const struct tgsi_full_instruction *insn)
  {
-   const struct src_register src0 = translate_src_register(
-      emit, &insn->Src[0] );
+   const struct src_register src0 =
+      translate_src_register(emit, &insn->Src[0] );
     struct src_register src1 = get_fake_arl_const( emit );
     SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
     SVGA3dShaderDestToken tmp = get_temp( emit );
@@ -797,11 +982,13 @@ static boolean emit_fake_arl(struct svga_shader_emitter *emit,
                        dst, src1 );
  }
  
-static boolean emit_if(struct svga_shader_emitter *emit,
-                       const struct tgsi_full_instruction *insn)
+
+static boolean
+emit_if(struct svga_shader_emitter *emit,
+        const struct tgsi_full_instruction *insn)
  {
-   struct src_register src0 = translate_src_register(
-      emit, &insn->Src[0] );
+   struct src_register src0 =
+      translate_src_register(emit, &insn->Src[0]);
     struct src_register zero = get_zero_immediate( emit );
     SVGA3dShaderInstToken if_token = inst_token( SVGA3DOP_IFC );
  
@@ -812,7 +999,6 @@ static boolean emit_if(struct svga_shader_emitter *emit,
        /*
         * Max different constant registers readable per IFC instruction is 1.
         */
-
        SVGA3dShaderDestToken tmp = get_temp( emit );
  
        if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0))
@@ -828,34 +1014,39 @@ static boolean emit_if(struct svga_shader_emitter *emit,
             emit_src( emit, zero ) );
  }
  
-static boolean emit_endif(struct svga_shader_emitter *emit,
-                       const struct tgsi_full_instruction *insn)
+
+static boolean
+emit_endif(struct svga_shader_emitter *emit,
+           const struct tgsi_full_instruction *insn)
  {
     emit->dynamic_branching_level--;
  
-   return (emit_instruction( emit,
-                             inst_token( SVGA3DOP_ENDIF )));
+   return emit_instruction(emit, inst_token(SVGA3DOP_ENDIF));
  }
  
-static boolean emit_else(struct svga_shader_emitter *emit,
-                         const struct tgsi_full_instruction *insn)
+
+static boolean
+emit_else(struct svga_shader_emitter *emit,
+          const struct tgsi_full_instruction *insn)
  {
-   return (emit_instruction( emit,
-                             inst_token( SVGA3DOP_ELSE )));
+   return emit_instruction(emit, inst_token(SVGA3DOP_ELSE));
  }
  
-/* Translate the following TGSI FLR instruction.
+
+/**
+ * Translate the following TGSI FLR instruction.
   *    FLR  DST, SRC
   * To the following SVGA3D instruction sequence.
   *    FRC  TMP, SRC
   *    SUB  DST, SRC, TMP
   */
-static boolean emit_floor(struct svga_shader_emitter *emit,
-                          const struct tgsi_full_instruction *insn )
+static boolean
+emit_floor(struct svga_shader_emitter *emit,
+           const struct tgsi_full_instruction *insn )
  {
     SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
-   const struct src_register src0 = translate_src_register(
-      emit, &insn->Src[0] );
+   const struct src_register src0 =
+      translate_src_register(emit, &insn->Src[0] );
     SVGA3dShaderDestToken temp = get_temp( emit );
  
     /* FRC  TMP, SRC */
@@ -871,17 +1062,20 @@ static boolean emit_floor(struct svga_shader_emitter *emit,
  }
  
  
-/* Translate the following TGSI CEIL instruction.
+/**
+ * Translate the following TGSI CEIL instruction.
   *    CEIL  DST, SRC
   * To the following SVGA3D instruction sequence.
   *    FRC  TMP, -SRC
   *    ADD  DST, SRC, TMP
   */
-static boolean emit_ceil(struct svga_shader_emitter *emit,
-                         const struct tgsi_full_instruction *insn)
+static boolean
+emit_ceil(struct svga_shader_emitter *emit,
+          const struct tgsi_full_instruction *insn)
  {
     SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0);
-   const struct src_register src0 = translate_src_register(emit, &insn->Src[0]);
+   const struct src_register src0 =
+      translate_src_register(emit, &insn->Src[0]);
     SVGA3dShaderDestToken temp = get_temp(emit);
  
     /* FRC  TMP, -SRC */
@@ -896,57 +1090,23 @@ static boolean emit_ceil(struct svga_shader_emitter *emit,
  }
  
  
-/* Translate the following TGSI CMP instruction.
- *    CMP  DST, SRC0, SRC1, SRC2
- * To the following SVGA3D instruction sequence.
- *    CMP  DST, SRC0, SRC2, SRC1
- */
-static boolean emit_cmp(struct svga_shader_emitter *emit,
-                          const struct tgsi_full_instruction *insn )
-{
-   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
-   const struct src_register src0 = translate_src_register(
-      emit, &insn->Src[0] );
-   const struct src_register src1 = translate_src_register(
-      emit, &insn->Src[1] );
-   const struct src_register src2 = translate_src_register(
-      emit, &insn->Src[2] );
-
-   if (emit->unit == PIPE_SHADER_VERTEX) {
-      SVGA3dShaderDestToken temp = get_temp(emit);
-      struct src_register zero = scalar(get_zero_immediate(emit), TGSI_SWIZZLE_X);
-
-      /* Since vertex shaders don't support the CMP instruction,
-       * simulate it with SLT and LRP instructions.
-       *    SLT  TMP, SRC0, 0.0
-       *    LRP  DST, TMP, SRC1, SRC2
-       */
-      if (!submit_op2(emit, inst_token(SVGA3DOP_SLT), temp, src0, zero))
-         return FALSE;
-      return submit_lrp(emit, dst, src(temp), src1, src2);
-   }
-
-   /* CMP  DST, SRC0, SRC2, SRC1 */
-   return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst, src0, src2, src1);
-}
-
-
-
-/* Translate the following TGSI DIV instruction.
+/**
+ * Translate the following TGSI DIV instruction.
   *    DIV  DST.xy, SRC0, SRC1
   * To the following SVGA3D instruction sequence.
   *    RCP  TMP.x, SRC1.xxxx
   *    RCP  TMP.y, SRC1.yyyy
   *    MUL  DST.xy, SRC0, TMP
   */
-static boolean emit_div(struct svga_shader_emitter *emit,
-                        const struct tgsi_full_instruction *insn )
+static boolean
+emit_div(struct svga_shader_emitter *emit,
+         const struct tgsi_full_instruction *insn )
  {
     SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
-   const struct src_register src0 = translate_src_register(
-      emit, &insn->Src[0] );
-   const struct src_register src1 = translate_src_register(
-      emit, &insn->Src[1] );
+   const struct src_register src0 =
+      translate_src_register(emit, &insn->Src[0] );
+   const struct src_register src1 =
+      translate_src_register(emit, &insn->Src[1] );
     SVGA3dShaderDestToken temp = get_temp( emit );
     int i;
  
@@ -957,15 +1117,14 @@ static boolean emit_div(struct svga_shader_emitter *emit,
        unsigned channel = 1 << i;
        if (dst.mask & channel) {
           /* RCP  TMP.?, SRC1.???? */
-         if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ), 
-                          writemask(temp, channel), 
+         if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
+                          writemask(temp, channel),
                            scalar(src1, i) ))
              return FALSE;
        }
     }
  
-   /* Then multiply them out with a single mul:
-    *
+   /* Vector mul:
      * MUL  DST, SRC0, TMP
      */
     if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst, src0,
@@ -975,20 +1134,23 @@ static boolean emit_div(struct svga_shader_emitter *emit,
     return TRUE;
  }
  
-/* Translate the following TGSI DP2 instruction.
+
+/**
+ * Translate the following TGSI DP2 instruction.
   *    DP2  DST, SRC1, SRC2
   * To the following SVGA3D instruction sequence.
   *    MUL  TMP, SRC1, SRC2
   *    ADD  DST, TMP.xxxx, TMP.yyyy
   */
-static boolean emit_dp2(struct svga_shader_emitter *emit,
-                        const struct tgsi_full_instruction *insn )
+static boolean
+emit_dp2(struct svga_shader_emitter *emit,
+         const struct tgsi_full_instruction *insn )
  {
     SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
-   const struct src_register src0 = translate_src_register(
-      emit, &insn->Src[0] );
-   const struct src_register src1 = translate_src_register(
-      emit, &insn->Src[1] );
+   const struct src_register src0 =
+      translate_src_register(emit, &insn->Src[0]);
+   const struct src_register src1 =
+      translate_src_register(emit, &insn->Src[1]);
     SVGA3dShaderDestToken temp = get_temp( emit );
     struct src_register temp_src0, temp_src1;
  
@@ -1008,20 +1170,22 @@ static boolean emit_dp2(struct svga_shader_emitter *emit,
  }
  
  
-/* Translate the following TGSI DPH instruction.
+/**
+ * Translate the following TGSI DPH instruction.
   *    DPH  DST, SRC1, SRC2
   * To the following SVGA3D instruction sequence.
   *    DP3  TMP, SRC1, SRC2
   *    ADD  DST, TMP, SRC2.wwww
   */
-static boolean emit_dph(struct svga_shader_emitter *emit,
-                        const struct tgsi_full_instruction *insn )
+static boolean
+emit_dph(struct svga_shader_emitter *emit,
+         const struct tgsi_full_instruction *insn )
  {
     SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
     const struct src_register src0 = translate_src_register(
        emit, &insn->Src[0] );
-   struct src_register src1 = translate_src_register(
-      emit, &insn->Src[1] );
+   struct src_register src1 =
+      translate_src_register(emit, &insn->Src[1]);
     SVGA3dShaderDestToken temp = get_temp( emit );
  
     /* DP3  TMP, SRC1, SRC2 */
@@ -1038,19 +1202,22 @@ static boolean emit_dph(struct svga_shader_emitter *emit,
     return TRUE;
  }
  
-/* Translate the following TGSI DST instruction.
+
+/**
+ * Translate the following TGSI DST instruction.
   *    NRM  DST, SRC
   * To the following SVGA3D instruction sequence.
   *    DP3  TMP, SRC, SRC
   *    RSQ  TMP, TMP
   *    MUL  DST, SRC, TMP
   */
-static boolean emit_nrm(struct svga_shader_emitter *emit,
-                        const struct tgsi_full_instruction *insn )
+static boolean
+emit_nrm(struct svga_shader_emitter *emit,
+         const struct tgsi_full_instruction *insn)
  {
     SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
-   const struct src_register src0 = translate_src_register(
-      emit, &insn->Src[0] );
+   const struct src_register src0 =
+      translate_src_register(emit, &insn->Src[0]);
     SVGA3dShaderDestToken temp = get_temp( emit );
  
     /* DP3  TMP, SRC, SRC */
@@ -1067,25 +1234,25 @@ static boolean emit_nrm(struct svga_shader_emitter *emit,
        return FALSE;
  
     return TRUE;
-
  }
  
-static boolean do_emit_sincos(struct svga_shader_emitter *emit,
-                              SVGA3dShaderDestToken dst,
-                              struct src_register src0)
+
+static boolean
+do_emit_sincos(struct svga_shader_emitter *emit,
+               SVGA3dShaderDestToken dst,
+               struct src_register src0)
  {
     src0 = scalar(src0, TGSI_SWIZZLE_X);
-
-   return submit_op1( emit, inst_token( SVGA3DOP_SINCOS ),
-                      dst, src0 );
+   return submit_op1(emit, inst_token(SVGA3DOP_SINCOS), dst, src0);
  }
  
-static boolean emit_sincos(struct svga_shader_emitter *emit,
-                           const struct tgsi_full_instruction *insn)
+
+static boolean
+emit_sincos(struct svga_shader_emitter *emit,
+            const struct tgsi_full_instruction *insn)
  {
     SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
-   struct src_register src0 = translate_src_register(
-      emit, &insn->Src[0] );
+   struct src_register src0 = translate_src_register(emit, &insn->Src[0]);
     SVGA3dShaderDestToken temp = get_temp( emit );
  
     /* SCS TMP SRC */
@@ -1099,16 +1266,18 @@ static boolean emit_sincos(struct svga_shader_emitter *emit,
     return TRUE;
  }
  
-/*
+
+/**
   * SCS TMP SRC
   * MOV DST TMP.yyyy
   */
-static boolean emit_sin(struct svga_shader_emitter *emit,
-                        const struct tgsi_full_instruction *insn )
+static boolean
+emit_sin(struct svga_shader_emitter *emit,
+         const struct tgsi_full_instruction *insn )
  {
     SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
-   struct src_register src0 = translate_src_register(
-      emit, &insn->Src[0] );
+   struct src_register src0 =
+      translate_src_register(emit, &insn->Src[0] );
     SVGA3dShaderDestToken temp = get_temp( emit );
  
     /* SCS TMP SRC */
@@ -1128,12 +1297,13 @@ static boolean emit_sin(struct svga_shader_emitter *emit,
   * SCS TMP SRC
   * MOV DST TMP.xxxx
   */
-static boolean emit_cos(struct svga_shader_emitter *emit,
-                        const struct tgsi_full_instruction *insn )
+static boolean
+emit_cos(struct svga_shader_emitter *emit,
+         const struct tgsi_full_instruction *insn)
  {
     SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
-   struct src_register src0 = translate_src_register(
-      emit, &insn->Src[0] );
+   struct src_register src0 =
+      translate_src_register(emit, &insn->Src[0] );
     SVGA3dShaderDestToken temp = get_temp( emit );
  
     /* SCS TMP SRC */
@@ -1149,12 +1319,14 @@ static boolean emit_cos(struct svga_shader_emitter *emit,
     return TRUE;
  }
  
-static boolean emit_ssg(struct svga_shader_emitter *emit,
-                        const struct tgsi_full_instruction *insn )
+
+static boolean
+emit_ssg(struct svga_shader_emitter *emit,
+         const struct tgsi_full_instruction *insn)
  {
     SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
-   struct src_register src0 = translate_src_register(
-      emit, &insn->Src[0] );
+   struct src_register src0 =
+      translate_src_register(emit, &insn->Src[0] );
     SVGA3dShaderDestToken temp0 = get_temp( emit );
     SVGA3dShaderDestToken temp1 = get_temp( emit );
     struct src_register zero, one;
@@ -1185,11 +1357,13 @@ static boolean emit_ssg(struct svga_shader_emitter *emit,
                        src( temp1 ) );
  }
  
-/*
+
+/**
   * ADD DST SRC0, negate(SRC0)
   */
-static boolean emit_sub(struct svga_shader_emitter *emit,
-                        const struct tgsi_full_instruction *insn)
+static boolean
+emit_sub(struct svga_shader_emitter *emit,
+         const struct tgsi_full_instruction *insn)
  {
     SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
     struct src_register src0 = translate_src_register(
@@ -1207,8 +1381,9 @@ static boolean emit_sub(struct svga_shader_emitter *emit,
  }
  
  
-static boolean emit_kil(struct svga_shader_emitter *emit,
-                        const struct tgsi_full_instruction *insn )
+static boolean
+emit_kil(struct svga_shader_emitter *emit,
+         const struct tgsi_full_instruction *insn)
  {
     const struct tgsi_full_src_register *reg = &insn->Src[0];
     struct src_register src0, srcIn;
@@ -1263,17 +1438,17 @@ static boolean emit_kil(struct svga_shader_emitter *emit,
  }
  
  
-/* mesa state tracker always emits kilp as an unconditional
- * kil */
-static boolean emit_kilp(struct svga_shader_emitter *emit,
-                        const struct tgsi_full_instruction *insn )
+/**
+ * mesa state tracker always emits kilp as an unconditional kil
+ */
+static boolean
+emit_kilp(struct svga_shader_emitter *emit,
+          const struct tgsi_full_instruction *insn)
  {
-   SVGA3dShaderInstToken inst;
     SVGA3dShaderDestToken temp;
     struct src_register one = scalar( get_zero_immediate( emit ),
                                       TGSI_SWIZZLE_W );
-
-   inst = inst_token( SVGA3DOP_TEXKILL );
+   SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_TEXKILL );
  
     /* texkill doesn't allow negation on the operand so lets move
      * negation of {1} to a temp register */
@@ -1285,13 +1460,27 @@ static boolean emit_kilp(struct svga_shader_emitter *emit,
     return submit_op0( emit, inst, temp );
  }
  
+
+/**
+ * Test if r1 and r2 are the same register.
+ */
+static boolean
+same_register(struct src_register r1, struct src_register r2)
+{
+   return (r1.base.num == r2.base.num &&
+           r1.base.type_upper == r2.base.type_upper &&
+           r1.base.type_lower == r2.base.type_lower);
+}
+
+
+
  /* Implement conditionals by initializing destination reg to 'fail',
   * then set predicate reg with UFOP_SETP, then move 'pass' to dest
   * based on predicate reg.
   *
   * SETP src0, cmp, src1  -- do this first to avoid aliasing problems.
   * MOV dst, fail
- * MOV dst, pass, p0 
+ * MOV dst, pass, p0
   */
  static boolean
  emit_conditional(struct svga_shader_emitter *emit,
@@ -1335,6 +1524,16 @@ emit_conditional(struct svga_shader_emitter *emit,
        break;
     }
  
+   if (same_register(src(dst), pass)) {
+      /* We'll get bad results if the dst and pass registers are the same
+       * so use a temp register containing pass.
+       */
+      SVGA3dShaderDestToken temp = get_temp(emit);
+      if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), temp, pass))
+         return FALSE;
+      pass = src(temp);
+   }
+
     /* SETP src0, COMPOP, src1 */
     if (!submit_op2( emit, setp_token, pred_reg,
                      src0, src1 ))
@@ -1386,7 +1585,6 @@ emit_select(struct svga_shader_emitter *emit,
        }
     }
  
-
     /* Otherwise, need to use the setp approach:
      */
     {
@@ -1407,31 +1605,70 @@ emit_select(struct svga_shader_emitter *emit,
  }
  
  
-static boolean emit_select_op(struct svga_shader_emitter *emit,
-                              unsigned compare,
-                              const struct tgsi_full_instruction *insn)
+static boolean
+emit_select_op(struct svga_shader_emitter *emit,
+               unsigned compare,
+               const struct tgsi_full_instruction *insn)
  {
     SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
     struct src_register src0 = translate_src_register(
        emit, &insn->Src[0] );
     struct src_register src1 = translate_src_register(
        emit, &insn->Src[1] );
-      
+
     return emit_select( emit, compare, dst, src0, src1 );
  }
  
  
-/* Translate texture instructions to SVGA3D representation.
+/**
+ * Translate TGSI CMP instruction.
+ */
+static boolean
+emit_cmp(struct svga_shader_emitter *emit,
+         const struct tgsi_full_instruction *insn)
+{
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   const struct src_register src0 =
+      translate_src_register(emit, &insn->Src[0] );
+   const struct src_register src1 =
+      translate_src_register(emit, &insn->Src[1] );
+   const struct src_register src2 =
+      translate_src_register(emit, &insn->Src[2] );
+
+   if (emit->unit == PIPE_SHADER_VERTEX) {
+      struct src_register zero =
+         scalar(get_zero_immediate(emit), TGSI_SWIZZLE_X);
+      /* We used to simulate CMP with SLT+LRP.  But that didn't work when
+       * src1 or src2 was Inf/NaN.  In particular, GLSL sqrt(0) failed
+       * because it involves a CMP to handle the 0 case.
+       * Use a conditional expression instead.
+       */
+      return emit_conditional(emit, PIPE_FUNC_LESS, dst,
+                              src0, zero, src1, src2);
+   }
+   else {
+      assert(emit->unit == PIPE_SHADER_FRAGMENT);
+
+      /* CMP  DST, SRC0, SRC2, SRC1 */
+      return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst,
+                         src0, src2, src1);
+   }
+}
+
+
+/**
+ * Translate texture instructions to SVGA3D representation.
   */
-static boolean emit_tex2(struct svga_shader_emitter *emit,
-                         const struct tgsi_full_instruction *insn,
-                         SVGA3dShaderDestToken dst )
+static boolean
+emit_tex2(struct svga_shader_emitter *emit,
+          const struct tgsi_full_instruction *insn,
+          SVGA3dShaderDestToken dst)
  {
     SVGA3dShaderInstToken inst;
     struct src_register texcoord;
     struct src_register sampler;
     SVGA3dShaderDestToken tmp;
-   
+
     inst.value = 0;
  
     switch (insn->Instruction.Opcode) {
@@ -1477,12 +1714,12 @@ static boolean emit_tex2(struct svga_shader_emitter *emit,
           return FALSE;
  
        /* MOV  tmp.w, zero */
-      if (!submit_op1( emit, 
+      if (!submit_op1( emit,
                         inst_token( SVGA3DOP_MOV ),
-                       writemask( tmp, TGSI_WRITEMASK_W ), 
+                       writemask( tmp, TGSI_WRITEMASK_W ),
                         scalar( zero, TGSI_SWIZZLE_X )))
           return FALSE;
-      
+
        texcoord = src( tmp );
        inst.op = SVGA3DOP_TEXLDL;
     }
@@ -1504,13 +1741,13 @@ static boolean emit_tex2(struct svga_shader_emitter *emit,
  }
  
  
-
-
-/* Translate texture instructions to SVGA3D representation.
+/**
+ * Translate texture instructions to SVGA3D representation.
   */
-static boolean emit_tex4(struct svga_shader_emitter *emit,
-                         const struct tgsi_full_instruction *insn,
-                         SVGA3dShaderDestToken dst )
+static boolean
+emit_tex4(struct svga_shader_emitter *emit,
+          const struct tgsi_full_instruction *insn,
+          SVGA3dShaderDestToken dst )
  {
     SVGA3dShaderInstToken inst;
     struct src_register texcoord;
@@ -1526,7 +1763,7 @@ static boolean emit_tex4(struct svga_shader_emitter *emit,
     inst.value = 0;
  
     switch (insn->Instruction.Opcode) {
-   case TGSI_OPCODE_TXD: 
+   case TGSI_OPCODE_TXD:
        inst.op = SVGA3DOP_TEXLDD; /* 4 args! */
        break;
     default:
@@ -1541,13 +1778,14 @@ static boolean emit_tex4(struct svga_shader_emitter *emit,
  /**
   * Emit texture swizzle code.
   */
-static boolean emit_tex_swizzle( struct svga_shader_emitter *emit,
-                                 SVGA3dShaderDestToken dst,
-                                 struct src_register src,
-                                 unsigned swizzle_x,
-                                 unsigned swizzle_y,
-                                 unsigned swizzle_z,
-                                 unsigned swizzle_w)
+static boolean
+emit_tex_swizzle(struct svga_shader_emitter *emit,
+                 SVGA3dShaderDestToken dst,
+                 struct src_register src,
+                 unsigned swizzle_x,
+                 unsigned swizzle_y,
+                 unsigned swizzle_z,
+                 unsigned swizzle_w)
  {
     const unsigned swizzleIn[4] = {swizzle_x, swizzle_y, swizzle_z, swizzle_w};
     unsigned srcSwizzle[4];
@@ -1572,9 +1810,9 @@ static boolean emit_tex_swizzle( struct svga_shader_emitter *emit,
  
     /* write x/y/z/w comps */
     if (dst.mask & srcWritemask) {
-      if (!submit_op1(emit, 
-                      inst_token(SVGA3DOP_MOV), 
-                      writemask(dst, srcWritemask), 
+      if (!submit_op1(emit,
+                      inst_token(SVGA3DOP_MOV),
+                      writemask(dst, srcWritemask),
                        swizzle(src,
                                srcSwizzle[0],
                                srcSwizzle[1],
@@ -1585,18 +1823,18 @@ static boolean emit_tex_swizzle( struct svga_shader_emitter *emit,
  
     /* write 0 comps */
     if (dst.mask & zeroWritemask) {
-      if (!submit_op1(emit, 
-                      inst_token(SVGA3DOP_MOV), 
-                      writemask(dst, zeroWritemask), 
+      if (!submit_op1(emit,
+                      inst_token(SVGA3DOP_MOV),
+                      writemask(dst, zeroWritemask),
                        scalar(get_zero_immediate(emit), TGSI_SWIZZLE_X)))
           return FALSE;
     }
  
     /* write 1 comps */
     if (dst.mask & oneWritemask) {
-      if (!submit_op1(emit, 
-                      inst_token(SVGA3DOP_MOV), 
-                      writemask(dst, oneWritemask), 
+      if (!submit_op1(emit,
+                      inst_token(SVGA3DOP_MOV),
+                      writemask(dst, oneWritemask),
                        scalar(get_zero_immediate(emit), TGSI_SWIZZLE_W)))
           return FALSE;
     }
@@ -1605,10 +1843,11 @@ static boolean emit_tex_swizzle( struct svga_shader_emitter *emit,
  }
  
  
-static boolean emit_tex(struct svga_shader_emitter *emit,
-                        const struct tgsi_full_instruction *insn )
+static boolean
+emit_tex(struct svga_shader_emitter *emit,
+         const struct tgsi_full_instruction *insn)
  {
-   SVGA3dShaderDestToken dst = 
+   SVGA3dShaderDestToken dst =
        translate_dst_register( emit, insn, 0 );
     struct src_register src0 =
        translate_src_register( emit, &insn->Src[0] );
@@ -1656,7 +1895,6 @@ static boolean emit_tex(struct svga_shader_emitter *emit,
        assert(0);
     }
  
-
     if (compare) {
        SVGA3dShaderDestToken dst2;
  
@@ -1730,8 +1968,10 @@ static boolean emit_tex(struct svga_shader_emitter *emit,
     return TRUE;
  }
  
-static boolean emit_bgnloop2( struct svga_shader_emitter *emit,
-                              const struct tgsi_full_instruction *insn )
+
+static boolean
+emit_bgnloop2(struct svga_shader_emitter *emit,
+              const struct tgsi_full_instruction *insn)
  {
     SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_LOOP );
     struct src_register loop_reg = src_register( SVGA3DREG_LOOP, 0 );
@@ -1744,8 +1984,10 @@ static boolean emit_bgnloop2( struct svga_shader_emitter *emit,
             emit_src( emit, const_int ) );
  }
  
-static boolean emit_endloop2( struct svga_shader_emitter *emit,
-                              const struct tgsi_full_instruction *insn )
+
+static boolean
+emit_endloop2(struct svga_shader_emitter *emit,
+              const struct tgsi_full_instruction *insn)
  {
     SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_ENDLOOP );
  
@@ -1754,16 +1996,20 @@ static boolean emit_endloop2( struct svga_shader_emitter *emit,
     return emit_instruction( emit, inst );
  }
  
-static boolean emit_brk( struct svga_shader_emitter *emit,
-                         const struct tgsi_full_instruction *insn )
+
+static boolean
+emit_brk(struct svga_shader_emitter *emit,
+         const struct tgsi_full_instruction *insn)
  {
     SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_BREAK );
     return emit_instruction( emit, inst );
  }
  
-static boolean emit_scalar_op1( struct svga_shader_emitter *emit,
-                                unsigned opcode,
-                                const struct tgsi_full_instruction *insn )
+
+static boolean
+emit_scalar_op1(struct svga_shader_emitter *emit,
+                unsigned opcode,
+                const struct tgsi_full_instruction *insn)
  {
     SVGA3dShaderInstToken inst;
     SVGA3dShaderDestToken dst;
@@ -1778,9 +2024,10 @@ static boolean emit_scalar_op1( struct svga_shader_emitter *emit,
  }
  
  
-static boolean emit_simple_instruction(struct svga_shader_emitter *emit,
-                                       unsigned opcode,
-                                       const struct tgsi_full_instruction *insn )
+static boolean
+emit_simple_instruction(struct svga_shader_emitter *emit,
+                        unsigned opcode,
+                        const struct tgsi_full_instruction *insn)
  {
     const struct tgsi_full_src_register *src = insn->Src;
     SVGA3dShaderInstToken inst;
@@ -1811,25 +2058,26 @@ static boolean emit_simple_instruction(struct svga_shader_emitter *emit,
  }
  
  
-static boolean emit_deriv(struct svga_shader_emitter *emit,
-                          const struct tgsi_full_instruction *insn )
+static boolean
+emit_deriv(struct svga_shader_emitter *emit,
+           const struct tgsi_full_instruction *insn )
  {
     if (emit->dynamic_branching_level > 0 &&
-       insn->Src[0].Register.File == TGSI_FILE_TEMPORARY) 
+       insn->Src[0].Register.File == TGSI_FILE_TEMPORARY)
     {
        struct src_register zero = get_zero_immediate( emit );
-      SVGA3dShaderDestToken dst = 
+      SVGA3dShaderDestToken dst =
           translate_dst_register( emit, insn, 0 );
  
        /* Deriv opcodes not valid inside dynamic branching, workaround
         * by zeroing out the destination.
         */
-      if (!submit_op1(emit, 
-                      inst_token( SVGA3DOP_MOV ), 
+      if (!submit_op1(emit,
+                      inst_token( SVGA3DOP_MOV ),
                        dst,
                        scalar(zero, TGSI_SWIZZLE_X)))
           return FALSE;
-      
+
        return TRUE;
     }
     else {
@@ -1868,8 +2116,10 @@ static boolean emit_deriv(struct svga_shader_emitter *emit,
     }
  }
  
-static boolean emit_arl(struct svga_shader_emitter *emit,
-                        const struct tgsi_full_instruction *insn)
+
+static boolean
+emit_arl(struct svga_shader_emitter *emit,
+         const struct tgsi_full_instruction *insn)
  {
     ++emit->current_arl;
     if (emit->unit == PIPE_SHADER_FRAGMENT) {
@@ -1888,8 +2138,10 @@ static boolean emit_arl(struct svga_shader_emitter *emit,
     }
  }
  
-static boolean emit_pow(struct svga_shader_emitter *emit,
-                        const struct tgsi_full_instruction *insn)
+
+static boolean
+emit_pow(struct svga_shader_emitter *emit,
+         const struct tgsi_full_instruction *insn)
  {
     SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
     struct src_register src0 = translate_src_register(
@@ -1897,11 +2149,11 @@ static boolean emit_pow(struct svga_shader_emitter *emit,
     struct src_register src1 = translate_src_register(
        emit, &insn->Src[1] );
     boolean need_tmp = FALSE;
-   
+
     /* POW can only output to a temporary */
     if (insn->Dst[0].Register.File != TGSI_FILE_TEMPORARY)
        need_tmp = TRUE;
-   
+
     /* POW src1 must not be the same register as dst */
     if (alias_src_dst( src1, dst ))
        need_tmp = TRUE;
@@ -1911,20 +2163,24 @@ static boolean emit_pow(struct svga_shader_emitter *emit,
     src1 = scalar( src1, TGSI_SWIZZLE_X );
  
     if (need_tmp) {
-      SVGA3dShaderDestToken tmp = writemask(get_temp( emit ), TGSI_WRITEMASK_X );
+      SVGA3dShaderDestToken tmp =
+         writemask(get_temp( emit ), TGSI_WRITEMASK_X );
  
        if (!submit_op2(emit, inst_token( SVGA3DOP_POW ), tmp, src0, src1))
           return FALSE;
  
-      return submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, scalar(src(tmp), 0) );
-   } 
+      return submit_op1(emit, inst_token( SVGA3DOP_MOV ),
+                        dst, scalar(src(tmp), 0) );
+   }
     else {
        return submit_op2(emit, inst_token( SVGA3DOP_POW ), dst, src0, src1);
     }
  }
  
-static boolean emit_xpd(struct svga_shader_emitter *emit,
-                        const struct tgsi_full_instruction *insn)
+
+static boolean
+emit_xpd(struct svga_shader_emitter *emit,
+         const struct tgsi_full_instruction *insn)
  {
     SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
     const struct src_register src0 = translate_src_register(
@@ -1934,7 +2190,7 @@ static boolean emit_xpd(struct svga_shader_emitter *emit,
     boolean need_dst_tmp = FALSE;
  
     /* XPD can only output to a temporary */
-   if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP) 
+   if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP)
        need_dst_tmp = TRUE;
  
     /* The dst reg must not be the same as src0 or src1*/
@@ -1954,7 +2210,7 @@ static boolean emit_xpd(struct svga_shader_emitter *emit,
  
        if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
           return FALSE;
-   } 
+   }
     else {
        if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), dst, src0, src1))
           return FALSE;
@@ -1965,8 +2221,8 @@ static boolean emit_xpd(struct svga_shader_emitter *emit,
     if (dst.mask & TGSI_WRITEMASK_W) {
        struct src_register zero = get_zero_immediate( emit );
  
-      if (!submit_op1(emit, 
-                      inst_token( SVGA3DOP_MOV ), 
+      if (!submit_op1(emit,
+                      inst_token( SVGA3DOP_MOV ),
                        writemask(dst, TGSI_WRITEMASK_W),
                        zero))
           return FALSE;
@@ -1976,8 +2232,9 @@ static boolean emit_xpd(struct svga_shader_emitter *emit,
  }
  
  
-static boolean emit_lrp(struct svga_shader_emitter *emit,
-                        const struct tgsi_full_instruction *insn)
+static boolean
+emit_lrp(struct svga_shader_emitter *emit,
+         const struct tgsi_full_instruction *insn)
  {
     SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
     const struct src_register src0 = translate_src_register(
@@ -1991,8 +2248,9 @@ static boolean emit_lrp(struct svga_shader_emitter *emit,
  }
  
  
-static boolean emit_dst_insn(struct svga_shader_emitter *emit,
-                             const struct tgsi_full_instruction *insn )
+static boolean
+emit_dst_insn(struct svga_shader_emitter *emit,
+              const struct tgsi_full_instruction *insn)
  {
     if (emit->unit == PIPE_SHADER_VERTEX) {
        /* SVGA/DX9 has a DST instruction, but only for vertex shaders:
@@ -2000,13 +2258,11 @@ static boolean emit_dst_insn(struct svga_shader_emitter *emit,
        return emit_simple_instruction(emit, SVGA3DOP_DST, insn);
     }
     else {
-
        /* result[0] = 1    * 1;
         * result[1] = a[1] * b[1];
         * result[2] = a[2] * 1;
         * result[3] = 1    * b[3];
         */
-
        SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
        SVGA3dShaderDestToken tmp;
        const struct src_register src0 = translate_src_register(
@@ -2031,16 +2287,16 @@ static boolean emit_dst_insn(struct svga_shader_emitter *emit,
        /* tmp.xw = 1.0
         */
        if (tmp.mask & TGSI_WRITEMASK_XW) {
-         if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), 
+         if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
                            writemask(tmp, TGSI_WRITEMASK_XW ),
                            scalar( zero, 3 )))
              return FALSE;
        }
-      
+
        /* tmp.yz = src0
         */
        if (tmp.mask & TGSI_WRITEMASK_YZ) {
-         if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), 
+         if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
                            writemask(tmp, TGSI_WRITEMASK_YZ ),
                            src0))
              return FALSE;
@@ -2049,7 +2305,7 @@ static boolean emit_dst_insn(struct svga_shader_emitter *emit,
        /* tmp.yw = tmp * src1
         */
        if (tmp.mask & TGSI_WRITEMASK_YW) {
-         if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), 
+         if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
                            writemask(tmp, TGSI_WRITEMASK_YW ),
                            src(tmp),
                            src1))
@@ -2059,19 +2315,20 @@ static boolean emit_dst_insn(struct svga_shader_emitter *emit,
        /* dst = tmp
         */
        if (need_tmp) {
-         if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), 
+         if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
                            dst,
                            src(tmp)))
              return FALSE;
-      }      
+      }
     }
-   
+
     return TRUE;
  }
  
  
-static boolean emit_exp(struct svga_shader_emitter *emit,
-                        const struct tgsi_full_instruction *insn)
+static boolean
+emit_exp(struct svga_shader_emitter *emit,
+         const struct tgsi_full_instruction *insn)
  {
     SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
     struct src_register src0 =
@@ -2134,8 +2391,10 @@ static boolean emit_exp(struct svga_shader_emitter *emit,
     return TRUE;
  }
  
-static boolean emit_lit(struct svga_shader_emitter *emit,
-                             const struct tgsi_full_instruction *insn )
+
+static boolean
+emit_lit(struct svga_shader_emitter *emit,
+         const struct tgsi_full_instruction *insn)
  {
     if (emit->unit == PIPE_SHADER_VERTEX) {
        /* SVGA/DX9 has a LIT instruction, but only for vertex shaders:
@@ -2143,7 +2402,6 @@ static boolean emit_lit(struct svga_shader_emitter *emit,
        return emit_simple_instruction(emit, SVGA3DOP_LIT, insn);
     }
     else {
-
        /* D3D vs. GL semantics can be fairly easily accomodated by
         * variations on this sequence.
         *
@@ -2163,7 +2421,6 @@ static boolean emit_lit(struct svga_shader_emitter *emit,
         *
         * Will implement the GL version for now.
         */
-
        SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
        SVGA3dShaderDestToken tmp = get_temp( emit );
        const struct src_register src0 = translate_src_register(
@@ -2173,9 +2430,9 @@ static boolean emit_lit(struct svga_shader_emitter *emit,
        /* tmp = pow(src.y, src.w)
         */
        if (dst.mask & TGSI_WRITEMASK_Z) {
-         if (!submit_op2(emit, inst_token( SVGA3DOP_POW ), 
-                         tmp, 
-                         scalar(src0, 1), 
+         if (!submit_op2(emit, inst_token( SVGA3DOP_POW ),
+                         tmp,
+                         scalar(src0, 1),
                           scalar(src0, 3)))
              return FALSE;
        }
@@ -2183,12 +2440,12 @@ static boolean emit_lit(struct svga_shader_emitter *emit,
        /* tmp.y = src.x
         */
        if (dst.mask & TGSI_WRITEMASK_Y) {
-         if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), 
+         if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
                            writemask(tmp, TGSI_WRITEMASK_Y ),
                            scalar(src0, 0)))
              return FALSE;
        }
-      
+
        /* Can't quite do this with emit conditional due to the extra
         * writemask on the predicated mov:
         */
@@ -2211,10 +2468,10 @@ static boolean emit_lit(struct svga_shader_emitter *emit,
  
           /* SETP src0.xxyy, GT, {0}.x */
           if (!submit_op2( emit, setp_token, pred_reg,
-                          predsrc, 
+                          predsrc,
                            swizzle(zero, 0, 0, 0, 0) ))
              return FALSE;
-         
+
           /* MOV dst, fail */
           if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst,
                            swizzle(zero, 3, 0, 0, 3 )))
@@ -2239,10 +2496,9 @@ static boolean emit_lit(struct svga_shader_emitter *emit,
  }
  
  
-
-
-static boolean emit_ex2( struct svga_shader_emitter *emit,
-                         const struct tgsi_full_instruction *insn )
+static boolean
+emit_ex2(struct svga_shader_emitter *emit,
+         const struct tgsi_full_instruction *insn)
  {
     SVGA3dShaderInstToken inst;
     SVGA3dShaderDestToken dst;
@@ -2268,8 +2524,9 @@ static boolean emit_ex2( struct svga_shader_emitter *emit,
  }
  
  
-static boolean emit_log(struct svga_shader_emitter *emit,
-                        const struct tgsi_full_instruction *insn)
+static boolean
+emit_log(struct svga_shader_emitter *emit,
+         const struct tgsi_full_instruction *insn)
  {
     SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
     struct src_register src0 =
@@ -2374,9 +2631,84 @@ static boolean emit_log(struct svga_shader_emitter *emit,
  }
  
  
-static boolean emit_bgnsub( struct svga_shader_emitter *emit,
-                           unsigned position,
-                           const struct tgsi_full_instruction *insn )
+/**
+ * Translate TGSI TRUNC or ROUND instruction.
+ * We need to truncate toward zero. Ex: trunc(-1.9) = -1
+ * Different approaches are needed for VS versus PS.
+ */
+static boolean
+emit_trunc_round(struct svga_shader_emitter *emit,
+                 const struct tgsi_full_instruction *insn,
+                 boolean round)
+{
+   SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0);
+   const struct src_register src0 =
+      translate_src_register(emit, &insn->Src[0] );
+   SVGA3dShaderDestToken t1 = get_temp(emit);
+
+   if (round) {
+      SVGA3dShaderDestToken t0 = get_temp(emit);
+      struct src_register half = get_half_immediate(emit);
+
+      /* t0 = abs(src0) + 0.5 */
+      if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t0,
+                      absolute(src0), half))
+         return FALSE;
+
+      /* t1 = fract(t0) */
+      if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), t1, src(t0)))
+         return FALSE;
+
+      /* t1 = t0 - t1 */
+      if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t1, src(t0),
+                      negate(src(t1))))
+         return FALSE;
+   }
+   else {
+      /* trunc */
+
+      /* t1 = fract(abs(src0)) */
+      if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), t1, absolute(src0)))
+         return FALSE;
+
+      /* t1 = abs(src0) - t1 */
+      if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t1, absolute(src0),
+                      negate(src(t1))))
+         return FALSE;
+   }
+
+   /*
+    * Now we need to multiply t1 by the sign of the original value.
+   */
+   if (emit->unit == PIPE_SHADER_VERTEX) {
+      /* For VS: use SGN instruction */
+      /* Need two extra/dummy registers: */
+      SVGA3dShaderDestToken t2 = get_temp(emit), t3 = get_temp(emit),
+         t4 = get_temp(emit);
+
+      /* t2 = sign(src0) */
+      if (!submit_op3(emit, inst_token(SVGA3DOP_SGN), t2, src0,
+                      src(t3), src(t4)))
+         return FALSE;
+
+      /* dst = t1 * t2 */
+      if (!submit_op2(emit, inst_token(SVGA3DOP_MUL), dst, src(t1), src(t2)))
+         return FALSE;
+   }
+   else {
+      /* For FS: Use CMP instruction */
+      return submit_op3(emit, inst_token( SVGA3DOP_CMP ), dst,
+                        src0, src(t1), negate(src(t1)));
+   }
+
+   return TRUE;
+}
+
+
+static boolean
+emit_bgnsub(struct svga_shader_emitter *emit,
+            unsigned position,
+            const struct tgsi_full_instruction *insn)
  {
     unsigned i;
  
@@ -2385,7 +2717,7 @@ static boolean emit_bgnsub( struct svga_shader_emitter *emit,
      * shader.
      */
     emit->in_main_func = FALSE;
-   
+
     for (i = 0; i < emit->nr_labels; i++) {
        if (emit->label[i] == position) {
           return (emit_instruction( emit, inst_token( SVGA3DOP_RET ) ) &&
@@ -2398,14 +2730,16 @@ static boolean emit_bgnsub( struct svga_shader_emitter *emit,
     return TRUE;
  }
  
-static boolean emit_call( struct svga_shader_emitter *emit,
-                           const struct tgsi_full_instruction *insn )
+
+static boolean
+emit_call(struct svga_shader_emitter *emit,
+          const struct tgsi_full_instruction *insn)
  {
     unsigned position = insn->Label.Label;
     unsigned i;
-   
+
     for (i = 0; i < emit->nr_labels; i++) {
-      if (emit->label[i] == position) 
+      if (emit->label[i] == position)
           break;
     }
  
@@ -2422,7 +2756,12 @@ static boolean emit_call( struct svga_shader_emitter *emit,
  }
  
  
-static boolean emit_end( struct svga_shader_emitter *emit )
+/**
+ * Called at the end of the shader.  Actually, emit special "fix-up"
+ * code for the vertex/fragment shader.
+ */
+static boolean
+emit_end(struct svga_shader_emitter *emit)
  {
     if (emit->unit == PIPE_SHADER_VERTEX) {
        return emit_vs_postamble( emit );
@@ -2434,9 +2773,10 @@ static boolean emit_end( struct svga_shader_emitter *emit )
  
  
  
-static boolean svga_emit_instruction( struct svga_shader_emitter *emit,
-                                      unsigned position,
-                                      const struct tgsi_full_instruction *insn )
+static boolean
+svga_emit_instruction(struct svga_shader_emitter *emit,
+                      unsigned position,
+                      const struct tgsi_full_instruction *insn)
  {
     switch (insn->Instruction.Opcode) {
  
@@ -2464,9 +2804,14 @@ static boolean svga_emit_instruction( struct svga_shader_emitter *emit,
        return emit_call( emit, insn );
  
     case TGSI_OPCODE_FLR:
-   case TGSI_OPCODE_TRUNC:        /* should be TRUNC, not FLR */
        return emit_floor( emit, insn );
  
+   case TGSI_OPCODE_TRUNC:
+      return emit_trunc_round( emit, insn, FALSE );
+
+   case TGSI_OPCODE_ROUND:
+      return emit_trunc_round( emit, insn, TRUE );
+
     case TGSI_OPCODE_CEIL:
        return emit_ceil( emit, insn );
  
@@ -2558,7 +2903,6 @@ static boolean svga_emit_instruction( struct svga_shader_emitter *emit,
         * about:
         */
     case TGSI_OPCODE_CLAMP:
-   case TGSI_OPCODE_ROUND:
     case TGSI_OPCODE_AND:
     case TGSI_OPCODE_OR:
     case TGSI_OPCODE_I2F:
@@ -2600,31 +2944,35 @@ static boolean svga_emit_instruction( struct svga_shader_emitter *emit,
     case TGSI_OPCODE_SSG:
        return emit_ssg( emit, insn );
  
-   default: {
-      unsigned opcode = translate_opcode(insn->Instruction.Opcode);
+   default:
+      {
+         unsigned opcode = translate_opcode(insn->Instruction.Opcode);
  
-      if (opcode == SVGA3DOP_LAST_INST)
-         return FALSE;
+         if (opcode == SVGA3DOP_LAST_INST)
+            return FALSE;
  
-      if (!emit_simple_instruction( emit, opcode, insn ))
-         return FALSE;
-   }
+         if (!emit_simple_instruction( emit, opcode, insn ))
+            return FALSE;
+      }
     }
  
     return TRUE;
  }
  
  
-static boolean svga_emit_immediate( struct svga_shader_emitter *emit,
-                                    struct tgsi_full_immediate *imm)
+static boolean
+svga_emit_immediate(struct svga_shader_emitter *emit,
+                    struct tgsi_full_immediate *imm)
  {
     static const float id[4] = {0,0,0,1};
     float value[4];
     unsigned i;
  
     assert(1 <= imm->Immediate.NrTokens && imm->Immediate.NrTokens <= 5);
-   for (i = 0; i < imm->Immediate.NrTokens - 1; i++)
-      value[i] = imm->u[i].Float;
+   for (i = 0; i < imm->Immediate.NrTokens - 1; i++) {
+      float f = imm->u[i].Float;
+      value[i] = util_is_inf_or_nan(f) ? 0.0f : f;
+   }
  
     for ( ; i < 4; i++ )
        value[i] = id[i];
@@ -2634,12 +2982,11 @@ static boolean svga_emit_immediate( struct svga_shader_emitter *emit,
                            value[0], value[1], value[2], value[3]);
  }
  
-static boolean make_immediate( struct svga_shader_emitter *emit,
-                               float a,
-                               float b,
-                               float c,
-                               float d,
-                               struct src_register *out )
+
+static boolean
+make_immediate(struct svga_shader_emitter *emit,
+               float a, float b, float c, float d,
+               struct src_register *out )
  {
     unsigned idx = emit->nr_hw_float_const++;
  
@@ -2652,7 +2999,9 @@ static boolean make_immediate( struct svga_shader_emitter *emit,
     return TRUE;
  }
  
-static boolean emit_vs_preamble( struct svga_shader_emitter *emit )
+
+static boolean
+emit_vs_preamble(struct svga_shader_emitter *emit)
  {
     if (!emit->key.vkey.need_prescale) {
        if (!make_immediate( emit, 0, 0, .5, .5,
@@ -2663,7 +3012,9 @@ static boolean emit_vs_preamble( struct svga_shader_emitter *emit )
     return TRUE;
  }
  
-static boolean emit_ps_preamble( struct svga_shader_emitter *emit )
+
+static boolean
+emit_ps_preamble(struct svga_shader_emitter *emit)
  {
     if (emit->ps_reads_pos && emit->info.reads_z) {
        /*
@@ -2698,7 +3049,9 @@ static boolean emit_ps_preamble( struct svga_shader_emitter *emit )
     return TRUE;
  }
  
-static boolean emit_ps_postamble( struct svga_shader_emitter *emit )
+
+static boolean
+emit_ps_postamble(struct svga_shader_emitter *emit)
  {
     unsigned i;
  
@@ -2708,7 +3061,6 @@ static boolean emit_ps_postamble( struct svga_shader_emitter *emit )
      * oDepth with a hand-crafted MOV below.
      */
     if (SVGA3dShaderGetRegType(emit->true_pos.value) != 0) {
-
        if (!submit_op1( emit,
                         inst_token(SVGA3DOP_MOV),
                         emit->true_pos,
@@ -2718,13 +3070,11 @@ static boolean emit_ps_postamble( struct svga_shader_emitter *emit )
  
     for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
        if (SVGA3dShaderGetRegType(emit->true_col[i].value) != 0) {
-
           /* Potentially override output colors with white for XOR
            * logicop workaround.
            */
           if (emit->unit == PIPE_SHADER_FRAGMENT &&
               emit->key.fkey.white_fragments) {
-
              struct src_register one = scalar( get_zero_immediate( emit ),
                                                TGSI_SWIZZLE_W );
  
@@ -2747,7 +3097,9 @@ static boolean emit_ps_postamble( struct svga_shader_emitter *emit )
     return TRUE;
  }
  
-static boolean emit_vs_postamble( struct svga_shader_emitter *emit )
+
+static boolean
+emit_vs_postamble(struct svga_shader_emitter *emit)
  {
     /* PSIZ output is incredibly fragile and it's very hard to catch
      * the types of usage that break it during shader emit.  Easier
@@ -2755,7 +3107,6 @@ static boolean emit_vs_postamble( struct svga_shader_emitter *emit )
      * touch PSIZ with a hand-crafted MOV below.
      */
     if (SVGA3dShaderGetRegType(emit->true_psiz.value) != 0) {
-      
        if (!submit_op1( emit,
                         inst_token(SVGA3DOP_MOV),
                         emit->true_psiz,
@@ -2771,10 +3122,10 @@ static boolean emit_vs_postamble( struct svga_shader_emitter *emit )
        SVGA3dShaderDestToken depth = emit->depth_pos;
        SVGA3dShaderDestToken pos = emit->true_pos;
        unsigned offset = emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
-      struct src_register prescale_scale = src_register( SVGA3DREG_CONST, 
-                                                         offset + 0 ); 
-      struct src_register prescale_trans = src_register( SVGA3DREG_CONST, 
-                                                         offset + 1 ); 
+      struct src_register prescale_scale = src_register( SVGA3DREG_CONST,
+                                                         offset + 0 );
+      struct src_register prescale_trans = src_register( SVGA3DREG_CONST,
+                                                         offset + 1 );
  
        if (!submit_op1( emit,
                         inst_token(SVGA3DOP_MOV),
@@ -2786,16 +3137,16 @@ static boolean emit_vs_postamble( struct svga_shader_emitter *emit )
         * MAD result.position, temp_pos.wwww, prescale.trans, temp_pos
         *   --> Note that prescale.trans.w == 0
         */
-      if (!submit_op2( emit, 
-                       inst_token(SVGA3DOP_MUL), 
-                       writemask(temp_pos, TGSI_WRITEMASK_XYZ), 
+      if (!submit_op2( emit,
+                       inst_token(SVGA3DOP_MUL),
+                       writemask(temp_pos, TGSI_WRITEMASK_XYZ),
                         src(temp_pos),
                         prescale_scale ))
           return FALSE;
  
-      if (!submit_op3( emit, 
-                       inst_token(SVGA3DOP_MAD), 
-                       pos, 
+      if (!submit_op3( emit,
+                       inst_token(SVGA3DOP_MAD),
+                       pos,
                         swizzle(src(temp_pos), 3, 3, 3, 3),
                         prescale_trans,
                         src(temp_pos)))
@@ -2819,12 +3170,12 @@ static boolean emit_vs_postamble( struct svga_shader_emitter *emit )
        /* Adjust GL clipping coordinate space to hardware (D3D-style):
         *
         * DP4 temp_pos.z, {0,0,.5,.5}, temp_pos
-       * MOV result.position, temp_pos 
+       * MOV result.position, temp_pos
         */
-      if (!submit_op2( emit, 
-                       inst_token(SVGA3DOP_DP4), 
-                       writemask(temp_pos, TGSI_WRITEMASK_Z), 
-                       imm_0055, 
+      if (!submit_op2( emit,
+                       inst_token(SVGA3DOP_DP4),
+                       writemask(temp_pos, TGSI_WRITEMASK_Z),
+                       imm_0055,
                         src(temp_pos) ))
           return FALSE;
  
@@ -2845,20 +3196,25 @@ static boolean emit_vs_postamble( struct svga_shader_emitter *emit )
     return TRUE;
  }
  
-/*
-  0: IF VFACE :4
-  1:   COLOR = FrontColor;
-  2: ELSE
-  3:   COLOR = BackColor;
-  4: ENDIF
+
+/**
+ * For the pixel shader: emit the code which chooses the front
+ * or back face color depending on triangle orientation.
+ *
+ *  0: IF VFACE :4
+ *  1:   COLOR = FrontColor;
+ *  2: ELSE
+ *  3:   COLOR = BackColor;
+ *  4: ENDIF
   */
-static boolean emit_light_twoside( struct svga_shader_emitter *emit )
+static boolean
+emit_light_twoside(struct svga_shader_emitter *emit)
  {
     struct src_register vface, zero;
     struct src_register front[2];
     struct src_register back[2];
     SVGA3dShaderDestToken color[2];
-   int count =  emit->internal_color_count;
+   int count = emit->internal_color_count;
     int i;
     SVGA3dShaderInstToken if_token;
  
@@ -2873,9 +3229,7 @@ static boolean emit_light_twoside( struct svga_shader_emitter *emit )
      * to reset_temp_regs().
      */
     for (i = 0; i < count; i++) {
-      color[i] = dst_register( SVGA3DREG_TEMP, 
-                               emit->nr_hw_temp++ );
-
+      color[i] = dst_register( SVGA3DREG_TEMP, emit->nr_hw_temp++ );
        front[i] = emit->input_map[emit->internal_color_idx[i]];
  
        /* Back is always the next input:
@@ -2887,7 +3241,7 @@ static boolean emit_light_twoside( struct svga_shader_emitter *emit )
         */
        emit->input_map[emit->internal_color_idx[i]] = src(color[i]);
     }
-   
+
     if_token = inst_token( SVGA3DOP_IFC );
  
     if (emit->key.fkey.front_ccw)
@@ -2909,7 +3263,7 @@ static boolean emit_light_twoside( struct svga_shader_emitter *emit )
  
     if (!(emit_instruction( emit, inst_token( SVGA3DOP_ELSE))))
        return FALSE;
-   
+
     for (i = 0; i < count; i++) {
        if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], back[i] ))
           return FALSE;
@@ -2921,11 +3275,13 @@ static boolean emit_light_twoside( struct svga_shader_emitter *emit )
     return TRUE;
  }
  
-/*
-  0: SETP_GT TEMP, VFACE, 0
-  where TEMP is a fake frontface register
+
+/**
+ *  0: SETP_GT TEMP, VFACE, 0
+ *  where TEMP is a fake frontface register
   */
-static boolean emit_frontface( struct svga_shader_emitter *emit )
+static boolean
+emit_frontface(struct svga_shader_emitter *emit)
  {
     struct src_register vface, zero;
     SVGA3dShaderDestToken temp;
@@ -2967,7 +3323,8 @@ static boolean emit_frontface( struct svga_shader_emitter *emit )
   * This is used for drawing point sprites when
   * pipe_rasterizer_state::sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT.
   */
-static boolean emit_inverted_texcoords( struct svga_shader_emitter *emit )
+static boolean
+emit_inverted_texcoords(struct svga_shader_emitter *emit)
  {
     struct src_register zero = get_zero_immediate(emit);
     struct src_register pos_neg_one = get_pos_neg_one_immediate( emit );
@@ -2986,8 +3343,8 @@ static boolean emit_inverted_texcoords( struct svga_shader_emitter *emit )
               < Elements(emit->input_map));
  
        /* inverted = coord * (1, -1, 1, 1) + (0, 1, 0, 0) */
-      if (!submit_op3(emit, 
-                      inst_token(SVGA3DOP_MAD), 
+      if (!submit_op3(emit,
+                      inst_token(SVGA3DOP_MAD),
                        dst(emit->ps_inverted_texcoord[unit]),
                        emit->ps_true_texcoord[unit],
                        swizzle(pos_neg_one, 0, 3, 0, 0),  /* (1, -1, 1, 1) */
@@ -2998,17 +3355,17 @@ static boolean emit_inverted_texcoords( struct svga_shader_emitter *emit )
        emit->input_map[emit->ps_inverted_texcoord_input[unit]] =
           emit->ps_inverted_texcoord[unit];
  
-      inverted_texcoords &= ~(1 << unit);      
+      inverted_texcoords &= ~(1 << unit);
     }
  
     return TRUE;
  }
  
  
-static INLINE boolean
+static boolean
  needs_to_create_zero( struct svga_shader_emitter *emit )
  {
-   int i;
+   unsigned i;
  
     if (emit->unit == PIPE_SHADER_FRAGMENT) {
        if (emit->key.fkey.light_twoside)
@@ -3036,6 +3393,12 @@ needs_to_create_zero( struct svga_shader_emitter *emit )
               emit->key.fkey.tex[i].swizzle_a > PIPE_SWIZZLE_ALPHA)
              return TRUE;
        }
+
+      for (i = 0; i < emit->key.fkey.num_textures; i++) {
+         if (emit->key.fkey.tex[i].compare_mode
+             == PIPE_TEX_COMPARE_R_TO_TEXTURE)
+            return TRUE;
+      }
     }
  
     if (emit->unit == PIPE_SHADER_VERTEX) {
@@ -3047,6 +3410,7 @@ needs_to_create_zero( struct svga_shader_emitter *emit )
         emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1 ||
         emit->info.opcode_count[TGSI_OPCODE_DDX] >= 1 ||
         emit->info.opcode_count[TGSI_OPCODE_DDY] >= 1 ||
+       emit->info.opcode_count[TGSI_OPCODE_ROUND] >= 1 ||
         emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 ||
         emit->info.opcode_count[TGSI_OPCODE_SGT] >= 1 ||
         emit->info.opcode_count[TGSI_OPCODE_SLE] >= 1 ||
@@ -3059,27 +3423,25 @@ needs_to_create_zero( struct svga_shader_emitter *emit )
         emit->info.opcode_count[TGSI_OPCODE_KILP] >= 1)
        return TRUE;
  
-   for (i = 0; i < emit->key.fkey.num_textures; i++) {
-      if (emit->key.fkey.tex[i].compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE)
-         return TRUE;
-   }
-
     return FALSE;
  }
  
-static INLINE boolean
+
+static boolean
  needs_to_create_loop_const( struct svga_shader_emitter *emit )
  {
     return (emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1);
  }
  
-static INLINE boolean
+
+static boolean
  needs_to_create_arl_consts( struct svga_shader_emitter *emit )
  {
     return (emit->num_arl_consts > 0);
  }
  
-static INLINE boolean
+
+static boolean
  pre_parse_add_indirect( struct svga_shader_emitter *emit,
                          int num, int current_arl)
  {
@@ -3101,6 +3463,7 @@ pre_parse_add_indirect( struct svga_shader_emitter *emit,
     return TRUE;
  }
  
+
  static boolean
  pre_parse_instruction( struct svga_shader_emitter *emit,
                         const struct tgsi_full_instruction *insn,
@@ -3133,6 +3496,7 @@ pre_parse_instruction( struct svga_shader_emitter *emit,
     return TRUE;
  }
  
+
  static boolean
  pre_parse_tokens( struct svga_shader_emitter *emit,
                    const struct tgsi_token *tokens )
@@ -3165,8 +3529,9 @@ pre_parse_tokens( struct svga_shader_emitter *emit,
     return TRUE;
  }
  
-static boolean svga_shader_emit_helpers( struct svga_shader_emitter *emit )
  
+static boolean
+svga_shader_emit_helpers(struct svga_shader_emitter *emit)
  {
     if (needs_to_create_zero( emit )) {
        create_zero_immediate( emit );
@@ -3199,8 +3564,10 @@ static boolean svga_shader_emit_helpers( struct svga_shader_emitter *emit )
     return TRUE;
  }
  
-boolean svga_shader_emit_instructions( struct svga_shader_emitter *emit,
-                                       const struct tgsi_token *tokens )
+
+boolean
+svga_shader_emit_instructions(struct svga_shader_emitter *emit,
+                              const struct tgsi_token *tokens)
  {
     struct tgsi_parse_context parse;
     boolean ret = TRUE;
@@ -3233,14 +3600,14 @@ boolean svga_shader_emit_instructions( struct svga_shader_emitter *emit,
           if (!ret)
              goto done;
           break;
-         
+
        case TGSI_TOKEN_TYPE_INSTRUCTION:
           if (!helpers_emitted) {
              if (!svga_shader_emit_helpers( emit ))
                 goto done;
              helpers_emitted = TRUE;
           }
-         ret = svga_emit_instruction( emit, 
+         ret = svga_emit_instruction( emit,
                                        line_nr++,
                                        &parse.FullToken.FullInstruction );
           if (!ret)
@@ -3249,7 +3616,7 @@ boolean svga_shader_emit_instructions( struct svga_shader_emitter *emit,
        default:
           break;
        }
-      
+
        reset_temp_regs( emit );
     }
  
@@ -3275,4 +3642,3 @@ done:
     tgsi_parse_free( &parse );
     return ret;
  }
-