ac/nir,radv,radeonsi/nir: use correct indices for interpolation intrinsics

[mesa.git] / src / gallium / drivers / svga / svga_tgsi_vgpu10.c
diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c

index d62f2bbcc966501f275eb942344121b83b9dfa32..a5bbb4d0f2a307658e75fd4c59cdd75f98721b29 100644 (file)
--- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
+++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
@@ -98,6 +98,7 @@ struct svga_shader_emitter_v10
     struct svga_compile_key key;
     struct tgsi_shader_info info;
     unsigned unit;
     struct svga_compile_key key;
     struct tgsi_shader_info info;
     unsigned unit;
+   unsigned version; /**< Either 40 or 41 at this time */
  
     unsigned inst_start_token;
     boolean discard_instruction; /**< throw away current instruction? */
  
     unsigned inst_start_token;
     boolean discard_instruction; /**< throw away current instruction? */
@@ -134,6 +135,9 @@ struct svga_shader_emitter_v10
  
     /* Samplers */
     unsigned num_samplers;
  
     /* Samplers */
     unsigned num_samplers;
+   boolean sampler_view[PIPE_MAX_SAMPLERS];  /**< True if sampler view exists*/
+   ubyte sampler_target[PIPE_MAX_SAMPLERS];  /**< TGSI_TEXTURE_x */
+   ubyte sampler_return_type[PIPE_MAX_SAMPLERS];  /**< TGSI_RETURN_TYPE_x */
  
     /* Address regs (really implemented with temps) */
     unsigned num_address_regs;
  
     /* Address regs (really implemented with temps) */
     unsigned num_address_regs;
@@ -165,8 +169,8 @@ struct svga_shader_emitter_v10
  
     /* For fragment shaders only */
     struct {
  
     /* For fragment shaders only */
     struct {
-      /* apha test */
        unsigned color_out_index[PIPE_MAX_COLOR_BUFS];  /**< the real color output regs */
        unsigned color_out_index[PIPE_MAX_COLOR_BUFS];  /**< the real color output regs */
+      unsigned num_color_outputs;
        unsigned color_tmp_index;  /**< fake/temp color output reg */
        unsigned alpha_ref_index;  /**< immediate constant for alpha ref */
  
        unsigned color_tmp_index;  /**< fake/temp color output reg */
        unsigned alpha_ref_index;  /**< immediate constant for alpha ref */
  
@@ -178,6 +182,14 @@ struct svga_shader_emitter_v10
  
        unsigned fragcoord_input_index;  /**< real fragment position input reg */
        unsigned fragcoord_tmp_index;    /**< 1/w modified position temp reg */
  
        unsigned fragcoord_input_index;  /**< real fragment position input reg */
        unsigned fragcoord_tmp_index;    /**< 1/w modified position temp reg */
+
+      /** Which texture units are doing shadow comparison in the FS code */
+      unsigned shadow_compare_units;
+
+      unsigned sample_id_sys_index;  /**< TGSI index of sample id sys value */
+
+      unsigned sample_pos_sys_index; /**< TGSI index of sample pos sys value */
+      unsigned sample_pos_tmp_index; /**< which temp reg has the sample pos */
     } fs;
  
     /* For geometry shaders only */
     } fs;
  
     /* For geometry shaders only */
@@ -202,6 +214,9 @@ struct svga_shader_emitter_v10
     /* user clip plane constant slot indexes */
     unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES];
  
     /* user clip plane constant slot indexes */
     unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES];
  
+   unsigned num_output_writes;
+   boolean constant_color_output;
+
     boolean uses_flat_interp;
  
     /* For all shaders: const reg index for RECT coord scaling */
     boolean uses_flat_interp;
  
     /* For all shaders: const reg index for RECT coord scaling */
@@ -237,7 +252,7 @@ expand(struct svga_shader_emitter_v10 *emit)
     else
        new_buf = NULL;
  
     else
        new_buf = NULL;
  
-   if (new_buf == NULL) {
+   if (!new_buf) {
        emit->ptr = err_buf;
        emit->buf = err_buf;
        emit->size = sizeof(err_buf);
        emit->ptr = err_buf;
        emit->buf = err_buf;
        emit->size = sizeof(err_buf);
@@ -400,6 +415,9 @@ check_register_index(struct svga_shader_emitter_v10 *emit,
           emit->register_overflow = TRUE;
        }
        break;
           emit->register_overflow = TRUE;
        }
        break;
+   case VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
+      /* nothing */
+      break;
     default:
        assert(0);
        ; /* nothing */
     default:
        assert(0);
        ; /* nothing */
@@ -492,7 +510,7 @@ translate_shader_type(unsigned type)
   * as seen below.  All other opcodes are handled/translated specially.
   */
  static VGPU10_OPCODE_TYPE
   * as seen below.  All other opcodes are handled/translated specially.
   */
  static VGPU10_OPCODE_TYPE
-translate_opcode(unsigned opcode)
+translate_opcode(enum tgsi_opcode opcode)
  {
     switch (opcode) {
     case TGSI_OPCODE_MOV:
  {
     switch (opcode) {
     case TGSI_OPCODE_MOV:
@@ -573,8 +591,6 @@ translate_opcode(unsigned opcode)
        return VGPU10_OPCODE_RET;
     case TGSI_OPCODE_NOP:
        return VGPU10_OPCODE_NOP;
        return VGPU10_OPCODE_RET;
     case TGSI_OPCODE_NOP:
        return VGPU10_OPCODE_NOP;
-   case TGSI_OPCODE_BREAKC:
-      return VGPU10_OPCODE_BREAKC;
     case TGSI_OPCODE_END:
        return VGPU10_OPCODE_RET;
     case TGSI_OPCODE_F2I:
     case TGSI_OPCODE_END:
        return VGPU10_OPCODE_RET;
     case TGSI_OPCODE_F2I:
@@ -636,6 +652,19 @@ translate_opcode(unsigned opcode)
        return VGPU10_OPCODE_LT;
     case TGSI_OPCODE_ROUND:
        return VGPU10_OPCODE_ROUND_NE;
        return VGPU10_OPCODE_LT;
     case TGSI_OPCODE_ROUND:
        return VGPU10_OPCODE_ROUND_NE;
+   case TGSI_OPCODE_SAMPLE_POS:
+      /* Note: we never actually get this opcode because there's no GLSL
+       * function to query multisample resource sample positions.  There's
+       * only the TGSI_SEMANTIC_SAMPLEPOS system value which contains the
+       * position of the current sample in the render target.
+       */
+      /* FALL-THROUGH */
+   case TGSI_OPCODE_SAMPLE_INFO:
+      /* NOTE: we never actually get this opcode because the GLSL compiler
+       * implements the gl_NumSamples variable with a simple constant in the
+       * constant buffer.
+       */
+      /* FALL-THROUGH */
     default:
        assert(!"Unexpected TGSI opcode in translate_opcode()");
        return VGPU10_OPCODE_NOP;
     default:
        assert(!"Unexpected TGSI opcode in translate_opcode()");
        return VGPU10_OPCODE_NOP;
@@ -701,7 +730,7 @@ emit_null_dst_register(struct svga_shader_emitter_v10 *emit)
   */
  static unsigned
  get_temp_array_id(const struct svga_shader_emitter_v10 *emit,
   */
  static unsigned
  get_temp_array_id(const struct svga_shader_emitter_v10 *emit,
-                  unsigned file, unsigned index)
+                  enum tgsi_file_type file, unsigned index)
  {
     if (file == TGSI_FILE_TEMPORARY) {
        return emit->temp_map[index].arrayId;
  {
     if (file == TGSI_FILE_TEMPORARY) {
        return emit->temp_map[index].arrayId;
@@ -718,7 +747,7 @@ get_temp_array_id(const struct svga_shader_emitter_v10 *emit,
   */
  static unsigned
  remap_temp_index(const struct svga_shader_emitter_v10 *emit,
   */
  static unsigned
  remap_temp_index(const struct svga_shader_emitter_v10 *emit,
-                 unsigned file, unsigned index)
+                 enum tgsi_file_type file, unsigned index)
  {
     if (file == TGSI_FILE_TEMPORARY) {
        return emit->temp_map[index].index;
  {
     if (file == TGSI_FILE_TEMPORARY) {
        return emit->temp_map[index].index;
@@ -736,11 +765,11 @@ remap_temp_index(const struct svga_shader_emitter_v10 *emit,
  static VGPU10OperandToken0
  setup_operand0_indexing(struct svga_shader_emitter_v10 *emit,
                          VGPU10OperandToken0 operand0,
  static VGPU10OperandToken0
  setup_operand0_indexing(struct svga_shader_emitter_v10 *emit,
                          VGPU10OperandToken0 operand0,
-                        unsigned file,
+                        enum tgsi_file_type file,
                          boolean indirect, boolean index2D,
                          unsigned tempArrayID)
  {
                          boolean indirect, boolean index2D,
                          unsigned tempArrayID)
  {
-   unsigned indexDim, index0Rep, index1Rep = VGPU10_OPERAND_INDEX_0D;
+   unsigned indexDim, index0Rep, index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
  
     /*
      * Compute index dimensions
  
     /*
      * Compute index dimensions
@@ -844,14 +873,14 @@ static void
  emit_dst_register(struct svga_shader_emitter_v10 *emit,
                    const struct tgsi_full_dst_register *reg)
  {
  emit_dst_register(struct svga_shader_emitter_v10 *emit,
                    const struct tgsi_full_dst_register *reg)
  {
-   unsigned file = reg->Register.File;
+   enum tgsi_file_type file = reg->Register.File;
     unsigned index = reg->Register.Index;
     unsigned index = reg->Register.Index;
-   const unsigned sem_name = emit->info.output_semantic_name[index];
+   const enum tgsi_semantic sem_name = emit->info.output_semantic_name[index];
     const unsigned sem_index = emit->info.output_semantic_index[index];
     unsigned writemask = reg->Register.WriteMask;
     const unsigned sem_index = emit->info.output_semantic_index[index];
     unsigned writemask = reg->Register.WriteMask;
-   const unsigned indirect = reg->Register.Indirect;
+   const boolean indirect = reg->Register.Indirect;
     const unsigned tempArrayId = get_temp_array_id(emit, file, index);
     const unsigned tempArrayId = get_temp_array_id(emit, file, index);
-   const unsigned index2d = reg->Register.Dimension;
+   const boolean index2d = reg->Register.Dimension;
     VGPU10OperandToken0 operand0;
  
     if (file == TGSI_FILE_OUTPUT) {
     VGPU10OperandToken0 operand0;
  
     if (file == TGSI_FILE_OUTPUT) {
@@ -895,6 +924,15 @@ emit_dst_register(struct svga_shader_emitter_v10 *emit,
              emit_dword(emit, operand0.value);
              return;
           }
              emit_dword(emit, operand0.value);
              return;
           }
+         else if (sem_name == TGSI_SEMANTIC_SAMPLEMASK) {
+            /* Fragment sample mask output */
+            operand0.value = 0;
+            operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK;
+            operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
+            operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
+            emit_dword(emit, operand0.value);
+            return;
+         }
           else if (index == emit->fs.color_out_index[0] &&
               emit->fs.color_tmp_index != INVALID_INDEX) {
              /* replace OUTPUT[COLOR] with TEMP[COLOR].  We need to store the
           else if (index == emit->fs.color_out_index[0] &&
               emit->fs.color_tmp_index != INVALID_INDEX) {
              /* replace OUTPUT[COLOR] with TEMP[COLOR].  We need to store the
@@ -913,6 +951,8 @@ emit_dst_register(struct svga_shader_emitter_v10 *emit,
               */
              assert(sem_name == TGSI_SEMANTIC_COLOR);
              index = emit->info.output_semantic_index[index];
               */
              assert(sem_name == TGSI_SEMANTIC_COLOR);
              index = emit->info.output_semantic_index[index];
+
+            emit->num_output_writes++;
           }
        }
     }
           }
        }
     }
@@ -955,51 +995,70 @@ emit_dst_register(struct svga_shader_emitter_v10 *emit,
  
  /**
   * Translate a src register of a TGSI instruction and emit VGPU10 tokens.
  
  /**
   * Translate a src register of a TGSI instruction and emit VGPU10 tokens.
+ * In quite a few cases, we do register substitution.  For example, if
+ * the TGSI register is the front/back-face register, we replace that with
+ * a temp register containing a value we computed earlier.
   */
  static void
  emit_src_register(struct svga_shader_emitter_v10 *emit,
                    const struct tgsi_full_src_register *reg)
  {
   */
  static void
  emit_src_register(struct svga_shader_emitter_v10 *emit,
                    const struct tgsi_full_src_register *reg)
  {
-   unsigned file = reg->Register.File;
+   enum tgsi_file_type file = reg->Register.File;
     unsigned index = reg->Register.Index;
     unsigned index = reg->Register.Index;
-   const unsigned indirect = reg->Register.Indirect;
+   const boolean indirect = reg->Register.Indirect;
     const unsigned tempArrayId = get_temp_array_id(emit, file, index);
     const unsigned tempArrayId = get_temp_array_id(emit, file, index);
-   const unsigned index2d = reg->Register.Dimension;
+   const boolean index2d = reg->Register.Dimension;
     const unsigned swizzleX = reg->Register.SwizzleX;
     const unsigned swizzleY = reg->Register.SwizzleY;
     const unsigned swizzleZ = reg->Register.SwizzleZ;
     const unsigned swizzleW = reg->Register.SwizzleW;
     const unsigned swizzleX = reg->Register.SwizzleX;
     const unsigned swizzleY = reg->Register.SwizzleY;
     const unsigned swizzleZ = reg->Register.SwizzleZ;
     const unsigned swizzleW = reg->Register.SwizzleW;
-   const unsigned absolute = reg->Register.Absolute;
-   const unsigned negate = reg->Register.Negate;
+   const boolean absolute = reg->Register.Absolute;
+   const boolean negate = reg->Register.Negate;
     bool is_prim_id = FALSE;
  
     VGPU10OperandToken0 operand0;
     VGPU10OperandToken1 operand1;
  
     bool is_prim_id = FALSE;
  
     VGPU10OperandToken0 operand0;
     VGPU10OperandToken1 operand1;
  
-   if (emit->unit == PIPE_SHADER_FRAGMENT &&
-      file == TGSI_FILE_INPUT) {
-      if (index == emit->fs.face_input_index) {
-         /* Replace INPUT[FACE] with TEMP[FACE] */
-         file = TGSI_FILE_TEMPORARY;
-         index = emit->fs.face_tmp_index;
+   if (emit->unit == PIPE_SHADER_FRAGMENT){
+      if (file == TGSI_FILE_INPUT) {
+         if (index == emit->fs.face_input_index) {
+            /* Replace INPUT[FACE] with TEMP[FACE] */
+            file = TGSI_FILE_TEMPORARY;
+            index = emit->fs.face_tmp_index;
+         }
+         else if (index == emit->fs.fragcoord_input_index) {
+            /* Replace INPUT[POSITION] with TEMP[POSITION] */
+            file = TGSI_FILE_TEMPORARY;
+            index = emit->fs.fragcoord_tmp_index;
+         }
+         else {
+            /* We remap fragment shader inputs so that FS input indexes
+             * match up with VS/GS output indexes.
+             */
+            index = emit->linkage.input_map[index];
+         }
        }
        }
-      else if (index == emit->fs.fragcoord_input_index) {
-         /* Replace INPUT[POSITION] with TEMP[POSITION] */
-         file = TGSI_FILE_TEMPORARY;
-         index = emit->fs.fragcoord_tmp_index;
+      else if (file == TGSI_FILE_SYSTEM_VALUE) {
+         if (index == emit->fs.sample_pos_sys_index) {
+            assert(emit->version >= 41);
+            /* Current sample position is in a temp register */
+            file = TGSI_FILE_TEMPORARY;
+            index = emit->fs.sample_pos_tmp_index;
+         }
+         else {
+            /* Map the TGSI system value to a VGPU10 input register */
+            assert(index < ARRAY_SIZE(emit->system_value_indexes));
+            file = TGSI_FILE_INPUT;
+            index = emit->system_value_indexes[index];
+         }
        }
        }
-      else {
-         /* We remap fragment shader inputs to that FS input indexes
-          * match up with VS/GS output indexes.
-          */
+   }
+   else if (emit->unit == PIPE_SHADER_GEOMETRY) {
+      if (file == TGSI_FILE_INPUT) {
+         is_prim_id = (index == emit->gs.prim_id_index);
           index = emit->linkage.input_map[index];
        }
     }
           index = emit->linkage.input_map[index];
        }
     }
-   else if (emit->unit == PIPE_SHADER_GEOMETRY &&
-            file == TGSI_FILE_INPUT) {
-      is_prim_id = (index == emit->gs.prim_id_index);
-      index = emit->linkage.input_map[index];
-   }
     else if (emit->unit == PIPE_SHADER_VERTEX) {
        if (file == TGSI_FILE_INPUT) {
           /* if input is adjusted... */
     else if (emit->unit == PIPE_SHADER_VERTEX) {
        if (file == TGSI_FILE_INPUT) {
           /* if input is adjusted... */
@@ -1015,7 +1074,9 @@ emit_src_register(struct svga_shader_emitter_v10 *emit,
           }
        }
        else if (file == TGSI_FILE_SYSTEM_VALUE) {
           }
        }
        else if (file == TGSI_FILE_SYSTEM_VALUE) {
-         assert(index < Elements(emit->system_value_indexes));
+         /* Map the TGSI system value to a VGPU10 input register */
+         assert(index < ARRAY_SIZE(emit->system_value_indexes));
+         file = TGSI_FILE_INPUT;
           index = emit->system_value_indexes[index];
        }
     }
           index = emit->system_value_indexes[index];
        }
     }
@@ -1023,6 +1084,9 @@ emit_src_register(struct svga_shader_emitter_v10 *emit,
     operand0.value = operand1.value = 0;
  
     if (is_prim_id) {
     operand0.value = operand1.value = 0;
  
     if (is_prim_id) {
+      /* NOTE: we should be using VGPU10_OPERAND_1_COMPONENT here, but
+       * our virtual GPU accepts this as-is.
+       */
        operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
        operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
     }
        operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
        operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
     }
@@ -1071,7 +1135,7 @@ emit_src_register(struct svga_shader_emitter_v10 *emit,
     if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) {
        /* Emit the four float/int in-line immediate values */
        unsigned *c;
     if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) {
        /* Emit the four float/int in-line immediate values */
        unsigned *c;
-      assert(index < Elements(emit->immediates));
+      assert(index < ARRAY_SIZE(emit->immediates));
        assert(file == TGSI_FILE_IMMEDIATE);
        assert(swizzleX < 4);
        assert(swizzleY < 4);
        assert(file == TGSI_FILE_IMMEDIATE);
        assert(swizzleX < 4);
        assert(swizzleY < 4);
@@ -1180,13 +1244,39 @@ emit_face_register(struct svga_shader_emitter_v10 *emit)
  }
  
  
  }
  
  
+/**
+ * Emit the single operand token for the "rasterizer" register used by the
+ * SAMPLE_POS instruction.
+ *
+ * The operand is built as type VGPU10_OPERAND_TYPE_RASTERIZER with a 0D
+ * index dimension, four components and an identity XYZW swizzle, and is
+ * then written out with one emit_dword() call.
+ *
+ * \param emit  the shader emitter that receives the token
+ */
+static void
+emit_rasterizer_register(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10OperandToken0 operand0;
+
+   /* Clear the whole token word before setting individual fields */
+   operand0.value = 0;
+
+   /* No register index for the rasterizer register (there's only one) */
+   operand0.operandType = VGPU10_OPERAND_TYPE_RASTERIZER;
+   operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
+   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
+   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
+   operand0.swizzleX = VGPU10_COMPONENT_X;
+   operand0.swizzleY = VGPU10_COMPONENT_Y;
+   operand0.swizzleZ = VGPU10_COMPONENT_Z;
+   operand0.swizzleW = VGPU10_COMPONENT_W;
+
+   emit_dword(emit, operand0.value);
+}
+
+
  /**
   * Emit the token for a VGPU10 opcode.
   * \param saturate   clamp result to [0,1]?
   */
  static void
  emit_opcode(struct svga_shader_emitter_v10 *emit,
  /**
   * Emit the token for a VGPU10 opcode.
   * \param saturate   clamp result to [0,1]?
   */
  static void
  emit_opcode(struct svga_shader_emitter_v10 *emit,
-            unsigned vgpu10_opcode, boolean saturate)
+            VGPU10_OPCODE_TYPE vgpu10_opcode, boolean saturate)
  {
     VGPU10OpcodeToken0 token0;
  
  {
     VGPU10OpcodeToken0 token0;
  
@@ -1354,7 +1444,7 @@ free_temp_indexes(struct svga_shader_emitter_v10 *emit)
   * Create a tgsi_full_src_register.
   */
  static struct tgsi_full_src_register
   * Create a tgsi_full_src_register.
   */
  static struct tgsi_full_src_register
-make_src_reg(unsigned file, unsigned index)
+make_src_reg(enum tgsi_file_type file, unsigned index)
  {
     struct tgsi_full_src_register reg;
  
  {
     struct tgsi_full_src_register reg;
  
@@ -1369,6 +1459,29 @@ make_src_reg(unsigned file, unsigned index)
  }
  
  
  }
  
  
+/**
+ * Create a tgsi_full_src_register with a swizzle such that all four
+ * vector components have the same scalar value.
+ *
+ * The register is zero-filled first, so every field other than File,
+ * Index and the four swizzle terms is left at zero.
+ *
+ * \param file       register file to reference
+ * \param index      register index within that file
+ * \param component  swizzle term replicated to X, Y, Z and W; asserted to
+ *                   lie in TGSI_SWIZZLE_X..TGSI_SWIZZLE_W
+ * \return  the new source register, by value
+ */
+static struct tgsi_full_src_register
+make_src_scalar_reg(enum tgsi_file_type file, unsigned index, unsigned component)
+{
+   struct tgsi_full_src_register reg;
+
+   assert(component >= TGSI_SWIZZLE_X);
+   assert(component <= TGSI_SWIZZLE_W);
+
+   memset(&reg, 0, sizeof(reg));
+   reg.Register.File = file;
+   reg.Register.Index = index;
+   reg.Register.SwizzleX =
+   reg.Register.SwizzleY =
+   reg.Register.SwizzleZ =
+   reg.Register.SwizzleW = component;
+   return reg;
+}
+
+
  /**
   * Create a tgsi_full_src_register for a temporary.
   */
  /**
   * Create a tgsi_full_src_register for a temporary.
   */
@@ -1403,7 +1516,7 @@ make_src_immediate_reg(unsigned index)
   * Create a tgsi_full_dst_register.
   */
  static struct tgsi_full_dst_register
   * Create a tgsi_full_dst_register.
   */
  static struct tgsi_full_dst_register
-make_dst_reg(unsigned file, unsigned index)
+make_dst_reg(enum tgsi_file_type file, unsigned index)
  {
     struct tgsi_full_dst_register reg;
  
  {
     struct tgsi_full_dst_register reg;
  
@@ -1460,7 +1573,7 @@ absolute_src(const struct tgsi_full_src_register *reg)
  
  /** Return the named swizzle term from the src register */
  static inline unsigned
  
  /** Return the named swizzle term from the src register */
  static inline unsigned
-get_swizzle(const struct tgsi_full_src_register *reg, unsigned term)
+get_swizzle(const struct tgsi_full_src_register *reg, enum tgsi_swizzle term)
  {
     switch (term) {
     case TGSI_SWIZZLE_X:
  {
     switch (term) {
     case TGSI_SWIZZLE_X:
@@ -1483,8 +1596,8 @@ get_swizzle(const struct tgsi_full_src_register *reg, unsigned term)
   */
  static struct tgsi_full_src_register
  swizzle_src(const struct tgsi_full_src_register *reg,
   */
  static struct tgsi_full_src_register
  swizzle_src(const struct tgsi_full_src_register *reg,
-            unsigned swizzleX, unsigned swizzleY,
-            unsigned swizzleZ, unsigned swizzleW)
+            enum tgsi_swizzle swizzleX, enum tgsi_swizzle swizzleY,
+            enum tgsi_swizzle swizzleZ, enum tgsi_swizzle swizzleW)
  {
     struct tgsi_full_src_register swizzled = *reg;
     /* Note: we swizzle the current swizzle */
  {
     struct tgsi_full_src_register swizzled = *reg;
     /* Note: we swizzle the current swizzle */
@@ -1501,7 +1614,7 @@ swizzle_src(const struct tgsi_full_src_register *reg,
   * terms are the same.
   */
  static struct tgsi_full_src_register
   * terms are the same.
   */
  static struct tgsi_full_src_register
-scalar_src(const struct tgsi_full_src_register *reg, unsigned swizzle)
+scalar_src(const struct tgsi_full_src_register *reg, enum tgsi_swizzle swizzle)
  {
     struct tgsi_full_src_register swizzled = *reg;
     /* Note: we swizzle the current swizzle */
  {
     struct tgsi_full_src_register swizzled = *reg;
     /* Note: we swizzle the current swizzle */
@@ -1728,7 +1841,7 @@ alloc_immediate_4(struct svga_shader_emitter_v10 *emit,
  {
     unsigned n = emit->num_immediates++;
     assert(!emit->immediates_emitted);
  {
     unsigned n = emit->num_immediates++;
     assert(!emit->immediates_emitted);
-   assert(n < Elements(emit->immediates));
+   assert(n < ARRAY_SIZE(emit->immediates));
     emit->immediates[n][0] = imm[0];
     emit->immediates[n][1] = imm[1];
     emit->immediates[n][2] = imm[2];
     emit->immediates[n][0] = imm[0];
     emit->immediates[n][1] = imm[1];
     emit->immediates[n][2] = imm[2];
@@ -1755,7 +1868,7 @@ alloc_immediate_float4(struct svga_shader_emitter_v10 *emit,
  
  
  /**
  
  
  /**
- * Allocate space for a int[4] immediate.
+ * Allocate space for an int[4] immediate.
   * \return  the index/position of the immediate.
   */
  static unsigned
   * \return  the index/position of the immediate.
   */
  static unsigned
@@ -1777,8 +1890,8 @@ alloc_immediate_int4(struct svga_shader_emitter_v10 *emit,
  static unsigned
  alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index)
  {
  static unsigned
  alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index)
  {
-   const unsigned n = emit->info.num_inputs + index;
-   assert(index < Elements(emit->system_value_indexes));
+   const unsigned n = emit->linkage.input_map_max + 1 + index;
+   assert(index < ARRAY_SIZE(emit->system_value_indexes));
     emit->system_value_indexes[index] = n;
     return n;
  }
     emit->system_value_indexes[index] = n;
     return n;
  }
@@ -1833,7 +1946,8 @@ emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit)
   */
  static unsigned
  translate_interpolation(const struct svga_shader_emitter_v10 *emit,
   */
  static unsigned
  translate_interpolation(const struct svga_shader_emitter_v10 *emit,
-                        unsigned interp, unsigned interpolate_loc)
+                        enum tgsi_interpolate_mode interp,
+                        enum tgsi_interpolate_loc interpolate_loc)
  {
     if (interp == TGSI_INTERPOLATE_COLOR) {
        interp = emit->key.fs.flatshade ?
  {
     if (interp == TGSI_INTERPOLATE_COLOR) {
        interp = emit->key.fs.flatshade ?
@@ -1844,13 +1958,25 @@ translate_interpolation(const struct svga_shader_emitter_v10 *emit,
     case TGSI_INTERPOLATE_CONSTANT:
        return VGPU10_INTERPOLATION_CONSTANT;
     case TGSI_INTERPOLATE_LINEAR:
     case TGSI_INTERPOLATE_CONSTANT:
        return VGPU10_INTERPOLATION_CONSTANT;
     case TGSI_INTERPOLATE_LINEAR:
-      return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ?
-             VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID :
-             VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE;
+      if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) {
+         return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID;
+      } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE &&
+                 emit->version >= 41) {
+         return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE;
+      } else {
+         return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE;
+      }
+      break;
     case TGSI_INTERPOLATE_PERSPECTIVE:
     case TGSI_INTERPOLATE_PERSPECTIVE:
-      return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ?
-             VGPU10_INTERPOLATION_LINEAR_CENTROID :
-             VGPU10_INTERPOLATION_LINEAR;
+      if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) {
+         return VGPU10_INTERPOLATION_LINEAR_CENTROID;
+      } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE &&
+                 emit->version >= 41) {
+         return VGPU10_INTERPOLATION_LINEAR_SAMPLE;
+      } else {
+         return VGPU10_INTERPOLATION_LINEAR;
+      }
+      break;
     default:
        assert(!"Unexpected interpolation mode");
        return VGPU10_INTERPOLATION_CONSTANT;
     default:
        assert(!"Unexpected interpolation mode");
        return VGPU10_INTERPOLATION_CONSTANT;
@@ -1860,9 +1986,9 @@ translate_interpolation(const struct svga_shader_emitter_v10 *emit,
  
  /**
   * Translate a TGSI property to VGPU10.
  
  /**
   * Translate a TGSI property to VGPU10.
- * Don't emit any instructions yet, only need to gather the primitive property information.
- * The output primitive topology might be changed later. The final property instructions
- * will be emitted as part of the pre-helper code.
+ * Don't emit any instructions yet, only need to gather the primitive property
+ * information.  The output primitive topology might be changed later. The
+ * final property instructions will be emitted as part of the pre-helper code.
   */
  static boolean
  emit_vgpu10_property(struct svga_shader_emitter_v10 *emit,
   */
  static boolean
  emit_vgpu10_property(struct svga_shader_emitter_v10 *emit,
@@ -1915,14 +2041,14 @@ emit_vgpu10_property(struct svga_shader_emitter_v10 *emit,
  
     switch (prop->Property.PropertyName) {
     case TGSI_PROPERTY_GS_INPUT_PRIM:
  
     switch (prop->Property.PropertyName) {
     case TGSI_PROPERTY_GS_INPUT_PRIM:
-      assert(prop->u[0].Data < Elements(primType));
+      assert(prop->u[0].Data < ARRAY_SIZE(primType));
        emit->gs.prim_type = primType[prop->u[0].Data];
        assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED);
        emit->gs.input_size = inputArraySize[emit->gs.prim_type];
        break;
  
     case TGSI_PROPERTY_GS_OUTPUT_PRIM:
        emit->gs.prim_type = primType[prop->u[0].Data];
        assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED);
        emit->gs.input_size = inputArraySize[emit->gs.prim_type];
        break;
  
     case TGSI_PROPERTY_GS_OUTPUT_PRIM:
-      assert(prop->u[0].Data < Elements(primTopology));
+      assert(prop->u[0].Data < ARRAY_SIZE(primTopology));
        emit->gs.prim_topology = primTopology[prop->u[0].Data];
        assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED);
        break;
        emit->gs.prim_topology = primTopology[prop->u[0].Data];
        assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED);
        break;
@@ -1996,7 +2122,9 @@ emit_decl_instruction(struct svga_shader_emitter_v10 *emit,
                        unsigned index, unsigned size)
  {
     assert(opcode0.opcodeType);
                        unsigned index, unsigned size)
  {
     assert(opcode0.opcodeType);
-   assert(operand0.mask);
+   assert(operand0.mask ||
+          (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_DEPTH) ||
+          (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK));
  
     begin_emit_instruction(emit);
     emit_dword(emit, opcode0.value);
  
     begin_emit_instruction(emit);
     emit_dword(emit, opcode0.value);
@@ -2040,11 +2168,15 @@ emit_decl_instruction(struct svga_shader_emitter_v10 *emit,
   */
  static void
  emit_input_declaration(struct svga_shader_emitter_v10 *emit,
   */
  static void
  emit_input_declaration(struct svga_shader_emitter_v10 *emit,
-                       unsigned opcodeType, unsigned operandType,
-                       unsigned dim, unsigned index, unsigned size,
-                       unsigned name, unsigned numComp,
-                       unsigned selMode, unsigned usageMask,
-                       unsigned interpMode)
+                       VGPU10_OPCODE_TYPE opcodeType,
+                       VGPU10_OPERAND_TYPE operandType,
+                       VGPU10_OPERAND_INDEX_DIMENSION dim,
+                       unsigned index, unsigned size,
+                       VGPU10_SYSTEM_NAME name,
+                       VGPU10_OPERAND_NUM_COMPONENTS numComp,
+                       VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode,
+                       unsigned usageMask,
+                       VGPU10_INTERPOLATION_MODE interpMode)
  {
     VGPU10OpcodeToken0 opcode0;
     VGPU10OperandToken0 operand0;
  {
     VGPU10OpcodeToken0 opcode0;
     VGPU10OperandToken0 operand0;
@@ -2054,6 +2186,7 @@ emit_input_declaration(struct svga_shader_emitter_v10 *emit,
     assert(opcodeType == VGPU10_OPCODE_DCL_INPUT ||
            opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV ||
            opcodeType == VGPU10_OPCODE_DCL_INPUT_PS ||
     assert(opcodeType == VGPU10_OPCODE_DCL_INPUT ||
            opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV ||
            opcodeType == VGPU10_OPCODE_DCL_INPUT_PS ||
+          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SIV ||
            opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV);
     assert(operandType == VGPU10_OPERAND_TYPE_INPUT ||
            operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID);
            opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV);
     assert(operandType == VGPU10_OPERAND_TYPE_INPUT ||
            operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID);
@@ -2065,13 +2198,17 @@ emit_input_declaration(struct svga_shader_emitter_v10 *emit,
            name == VGPU10_NAME_INSTANCE_ID ||
            name == VGPU10_NAME_VERTEX_ID ||
            name == VGPU10_NAME_PRIMITIVE_ID ||
            name == VGPU10_NAME_INSTANCE_ID ||
            name == VGPU10_NAME_VERTEX_ID ||
            name == VGPU10_NAME_PRIMITIVE_ID ||
-          name == VGPU10_NAME_IS_FRONT_FACE);
+          name == VGPU10_NAME_IS_FRONT_FACE ||
+          name == VGPU10_NAME_SAMPLE_INDEX);
+
     assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED ||
            interpMode == VGPU10_INTERPOLATION_CONSTANT ||
            interpMode == VGPU10_INTERPOLATION_LINEAR ||
            interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID ||
            interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE ||
     assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED ||
            interpMode == VGPU10_INTERPOLATION_CONSTANT ||
            interpMode == VGPU10_INTERPOLATION_LINEAR ||
            interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID ||
            interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE ||
-          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID);
+          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID ||
+          interpMode == VGPU10_INTERPOLATION_LINEAR_SAMPLE ||
+          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE);
  
     check_register_index(emit, opcodeType, index);
  
  
     check_register_index(emit, opcodeType, index);
  
@@ -2104,8 +2241,9 @@ emit_input_declaration(struct svga_shader_emitter_v10 *emit,
   */
  static void
  emit_output_declaration(struct svga_shader_emitter_v10 *emit,
   */
  static void
  emit_output_declaration(struct svga_shader_emitter_v10 *emit,
-                        unsigned type, unsigned index,
-                        unsigned name, unsigned usageMask)
+                        VGPU10_OPCODE_TYPE type, unsigned index,
+                        VGPU10_SYSTEM_NAME name,
+                        unsigned usageMask)
  {
     VGPU10OpcodeToken0 opcode0;
     VGPU10OperandToken0 operand0;
  {
     VGPU10OpcodeToken0 opcode0;
     VGPU10OperandToken0 operand0;
@@ -2157,7 +2295,32 @@ emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit)
     operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
     operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
     operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
     operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
     operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
     operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
-   operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
+   operand0.mask = 0;
+
+   emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
+}
+
+
+/**
+ * Emit the declaration for the fragment sample mask/coverage output.
+ */
+static void
+emit_samplemask_output_declaration(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10OpcodeToken0 opcode0;
+   VGPU10OperandToken0 operand0;
+   VGPU10NameToken name_token;
+
+   assert(emit->unit == PIPE_SHADER_FRAGMENT);
+   assert(emit->version >= 41);
+
+   opcode0.value = operand0.value = name_token.value = 0;
+
+   opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
+   operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK;
+   operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
+   operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
+   operand0.mask = 0;
  
     emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
  }
  
     emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
  }
@@ -2168,7 +2331,7 @@ emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit)
   */
  static void
  emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
   */
  static void
  emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
-                              unsigned semantic_name, unsigned index)
+                              enum tgsi_semantic semantic_name, unsigned index)
  {
     switch (semantic_name) {
     case TGSI_SEMANTIC_INSTANCEID:
  {
     switch (semantic_name) {
     case TGSI_SEMANTIC_INSTANCEID:
@@ -2195,8 +2358,33 @@ emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
                               VGPU10_OPERAND_4_COMPONENT_MASK_X,
                               VGPU10_INTERPOLATION_UNDEFINED);
        break;
                               VGPU10_OPERAND_4_COMPONENT_MASK_X,
                               VGPU10_INTERPOLATION_UNDEFINED);
        break;
+   case TGSI_SEMANTIC_SAMPLEID:
+      assert(emit->unit == PIPE_SHADER_FRAGMENT);
+      emit->fs.sample_id_sys_index = index;
+      index = alloc_system_value_index(emit, index);
+      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_PS_SIV,
+                             VGPU10_OPERAND_TYPE_INPUT,
+                             VGPU10_OPERAND_INDEX_1D,
+                             index, 1,
+                             VGPU10_NAME_SAMPLE_INDEX,
+                             VGPU10_OPERAND_4_COMPONENT,
+                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
+                             VGPU10_OPERAND_4_COMPONENT_MASK_X,
+                             VGPU10_INTERPOLATION_CONSTANT);
+      break;
+   case TGSI_SEMANTIC_SAMPLEPOS:
+      /* This system value contains the position of the current sample
+       * when using per-sample shading.  We implement this by calling
+       * the VGPU10_OPCODE_SAMPLE_POS instruction with the current sample
+       * index as the argument.  See emit_sample_position_instructions().
+       */
+      assert(emit->version >= 41);
+      emit->fs.sample_pos_sys_index = index;
+      index = alloc_system_value_index(emit, index);
+      break;
     default:
     default:
-      ; /* XXX */
+      debug_printf("unexpected sytem value semantic index %u\n",
+         semantic_name);
     }
  }
  
     }
  }
  
@@ -2262,7 +2450,7 @@ emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
            * have linked due to constbuf index out of bounds, so we shouldn't
            * have reached here.
            */
            * have linked due to constbuf index out of bounds, so we shouldn't
            * have reached here.
            */
-         assert(constbuf < Elements(emit->num_shader_consts));
+         assert(constbuf < ARRAY_SIZE(emit->num_shader_consts));
  
           num_consts = MAX2(emit->num_shader_consts[constbuf],
                             decl->Range.Last + 1);
  
           num_consts = MAX2(emit->num_shader_consts[constbuf],
                             decl->Range.Last + 1);
@@ -2293,11 +2481,13 @@ emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
        emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1);
        return TRUE;
  
        emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1);
        return TRUE;
  
+#if 0
     case TGSI_FILE_RESOURCE:
        /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/
        /* XXX more, VGPU10_RETURN_TYPE_FLOAT */
        assert(!"TGSI_FILE_RESOURCE not handled yet");
        return FALSE;
     case TGSI_FILE_RESOURCE:
        /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/
        /* XXX more, VGPU10_RETURN_TYPE_FLOAT */
        assert(!"TGSI_FILE_RESOURCE not handled yet");
        return FALSE;
+#endif
  
     case TGSI_FILE_ADDRESS:
        emit->num_address_regs = MAX2(emit->num_address_regs,
  
     case TGSI_FILE_ADDRESS:
        emit->num_address_regs = MAX2(emit->num_address_regs,
@@ -2305,9 +2495,14 @@ emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
        return TRUE;
  
     case TGSI_FILE_SAMPLER_VIEW:
        return TRUE;
  
     case TGSI_FILE_SAMPLER_VIEW:
-      /* Not used at this time, but maybe in the future.
-       * See emit_resource_declarations().
-       */
+      {
+         unsigned unit = decl->Range.First;
+         assert(decl->Range.First == decl->Range.Last);
+         emit->sampler_target[unit] = decl->SamplerView.Resource;
+         /* Note: we can ignore YZW return types for now */
+         emit->sampler_return_type[unit] = decl->SamplerView.ReturnTypeX;
+         emit->sampler_view[unit] = TRUE;
+      }
        return TRUE;
  
     default:
        return TRUE;
  
     default:
@@ -2329,10 +2524,12 @@ emit_input_declarations(struct svga_shader_emitter_v10 *emit)
     if (emit->unit == PIPE_SHADER_FRAGMENT) {
  
        for (i = 0; i < emit->linkage.num_inputs; i++) {
     if (emit->unit == PIPE_SHADER_FRAGMENT) {
  
        for (i = 0; i < emit->linkage.num_inputs; i++) {
-         unsigned semantic_name = emit->info.input_semantic_name[i];
+         enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
           unsigned usage_mask = emit->info.input_usage_mask[i];
           unsigned index = emit->linkage.input_map[i];
           unsigned usage_mask = emit->info.input_usage_mask[i];
           unsigned index = emit->linkage.input_map[i];
-         unsigned type, interpolationMode, name;
+         VGPU10_OPCODE_TYPE type;
+         VGPU10_INTERPOLATION_MODE interpolationMode;
+         VGPU10_SYSTEM_NAME name;
  
           if (usage_mask == 0)
              continue;  /* register is not actually used */
  
           if (usage_mask == 0)
              continue;  /* register is not actually used */
@@ -2360,6 +2557,12 @@ emit_input_declarations(struct svga_shader_emitter_v10 *emit)
              interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
              name = VGPU10_NAME_PRIMITIVE_ID;
           }
              interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
              name = VGPU10_NAME_PRIMITIVE_ID;
           }
+         else if (semantic_name == TGSI_SEMANTIC_SAMPLEID) {
+            /* sample index / ID */
+            type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
+            interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
+            name = VGPU10_NAME_SAMPLE_INDEX;
+         }
           else {
              /* general fragment input */
              type = VGPU10_OPCODE_DCL_INPUT_PS;
           else {
              /* general fragment input */
              type = VGPU10_OPCODE_DCL_INPUT_PS;
@@ -2369,7 +2572,7 @@ emit_input_declarations(struct svga_shader_emitter_v10 *emit)
                                         emit->info.input_interpolate_loc[i]);
  
              /* keeps track if flat interpolation mode is being used */
                                         emit->info.input_interpolate_loc[i]);
  
              /* keeps track if flat interpolation mode is being used */
-            emit->uses_flat_interp = emit->uses_flat_interp ||
+            emit->uses_flat_interp |=
                 (interpolationMode == VGPU10_INTERPOLATION_CONSTANT);
  
              name = VGPU10_NAME_UNDEFINED;
                 (interpolationMode == VGPU10_INTERPOLATION_CONSTANT);
  
              name = VGPU10_NAME_UNDEFINED;
@@ -2388,13 +2591,14 @@ emit_input_declarations(struct svga_shader_emitter_v10 *emit)
     else if (emit->unit == PIPE_SHADER_GEOMETRY) {
  
        for (i = 0; i < emit->info.num_inputs; i++) {
     else if (emit->unit == PIPE_SHADER_GEOMETRY) {
  
        for (i = 0; i < emit->info.num_inputs; i++) {
-         unsigned semantic_name = emit->info.input_semantic_name[i];
+         enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
           unsigned usage_mask = emit->info.input_usage_mask[i];
           unsigned index = emit->linkage.input_map[i];
           unsigned usage_mask = emit->info.input_usage_mask[i];
           unsigned index = emit->linkage.input_map[i];
-         unsigned opcodeType, operandType;
-         unsigned numComp, selMode;
-         unsigned name;
-         unsigned dim;
+         VGPU10_OPCODE_TYPE opcodeType, operandType;
+         VGPU10_OPERAND_NUM_COMPONENTS numComp;
+         VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode;
+         VGPU10_SYSTEM_NAME name;
+         VGPU10_OPERAND_INDEX_DIMENSION dim;
  
           if (usage_mask == 0)
              continue;  /* register is not actually used */
  
           if (usage_mask == 0)
              continue;  /* register is not actually used */
@@ -2405,7 +2609,9 @@ emit_input_declarations(struct svga_shader_emitter_v10 *emit)
           selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
           name = VGPU10_NAME_UNDEFINED;
  
           selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
           name = VGPU10_NAME_UNDEFINED;
  
-         /* all geometry shader inputs are two dimensional except gl_PrimitiveID */
+         /* all geometry shader inputs are two dimensional except
+          * gl_PrimitiveID
+          */
           dim = VGPU10_OPERAND_INDEX_2D;
  
           if (semantic_name == TGSI_SEMANTIC_PRIMID) {
           dim = VGPU10_OPERAND_INDEX_2D;
  
           if (semantic_name == TGSI_SEMANTIC_PRIMID) {
@@ -2439,7 +2645,7 @@ emit_input_declarations(struct svga_shader_emitter_v10 *emit)
     else {
        assert(emit->unit == PIPE_SHADER_VERTEX);
  
     else {
        assert(emit->unit == PIPE_SHADER_VERTEX);
  
-      for (i = 0; i < emit->info.num_inputs; i++) {
+      for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) {
           unsigned usage_mask = emit->info.input_usage_mask[i];
           unsigned index = i;
  
           unsigned usage_mask = emit->info.input_usage_mask[i];
           unsigned index = i;
  
@@ -2471,16 +2677,20 @@ emit_output_declarations(struct svga_shader_emitter_v10 *emit)
  
     for (i = 0; i < emit->info.num_outputs; i++) {
        /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/
  
     for (i = 0; i < emit->info.num_outputs; i++) {
        /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/
-      const unsigned semantic_name = emit->info.output_semantic_name[i];
+      const enum tgsi_semantic semantic_name =
+         emit->info.output_semantic_name[i];
        const unsigned semantic_index = emit->info.output_semantic_index[i];
        unsigned index = i;
  
        if (emit->unit == PIPE_SHADER_FRAGMENT) {
           if (semantic_name == TGSI_SEMANTIC_COLOR) {
        const unsigned semantic_index = emit->info.output_semantic_index[i];
        unsigned index = i;
  
        if (emit->unit == PIPE_SHADER_FRAGMENT) {
           if (semantic_name == TGSI_SEMANTIC_COLOR) {
-            assert(semantic_index < Elements(emit->fs.color_out_index));
+            assert(semantic_index < ARRAY_SIZE(emit->fs.color_out_index));
  
              emit->fs.color_out_index[semantic_index] = index;
  
  
              emit->fs.color_out_index[semantic_index] = index;
  
+            emit->fs.num_color_outputs = MAX2(emit->fs.num_color_outputs,
+                                              index + 1);
+
              /* The semantic index is the shader's color output/buffer index */
              emit_output_declaration(emit,
                                      VGPU10_OPCODE_DCL_OUTPUT, semantic_index,
              /* The semantic index is the shader's color output/buffer index */
              emit_output_declaration(emit,
                                      VGPU10_OPCODE_DCL_OUTPUT, semantic_index,
@@ -2503,6 +2713,9 @@ emit_output_declarations(struct svga_shader_emitter_v10 *emit)
                                          VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
                       emit->info.output_semantic_index[idx] = j;
                    }
                                          VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
                       emit->info.output_semantic_index[idx] = j;
                    }
+
+                  emit->fs.num_color_outputs =
+                     emit->key.fs.write_color0_to_n_cbufs;
                 }
              }
              else {
                 }
              }
              else {
@@ -2513,13 +2726,18 @@ emit_output_declarations(struct svga_shader_emitter_v10 *emit)
              /* Fragment depth output */
              emit_fragdepth_output_declaration(emit);
           }
              /* Fragment depth output */
              emit_fragdepth_output_declaration(emit);
           }
+         else if (semantic_name == TGSI_SEMANTIC_SAMPLEMASK) {
+            /* Fragment sample mask (coverage) output */
+            emit_samplemask_output_declaration(emit);
+         }
           else {
              assert(!"Bad output semantic name");
           }
        }
        else {
           /* VS or GS */
           else {
              assert(!"Bad output semantic name");
           }
        }
        else {
           /* VS or GS */
-         unsigned name, type;
+         VGPU10_COMPONENT_NAME name;
+         VGPU10_OPCODE_TYPE type;
           unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
  
           switch (semantic_name) {
           unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
  
           switch (semantic_name) {
@@ -2618,6 +2836,28 @@ emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
  
     total_temps = emit->num_shader_temps;
  
  
     total_temps = emit->num_shader_temps;
  
+   /* If there is indirect access to non-indexable temps in the shader,
+    * convert those temps to indexable temps. This works around a bug
+    * in the GLSL->TGSI translator exposed in piglit test
+    * glsl-1.20/execution/fs-const-array-of-struct-of-array.shader_test.
+    * Internal temps added by the driver remain as non-indexable temps.
+    */
+   if ((emit->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) &&
+       emit->num_temp_arrays == 0) {
+      unsigned arrayID;
+
+      arrayID = 1;
+      emit->num_temp_arrays = arrayID + 1; 
+      emit->temp_arrays[arrayID].start = 0;
+      emit->temp_arrays[arrayID].size = total_temps;
+
+      /* Fill in the temp_map entries for this temp array */
+      for (i = 0; i < total_temps; i++) {
+         emit->temp_map[i].arrayId = arrayID;
+         emit->temp_map[i].index = i;
+      }
+   }
+
     /* Allocate extra temps for specially-implemented instructions,
      * such as LIT.
      */
     /* Allocate extra temps for specially-implemented instructions,
      * such as LIT.
      */
@@ -2684,6 +2924,11 @@ emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
           emit->fs.fragcoord_tmp_index = total_temps;
           total_temps += 1;
        }
           emit->fs.fragcoord_tmp_index = total_temps;
           total_temps += 1;
        }
+
+      if (emit->fs.sample_pos_sys_index != INVALID_INDEX) {
+         /* Allocate a temp for the sample position */
+         emit->fs.sample_pos_tmp_index = total_temps++;
+      }
     }
  
     for (i = 0; i < emit->num_address_regs; i++) {
     }
  
     for (i = 0; i < emit->num_address_regs; i++) {
@@ -2726,16 +2971,17 @@ emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
           emit->temp_map[i].index = reg++;
        }
     }
           emit->temp_map[i].index = reg++;
        }
     }
-   total_temps = reg;
  
     if (0) {
        debug_printf("total_temps %u\n", total_temps);
  
     if (0) {
        debug_printf("total_temps %u\n", total_temps);
-      for (i = 0; i < 30; i++) {
+      for (i = 0; i < total_temps; i++) {
           debug_printf("temp %u ->  array %u  index %u\n",
                        i, emit->temp_map[i].arrayId, emit->temp_map[i].index);
        }
     }
  
           debug_printf("temp %u ->  array %u  index %u\n",
                        i, emit->temp_map[i].arrayId, emit->temp_map[i].index);
        }
     }
  
+   total_temps = reg;
+
     /* Emit declaration of ordinary temp registers */
     if (total_temps > 0) {
        VGPU10OpcodeToken0 opcode0;
     /* Emit declaration of ordinary temp registers */
     if (total_temps > 0) {
        VGPU10OpcodeToken0 opcode0;
@@ -2811,7 +3057,11 @@ emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
      */
     total_consts = emit->num_shader_consts[0];
  
      */
     total_consts = emit->num_shader_consts[0];
  
-   /* Now, allocate constant slots for the "extra" constants */
+   /* Now, allocate constant slots for the "extra" constants.
+    * Note: it's critical that these extra constant locations
+    * exactly match what's emitted by the "extra" constants code
+    * in svga_state_constants.c
+    */
  
     /* Vertex position scale/translation */
     if (emit->vposition.need_prescale) {
  
     /* Vertex position scale/translation */
     if (emit->vposition.need_prescale) {
@@ -2835,19 +3085,19 @@ emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
        }
     }
  
        }
     }
  
-   /* Texcoord scale factors for RECT textures */
-   {
-      for (i = 0; i < emit->num_samplers; i++) {
+   for (i = 0; i < emit->num_samplers; i++) {
+
+      if (emit->sampler_view[i]) {
+
+         /* Texcoord scale factors for RECT textures */
           if (emit->key.tex[i].unnormalized) {
              emit->texcoord_scale_index[i] = total_consts++;
           }
           if (emit->key.tex[i].unnormalized) {
              emit->texcoord_scale_index[i] = total_consts++;
           }
-      }
-   }
  
  
-   /* Texture buffer sizes */
-   for (i = 0; i < emit->num_samplers; i++) {
-      if (emit->key.tex[i].texture_target == PIPE_BUFFER) {
-         emit->texture_buffer_size_index[i] = total_consts++;
+         /* Texture buffer sizes */
+         if (emit->sampler_target[i] == TGSI_TEXTURE_BUFFER) {
+            emit->texture_buffer_size_index[i] = total_consts++;
+         }
        }
     }
  
        }
     }
  
@@ -2861,7 +3111,7 @@ emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
     }
  
     /* Declare remaining constant buffers (UBOs) */
     }
  
     /* Declare remaining constant buffers (UBOs) */
-   for (i = 1; i < Elements(emit->num_shader_consts); i++) {
+   for (i = 1; i < ARRAY_SIZE(emit->num_shader_consts); i++) {
        if (emit->num_shader_consts[i] > 0) {
           begin_emit_instruction(emit);
           emit_dword(emit, opcode0.value);
        if (emit->num_shader_consts[i] > 0) {
           begin_emit_instruction(emit);
           emit_dword(emit, opcode0.value);
@@ -2910,31 +3160,55 @@ emit_sampler_declarations(struct svga_shader_emitter_v10 *emit)
  
  
  /**
  
  
  /**
- * Translate PIPE_TEXTURE_x to VGAPU10_RESOURCE_DIMENSION_x.
+ * Translate TGSI_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x.
   */
  static unsigned
   */
  static unsigned
-pipe_texture_to_resource_dimension(unsigned target, bool msaa)
+tgsi_texture_to_resource_dimension(enum tgsi_texture_type target,
+                                   unsigned num_samples,
+                                   boolean is_array)
  {
  {
+   if (target == TGSI_TEXTURE_2D_MSAA && num_samples < 2) {
+      target = TGSI_TEXTURE_2D;
+   }
+   else if (target == TGSI_TEXTURE_2D_ARRAY_MSAA && num_samples < 2) {
+      target = TGSI_TEXTURE_2D_ARRAY;
+   }
+
     switch (target) {
     switch (target) {
-   case PIPE_BUFFER:
+   case TGSI_TEXTURE_BUFFER:
        return VGPU10_RESOURCE_DIMENSION_BUFFER;
        return VGPU10_RESOURCE_DIMENSION_BUFFER;
-   case PIPE_TEXTURE_1D:
+   case TGSI_TEXTURE_1D:
        return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
        return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
-   case PIPE_TEXTURE_2D:
-   case PIPE_TEXTURE_RECT:
-      return msaa ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS
-         : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
-   case PIPE_TEXTURE_3D:
+   case TGSI_TEXTURE_2D:
+   case TGSI_TEXTURE_RECT:
+      return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
+   case TGSI_TEXTURE_3D:
        return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
        return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
-   case PIPE_TEXTURE_CUBE:
+   case TGSI_TEXTURE_CUBE:
+   case TGSI_TEXTURE_SHADOWCUBE:
        return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
        return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
-   case PIPE_TEXTURE_1D_ARRAY:
-      return VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY;
-   case PIPE_TEXTURE_2D_ARRAY:
-      return msaa ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY
-         : VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY;
-   case PIPE_TEXTURE_CUBE_ARRAY:
-      return VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY;
+   case TGSI_TEXTURE_SHADOW1D:
+      return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
+   case TGSI_TEXTURE_SHADOW2D:
+   case TGSI_TEXTURE_SHADOWRECT:
+      return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
+   case TGSI_TEXTURE_1D_ARRAY:
+   case TGSI_TEXTURE_SHADOW1D_ARRAY:
+      return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY
+         : VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
+   case TGSI_TEXTURE_2D_ARRAY:
+   case TGSI_TEXTURE_SHADOW2D_ARRAY:
+      return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY
+         : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
+   case TGSI_TEXTURE_2D_MSAA:
+      return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
+   case TGSI_TEXTURE_2D_ARRAY_MSAA:
+      return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY
+         : VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
+   case TGSI_TEXTURE_CUBE_ARRAY:
+   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
+      return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY
+         : VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
     default:
        assert(!"Unexpected resource type");
        return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
     default:
        assert(!"Unexpected resource type");
        return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
@@ -2985,8 +3259,10 @@ emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
        opcode0.value = 0;
        opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;
        opcode0.resourceDimension =
        opcode0.value = 0;
        opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;
        opcode0.resourceDimension =
-         pipe_texture_to_resource_dimension(emit->key.tex[i].texture_target,
-                                            emit->key.tex[i].texture_msaa);
+         tgsi_texture_to_resource_dimension(emit->sampler_target[i],
+                                            emit->key.tex[i].num_samples,
+                                            emit->key.tex[i].is_array);
+      opcode0.sampleCount = emit->key.tex[i].num_samples;
        operand0.value = 0;
        operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
        operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
        operand0.value = 0;
        operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
        operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
@@ -3000,10 +3276,10 @@ emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
        STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1);
        STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1);
        STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1);
        STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1);
        STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1);
        STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1);
-      assert(emit->key.tex[i].return_type <= TGSI_RETURN_TYPE_FLOAT);
-      rt = emit->key.tex[i].return_type + 1;
+      assert(emit->sampler_return_type[i] <= TGSI_RETURN_TYPE_FLOAT);
+      rt = emit->sampler_return_type[i] + 1;
  #else
  #else
-      switch (emit->key.tex[i].return_type) {
+      switch (emit->sampler_return_type[i]) {
           case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break;
           case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break;
           case TGSI_RETURN_TYPE_SINT:  rt = VGPU10_RETURN_TYPE_SINT;  break;
           case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break;
           case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break;
           case TGSI_RETURN_TYPE_SINT:  rt = VGPU10_RETURN_TYPE_SINT;  break;
@@ -3035,7 +3311,7 @@ emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
  
  static void
  emit_instruction_op1(struct svga_shader_emitter_v10 *emit,
  
  static void
  emit_instruction_op1(struct svga_shader_emitter_v10 *emit,
-                     unsigned opcode,
+                     VGPU10_OPCODE_TYPE opcode,
                       const struct tgsi_full_dst_register *dst,
                       const struct tgsi_full_src_register *src,
                       boolean saturate)
                       const struct tgsi_full_dst_register *dst,
                       const struct tgsi_full_src_register *src,
                       boolean saturate)
@@ -3049,7 +3325,7 @@ emit_instruction_op1(struct svga_shader_emitter_v10 *emit,
  
  static void
  emit_instruction_op2(struct svga_shader_emitter_v10 *emit,
  
  static void
  emit_instruction_op2(struct svga_shader_emitter_v10 *emit,
-                     unsigned opcode,
+                     VGPU10_OPCODE_TYPE opcode,
                       const struct tgsi_full_dst_register *dst,
                       const struct tgsi_full_src_register *src1,
                       const struct tgsi_full_src_register *src2,
                       const struct tgsi_full_dst_register *dst,
                       const struct tgsi_full_src_register *src1,
                       const struct tgsi_full_src_register *src2,
@@ -3065,7 +3341,7 @@ emit_instruction_op2(struct svga_shader_emitter_v10 *emit,
  
  static void
  emit_instruction_op3(struct svga_shader_emitter_v10 *emit,
  
  static void
  emit_instruction_op3(struct svga_shader_emitter_v10 *emit,
-                     unsigned opcode,
+                     VGPU10_OPCODE_TYPE opcode,
                       const struct tgsi_full_dst_register *dst,
                       const struct tgsi_full_src_register *src1,
                       const struct tgsi_full_src_register *src2,
                       const struct tgsi_full_dst_register *dst,
                       const struct tgsi_full_src_register *src1,
                       const struct tgsi_full_src_register *src2,
@@ -3097,7 +3373,7 @@ emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit)
     unsigned i;
     unsigned clip_plane_enable = emit->key.clip_plane_enable;
     unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index;
     unsigned i;
     unsigned clip_plane_enable = emit->key.clip_plane_enable;
     unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index;
-   unsigned num_written_clipdist = emit->info.num_written_clipdistance;
+   int num_written_clipdist = emit->info.num_written_clipdistance;
  
     assert(emit->clip_dist_out_index != INVALID_INDEX);
     assert(emit->clip_dist_tmp_index != INVALID_INDEX);
  
     assert(emit->clip_dist_out_index != INVALID_INDEX);
     assert(emit->clip_dist_tmp_index != INVALID_INDEX);
@@ -3109,7 +3385,7 @@ emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit)
      */
     emit->clip_dist_tmp_index = INVALID_INDEX;
  
      */
     emit->clip_dist_tmp_index = INVALID_INDEX;
  
-   for (i = 0; i < 2 && num_written_clipdist; i++, num_written_clipdist-=4) {
+   for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) {
  
        tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i);
  
  
        tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i);
  
@@ -3404,28 +3680,6 @@ emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit,
  }
  
  
  }
  
  
-/**
- * Emit code for TGSI_OPCODE_ABS instruction.
- */
-static boolean
-emit_abs(struct svga_shader_emitter_v10 *emit,
-         const struct tgsi_full_instruction *inst)
-{
-   /* dst = ABS(s0):
-    *   dst = abs(s0)
-    * Translates into:
-    *   MOV dst, abs(s0)
-    */
-   struct tgsi_full_src_register abs_src0 = absolute_src(&inst->Src[0]);
-
-   /* MOV dst, abs(s0) */
-   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
-                        &abs_src0, inst->Instruction.Saturate);
-
-   return TRUE;
-}
-
-
  /**
   * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction.
   */
  /**
   * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction.
   */
@@ -3435,7 +3689,7 @@ emit_arl_uarl(struct svga_shader_emitter_v10 *emit,
  {
     unsigned index = inst->Dst[0].Register.Index;
     struct tgsi_full_dst_register dst;
  {
     unsigned index = inst->Dst[0].Register.Index;
     struct tgsi_full_dst_register dst;
-   unsigned opcode;
+   VGPU10_OPCODE_TYPE opcode;
  
     assert(index < MAX_VGPU10_ADDR_REGS);
     dst = make_dst_temp_reg(emit->address_reg_index[index]);
  
     assert(index < MAX_VGPU10_ADDR_REGS);
     dst = make_dst_temp_reg(emit->address_reg_index[index]);
@@ -3536,89 +3790,6 @@ emit_cmp(struct svga_shader_emitter_v10 *emit,
  }
  
  
  }
  
  
-/**
- * Emit code for TGSI_OPCODE_DP2A instruction.
- */
-static boolean
-emit_dp2a(struct svga_shader_emitter_v10 *emit,
-          const struct tgsi_full_instruction *inst)
-{
-   /* dst.x = src0.x * src1.x + src0.y * src1.y + src2.x
-    * dst.y = src0.x * src1.x + src0.y * src1.y + src2.x
-    * dst.z = src0.x * src1.x + src0.y * src1.y + src2.x
-    * dst.w = src0.x * src1.x + src0.y * src1.y + src2.x
-    * Translate into
-    *   MAD tmp.x, s0.y, s1.y, s2.x
-    *   MAD tmp.x, s0.x, s1.x, tmp.x
-    *   MOV dst.xyzw, tmp.xxxx
-    */
-   unsigned tmp = get_temp_index(emit);
-   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
-   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
-
-   struct tgsi_full_src_register tmp_src_xxxx =
-      scalar_src(&tmp_src, TGSI_SWIZZLE_X);
-   struct tgsi_full_dst_register tmp_dst_x =
-      writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
-
-   struct tgsi_full_src_register src0_xxxx =
-      scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
-   struct tgsi_full_src_register src0_yyyy =
-      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
-   struct tgsi_full_src_register src1_xxxx =
-      scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
-   struct tgsi_full_src_register src1_yyyy =
-      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
-   struct tgsi_full_src_register src2_xxxx =
-      scalar_src(&inst->Src[2], TGSI_SWIZZLE_X);
-
-   emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_yyyy,
-                        &src1_yyyy, &src2_xxxx, FALSE);
-   emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_xxxx,
-                        &src1_xxxx, &tmp_src_xxxx, FALSE);
-   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
-                        &tmp_src_xxxx, inst->Instruction.Saturate);
-
-   free_temp_indexes(emit);
-
-   return TRUE;
-}
-
-
-/**
- * Emit code for TGSI_OPCODE_DPH instruction.
- */
-static boolean
-emit_dph(struct svga_shader_emitter_v10 *emit,
-         const struct tgsi_full_instruction *inst)
-{
-   /*
-    * DP3 tmp, s0, s1
-    * ADD dst, tmp, s1.wwww
-    */
-
-   struct tgsi_full_src_register s1_wwww =
-      swizzle_src(&inst->Src[1], TGSI_SWIZZLE_W, TGSI_SWIZZLE_W,
-                  TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
-
-   unsigned tmp = get_temp_index(emit);
-   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
-   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
-
-   /* DP3 tmp, s0, s1 */
-   emit_instruction_op2(emit, VGPU10_OPCODE_DP3, &tmp_dst, &inst->Src[0],
-                        &inst->Src[1], FALSE);
-
-   /* ADD dst, tmp, s1.wwww */
-   emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0], &tmp_src,
-                        &s1_wwww, inst->Instruction.Saturate);
-
-   free_temp_indexes(emit);
-
-   return TRUE;
-}
-
-
  /**
   * Emit code for TGSI_OPCODE_DST instruction.
   */
  /**
   * Emit code for TGSI_OPCODE_DST instruction.
   */
@@ -4094,6 +4265,30 @@ emit_lit(struct svga_shader_emitter_v10 *emit,
  }
  
  
  }
  
  
+/**
+ * Emit Level Of Detail Query (LODQ) instruction.
+ */
+static boolean
+emit_lodq(struct svga_shader_emitter_v10 *emit,
+          const struct tgsi_full_instruction *inst)
+{
+   const uint unit = inst->Src[1].Register.Index;
+
+   assert(emit->version >= 41);
+
+   /* LOD dst, coord, resource, sampler */
+   begin_emit_instruction(emit);
+   emit_opcode(emit, VGPU10_OPCODE_LOD, FALSE);
+   emit_dst_register(emit, &inst->Dst[0]);
+   emit_src_register(emit, &inst->Src[0]); /* coord */
+   emit_resource_register(emit, unit);
+   emit_sampler_register(emit, unit);
+   end_emit_instruction(emit);
+
+   return TRUE;
+}
+
+
  /**
   * Emit code for TGSI_OPCODE_LOG instruction.
   */
  /**
   * Emit code for TGSI_OPCODE_LOG instruction.
   */
@@ -4322,42 +4517,6 @@ emit_rsq(struct svga_shader_emitter_v10 *emit,
  }
  
  
  }
  
  
-/**
- * Emit code for TGSI_OPCODE_SCS instruction.
- */
-static boolean
-emit_scs(struct svga_shader_emitter_v10 *emit,
-         const struct tgsi_full_instruction *inst)
-{
-   /* dst.x = cos(src.x)
-    * dst.y = sin(src.x)
-    * dst.z = 0.0
-    * dst.w = 1.0
-    */
-   struct tgsi_full_dst_register dst_x =
-      writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
-   struct tgsi_full_dst_register dst_y =
-      writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
-   struct tgsi_full_dst_register dst_zw =
-      writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_ZW);
-
-   struct tgsi_full_src_register zero_one =
-      make_immediate_reg_float4(emit, 0.0f, 0.0f, 0.0f, 1.0f);
-
-   begin_emit_instruction(emit);
-   emit_opcode(emit, VGPU10_OPCODE_SINCOS, inst->Instruction.Saturate);
-   emit_dst_register(emit, &dst_y);
-   emit_dst_register(emit, &dst_x);
-   emit_src_register(emit, &inst->Src[0]);
-   end_emit_instruction(emit);
-
-   emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
-                        &dst_zw, &zero_one, inst->Instruction.Saturate);
-
-   return TRUE;
-}
-
-
  /**
   * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction.
   */
  /**
   * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction.
   */
@@ -4686,31 +4845,8 @@ emit_issg(struct svga_shader_emitter_v10 *emit,
  
  
  /**
  
  
  /**
- * Emit code for TGSI_OPCODE_SUB instruction.
- */
-static boolean
-emit_sub(struct svga_shader_emitter_v10 *emit,
-         const struct tgsi_full_instruction *inst)
-{
-   /* dst = SUB(s0, s1):
-    *   dst = s0 - s1
-    * Translates into:
-    *   ADD dst, s0, neg(s1)
-    */
-   struct tgsi_full_src_register neg_src1 = negate_src(&inst->Src[1]);
-
-   /* ADD dst, s0, neg(s1) */
-   emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0],
-                        &inst->Src[0], &neg_src1,
-                        inst->Instruction.Saturate);
-
-   return TRUE;
-}
-
-
-/**
- * Emit a comparison instruction.  The dest register will get
- * 0 or ~0 values depending on the outcome of comparing src0 to src1.
+ * Emit a comparison instruction.  The dest register will get
+ * 0 or ~0 values depending on the outcome of comparing src0 to src1.
   */
  static void
  emit_comparison(struct svga_shader_emitter_v10 *emit,
   */
  static void
  emit_comparison(struct svga_shader_emitter_v10 *emit,
@@ -4831,16 +4967,31 @@ setup_texcoord(struct svga_shader_emitter_v10 *emit,
                 unsigned unit,
                 const struct tgsi_full_src_register *coord)
  {
                 unsigned unit,
                 const struct tgsi_full_src_register *coord)
  {
-   if (emit->key.tex[unit].unnormalized) {
+   if (emit->sampler_view[unit] && emit->key.tex[unit].unnormalized) {
        unsigned scale_index = emit->texcoord_scale_index[unit];
        unsigned tmp = get_temp_index(emit);
        struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
        struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
        struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index);
  
        unsigned scale_index = emit->texcoord_scale_index[unit];
        unsigned tmp = get_temp_index(emit);
        struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
        struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
        struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index);
  
-      /* MUL tmp, coord, const[] */
-      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
-                           coord, &scale_src, FALSE);
+      if (emit->key.tex[unit].texel_bias) {
+         /* To fix a texture coordinate rounding issue, a 0.0001 offset
+          * is added. This fixes piglit test fbo-blit-scaled-linear. */
+         struct tgsi_full_src_register offset =
+            make_immediate_reg_float(emit, 0.0001f);
+
+         /* ADD tmp, coord, offset */
+         emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_dst,
+                              coord, &offset, FALSE);
+         /* MUL tmp, tmp, scale */
+         emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
+                              &tmp_src, &scale_src, FALSE);
+      }
+      else {
+         /* MUL tmp, coord, const[] */
+         emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
+                              coord, &scale_src, FALSE);
+      }
        return tmp_src;
     }
     else {
        return tmp_src;
     }
     else {
@@ -4856,20 +5007,16 @@ setup_texcoord(struct svga_shader_emitter_v10 *emit,
   */
  static void
  emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit,
   */
  static void
  emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit,
-                          unsigned target,
+                          enum tgsi_texture_type target,
                            const struct tgsi_full_src_register *coord)
  {
     struct tgsi_full_src_register coord_src_ref;
                            const struct tgsi_full_src_register *coord)
  {
     struct tgsi_full_src_register coord_src_ref;
-   unsigned component;
+   int component;
  
     assert(tgsi_is_shadow_target(target));
  
  
     assert(tgsi_is_shadow_target(target));
  
-   assert(target != TGSI_TEXTURE_SHADOWCUBE_ARRAY); /* XXX not implemented */
-   if (target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
-       target == TGSI_TEXTURE_SHADOWCUBE)
-      component = TGSI_SWIZZLE_W;
-   else
-      component = TGSI_SWIZZLE_Z;
+   component = tgsi_util_get_shadow_ref_src_index(target) % 4;
+   assert(component >= 0);
  
     coord_src_ref = scalar_src(coord, component);
  
  
     coord_src_ref = scalar_src(coord, component);
  
@@ -4890,7 +5037,7 @@ struct tex_swizzle_info
     boolean swizzled;
     boolean shadow_compare;
     unsigned unit;
     boolean swizzled;
     boolean shadow_compare;
     unsigned unit;
-   unsigned texture_target;  /**< TGSI_TEXTURE_x */
+   enum tgsi_texture_type texture_target;  /**< TGSI_TEXTURE_x */
     struct tgsi_full_src_register tmp_src;
     struct tgsi_full_dst_register tmp_dst;
     const struct tgsi_full_dst_register *inst_dst;
     struct tgsi_full_src_register tmp_src;
     struct tgsi_full_dst_register tmp_dst;
     const struct tgsi_full_dst_register *inst_dst;
@@ -4932,6 +5079,8 @@ begin_tex_swizzle(struct svga_shader_emitter_v10 *emit,
     }
     swz->inst_dst = &inst->Dst[0];
     swz->coord_src = &inst->Src[0];
     }
     swz->inst_dst = &inst->Dst[0];
     swz->coord_src = &inst->Src[0];
+
+   emit->fs.shadow_compare_units |= shadow_compare << unit;
  }
  
  
  }
  
  
@@ -4971,30 +5120,14 @@ end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
  
        assert(emit->unit == PIPE_SHADER_FRAGMENT);
  
  
        assert(emit->unit == PIPE_SHADER_FRAGMENT);
  
-      switch (swz->texture_target) {
-      case TGSI_TEXTURE_SHADOW2D:
-      case TGSI_TEXTURE_SHADOWRECT:
-      case TGSI_TEXTURE_SHADOW1D_ARRAY:
-         coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z);
-         break;
-      case TGSI_TEXTURE_SHADOW1D:
-         coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Y);
-         break;
-      case TGSI_TEXTURE_SHADOWCUBE:
-      case TGSI_TEXTURE_SHADOW2D_ARRAY:
-         coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_W);
-         break;
-      default:
-         assert(!"Unexpected texture target in end_tex_swizzle()");
-         coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z);
-      }
+      int component =
+         tgsi_util_get_shadow_ref_src_index(swz->texture_target) % 4;
+      assert(component >= 0);
+      coord_src = scalar_src(swz->coord_src, component);
  
        /* COMPARE tmp, coord, texel */
  
        /* COMPARE tmp, coord, texel */
-      /* XXX it would seem that the texel and coord arguments should
-       * be transposed here, but piglit tests indicate otherwise.
-       */
        emit_comparison(emit, compare_func,
        emit_comparison(emit, compare_func,
-                      &swz->tmp_dst, &texel_src, &coord_src);
+                      &swz->tmp_dst, &coord_src, &texel_src);
  
        /* AND dest, tmp, {1.0} */
        begin_emit_instruction(emit);
  
        /* AND dest, tmp, {1.0} */
        begin_emit_instruction(emit);
@@ -5016,25 +5149,26 @@ end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
        unsigned swz_b = emit->key.tex[swz->unit].swizzle_b;
        unsigned swz_a = emit->key.tex[swz->unit].swizzle_a;
        unsigned writemask_0 = 0, writemask_1 = 0;
        unsigned swz_b = emit->key.tex[swz->unit].swizzle_b;
        unsigned swz_a = emit->key.tex[swz->unit].swizzle_a;
        unsigned writemask_0 = 0, writemask_1 = 0;
-      boolean int_tex = is_integer_type(emit->key.tex[swz->unit].return_type);
+      boolean int_tex = is_integer_type(emit->sampler_return_type[swz->unit]);
  
        /* Swizzle w/out zero/one terms */
        struct tgsi_full_src_register src_swizzled =
           swizzle_src(&swz->tmp_src,
  
        /* Swizzle w/out zero/one terms */
        struct tgsi_full_src_register src_swizzled =
           swizzle_src(&swz->tmp_src,
-                     swz_r < PIPE_SWIZZLE_ZERO ? swz_r : PIPE_SWIZZLE_RED,
-                     swz_g < PIPE_SWIZZLE_ZERO ? swz_g : PIPE_SWIZZLE_GREEN,
-                     swz_b < PIPE_SWIZZLE_ZERO ? swz_b : PIPE_SWIZZLE_BLUE,
-                     swz_a < PIPE_SWIZZLE_ZERO ? swz_a : PIPE_SWIZZLE_ALPHA);
+                     swz_r < PIPE_SWIZZLE_0 ? swz_r : PIPE_SWIZZLE_X,
+                     swz_g < PIPE_SWIZZLE_0 ? swz_g : PIPE_SWIZZLE_Y,
+                     swz_b < PIPE_SWIZZLE_0 ? swz_b : PIPE_SWIZZLE_Z,
+                     swz_a < PIPE_SWIZZLE_0 ? swz_a : PIPE_SWIZZLE_W);
  
        /* MOV dst, color(tmp).<swizzle> */
        emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
                             swz->inst_dst, &src_swizzled, FALSE);
  
        /* handle swizzle zero terms */
  
        /* MOV dst, color(tmp).<swizzle> */
        emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
                             swz->inst_dst, &src_swizzled, FALSE);
  
        /* handle swizzle zero terms */
-      writemask_0 = (((swz_r == PIPE_SWIZZLE_ZERO) << 0) |
-                     ((swz_g == PIPE_SWIZZLE_ZERO) << 1) |
-                     ((swz_b == PIPE_SWIZZLE_ZERO) << 2) |
-                     ((swz_a == PIPE_SWIZZLE_ZERO) << 3));
+      writemask_0 = (((swz_r == PIPE_SWIZZLE_0) << 0) |
+                     ((swz_g == PIPE_SWIZZLE_0) << 1) |
+                     ((swz_b == PIPE_SWIZZLE_0) << 2) |
+                     ((swz_a == PIPE_SWIZZLE_0) << 3));
+      writemask_0 &= swz->inst_dst->Register.WriteMask;
  
        if (writemask_0) {
           struct tgsi_full_src_register zero = int_tex ?
  
        if (writemask_0) {
           struct tgsi_full_src_register zero = int_tex ?
@@ -5049,10 +5183,11 @@ end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
        }
  
        /* handle swizzle one terms */
        }
  
        /* handle swizzle one terms */
-      writemask_1 = (((swz_r == PIPE_SWIZZLE_ONE) << 0) |
-                     ((swz_g == PIPE_SWIZZLE_ONE) << 1) |
-                     ((swz_b == PIPE_SWIZZLE_ONE) << 2) |
-                     ((swz_a == PIPE_SWIZZLE_ONE) << 3));
+      writemask_1 = (((swz_r == PIPE_SWIZZLE_1) << 0) |
+                     ((swz_g == PIPE_SWIZZLE_1) << 1) |
+                     ((swz_b == PIPE_SWIZZLE_1) << 2) |
+                     ((swz_a == PIPE_SWIZZLE_1) << 3));
+      writemask_1 &= swz->inst_dst->Register.WriteMask;
  
        if (writemask_1) {
           struct tgsi_full_src_register one = int_tex ?
  
        if (writemask_1) {
           struct tgsi_full_src_register one = int_tex ?
@@ -5090,6 +5225,9 @@ emit_sample(struct svga_shader_emitter_v10 *emit,
     /* SAMPLE dst, coord(s0), resource, sampler */
     begin_emit_instruction(emit);
  
     /* SAMPLE dst, coord(s0), resource, sampler */
     begin_emit_instruction(emit);
  
+   /* NOTE: for non-fragment shaders, we should use VGPU10_OPCODE_SAMPLE_L
+    * with LOD=0.  But our virtual GPU accepts this as-is.
+    */
     emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE,
                        inst->Instruction.Saturate, offsets);
     emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
     emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE,
                        inst->Instruction.Saturate, offsets);
     emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
@@ -5119,11 +5257,11 @@ is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit,
                           const struct tgsi_full_instruction *inst)
  {
     const unsigned unit = inst->Src[1].Register.Index;
                           const struct tgsi_full_instruction *inst)
  {
     const unsigned unit = inst->Src[1].Register.Index;
-   const unsigned target = inst->Texture.Texture;
+   const enum tgsi_texture_type target = inst->Texture.Texture;
     boolean valid = TRUE;
  
     if (tgsi_is_shadow_target(target) &&
     boolean valid = TRUE;
  
     if (tgsi_is_shadow_target(target) &&
-       is_integer_type(emit->key.tex[unit].return_type)) {
+       is_integer_type(emit->sampler_return_type[unit])) {
        debug_printf("Invalid SAMPLE_C with an integer texture!\n");
        valid = FALSE;
     }
        debug_printf("Invalid SAMPLE_C with an integer texture!\n");
        valid = FALSE;
     }
@@ -5151,8 +5289,8 @@ emit_tex(struct svga_shader_emitter_v10 *emit,
           const struct tgsi_full_instruction *inst)
  {
     const uint unit = inst->Src[1].Register.Index;
           const struct tgsi_full_instruction *inst)
  {
     const uint unit = inst->Src[1].Register.Index;
-   unsigned target = inst->Texture.Texture;
-   unsigned opcode;
+   const enum tgsi_texture_type target = inst->Texture.Texture;
+   VGPU10_OPCODE_TYPE opcode;
     struct tgsi_full_src_register coord;
     int offsets[3];
     struct tex_swizzle_info swz_info;
     struct tgsi_full_src_register coord;
     int offsets[3];
     struct tex_swizzle_info swz_info;
@@ -5192,6 +5330,103 @@ emit_tex(struct svga_shader_emitter_v10 *emit,
     return TRUE;
  }
  
     return TRUE;
  }
  
+/**
+ * Emit code for TGSI_OPCODE_TG4 (texture lookup for texture gather)
+ */
+static boolean
+emit_tg4(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   const uint unit = inst->Src[2].Register.Index;
+   struct tgsi_full_src_register src;
+   int offsets[3];
+
+   /* check that the sampler returns a float */
+   if (!is_valid_tex_instruction(emit, inst))
+      return TRUE;
+
+   /* Only a single channel is supported in SM4_1 and we report
+    * PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS = 1.
+    * Only the 0th component will be gathered.
+    */
+   switch (emit->key.tex[unit].swizzle_r) {
+   case PIPE_SWIZZLE_X:
+      get_texel_offsets(emit, inst, offsets);
+      src = setup_texcoord(emit, unit, &inst->Src[0]);
+
+      /* Gather dst, coord, resource, sampler */
+      begin_emit_instruction(emit);
+      emit_sample_opcode(emit, VGPU10_OPCODE_GATHER4,
+                         inst->Instruction.Saturate, offsets);
+      emit_dst_register(emit, &inst->Dst[0]);
+      emit_src_register(emit, &src);
+      emit_resource_register(emit, unit);
+      emit_sampler_register(emit, unit);
+      end_emit_instruction(emit);
+      break;
+   case PIPE_SWIZZLE_W:
+   case PIPE_SWIZZLE_1:
+      src = make_immediate_reg_float(emit, 1.0);
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
+                           &inst->Dst[0], &src, FALSE);
+      break;
+   case PIPE_SWIZZLE_Y:
+   case PIPE_SWIZZLE_Z:
+   case PIPE_SWIZZLE_0:
+   default:
+      src = make_immediate_reg_float(emit, 0.0);
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
+                           &inst->Dst[0], &src, FALSE);
+      break;
+   }
+
+   return TRUE;
+}
+
+
+
+/**
+ * Emit code for TGSI_OPCODE_TEX2 (texture lookup for shadow cube map arrays)
+ */
+static boolean
+emit_tex2(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   const uint unit = inst->Src[2].Register.Index;
+   unsigned target = inst->Texture.Texture;
+   struct tgsi_full_src_register coord, ref;
+   int offsets[3];
+   struct tex_swizzle_info swz_info;
+
+   /* check that the sampler returns a float */
+   if (!is_valid_tex_instruction(emit, inst))
+      return TRUE;
+
+   begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
+
+   get_texel_offsets(emit, inst, offsets);
+
+   coord = setup_texcoord(emit, unit, &inst->Src[0]);
+   ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
+
+   /* SAMPLE_C dst, coord, resource, sampler, ref */
+   begin_emit_instruction(emit);
+   emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_C,
+                      inst->Instruction.Saturate, offsets);
+   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
+   emit_src_register(emit, &coord);
+   emit_resource_register(emit, unit);
+   emit_sampler_register(emit, unit);
+   emit_tex_compare_refcoord(emit, target, &ref);
+   end_emit_instruction(emit);
+
+   end_tex_swizzle(emit, &swz_info);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
  
  /**
   * Emit code for TGSI_OPCODE_TXP (projective texture)
  
  /**
   * Emit code for TGSI_OPCODE_TXP (projective texture)
@@ -5201,8 +5436,8 @@ emit_txp(struct svga_shader_emitter_v10 *emit,
           const struct tgsi_full_instruction *inst)
  {
     const uint unit = inst->Src[1].Register.Index;
           const struct tgsi_full_instruction *inst)
  {
     const uint unit = inst->Src[1].Register.Index;
-   unsigned target = inst->Texture.Texture;
-   unsigned opcode;
+   const enum tgsi_texture_type target = inst->Texture.Texture;
+   VGPU10_OPCODE_TYPE opcode;
     int offsets[3];
     unsigned tmp = get_temp_index(emit);
     struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
     int offsets[3];
     unsigned tmp = get_temp_index(emit);
     struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
@@ -5230,6 +5465,9 @@ emit_txp(struct svga_shader_emitter_v10 *emit,
     begin_emit_instruction(emit);
  
     if (tgsi_is_shadow_target(target))
     begin_emit_instruction(emit);
  
     if (tgsi_is_shadow_target(target))
+      /* NOTE: for non-fragment shaders, we should use
+       * VGPU10_OPCODE_SAMPLE_C_LZ, but our virtual GPU accepts this as-is.
+       */
        opcode = VGPU10_OPCODE_SAMPLE_C;
     else
        opcode = VGPU10_OPCODE_SAMPLE;
        opcode = VGPU10_OPCODE_SAMPLE_C;
     else
        opcode = VGPU10_OPCODE_SAMPLE;
@@ -5252,117 +5490,6 @@ emit_txp(struct svga_shader_emitter_v10 *emit,
  }
  
  
  }
  
  
-/*
- * Emit code for TGSI_OPCODE_XPD instruction.
- */
-static boolean
-emit_xpd(struct svga_shader_emitter_v10 *emit,
-         const struct tgsi_full_instruction *inst)
-{
-   /* dst.x = src0.y * src1.z - src1.y * src0.z
-    * dst.y = src0.z * src1.x - src1.z * src0.x
-    * dst.z = src0.x * src1.y - src1.x * src0.y
-    * dst.w = 1
-    */
-   struct tgsi_full_src_register s0_xxxx =
-      scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
-   struct tgsi_full_src_register s0_yyyy =
-      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
-   struct tgsi_full_src_register s0_zzzz =
-      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
-
-   struct tgsi_full_src_register s1_xxxx =
-      scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
-   struct tgsi_full_src_register s1_yyyy =
-      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
-   struct tgsi_full_src_register s1_zzzz =
-      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Z);
-
-   unsigned tmp1 = get_temp_index(emit);
-   struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
-   struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
-
-   unsigned tmp2 = get_temp_index(emit);
-   struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
-   struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
-   struct tgsi_full_src_register neg_tmp2_src = negate_src(&tmp2_src);
-
-   unsigned tmp3 = get_temp_index(emit);
-   struct tgsi_full_src_register tmp3_src = make_src_temp_reg(tmp3);
-   struct tgsi_full_dst_register tmp3_dst = make_dst_temp_reg(tmp3);
-   struct tgsi_full_dst_register tmp3_dst_x =
-      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_X);
-   struct tgsi_full_dst_register tmp3_dst_y =
-      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Y);
-   struct tgsi_full_dst_register tmp3_dst_z =
-      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Z);
-   struct tgsi_full_dst_register tmp3_dst_w =
-      writemask_dst(&tmp3_dst, TGSI_WRITEMASK_W);
-
-   /* Note: we put all the intermediate computations into tmp3 in case
-    * the XPD dest register is that same as one of the src regs (in which
-    * case we could clobber a src reg before we're done with it) .
-    *
-    * Note: we could get by with just one temp register instead of three
-    * since we're doing scalar operations and there's enough room in one
-    * temp for everything.
-    */
-
-   /* MUL tmp1, src0.y, src1.z */
-   /* MUL tmp2, src1.y, src0.z */
-   /* ADD tmp3.x, tmp1, -tmp2 */
-   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
-      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst,
-                           &s0_yyyy, &s1_zzzz, FALSE);
-      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst,
-                           &s1_yyyy, &s0_zzzz, FALSE);
-      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_x,
-                           &tmp1_src, &neg_tmp2_src, FALSE);
-   }
-
-   /* MUL tmp1, src0.z, src1.x */
-   /* MUL tmp2, src1.z, src0.x */
-   /* ADD tmp3.y, tmp1, -tmp2 */
-   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
-      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_zzzz,
-                           &s1_xxxx, FALSE);
-      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_zzzz,
-                           &s0_xxxx, FALSE);
-      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_y,
-                           &tmp1_src, &neg_tmp2_src, FALSE);
-   }
-
-   /* MUL tmp1, src0.x, src1.y */
-   /* MUL tmp2, src1.x, src0.y */
-   /* ADD tmp3.z, tmp1, -tmp2 */
-   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
-      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_xxxx,
-                           &s1_yyyy, FALSE);
-      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_xxxx,
-                           &s0_yyyy, FALSE);
-      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_z,
-                           &tmp1_src, &neg_tmp2_src, FALSE);
-   }
-
-   /* MOV tmp3.w, 1.0 */
-   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
-      struct tgsi_full_src_register one =
-         make_immediate_reg_float(emit, 1.0f);
-
-      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp3_dst_w, &one, FALSE);
-   }
-
-   /* MOV dst, tmp3 */
-   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &tmp3_src,
-                        inst->Instruction.Saturate);
-
-
-   free_temp_indexes(emit);
-
-   return TRUE;
-}
-
-
  /**
   * Emit code for TGSI_OPCODE_TXD (explicit derivatives)
   */
  /**
   * Emit code for TGSI_OPCODE_TXD (explicit derivatives)
   */
@@ -5371,7 +5498,7 @@ emit_txd(struct svga_shader_emitter_v10 *emit,
           const struct tgsi_full_instruction *inst)
  {
     const uint unit = inst->Src[3].Register.Index;
           const struct tgsi_full_instruction *inst)
  {
     const uint unit = inst->Src[3].Register.Index;
-   unsigned target = inst->Texture.Texture;
+   const enum tgsi_texture_type target = inst->Texture.Texture;
     int offsets[3];
     struct tgsi_full_src_register coord;
     struct tex_swizzle_info swz_info;
     int offsets[3];
     struct tgsi_full_src_register coord;
     struct tex_swizzle_info swz_info;
@@ -5411,7 +5538,8 @@ emit_txf(struct svga_shader_emitter_v10 *emit,
           const struct tgsi_full_instruction *inst)
  {
     const uint unit = inst->Src[1].Register.Index;
           const struct tgsi_full_instruction *inst)
  {
     const uint unit = inst->Src[1].Register.Index;
-   const unsigned msaa = emit->key.tex[unit].texture_msaa;
+   const boolean msaa = tgsi_is_msaa_target(inst->Texture.Texture)
+      && emit->key.tex[unit].num_samples > 1;
     int offsets[3];
     struct tex_swizzle_info swz_info;
  
     int offsets[3];
     struct tex_swizzle_info swz_info;
  
@@ -5420,6 +5548,8 @@ emit_txf(struct svga_shader_emitter_v10 *emit,
     get_texel_offsets(emit, inst, offsets);
  
     if (msaa) {
     get_texel_offsets(emit, inst, offsets);
  
     if (msaa) {
+      assert(emit->key.tex[unit].num_samples > 1);
+
        /* Fetch one sample from an MSAA texture */
        struct tgsi_full_src_register sampleIndex =
           scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
        /* Fetch one sample from an MSAA texture */
        struct tgsi_full_src_register sampleIndex =
           scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
@@ -5461,8 +5591,9 @@ static boolean
  emit_txl_txb(struct svga_shader_emitter_v10 *emit,
               const struct tgsi_full_instruction *inst)
  {
  emit_txl_txb(struct svga_shader_emitter_v10 *emit,
               const struct tgsi_full_instruction *inst)
  {
-   unsigned target = inst->Texture.Texture;
-   unsigned opcode, unit;
+   const enum tgsi_texture_type target = inst->Texture.Texture;
+   VGPU10_OPCODE_TYPE opcode;
+   unsigned unit;
     int offsets[3];
     struct tgsi_full_src_register coord, lod_bias;
     struct tex_swizzle_info swz_info;
     int offsets[3];
     struct tgsi_full_src_register coord, lod_bias;
     struct tex_swizzle_info swz_info;
@@ -5511,6 +5642,50 @@ emit_txl_txb(struct svga_shader_emitter_v10 *emit,
  }
  
  
  }
  
  
+/**
+ * Emit code for TGSI_OPCODE_TXL2 (explicit LOD) for cubemap array.
+ */
+static boolean
+emit_txl2(struct svga_shader_emitter_v10 *emit,
+          const struct tgsi_full_instruction *inst)
+{
+   unsigned target = inst->Texture.Texture;
+   unsigned opcode, unit;
+   int offsets[3];
+   struct tgsi_full_src_register coord, lod;
+   struct tex_swizzle_info swz_info;
+
+   assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL2);
+
+   lod = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
+   unit = inst->Src[2].Register.Index;
+
+   begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
+                     &swz_info);
+
+   get_texel_offsets(emit, inst, offsets);
+
+   coord = setup_texcoord(emit, unit, &inst->Src[0]);
+
+   /* SAMPLE_L dst, coord(s0), resource, sampler, lod(s1.x) */
+   begin_emit_instruction(emit);
+   opcode = VGPU10_OPCODE_SAMPLE_L;
+   emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
+   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
+   emit_src_register(emit, &coord);
+   emit_resource_register(emit, unit);
+   emit_sampler_register(emit, unit);
+   emit_src_register(emit, &lod);
+   end_emit_instruction(emit);
+
+   end_tex_swizzle(emit, &swz_info);
+
+   free_temp_indexes(emit);
+
+   return TRUE;
+}
+
+
  /**
   * Emit code for TGSI_OPCODE_TXQ (texture query) instruction.
   */
  /**
   * Emit code for TGSI_OPCODE_TXQ (texture query) instruction.
   */
@@ -5520,7 +5695,7 @@ emit_txq(struct svga_shader_emitter_v10 *emit,
  {
     const uint unit = inst->Src[1].Register.Index;
  
  {
     const uint unit = inst->Src[1].Register.Index;
  
-   if (emit->key.tex[unit].texture_target == PIPE_BUFFER) {
+   if (emit->sampler_target[unit] == TGSI_TEXTURE_BUFFER) {
        /* RESINFO does not support querying texture buffers, so we instead
         * store texture buffer sizes in shader constants, then copy them to
         * implement TXQ instead of emitting RESINFO.
        /* RESINFO does not support querying texture buffers, so we instead
         * store texture buffer sizes in shader constants, then copy them to
         * implement TXQ instead of emitting RESINFO.
@@ -5553,13 +5728,12 @@ static boolean
  emit_simple(struct svga_shader_emitter_v10 *emit,
              const struct tgsi_full_instruction *inst)
  {
  emit_simple(struct svga_shader_emitter_v10 *emit,
              const struct tgsi_full_instruction *inst)
  {
-   const unsigned opcode = inst->Instruction.Opcode;
+   const enum tgsi_opcode opcode = inst->Instruction.Opcode;
     const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
     unsigned i;
  
     begin_emit_instruction(emit);
     const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
     unsigned i;
  
     begin_emit_instruction(emit);
-   emit_opcode(emit, translate_opcode(inst->Instruction.Opcode),
-               inst->Instruction.Saturate);
+   emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate);
     for (i = 0; i < op->num_dst; i++) {
        emit_dst_register(emit, &inst->Dst[i]);
     }
     for (i = 0; i < op->num_dst; i++) {
        emit_dst_register(emit, &inst->Dst[i]);
     }
@@ -5572,6 +5746,29 @@ emit_simple(struct svga_shader_emitter_v10 *emit,
  }
  
  
  }
  
  
+/**
+ * Emit code for the TGSI MOV instruction.
+ *
+ * We only special case the MOV instruction to try to detect constant
+ * color writes in the fragment shader: when a fragment shader moves a
+ * directly-addressed (non-indirect) constant register into output
+ * register 0, emit->constant_color_output is set to TRUE.  This function
+ * never clears that flag.
+ *
+ * The instruction itself is always emitted by emit_simple(), and that
+ * call's result is returned.
+ */
+static boolean
+emit_mov(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   const struct tgsi_full_src_register *src = &inst->Src[0];
+   const struct tgsi_full_dst_register *dst = &inst->Dst[0];
+
+   if (emit->unit == PIPE_SHADER_FRAGMENT &&
+       dst->Register.File == TGSI_FILE_OUTPUT &&
+       dst->Register.Index == 0 &&
+       src->Register.File == TGSI_FILE_CONSTANT &&
+       !src->Register.Indirect) {
+      emit->constant_color_output = TRUE;
+   }
+
+   return emit_simple(emit, inst);
+}
+
+
  /**
   * Emit a simple VGPU10 instruction which writes to multiple dest registers,
   * where TGSI only uses one dest register.
  /**
   * Emit a simple VGPU10 instruction which writes to multiple dest registers,
   * where TGSI only uses one dest register.
@@ -5582,13 +5779,12 @@ emit_simple_1dst(struct svga_shader_emitter_v10 *emit,
                   unsigned dst_count,
                   unsigned dst_index)
  {
                   unsigned dst_count,
                   unsigned dst_index)
  {
-   const unsigned opcode = inst->Instruction.Opcode;
+   const enum tgsi_opcode opcode = inst->Instruction.Opcode;
     const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
     unsigned i;
  
     begin_emit_instruction(emit);
     const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
     unsigned i;
  
     begin_emit_instruction(emit);
-   emit_opcode(emit, translate_opcode(inst->Instruction.Opcode),
-               inst->Instruction.Saturate);
+   emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate);
  
     for (i = 0; i < dst_count; i++) {
        if (i == dst_index) {
  
     for (i = 0; i < dst_count; i++) {
        if (i == dst_index) {
@@ -5615,7 +5811,7 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
                          unsigned inst_number,
                          const struct tgsi_full_instruction *inst)
  {
                          unsigned inst_number,
                          const struct tgsi_full_instruction *inst)
  {
-   const unsigned opcode = inst->Instruction.Opcode;
+   const enum tgsi_opcode opcode = inst->Instruction.Opcode;
  
     switch (opcode) {
     case TGSI_OPCODE_ADD:
  
     switch (opcode) {
     case TGSI_OPCODE_ADD:
@@ -5652,7 +5848,6 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
     case TGSI_OPCODE_MAD:
     case TGSI_OPCODE_MAX:
     case TGSI_OPCODE_MIN:
     case TGSI_OPCODE_MAD:
     case TGSI_OPCODE_MAX:
     case TGSI_OPCODE_MIN:
-   case TGSI_OPCODE_MOV:
     case TGSI_OPCODE_MUL:
     case TGSI_OPCODE_NOP:
     case TGSI_OPCODE_NOT:
     case TGSI_OPCODE_MUL:
     case TGSI_OPCODE_NOP:
     case TGSI_OPCODE_NOT:
@@ -5677,13 +5872,12 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
        /* simple instructions */
        return emit_simple(emit, inst);
  
        /* simple instructions */
        return emit_simple(emit, inst);
  
-
+   case TGSI_OPCODE_MOV:
+      return emit_mov(emit, inst);
     case TGSI_OPCODE_EMIT:
        return emit_vertex(emit, inst);
     case TGSI_OPCODE_ENDPRIM:
        return emit_endprim(emit, inst);
     case TGSI_OPCODE_EMIT:
        return emit_vertex(emit, inst);
     case TGSI_OPCODE_ENDPRIM:
        return emit_endprim(emit, inst);
-   case TGSI_OPCODE_ABS:
-      return emit_abs(emit, inst);
     case TGSI_OPCODE_IABS:
        return emit_iabs(emit, inst);
     case TGSI_OPCODE_ARL:
     case TGSI_OPCODE_IABS:
        return emit_iabs(emit, inst);
     case TGSI_OPCODE_ARL:
@@ -5699,10 +5893,6 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
        return emit_cmp(emit, inst);
     case TGSI_OPCODE_COS:
        return emit_sincos(emit, inst);
        return emit_cmp(emit, inst);
     case TGSI_OPCODE_COS:
        return emit_sincos(emit, inst);
-   case TGSI_OPCODE_DP2A:
-      return emit_dp2a(emit, inst);
-   case TGSI_OPCODE_DPH:
-      return emit_dph(emit, inst);
     case TGSI_OPCODE_DST:
        return emit_dst(emit, inst);
     case TGSI_OPCODE_EX2:
     case TGSI_OPCODE_DST:
        return emit_dst(emit, inst);
     case TGSI_OPCODE_EX2:
@@ -5719,6 +5909,8 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
        return emit_lg2(emit, inst);
     case TGSI_OPCODE_LIT:
        return emit_lit(emit, inst);
        return emit_lg2(emit, inst);
     case TGSI_OPCODE_LIT:
        return emit_lit(emit, inst);
+   case TGSI_OPCODE_LODQ:
+      return emit_lodq(emit, inst);
     case TGSI_OPCODE_LOG:
        return emit_log(emit, inst);
     case TGSI_OPCODE_LRP:
     case TGSI_OPCODE_LOG:
        return emit_log(emit, inst);
     case TGSI_OPCODE_LRP:
@@ -5731,8 +5923,6 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
        return emit_rsq(emit, inst);
     case TGSI_OPCODE_SAMPLE:
        return emit_sample(emit, inst);
        return emit_rsq(emit, inst);
     case TGSI_OPCODE_SAMPLE:
        return emit_sample(emit, inst);
-   case TGSI_OPCODE_SCS:
-      return emit_scs(emit, inst);
     case TGSI_OPCODE_SEQ:
        return emit_seq(emit, inst);
     case TGSI_OPCODE_SGE:
     case TGSI_OPCODE_SEQ:
        return emit_seq(emit, inst);
     case TGSI_OPCODE_SGE:
@@ -5751,10 +5941,12 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
        return emit_ssg(emit, inst);
     case TGSI_OPCODE_ISSG:
        return emit_issg(emit, inst);
        return emit_ssg(emit, inst);
     case TGSI_OPCODE_ISSG:
        return emit_issg(emit, inst);
-   case TGSI_OPCODE_SUB:
-      return emit_sub(emit, inst);
     case TGSI_OPCODE_TEX:
        return emit_tex(emit, inst);
     case TGSI_OPCODE_TEX:
        return emit_tex(emit, inst);
+   case TGSI_OPCODE_TG4:
+      return emit_tg4(emit, inst);
+   case TGSI_OPCODE_TEX2:
+      return emit_tex2(emit, inst);
     case TGSI_OPCODE_TXP:
        return emit_txp(emit, inst);
     case TGSI_OPCODE_TXB:
     case TGSI_OPCODE_TXP:
        return emit_txp(emit, inst);
     case TGSI_OPCODE_TXB:
@@ -5765,12 +5957,12 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
        return emit_txd(emit, inst);
     case TGSI_OPCODE_TXF:
        return emit_txf(emit, inst);
        return emit_txd(emit, inst);
     case TGSI_OPCODE_TXF:
        return emit_txf(emit, inst);
+   case TGSI_OPCODE_TXL2:
+      return emit_txl2(emit, inst);
     case TGSI_OPCODE_TXQ:
        return emit_txq(emit, inst);
     case TGSI_OPCODE_UIF:
        return emit_if(emit, inst);
     case TGSI_OPCODE_TXQ:
        return emit_txq(emit, inst);
     case TGSI_OPCODE_UIF:
        return emit_if(emit, inst);
-   case TGSI_OPCODE_XPD:
-      return emit_xpd(emit, inst);
     case TGSI_OPCODE_UMUL_HI:
     case TGSI_OPCODE_IMUL_HI:
     case TGSI_OPCODE_UDIV:
     case TGSI_OPCODE_UMUL_HI:
     case TGSI_OPCODE_IMUL_HI:
     case TGSI_OPCODE_UDIV:
@@ -6095,6 +6287,55 @@ emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit)
  }
  
  
  }
  
  
+/**
+ * Emit the extra code to get the current sample position value and
+ * put it into a temp register.
+ *
+ * Nothing is emitted unless emit->fs.sample_pos_sys_index is valid.
+ * Otherwise a SAMPLE_POS instruction on the rasterizer register, indexed
+ * by the X component of the sample-id system value, writes
+ * TEMP[emit->fs.sample_pos_tmp_index], and the immediate
+ * (0.5, 0.5, 0.0, 0.0) is then added to that temp.
+ *
+ * Must only be called for fragment shaders (asserted); the emitting path
+ * also asserts that the shader version is at least 41.
+ */
+static void
+emit_sample_position_instructions(struct svga_shader_emitter_v10 *emit)
+{
+   assert(emit->unit == PIPE_SHADER_FRAGMENT);
+
+   if (emit->fs.sample_pos_sys_index != INVALID_INDEX) {
+      assert(emit->version >= 41);
+
+      struct tgsi_full_dst_register tmp_dst =
+         make_dst_temp_reg(emit->fs.sample_pos_tmp_index);
+      struct tgsi_full_src_register half =
+         make_immediate_reg_float4(emit, 0.5, 0.5, 0.0, 0.0);
+
+      struct tgsi_full_src_register tmp_src =
+         make_src_temp_reg(emit->fs.sample_pos_tmp_index);
+      struct tgsi_full_src_register sample_index_reg =
+         make_src_scalar_reg(TGSI_FILE_SYSTEM_VALUE,
+                             emit->fs.sample_id_sys_index, TGSI_SWIZZLE_X);
+
+      /* The first src register is a shader resource (if we want a
+       * multisampled resource sample position) or the rasterizer register
+       * (if we want the current sample position in the color buffer).  We
+       * want the latter.
+       */
+
+      /* SAMPLE_POS dst, RASTERIZER, sampleIndex */
+      begin_emit_instruction(emit);
+      emit_opcode(emit, VGPU10_OPCODE_SAMPLE_POS, FALSE);
+      emit_dst_register(emit, &tmp_dst);
+      emit_rasterizer_register(emit);
+      emit_src_register(emit, &sample_index_reg);
+      end_emit_instruction(emit);
+
+      /* Convert from D3D coords to GL coords by adding 0.5 bias */
+      /* ADD dst, dst, half */
+      begin_emit_instruction(emit);
+      emit_opcode(emit, VGPU10_OPCODE_ADD, FALSE);
+      emit_dst_register(emit, &tmp_dst);
+      emit_src_register(emit, &tmp_src);
+      emit_src_register(emit, &half);
+      end_emit_instruction(emit);
+   }
+}
+
+
  /**
   * Emit extra instructions to adjust VS inputs/attributes.  This can
   * mean casting a vertex attribute from int to float or setting the
  /**
   * Emit extra instructions to adjust VS inputs/attributes.  This can
   * mean casting a vertex attribute from int to float or setting the
@@ -6141,6 +6382,11 @@ emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit)
  
        while (adjust_mask) {
           unsigned index = u_bit_scan(&adjust_mask);
  
        while (adjust_mask) {
           unsigned index = u_bit_scan(&adjust_mask);
+
+         /* skip the instruction if this vertex attribute is not being used */
+         if (emit->info.input_usage_mask[index] == 0)
+            continue;
+
           unsigned tmp = emit->vs.adjusted_input[index];
           struct tgsi_full_src_register input_src =
              make_src_reg(TGSI_FILE_INPUT, index);
           unsigned tmp = emit->vs.adjusted_input[index];
           struct tgsi_full_src_register input_src =
              make_src_reg(TGSI_FILE_INPUT, index);
@@ -6215,15 +6461,17 @@ alloc_common_immediates(struct svga_shader_emitter_v10 *emit)
     emit->common_immediate_pos[n++] =
        alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f);
  
     emit->common_immediate_pos[n++] =
        alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f);
  
-   emit->common_immediate_pos[n++] =
-      alloc_immediate_float4(emit, 128.0f, -128.0f, 2.0f, 3.0f);
+   if (emit->info.opcode_count[TGSI_OPCODE_LIT] > 0) {
+      emit->common_immediate_pos[n++] =
+         alloc_immediate_float4(emit, 128.0f, -128.0f, 0.0f, 0.0f);
+   }
  
     emit->common_immediate_pos[n++] =
        alloc_immediate_int4(emit, 0, 1, 0, -1);
  
     if (emit->key.vs.attrib_puint_to_snorm) {
        emit->common_immediate_pos[n++] =
  
     emit->common_immediate_pos[n++] =
        alloc_immediate_int4(emit, 0, 1, 0, -1);
  
     if (emit->key.vs.attrib_puint_to_snorm) {
        emit->common_immediate_pos[n++] =
-         alloc_immediate_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);
+         alloc_immediate_float4(emit, -2.0f, 2.0f, 3.0f, -1.66666f);
     }
  
     if (emit->key.vs.attrib_puint_to_uscaled) {
     }
  
     if (emit->key.vs.attrib_puint_to_uscaled) {
@@ -6239,7 +6487,18 @@ alloc_common_immediates(struct svga_shader_emitter_v10 *emit)
           alloc_immediate_int4(emit, 22, 30, 0, 0);
     }
  
           alloc_immediate_int4(emit, 22, 30, 0, 0);
     }
  
-   assert(n <= Elements(emit->common_immediate_pos));
+   unsigned i;
+
+   for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+      if (emit->key.tex[i].texel_bias) {
+         /* Replace 0.0f if more immediate float value is needed */
+         emit->common_immediate_pos[n++] =
+            alloc_immediate_float4(emit, 0.0001f, 0.0f, 0.0f, 0.0f);
+         break;
+      }
+   }
+
+   assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
     emit->num_common_immediates = n;
  }
  
     emit->num_common_immediates = n;
  }
  
@@ -6296,6 +6555,7 @@ emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
     if (emit->unit == PIPE_SHADER_FRAGMENT) {
        emit_frontface_instructions(emit);
        emit_fragcoord_instructions(emit);
     if (emit->unit == PIPE_SHADER_FRAGMENT) {
        emit_frontface_instructions(emit);
        emit_fragcoord_instructions(emit);
+      emit_sample_position_instructions(emit);
     }
     else if (emit->unit == PIPE_SHADER_VERTEX) {
        emit_vertex_attrib_instructions(emit);
     }
     else if (emit->unit == PIPE_SHADER_VERTEX) {
        emit_vertex_attrib_instructions(emit);
@@ -6305,6 +6565,47 @@ emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
  }
  
  
  }
  
  
+/**
+ * The device has no direct support for the pipe_blend_state::alpha_to_one
+ * option so we implement it here with shader code.
+ *
+ * Note that this is kind of pointless, actually.  Here we're clobbering
+ * the alpha value with 1.0.  So if alpha-to-coverage is enabled, we'll wind
+ * up with 100% coverage.  That's almost certainly not what the user wants.
+ * The work-around is to add extra shader code to compute coverage from alpha
+ * and write it to the coverage output register (if the user's shader doesn't
+ * do so already).  We'll probably do that in the future.
+ *
+ * One MOV of 1.0 into the W component is emitted for each of the
+ * emit->fs.num_color_outputs color outputs.
+ *
+ * \param fs_color_tmp_index  index of the temp register that is written
+ *                            in place of color output 0, or INVALID_INDEX
+ *                            to write color output 0 directly.  Outputs
+ *                            other than 0 are always written directly.
+ */
+static void
+emit_alpha_to_one_instructions(struct svga_shader_emitter_v10 *emit,
+                               unsigned fs_color_tmp_index)
+{
+   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
+   unsigned i;
+
+   /* Note: it's not 100% clear from the spec if we're supposed to clobber
+    * the alpha for all render targets.  But that's what NVIDIA does and
+    * that's what Piglit tests.
+    */
+   for (i = 0; i < emit->fs.num_color_outputs; i++) {
+      struct tgsi_full_dst_register color_dst;
+
+      if (fs_color_tmp_index != INVALID_INDEX && i == 0) {
+         /* write to the temp color register */
+         color_dst = make_dst_temp_reg(fs_color_tmp_index);
+      }
+      else {
+         /* write directly to the color[i] output */
+         color_dst = make_dst_output_reg(emit->fs.color_out_index[i]);
+      }
+
+      color_dst = writemask_dst(&color_dst, TGSI_WRITEMASK_W);
+
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &one, FALSE);
+   }
+}
+
+
  /**
   * Emit alpha test code.  This compares TEMP[fs_color_tmp_index].w
   * against the alpha reference value and discards the fragment if the
  /**
   * Emit alpha test code.  This compares TEMP[fs_color_tmp_index].w
   * against the alpha reference value and discards the fragment if the
@@ -6341,7 +6642,8 @@ emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
     emit_src_register(emit, &tmp_src_x);
     end_emit_instruction(emit);
  
     emit_src_register(emit, &tmp_src_x);
     end_emit_instruction(emit);
  
-   /* If we don't need to broadcast the color below, emit final color here */
+   /* If we don't need to broadcast the color below, emit the final color here.
+    */
     if (emit->key.fs.write_color0_to_n_cbufs <= 1) {
        /* MOV output.color, tempcolor */
        emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst,
     if (emit->key.fs.write_color0_to_n_cbufs <= 1) {
        /* MOV output.color, tempcolor */
        emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst,
@@ -6355,7 +6657,8 @@ emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
  /**
   * Emit instructions for writing a single color output to multiple
   * color buffers.
  /**
   * Emit instructions for writing a single color output to multiple
   * color buffers.
- * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
+ * This is used when key.fs.white_fragments is true, or when the
+ * TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
   * property is set and the number of render targets is greater than one.
   * \param fs_color_tmp_index  index of the temp register that holds the
   *                            color to broadcast.
   * property is set and the number of render targets is greater than one.
   * \param fs_color_tmp_index  index of the temp register that holds the
   *                            color to broadcast.
@@ -6366,11 +6669,19 @@ emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit,
  {
     const unsigned n = emit->key.fs.write_color0_to_n_cbufs;
     unsigned i;
  {
     const unsigned n = emit->key.fs.write_color0_to_n_cbufs;
     unsigned i;
-   struct tgsi_full_src_register color_src =
-      make_src_temp_reg(fs_color_tmp_index);
+   struct tgsi_full_src_register color_src;
+
+   if (emit->key.fs.white_fragments) {
+      /* set all color outputs to white */
+      color_src = make_immediate_reg_float(emit, 1.0f);
+   }
+   else {
+      /* set all color outputs to TEMP[fs_color_tmp_index] */
+      assert(fs_color_tmp_index != INVALID_INDEX);
+      color_src = make_src_temp_reg(fs_color_tmp_index);
+   }
  
     assert(emit->unit == PIPE_SHADER_FRAGMENT);
  
     assert(emit->unit == PIPE_SHADER_FRAGMENT);
-   assert(n > 1);
  
     for (i = 0; i < n; i++) {
        unsigned output_reg = emit->fs.color_out_index[i];
  
     for (i = 0; i < n; i++) {
        unsigned output_reg = emit->fs.color_out_index[i];
@@ -6404,15 +6715,22 @@ emit_post_helpers(struct svga_shader_emitter_v10 *emit)
     else if (emit->unit == PIPE_SHADER_FRAGMENT) {
        const unsigned fs_color_tmp_index = emit->fs.color_tmp_index;
  
     else if (emit->unit == PIPE_SHADER_FRAGMENT) {
        const unsigned fs_color_tmp_index = emit->fs.color_tmp_index;
  
+      assert(!(emit->key.fs.white_fragments &&
+               emit->key.fs.write_color0_to_n_cbufs == 0));
+
        /* We no longer want emit_dst_register() to substitute the
         * temporary fragment color register for the real color output.
         */
        emit->fs.color_tmp_index = INVALID_INDEX;
  
        /* We no longer want emit_dst_register() to substitute the
         * temporary fragment color register for the real color output.
         */
        emit->fs.color_tmp_index = INVALID_INDEX;
  
+      if (emit->key.fs.alpha_to_one) {
+         emit_alpha_to_one_instructions(emit, fs_color_tmp_index);
+      }
        if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
           emit_alpha_test_instructions(emit, fs_color_tmp_index);
        }
        if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
           emit_alpha_test_instructions(emit, fs_color_tmp_index);
        }
-      if (emit->key.fs.write_color0_to_n_cbufs > 1) {
+      if (emit->key.fs.write_color0_to_n_cbufs > 1 ||
+          emit->key.fs.white_fragments) {
           emit_broadcast_color_instructions(emit, fs_color_tmp_index);
        }
     }
           emit_broadcast_color_instructions(emit, fs_color_tmp_index);
        }
     }
@@ -6490,8 +6808,8 @@ emit_vgpu10_header(struct svga_shader_emitter_v10 *emit)
     VGPU10ProgramToken ptoken;
  
     /* First token: VGPU10ProgramToken  (version info, program type (VS,GS,PS)) */
     VGPU10ProgramToken ptoken;
  
     /* First token: VGPU10ProgramToken  (version info, program type (VS,GS,PS)) */
-   ptoken.majorVersion = 4;
-   ptoken.minorVersion = 0;
+   ptoken.majorVersion = emit->version / 10;
+   ptoken.minorVersion = emit->version % 10;
     ptoken.programType = translate_shader_type(emit->unit);
     if (!emit_dword(emit, ptoken.value))
        return FALSE;
     ptoken.programType = translate_shader_type(emit->unit);
     if (!emit_dword(emit, ptoken.value))
        return FALSE;
@@ -6551,12 +6869,13 @@ transform_fs_pstipple(struct svga_shader_emitter_v10 *emit,
        tgsi_dump(tokens,0);
     }
  
        tgsi_dump(tokens,0);
     }
  
-   new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0);
+   new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
+                                                     TGSI_FILE_INPUT);
  
     emit->fs.pstipple_sampler_unit = unit;
  
     /* Setup texture state for stipple */
  
     emit->fs.pstipple_sampler_unit = unit;
  
     /* Setup texture state for stipple */
-   emit->key.tex[unit].texture_target = PIPE_TEXTURE_2D;
+   emit->sampler_target[unit] = TGSI_TEXTURE_2D;
     emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
     emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
     emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
     emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
     emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
     emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
@@ -6596,7 +6915,7 @@ struct svga_shader_variant *
  svga_tgsi_vgpu10_translate(struct svga_context *svga,
                             const struct svga_shader *shader,
                             const struct svga_compile_key *key,
  svga_tgsi_vgpu10_translate(struct svga_context *svga,
                             const struct svga_shader *shader,
                             const struct svga_compile_key *key,
-                           unsigned unit)
+                           enum pipe_shader_type unit)
  {
     struct svga_shader_variant *variant = NULL;
     struct svga_shader_emitter_v10 *emit;
  {
     struct svga_shader_variant *variant = NULL;
     struct svga_shader_emitter_v10 *emit;
@@ -6611,14 +6930,17 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga,
     /* These two flags cannot be used together */
     assert(key->vs.need_prescale + key->vs.undo_viewport <= 1);
  
     /* These two flags cannot be used together */
     assert(key->vs.need_prescale + key->vs.undo_viewport <= 1);
  
+   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_TGSIVGPU10TRANSLATE);
     /*
      * Setup the code emitter
      */
     emit = alloc_emitter();
     if (!emit)
     /*
      * Setup the code emitter
      */
     emit = alloc_emitter();
     if (!emit)
-      return NULL;
+      goto done;
  
     emit->unit = unit;
  
     emit->unit = unit;
+   emit->version = svga_have_sm4_1(svga) ? 41 : 40;
+
     emit->key = *key;
  
     emit->vposition.need_prescale = (emit->key.vs.need_prescale ||
     emit->key = *key;
  
     emit->vposition.need_prescale = (emit->key.vs.need_prescale ||
@@ -6630,6 +6952,8 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga,
     emit->fs.color_tmp_index = INVALID_INDEX;
     emit->fs.face_input_index = INVALID_INDEX;
     emit->fs.fragcoord_input_index = INVALID_INDEX;
     emit->fs.color_tmp_index = INVALID_INDEX;
     emit->fs.face_input_index = INVALID_INDEX;
     emit->fs.fragcoord_input_index = INVALID_INDEX;
+   emit->fs.sample_id_sys_index = INVALID_INDEX;
+   emit->fs.sample_pos_sys_index = INVALID_INDEX;
  
     emit->gs.prim_id_index = INVALID_INDEX;
  
  
     emit->gs.prim_id_index = INVALID_INDEX;
  
@@ -6691,6 +7015,13 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga,
        svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage);
     }
  
        svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage);
     }
  
+   /* Since vertex shader does not need to go through the linker to
+    * establish the input map, we need to make sure the highest index
+    * of input registers is set properly here.
+    */
+   emit->linkage.input_map_max = MAX2((int)emit->linkage.input_map_max,
+                                      emit->info.file_max[TGSI_FILE_INPUT]);
+
     determine_clipping_mode(emit);
  
     if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX) {
     determine_clipping_mode(emit);
  
     if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX) {
@@ -6762,11 +7093,22 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga,
  
     variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit;
  
  
     variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit;
  
+   /* If there was exactly one write to a fragment shader output register
+    * and it came from a constant buffer, we know all fragments will have
+    * the same color (except for blending).
+    */
+   variant->constant_color_output =
+      emit->constant_color_output && emit->num_output_writes == 1;
+
     /** keep track in the variant if flat interpolation is used
      *  for any of the varyings.
      */
     variant->uses_flat_interp = emit->uses_flat_interp;
  
     /** keep track in the variant if flat interpolation is used
      *  for any of the varyings.
      */
     variant->uses_flat_interp = emit->uses_flat_interp;
  
+   variant->fs_shadow_compare_units = emit->fs.shadow_compare_units;
+
+   variant->fs_shadow_compare_units = emit->fs.shadow_compare_units;
+
     if (tokens != shader->tokens) {
        tgsi_free_tokens(tokens);
     }
     if (tokens != shader->tokens) {
        tgsi_free_tokens(tokens);
     }
@@ -6774,5 +7116,7 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga,
  cleanup:
     free_emitter(emit);
  
  cleanup:
     free_emitter(emit);
  
+done:
+   SVGA_STATS_TIME_POP(svga_sws(svga));
     return variant;
  }
     return variant;
  }