zink: correct depth-stencil format
[mesa.git] / src / gallium / drivers / svga / svga_tgsi_vgpu10.c
index 423f215e53fa070d48f692a9591c4c754606fd82..099ede6017dc2b3efda51ded8238e37352368187 100644 (file)
@@ -185,6 +185,11 @@ struct svga_shader_emitter_v10
 
       /** Which texture units are doing shadow comparison in the FS code */
       unsigned shadow_compare_units;
+
+      unsigned sample_id_sys_index;  /**< TGSI index of sample id sys value */
+
+      unsigned sample_pos_sys_index; /**< TGSI index of sample pos sys value */
+      unsigned sample_pos_tmp_index; /**< which temp reg has the sample pos */
    } fs;
 
    /* For geometry shaders only */
@@ -410,6 +415,9 @@ check_register_index(struct svga_shader_emitter_v10 *emit,
          emit->register_overflow = TRUE;
       }
       break;
+   case VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
+      /* nothing */
+      break;
    default:
       assert(0);
       ; /* nothing */
@@ -644,6 +652,19 @@ translate_opcode(enum tgsi_opcode opcode)
       return VGPU10_OPCODE_LT;
    case TGSI_OPCODE_ROUND:
       return VGPU10_OPCODE_ROUND_NE;
+   case TGSI_OPCODE_SAMPLE_POS:
+      /* Note: we never actually get this opcode because there's no GLSL
+       * function to query multisample resource sample positions.  There's
+       * only the TGSI_SEMANTIC_SAMPLEPOS system value which contains the
+       * position of the current sample in the render target.
+       */
+      /* FALL-THROUGH */
+   case TGSI_OPCODE_SAMPLE_INFO:
+      /* NOTE: we never actually get this opcode because the GLSL compiler
+       * implements the gl_NumSamples variable with a simple constant in the
+       * constant buffer.
+       */
+      /* FALL-THROUGH */
    default:
       assert(!"Unexpected TGSI opcode in translate_opcode()");
       return VGPU10_OPCODE_NOP;
@@ -903,6 +924,15 @@ emit_dst_register(struct svga_shader_emitter_v10 *emit,
             emit_dword(emit, operand0.value);
             return;
          }
+         else if (sem_name == TGSI_SEMANTIC_SAMPLEMASK) {
+            /* Fragment sample mask output */
+            operand0.value = 0;
+            operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK;
+            operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
+            operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
+            emit_dword(emit, operand0.value);
+            return;
+         }
          else if (index == emit->fs.color_out_index[0] &&
              emit->fs.color_tmp_index != INVALID_INDEX) {
             /* replace OUTPUT[COLOR] with TEMP[COLOR].  We need to store the
@@ -965,6 +995,9 @@ emit_dst_register(struct svga_shader_emitter_v10 *emit,
 
 /**
  * Translate a src register of a TGSI instruction and emit VGPU10 tokens.
+ * In quite a few cases, we do register substitution.  For example, if
+ * the TGSI register is the front/back-face register, we replace that with
+ * a temp register containing a value we computed earlier.
  */
 static void
 emit_src_register(struct svga_shader_emitter_v10 *emit,
@@ -1005,6 +1038,20 @@ emit_src_register(struct svga_shader_emitter_v10 *emit,
             index = emit->linkage.input_map[index];
          }
       }
+      else if (file == TGSI_FILE_SYSTEM_VALUE) {
+         if (index == emit->fs.sample_pos_sys_index) {
+            assert(emit->version >= 41);
+            /* Current sample position is in a temp register */
+            file = TGSI_FILE_TEMPORARY;
+            index = emit->fs.sample_pos_tmp_index;
+         }
+         else {
+            /* Map the TGSI system value to a VGPU10 input register */
+            assert(index < ARRAY_SIZE(emit->system_value_indexes));
+            file = TGSI_FILE_INPUT;
+            index = emit->system_value_indexes[index];
+         }
+      }
    }
    else if (emit->unit == PIPE_SHADER_GEOMETRY) {
       if (file == TGSI_FILE_INPUT) {
@@ -1027,7 +1074,9 @@ emit_src_register(struct svga_shader_emitter_v10 *emit,
          }
       }
       else if (file == TGSI_FILE_SYSTEM_VALUE) {
+         /* Map the TGSI system value to a VGPU10 input register */
          assert(index < ARRAY_SIZE(emit->system_value_indexes));
+         file = TGSI_FILE_INPUT;
          index = emit->system_value_indexes[index];
       }
    }
@@ -1195,6 +1244,32 @@ emit_face_register(struct svga_shader_emitter_v10 *emit)
 }
 
 
+/**
+ * Emit the operand token for the "rasterizer" register used by the
+ * SAMPLE_POS instruction.
+ *
+ * The operand is built as a 0D-indexed (no register index), 4-component
+ * operand in swizzle mode with an identity XYZW swizzle, and is passed to
+ * emit_dword() as a single dword.
+ *
+ * \param emit  the shader emitter that receives the token
+ */
+static void
+emit_rasterizer_register(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10OperandToken0 operand0;
+
+   /* Clear the whole token before setting the individual fields */
+   operand0.value = 0;
+
+   /* The rasterizer operand takes no register index (there's only one) */
+   operand0.operandType = VGPU10_OPERAND_TYPE_RASTERIZER;
+   operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
+   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
+   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
+   operand0.swizzleX = VGPU10_COMPONENT_X;
+   operand0.swizzleY = VGPU10_COMPONENT_Y;
+   operand0.swizzleZ = VGPU10_COMPONENT_Z;
+   operand0.swizzleW = VGPU10_COMPONENT_W;
+
+   emit_dword(emit, operand0.value);
+}
+
+
 /**
  * Emit the token for a VGPU10 opcode.
  * \param saturate   clamp result to [0,1]?
@@ -1384,6 +1459,29 @@ make_src_reg(enum tgsi_file_type file, unsigned index)
 }
 
 
+/**
+ * Create a tgsi_full_src_register with a swizzle such that all four
+ * vector components have the same scalar value.
+ *
+ * The returned register is zero-initialized except for the register file,
+ * the register index and the four swizzle fields, which are all set to
+ * \p component.
+ *
+ * \param file       TGSI register file of the source
+ * \param index      register index within that file
+ * \param component  swizzle selector replicated to X, Y, Z and W; asserted
+ *                   to lie in [TGSI_SWIZZLE_X, TGSI_SWIZZLE_W]
+ * \return the new source register, by value
+ */
+static struct tgsi_full_src_register
+make_src_scalar_reg(enum tgsi_file_type file, unsigned index, unsigned component)
+{
+   struct tgsi_full_src_register reg;
+
+   assert(component >= TGSI_SWIZZLE_X);
+   assert(component <= TGSI_SWIZZLE_W);
+
+   memset(&reg, 0, sizeof(reg));
+   reg.Register.File = file;
+   reg.Register.Index = index;
+   reg.Register.SwizzleX =
+   reg.Register.SwizzleY =
+   reg.Register.SwizzleZ =
+   reg.Register.SwizzleW = component;
+   return reg;
+}
+
+
 /**
  * Create a tgsi_full_src_register for a temporary.
  */
@@ -1792,7 +1890,7 @@ alloc_immediate_int4(struct svga_shader_emitter_v10 *emit,
 static unsigned
 alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index)
 {
-   const unsigned n = emit->info.file_max[TGSI_FILE_INPUT] + 1 + index;
+   const unsigned n = emit->linkage.input_map_max + 1 + index;
    assert(index < ARRAY_SIZE(emit->system_value_indexes));
    emit->system_value_indexes[index] = n;
    return n;
@@ -1860,13 +1958,25 @@ translate_interpolation(const struct svga_shader_emitter_v10 *emit,
    case TGSI_INTERPOLATE_CONSTANT:
       return VGPU10_INTERPOLATION_CONSTANT;
    case TGSI_INTERPOLATE_LINEAR:
-      return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ?
-             VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID :
-             VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE;
+      if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) {
+         return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID;
+      } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE &&
+                 emit->version >= 41) {
+         return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE;
+      } else {
+         return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE;
+      }
+      break;
    case TGSI_INTERPOLATE_PERSPECTIVE:
-      return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ?
-             VGPU10_INTERPOLATION_LINEAR_CENTROID :
-             VGPU10_INTERPOLATION_LINEAR;
+      if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) {
+         return VGPU10_INTERPOLATION_LINEAR_CENTROID;
+      } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE &&
+                 emit->version >= 41) {
+         return VGPU10_INTERPOLATION_LINEAR_SAMPLE;
+      } else {
+         return VGPU10_INTERPOLATION_LINEAR;
+      }
+      break;
    default:
       assert(!"Unexpected interpolation mode");
       return VGPU10_INTERPOLATION_CONSTANT;
@@ -2012,7 +2122,9 @@ emit_decl_instruction(struct svga_shader_emitter_v10 *emit,
                       unsigned index, unsigned size)
 {
    assert(opcode0.opcodeType);
-   assert(operand0.mask);
+   assert(operand0.mask ||
+          (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_DEPTH) ||
+          (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK));
 
    begin_emit_instruction(emit);
    emit_dword(emit, opcode0.value);
@@ -2074,6 +2186,7 @@ emit_input_declaration(struct svga_shader_emitter_v10 *emit,
    assert(opcodeType == VGPU10_OPCODE_DCL_INPUT ||
           opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV ||
           opcodeType == VGPU10_OPCODE_DCL_INPUT_PS ||
+          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SIV ||
           opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV);
    assert(operandType == VGPU10_OPERAND_TYPE_INPUT ||
           operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID);
@@ -2085,13 +2198,17 @@ emit_input_declaration(struct svga_shader_emitter_v10 *emit,
           name == VGPU10_NAME_INSTANCE_ID ||
           name == VGPU10_NAME_VERTEX_ID ||
           name == VGPU10_NAME_PRIMITIVE_ID ||
-          name == VGPU10_NAME_IS_FRONT_FACE);
+          name == VGPU10_NAME_IS_FRONT_FACE ||
+          name == VGPU10_NAME_SAMPLE_INDEX);
+
    assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED ||
           interpMode == VGPU10_INTERPOLATION_CONSTANT ||
           interpMode == VGPU10_INTERPOLATION_LINEAR ||
           interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID ||
           interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE ||
-          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID);
+          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID ||
+          interpMode == VGPU10_INTERPOLATION_LINEAR_SAMPLE ||
+          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE);
 
    check_register_index(emit, opcodeType, index);
 
@@ -2178,7 +2295,32 @@ emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit)
    operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
    operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
    operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
-   operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
+   operand0.mask = 0;
+
+   emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
+}
+
+
+/**
+ * Emit the declaration for the fragment sample mask/coverage output.
+ *
+ * Asserts that the emitter is translating a fragment shader and that
+ * emit->version is at least 41.  The output is declared with a
+ * DCL_OUTPUT opcode and an OUTPUT_COVERAGE_MASK operand that has no
+ * register index (0D) and a zero write mask; emit_decl_instruction()
+ * explicitly accepts a zero mask for this operand type.
+ *
+ * \param emit  the shader emitter that receives the declaration
+ */
+static void
+emit_samplemask_output_declaration(struct svga_shader_emitter_v10 *emit)
+{
+   VGPU10OpcodeToken0 opcode0;
+   VGPU10OperandToken0 operand0;
+   VGPU10NameToken name_token;
+
+   assert(emit->unit == PIPE_SHADER_FRAGMENT);
+   assert(emit->version >= 41);
+
+   opcode0.value = operand0.value = name_token.value = 0;
+
+   opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
+   operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK;
+   operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
+   operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
+   operand0.mask = 0;
 
    emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
 }
@@ -2216,8 +2358,33 @@ emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
                              VGPU10_OPERAND_4_COMPONENT_MASK_X,
                              VGPU10_INTERPOLATION_UNDEFINED);
       break;
+   case TGSI_SEMANTIC_SAMPLEID:
+      assert(emit->unit == PIPE_SHADER_FRAGMENT);
+      emit->fs.sample_id_sys_index = index;
+      index = alloc_system_value_index(emit, index);
+      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_PS_SIV,
+                             VGPU10_OPERAND_TYPE_INPUT,
+                             VGPU10_OPERAND_INDEX_1D,
+                             index, 1,
+                             VGPU10_NAME_SAMPLE_INDEX,
+                             VGPU10_OPERAND_4_COMPONENT,
+                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
+                             VGPU10_OPERAND_4_COMPONENT_MASK_X,
+                             VGPU10_INTERPOLATION_CONSTANT);
+      break;
+   case TGSI_SEMANTIC_SAMPLEPOS:
+      /* This system value contains the position of the current sample
+       * when using per-sample shading.  We implement this by calling
+       * the VGPU10_OPCODE_SAMPLE_POS instruction with the current sample
+       * index as the argument.  See emit_sample_position_instructions().
+       */
+      assert(emit->version >= 41);
+      emit->fs.sample_pos_sys_index = index;
+      index = alloc_system_value_index(emit, index);
+      break;
    default:
-      ; /* XXX */
+      debug_printf("unexpected sytem value semantic index %u\n",
+         semantic_name);
    }
 }
 
@@ -2390,6 +2557,12 @@ emit_input_declarations(struct svga_shader_emitter_v10 *emit)
             interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
             name = VGPU10_NAME_PRIMITIVE_ID;
          }
+         else if (semantic_name == TGSI_SEMANTIC_SAMPLEID) {
+            /* sample index / ID */
+            type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
+            interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
+            name = VGPU10_NAME_SAMPLE_INDEX;
+         }
          else {
             /* general fragment input */
             type = VGPU10_OPCODE_DCL_INPUT_PS;
@@ -2553,6 +2726,10 @@ emit_output_declarations(struct svga_shader_emitter_v10 *emit)
             /* Fragment depth output */
             emit_fragdepth_output_declaration(emit);
          }
+         else if (semantic_name == TGSI_SEMANTIC_SAMPLEMASK) {
+            /* Fragment sample mask output */
+            emit_samplemask_output_declaration(emit);
+         }
          else {
             assert(!"Bad output semantic name");
          }
@@ -2747,6 +2924,11 @@ emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
          emit->fs.fragcoord_tmp_index = total_temps;
          total_temps += 1;
       }
+
+      if (emit->fs.sample_pos_sys_index != INVALID_INDEX) {
+         /* Allocate a temp for the sample position */
+         emit->fs.sample_pos_tmp_index = total_temps++;
+      }
    }
 
    for (i = 0; i < emit->num_address_regs; i++) {
@@ -2978,12 +3160,20 @@ emit_sampler_declarations(struct svga_shader_emitter_v10 *emit)
 
 
 /**
- * Translate TGSI_TEXTURE_x to VGAPU10_RESOURCE_DIMENSION_x.
+ * Translate TGSI_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x.
  */
 static unsigned
 tgsi_texture_to_resource_dimension(enum tgsi_texture_type target,
+                                   unsigned num_samples,
                                    boolean is_array)
 {
+   if (target == TGSI_TEXTURE_2D_MSAA && num_samples < 2) {
+      target = TGSI_TEXTURE_2D;
+   }
+   else if (target == TGSI_TEXTURE_2D_ARRAY_MSAA && num_samples < 2) {
+      target = TGSI_TEXTURE_2D_ARRAY;
+   }
+
    switch (target) {
    case TGSI_TEXTURE_BUFFER:
       return VGPU10_RESOURCE_DIMENSION_BUFFER;
@@ -3017,7 +3207,8 @@ tgsi_texture_to_resource_dimension(enum tgsi_texture_type target,
          : VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
    case TGSI_TEXTURE_CUBE_ARRAY:
    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
-      return VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY;
+      return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY
+         : VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
    default:
       assert(!"Unexpected resource type");
       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
@@ -3069,6 +3260,7 @@ emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
       opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;
       opcode0.resourceDimension =
          tgsi_texture_to_resource_dimension(emit->sampler_target[i],
+                                            emit->key.tex[i].num_samples,
                                             emit->key.tex[i].is_array);
       opcode0.sampleCount = emit->key.tex[i].num_samples;
       operand0.value = 0;
@@ -4073,6 +4265,30 @@ emit_lit(struct svga_shader_emitter_v10 *emit,
 }
 
 
+/**
+ * Emit Level Of Detail Query (LODQ) instruction.
+ *
+ * Emits a single VGPU10_OPCODE_LOD instruction whose operands are the
+ * TGSI destination, the coordinate in Src[0], and the resource and sampler
+ * registers for the unit named by Src[1].  Asserts emit->version >= 41.
+ *
+ * \param emit  the shader emitter that receives the instruction
+ * \param inst  the TGSI instruction being translated
+ * \return TRUE always
+ */
+static boolean
+emit_lodq(struct svga_shader_emitter_v10 *emit,
+          const struct tgsi_full_instruction *inst)
+{
+   const uint unit = inst->Src[1].Register.Index;
+
+   assert(emit->version >= 41);
+
+   /* LOD dst, coord, resource, sampler */
+   begin_emit_instruction(emit);
+   emit_opcode(emit, VGPU10_OPCODE_LOD, FALSE);
+   emit_dst_register(emit, &inst->Dst[0]);
+   emit_src_register(emit, &inst->Src[0]); /* coord */
+   emit_resource_register(emit, unit);
+   emit_sampler_register(emit, unit);
+   end_emit_instruction(emit);
+
+   return TRUE;
+}
+
+
 /**
  * Emit code for TGSI_OPCODE_LOG instruction.
  */
@@ -4751,7 +4967,7 @@ setup_texcoord(struct svga_shader_emitter_v10 *emit,
                unsigned unit,
                const struct tgsi_full_src_register *coord)
 {
-   if (emit->key.tex[unit].unnormalized) {
+   if (emit->sampler_view[unit] && emit->key.tex[unit].unnormalized) {
       unsigned scale_index = emit->texcoord_scale_index[unit];
       unsigned tmp = get_temp_index(emit);
       struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
@@ -5114,6 +5330,60 @@ emit_tex(struct svga_shader_emitter_v10 *emit,
    return TRUE;
 }
 
+/**
+ * Emit code for TGSI_OPCODE_TG4 (texture lookup for texture gather)
+ *
+ * The texture unit is taken from Src[2] and the coordinate from Src[0].
+ * What gets emitted depends on the unit's red-channel swizzle in the
+ * shader key (key.tex[unit].swizzle_r):
+ *  - PIPE_SWIZZLE_X: a GATHER4 instruction (with texel offsets and the
+ *    coordinate returned by setup_texcoord())
+ *  - PIPE_SWIZZLE_W or PIPE_SWIZZLE_1: a MOV of the immediate 1.0
+ *  - anything else: a MOV of the immediate 0.0
+ *
+ * \param emit  the shader emitter that receives the instruction(s)
+ * \param inst  the TGSI instruction being translated
+ * \return TRUE always; if is_valid_tex_instruction() rejects the
+ *         instruction, nothing is emitted here.
+ */
+static boolean
+emit_tg4(struct svga_shader_emitter_v10 *emit,
+         const struct tgsi_full_instruction *inst)
+{
+   const uint unit = inst->Src[2].Register.Index;
+   struct tgsi_full_src_register src;
+   int offsets[3];
+
+   /* check that the sampler returns a float */
+   if (!is_valid_tex_instruction(emit, inst))
+      return TRUE;
+
+   /* Only a single channel is supported in SM4_1 and we report
+    * PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS = 1.
+    * Only the 0th component will be gathered.
+    */
+   switch (emit->key.tex[unit].swizzle_r) {
+   case PIPE_SWIZZLE_X:
+      get_texel_offsets(emit, inst, offsets);
+      src = setup_texcoord(emit, unit, &inst->Src[0]);
+
+      /* Gather dst, coord, resource, sampler */
+      begin_emit_instruction(emit);
+      emit_sample_opcode(emit, VGPU10_OPCODE_GATHER4,
+                         inst->Instruction.Saturate, offsets);
+      emit_dst_register(emit, &inst->Dst[0]);
+      emit_src_register(emit, &src);
+      emit_resource_register(emit, unit);
+      emit_sampler_register(emit, unit);
+      end_emit_instruction(emit);
+      break;
+   case PIPE_SWIZZLE_W:
+   case PIPE_SWIZZLE_1:
+      src = make_immediate_reg_float(emit, 1.0);
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
+                           &inst->Dst[0], &src, FALSE);
+      break;
+   case PIPE_SWIZZLE_Y:
+   case PIPE_SWIZZLE_Z:
+   case PIPE_SWIZZLE_0:
+   default:
+      src = make_immediate_reg_float(emit, 0.0);
+      emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
+                           &inst->Dst[0], &src, FALSE);
+      break;
+   }
+
+   return TRUE;
+}
+
+
 
 /**
  * Emit code for TGSI_OPCODE_TEX2 (texture lookup for shadow cube map arrays)
@@ -5268,7 +5538,8 @@ emit_txf(struct svga_shader_emitter_v10 *emit,
          const struct tgsi_full_instruction *inst)
 {
    const uint unit = inst->Src[1].Register.Index;
-   const boolean msaa = tgsi_is_msaa_target(inst->Texture.Texture);
+   const boolean msaa = tgsi_is_msaa_target(inst->Texture.Texture)
+      && emit->key.tex[unit].num_samples > 1;
    int offsets[3];
    struct tex_swizzle_info swz_info;
 
@@ -5277,6 +5548,8 @@ emit_txf(struct svga_shader_emitter_v10 *emit,
    get_texel_offsets(emit, inst, offsets);
 
    if (msaa) {
+      assert(emit->key.tex[unit].num_samples > 1);
+
       /* Fetch one sample from an MSAA texture */
       struct tgsi_full_src_register sampleIndex =
          scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
@@ -5636,6 +5909,8 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
       return emit_lg2(emit, inst);
    case TGSI_OPCODE_LIT:
       return emit_lit(emit, inst);
+   case TGSI_OPCODE_LODQ:
+      return emit_lodq(emit, inst);
    case TGSI_OPCODE_LOG:
       return emit_log(emit, inst);
    case TGSI_OPCODE_LRP:
@@ -5668,6 +5943,8 @@ emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
       return emit_issg(emit, inst);
    case TGSI_OPCODE_TEX:
       return emit_tex(emit, inst);
+   case TGSI_OPCODE_TG4:
+      return emit_tg4(emit, inst);
    case TGSI_OPCODE_TEX2:
       return emit_tex2(emit, inst);
    case TGSI_OPCODE_TXP:
@@ -6010,6 +6287,55 @@ emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit)
 }
 
 
+/**
+ * Emit the extra code to get the current sample position value and
+ * put it into a temp register.
+ *
+ * Does nothing unless a sample position system value was recorded
+ * (fs.sample_pos_sys_index != INVALID_INDEX).  Otherwise two instructions
+ * are emitted: a SAMPLE_POS that writes TEMP[fs.sample_pos_tmp_index],
+ * followed by an ADD of the immediate (0.5, 0.5, 0.0, 0.0) to that temp.
+ *
+ * NOTE(review): fs.sample_id_sys_index is used as the sample index source
+ * without an INVALID_INDEX check; presumably a SAMPLEID system value is
+ * always declared whenever SAMPLEPOS is -- confirm.
+ *
+ * \param emit  the shader emitter; asserted to be for a fragment shader
+ */
+static void
+emit_sample_position_instructions(struct svga_shader_emitter_v10 *emit)
+{
+   assert(emit->unit == PIPE_SHADER_FRAGMENT);
+
+   if (emit->fs.sample_pos_sys_index != INVALID_INDEX) {
+      assert(emit->version >= 41);
+
+      struct tgsi_full_dst_register tmp_dst =
+         make_dst_temp_reg(emit->fs.sample_pos_tmp_index);
+      struct tgsi_full_src_register half =
+         make_immediate_reg_float4(emit, 0.5, 0.5, 0.0, 0.0);
+
+      struct tgsi_full_src_register tmp_src =
+         make_src_temp_reg(emit->fs.sample_pos_tmp_index);
+      struct tgsi_full_src_register sample_index_reg =
+         make_src_scalar_reg(TGSI_FILE_SYSTEM_VALUE,
+                             emit->fs.sample_id_sys_index, TGSI_SWIZZLE_X);
+
+      /* The first src register is a shader resource (if we want a
+       * multisampled resource sample position) or the rasterizer register
+       * (if we want the current sample position in the color buffer).  We
+       * want the latter.
+       */
+
+      /* SAMPLE_POS dst, RASTERIZER, sampleIndex */
+      begin_emit_instruction(emit);
+      emit_opcode(emit, VGPU10_OPCODE_SAMPLE_POS, FALSE);
+      emit_dst_register(emit, &tmp_dst);
+      emit_rasterizer_register(emit);
+      emit_src_register(emit, &sample_index_reg);
+      end_emit_instruction(emit);
+
+      /* Convert from D3D coords to GL coords by adding a 0.5 bias to X
+       * and Y (the Z and W terms of the immediate are 0.0).
+       */
+      /* ADD dst, dst, half */
+      begin_emit_instruction(emit);
+      emit_opcode(emit, VGPU10_OPCODE_ADD, FALSE);
+      emit_dst_register(emit, &tmp_dst);
+      emit_src_register(emit, &tmp_src);
+      emit_src_register(emit, &half);
+      end_emit_instruction(emit);
+   }
+}
+
+
 /**
  * Emit extra instructions to adjust VS inputs/attributes.  This can
  * mean casting a vertex attribute from int to float or setting the
@@ -6229,6 +6555,7 @@ emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
    if (emit->unit == PIPE_SHADER_FRAGMENT) {
       emit_frontface_instructions(emit);
       emit_fragcoord_instructions(emit);
+      emit_sample_position_instructions(emit);
    }
    else if (emit->unit == PIPE_SHADER_VERTEX) {
       emit_vertex_attrib_instructions(emit);
@@ -6625,6 +6952,8 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga,
    emit->fs.color_tmp_index = INVALID_INDEX;
    emit->fs.face_input_index = INVALID_INDEX;
    emit->fs.fragcoord_input_index = INVALID_INDEX;
+   emit->fs.sample_id_sys_index = INVALID_INDEX;
+   emit->fs.sample_pos_sys_index = INVALID_INDEX;
 
    emit->gs.prim_id_index = INVALID_INDEX;
 
@@ -6686,6 +7015,13 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga,
       svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage);
    }
 
+   /* Since the vertex shader does not need to go through the linker to
+    * establish the input map, we need to make sure the highest index
+    * of input registers is set properly here.
+    */
+   emit->linkage.input_map_max = MAX2((int)emit->linkage.input_map_max,
+                                      emit->info.file_max[TGSI_FILE_INPUT]);
+
    determine_clipping_mode(emit);
 
    if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX) {
@@ -6730,7 +7066,7 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga,
    /*
     * Create, initialize the 'variant' object.
     */
-   variant = svga_new_shader_variant(svga);
+   variant = svga_new_shader_variant(svga, unit);
    if (!variant)
       goto cleanup;
 
@@ -6771,6 +7107,8 @@ svga_tgsi_vgpu10_translate(struct svga_context *svga,
 
    variant->fs_shadow_compare_units = emit->fs.shadow_compare_units;
 
+   variant->fs_shadow_compare_units = emit->fs.shadow_compare_units;
+
    if (tokens != shader->tokens) {
       tgsi_free_tokens(tokens);
    }