gallium/gallivm/tgsi: enable tessellation shaders
authorJan Zielinski <jan.zielinski@intel.com>
Thu, 12 Dec 2019 15:03:46 +0000 (16:03 +0100)
committerJan Zielinski <jan.zielinski@intel.com>
Mon, 30 Dec 2019 10:32:47 +0000 (11:32 +0100)
Tessellation Control and Evaluation shaders are implementing
tessellation and require special handling of their inputs
and outputs.

TCS can write out not only per-vertex, but also per-patch
(per-primitive) attributes and tessellation factor values
that control the tessellator.

TES can read TCS outputs, plus must be feeded with new
system values (tessellation coordinates) that are
outputs of the tessellator fixed function.

TCS can also contain calls to barrier() function (similar
to compute shaders).

Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Reviewed-by: Alok Hota <alok.hota@intel.com>
src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c

index 2c3be8fb1275ae91f3f4c84e20bd57f7248af904..3b111138b79a5185b171ce6fa9a17aeb24ea05b6 100644 (file)
@@ -549,6 +549,10 @@ lp_build_tgsi_llvm(
       }
    }
 
+   if (bld_base->emit_prologue_post_decl) {
+      bld_base->emit_prologue_post_decl(bld_base);
+   }
+
    while (bld_base->pc != -1) {
       const struct tgsi_full_instruction *instr =
          bld_base->instructions + bld_base->pc;
index cb8c91eadd2b925ba329c81ca84f1ba6d6fcf005..e8cbceb27fa3dae564e7975097ef0a844bf99817 100644 (file)
@@ -61,6 +61,7 @@ struct tgsi_full_declaration;
 struct tgsi_full_immediate;
 struct tgsi_full_instruction;
 struct tgsi_full_src_register;
+struct tgsi_full_dst_register;
 struct tgsi_opcode_info;
 struct tgsi_token;
 struct tgsi_shader_info;
@@ -178,6 +179,10 @@ struct lp_bld_tgsi_system_values {
    LLVMValueRef front_facing;
    LLVMValueRef work_dim;
    LLVMValueRef block_size;
+   LLVMValueRef tess_coord;
+   LLVMValueRef tess_outer;
+   LLVMValueRef tess_inner;
+   LLVMValueRef vertices_in;
 };
 
 
@@ -253,6 +258,8 @@ struct lp_build_tgsi_params {
    const struct lp_build_sampler_soa *sampler;
    const struct tgsi_shader_info *info;
    const struct lp_build_gs_iface *gs_iface;
+   const struct lp_build_tcs_iface *tcs_iface;
+   const struct lp_build_tes_iface *tes_iface;
    LLVMValueRef ssbo_ptr;
    LLVMValueRef ssbo_sizes_ptr;
    const struct lp_build_image_soa *image;
@@ -302,6 +309,14 @@ typedef LLVMValueRef (*lp_build_emit_fetch_fn)(struct lp_build_tgsi_context *,
                                         enum tgsi_opcode_type,
                                         unsigned);
 
+typedef void (*lp_build_emit_store_reg_fn)(struct lp_build_tgsi_context *,
+                              enum tgsi_opcode_type,
+                              const struct tgsi_full_dst_register *,
+                              unsigned,
+                              unsigned,
+                              LLVMValueRef,
+                              LLVMValueRef);
+
 struct lp_build_tgsi_context
 {
    struct lp_build_context base;
@@ -331,6 +346,7 @@ struct lp_build_tgsi_context
    const struct tgsi_shader_info *info;
 
    lp_build_emit_fetch_fn emit_fetch_funcs[TGSI_FILE_COUNT];
+   lp_build_emit_store_reg_fn emit_store_reg_funcs[TGSI_FILE_COUNT];
 
    LLVMValueRef (*emit_swizzle)(struct lp_build_tgsi_context *,
                          LLVMValueRef, unsigned, unsigned, unsigned, unsigned);
@@ -371,6 +387,12 @@ struct lp_build_tgsi_context
      */
    void (*emit_prologue)(struct lp_build_tgsi_context*);
 
+   /** This function allows the user to insert some instructions after
+     * declarations section, but before any other code.
+     * It is optional and does not need to be implemented.
+     */
+   void (*emit_prologue_post_decl)(struct lp_build_tgsi_context*);
+
    /** This function allows the user to insert some instructions at the end of
      * the program.  This callback is intended to be used for emitting
      * instructions to handle the export for the output registers, but it can
@@ -405,6 +427,57 @@ struct lp_build_gs_iface
                        LLVMValueRef emitted_prims_vec);
 };
 
+struct lp_build_tcs_iface
+{
+   void (*emit_prologue)(struct lp_build_context * bld);
+   void (*emit_epilogue)(struct lp_build_context * bld);
+   void (*emit_barrier)(struct lp_build_context *bld_base);
+
+   void (*emit_store_output)(const struct lp_build_tcs_iface *tcs_iface,
+                             struct lp_build_context * bld,
+                             unsigned name,
+                             boolean is_vindex_indirect,
+                             LLVMValueRef vertex_index,
+                             boolean is_aindex_indirect,
+                             LLVMValueRef attrib_index,
+                             LLVMValueRef swizzle_index,
+                             LLVMValueRef value);
+
+   LLVMValueRef (*emit_fetch_input)(const struct lp_build_tcs_iface *tcs_iface,
+                                    struct lp_build_context * bld,
+                                    boolean is_vindex_indirect,
+                                    LLVMValueRef vertex_index,
+                                    boolean is_aindex_indirect,
+                                    LLVMValueRef attrib_index,
+                                    LLVMValueRef swizzle_index);
+
+   LLVMValueRef (*emit_fetch_output)(const struct lp_build_tcs_iface *tcs_iface,
+                                    struct lp_build_context * bld,
+                                    boolean is_vindex_indirect,
+                                    LLVMValueRef vertex_index,
+                                    boolean is_aindex_indirect,
+                                    LLVMValueRef attrib_index,
+                                    LLVMValueRef swizzle_index,
+                                    uint32_t name);
+};
+
+struct lp_build_tes_iface
+{
+   LLVMValueRef (*fetch_vertex_input)(const struct lp_build_tes_iface *tes_iface,
+                                      struct lp_build_context * bld,
+                                      boolean is_vindex_indirect,
+                                      LLVMValueRef vertex_index,
+                                      boolean is_aindex_indirect,
+                                      LLVMValueRef attrib_index,
+                                      LLVMValueRef swizzle_index);
+
+   LLVMValueRef (*fetch_patch_input)(const struct lp_build_tes_iface *tes_iface,
+                                     struct lp_build_context * bld,
+                                     boolean is_aindex_indirect,
+                                     LLVMValueRef attrib_index,
+                                     LLVMValueRef swizzle_index);
+};
+
 struct lp_build_tgsi_soa_context
 {
    struct lp_build_tgsi_context bld_base;
@@ -413,6 +486,9 @@ struct lp_build_tgsi_soa_context
    struct lp_build_context elem_bld;
 
    const struct lp_build_gs_iface *gs_iface;
+   const struct lp_build_tcs_iface *tcs_iface;
+   const struct lp_build_tes_iface *tes_iface;
+
    LLVMValueRef emitted_prims_vec_ptr;
    LLVMValueRef total_emitted_vertices_vec_ptr;
    LLVMValueRef emitted_vertices_vec_ptr;
index 5d18cfba4acfa46ec20a4d2f6b7a23330fed3ec4..b1a2e96e804a13807dac1757fb578f6847094b7f 100644 (file)
@@ -1,9 +1,9 @@
 /**************************************************************************
- * 
+ *
  * Copyright 2009 VMware, Inc.
  * Copyright 2007-2008 VMware, Inc.
  * All Rights Reserved.
- * 
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
  * distribute, sub license, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
- * 
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -23,7 +23,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
+ *
  **************************************************************************/
 
 /**
@@ -1199,6 +1199,199 @@ emit_fetch_gs_input(
    return res;
 }
 
+static LLVMValueRef
+emit_fetch_tcs_input(
+   struct lp_build_tgsi_context * bld_base,
+   const struct tgsi_full_src_register * reg,
+   enum tgsi_opcode_type stype,
+   unsigned swizzle_in)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
+   const struct tgsi_shader_info *info = bld->bld_base.info;
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef attrib_index = NULL;
+   LLVMValueRef vertex_index = NULL;
+   unsigned swizzle = swizzle_in & 0xffff;
+   LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
+   LLVMValueRef res;
+
+   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
+      /* This is really a system value not a regular input */
+      assert(!reg->Register.Indirect);
+      assert(!reg->Dimension.Indirect);
+      res = bld->system_values.prim_id;
+      if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
+         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
+      }
+      return res;
+   }
+
+   if (reg->Register.Indirect) {
+      int index_limit = info->file_max[reg->Register.File];
+      attrib_index = get_indirect_index(bld,
+                                        reg->Register.File,
+                                        reg->Register.Index,
+                                        &reg->Indirect,
+                                        index_limit);
+   } else {
+      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
+   }
+
+   if (reg->Dimension.Indirect) {
+      vertex_index = get_indirect_index(bld,
+                                        reg->Register.File,
+                                        reg->Dimension.Index,
+                                        &reg->DimIndirect,
+                                        PIPE_MAX_SHADER_INPUTS);
+   } else {
+      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
+   }
+
+   // TCS can read from its own outputs
+   if (reg->Register.File == TGSI_FILE_OUTPUT) {
+      res = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
+                                              reg->Dimension.Indirect,
+                                              vertex_index,
+                                              reg->Register.Indirect,
+                                              attrib_index,
+                                              swizzle_index,
+                                              bld_base->info->output_semantic_name[reg->Register.Index]);
+   } else {
+      res = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, (struct lp_build_context*)bld_base,
+                                             reg->Dimension.Indirect,
+                                             vertex_index,
+                                             reg->Register.Indirect,
+                                             attrib_index,
+                                             swizzle_index);
+   }
+
+
+   assert(res);
+   if (tgsi_type_is_64bit(stype)) {
+      LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
+      LLVMValueRef res2;
+      if (reg->Register.File == TGSI_FILE_OUTPUT) {
+         res2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
+                                                  reg->Dimension.Indirect,
+                                                  vertex_index,
+                                                  reg->Register.Indirect,
+                                                  attrib_index,
+                                                  swizzle_index,
+                                                  bld_base->info->output_semantic_name[reg->Register.Index]);
+      } else {
+         res2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, (struct lp_build_context*)bld_base,
+                                                 reg->Dimension.Indirect,
+                                                 vertex_index,
+                                                 reg->Register.Indirect,
+                                                 attrib_index,
+                                                 swizzle_index);
+      }
+      assert(res2);
+      res = emit_fetch_64bit(bld_base, stype, res, res2);
+   } else if (stype == TGSI_TYPE_UNSIGNED) {
+      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
+   } else if (stype == TGSI_TYPE_SIGNED) {
+      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
+   }
+
+   return res;
+}
+
+static LLVMValueRef
+emit_fetch_tes_input(
+   struct lp_build_tgsi_context * bld_base,
+   const struct tgsi_full_src_register * reg,
+   enum tgsi_opcode_type stype,
+   unsigned swizzle_in)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
+   const struct tgsi_shader_info *info = bld->bld_base.info;
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef attrib_index = NULL;
+   LLVMValueRef vertex_index = NULL;
+   unsigned swizzle = swizzle_in & 0xffff;
+   LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
+   LLVMValueRef res;
+
+   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
+      /* This is really a system value not a regular input */
+      assert(!reg->Register.Indirect);
+      assert(!reg->Dimension.Indirect);
+      res = bld->system_values.prim_id;
+      if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
+         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
+      }
+      return res;
+   }
+
+   if (reg->Register.Indirect) {
+      int index_limit = info->file_max[reg->Register.File];
+      attrib_index = get_indirect_index(bld,
+                                        reg->Register.File,
+                                        reg->Register.Index,
+                                        &reg->Indirect,
+                                        index_limit);
+   } else {
+      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
+   }
+
+   if (reg->Dimension.Indirect) {
+      vertex_index = get_indirect_index(bld,
+                                        reg->Register.File,
+                                        reg->Dimension.Index,
+                                        &reg->DimIndirect,
+                                        PIPE_MAX_SHADER_INPUTS);
+   } else {
+      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
+   }
+
+   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PATCH) {
+      res = bld->tes_iface->fetch_patch_input(bld->tes_iface, (struct lp_build_context*)bld_base,
+                                     reg->Register.Indirect,
+                                     attrib_index,
+                                     swizzle_index);
+   } else {
+      res = bld->tes_iface->fetch_vertex_input(bld->tes_iface, (struct lp_build_context*)bld_base,
+                                       reg->Dimension.Indirect,
+                                       vertex_index,
+                                       reg->Register.Indirect,
+                                       attrib_index,
+                                       swizzle_index);
+   }
+
+   assert(res);
+   if (tgsi_type_is_64bit(stype)) {
+      LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
+      LLVMValueRef res2;
+      if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PATCH) {
+         res2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, (struct lp_build_context*)bld_base,
+                                    reg->Register.Indirect,
+                                    attrib_index,
+                                    swizzle_index);
+      }
+      else {
+         res2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, (struct lp_build_context*)bld_base,
+                                             reg->Dimension.Indirect,
+                                             vertex_index,
+                                             reg->Register.Indirect,
+                                             attrib_index,
+                                             swizzle_index);
+      }
+      assert(res2);
+      res = emit_fetch_64bit(bld_base, stype, res, res2);
+   } else if (stype == TGSI_TYPE_UNSIGNED) {
+      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
+   } else if (stype == TGSI_TYPE_SIGNED) {
+      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
+   }
+
+   return res;
+}
+
+
+
 static LLVMValueRef
 emit_fetch_temporary(
    struct lp_build_tgsi_context * bld_base,
@@ -1317,7 +1510,10 @@ emit_fetch_system_value(
       break;
 
    case TGSI_SEMANTIC_INVOCATIONID:
-      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
+      if (info->processor == PIPE_SHADER_TESS_CTRL)
+         res = bld->system_values.invocation_id;
+      else
+         res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
       atype = TGSI_TYPE_UNSIGNED;
       break;
 
@@ -1341,15 +1537,43 @@ emit_fetch_system_value(
       atype = TGSI_TYPE_UNSIGNED;
       break;
 
+   case TGSI_SEMANTIC_TESSCOORD:
+      {
+         LLVMValueRef index[] = { lp_build_const_int32(gallivm, 0), lp_build_const_int32(gallivm, swizzle_in) };
+         LLVMValueRef array_indexed = LLVMBuildGEP(gallivm->builder, bld->system_values.tess_coord, index, 2, "tess_coord_array_indexed");
+         res = LLVMBuildLoad(builder, array_indexed, "tess_coord");
+      }
+      atype = TGSI_TYPE_FLOAT;
+      break;
+
    case TGSI_SEMANTIC_FACE:
       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
       break;
 
-   case TGSI_SEMANTIC_DRAWID:
+  case TGSI_SEMANTIC_DRAWID:
       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.draw_id);
       atype = TGSI_TYPE_UNSIGNED;
       break;
 
+   case TGSI_SEMANTIC_TESSOUTER:
+      res = lp_build_extract_broadcast(gallivm, lp_type_float_vec(32, 128), bld_base->base.type,
+                                       bld->system_values.tess_outer,
+                                       lp_build_const_int32(gallivm, swizzle_in));
+      atype = TGSI_TYPE_FLOAT;
+      break;
+
+   case TGSI_SEMANTIC_TESSINNER:
+      res = lp_build_extract_broadcast(gallivm, lp_type_float_vec(32, 128), bld_base->base.type,
+                                       bld->system_values.tess_inner,
+                                       lp_build_const_int32(gallivm, swizzle_in));
+      atype = TGSI_TYPE_FLOAT;
+      break;
+
+   case TGSI_SEMANTIC_VERTICESIN:
+      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.vertices_in);
+      atype = TGSI_TYPE_UNSIGNED;
+      break;
+
    default:
       assert(!"unexpected semantic in emit_fetch_system_value");
       res = bld_base->base.zero;
@@ -1436,6 +1660,186 @@ emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
    lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
 }
 
+static void
+emit_store_output(struct lp_build_tgsi_context *bld_base,
+                  enum tgsi_opcode_type dtype,
+                  const struct tgsi_full_dst_register *reg,
+                  unsigned index,
+                  unsigned chan_index,
+                  LLVMValueRef indirect_index,
+                  LLVMValueRef value)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   struct lp_build_context *float_bld = &bld_base->base;
+
+   /* Outputs are always stored as floats */
+   value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
+
+   if (reg->Register.Indirect) {
+      LLVMValueRef index_vec;  /* indexes into the output registers */
+      LLVMValueRef outputs_array;
+      LLVMTypeRef fptr_type;
+
+      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
+                                          indirect_index,
+                                          chan_index,
+                                          TRUE);
+
+      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
+      outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");
+
+      /* Scatter store values into output registers */
+      emit_mask_scatter(bld, outputs_array, index_vec, value,
+                        &bld->exec_mask);
+   }
+   else {
+      assert(LLVMTypeOf(value) == float_bld->vec_type);
+      LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
+                                                chan_index);
+
+      if (tgsi_type_is_64bit(dtype)) {
+         LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
+                                                   chan_index + 1);
+         emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
+                                 value);
+      } else
+         lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr);
+   }
+}
+
+static void
+emit_store_tcs_output(struct lp_build_tgsi_context *bld_base,
+                      enum tgsi_opcode_type dtype,
+                      const struct tgsi_full_dst_register *reg,
+                      unsigned index,
+                      unsigned chan_index,
+                      LLVMValueRef indirect_index,
+                      LLVMValueRef value)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
+   const struct tgsi_shader_info *info = bld->bld_base.info;
+   LLVMValueRef attrib_index = NULL;
+   LLVMValueRef vertex_index = NULL;
+   LLVMValueRef channel_index = NULL;
+
+   if (reg->Register.Indirect) {
+      /*
+       * XXX: this is possibly not quite the right value, since file_max may be
+       * larger than the max attrib index, due to it being the max of declared
+       * inputs AND the max vertices per prim (which is 6 for tri adj).
+       * It should however be safe to use (since we always allocate
+       * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
+       */
+      int index_limit = info->file_max[reg->Register.File];
+      attrib_index = get_indirect_index(bld,
+                                        reg->Register.File,
+                                        reg->Register.Index,
+                                        &reg->Indirect,
+                                        index_limit);
+   } else {
+      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
+   }
+
+   if (reg->Dimension.Indirect) {
+      vertex_index = get_indirect_index(bld,
+                                        reg->Register.File,
+                                        reg->Dimension.Index,
+                                        &reg->DimIndirect,
+                                        PIPE_MAX_SHADER_OUTPUTS);
+   } else {
+      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
+   }
+
+   channel_index = lp_build_const_int32(gallivm, chan_index);
+
+   assert(bld->tcs_iface->emit_store_output);
+   bld->tcs_iface->emit_store_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
+                                          bld_base->info->output_semantic_name[reg->Register.Index],
+                                          reg->Dimension.Indirect,
+                                          vertex_index,
+                                          reg->Register.Indirect,
+                                          attrib_index,
+                                          channel_index,
+                                          value);
+}
+
+static void
+emit_store_temp(struct lp_build_tgsi_context *bld_base,
+                  enum tgsi_opcode_type dtype,
+                  const struct tgsi_full_dst_register *reg,
+                  unsigned index,
+                  unsigned chan_index,
+                  LLVMValueRef indirect_index,
+                  LLVMValueRef value)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   struct lp_build_context *float_bld = &bld_base->base;
+
+   /* Temporaries are always stored as floats */
+   if (!tgsi_type_is_64bit(dtype))
+      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
+   else
+      value = LLVMBuildBitCast(builder, value,  LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");
+
+   if (reg->Register.Indirect) {
+      LLVMValueRef index_vec;  /* indexes into the temp registers */
+      LLVMValueRef temps_array;
+      LLVMTypeRef fptr_type;
+
+      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
+                                          indirect_index,
+                                          chan_index,
+                                          TRUE);
+
+      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
+      temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
+
+      /* Scatter store values into temp registers */
+      emit_mask_scatter(bld, temps_array, index_vec, value,
+                        &bld->exec_mask);
+   }
+   else {
+      LLVMValueRef temp_ptr;
+      temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
+
+      if (tgsi_type_is_64bit(dtype)) {
+         LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
+                                                      reg->Register.Index,
+                                                      chan_index + 1);
+         emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
+                                 value);
+      }
+      else
+         lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr);
+   }
+}
+
+static void
+emit_store_address(struct lp_build_tgsi_context *bld_base,
+                   enum tgsi_opcode_type dtype,
+                   const struct tgsi_full_dst_register *reg,
+                   unsigned index,
+                   unsigned chan_index,
+                   LLVMValueRef indirect_index,
+                   LLVMValueRef value)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   struct lp_build_context *int_bld = &bld_base->int_bld;
+
+   assert(dtype == TGSI_TYPE_SIGNED);
+   assert(LLVMTypeOf(value) == int_bld->vec_type);
+   value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
+   lp_exec_mask_store(&bld->exec_mask, int_bld, value,
+                        bld->addr[reg->Register.Index][chan_index]);
+}
+
 /**
  * Register store.
  */
@@ -1452,7 +1856,6 @@ emit_store_chan(
    LLVMBuilderRef builder = gallivm->builder;
    const struct tgsi_full_dst_register *reg = &inst->Dst[index];
    struct lp_build_context *float_bld = &bld_base->base;
-   struct lp_build_context *int_bld = &bld_base->int_bld;
    LLVMValueRef indirect_index = NULL;
    enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
 
@@ -1488,93 +1891,14 @@ emit_store_chan(
       emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
    }
 
-   switch( reg->Register.File ) {
-   case TGSI_FILE_OUTPUT:
-      /* Outputs are always stored as floats */
-      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
-
-      if (reg->Register.Indirect) {
-         LLVMValueRef index_vec;  /* indexes into the output registers */
-         LLVMValueRef outputs_array;
-         LLVMTypeRef fptr_type;
-
-         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
-                                           indirect_index,
-                                           chan_index,
-                                           TRUE);
-
-         fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
-         outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");
-
-         /* Scatter store values into output registers */
-         emit_mask_scatter(bld, outputs_array, index_vec, value,
-                           &bld->exec_mask);
-      }
-      else {
-         LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
-                                                  chan_index);
-
-         if (tgsi_type_is_64bit(dtype)) {
-            LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
-                                                      chan_index + 1);
-            emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
-                                  value);
-         } else
-            lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr);
-      }
-      break;
-
-   case TGSI_FILE_TEMPORARY:
-      /* Temporaries are always stored as floats */
-      if (!tgsi_type_is_64bit(dtype))
-         value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
-      else
-         value = LLVMBuildBitCast(builder, value,  LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");
-
-      if (reg->Register.Indirect) {
-         LLVMValueRef index_vec;  /* indexes into the temp registers */
-         LLVMValueRef temps_array;
-         LLVMTypeRef fptr_type;
-
-         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
-                                           indirect_index,
-                                           chan_index,
-                                           TRUE);
-
-         fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
-         temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
-
-         /* Scatter store values into temp registers */
-         emit_mask_scatter(bld, temps_array, index_vec, value,
-                           &bld->exec_mask);
-      }
-      else {
-         LLVMValueRef temp_ptr;
-         temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
-
-         if (tgsi_type_is_64bit(dtype)) {
-            LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
-                                                         reg->Register.Index,
-                                                         chan_index + 1);
-            emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
-                                  value);
-         }
-         else
-            lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr);
-      }
-      break;
-
-   case TGSI_FILE_ADDRESS:
-      assert(dtype == TGSI_TYPE_SIGNED);
-      assert(LLVMTypeOf(value) == int_bld->vec_type);
-      value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
-      lp_exec_mask_store(&bld->exec_mask, int_bld, value,
-                         bld->addr[reg->Register.Index][chan_index]);
-      break;
-
-   default:
-      assert( 0 );
-   }
+   assert(bld_base->emit_store_reg_funcs[reg->Register.File]);
+   bld_base->emit_store_reg_funcs[reg->Register.File](bld_base,
+                                                      dtype,
+                                                      reg,
+                                                      index,
+                                                      chan_index,
+                                                      indirect_index,
+                                                      value);
 
    (void)dtype;
 }
@@ -3717,6 +4041,20 @@ end_primitive(
    }
 }
 
+static void
+barrier_emit_tcs(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+
+   if (bld->tcs_iface->emit_barrier) {
+      bld->tcs_iface->emit_barrier((struct lp_build_context*)bld_base);
+   }
+}
+
+
 static void
 cal_emit(
    const struct lp_build_tgsi_action * action,
@@ -3931,7 +4269,8 @@ static void emit_prologue(struct lp_build_tgsi_context * bld_base)
 
    /* If we have indirect addressing in inputs we need to copy them into
     * our alloca array to be able to iterate over them */
-   if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
+   if (bld->indirect_files & (1 << TGSI_FILE_INPUT) &&
+       !bld->gs_iface && !bld->tes_iface && !bld->tcs_iface) {
       unsigned index, chan;
       LLVMTypeRef vec_type = bld_base->base.vec_type;
       LLVMValueRef array_size = lp_build_const_int32(gallivm,
@@ -3988,6 +4327,15 @@ static void emit_prologue(struct lp_build_tgsi_context * bld_base)
    }
 }
 
+static void emit_prologue_post_decl(struct lp_build_tgsi_context * bld_base)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+
+   if (bld->tcs_iface && bld->tcs_iface->emit_prologue) {
+      bld->tcs_iface->emit_prologue((struct lp_build_context*)bld_base);
+   }
+}
+
 static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
 {
    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
@@ -4002,6 +4350,10 @@ static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
       lp_build_printf(bld_base->base.gallivm, "\n");
    }
 
+   if (bld->tcs_iface && bld->tcs_iface->emit_epilogue) {
+      bld->tcs_iface->emit_epilogue((struct lp_build_context*)bld_base);
+   }
+
    /* If we have indirect addressing in outputs we need to copy our alloca array
     * to the outputs slots specified by the caller */
    if (bld->gs_iface) {
@@ -4011,7 +4363,7 @@ static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
          vertices in the cache. Note must not call end_primitive here
          since the exec_mask is not valid at this point. */
       end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));
-      
+
       total_emitted_vertices_vec =
          LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
       emitted_prims_vec =
@@ -4108,12 +4460,17 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
    bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
    bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
    bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
+
    bld.bld_base.emit_store = emit_store;
+   bld.bld_base.emit_store_reg_funcs[TGSI_FILE_OUTPUT] = emit_store_output;
+   bld.bld_base.emit_store_reg_funcs[TGSI_FILE_TEMPORARY] = emit_store_temp;
+   bld.bld_base.emit_store_reg_funcs[TGSI_FILE_ADDRESS] = emit_store_address;
 
    bld.bld_base.emit_declaration = lp_emit_declaration_soa;
    bld.bld_base.emit_immediate = lp_emit_immediate_soa;
 
    bld.bld_base.emit_prologue = emit_prologue;
+   bld.bld_base.emit_prologue_post_decl = emit_prologue_post_decl;
    bld.bld_base.emit_epilogue = emit_epilogue;
 
    /* Set opcode actions */
@@ -4209,6 +4566,24 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
                                 max_output_vertices);
    }
 
+   if (params->tes_iface) {
+      /* inputs are always indirect with tes */
+      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
+      bld.tes_iface = params->tes_iface;
+      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_tes_input;
+   }
+
+   if (params->tcs_iface) {
+      bld.tcs_iface = params->tcs_iface;
+      /* outputs and inputs are always indirect with tcs */
+      bld.indirect_files |= (1 << TGSI_FILE_OUTPUT);
+      bld.bld_base.emit_store_reg_funcs[TGSI_FILE_OUTPUT] = emit_store_tcs_output;
+      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
+      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_tcs_input;
+      bld.bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = emit_fetch_tcs_input;
+      bld.bld_base.op_actions[TGSI_OPCODE_BARRIER].emit = barrier_emit_tcs;
+   }
+
    lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
 
    bld.system_values = *params->system_values;