gallium/swr: implementation of tessellation shaders compilation
author Jan Zielinski <jan.zielinski@intel.com>
Tue, 21 Jan 2020 15:01:05 +0000 (16:01 +0100)
committer Marge Bot <eric+marge@anholt.net>
Fri, 24 Jan 2020 11:38:03 +0000 (11:38 +0000)
Implement the compilation mechanisms for TCS and TES shaders in SWR,
together with the associated state management.

Reviewed-by: Krzysztof Raszkowski <krzysztof.raszkowski@intel.com>
Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
Acked-by: Roland Scheidegger <sroland@vmware.com>
Acked-by: Dave Airlie <airlied@redhat.com>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3484>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3484>

16 files changed:
src/gallium/drivers/swr/rasterizer/core/binner.cpp
src/gallium/drivers/swr/rasterizer/core/frontend.cpp
src/gallium/drivers/swr/rasterizer/core/tessellator.h
src/gallium/drivers/swr/swr_context.cpp
src/gallium/drivers/swr/swr_context.h
src/gallium/drivers/swr/swr_draw.cpp
src/gallium/drivers/swr/swr_fence_work.cpp
src/gallium/drivers/swr/swr_fence_work.h
src/gallium/drivers/swr/swr_scratch.cpp
src/gallium/drivers/swr/swr_scratch.h
src/gallium/drivers/swr/swr_screen.cpp
src/gallium/drivers/swr/swr_shader.cpp
src/gallium/drivers/swr/swr_shader.h
src/gallium/drivers/swr/swr_state.cpp
src/gallium/drivers/swr/swr_state.h
src/gallium/drivers/swr/swr_tex_sample.cpp

index dbc387e47e017d3eb169eb522785fff1ddb10063..75aa467d575b9b50550057952a22795e41b38326 100644 (file)
@@ -347,7 +347,8 @@ struct EarlyRastHelper<SIMD512>
 /// @param oneTileMask - defines triangles for ER to work on
 ///                      (tris that fit into ER tile)
 template <typename SIMD_T, uint32_t SIMD_WIDTH, typename CT>
-uint32_t SIMDCALL EarlyRasterizer(SIMDBBOX_T<SIMD_T>& er_bbox,
+uint32_t SIMDCALL EarlyRasterizer(DRAW_CONTEXT*       pDC,
+                                  SIMDBBOX_T<SIMD_T>& er_bbox,
                                   Integer<SIMD_T> (&vAi)[3],
                                   Integer<SIMD_T> (&vBi)[3],
                                   Integer<SIMD_T> (&vXi)[3],
@@ -1025,7 +1026,7 @@ void SIMDCALL BinTrianglesImpl(DRAW_CONTEXT*          pDC,
 
             // Try early rasterization
             triMask = EarlyRasterizer<SIMD_T, SIMD_WIDTH, CT>(
-                er_bbox, vAi, vBi, vXi, vYi, cwTrisMask, triMask, oneTileMask);
+                pDC, er_bbox, vAi, vBi, vXi, vYi, cwTrisMask, triMask, oneTileMask);
 
             if (!triMask)
             {
index 45bc545b164381a3ba626c6cecb98cceee7b75eb..97d435394eddfe95b6eaa3e21dbd6eb45353760a 100644 (file)
@@ -1337,6 +1337,13 @@ static void TessellationStages(DRAW_CONTEXT* pDC,
     // Max storage for one attribute for an entire simdprimitive
     simdvector simdattrib[MAX_NUM_VERTS_PER_PRIM];
 
+    // Assemble position separately
+    // TESS_TODO: this could be avoided - fix it
+    pa.Assemble(VERTEX_POSITION_SLOT, simdattrib);
+    for (uint32_t i = 0; i < numVertsPerPrim; ++i) {
+        hsContext.vert[i].attrib[VERTEX_POSITION_SLOT] = simdattrib[i];
+    }
+
     // assemble all attributes for the input primitives
     for (uint32_t slot = 0; slot < tsState.numHsInputAttribs; ++slot)
     {
@@ -1364,6 +1371,7 @@ static void TessellationStages(DRAW_CONTEXT* pDC,
 #if defined(_DEBUG)
     //memset(hsContext.pCPout, 0x90, sizeof(ScalarPatch) * KNOB_SIMD_WIDTH);
 #endif
+    memset(hsContext.pCPout, 0x90, sizeof(ScalarPatch) * KNOB_SIMD_WIDTH);
 
 #if USE_SIMD16_FRONTEND
     uint32_t numPrims = numPrims_simd8;
@@ -1389,7 +1397,7 @@ static void TessellationStages(DRAW_CONTEXT* pDC,
         SWR_TESSELLATION_FACTORS tessFactors;
         tessFactors                    = hsContext.pCPout[p].tessFactors;
 
-        // Run Tessellator
+          // Run Tessellator
         SWR_TS_TESSELLATED_DATA tsData = {0};
         RDTSC_BEGIN(pDC->pContext->pBucketMgr, FETessellation, pDC->drawId);
         TSTessellate(tsCtx, tessFactors, tsData);
index 04ae0237fef519324c3fc9d812ca0e88ec72fd96..66b6d5e102abe408b87359aa8306f4629f533127 100644 (file)
@@ -60,14 +60,14 @@ namespace Tessellator
                 D3D11_TESSELLATOR_PARTITIONING_INTEGER,         // SWR_TS_INTEGER
                 D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD,  // SWR_TS_ODD_FRACTIONAL
                 D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN, // SWR_TS_EVEN_FRACTIONAL
-                D3D11_TESSELLATOR_PARTITIONING_POW2,            // SWR_TS_POW2
+                D3D11_TESSELLATOR_PARTITIONING_POW2            // SWR_TS_POW2
             };
 
             static D3D11_TESSELLATOR_OUTPUT_PRIMITIVE CVT_TS_D3D_OUTPUT_TOPOLOGY[] = {
                 D3D11_TESSELLATOR_OUTPUT_POINT,        // SWR_TS_OUTPUT_POINT
                 D3D11_TESSELLATOR_OUTPUT_LINE,         // SWR_TS_OUTPUT_LINE
-                D3D11_TESSELLATOR_OUTPUT_TRIANGLE_CW,  // SWR_TS_OUTPUT_TRI_CW
-                D3D11_TESSELLATOR_OUTPUT_TRIANGLE_CCW, // SWR_TS_OUTPUT_TRI_CCW
+                D3D11_TESSELLATOR_OUTPUT_TRIANGLE_CCW,  // SWR_TS_OUTPUT_TRI_CW - inverted logic, because DX
+                D3D11_TESSELLATOR_OUTPUT_TRIANGLE_CW // SWR_TS_OUTPUT_TRI_CCW - inverted logic, because DX
             };
 
             SUPER::Init(CVT_TS_D3D_PARTITIONING[tsPartitioning],
index dbc4487fbdaaaca984d3885ad551d825f5229880..83ea856ecd703b4387907d0c6c850d79204ccc7c 100644 (file)
@@ -320,6 +320,8 @@ swr_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit_info)
    util_blitter_save_vertex_elements(ctx->blitter, (void *)ctx->velems);
    util_blitter_save_vertex_shader(ctx->blitter, (void *)ctx->vs);
    util_blitter_save_geometry_shader(ctx->blitter, (void*)ctx->gs);
+   util_blitter_save_tessctrl_shader(ctx->blitter, (void*)ctx->tcs);
+   util_blitter_save_tesseval_shader(ctx->blitter, (void*)ctx->tes);
    util_blitter_save_so_targets(
       ctx->blitter,
       ctx->num_so_targets,
index 465357cc519769c0b6090d01706bb064503cb8ed..0fafe1449e7eb9a18bc84a888d159273a765cd46 100644 (file)
 #define SWR_NEW_CLIP (1 << 16)
 #define SWR_NEW_SO (1 << 17)
 #define SWR_BLOCK_CLIENT_DRAW ( 1 << 18) // Indicates client draw will block
+#define SWR_NEW_TCS (1 << 19)
+#define SWR_NEW_TES (1 << 20)
+#define SWR_NEW_TCSCONSTANTS (1 << 21)
+#define SWR_NEW_TESCONSTANTS (1 << 22)
 
 namespace std
 {
@@ -91,6 +95,10 @@ struct swr_draw_context {
    uint32_t num_constantsFS[PIPE_MAX_CONSTANT_BUFFERS];
    const float *constantGS[PIPE_MAX_CONSTANT_BUFFERS];
    uint32_t num_constantsGS[PIPE_MAX_CONSTANT_BUFFERS];
+   const float *constantTCS[PIPE_MAX_CONSTANT_BUFFERS];
+   uint32_t num_constantsTCS[PIPE_MAX_CONSTANT_BUFFERS];
+   const float *constantTES[PIPE_MAX_CONSTANT_BUFFERS];
+   uint32_t num_constantsTES[PIPE_MAX_CONSTANT_BUFFERS];
 
    swr_jit_texture texturesVS[PIPE_MAX_SHADER_SAMPLER_VIEWS];
    swr_jit_sampler samplersVS[PIPE_MAX_SAMPLERS];
@@ -98,6 +106,10 @@ struct swr_draw_context {
    swr_jit_sampler samplersFS[PIPE_MAX_SAMPLERS];
    swr_jit_texture texturesGS[PIPE_MAX_SHADER_SAMPLER_VIEWS];
    swr_jit_sampler samplersGS[PIPE_MAX_SAMPLERS];
+   swr_jit_texture texturesTCS[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+   swr_jit_sampler samplersTCS[PIPE_MAX_SAMPLERS];
+   swr_jit_texture texturesTES[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+   swr_jit_sampler samplersTES[PIPE_MAX_SAMPLERS];
 
    float userClipPlanes[PIPE_MAX_CLIP_PLANES][4];
 
@@ -118,6 +130,8 @@ struct swr_context {
 
    HANDLE swrContext;
 
+   SWR_TS_STATE tsState;
+
    /** Constant state objects */
    struct swr_blend_state *blend;
    struct pipe_sampler_state *samplers[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
@@ -127,6 +141,8 @@ struct swr_context {
    struct swr_vertex_shader *vs;
    struct swr_fragment_shader *fs;
    struct swr_geometry_shader *gs;
+   struct swr_tess_control_shader *tcs;
+   struct swr_tess_evaluation_shader *tes;
    struct swr_vertex_element_state *velems;
 
    /** Other rendering state */
index b7f354cd2a21826a6ad08b2e70241c03ada1f789..399821c32eaba9acb66262adb8535242437b2c43 100644 (file)
@@ -31,6 +31,8 @@
 #include "util/u_draw.h"
 #include "util/u_prim.h"
 
+#include <algorithm>
+#include <iostream>
 /*
  * Draw vertex arrays, with optional indexing, optional instancing.
  */
@@ -154,16 +156,22 @@ swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
    // between all the shader stages, so it has to be large enough to
    // incorporate all interfaces between stages
 
-   // max of gs and vs num_outputs
+   // max of frontend shaders num_outputs
    feState.vsVertexSize = ctx->vs->info.base.num_outputs;
-   if (ctx->gs &&
-       ctx->gs->info.base.num_outputs > feState.vsVertexSize) {
-      feState.vsVertexSize = ctx->gs->info.base.num_outputs;
+   if (ctx->gs) {
+      feState.vsVertexSize = std::max(feState.vsVertexSize, (uint32_t)ctx->gs->info.base.num_outputs);
+   }
+   if (ctx->tcs) {
+      feState.vsVertexSize = std::max(feState.vsVertexSize, (uint32_t)ctx->tcs->info.base.num_outputs);
+   }
+   if (ctx->tes) {
+      feState.vsVertexSize = std::max(feState.vsVertexSize, (uint32_t)ctx->tes->info.base.num_outputs);
    }
 
+
    if (ctx->vs->info.base.num_outputs) {
       // gs does not adjust for position in SGV slot at input from vs
-      if (!ctx->gs)
+      if (!ctx->gs && !ctx->tcs && !ctx->tes)
          feState.vsVertexSize--;
    }
 
@@ -180,7 +188,6 @@ swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
    if (ctx->rasterizer->sprite_coord_enable)
       feState.vsVertexSize++;
 
-
    if (ctx->rasterizer->flatshade_first) {
       feState.provokingVertex = {1, 0, 0};
    } else {
@@ -222,7 +229,7 @@ swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
 
    if (info->index_size)
       ctx->api.pfnSwrDrawIndexedInstanced(ctx->swrContext,
-                                          swr_convert_prim_topology(info->mode),
+                                          swr_convert_prim_topology(info->mode, info->vertices_per_patch),
                                           info->count,
                                           info->instance_count,
                                           info->start,
@@ -230,7 +237,7 @@ swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
                                           info->start_instance);
    else
       ctx->api.pfnSwrDrawInstanced(ctx->swrContext,
-                                   swr_convert_prim_topology(info->mode),
+                                   swr_convert_prim_topology(info->mode, info->vertices_per_patch),
                                    info->count,
                                    info->instance_count,
                                    info->start,
index 594ebaac859f0aae51acc9f85cb7aeb4355b212d..6df55666a3664b98d2df8b8e460e4f53f0f00f87 100644 (file)
@@ -83,7 +83,7 @@ swr_free_cb(struct swr_fence_work *work)
 {
    if (aligned_free)
       AlignedFree(work->free.data);
-   else  
+   else
       FREE(work->free.data);
 }
 
@@ -105,6 +105,19 @@ swr_delete_gs_cb(struct swr_fence_work *work)
    delete work->free.swr_gs;
 }
 
+/* Fence-work callback: deletes the TCS object stored in the work item. */
+static void
+swr_delete_tcs_cb(struct swr_fence_work *work)
+{
+   struct swr_tess_control_shader *tcs = work->free.swr_tcs;
+
+   delete tcs;
+}
+
+/* Fence-work callback: deletes the TES object stored in the work item. */
+static void
+swr_delete_tes_cb(struct swr_fence_work *work)
+{
+   struct swr_tess_evaluation_shader *tes = work->free.swr_tes;
+
+   delete tes;
+}
+
+
 bool
 swr_fence_work_free(struct pipe_fence_handle *fence, void *data,
                     bool aligned_free)
@@ -167,3 +180,34 @@ swr_fence_work_delete_gs(struct pipe_fence_handle *fence,
 
    return true;
 }
+
+/*
+ * Attach a work item to `fence` whose callback (swr_delete_tcs_cb) deletes
+ * `swr_tcs`.  Returns false when the work item cannot be allocated, true
+ * once it has been handed to swr_add_fence_work().
+ */
+bool
+swr_fence_work_delete_tcs(struct pipe_fence_handle *fence,
+                          struct swr_tess_control_shader *swr_tcs)
+{
+   struct swr_fence_work *item = CALLOC_STRUCT(swr_fence_work);
+
+   if (item == nullptr)
+      return false;
+
+   item->free.swr_tcs = swr_tcs;
+   item->callback = swr_delete_tcs_cb;
+   swr_add_fence_work(fence, item);
+
+   return true;
+}
+
+
+/*
+ * Attach a work item to `fence` whose callback (swr_delete_tes_cb) deletes
+ * `swr_tes`.  Returns false when the work item cannot be allocated, true
+ * once it has been handed to swr_add_fence_work().
+ */
+bool
+swr_fence_work_delete_tes(struct pipe_fence_handle *fence,
+                          struct swr_tess_evaluation_shader *swr_tes)
+{
+   struct swr_fence_work *work = CALLOC_STRUCT(swr_fence_work);
+   if (!work)
+      return false;
+   work->callback = swr_delete_tes_cb;
+   work->free.swr_tes = swr_tes;
+
+   swr_add_fence_work(fence, work);
+
+   return true;
+}
index a1e72fd20aa221e9443bb19bd3eca5702f2e87fa..ab411599ca5dc85ce5c54f19659c2061b7a2c645 100644 (file)
@@ -32,6 +32,8 @@ struct swr_fence_work {
       struct swr_vertex_shader *swr_vs;
       struct swr_fragment_shader *swr_fs;
       struct swr_geometry_shader *swr_gs;
+      struct swr_tess_control_shader *swr_tcs;
+      struct swr_tess_evaluation_shader *swr_tes;
    } free;
 
    struct swr_fence_work *next;
@@ -47,4 +49,8 @@ bool swr_fence_work_delete_fs(struct pipe_fence_handle *fence,
                               struct swr_fragment_shader *swr_vs);
 bool swr_fence_work_delete_gs(struct pipe_fence_handle *fence,
                               struct swr_geometry_shader *swr_gs);
+bool swr_fence_work_delete_tcs(struct pipe_fence_handle *fence,
+                               struct swr_tess_control_shader *swr_tcs);
+bool swr_fence_work_delete_tes(struct pipe_fence_handle *fence,
+                               struct swr_tess_evaluation_shader *swr_tes);
 #endif
index 810132a76c5ae6eebf33b5107b3c1b43ac1cec76..83cb319b4108474a85d8f40d07f80fba5bdd6a15 100644 (file)
@@ -94,6 +94,8 @@ swr_destroy_scratch_buffers(struct swr_context *ctx)
       AlignedFree(scratch->vs_constants.base);
       AlignedFree(scratch->fs_constants.base);
       AlignedFree(scratch->gs_constants.base);
+      AlignedFree(scratch->tcs_constants.base);
+      AlignedFree(scratch->tes_constants.base);
       AlignedFree(scratch->vertex_buffer.base);
       AlignedFree(scratch->index_buffer.base);
       FREE(scratch);
index 79c9b7aad11c84d2c7a3468c90952c69df60bd2f..4d1c82fc6fc80bd1e381e59f9aac36c5815af811 100644 (file)
@@ -36,6 +36,8 @@ struct swr_scratch_buffers {
    struct swr_scratch_space vs_constants;
    struct swr_scratch_space fs_constants;
    struct swr_scratch_space gs_constants;
+   struct swr_scratch_space tcs_constants;
+   struct swr_scratch_space tes_constants;
    struct swr_scratch_space vertex_buffer;
    struct swr_scratch_space index_buffer;
 };
index a27462f1a45f0b609174a43d386342dc1b933cb2..5b7e5ab0f348db5d440079792b61e27d54a47b63 100644 (file)
@@ -421,10 +421,13 @@ swr_get_shader_param(struct pipe_screen *screen,
 {
    if (shader == PIPE_SHADER_VERTEX ||
        shader == PIPE_SHADER_FRAGMENT ||
-       shader == PIPE_SHADER_GEOMETRY)
+       shader == PIPE_SHADER_GEOMETRY
+       || shader == PIPE_SHADER_TESS_CTRL ||
+       shader == PIPE_SHADER_TESS_EVAL
+   )
       return gallivm_get_shader_param(param);
 
-   // Todo: tesselation, compute
+   // Todo: compute
    return 0;
 }
 
index f21a6847bc08fba78e71c005f69cd579e0e1c4e6..028ebb7d7b63bbf70cecb52f6d4b60ac44061af1 100644 (file)
@@ -50,6 +50,8 @@
 #include "gallivm/lp_bld_flow.h"
 #include "gallivm/lp_bld_struct.h"
 #include "gallivm/lp_bld_tgsi.h"
+#include "gallivm/lp_bld_const.h"
+#include "gallivm/lp_bld_printf.h"
 
 #include "swr_context.h"
 #include "gen_surf_state_llvm.h"
 #include "swr_state.h"
 #include "swr_screen.h"
 
+
+/////////////////////////////////////////////////////////////////////////
+
+#include <stdio.h>
+#include <inttypes.h>
+
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
+
+#include "gallivm/lp_bld_type.h"
+
+#ifdef DEBUG
+constexpr bool verbose_shader = true;
+#else
+constexpr bool verbose_shader = false;
+#endif
+
 using namespace SwrJit;
 using namespace llvm;
 
@@ -84,6 +104,17 @@ bool operator==(const swr_jit_gs_key &lhs, const swr_jit_gs_key &rhs)
    return !memcmp(&lhs, &rhs, sizeof(lhs));
 }
 
+// Two TCS JIT keys are equal when their bytes are identical.
+bool operator==(const swr_jit_tcs_key &lhs, const swr_jit_tcs_key &rhs)
+{
+   const int byteDiff = memcmp(&lhs, &rhs, sizeof(lhs));
+   return byteDiff == 0;
+}
+
+// Two TES JIT keys are equal when their bytes are identical.
+bool operator==(const swr_jit_tes_key &lhs, const swr_jit_tes_key &rhs)
+{
+   const int byteDiff = memcmp(&lhs, &rhs, sizeof(lhs));
+   return byteDiff == 0;
+}
+
+
 static void
 swr_generate_sampler_key(const struct lp_tgsi_info &info,
                          struct swr_context *ctx,
@@ -159,6 +190,8 @@ swr_generate_fs_key(struct swr_jit_fs_key &key,
    struct tgsi_shader_info *pPrevShader;
    if (ctx->gs)
       pPrevShader = &ctx->gs->info.base;
+   else if (ctx->tes)
+      pPrevShader = &ctx->tes->info.base;
    else
       pPrevShader = &ctx->vs->info.base;
 
@@ -206,7 +239,13 @@ swr_generate_gs_key(struct swr_jit_gs_key &key,
 {
    memset(&key, 0, sizeof(key));
 
-   struct tgsi_shader_info *pPrevShader = &ctx->vs->info.base;
+   struct tgsi_shader_info *pPrevShader = nullptr;
+
+   if (ctx->tes) {
+      pPrevShader = &ctx->tes->info.base;
+   } else {
+      pPrevShader = &ctx->vs->info.base;
+   }
 
    memcpy(&key.vs_output_semantic_name,
           &pPrevShader->output_semantic_name,
@@ -218,6 +257,63 @@ swr_generate_gs_key(struct swr_jit_gs_key &key,
    swr_generate_sampler_key(swr_gs->info, ctx, PIPE_SHADER_GEOMETRY, key);
 }
 
+// Fill `key` for the given tessellation control shader.
+//
+// The key is zeroed, then receives the vertex shader's output semantic
+// names/indices, the effective clip-plane mask, and the sampler state for
+// the PIPE_SHADER_TESS_CTRL stage.
+void
+swr_generate_tcs_key(struct swr_jit_tcs_key &key,
+                    struct swr_context *ctx,
+                    swr_tess_control_shader *swr_tcs)
+{
+   memset(&key, 0, sizeof(key));
+
+   // Output semantics are taken from the vertex shader.
+   struct tgsi_shader_info *vsInfo = &ctx->vs->info.base;
+
+   memcpy(&key.vs_output_semantic_name,
+          &vsInfo->output_semantic_name,
+          sizeof(key.vs_output_semantic_name));
+   memcpy(&key.vs_output_semantic_idx,
+          &vsInfo->output_semantic_index,
+          sizeof(key.vs_output_semantic_idx));
+
+   // When the shader writes clip distances, only the planes that are both
+   // written and enabled count; otherwise every enabled plane counts.
+   if (swr_tcs->info.base.clipdist_writemask) {
+      key.clip_plane_mask = swr_tcs->info.base.clipdist_writemask &
+                            ctx->rasterizer->clip_plane_enable;
+   } else {
+      key.clip_plane_mask = ctx->rasterizer->clip_plane_enable;
+   }
+
+   swr_generate_sampler_key(swr_tcs->info, ctx, PIPE_SHADER_TESS_CTRL, key);
+}
+
+// Fill `key` for the given tessellation evaluation shader.
+//
+// The key is zeroed, then receives the output semantic names/indices of the
+// preceding stage (the TCS when one is bound, otherwise the VS), the
+// effective clip-plane mask, and the sampler state for the
+// PIPE_SHADER_TESS_EVAL stage.
+void
+swr_generate_tes_key(struct swr_jit_tes_key &key,
+                    struct swr_context *ctx,
+                    swr_tess_evaluation_shader *swr_tes)
+{
+   memset(&key, 0, sizeof(key));
+
+   // Check the shader pointers themselves, before they are dereferenced.
+   // Asserting on the derived info pointer afterwards is too late to catch
+   // a missing shader.
+   SWR_ASSERT(ctx->tcs || ctx->vs, "TES: No TCS or VS defined");
+
+   struct tgsi_shader_info *pPrevShader =
+      ctx->tcs ? &ctx->tcs->info.base : &ctx->vs->info.base;
+
+   memcpy(&key.prev_output_semantic_name,
+         &pPrevShader->output_semantic_name,
+         sizeof(key.prev_output_semantic_name));
+   memcpy(&key.prev_output_semantic_idx,
+         &pPrevShader->output_semantic_index,
+         sizeof(key.prev_output_semantic_idx));
+
+   // When the shader writes clip distances, only the planes that are both
+   // written and enabled count; otherwise every enabled plane counts.
+   key.clip_plane_mask =
+      swr_tes->info.base.clipdist_writemask ?
+      swr_tes->info.base.clipdist_writemask & ctx->rasterizer->clip_plane_enable :
+      ctx->rasterizer->clip_plane_enable;
+
+   swr_generate_sampler_key(swr_tes->info, ctx, PIPE_SHADER_TESS_EVAL, key);
+}
+
 struct BuilderSWR : public Builder {
    BuilderSWR(JitManager *pJitMgr, const char *pName)
       : Builder(pJitMgr)
@@ -238,7 +334,10 @@ struct BuilderSWR : public Builder {
    PFN_VERTEX_FUNC CompileVS(struct swr_context *ctx, swr_jit_vs_key &key);
    PFN_PIXEL_KERNEL CompileFS(struct swr_context *ctx, swr_jit_fs_key &key);
    PFN_GS_FUNC CompileGS(struct swr_context *ctx, swr_jit_gs_key &key);
+   PFN_TCS_FUNC CompileTCS(struct swr_context *ctx, swr_jit_tcs_key &key);
+   PFN_TES_FUNC CompileTES(struct swr_context *ctx, swr_jit_tes_key &key);
 
+   // GS-specific emit functions
    LLVMValueRef
    swr_gs_llvm_fetch_input(const struct lp_build_gs_iface *gs_iface,
                            struct lp_build_context * bld,
@@ -267,6 +366,61 @@ struct BuilderSWR : public Builder {
                         LLVMValueRef total_emitted_vertices_vec,
                         LLVMValueRef emitted_prims_vec);
 
+   // TCS-specific emit functions
+   void swr_tcs_llvm_emit_prologue(struct lp_build_tgsi_soa_context* bld);
+   void swr_tcs_llvm_emit_epilogue(struct lp_build_tgsi_soa_context* bld);
+
+   LLVMValueRef
+   swr_tcs_llvm_fetch_input(const struct lp_build_tcs_iface *tcs_iface,
+                            struct lp_build_tgsi_context * bld_base,
+                            boolean is_vindex_indirect,
+                            LLVMValueRef vertex_index,
+                            boolean is_aindex_indirect,
+                            LLVMValueRef attrib_index,
+                            LLVMValueRef swizzle_index);
+
+   LLVMValueRef
+   swr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface *tcs_iface,
+                             struct lp_build_tgsi_context * bld_base,
+                             boolean is_vindex_indirect,
+                             LLVMValueRef vertex_index,
+                             boolean is_aindex_indirect,
+                             LLVMValueRef attrib_index,
+                             LLVMValueRef swizzle_index,
+                             uint32_t name);
+
+   void
+   swr_tcs_llvm_store_output(const struct lp_build_tcs_iface *tcs_iface,
+                            struct lp_build_tgsi_context * bld_base,
+                            unsigned name,
+                            boolean is_vindex_indirect,
+                            LLVMValueRef vertex_index,
+                            boolean is_aindex_indirect,
+                            LLVMValueRef attrib_index,
+                            LLVMValueRef swizzle_index,
+                            LLVMValueRef value);
+
+   // Barrier implementation (available only in TCS)
+   void
+   swr_tcs_llvm_emit_barrier(const struct lp_build_tcs_iface *tcs_iface,
+                             struct lp_build_tgsi_context *bld_base);
+
+   // TES-specific emit functions
+   LLVMValueRef
+   swr_tes_llvm_fetch_vtx_input(const struct lp_build_tes_iface *tes_iface,
+                            struct lp_build_tgsi_context * bld_base,
+                            boolean is_vindex_indirect,
+                            LLVMValueRef vertex_index,
+                            boolean is_aindex_indirect,
+                            LLVMValueRef attrib_index,
+                            LLVMValueRef swizzle_index);
+
+   LLVMValueRef
+   swr_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface *tes_iface,
+                            struct lp_build_tgsi_context * bld_base,
+                            boolean is_aindex_indirect,
+                            LLVMValueRef attrib_index,
+                            LLVMValueRef swizzle_index);
 };
 
 struct swr_gs_llvm_iface {
@@ -283,6 +437,39 @@ struct swr_gs_llvm_iface {
    Value *pVtxAttribMap;
 };
 
+struct swr_tcs_llvm_iface { // SWR wrapper around lp_build_tcs_iface used by the TCS trampolines
+   struct lp_build_tcs_iface base; // must stay the first member: trampolines cast lp_build_tcs_iface* to swr_tcs_llvm_iface*
+   struct tgsi_shader_info *info;
+
+   BuilderSWR *pBuilder; // builder whose swr_tcs_llvm_* methods the trampolines forward to
+
+   Value *pTcsCtx; // presumably the LLVM value of the TCS context argument -- TODO confirm
+   SWR_TS_STATE *pTsState;
+
+   uint32_t output_vertices; // upper bound of the loop opened in swr_tcs_llvm_emit_prologue
+
+   struct lp_build_for_loop_state loop_state; // state of that loop; its counter is broadcast into invocation_id
+
+   Value *pVtxAttribMap;
+   Value *pVtxOutputAttribMap;
+   Value *pPatchOutputAttribMap;
+};
+
+struct swr_tes_llvm_iface { // SWR wrapper around lp_build_tes_iface used by the TES trampolines
+   struct lp_build_tes_iface base; // must stay the first member: trampolines cast lp_build_tes_iface* to swr_tes_llvm_iface*
+   struct tgsi_shader_info *info;
+
+   BuilderSWR *pBuilder; // builder whose swr_tes_llvm_* methods the trampolines forward to
+
+   Value *pTesCtx; // presumably the LLVM value of the TES context argument -- TODO confirm
+   SWR_TS_STATE *pTsState;
+
+   uint32_t num_outputs;
+
+   Value *pVtxAttribMap;
+   Value *pPatchAttribMap;
+};
+
 // trampoline functions so we can use the builder llvm construction methods
 static LLVMValueRef
 swr_gs_llvm_fetch_input(const struct lp_build_gs_iface *gs_iface,
@@ -347,6 +534,137 @@ swr_gs_llvm_epilogue(const struct lp_build_gs_iface *gs_base,
                                          emitted_prims_vec);
 }
 
+// Trampoline: recovers the swr_tcs_llvm_iface from `tcs_iface` and forwards
+// the call, unchanged, to the BuilderSWR method of the same name.
+static LLVMValueRef
+swr_tcs_llvm_fetch_input(const struct lp_build_tcs_iface *tcs_iface,
+                         struct lp_build_context * bld,
+                         boolean is_vindex_indirect,
+                         LLVMValueRef vertex_index,
+                         boolean is_aindex_indirect,
+                         LLVMValueRef attrib_index,
+                         LLVMValueRef swizzle_index)
+{
+   BuilderSWR *builder = ((swr_tcs_llvm_iface *)tcs_iface)->pBuilder;
+   struct lp_build_tgsi_context *tgsi_bld =
+      (struct lp_build_tgsi_context *)bld;
+
+   return builder->swr_tcs_llvm_fetch_input(
+      tcs_iface, tgsi_bld, is_vindex_indirect, vertex_index,
+      is_aindex_indirect, attrib_index, swizzle_index);
+}
+
+// Trampoline: recovers the swr_tcs_llvm_iface from `tcs_iface` and forwards
+// the call, unchanged, to the BuilderSWR method of the same name.
+static LLVMValueRef
+swr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface *tcs_iface,
+                          struct lp_build_context * bld,
+                          boolean is_vindex_indirect,
+                          LLVMValueRef vertex_index,
+                          boolean is_aindex_indirect,
+                          LLVMValueRef attrib_index,
+                          LLVMValueRef swizzle_index,
+                          uint32_t name)
+{
+   BuilderSWR *builder = ((swr_tcs_llvm_iface *)tcs_iface)->pBuilder;
+   struct lp_build_tgsi_context *tgsi_bld =
+      (struct lp_build_tgsi_context *)bld;
+
+   return builder->swr_tcs_llvm_fetch_output(
+      tcs_iface, tgsi_bld, is_vindex_indirect, vertex_index,
+      is_aindex_indirect, attrib_index, swizzle_index, name);
+}
+
+
+// Trampoline: finds the BuilderSWR through the SoA context's tcs_iface and
+// forwards the prologue emission to it.
+static void
+swr_tcs_llvm_emit_prologue(struct lp_build_context* bld)
+{
+   lp_build_tgsi_soa_context *soa = (lp_build_tgsi_soa_context *)bld;
+   BuilderSWR *builder = ((swr_tcs_llvm_iface *)soa->tcs_iface)->pBuilder;
+
+   builder->swr_tcs_llvm_emit_prologue(soa);
+}
+
+// Trampoline: finds the BuilderSWR through the SoA context's tcs_iface and
+// forwards the epilogue emission to it.
+static void
+swr_tcs_llvm_emit_epilogue(struct lp_build_context* bld)
+{
+   lp_build_tgsi_soa_context *soa = (lp_build_tgsi_soa_context *)bld;
+   BuilderSWR *builder = ((swr_tcs_llvm_iface *)soa->tcs_iface)->pBuilder;
+
+   builder->swr_tcs_llvm_emit_epilogue(soa);
+}
+
+// Trampoline: recovers the swr_tcs_llvm_iface from `tcs_iface` and forwards
+// the store, unchanged, to the BuilderSWR method of the same name.
+static void
+swr_tcs_llvm_store_output(const struct lp_build_tcs_iface *tcs_iface,
+                         struct lp_build_context * bld,
+                         unsigned name,
+                         boolean is_vindex_indirect,
+                         LLVMValueRef vertex_index,
+                         boolean is_aindex_indirect,
+                         LLVMValueRef attrib_index,
+                         LLVMValueRef swizzle_index,
+                         LLVMValueRef value)
+{
+   BuilderSWR *builder = ((swr_tcs_llvm_iface *)tcs_iface)->pBuilder;
+   struct lp_build_tgsi_context *tgsi_bld =
+      (struct lp_build_tgsi_context *)bld;
+
+   builder->swr_tcs_llvm_store_output(
+      tcs_iface, tgsi_bld, name, is_vindex_indirect, vertex_index,
+      is_aindex_indirect, attrib_index, swizzle_index, value);
+}
+
+
+// Trampoline: finds the BuilderSWR through the SoA context's tcs_iface and
+// forwards the barrier emission, passing the interface and the embedded
+// TGSI build context.
+static void
+swr_tcs_llvm_emit_barrier(struct lp_build_context *bld)
+{
+   lp_build_tgsi_soa_context *soa = (lp_build_tgsi_soa_context *)bld;
+   BuilderSWR *builder = ((swr_tcs_llvm_iface *)soa->tcs_iface)->pBuilder;
+
+   builder->swr_tcs_llvm_emit_barrier(soa->tcs_iface, &soa->bld_base);
+}
+
+
+// Trampoline: recovers the swr_tes_llvm_iface from `tes_iface` and forwards
+// the call, unchanged, to the BuilderSWR method of the same name.
+static LLVMValueRef
+swr_tes_llvm_fetch_vtx_input(const struct lp_build_tes_iface *tes_iface,
+                             struct lp_build_context * bld,
+                             boolean is_vindex_indirect,
+                             LLVMValueRef vertex_index,
+                             boolean is_aindex_indirect,
+                             LLVMValueRef attrib_index,
+                             LLVMValueRef swizzle_index)
+{
+   BuilderSWR *builder = ((swr_tes_llvm_iface *)tes_iface)->pBuilder;
+   struct lp_build_tgsi_context *tgsi_bld =
+      (struct lp_build_tgsi_context *)bld;
+
+   return builder->swr_tes_llvm_fetch_vtx_input(
+      tes_iface, tgsi_bld, is_vindex_indirect, vertex_index,
+      is_aindex_indirect, attrib_index, swizzle_index);
+}
+
+// Trampoline: recovers the swr_tes_llvm_iface from `tes_iface` and forwards
+// the call, unchanged, to the BuilderSWR method of the same name.
+static LLVMValueRef
+swr_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface *tes_iface,
+                               struct lp_build_context * bld,
+                               boolean is_aindex_indirect,
+                               LLVMValueRef attrib_index,
+                               LLVMValueRef swizzle_index)
+{
+   BuilderSWR *builder = ((swr_tes_llvm_iface *)tes_iface)->pBuilder;
+   struct lp_build_tgsi_context *tgsi_bld =
+      (struct lp_build_tgsi_context *)bld;
+
+   return builder->swr_tes_llvm_fetch_patch_input(
+      tes_iface, tgsi_bld, is_aindex_indirect, attrib_index, swizzle_index);
+}
+
 LLVMValueRef
 BuilderSWR::swr_gs_llvm_fetch_input(const struct lp_build_gs_iface *gs_iface,
                            struct lp_build_context * bld,
@@ -608,153 +926,959 @@ BuilderSWR::swr_gs_llvm_epilogue(const struct lp_build_gs_iface *gs_base,
    }
 }
 
-PFN_GS_FUNC
-BuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key)
+void
+BuilderSWR::swr_tcs_llvm_emit_prologue(struct lp_build_tgsi_soa_context* bld) // TCS prologue: opens the per-output-vertex loop and publishes its counter as invocation_id
 {
-   SWR_GS_STATE *pGS = &ctx->gs->gsState;
-   struct tgsi_shader_info *info = &ctx->gs->info.base;
+   swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)bld->tcs_iface;
 
-   memset(pGS, 0, sizeof(*pGS));
+   // Iterate over all the vertices in the output patch: start at 0, continue while counter < iface->output_vertices (unsigned compare), step 1. The loop state lives in iface->loop_state, so the loop is left open when this function returns.
+   lp_build_for_loop_begin(&iface->loop_state, gallivm,
+                        lp_build_const_int32(gallivm, 0),
+                        LLVMIntULT,
+                        lp_build_const_int32(gallivm, iface->output_vertices),
+                        lp_build_const_int32(gallivm, 1));
 
-   pGS->gsEnable = true;
+   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); // point the IRB() builder at gallivm's current insert block
+   bld->system_values.invocation_id  = wrap(VBROADCAST(unwrap(iface->loop_state.counter))); // invocation id = VBROADCAST of the loop counter
 
-   pGS->numInputAttribs = (VERTEX_ATTRIB_START_SLOT - VERTEX_POSITION_SLOT) + info->num_inputs;
-   pGS->outputTopology =
-      swr_convert_prim_topology(info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]);
+   if (verbose_shader) { // debug trace only
+      lp_build_printf(gallivm, "Prologue LOOP: Iteration %d BEGIN\n", iface->loop_state.counter);
+      lp_build_print_value(gallivm, "LOOP: InvocationId: \n", bld->system_values.invocation_id);
+   }
+}
 
-   /* It's +1 because emit_vertex in swr is always called exactly one time more
-    * than max_vertices passed in Geometry Shader. We need to allocate more memory
-    * to avoid crash/memory overwritten.
-    */
-   pGS->maxNumVerts = info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES] + 1;
-   pGS->instanceCount = info->properties[TGSI_PROPERTY_GS_INVOCATIONS];
+void
+BuilderSWR::swr_tcs_llvm_emit_epilogue(struct lp_build_tgsi_soa_context* bld) // TCS epilogue: closes the loop tracked in iface->loop_state
+{
+   swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)bld->tcs_iface;
 
-   // If point primitive then assume to use multiple streams
-   if(pGS->outputTopology == TOP_POINT_LIST) {
-      pGS->isSingleStream = false;
-   } else {
-      pGS->isSingleStream = true;
-      pGS->singleStreamID = 0;
+   if (verbose_shader) { // debug trace only
+      lp_build_printf(gallivm, "Epilogue LOOP: Iteration %d END\n", iface->loop_state.counter);
    }
+   lp_build_for_loop_end(&iface->loop_state); // presumably ends the loop begun by the prologue or by the most recent barrier, which use the same loop_state
+}
 
-   pGS->vertexAttribOffset = VERTEX_POSITION_SLOT;
-   pGS->inputVertStride = pGS->numInputAttribs + pGS->vertexAttribOffset;
-   pGS->outputVertexSize = SWR_VTX_NUM_SLOTS;
-   pGS->controlDataSize = 8; // GS ouputs max of 8 32B units
-   pGS->controlDataOffset = VERTEX_COUNT_SIZE;
-   pGS->outputVertexOffset = pGS->controlDataOffset + CONTROL_HEADER_SIZE;
+/**
+ * Emit IR that loads one channel of a TCS per-vertex input.
+ *
+ * The shader attribute slot is translated through iface->pVtxAttribMap and
+ * the value is loaded from the SWR_HS_CONTEXT_vert array of iface->pTcsCtx.
+ * For an indirect vertex or attribute index only element 0 of the index
+ * vector is used.
+ *
+ * @param tcs_iface          TCS interface; cast to swr_tcs_llvm_iface
+ * @param bld_base           TGSI build context (not used by this function)
+ * @param is_vindex_indirect vertex_index is a vector; element 0 is extracted
+ * @param vertex_index       index into the input vertex array
+ * @param is_aindex_indirect attrib_index is a vector; element 0 is extracted
+ * @param attrib_index       shader input slot, remapped via pVtxAttribMap
+ * @param swizzle_index      channel within the attribute
+ * @return the loaded value
+ */
+LLVMValueRef
+BuilderSWR::swr_tcs_llvm_fetch_input(const struct lp_build_tcs_iface *tcs_iface,
+                                     struct lp_build_tgsi_context * bld_base,
+                                     boolean is_vindex_indirect,
+                                     LLVMValueRef vertex_index,
+                                     boolean is_aindex_indirect,
+                                     LLVMValueRef attrib_index,
+                                     LLVMValueRef swizzle_index)
+{
+   swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface;
+   Value *vert_index = unwrap(vertex_index);
+   Value *attr_index = unwrap(attrib_index);
 
-   pGS->allocationSize =
-      VERTEX_COUNT_SIZE + // vertex count
-      CONTROL_HEADER_SIZE + // control header
-      (SWR_VTX_NUM_SLOTS * 16) * // sizeof vertex
-      pGS->maxNumVerts; // num verts
+   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
 
-   struct swr_geometry_shader *gs = ctx->gs;
+   if (verbose_shader) {
+      lp_build_print_value(gallivm, "TCS: Vertex index: ", vertex_index);
+      lp_build_print_value(gallivm, "TCS: Attrib index: ", attrib_index);
+      lp_build_print_value(gallivm, "TCS: Swizzle index: ", swizzle_index);
+   }
 
-   LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
-   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
+   if (is_vindex_indirect) {
+      vert_index = VEXTRACT(vert_index, C(0));
+      if (verbose_shader) {
+         // Print the extracted scalar, not the original index vector
+         lp_build_print_value(gallivm, "TCS: Extracted vertex index: ", wrap(vert_index));
+      }
+   }
 
-   memset(outputs, 0, sizeof(outputs));
+   if (is_aindex_indirect) {
+      attr_index = VEXTRACT(attr_index, C(0));
+      if (verbose_shader) {
+         // Print the extracted scalar, not the original index vector
+         lp_build_print_value(gallivm, "TCS: Extracted attrib index: ", wrap(attr_index));
+      }
+   }
 
-   AttrBuilder attrBuilder;
-   attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
+   // Translate the shader's input slot into the slot used in the vertex data
+   Value *attrib = LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_index}));
+   if (verbose_shader) {
+      lp_build_print_value(gallivm, "TCS: Attrib index loaded from map: ", wrap(attrib));
+   }
 
-   std::vector<Type *> gsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
-                              PointerType::get(mInt8Ty, 0),
-                              PointerType::get(Gen_SWR_GS_CONTEXT(JM()), 0)};
-   FunctionType *vsFuncType =
-      FunctionType::get(Type::getVoidTy(JM()->mContext), gsArgs, false);
+   Value *pBase = GEP(iface->pTcsCtx,
+                     { C(0), C(SWR_HS_CONTEXT_vert), vert_index,
+                     C(simdvertex_attrib), attrib, unwrap(swizzle_index) });
 
-   // create new vertex shader function
-   auto pFunction = Function::Create(vsFuncType,
-                                     GlobalValue::ExternalLinkage,
-                                     "GS",
-                                     JM()->mpCurrentModule);
-#if LLVM_VERSION_MAJOR < 5
-   AttributeSet attrSet = AttributeSet::get(
-      JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
-   pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
-#else
-   pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
-#endif
+   LLVMValueRef res = wrap(LOAD(pBase));
 
-   BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
-   IRB()->SetInsertPoint(block);
-   LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
+   if (verbose_shader) {
+      lp_build_print_value(gallivm, "TCS input fetched: ", res);
+   }
+   return res;
+}
 
-   auto argitr = pFunction->arg_begin();
-   Value *hPrivateData = &*argitr++;
-   hPrivateData->setName("hPrivateData");
-   Value *pWorkerData = &*argitr++;
-   pWorkerData->setName("pWorkerData");
-   Value *pGsCtx = &*argitr++;
-   pGsCtx->setName("gsCtx");
+/**
+ * Emit IR that reads back one channel of a TCS output for every SIMD lane.
+ *
+ * For each lane the value is loaded from that lane's output patch
+ * (pCPout[lane]) and inserted into the result vector.  The location is
+ * chosen by `name`:
+ *  - TGSI_SEMANTIC_TESSOUTER / TGSI_SEMANTIC_TESSINNER: the matching
+ *    tessellation factor array, indexed by swizzle_index;
+ *  - TGSI_SEMANTIC_PATCH: the per-patch attribute, remapped through
+ *    iface->pPatchOutputAttribMap (the same map the store path uses);
+ *  - anything else: the control-point attribute at vertex_index, remapped
+ *    through iface->pVtxOutputAttribMap.
+ * For an indirect vertex or attribute index only element 0 of the index
+ * vector is used.
+ *
+ * @return a vector holding one loaded value per lane
+ */
+LLVMValueRef
+BuilderSWR::swr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface *tcs_iface,
+                                      struct lp_build_tgsi_context * bld_base,
+                                      boolean is_vindex_indirect,
+                                      LLVMValueRef vertex_index,
+                                      boolean is_aindex_indirect,
+                                      LLVMValueRef attrib_index,
+                                      LLVMValueRef swizzle_index,
+                                      uint32_t name)
+{
+   swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface;
 
-   Value *consts_ptr =
-      GEP(hPrivateData, {C(0), C(swr_draw_context_constantGS)});
-   consts_ptr->setName("gs_constants");
-   Value *const_sizes_ptr =
-      GEP(hPrivateData, {0, swr_draw_context_num_constantsGS});
-   const_sizes_ptr->setName("num_gs_constants");
+   Value *vert_index = unwrap(vertex_index);
+   Value *attr_index = unwrap(attrib_index);
 
-   struct lp_build_sampler_soa *sampler =
-      swr_sampler_soa_create(key.sampler, PIPE_SHADER_GEOMETRY);
+   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
 
-   struct lp_bld_tgsi_system_values system_values;
-   memset(&system_values, 0, sizeof(system_values));
-   system_values.prim_id = wrap(LOAD(pGsCtx, {0, SWR_GS_CONTEXT_PrimitiveID}));
-   system_values.invocation_id = wrap(LOAD(pGsCtx, {0, SWR_GS_CONTEXT_InstanceID}));
+   if (verbose_shader) {
+      lp_build_print_value(gallivm, "++TCSo: Vertex index: ", vertex_index);
+      lp_build_print_value(gallivm, "++TCSo: Attrib index: ", wrap(attr_index));
+      lp_build_print_value(gallivm, "++TCSo: Swizzle index: ", swizzle_index);
+   }
 
-   std::vector<Constant*> mapConstants;
-   Value *vtxAttribMap = ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS));
-   for (unsigned slot = 0; slot < info->num_inputs; slot++) {
-      ubyte semantic_name = info->input_semantic_name[slot];
-      ubyte semantic_idx = info->input_semantic_index[slot];
+   if (is_vindex_indirect) {
+      vert_index = VEXTRACT(vert_index, C(0));
+      if (verbose_shader)
+      {
+         // Print the extracted scalar, not the original index vector
+         lp_build_print_value(gallivm, "TCSo: Extracted vertex index: ", wrap(vert_index));
+      }
+   }
 
-      unsigned vs_slot = locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base);
+   if (is_aindex_indirect) {
+      attr_index = VEXTRACT(attr_index, C(0));
+      if (verbose_shader) {
+         // Print the extracted scalar, not the original index vector
+         lp_build_print_value(gallivm, "TCSo: Extracted attrib index: ", wrap(attr_index));
+      }
+   }
 
-      vs_slot += VERTEX_ATTRIB_START_SLOT;
+   Value* res = unwrap(bld_base->base.zero);
 
-      if (ctx->vs->info.base.output_semantic_name[0] == TGSI_SEMANTIC_POSITION)
-         vs_slot--;
+   for (uint32_t lane = 0; lane < mVWidth; lane++) {
+      Value* p1 = LOAD(iface->pTcsCtx, {0, SWR_HS_CONTEXT_pCPout});
+      Value* pCpOut = GEP(p1, {lane});
 
-      if (semantic_name == TGSI_SEMANTIC_POSITION)
-         vs_slot = VERTEX_POSITION_SLOT;
+      if (name == TGSI_SEMANTIC_TESSOUTER || name == TGSI_SEMANTIC_TESSINNER) {
 
-      STORE(C(vs_slot), vtxAttribMap, {0, slot});
-      mapConstants.push_back(C(vs_slot));
+         Value* tessFactors = GEP(pCpOut, {(uint32_t)0, ScalarPatch_tessFactors});
+         Value* tessFactorArray = nullptr;
+         if (name == TGSI_SEMANTIC_TESSOUTER) {
+            tessFactorArray = GEP(tessFactors, {(uint32_t)0, SWR_TESSELLATION_FACTORS_OuterTessFactors});
+         } else {
+            tessFactorArray = GEP(tessFactors, {(uint32_t)0, SWR_TESSELLATION_FACTORS_InnerTessFactors});
+         }
+         Value* tessFactor = GEP(tessFactorArray, {C(0), unwrap(swizzle_index)});
+         res = VINSERT(res, LOAD(tessFactor), C(lane));
+
+      } else if (name == TGSI_SEMANTIC_PATCH) {
+         // Read from the slot the store path writes to: per-patch outputs
+         // are stored through pPatchOutputAttribMap, so read through it too.
+         Value* attrib = LOAD(GEP(iface->pPatchOutputAttribMap, {C(0), attr_index}));
+         Value* attr = GEP(pCpOut, {C(0), C(ScalarPatch_patchData), C(ScalarCPoint_attrib), attrib, unwrap(swizzle_index)});
+         res = VINSERT(res, LOAD(attr), C(lane));
+         if (verbose_shader) {
+            lp_build_print_value(gallivm, "++TCSo per-patch lane (patch-id): ", wrap(C(lane)));
+            lp_build_print_value(gallivm, "++TCSo per-patch loaded value: ", wrap(res));
+         }
+      } else {
+         // Generic attribute
+         Value *attrib =
+             LOAD(GEP(iface->pVtxOutputAttribMap, {C(0), attr_index}));
+         if (verbose_shader)
+         {
+            lp_build_print_value(gallivm, "TCSo: Attrib index from map: ", wrap(attrib));
+         }
+         Value* attr_chan = GEP(pCpOut, {C(0), C(ScalarPatch_cp), vert_index,
+                                    C(ScalarCPoint_attrib), attrib, unwrap(swizzle_index)});
+
+         res = VINSERT(res, LOAD(attr_chan), C(lane));
+      }
    }
 
-   struct lp_build_mask_context mask;
-   Value *mask_val = LOAD(pGsCtx, {0, SWR_GS_CONTEXT_mask}, "gsMask");
-   lp_build_mask_begin(&mask, gallivm,
-                       lp_type_float_vec(32, 32 * 8), wrap(mask_val));
+   if (verbose_shader) {
+      lp_build_print_value(gallivm, "TCSo: output fetched: ", wrap(res));
+   }
+   return wrap(res);
+}
 
-   // zero out cut buffer so we can load/modify/store bits
-   for (uint32_t lane = 0; lane < mVWidth; ++lane)
-   {
+/**
+ * Emit IR that writes one channel of a TCS output for every SIMD lane.
+ *
+ * For each lane the matching element of `value` is stored into that lane's
+ * output patch (pCPout[lane]).  The destination is chosen by `name`:
+ *  - TGSI_SEMANTIC_TESSOUTER / TGSI_SEMANTIC_TESSINNER: the matching
+ *    tessellation factor array, indexed by swizzle_index;
+ *  - TGSI_SEMANTIC_PATCH: the per-patch attribute, remapped through
+ *    iface->pPatchOutputAttribMap;
+ *  - anything else: the attribute, remapped through
+ *    iface->pVtxOutputAttribMap, of the control point selected by element 0
+ *    of the current invocation id (vertex_index only appears in debug
+ *    output).
+ *
+ * The lane value is always bitcast to float before it is used, so the plain
+ * store and the masked select both see the same type.  When the execution
+ * mask is active, a lane whose mask bit is clear keeps the value already in
+ * memory.
+ *
+ * @param tcs_iface          TCS interface; cast to swr_tcs_llvm_iface
+ * @param bld_base           TGSI build context; cast to the SoA context
+ * @param name               TGSI semantic of the output being written
+ * @param is_vindex_indirect vertex_index is a vector; element 0 is extracted
+ * @param vertex_index       vertex index operand (debug output only)
+ * @param is_aindex_indirect attrib_index is a vector; element 0 is extracted
+ * @param attrib_index       shader output slot
+ * @param swizzle_index      channel within the attribute / factor array
+ * @param value              vector of per-lane values to store
+ */
+void
+BuilderSWR::swr_tcs_llvm_store_output(const struct lp_build_tcs_iface *tcs_iface,
+                                      struct lp_build_tgsi_context *bld_base,
+                                      unsigned name,
+                                      boolean is_vindex_indirect,
+                                      LLVMValueRef vertex_index,
+                                      boolean is_aindex_indirect,
+                                      LLVMValueRef attrib_index,
+                                      LLVMValueRef swizzle_index,
+                                      LLVMValueRef value)
+{
+   swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface;
+   struct lp_build_tgsi_soa_context* bld = (struct lp_build_tgsi_soa_context*)bld_base;
+   struct lp_exec_mask *mask = &bld->exec_mask;
+
+   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
+
+   Value *vert_index = unwrap(vertex_index);
+   Value *attr_index = unwrap(attrib_index);
+
+   if (verbose_shader) {
+      lp_build_printf(gallivm, "[TCS OUT] =============================================\n");
+      lp_build_print_value(gallivm, "[TCS OUT] Store mask: ", bld->exec_mask.exec_mask);
+      lp_build_print_value(gallivm, "[TCS OUT] Store value: ", value);
+      lp_build_print_value(gallivm, "[TCS OUT] Vertex index: ", vertex_index);
+      lp_build_print_value(gallivm, "[TCS OUT] Attrib index: ", wrap(attr_index));
+      lp_build_print_value(gallivm, "[TCS OUT] Swizzle index: ", swizzle_index);
+   }
+
+   if (is_vindex_indirect) {
+      vert_index = VEXTRACT(vert_index, C(0));
+      if (verbose_shader) {
+         // Print the extracted scalar, not the original index vector
+         lp_build_print_value(gallivm, "[TCS OUT] Extracted vertex index: ", wrap(vert_index));
+      }
+   }
+
+   if (is_aindex_indirect) {
+      attr_index = VEXTRACT(attr_index, C(0));
+      if (verbose_shader) {
+         lp_build_print_value(gallivm, "[TCS OUT] Extracted attrib index: ", wrap(attr_index));
+      }
+   }
+
+   for (uint32_t lane = 0; lane < mVWidth; lane++) {
+      Value* p1 = LOAD(iface->pTcsCtx, {0, SWR_HS_CONTEXT_pCPout});
+      Value* pCpOut = GEP(p1, {lane});
+
+      if (name == TGSI_SEMANTIC_TESSOUTER || name == TGSI_SEMANTIC_TESSINNER) {
+         Value* tessFactors = GEP(pCpOut, {(uint32_t)0, ScalarPatch_tessFactors});
+         Value* tessFactorArray = nullptr;
+         if (name == TGSI_SEMANTIC_TESSOUTER) {
+            tessFactorArray = GEP(tessFactors, {(uint32_t)0, SWR_TESSELLATION_FACTORS_OuterTessFactors});
+         } else {
+            tessFactorArray = GEP(tessFactors, {(uint32_t)0, SWR_TESSELLATION_FACTORS_InnerTessFactors});
+         }
+         Value* tessFactor = GEP(tessFactorArray, {C(0), unwrap(swizzle_index)});
+         // Convert to float up front so the select and the store agree on type
+         Value* valueToStore = BITCAST(VEXTRACT(unwrap(value), C(lane)), mFP32Ty);
+         if (mask->has_mask) {
+            Value *originalVal = LOAD(tessFactor);
+            Value *vMask = TRUNC(VEXTRACT(unwrap(mask->exec_mask), C(lane)), mInt1Ty);
+            valueToStore = SELECT(vMask, valueToStore, originalVal);
+         }
+         STORE(valueToStore, tessFactor);
+         if (verbose_shader) {
+            lp_build_print_value(gallivm, "[TCS OUT][FACTOR] Stored value: ", wrap(valueToStore));
+         }
+      } else if (name == TGSI_SEMANTIC_PATCH) {
+         Value* attrib = LOAD(GEP(iface->pPatchOutputAttribMap, {C(0), attr_index}));
+         if (verbose_shader) {
+            lp_build_print_value(gallivm, "[TCS OUT][PATCH] vert_index: ", wrap(vert_index));
+            lp_build_print_value(gallivm, "[TCS OUT][PATCH] attr_index: ", wrap(attr_index));
+            lp_build_print_value(gallivm, "[TCS OUT][PATCH] vert_index_indirect: ", wrap(C(is_vindex_indirect)));
+            lp_build_print_value(gallivm, "[TCS OUT][PATCH] attr_index_indirect: ", wrap(C(is_aindex_indirect)));
+            lp_build_print_value(gallivm, "[TCS OUT][PATCH] attr index loaded from map: ", wrap(attrib));
+         }
+         Value* attr = GEP(pCpOut, {C(0), C(ScalarPatch_patchData), C(ScalarCPoint_attrib), attrib});
+         // Convert to float up front so the select and the store agree on type
+         Value* value_to_store = BITCAST(VEXTRACT(unwrap(value), C(lane)), mFP32Ty);
+         if (verbose_shader) {
+            lp_build_print_value(gallivm, "[TCS OUT][PATCH] lane (patch-id): ", wrap(C(lane)));
+            lp_build_print_value(gallivm, "[TCS OUT][PATCH] value to store: ", value);
+            lp_build_print_value(gallivm, "[TCS OUT][PATCH] per-patch value to store: ", wrap(value_to_store));
+            lp_build_print_value(gallivm, "[TCS OUT][PATCH] chan_index: ", swizzle_index);
+         }
+         if (mask->has_mask) {
+            Value *originalVal = LOADV(attr, {C(0), unwrap(swizzle_index)});
+            Value *vMask = TRUNC(VEXTRACT(unwrap(mask->exec_mask), C(lane)), mInt1Ty);
+            value_to_store = SELECT(vMask, value_to_store, originalVal);
+            if (verbose_shader) {
+               lp_build_print_value(gallivm, "[TCS OUT][PATCH] store mask: ", bld->exec_mask.exec_mask);
+               lp_build_print_value(gallivm, "[TCS OUT][PATCH] loaded original value: ", wrap(originalVal));
+               lp_build_print_value(gallivm, "[TCS OUT][PATCH] vMask: ", wrap(vMask));
+               lp_build_print_value(gallivm, "[TCS OUT][PATCH] selected value to store: ", wrap(value_to_store));
+            }
+         }
+         STOREV(value_to_store, attr, {C(0), unwrap(swizzle_index)});
+         if (verbose_shader) {
+            lp_build_print_value(gallivm, "[TCS OUT][PATCH] stored value: ", wrap(value_to_store));
+         }
+      } else {
+         // Convert to float up front so the select and the store agree on type
+         Value* value_to_store = BITCAST(VEXTRACT(unwrap(value), C(lane)), mFP32Ty);
+         Value* attrib = LOAD(GEP(iface->pVtxOutputAttribMap, {C(0), attr_index}));
+
+         if (verbose_shader) {
+            lp_build_print_value(gallivm, "[TCS OUT][VTX] invocation_id: ", bld->system_values.invocation_id);
+            lp_build_print_value(gallivm, "[TCS OUT][VTX] attribIndex: ", wrap(attr_index));
+            lp_build_print_value(gallivm, "[TCS OUT][VTX] attrib read from map: ", wrap(attrib));
+            lp_build_print_value(gallivm, "[TCS OUT][VTX] chan_index: ", swizzle_index);
+            lp_build_print_value(gallivm, "[TCS OUT][VTX] value: ", value);
+            lp_build_print_value(gallivm, "[TCS OUT][VTX] value_to_store: ", wrap(value_to_store));
+         }
+
+         // The control point written is the one for the current invocation
+         Value* attr_chan = GEP(pCpOut, {C(0), C(ScalarPatch_cp),
+                                    VEXTRACT(unwrap(bld->system_values.invocation_id), C(0)),
+                                    C(ScalarCPoint_attrib), attrib, unwrap(swizzle_index)});
+
+         // Mask output values if needed
+         if (mask->has_mask) {
+            Value *originalVal = LOAD(attr_chan);
+            Value *vMask = TRUNC(VEXTRACT(unwrap(mask->exec_mask), C(lane)), mInt1Ty);
+            value_to_store = SELECT(vMask, value_to_store, originalVal);
+         }
+         STORE(value_to_store, attr_chan);
+         if (verbose_shader) {
+            lp_build_print_value(gallivm, "[TCS OUT][VTX] stored: ", wrap(value_to_store));
+         }
+      }
+   }
+}
+
+
+
+/**
+ * Emit a TCS barrier: end the loop currently tracked in the iface's
+ * loop_state and immediately begin a new one with the same bounds
+ * (0 .. output_vertices, step 1).  The invocation id is then refreshed from
+ * the new loop counter.
+ */
+void
+BuilderSWR::swr_tcs_llvm_emit_barrier(const struct lp_build_tcs_iface *tcs_iface,
+                                      struct lp_build_tgsi_context *bld_base)
+{
+   auto *tcs = (swr_tcs_llvm_iface*)tcs_iface;
+   auto *soa = (struct lp_build_tgsi_soa_context*)bld_base;
+   auto &loop = tcs->loop_state;
+
+   if (verbose_shader) {
+      lp_build_printf(gallivm, "Barrier LOOP: Iteration %d END\n", loop.counter);
+   }
+
+   // Everything emitted so far belongs to the loop being closed here
+   lp_build_for_loop_end(&loop);
+
+   // Re-open an identical loop for the code that follows the barrier
+   LLVMValueRef first = lp_build_const_int32(gallivm, 0);
+   LLVMValueRef limit = lp_build_const_int32(gallivm, tcs->output_vertices);
+   LLVMValueRef stride = lp_build_const_int32(gallivm, 1);
+   lp_build_for_loop_begin(&loop, gallivm, first, LLVMIntULT, limit, stride);
+
+   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
+
+   Value *counter = unwrap(loop.counter);
+   soa->system_values.invocation_id = wrap(VBROADCAST(counter));
+
+   if (verbose_shader) {
+      lp_build_printf(gallivm, "Barrier LOOP: Iteration %d BEGIN\n", loop.counter);
+      lp_build_print_value(gallivm, "LOOP: InvocationId: \n", soa->system_values.invocation_id);
+   }
+}
+
+
+/**
+ * Emit IR that loads one channel of a per-patch TES input.
+ *
+ * The attribute slot is remapped through the iface's pPatchAttribMap and
+ * the value is read from the patchData attributes of the pCpIn patch in the
+ * TES context.  With a direct attribute index one value is loaded and
+ * VBROADCAST into the result; with an indirect index the lookup is repeated
+ * for every element of the index vector and the results are inserted one by
+ * one.
+ *
+ * @return the resulting vector value
+ */
+LLVMValueRef
+BuilderSWR::swr_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface *tes_iface,
+                                     struct lp_build_tgsi_context * bld_base,
+                                     boolean is_aindex_indirect,
+                                     LLVMValueRef attrib_index,
+                                     LLVMValueRef swizzle_index)
+{
+   swr_tes_llvm_iface *tes = (swr_tes_llvm_iface*)tes_iface;
+   Value *slot_operand = unwrap(attrib_index);
+   Value *result = unwrap(bld_base->base.zero);
+
+   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
+
+   if (verbose_shader) {
+      lp_build_printf(gallivm, "[TES IN][PATCH] --------------------------------------\n");
+   }
+
+   if (!is_aindex_indirect) {
+      // Direct index: a single load shared by every lane
+      Value *mapped = LOAD(GEP(tes->pPatchAttribMap, {C(0), slot_operand}));
+
+      Value *patch = LOAD(tes->pTesCtx, {(uint32_t)0, SWR_DS_CONTEXT_pCpIn}, "pCpIn");
+      Value *patch_data = GEP(patch, {(uint32_t)0, ScalarPatch_patchData});
+      Value *attrs = GEP(patch_data, {(uint32_t)0, ScalarCPoint_attrib});
+      Value *loaded = LOADV(attrs, {C(0), mapped, unwrap(swizzle_index)});
+      if (verbose_shader) {
+         lp_build_print_value(gallivm, "[TES IN][PATCH] attrib_index: ", attrib_index);
+         lp_build_print_value(gallivm, "[TES IN][PATCH] attr_chan_index: ", wrap(slot_operand));
+         lp_build_print_value(gallivm, "[TES IN][PATCH] attrib read from map: ", wrap(mapped));
+         lp_build_print_value(gallivm, "[TES IN][PATCH] swizzle_index: ", swizzle_index);
+         lp_build_print_value(gallivm, "[TES IN][PATCH] Loaded: ", wrap(loaded));
+      }
+      result = VBROADCAST(loaded);
+   } else {
+      // Indirect index: one lookup per element of the index vector
+      struct lp_type vec_type = bld_base->base.type;
+
+      for (int lane = 0; lane < vec_type.length; lane++) {
+         Value *lane_slot = VEXTRACT(slot_operand, C(lane));
+
+         Value *mapped =
+            LOAD(GEP(tes->pPatchAttribMap, {C(0), lane_slot}));
+
+         Value *patch = LOAD(tes->pTesCtx, {0, SWR_DS_CONTEXT_pCpIn}, "pCpIn");
+         Value *patch_data = GEP(patch, {(uint32_t)0, ScalarPatch_patchData});
+         Value *attrs = GEP(patch_data, {(uint32_t)0, ScalarCPoint_attrib});
+         Value *loaded = LOADV(attrs, {C(0), mapped, unwrap(swizzle_index)});
+         if (verbose_shader) {
+            lp_build_print_value(gallivm, "[TES IN][PATCH] attrib_index: ", attrib_index);
+            lp_build_print_value(gallivm, "[TES IN][PATCH] attr_chan_index: ", wrap(lane_slot));
+            lp_build_print_value(gallivm, "[TES IN][PATCH] attrib read from map: ", wrap(mapped));
+            lp_build_print_value(gallivm, "[TES IN][PATCH] swizzle_index: ", swizzle_index);
+            lp_build_print_value(gallivm, "[TES IN][PATCH] Loaded: ", wrap(loaded));
+         }
+         result = VINSERT(result, loaded, C(lane));
+      }
+   }
+
+   if (verbose_shader) {
+      lp_build_print_value(gallivm, "[TES IN][PATCH] returning: ", wrap(result));
+   }
+   return wrap(result);
+}
+
+
+
+/**
+ * Emit IR that loads one channel of a per-vertex (control point) TES input.
+ *
+ * The attribute slot is remapped through iface->pVtxAttribMap and the value
+ * is read from the control point array (ScalarPatch_cp) of the pCpIn patch
+ * in the TES context.  When both indices are direct, one value is loaded
+ * and VBROADCAST into the result.  When either index is indirect, the
+ * lookup is repeated for every element of the vector type, taking the
+ * indirect index (or indices) from the matching element, and the results
+ * are inserted one by one.
+ *
+ * @param tes_iface          TES interface; cast to swr_tes_llvm_iface
+ * @param bld_base           TGSI build context (supplies zero and the type)
+ * @param is_vindex_indirect vertex_index is a per-element vector
+ * @param vertex_index       control point index
+ * @param is_aindex_indirect attrib_index is a per-element vector
+ * @param attrib_index       shader input slot, remapped via pVtxAttribMap
+ * @param swizzle_index      channel within the attribute
+ * @return the resulting vector value
+ */
+LLVMValueRef
+BuilderSWR::swr_tes_llvm_fetch_vtx_input(const struct lp_build_tes_iface *tes_iface,
+                                     struct lp_build_tgsi_context * bld_base,
+                                     boolean is_vindex_indirect,
+                                     LLVMValueRef vertex_index,
+                                     boolean is_aindex_indirect,
+                                     LLVMValueRef attrib_index,
+                                     LLVMValueRef swizzle_index)
+{
+    swr_tes_llvm_iface *iface = (swr_tes_llvm_iface*)tes_iface;
+    Value *vert_index = unwrap(vertex_index);
+    Value *attr_index = unwrap(attrib_index);
+
+    IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
+
+    if (verbose_shader) {
+      lp_build_printf(gallivm, "[TES IN][VTX] --------------------------------------\n");
+    }
+
+    Value *res = unwrap(bld_base->base.zero);
+    if (is_vindex_indirect || is_aindex_indirect) {
+       // At least one index varies per element: look each element up separately
+       int i;
+       struct lp_type type = bld_base->base.type;
+
+       for (i = 0; i < type.length; i++) {
+          Value *vert_chan_index = vert_index;
+          Value *attr_chan_index = attr_index;
+
+          if (is_vindex_indirect) {
+             vert_chan_index = VEXTRACT(vert_index, C(i));
+          }
+          if (is_aindex_indirect) {
+             attr_chan_index = VEXTRACT(attr_index, C(i));
+          }
+
+          Value *attrib =
+             LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_chan_index}));
+
+          Value *pCpIn = LOAD(iface->pTesCtx, {0, SWR_DS_CONTEXT_pCpIn}, "pCpIn");
+          Value *pCp = GEP(pCpIn, {0, ScalarPatch_cp});
+          Value *pVertex = GEP(pCp, {(Value*)C(0), vert_chan_index});
+          Value *pAttrTab = GEP(pVertex, {uint32_t(0), uint32_t(0)});
+          Value *pAttr = GEP(pAttrTab, {(Value*)C(0), attrib});
+          Value *Val = LOADV(pAttr, {C(0), unwrap(swizzle_index)});
+          if (verbose_shader) {
+             lp_build_print_value(gallivm, "[TES IN][VTX] attrib_index: ", attrib_index);
+             // Print the index actually used for this element
+             lp_build_print_value(gallivm, "[TES IN][VTX] attr_chan_index: ", wrap(attr_chan_index));
+             lp_build_print_value(gallivm, "[TES IN][VTX] attrib read from map: ", wrap(attrib));
+             lp_build_print_value(gallivm, "[TES IN][VTX] swizzle_index: ", swizzle_index);
+             lp_build_print_value(gallivm, "[TES IN][VTX] Loaded: ", wrap(Val));
+          }
+          res = VINSERT(res, Val, C(i));
+       }
+    } else {
+      // Both indices are direct: one load, broadcast to the whole vector
+      Value *attrib = LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_index}));
+
+      Value *pCpIn = LOAD(iface->pTesCtx, {0, SWR_DS_CONTEXT_pCpIn}, "pCpIn");
+      Value *pCp = GEP(pCpIn, {0, ScalarPatch_cp});
+      Value *pVertex = GEP(pCp, {(Value*)C(0), vert_index});
+      Value *pAttrTab = GEP(pVertex, {uint32_t(0), uint32_t(0)});
+      Value *pAttr = GEP(pAttrTab, {(Value*)C(0), attrib});
+      Value *Val = LOADV(pAttr, {C(0), unwrap(swizzle_index)});
+      if (verbose_shader) {
+         lp_build_print_value(gallivm, "[TES IN][VTX] attrib_index: ", attrib_index);
+         lp_build_print_value(gallivm, "[TES IN][VTX] attr_chan_index: ", wrap(attr_index));
+         lp_build_print_value(gallivm, "[TES IN][VTX] attrib read from map: ", wrap(attrib));
+         lp_build_print_value(gallivm, "[TES IN][VTX] swizzle_index: ", swizzle_index);
+         lp_build_print_value(gallivm, "[TES IN][VTX] Loaded: ", wrap(Val));
+      }
+      res = VBROADCAST(Val);
+    }
+    if (verbose_shader) {
+       lp_build_print_value(gallivm, "[TES IN][VTX] returning: ", wrap(res));
+    }
+    return wrap(res);
+}
+
+
+
+
+PFN_GS_FUNC
+BuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key)
+{
+   SWR_GS_STATE *pGS = &ctx->gs->gsState;
+   struct tgsi_shader_info *info = &ctx->gs->info.base;
+
+   memset(pGS, 0, sizeof(*pGS));
+
+   pGS->gsEnable = true;
+
+   pGS->numInputAttribs = (VERTEX_ATTRIB_START_SLOT - VERTEX_POSITION_SLOT) + info->num_inputs;
+   pGS->outputTopology =
+      swr_convert_prim_topology(info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM], 0);
+
+   /* It's +1 because emit_vertex in swr is always called exactly one time more
+    * than max_vertices passed in Geometry Shader. We need to allocate more memory
+    * to avoid crash/memory overwritten.
+    */
+   pGS->maxNumVerts = info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES] + 1;
+   pGS->instanceCount = info->properties[TGSI_PROPERTY_GS_INVOCATIONS];
+
+   // If point primitive then assume to use multiple streams
+   if(pGS->outputTopology == TOP_POINT_LIST) {
+      pGS->isSingleStream = false;
+   } else {
+      pGS->isSingleStream = true;
+      pGS->singleStreamID = 0;
+   }
+
+   pGS->vertexAttribOffset = VERTEX_POSITION_SLOT;
+   pGS->inputVertStride = pGS->numInputAttribs + pGS->vertexAttribOffset;
+   pGS->outputVertexSize = SWR_VTX_NUM_SLOTS;
+   pGS->controlDataSize = 8; // GS ouputs max of 8 32B units
+   pGS->controlDataOffset = VERTEX_COUNT_SIZE;
+   pGS->outputVertexOffset = pGS->controlDataOffset + CONTROL_HEADER_SIZE;
+
+   pGS->allocationSize =
+      VERTEX_COUNT_SIZE + // vertex count
+      CONTROL_HEADER_SIZE + // control header
+      (SWR_VTX_NUM_SLOTS * 16) * // sizeof vertex
+      pGS->maxNumVerts; // num verts
+
+   struct swr_geometry_shader *gs = ctx->gs;
+
+   LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
+   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
+
+   memset(outputs, 0, sizeof(outputs));
+
+   AttrBuilder attrBuilder;
+   attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
+
+   std::vector<Type *> gsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
+                              PointerType::get(mInt8Ty, 0),
+                              PointerType::get(Gen_SWR_GS_CONTEXT(JM()), 0)};
+   FunctionType *vsFuncType =
+      FunctionType::get(Type::getVoidTy(JM()->mContext), gsArgs, false);
+
+   // create new vertex shader function
+   auto pFunction = Function::Create(vsFuncType,
+                                     GlobalValue::ExternalLinkage,
+                                     "GS",
+                                     JM()->mpCurrentModule);
+#if LLVM_VERSION_MAJOR < 5
+   AttributeSet attrSet = AttributeSet::get(
+      JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
+   pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
+#else
+   pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
+#endif
+
+   BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
+   IRB()->SetInsertPoint(block);
+   LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
+
+   auto argitr = pFunction->arg_begin();
+   Value *hPrivateData = &*argitr++;
+   hPrivateData->setName("hPrivateData");
+   Value *pWorkerData = &*argitr++;
+   pWorkerData->setName("pWorkerData");
+   Value *pGsCtx = &*argitr++;
+   pGsCtx->setName("gsCtx");
+
+   Value *consts_ptr =
+      GEP(hPrivateData, {C(0), C(swr_draw_context_constantGS)});
+   consts_ptr->setName("gs_constants");
+   Value *const_sizes_ptr =
+      GEP(hPrivateData, {0, swr_draw_context_num_constantsGS});
+   const_sizes_ptr->setName("num_gs_constants");
+
+   struct lp_build_sampler_soa *sampler =
+      swr_sampler_soa_create(key.sampler, PIPE_SHADER_GEOMETRY);
+
+   struct lp_bld_tgsi_system_values system_values;
+   memset(&system_values, 0, sizeof(system_values));
+   system_values.prim_id = wrap(LOAD(pGsCtx, {0, SWR_GS_CONTEXT_PrimitiveID}));
+   system_values.invocation_id = wrap(LOAD(pGsCtx, {0, SWR_GS_CONTEXT_InstanceID}));
+
+   std::vector<Constant*> mapConstants;
+   Value *vtxAttribMap = ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS));
+   for (unsigned slot = 0; slot < info->num_inputs; slot++) {
+      ubyte semantic_name = info->input_semantic_name[slot];
+      ubyte semantic_idx = info->input_semantic_index[slot];
+
+      unsigned vs_slot = locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base);
+
+      vs_slot += VERTEX_ATTRIB_START_SLOT;
+
+      if (ctx->vs->info.base.output_semantic_name[0] == TGSI_SEMANTIC_POSITION)
+         vs_slot--;
+
+      if (semantic_name == TGSI_SEMANTIC_POSITION)
+         vs_slot = VERTEX_POSITION_SLOT;
+
+      STORE(C(vs_slot), vtxAttribMap, {0, slot});
+      mapConstants.push_back(C(vs_slot));
+   }
+
+   struct lp_build_mask_context mask;
+   Value *mask_val = LOAD(pGsCtx, {0, SWR_GS_CONTEXT_mask}, "gsMask");
+   lp_build_mask_begin(&mask, gallivm,
+                       lp_type_float_vec(32, 32 * 8), wrap(mask_val));
+
+   // zero out cut buffer so we can load/modify/store bits
+   for (uint32_t lane = 0; lane < mVWidth; ++lane)
+   {
       Value* pStream = LOAD(pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});
 #if LLVM_VERSION_MAJOR >= 10
       MEMSET(pStream, C((char)0), VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE, MaybeAlign(sizeof(float) * KNOB_SIMD_WIDTH));
 #else
-      MEMSET(pStream, C((char)0), VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE, sizeof(float) * KNOB_SIMD_WIDTH);
+      MEMSET(pStream, C((char)0), VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE, sizeof(float) * KNOB_SIMD_WIDTH);
+#endif
+   }
+
+   struct swr_gs_llvm_iface gs_iface;
+   gs_iface.base.fetch_input = ::swr_gs_llvm_fetch_input;
+   gs_iface.base.emit_vertex = ::swr_gs_llvm_emit_vertex;
+   gs_iface.base.end_primitive = ::swr_gs_llvm_end_primitive;
+   gs_iface.base.gs_epilogue = ::swr_gs_llvm_epilogue;
+   gs_iface.pBuilder = this;
+   gs_iface.pGsCtx = pGsCtx;
+   gs_iface.pGsState = pGS;
+   gs_iface.num_outputs = gs->info.base.num_outputs;
+   gs_iface.num_verts_per_prim =
+      u_vertices_per_prim((pipe_prim_type)info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]);
+   gs_iface.info = info;
+   gs_iface.pVtxAttribMap = vtxAttribMap;
+
+   struct lp_build_tgsi_params params;
+   memset(&params, 0, sizeof(params));
+   params.type = lp_type_float_vec(32, 32 * 8);
+   params.mask = & mask;
+   params.consts_ptr = wrap(consts_ptr);
+   params.const_sizes_ptr = wrap(const_sizes_ptr);
+   params.system_values = &system_values;
+   params.inputs = inputs;
+   params.context_ptr = wrap(hPrivateData);
+   params.sampler = sampler;
+   params.info = &gs->info.base;
+   params.gs_iface = &gs_iface.base;
+
+   lp_build_tgsi_soa(gallivm,
+                     gs->pipe.tokens,
+                     &params,
+                     outputs);
+
+   lp_build_mask_end(&mask);
+
+   sampler->destroy(sampler);
+
+   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
+
+   RET_VOID();
+
+   gallivm_verify_function(gallivm, wrap(pFunction));
+   gallivm_compile_module(gallivm);
+
+   PFN_GS_FUNC pFunc =
+      (PFN_GS_FUNC)gallivm_jit_function(gallivm, wrap(pFunction));
+
+   debug_printf("geom shader  %p\n", pFunc);
+   assert(pFunc && "Error: GeomShader = NULL");
+
+   JM()->mIsModuleFinalized = true;
+
+   return pFunc;
+}
+
+PFN_TES_FUNC
+BuilderSWR::CompileTES(struct swr_context *ctx, swr_jit_tes_key &key)
+{
+   SWR_TS_STATE *pTS = &ctx->tsState;
+   struct tgsi_shader_info *info = &ctx->tes->info.base;
+
+   // tessellation is enabled if TES is present
+   // clear tessellation state here then
+   memset(pTS, 0, sizeof(*pTS));
+
+   pTS->tsEnable = true;
+
+   unsigned tes_prim_mode = info->properties[TGSI_PROPERTY_TES_PRIM_MODE];
+   unsigned tes_spacing = info->properties[TGSI_PROPERTY_TES_SPACING];
+   bool tes_vertex_order_cw = info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW];
+   bool tes_point_mode = info->properties[TGSI_PROPERTY_TES_POINT_MODE];
+   SWR_TS_DOMAIN type;
+   SWR_TS_PARTITIONING partitioning;
+   SWR_TS_OUTPUT_TOPOLOGY topology;
+   PRIMITIVE_TOPOLOGY postDSTopology;
+
+   // TESS_TODO: move this to helper functions to improve readability
+   switch (tes_prim_mode) {
+   case PIPE_PRIM_LINES:
+      type = SWR_TS_ISOLINE;
+      postDSTopology = TOP_LINE_LIST;
+      break;
+   case PIPE_PRIM_TRIANGLES:
+      type = SWR_TS_TRI;
+      postDSTopology = TOP_TRIANGLE_LIST;
+      break;
+   case PIPE_PRIM_QUADS:
+      type = SWR_TS_QUAD;
+      // See OpenGL spec - quads are tessellated into triangles
+      postDSTopology = TOP_TRIANGLE_LIST;
+      break;
+   default:
+      assert(0);
+   }
+
+   switch (tes_spacing) {
+   case PIPE_TESS_SPACING_FRACTIONAL_ODD:
+      partitioning = SWR_TS_ODD_FRACTIONAL;
+      break;
+   case PIPE_TESS_SPACING_FRACTIONAL_EVEN:
+      partitioning = SWR_TS_EVEN_FRACTIONAL;
+      break;
+   case PIPE_TESS_SPACING_EQUAL:
+      partitioning = SWR_TS_INTEGER;
+      break;
+   default:
+      assert(0);
+   }
+
+   if (tes_point_mode) {
+      topology = SWR_TS_OUTPUT_POINT;
+      postDSTopology = TOP_POINT_LIST;
+   }
+   else if (tes_prim_mode == PIPE_PRIM_LINES) {
+      topology = SWR_TS_OUTPUT_LINE;
+   }
+   else if (tes_vertex_order_cw) {
+      topology = SWR_TS_OUTPUT_TRI_CW;
+   }
+   else {
+      topology = SWR_TS_OUTPUT_TRI_CCW;
+   }
+
+   pTS->domain = type;
+   pTS->tsOutputTopology = topology;
+   pTS->partitioning = partitioning;
+   pTS->numDsOutputAttribs = info->num_outputs;
+   pTS->postDSTopology = postDSTopology;
+
+   pTS->dsAllocationSize = SWR_VTX_NUM_SLOTS * MAX_NUM_VERTS_PER_PRIM;
+   pTS->vertexAttribOffset = VERTEX_ATTRIB_START_SLOT;
+   pTS->srcVertexAttribOffset = VERTEX_ATTRIB_START_SLOT;
+   pTS->dsOutVtxAttribOffset = VERTEX_ATTRIB_START_SLOT;
+
+   struct swr_tess_evaluation_shader *tes = ctx->tes;
+
+   LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
+   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
+
+   memset(outputs, 0, sizeof(outputs));
+
+   AttrBuilder attrBuilder;
+   attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
+
+   std::vector<Type *> tesArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
+                               PointerType::get(mInt8Ty, 0),
+                               PointerType::get(Gen_SWR_DS_CONTEXT(JM()), 0)};
+   FunctionType *tesFuncType =
+      FunctionType::get(Type::getVoidTy(JM()->mContext), tesArgs, false);
+
+   // create new tessellation evaluation shader function
+   auto pFunction = Function::Create(tesFuncType,
+                                     GlobalValue::ExternalLinkage,
+                                     "TES",
+                                     JM()->mpCurrentModule);
+
+#if LLVM_VERSION_MAJOR < 5
+   AttributeSet attrSet = AttributeSet::get(
+      JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
+   pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
+#else
+   pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
 #endif
+
+   BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
+   IRB()->SetInsertPoint(block);
+   LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
+
+   auto argitr = pFunction->arg_begin();
+   Value *hPrivateData = &*argitr++;
+   hPrivateData->setName("hPrivateData");
+   Value *pWorkerData = &*argitr++;
+   pWorkerData->setName("pWorkerData");
+   Value *pTesCtx = &*argitr++;
+   pTesCtx->setName("tesCtx");
+
+   Value *consts_ptr =
+      GEP(hPrivateData, {C(0), C(swr_draw_context_constantTES)});
+   consts_ptr->setName("tes_constants");
+   Value *const_sizes_ptr =
+      GEP(hPrivateData, {0, swr_draw_context_num_constantsTES});
+   const_sizes_ptr->setName("num_tes_constants");
+
+   struct lp_build_sampler_soa *sampler =
+      swr_sampler_soa_create(key.sampler, PIPE_SHADER_TESS_EVAL);
+
+   struct lp_bld_tgsi_system_values system_values;
+   memset(&system_values, 0, sizeof(system_values));
+
+   // Load and calculate system values
+   // Tessellation coordinates (gl_TessCoord)
+   Value *vecOffset = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_vectorOffset}, "vecOffset");
+   Value *vecStride = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_vectorStride}, "vecStride");
+   Value *vecIndex  = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_vectorOffset});
+
+   Value* tess_coord = ALLOCA(ArrayType::get(mSimdFP32Ty, 3));
+
+   Value *tessCoordU = LOADV(LOAD(pTesCtx, {0, SWR_DS_CONTEXT_pDomainU}), {vecIndex}, "tessCoordU");
+   STORE(tessCoordU, tess_coord, {0, 0});
+   Value *tessCoordV = LOADV(LOAD(pTesCtx, {0, SWR_DS_CONTEXT_pDomainV}), {vecIndex}, "tessCoordV");
+   STORE(tessCoordV, tess_coord, {0, 1});
+   Value *tessCoordW = FSUB(FSUB(VIMMED1(1.0f), tessCoordU), tessCoordV, "tessCoordW");
+   STORE(tessCoordW, tess_coord, {0, 2});
+   system_values.tess_coord = wrap(tess_coord);
+
+   // Primitive ID
+   system_values.prim_id = wrap(VBROADCAST(LOAD(pTesCtx, {0, SWR_DS_CONTEXT_PrimitiveID}), "PrimitiveID"));
+
+   // Tessellation factors
+   Value* pPatch = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_pCpIn});
+   Value* pTessFactors = GEP(pPatch, {C(0), C(ScalarPatch_tessFactors)});
+
+   assert(SWR_NUM_OUTER_TESS_FACTORS == 4);
+   Value* sys_value_outer_factors = UndefValue::get(VectorType::get(mFP32Ty, 4));
+   for (unsigned i = 0; i < SWR_NUM_OUTER_TESS_FACTORS; i++) {
+      Value* v = LOAD(pTessFactors, {0, SWR_TESSELLATION_FACTORS_OuterTessFactors, i});
+      sys_value_outer_factors = VINSERT(sys_value_outer_factors, v, i, "gl_TessLevelOuter");
    }
+   system_values.tess_outer = wrap(sys_value_outer_factors);
 
-   struct swr_gs_llvm_iface gs_iface;
-   gs_iface.base.fetch_input = ::swr_gs_llvm_fetch_input;
-   gs_iface.base.emit_vertex = ::swr_gs_llvm_emit_vertex;
-   gs_iface.base.end_primitive = ::swr_gs_llvm_end_primitive;
-   gs_iface.base.gs_epilogue = ::swr_gs_llvm_epilogue;
-   gs_iface.pBuilder = this;
-   gs_iface.pGsCtx = pGsCtx;
-   gs_iface.pGsState = pGS;
-   gs_iface.num_outputs = gs->info.base.num_outputs;
-   gs_iface.num_verts_per_prim =
-      u_vertices_per_prim((pipe_prim_type)info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]);
-   gs_iface.info = info;
-   gs_iface.pVtxAttribMap = vtxAttribMap;
+   assert(SWR_NUM_INNER_TESS_FACTORS == 2);
+   Value* sys_value_inner_factors = UndefValue::get(VectorType::get(mFP32Ty, 4));
+   for (unsigned i = 0; i < SWR_NUM_INNER_TESS_FACTORS; i++) {
+      Value* v = LOAD(pTessFactors, {0, SWR_TESSELLATION_FACTORS_InnerTessFactors, i});
+      sys_value_inner_factors = VINSERT(sys_value_inner_factors, v, i, "gl_TessLevelInner");
+   }
+   system_values.tess_inner = wrap(sys_value_inner_factors);
+
+   if (verbose_shader)
+   {
+      lp_build_print_value(gallivm, "tess_coord = ", system_values.tess_coord);
+   }
+
+   struct tgsi_shader_info *pPrevShader = nullptr;
+
+   if (ctx->tcs) {
+      pPrevShader = &ctx->tcs->info.base;
+   }
+   else {
+      pPrevShader = &ctx->vs->info.base;
+   }
+
+   // Figure out how many per-patch attributes we have
+   unsigned perPatchAttrs = 0;
+   unsigned genericAttrs = 0;
+   unsigned tessLevelAttrs = 0;
+   unsigned sgvAttrs = 0;
+   for (unsigned slot = 0; slot < pPrevShader->num_outputs; slot++) {
+      switch (pPrevShader->output_semantic_name[slot]) {
+      case TGSI_SEMANTIC_PATCH:
+         perPatchAttrs++;
+         break;
+      case TGSI_SEMANTIC_GENERIC:
+         genericAttrs++;
+         break;
+      case TGSI_SEMANTIC_TESSINNER:
+      case TGSI_SEMANTIC_TESSOUTER:
+         tessLevelAttrs++;
+         break;
+      case TGSI_SEMANTIC_POSITION:
+      case TGSI_SEMANTIC_CLIPDIST:
+      case TGSI_SEMANTIC_PSIZE:
+         sgvAttrs++;
+         break;
+      default:
+         assert(!"Unknown semantic input in TES");
+      }
+   }
+
+   std::vector<Constant *> mapConstants;
+   Value *vtxAttribMap = ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS));
+   Value *patchAttribMap = ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS));
+   for (unsigned slot = 0; slot < info->num_inputs; slot++) {
+      ubyte semantic_name = info->input_semantic_name[slot];
+      ubyte semantic_idx = info->input_semantic_index[slot];
+
+      // Where in TCS output is my attribute?
+      // TESS_TODO: revisit after implement pass-through TCS
+      unsigned tcs_slot = locate_linkage(semantic_name, semantic_idx, pPrevShader);
+
+      // Skip tessellation levels - these go to the tessellator, not TES
+      switch (semantic_name) {
+      case TGSI_SEMANTIC_GENERIC:
+         tcs_slot = tcs_slot + VERTEX_ATTRIB_START_SLOT - sgvAttrs - tessLevelAttrs;
+         break;
+      case TGSI_SEMANTIC_PATCH:
+         tcs_slot = semantic_idx;
+         break;
+      case TGSI_SEMANTIC_POSITION:
+         tcs_slot = VERTEX_POSITION_SLOT;
+         break;
+      case TGSI_SEMANTIC_CLIPDIST:
+      case TGSI_SEMANTIC_PSIZE:
+         break;
+      default:
+         assert(!"Unexpected semantic found while builiding TES input map");
+      }
+      if (semantic_name == TGSI_SEMANTIC_PATCH) {
+         STORE(C(tcs_slot), patchAttribMap, {0, slot});
+      } else {
+         STORE(C(tcs_slot), vtxAttribMap, {0, slot});
+      }
+      mapConstants.push_back(C(tcs_slot));
+   }
+
+   // Build execution mask
+   struct lp_build_mask_context mask;
+   Value *mask_val = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_mask}, "tesMask");
+
+   if (verbose_shader)
+      lp_build_print_value(gallivm, "TES execution mask: ", wrap(mask_val));
+
+   lp_build_mask_begin(&mask, gallivm,
+                       lp_type_float_vec(32, 32 * 8), wrap(mask_val));
+
+   struct swr_tes_llvm_iface tes_iface;
+
+   tes_iface.base.fetch_vertex_input = ::swr_tes_llvm_fetch_vtx_input;
+   tes_iface.base.fetch_patch_input = ::swr_tes_llvm_fetch_patch_input;
+
+   tes_iface.pBuilder = this;
+   tes_iface.pTesCtx = pTesCtx;
+   tes_iface.pTsState = pTS;
+   tes_iface.num_outputs = tes->info.base.num_outputs;
+   tes_iface.info = info;
+   tes_iface.pVtxAttribMap = vtxAttribMap;
+   tes_iface.pPatchAttribMap = patchAttribMap;
 
    struct lp_build_tgsi_params params;
    memset(&params, 0, sizeof(params));
@@ -766,11 +1890,12 @@ BuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key)
    params.inputs = inputs;
    params.context_ptr = wrap(hPrivateData);
    params.sampler = sampler;
-   params.info = &gs->info.base;
-   params.gs_iface = &gs_iface.base;
+   params.info = &tes->info.base;
+   params.tes_iface = &tes_iface.base;
 
+   // Build LLVM IR
    lp_build_tgsi_soa(gallivm,
-                     gs->pipe.tokens,
+                     tes->pipe.tokens,
                      &params,
                      outputs);
 
@@ -780,22 +1905,284 @@ BuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key)
 
    IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
 
+   // Write output attributes
+   Value *dclOut = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_pOutputData}, "dclOut");
+
+   for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_OUTPUTS; attrib++) {
+      for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
+         if (!outputs[attrib][channel])
+            continue;
+
+         Value *val = LOAD(unwrap(outputs[attrib][channel]));;
+         Value *attribOffset =
+            LOAD(pTesCtx, {0, SWR_DS_CONTEXT_outVertexAttribOffset});
+
+         // Assume we write position
+         Value* outputSlot = C(VERTEX_POSITION_SLOT);
+         if (tes->info.base.output_semantic_name[attrib] != TGSI_SEMANTIC_POSITION) {
+            // No, it's a generic attribute, not a position - let's calculate output slot
+            uint32_t outSlot = attrib;
+            if (tes->info.base.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) {
+               // this shader will write position, so in shader's term
+               // output starts at attrib 1, but we will handle that separately,
+               // so let's fix the outSlot
+               outSlot--;
+            }
+            outputSlot = ADD(attribOffset, C(outSlot));
+         }
+
+         Value *attribVecIndex =
+            ADD(MUL(vecStride, MUL(outputSlot, C(4))), vecOffset);
+
+         uint32_t outputComponent = 0;
+         uint32_t curComp = outputComponent + channel;
+         auto outValIndex = ADD(attribVecIndex, MUL(vecStride, C(curComp)));
+         STOREV(val, dclOut, {outValIndex});
+
+         if (verbose_shader) {
+             lp_build_printf(gallivm,
+                            "TES output [%d][%d]",
+                            C(attrib),
+                            C(channel));
+            lp_build_print_value(gallivm, " = ", wrap(val));
+         }
+      }
+   }
+
    RET_VOID();
 
+   JM()->DumpToFile(pFunction, "src");
    gallivm_verify_function(gallivm, wrap(pFunction));
+
    gallivm_compile_module(gallivm);
+   JM()->DumpToFile(pFunction, "optimized");
 
-   PFN_GS_FUNC pFunc =
-      (PFN_GS_FUNC)gallivm_jit_function(gallivm, wrap(pFunction));
+   PFN_TES_FUNC pFunc =
+      (PFN_TES_FUNC)gallivm_jit_function(gallivm, wrap(pFunction));
 
-   debug_printf("geom shader  %p\n", pFunc);
-   assert(pFunc && "Error: GeomShader = NULL");
+   debug_printf("tess evaluation shader  %p\n", pFunc);
+   assert(pFunc && "Error: TessEvaluationShader = NULL");
+
+   JM()->DumpAsm(pFunction, "asm");
+
+   JM()->mIsModuleFinalized = true;
+
+   return pFunc;
+}
+
+PFN_TCS_FUNC
+BuilderSWR::CompileTCS(struct swr_context *ctx, swr_jit_tcs_key &key)
+{ // Builds LLVM IR for the bound tessellation control shader (ctx->tcs), JIT-compiles it and returns the entry point
+   SWR_TS_STATE *pTS = &ctx->tsState; // TCS-related fields of the context's tessellation state are filled in below
+   struct tgsi_shader_info *info = &ctx->tcs->info.base;
+
+   pTS->numHsInputAttribs = info->num_inputs;
+   pTS->numHsOutputAttribs = info->num_outputs;
+
+   pTS->hsAllocationSize = sizeof(ScalarPatch);
+
+   pTS->vertexAttribOffset = VERTEX_ATTRIB_START_SLOT;
+   pTS->srcVertexAttribOffset = VERTEX_ATTRIB_START_SLOT;
+
+   struct swr_tess_control_shader *tcs = ctx->tcs;
+
+   LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; // handed to lp_build_tgsi_soa via params.inputs; not initialized here
+   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
+
+   memset(outputs, 0, sizeof(outputs)); // only the outputs table is zeroed
+
+   AttrBuilder attrBuilder;
+   attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float)); // stack alignment = mVWidth floats, expressed in bytes
+
+   std::vector<Type *> tcsArgs{
+      PointerType::get(Gen_swr_draw_context(JM()), 0), // arg 0, named hPrivateData below
+      PointerType::get(mInt8Ty, 0), // arg 1, named pWorkerData below
+      PointerType::get(Gen_SWR_HS_CONTEXT(JM()), 0)}; // arg 2, named tcsCtx below
+   FunctionType *tcsFuncType =
+      FunctionType::get(Type::getVoidTy(JM()->mContext), tcsArgs, false);
+
+   // create new tessellation control shader function
+   auto pFunction = Function::Create(tcsFuncType,
+                                     GlobalValue::ExternalLinkage,
+                                     "TCS",
+                                     JM()->mpCurrentModule);
+
+#if LLVM_VERSION_MAJOR < 5
+   AttributeSet attrSet = AttributeSet::get(
+      JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
+   pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
+#else
+   pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
+#endif
+
+   BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
+   IRB()->SetInsertPoint(block);
+   LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block)); // point gallivm's builder at the same entry block as IRB()
+
+   auto argitr = pFunction->arg_begin();
+   Value *hPrivateData = &*argitr++;
+   hPrivateData->setName("hPrivateData");
+   Value *pWorkerData = &*argitr++; // only named here; not referenced again in this function
+   pWorkerData->setName("pWorkerData");
+   Value *pTcsCtx = &*argitr++;
+   pTcsCtx->setName("tcsCtx");
+
+   Value *consts_ptr =
+      GEP(hPrivateData, {C(0), C(swr_draw_context_constantTCS)});
+   consts_ptr->setName("tcs_constants");
+   Value *const_sizes_ptr =
+      GEP(hPrivateData, {0, swr_draw_context_num_constantsTCS});
+   const_sizes_ptr->setName("num_tcs_constants");
+
+   struct lp_build_sampler_soa *sampler =
+      swr_sampler_soa_create(key.sampler, PIPE_SHADER_TESS_CTRL); // released with sampler->destroy() after IR generation
+
+   struct lp_bld_tgsi_system_values system_values;
+   memset(&system_values, 0, sizeof(system_values));
+
+   system_values.prim_id =
+      wrap(LOAD(pTcsCtx, {0, SWR_HS_CONTEXT_PrimitiveID}));
+
+   Constant *vInvocationId; // constant vector 0..N-1: 8 entries when mVWidth == 8, otherwise 16
+   if (mVWidth == 8) {
+      vInvocationId = C({0, 1, 2, 3, 4, 5, 6, 7});
+   } else {
+      vInvocationId =
+         C({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15});
+   }
+
+   system_values.invocation_id = wrap(vInvocationId);
+   system_values.vertices_in = wrap(C(tcs->vertices_per_patch));
+
+   if (verbose_shader) {
+      lp_build_print_value(gallivm, "TCS::prim_id = ", system_values.prim_id);
+      lp_build_print_value(gallivm, "TCS::invocation_id = ", system_values.invocation_id);
+      lp_build_print_value(gallivm, "TCS::vertices_in = ", system_values.vertices_in);
+   }
+
+   std::vector<Constant *> mapConstants; // appended to in both loops below but never read back in this function
+   Value *vtxAttribMap =
+      ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS)); // per-input table: TCS input slot -> slot in the VS output layout
+
+   for (unsigned slot = 0; slot < info->num_inputs; slot++) {
+      ubyte semantic_name = info->input_semantic_name[slot];
+      ubyte semantic_idx = info->input_semantic_index[slot];
+
+      unsigned vs_slot =
+         locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base);
+
+      vs_slot += VERTEX_ATTRIB_START_SLOT;
+
+      if (ctx->vs->info.base.output_semantic_name[0]
+          == TGSI_SEMANTIC_POSITION)
+         vs_slot--; // presumably because a VS position at output 0 does not occupy an attribute slot - confirm
+
+      if (semantic_name == TGSI_SEMANTIC_POSITION)
+         vs_slot = VERTEX_POSITION_SLOT; // a position input overrides the computed slot
+
+      STORE(C(vs_slot), vtxAttribMap, {0, slot});
+      mapConstants.push_back(C(vs_slot));
+   }
+
+   // Prepare map of output attributes. Needed when shader instance wants
+   // to read own output or output of other instance, which is allowed in TCS
+   Value *vtxOutputAttribMap =
+      ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS)); // NOTE(review): indexed by output slot but sized with PIPE_MAX_SHADER_INPUTS - confirm the two limits match
+   // Map for per-patch attributes
+   Value *patchOutputAttribMap =
+      ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS));
+   for (unsigned slot = 0; slot < info->num_outputs; slot++) {
+      ubyte name = info->output_semantic_name[slot];
+      int32_t idx = info->output_semantic_index[slot];
+      if (name == TGSI_SEMANTIC_PATCH) {
+         STORE(C(idx), patchOutputAttribMap, {0, slot}); // per-patch outputs map to their semantic index
+      } else {
+         int32_t target_slot = slot;
+         if (name == TGSI_SEMANTIC_GENERIC) {
+            target_slot += VERTEX_ATTRIB_START_SLOT;
+         }
+         // Now normalize target slot
+         for (ubyte as = 0; as < slot; as++) {
+            ubyte name = info->output_semantic_name[as]; // shadows the outer `name` inside this loop only
+            switch (name) {
+               case TGSI_SEMANTIC_TESSOUTER:
+               case TGSI_SEMANTIC_TESSINNER:
+               case TGSI_SEMANTIC_PATCH:
+               case TGSI_SEMANTIC_POSITION:
+                  target_slot--; // each earlier output with one of these semantics moves this slot down by one; other semantics are ignored (no default case)
+            }
+         }
+         if (name == TGSI_SEMANTIC_POSITION) {
+            target_slot = VERTEX_POSITION_SLOT; // position output overrides the normalized slot
+         }
+         STORE(C(target_slot), vtxOutputAttribMap, {0, slot});
+         mapConstants.push_back(C(target_slot));
+      }
+   }
+
+   struct lp_build_mask_context mask;
+   Value *mask_val = LOAD(pTcsCtx, {0, SWR_HS_CONTEXT_mask}, "tcsMask");
+   lp_build_mask_begin(
+      &mask, gallivm, lp_type_float_vec(32, 32 * 8), wrap(mask_val)); // NOTE(review): literal 32 * 8 is used regardless of mVWidth, while vInvocationId above handles 8 or 16 - confirm
+
+   struct swr_tcs_llvm_iface tcs_iface; // callback table plus the state those callbacks need
+
+   tcs_iface.base.emit_store_output = ::swr_tcs_llvm_store_output;
+   tcs_iface.base.emit_fetch_input = ::swr_tcs_llvm_fetch_input;
+   tcs_iface.base.emit_fetch_output = ::swr_tcs_llvm_fetch_output;
+   tcs_iface.base.emit_barrier = ::swr_tcs_llvm_emit_barrier;
+   tcs_iface.base.emit_prologue = ::swr_tcs_llvm_emit_prologue;
+   tcs_iface.base.emit_epilogue = ::swr_tcs_llvm_emit_epilogue;
+
+   tcs_iface.pBuilder = this;
+   tcs_iface.pTcsCtx = pTcsCtx;
+   tcs_iface.pTsState = pTS;
+   tcs_iface.output_vertices = info->properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
+   tcs_iface.info = info;
+   tcs_iface.pVtxAttribMap = vtxAttribMap;
+   tcs_iface.pVtxOutputAttribMap = vtxOutputAttribMap;
+   tcs_iface.pPatchOutputAttribMap = patchOutputAttribMap;
+
+   struct lp_build_tgsi_params params;
+   memset(&params, 0, sizeof(params)); // fields not assigned below stay zero
+   params.type = lp_type_float_vec(32, 32 * 8);
+   params.mask = &mask;
+   params.consts_ptr = wrap(consts_ptr);
+   params.const_sizes_ptr = wrap(const_sizes_ptr);
+   params.system_values = &system_values;
+   params.inputs = inputs;
+   params.context_ptr = wrap(hPrivateData);
+   params.sampler = sampler;
+   params.info = &tcs->info.base;
+   params.tcs_iface = &tcs_iface.base;
+
+   lp_build_tgsi_soa(gallivm, tcs->pipe.tokens, &params, outputs); // translate the TGSI tokens into LLVM IR
+
+   lp_build_mask_end(&mask);
+
+   sampler->destroy(sampler);
+
+   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); // resume at whatever block gallivm's builder ended in
+   RET_VOID();
+
+   JM()->DumpToFile(pFunction, "src"); // dump before gallivm_compile_module
+   gallivm_verify_function(gallivm, wrap(pFunction));
+   gallivm_compile_module(gallivm);
+   JM()->DumpToFile(pFunction, "optimized"); // dump after gallivm_compile_module
+
+   PFN_TCS_FUNC pFunc =
+      (PFN_TCS_FUNC)gallivm_jit_function(gallivm, wrap(pFunction));
+
+   debug_printf("tess control shader  %p\n", pFunc);
+   assert(pFunc && "Error: TessControlShader = NULL"); // the only NULL check; pFunc is returned as-is if asserts are compiled out
+   JM()->DumpAsm(pFunction, "asm");
 
    JM()->mIsModuleFinalized = true;
 
    return pFunc;
 }
 
+
 PFN_GS_FUNC
 swr_compile_gs(struct swr_context *ctx, swr_jit_gs_key &key)
 {
@@ -808,6 +2195,34 @@ swr_compile_gs(struct swr_context *ctx, swr_jit_gs_key &key)
    return func;
 }
 
+PFN_TCS_FUNC
+swr_compile_tcs(struct swr_context *ctx, swr_jit_tcs_key &key)
+{ // Compiles a TCS variant for `key`, stores it in ctx->tcs->map and returns the JIT entry point
+   BuilderSWR builder(
+      reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
+      "TCS");
+   PFN_TCS_FUNC func = builder.CompileTCS(ctx, key);
+
+   ctx->tcs->map.insert(
+      std::make_pair(key, std::make_unique<VariantTCS>(builder.gallivm, func))); // variant is built from builder.gallivm and func; presumably it takes over the gallivm state - verify VariantTCS
+
+   return func;
+}
+
+PFN_TES_FUNC
+swr_compile_tes(struct swr_context *ctx, swr_jit_tes_key &key)
+{ // Compiles a TES variant for `key`, stores it in ctx->tes->map and returns the JIT entry point
+   BuilderSWR builder(
+      reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
+      "TES");
+   PFN_TES_FUNC func = builder.CompileTES(ctx, key);
+
+   ctx->tes->map.insert(
+      std::make_pair(key, std::make_unique<VariantTES>(builder.gallivm, func))); // variant is built from builder.gallivm and func; presumably it takes over the gallivm state - verify VariantTES
+
+   return func;
+}
+
 void
 BuilderSWR::WriteVS(Value *pVal, Value *pVsContext, Value *pVtxOutput, unsigned slot, unsigned channel)
 {
@@ -822,6 +2237,10 @@ BuilderSWR::WriteVS(Value *pVal, Value *pVsContext, Value *pVtxOutput, unsigned
 #else
    Value *pOut = GEP(pVtxOutput, {0, 0, slot});
    STORE(pVal, pOut, {0, channel});
+   if (verbose_shader) {
+      lp_build_printf(gallivm, "VS: Storing on slot %d, channel %d: ", C(slot), C(channel));
+      lp_build_print_value(gallivm, "", wrap(pVal));
+   }
 #endif
 }
 
@@ -984,12 +2403,23 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
       LLVMValueRef cz = LLVMBuildLoad(gallivm->builder, outputs[cv][2], "");
       LLVMValueRef cw = LLVMBuildLoad(gallivm->builder, outputs[cv][3], "");
 
+      tgsi_shader_info *pLastFE = &ctx->vs->info.base;
+
+      if (ctx->gs) {
+         pLastFE = &ctx->gs->info.base;
+      }
+      else if (ctx->tes) {
+         pLastFE = &ctx->tes->info.base;
+      }
+      else if (ctx->tcs) {
+         pLastFE = &ctx->tcs->info.base;
+      }
+
       for (unsigned val = 0; val < PIPE_MAX_CLIP_PLANES; val++) {
          // clip distance overrides user clip planes
-         if ((swr_vs->info.base.clipdist_writemask & clip_mask & (1 << val)) ||
-             ((swr_vs->info.base.culldist_writemask << swr_vs->info.base.num_written_clipdistance) & (1 << val))) {
-            unsigned cv = locate_linkage(TGSI_SEMANTIC_CLIPDIST, val < 4 ? 0 : 1,
-                                         &swr_vs->info.base);
+         if ((pLastFE->clipdist_writemask & clip_mask & (1 << val)) ||
+             ((pLastFE->culldist_writemask << pLastFE->num_written_clipdistance) & (1 << val))) {
+            unsigned cv = locate_linkage(TGSI_SEMANTIC_CLIPDIST, val < 4 ? 0 : 1, pLastFE);
             if (val < 4) {
                LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val], "");
                WriteVS(unwrap(dist), pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_LO_SLOT, val);
@@ -1032,14 +2462,17 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
 
    RET_VOID();
 
+   JM()->DumpToFile(pFunction, "vs_function1");
    gallivm_verify_function(gallivm, wrap(pFunction));
    gallivm_compile_module(gallivm);
+   JM()->DumpToFile(pFunction, "vs_function2");
 
    //   lp_debug_dump_value(func);
 
    PFN_VERTEX_FUNC pFunc =
       (PFN_VERTEX_FUNC)gallivm_jit_function(gallivm, wrap(pFunction));
 
+   JM()->DumpAsm(pFunction, "vs_function_asm");
    debug_printf("vert shader  %p\n", pFunc);
    assert(pFunc && "Error: VertShader = NULL");
 
@@ -1111,6 +2544,8 @@ BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_fs_key &key)
    struct tgsi_shader_info *pPrevShader;
    if (ctx->gs)
       pPrevShader = &ctx->gs->info.base;
+   else if (ctx->tes)
+      pPrevShader = &ctx->tes->info.base;
    else
       pPrevShader = &ctx->vs->info.base;
 
index 6468874dc3fc8775d7ae949897d9e8fa8d0122d8..cabe915f312eb57e2b271c0551707c376bcef0fd 100644 (file)
 struct swr_vertex_shader;
 struct swr_fragment_shader;
 struct swr_geometry_shader;
+struct swr_tess_control_shader;
+struct swr_tess_evaluation_shader;
+
 struct swr_jit_fs_key;
 struct swr_jit_vs_key;
 struct swr_jit_gs_key;
+struct swr_jit_tcs_key;
+struct swr_jit_tes_key;
+
+using PFN_TCS_FUNC = PFN_HS_FUNC;
+using PFN_TES_FUNC = PFN_DS_FUNC;
 
 unsigned swr_so_adjust_attrib(unsigned in_attrib,
                               swr_vertex_shader *swr_vs);
@@ -42,6 +50,12 @@ swr_compile_fs(struct swr_context *ctx, swr_jit_fs_key &key);
 PFN_GS_FUNC
 swr_compile_gs(struct swr_context *ctx, swr_jit_gs_key &key);
 
+PFN_TCS_FUNC
+swr_compile_tcs(struct swr_context *ctx, swr_jit_tcs_key &key);
+
+PFN_TES_FUNC
+swr_compile_tes(struct swr_context *ctx, swr_jit_tes_key &key);
+
 void swr_generate_fs_key(struct swr_jit_fs_key &key,
                          struct swr_context *ctx,
                          swr_fragment_shader *swr_fs);
@@ -57,6 +71,14 @@ void swr_generate_gs_key(struct swr_jit_gs_key &key,
                          struct swr_context *ctx,
                          swr_geometry_shader *swr_gs);
 
+void swr_generate_tcs_key(struct swr_jit_tcs_key &key,
+                          struct swr_context *ctx,
+                          swr_tess_control_shader *swr_tcs);
+
+void swr_generate_tes_key(struct swr_jit_tes_key &key,
+                          struct swr_context *ctx,
+                          swr_tess_evaluation_shader *swr_tes);
+
 struct swr_jit_sampler_key {
    unsigned nr_samplers;
    unsigned nr_sampler_views;
@@ -85,6 +107,21 @@ struct swr_jit_gs_key : swr_jit_sampler_key {
    ubyte vs_output_semantic_idx[PIPE_MAX_SHADER_OUTPUTS];
 };
 
+// TESS_TODO: revisit this - we probably need to use
+// primitive modes, number of vertices emitted, etc.
+struct swr_jit_tcs_key : swr_jit_sampler_key { // key type used when caching compiled TCS variants; extends the sampler key
+   ubyte vs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; // presumably the VS output semantic names - confirm against swr_generate_tcs_key
+   ubyte vs_output_semantic_idx[PIPE_MAX_SHADER_OUTPUTS]; // presumably the matching VS output semantic indices - confirm
+   unsigned clip_plane_mask; // from rasterizer state & tcs_info
+};
+
+// TESS_TODO: revisit this
+struct swr_jit_tes_key : swr_jit_sampler_key { // key type used when caching compiled TES variants; extends the sampler key
+   ubyte prev_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; // presumably output semantic names of the stage feeding the TES - confirm against swr_generate_tes_key
+   ubyte prev_output_semantic_idx[PIPE_MAX_SHADER_OUTPUTS]; // presumably the matching output semantic indices - confirm
+   unsigned clip_plane_mask; // from rasterizer state & tes_info
+};
+
 namespace std
 {
 template <> struct hash<swr_jit_fs_key> {
@@ -114,9 +151,25 @@ template <> struct hash<swr_jit_gs_key> {
       return util_hash_crc32(&k, sizeof(k));
    }
 };
+
+template <> struct hash<swr_jit_tcs_key> { // std::hash specialization for TCS variant keys
+   std::size_t operator()(const swr_jit_tcs_key &k) const
+   {
+      return util_hash_crc32(&k, sizeof(k)); // hashes all sizeof(k) bytes of the key object, so any padding bytes contribute too
+   }
+};
+
+template <> struct hash<swr_jit_tes_key> { // std::hash specialization for TES variant keys
+   std::size_t operator()(const swr_jit_tes_key &k) const
+   {
+      return util_hash_crc32(&k, sizeof(k)); // hashes all sizeof(k) bytes of the key object, so any padding bytes contribute too
+   }
+};
 };
 
 bool operator==(const swr_jit_fs_key &lhs, const swr_jit_fs_key &rhs);
 bool operator==(const swr_jit_vs_key &lhs, const swr_jit_vs_key &rhs);
 bool operator==(const swr_jit_fetch_key &lhs, const swr_jit_fetch_key &rhs);
 bool operator==(const swr_jit_gs_key &lhs, const swr_jit_gs_key &rhs);
+bool operator==(const swr_jit_tcs_key &lhs, const swr_jit_tcs_key &rhs);
+bool operator==(const swr_jit_tes_key &lhs, const swr_jit_tes_key &rhs);
index cd55e68b8d44dc59aa03f3addb09cfd71ee21849..ea6d09ffb5f030fe528e224f32860766ef4f207e 100644 (file)
@@ -439,7 +439,6 @@ swr_create_gs_state(struct pipe_context *pipe,
    return swr_gs;
 }
 
-
 static void
 swr_bind_gs_state(struct pipe_context *pipe, void *gs)
 {
@@ -463,6 +462,78 @@ swr_delete_gs_state(struct pipe_context *pipe, void *gs)
    swr_fence_work_delete_gs(screen->flush_fence, swr_gs);
 }
 
+static void * // returns the new swr_tess_control_shader as an opaque handle, or NULL if the check below fires
+swr_create_tcs_state(struct pipe_context *pipe, // not read by this function
+                     const struct pipe_shader_state *tcs) // shader description; only tcs->tokens is read here
+{
+   struct swr_tess_control_shader *swr_tcs = new swr_tess_control_shader;
+   if (!swr_tcs) // NOTE(review): a plain new-expression throws instead of yielding null, so this is presumably dead unless exceptions are disabled - confirm
+      return NULL;
+
+   swr_tcs->pipe.tokens = tgsi_dup_tokens(tcs->tokens); // private copy of the tokens, released with FREE in swr_delete_tcs_state; NOTE(review): result is not checked
+   lp_build_tgsi_info(tcs->tokens, &swr_tcs->info); // runs on the caller-provided tokens (not the copy); presumably populates swr_tcs->info
+   return swr_tcs;
+}
+
+static void // makes tcs the current tessellation control shader of the context
+swr_bind_tcs_state(struct pipe_context *pipe, void *tcs)
+{
+   struct swr_context *ctx = swr_context(pipe);
+
+   if (ctx->tcs == tcs) // same shader already bound: leave the dirty flags untouched
+      return;
+
+   ctx->tcs = (swr_tess_control_shader *)tcs;
+   ctx->dirty |= SWR_NEW_TCS; // swr_update_derived tests this bit to refresh TCS-dependent state
+}
+
+static void // releases a shader object produced by swr_create_tcs_state
+swr_delete_tcs_state(struct pipe_context *pipe, void *tcs)
+{
+   struct swr_tess_control_shader *swr_tcs = (swr_tess_control_shader *)tcs;
+   FREE((void *)swr_tcs->pipe.tokens); // the token copy made at creation is released immediately
+   struct swr_screen *screen = swr_screen(pipe->screen);
+
+   /* Defer deletion of tcs state */
+   swr_fence_work_delete_tcs(screen->flush_fence, swr_tcs); // object and flush fence are handed to the fence-work helper; presumably deleted once the fence is reached - confirm
+}
+
+static void * // returns the new swr_tess_evaluation_shader as an opaque handle, or NULL if the check below fires
+swr_create_tes_state(struct pipe_context *pipe, // not read by this function
+                     const struct pipe_shader_state *tes) // shader description; only tes->tokens is read here
+{
+   struct swr_tess_evaluation_shader *swr_tes = new swr_tess_evaluation_shader;
+   if (!swr_tes) // NOTE(review): a plain new-expression throws instead of yielding null, so this is presumably dead unless exceptions are disabled - confirm
+      return NULL;
+
+   swr_tes->pipe.tokens = tgsi_dup_tokens(tes->tokens); // private copy of the tokens, released with FREE in swr_delete_tes_state; NOTE(review): result is not checked
+   lp_build_tgsi_info(tes->tokens, &swr_tes->info); // runs on the caller-provided tokens (not the copy); presumably populates swr_tes->info
+   return swr_tes;
+}
+
+static void // makes tes the current tessellation evaluation shader of the context
+swr_bind_tes_state(struct pipe_context *pipe, void *tes)
+{
+   struct swr_context *ctx = swr_context(pipe);
+
+   if (ctx->tes == tes) // same shader already bound: leave the dirty flags untouched
+      return;
+
+   ctx->tes = (swr_tess_evaluation_shader *)tes;
+   ctx->dirty |= SWR_NEW_TES; // swr_update_derived tests this bit to refresh TES-dependent state
+}
+
+static void // releases a shader object produced by swr_create_tes_state
+swr_delete_tes_state(struct pipe_context *pipe, void *tes)
+{
+   struct swr_tess_evaluation_shader *swr_tes = (swr_tess_evaluation_shader *)tes;
+   FREE((void *)swr_tes->pipe.tokens); // the token copy made at creation is released immediately
+   struct swr_screen *screen = swr_screen(pipe->screen);
+
+   /* Defer deletion of tes state */
+   swr_fence_work_delete_tes(screen->flush_fence, swr_tes); // object and flush fence are handed to the fence-work helper; presumably deleted once the fence is reached - confirm
+}
+
 static void
 swr_set_constant_buffer(struct pipe_context *pipe,
                         enum pipe_shader_type shader,
@@ -484,8 +555,11 @@ swr_set_constant_buffer(struct pipe_context *pipe,
       ctx->dirty |= SWR_NEW_FSCONSTANTS;
    } else if (shader == PIPE_SHADER_GEOMETRY) {
       ctx->dirty |= SWR_NEW_GSCONSTANTS;
+   } else if (shader == PIPE_SHADER_TESS_CTRL) {
+      ctx->dirty |= SWR_NEW_TCSCONSTANTS;
+   } else if (shader == PIPE_SHADER_TESS_EVAL) {
+      ctx->dirty |= SWR_NEW_TESCONSTANTS;
    }
-
    if (cb && cb->user_buffer) {
       pipe_resource_reference(&constants, NULL);
    }
@@ -876,8 +950,18 @@ swr_update_constants(struct swr_context *ctx, enum pipe_shader_type shaderType)
       num_constants = pDC->num_constantsGS;
       scratch = &ctx->scratch->gs_constants;
       break;
+   case PIPE_SHADER_TESS_CTRL:
+      constant = pDC->constantTCS;
+      num_constants = pDC->num_constantsTCS;
+      scratch = &ctx->scratch->tcs_constants;
+      break;
+   case PIPE_SHADER_TESS_EVAL:
+      constant = pDC->constantTES;
+      num_constants = pDC->num_constantsTES;
+      scratch = &ctx->scratch->tes_constants;
+      break;
    default:
-      debug_printf("Unsupported shader type constants\n");
+      assert(0 && "Unsupported shader type constants");
       return;
    }
 
@@ -1041,6 +1125,25 @@ swr_update_poly_stipple(struct swr_context *ctx)
           sizeof(ctx->poly_stipple.pipe.stipple));
 }
 
+
+static struct tgsi_shader_info * // shader info of the last bound stage, in priority order GS, TES, TCS, VS (FE presumably stands for front-end)
+swr_get_last_fe(const struct swr_context *ctx)
+{
+   tgsi_shader_info *pLastFE = &ctx->vs->info.base; // fallback is the vertex shader; NOTE(review): assumes ctx->vs is non-NULL - confirm callers
+
+   if (ctx->gs) {
+      pLastFE = &ctx->gs->info.base;
+   }
+   else if (ctx->tes) {
+      pLastFE = &ctx->tes->info.base;
+   }
+   else if (ctx->tcs) { // reached only when a TCS is bound without a GS or TES
+      pLastFE = &ctx->tcs->info.base;
+   }
+   return pLastFE;
+}
+
+
 void
 swr_update_derived(struct pipe_context *pipe,
                    const struct pipe_draw_info *p_draw_info)
@@ -1128,6 +1231,8 @@ swr_update_derived(struct pipe_context *pipe,
    /* Raster state */
    if (ctx->dirty & (SWR_NEW_RASTERIZER |
                      SWR_NEW_VS | // clipping
+                     SWR_NEW_TES |
+                     SWR_NEW_TCS |
                      SWR_NEW_FRAMEBUFFER)) {
       pipe_rasterizer_state *rasterizer = ctx->rasterizer;
       pipe_framebuffer_state *fb = &ctx->framebuffer;
@@ -1399,6 +1504,8 @@ swr_update_derived(struct pipe_context *pipe,
    /* GeometryShader */
    if (ctx->dirty & (SWR_NEW_GS |
                      SWR_NEW_VS |
+                     SWR_NEW_TCS |
+                     SWR_NEW_TES |
                      SWR_NEW_SAMPLER |
                      SWR_NEW_SAMPLER_VIEW)) {
       if (ctx->gs) {
@@ -1437,12 +1544,106 @@ swr_update_derived(struct pipe_context *pipe,
       }
    }
 
-   /* VertexShader */
-   if (ctx->dirty & (SWR_NEW_VS |
-                     SWR_NEW_RASTERIZER | // for clip planes
+   // Tessellation Evaluation Shader
+   // Compile TES first, because TCS is optional
+   if (ctx->dirty & (SWR_NEW_GS |
+                     SWR_NEW_VS |
+                     SWR_NEW_TCS |
+                     SWR_NEW_TES |
                      SWR_NEW_SAMPLER |
-                     SWR_NEW_SAMPLER_VIEW |
-                     SWR_NEW_FRAMEBUFFER)) {
+                     SWR_NEW_SAMPLER_VIEW)) {
+      if (ctx->tes) {
+         swr_jit_tes_key key;
+         swr_generate_tes_key(key, ctx, ctx->tes);
+
+         auto search = ctx->tes->map.find(key);
+         PFN_TES_FUNC func;
+         if (search != ctx->tes->map.end()) {
+            func = search->second->shader;
+         } else {
+            func = swr_compile_tes(ctx, key);
+         }
+
+         ctx->api.pfnSwrSetDsFunc(ctx->swrContext, func);
+
+         /* JIT sampler state */
+         if (ctx->dirty & SWR_NEW_SAMPLER) {
+            swr_update_sampler_state(ctx,
+                                     PIPE_SHADER_TESS_EVAL,
+                                     key.nr_samplers,
+                                     ctx->swrDC.samplersTES);
+         }
+
+         /* JIT sampler view state */
+         if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) {
+            swr_update_texture_state(ctx,
+                                     PIPE_SHADER_TESS_EVAL,
+                                     key.nr_sampler_views,
+                                     ctx->swrDC.texturesTES);
+         }
+
+         ctx->api.pfnSwrSetTsState(ctx->swrContext, &ctx->tsState);
+
+      } else {
+         SWR_TS_STATE state = { 0 };
+         ctx->api.pfnSwrSetTsState(ctx->swrContext, &state);
+         ctx->api.pfnSwrSetDsFunc(ctx->swrContext, NULL);
+      }
+   }
+
+
+   /* Tessellation Control Shader */
+   if (ctx->dirty & (SWR_NEW_GS |
+                     SWR_NEW_VS |
+                     SWR_NEW_TCS |
+                     SWR_NEW_TES |
+                     SWR_NEW_SAMPLER |
+                     SWR_NEW_SAMPLER_VIEW)) {
+      if (ctx->tcs) {
+         ctx->tcs->vertices_per_patch = p_draw_info->vertices_per_patch;
+
+         swr_jit_tcs_key key;
+         swr_generate_tcs_key(key, ctx, ctx->tcs);
+
+         auto search = ctx->tcs->map.find(key);
+         PFN_TCS_FUNC func;
+         if (search != ctx->tcs->map.end()) {
+            func = search->second->shader;
+         } else {
+            func = swr_compile_tcs(ctx, key);
+         }
+
+         ctx->api.pfnSwrSetHsFunc(ctx->swrContext, func);
+
+         /* JIT sampler state */
+         if (ctx->dirty & SWR_NEW_SAMPLER) {
+            swr_update_sampler_state(ctx,
+                                     PIPE_SHADER_TESS_CTRL,
+                                     key.nr_samplers,
+                                     ctx->swrDC.samplersTCS);
+         }
+
+         /* JIT sampler view state */
+         if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) {
+            swr_update_texture_state(ctx,
+                                     PIPE_SHADER_TESS_CTRL,
+                                     key.nr_sampler_views,
+                                     ctx->swrDC.texturesTCS);
+         }
+
+         ctx->api.pfnSwrSetTsState(ctx->swrContext, &ctx->tsState);
+
+      } else {
+         SWR_TS_STATE state = { 0 };
+         ctx->api.pfnSwrSetTsState(ctx->swrContext, &state);
+         ctx->api.pfnSwrSetHsFunc(ctx->swrContext, NULL);
+      }
+   }
+
+   /* VertexShader */
+   if (ctx->dirty
+       & (SWR_NEW_VS | SWR_NEW_RASTERIZER | // for clip planes
+          SWR_NEW_SAMPLER | SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) {
       swr_jit_vs_key key;
       swr_generate_vs_key(key, ctx, ctx->vs);
       auto search = ctx->vs->map.find(key);
@@ -1456,10 +1657,8 @@ swr_update_derived(struct pipe_context *pipe,
 
       /* JIT sampler state */
       if (ctx->dirty & SWR_NEW_SAMPLER) {
-         swr_update_sampler_state(ctx,
-                                  PIPE_SHADER_VERTEX,
-                                  key.nr_samplers,
-                                  ctx->swrDC.samplersVS);
+         swr_update_sampler_state(
+            ctx, PIPE_SHADER_VERTEX, key.nr_samplers, ctx->swrDC.samplersVS);
       }
 
       /* JIT sampler view state */
@@ -1488,6 +1687,8 @@ swr_update_derived(struct pipe_context *pipe,
    if (ctx->dirty & (SWR_NEW_FS |
                      SWR_NEW_VS |
                      SWR_NEW_GS |
+                     SWR_NEW_TES |
+                     SWR_NEW_TCS |
                      SWR_NEW_RASTERIZER |
                      SWR_NEW_SAMPLER |
                      SWR_NEW_SAMPLER_VIEW |
@@ -1578,6 +1779,16 @@ swr_update_derived(struct pipe_context *pipe,
       swr_update_constants(ctx, PIPE_SHADER_GEOMETRY);
    }
 
+   /* Tessellation Control Shader Constants */
+   if (ctx->dirty & SWR_NEW_TCSCONSTANTS) {
+      swr_update_constants(ctx, PIPE_SHADER_TESS_CTRL);
+   }
+
+   /* Tessellation Evaluation Shader Constants */
+   if (ctx->dirty & SWR_NEW_TESCONSTANTS) {
+      swr_update_constants(ctx, PIPE_SHADER_TESS_EVAL);
+   }
+
    /* Depth/stencil state */
    if (ctx->dirty & (SWR_NEW_DEPTH_STENCIL_ALPHA | SWR_NEW_FRAMEBUFFER)) {
       struct pipe_depth_state *depth = &(ctx->depth_stencil->depth);
@@ -1718,7 +1929,7 @@ swr_update_derived(struct pipe_context *pipe,
             compileState.alphaTestFormat = ALPHA_TEST_FLOAT32; // xxx
 
             compileState.Canonicalize();
-            
+
             PFN_BLEND_JIT_FUNC func = NULL;
             auto search = ctx->blendJIT->find(compileState);
             if (search != ctx->blendJIT->end()) {
@@ -1741,7 +1952,7 @@ swr_update_derived(struct pipe_context *pipe,
       swr_update_poly_stipple(ctx);
    }
 
-   if (ctx->dirty & (SWR_NEW_VS | SWR_NEW_SO | SWR_NEW_RASTERIZER)) {
+   if (ctx->dirty & (SWR_NEW_VS | SWR_NEW_TCS | SWR_NEW_TES | SWR_NEW_SO | SWR_NEW_RASTERIZER)) {
       ctx->vs->soState.rasterizerDisable =
          ctx->rasterizer->rasterizer_discard;
       ctx->api.pfnSwrSetSoState(ctx->swrContext, &ctx->vs->soState);
@@ -1768,7 +1979,7 @@ swr_update_derived(struct pipe_context *pipe,
    if (ctx->dirty & (SWR_NEW_CLIP | SWR_NEW_RASTERIZER | SWR_NEW_VS)) {
       // shader exporting clip distances overrides all user clip planes
       if (ctx->rasterizer->clip_plane_enable &&
-          !ctx->vs->info.base.num_written_clipdistance)
+          !swr_get_last_fe(ctx)->num_written_clipdistance)
       {
          swr_draw_context *pDC = &ctx->swrDC;
          memcpy(pDC->userClipPlanes,
@@ -1781,7 +1992,12 @@ swr_update_derived(struct pipe_context *pipe,
    SWR_BACKEND_STATE backendState = {0};
    if (ctx->gs) {
       backendState.numAttributes = ctx->gs->info.base.num_outputs - 1;
-   } else {
+   } else
+   if (ctx->tes) {
+      backendState.numAttributes = ctx->tes->info.base.num_outputs - 1;
+      // no case for TCS, because if TCS is active, TES must be active
+      // as well - pipeline stages after tessellation do not support patches
+   }  else {
       backendState.numAttributes = ctx->vs->info.base.num_outputs - 1;
       if (ctx->fs->info.base.uses_primid) {
          backendState.numAttributes++;
@@ -1805,21 +2021,19 @@ swr_update_derived(struct pipe_context *pipe,
       (ctx->rasterizer->flatshade ? ctx->fs->flatConstantMask : 0);
    backendState.pointSpriteTexCoordMask = ctx->fs->pointSpriteMask;
 
-   struct tgsi_shader_info *pLastFE =
-      ctx->gs ?
-      &ctx->gs->info.base :
-      &ctx->vs->info.base;
+   struct tgsi_shader_info *pLastFE = swr_get_last_fe(ctx);
+
    backendState.readRenderTargetArrayIndex = pLastFE->writes_layer;
    backendState.readViewportArrayIndex = pLastFE->writes_viewport_index;
    backendState.vertexAttribOffset = VERTEX_ATTRIB_START_SLOT; // TODO: optimize
 
    backendState.clipDistanceMask =
-      ctx->vs->info.base.num_written_clipdistance ?
-      ctx->vs->info.base.clipdist_writemask & ctx->rasterizer->clip_plane_enable :
+      pLastFE->num_written_clipdistance ?
+      pLastFE->clipdist_writemask & ctx->rasterizer->clip_plane_enable :
       ctx->rasterizer->clip_plane_enable;
 
    backendState.cullDistanceMask =
-      ctx->vs->info.base.culldist_writemask << ctx->vs->info.base.num_written_clipdistance;
+      pLastFE->culldist_writemask << pLastFE->num_written_clipdistance;
 
    // Assume old layout of SGV, POSITION, CLIPCULL, ATTRIB
    backendState.vertexClipCullOffset = backendState.vertexAttribOffset - 2;
@@ -1929,6 +2143,14 @@ swr_state_init(struct pipe_context *pipe)
    pipe->bind_gs_state = swr_bind_gs_state;
    pipe->delete_gs_state = swr_delete_gs_state;
 
+   pipe->create_tcs_state = swr_create_tcs_state;
+   pipe->bind_tcs_state = swr_bind_tcs_state;
+   pipe->delete_tcs_state = swr_delete_tcs_state;
+
+   pipe->create_tes_state = swr_create_tes_state;
+   pipe->bind_tes_state = swr_bind_tes_state;
+   pipe->delete_tes_state = swr_delete_tes_state;
+
    pipe->set_constant_buffer = swr_set_constant_buffer;
 
    pipe->create_vertex_elements_state = swr_create_vertex_elements_state;
index 45d0925e8e7ecb69458d63ed964ae7b66c01411d..09463241b965c6384d9d269a472fe03a5701157f 100644 (file)
@@ -46,9 +46,14 @@ struct ShaderVariant {
    ~ShaderVariant() { gallivm_destroy(gallivm); }
 };
 
+using PFN_TCS_FUNC = PFN_HS_FUNC; // TCS entry points use the PFN_HS_FUNC type; swr_update_derived passes them to pfnSwrSetHsFunc
+using PFN_TES_FUNC = PFN_DS_FUNC; // TES entry points use the PFN_DS_FUNC type; swr_update_derived passes them to pfnSwrSetDsFunc
+
 typedef ShaderVariant<PFN_VERTEX_FUNC> VariantVS;
 typedef ShaderVariant<PFN_PIXEL_KERNEL> VariantFS;
 typedef ShaderVariant<PFN_GS_FUNC> VariantGS;
+typedef ShaderVariant<PFN_TCS_FUNC> VariantTCS; // variant holder stored in swr_tess_control_shader::map
+typedef ShaderVariant<PFN_TES_FUNC> VariantTES; // variant holder stored in swr_tess_evaluation_shader::map
 
 /* skeleton */
 struct swr_vertex_shader {
@@ -76,6 +81,22 @@ struct swr_geometry_shader {
    std::unordered_map<swr_jit_gs_key, std::unique_ptr<VariantGS>> map;
 };
 
+struct swr_tess_control_shader { // driver-side object behind a tessellation control shader handle
+   struct pipe_shader_state pipe; // swr_create_tcs_state assigns only pipe.tokens
+   struct lp_tgsi_info info; // passed to lp_build_tgsi_info at creation; info.base is read by swr_get_last_fe
+   uint32_t vertices_per_patch; // overwritten from p_draw_info->vertices_per_patch in swr_update_derived
+
+   std::unordered_map<swr_jit_tcs_key, std::unique_ptr<VariantTCS>> map; // variants looked up by JIT key before compiling a new one
+};
+
+struct swr_tess_evaluation_shader { // driver-side object behind a tessellation evaluation shader handle
+   struct pipe_shader_state pipe; // swr_create_tes_state assigns only pipe.tokens
+   struct lp_tgsi_info info; // passed to lp_build_tgsi_info at creation; info.base is read by swr_get_last_fe
+
+   std::unordered_map<swr_jit_tes_key, std::unique_ptr<VariantTES>> map; // variants looked up by JIT key before compiling a new one
+};
+
+
 /* Vertex element state */
 struct swr_vertex_element_state {
    FETCH_COMPILE_STATE fsState;
@@ -340,7 +361,7 @@ swr_convert_target_type(const enum pipe_texture_target target)
  * Convert mesa PIPE_PRIM_X to SWR enum PRIMITIVE_TOPOLOGY
  */
 static INLINE enum PRIMITIVE_TOPOLOGY
-swr_convert_prim_topology(const unsigned mode)
+swr_convert_prim_topology(const unsigned mode, const unsigned tcs_verts)
 {
    switch (mode) {
    case PIPE_PRIM_POINTS:
@@ -371,6 +392,9 @@ swr_convert_prim_topology(const unsigned mode)
       return TOP_TRI_LIST_ADJ;
    case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
       return TOP_TRI_STRIP_ADJ;
+   case PIPE_PRIM_PATCHES:
+      // rasterizer has a separate type for each possible number of patch vertices
+      return (PRIMITIVE_TOPOLOGY)((unsigned)TOP_PATCHLIST_BASE + tcs_verts);
    default:
       assert(0 && "Unknown topology");
       return TOP_UNKNOWN;
@@ -396,4 +420,5 @@ swr_convert_fill_mode(const unsigned mode)
    }
 }
 
+
 #endif
index fb9d8ed22c096c62a23c91ea16465eefb351673d..fc50cf37ea3ad0c4cc596c50c6737881c11c43e1 100644 (file)
@@ -127,6 +127,12 @@ swr_texture_member(const struct lp_sampler_dynamic_state *base,
    case PIPE_SHADER_GEOMETRY:
       indices[1] = lp_build_const_int32(gallivm, swr_draw_context_texturesGS);
       break;
+   case PIPE_SHADER_TESS_CTRL:
+      indices[1] = lp_build_const_int32(gallivm, swr_draw_context_texturesTCS);
+      break;
+   case PIPE_SHADER_TESS_EVAL:
+      indices[1] = lp_build_const_int32(gallivm, swr_draw_context_texturesTES);
+      break;
    default:
       assert(0 && "unsupported shader type");
       break;
@@ -224,6 +230,12 @@ swr_sampler_member(const struct lp_sampler_dynamic_state *base,
    case PIPE_SHADER_GEOMETRY:
       indices[1] = lp_build_const_int32(gallivm, swr_draw_context_samplersGS);
       break;
+   case PIPE_SHADER_TESS_CTRL:
+      indices[1] = lp_build_const_int32(gallivm, swr_draw_context_samplersTCS);
+      break;
+   case PIPE_SHADER_TESS_EVAL:
+      indices[1] = lp_build_const_int32(gallivm, swr_draw_context_samplersTES);
+      break;
    default:
       assert(0 && "unsupported shader type");
       break;