gallium/swr: Fix compilation with LLVM 12
[mesa.git] / src / gallium / drivers / swr / swr_shader.cpp
index 943215703698326f19767b7e0d96b096fc3b90e1..15347aaf9450488a7d6ec3689d82b921a9a20796 100644 (file)
@@ -52,6 +52,7 @@
 #include "gallivm/lp_bld_tgsi.h"
 #include "gallivm/lp_bld_const.h"
 #include "gallivm/lp_bld_printf.h"
+#include "gallivm/lp_bld_logic.h"
 
 #include "swr_context.h"
 #include "gen_surf_state_llvm.h"
 
 #include "gallivm/lp_bld_type.h"
 
-#ifdef DEBUG
-constexpr bool verbose_shader = true;
+#if defined(DEBUG) && defined(SWR_VERBOSE_SHADER)
+constexpr bool verbose_shader          = true;
+constexpr bool verbose_tcs_shader_in   = true;
+constexpr bool verbose_tcs_shader_out  = true;
+constexpr bool verbose_tcs_shader_loop = true;
+constexpr bool verbose_vs_shader       = true;
 #else
-constexpr bool verbose_shader = false;
+constexpr bool verbose_shader          = false;
+constexpr bool verbose_tcs_shader_in   = false;
+constexpr bool verbose_tcs_shader_out  = false;
+constexpr bool verbose_tcs_shader_loop = false;
+constexpr bool verbose_vs_shader       = false;
 #endif
 
 using namespace SwrJit;
@@ -180,7 +189,7 @@ swr_generate_fs_key(struct swr_jit_fs_key &key,
                     struct swr_context *ctx,
                     swr_fragment_shader *swr_fs)
 {
-   memset(&key, 0, sizeof(key));
+   memset((void*)&key, 0, sizeof(key));
 
    key.nr_cbufs = ctx->framebuffer.nr_cbufs;
    key.light_twoside = ctx->rasterizer->light_twoside;
@@ -212,7 +221,7 @@ swr_generate_vs_key(struct swr_jit_vs_key &key,
                     struct swr_context *ctx,
                     swr_vertex_shader *swr_vs)
 {
-   memset(&key, 0, sizeof(key));
+   memset((void*)&key, 0, sizeof(key));
 
    key.clip_plane_mask =
       swr_vs->info.base.clipdist_writemask ?
@@ -226,7 +235,7 @@ void
 swr_generate_fetch_key(struct swr_jit_fetch_key &key,
                        struct swr_vertex_element_state *velems)
 {
-   memset(&key, 0, sizeof(key));
+   memset((void*)&key, 0, sizeof(key));
 
    key.fsState = velems->fsState;
 }
@@ -236,7 +245,7 @@ swr_generate_gs_key(struct swr_jit_gs_key &key,
                     struct swr_context *ctx,
                     swr_geometry_shader *swr_gs)
 {
-   memset(&key, 0, sizeof(key));
+   memset((void*)&key, 0, sizeof(key));
 
    struct tgsi_shader_info *pPrevShader = nullptr;
 
@@ -261,7 +270,7 @@ swr_generate_tcs_key(struct swr_jit_tcs_key &key,
                     struct swr_context *ctx,
                     swr_tess_control_shader *swr_tcs)
 {
-   memset(&key, 0, sizeof(key));
+   memset((void*)&key, 0, sizeof(key));
 
    struct tgsi_shader_info *pPrevShader = &ctx->vs->info.base;
 
@@ -285,7 +294,7 @@ swr_generate_tes_key(struct swr_jit_tes_key &key,
                     struct swr_context *ctx,
                     swr_tess_evaluation_shader *swr_tes)
 {
-   memset(&key, 0, sizeof(key));
+   memset((void*)&key, 0, sizeof(key));
 
    struct tgsi_shader_info *pPrevShader = nullptr;
 
@@ -318,7 +327,7 @@ struct BuilderSWR : public Builder {
       : Builder(pJitMgr)
    {
       pJitMgr->SetupNewModule();
-      gallivm = gallivm_create(pName, wrap(&JM()->mContext));
+      gallivm = gallivm_create(pName, wrap(&JM()->mContext), NULL);
       pJitMgr->mpCurrentModule = unwrap(gallivm->module);
    }
 
@@ -397,7 +406,8 @@ struct BuilderSWR : public Builder {
                             boolean is_aindex_indirect,
                             LLVMValueRef attrib_index,
                             LLVMValueRef swizzle_index,
-                            LLVMValueRef value);
+                            LLVMValueRef value,
+                            LLVMValueRef mask_vec);
 
    // Barrier implementation (available only in TCS)
    void
@@ -447,7 +457,7 @@ struct swr_tcs_llvm_iface {
 
    uint32_t output_vertices;
 
-   struct lp_build_for_loop_state loop_state;
+   LLVMValueRef loop_var;
 
    Value *pVtxAttribMap;
    Value *pVtxOutputAttribMap;
@@ -494,6 +504,7 @@ swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
                            struct lp_build_context * bld,
                            LLVMValueRef (*outputs)[4],
                            LLVMValueRef emitted_vertices_vec,
+                           LLVMValueRef mask_vec,
                            LLVMValueRef stream_id)
 {
     swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
@@ -510,7 +521,7 @@ swr_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base,
                              LLVMValueRef total_emitted_vertices_vec_ptr,
                              LLVMValueRef verts_per_prim_vec,
                              LLVMValueRef emitted_prims_vec,
-                             LLVMValueRef mask_vec)
+                             LLVMValueRef mask_vec, unsigned stream_id)
 {
     swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
 
@@ -600,6 +611,7 @@ void swr_tcs_llvm_store_output(const struct lp_build_tcs_iface *tcs_iface,
                          LLVMValueRef vertex_index,
                          boolean is_aindex_indirect,
                          LLVMValueRef attrib_index,
+                         boolean is_sindex_indirect,
                          LLVMValueRef swizzle_index,
                          LLVMValueRef value,
                          LLVMValueRef mask_vec)
@@ -615,7 +627,8 @@ void swr_tcs_llvm_store_output(const struct lp_build_tcs_iface *tcs_iface,
                                                is_aindex_indirect,
                                                attrib_index,
                                                swizzle_index,
-                                               value);
+                                               value,
+                                               mask_vec);
 }
 
 
@@ -744,7 +757,7 @@ BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
     Value *pVertexOffset = MUL(unwrap(emitted_vertices_vec), VIMMED1(vertSize));
 
     Value *vMask = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_mask});
-    Value *vMask1 = TRUNC(vMask, VectorType::get(mInt1Ty, mVWidth));
+    Value *vMask1 = TRUNC(vMask, getVectorType(mInt1Ty, mVWidth));
 
     Value *pStack = STACKSAVE();
     Value *pTmpPtr = ALLOCA(mFP32Ty, C(4)); // used for dummy write for lane masking
@@ -867,7 +880,7 @@ BuilderSWR::swr_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base,
     IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
 
     Value *vMask = LOAD(iface->pGsCtx, { 0, SWR_GS_CONTEXT_mask });
-    Value *vMask1 = TRUNC(vMask, VectorType::get(mInt1Ty, 8));
+    Value *vMask1 = TRUNC(vMask, getVectorType(mInt1Ty, 8));
 
     uint32_t vertsPerPrim = iface->num_verts_per_prim;
 
@@ -880,13 +893,13 @@ BuilderSWR::swr_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base,
     Value *mask = unwrap(mask_vec);
     Value *cmpMask = VMASK(ICMP_NE(unwrap(verts_per_prim_vec), VIMMED1(0)));
     mask = AND(mask, cmpMask);
-    vMask1 = TRUNC(mask, VectorType::get(mInt1Ty, 8));
+    vMask1 = TRUNC(mask, getVectorType(mInt1Ty, 8));
 
     vCount = SUB(vCount, VIMMED1(1));
     Value *vOffset = ADD(UDIV(vCount, VIMMED1(8)), VIMMED1(VERTEX_COUNT_SIZE));
     Value *vValue = SHL(VIMMED1(1), UREM(vCount, VIMMED1(8)));
 
-    vValue = TRUNC(vValue, VectorType::get(mInt8Ty, 8));
+    vValue = TRUNC(vValue, getVectorType(mInt8Ty, 8));
 
     Value *pStack = STACKSAVE();
     Value *pTmpPtr = ALLOCA(mInt8Ty, C(4)); // used for dummy read/write for lane masking
@@ -931,20 +944,20 @@ BuilderSWR::swr_tcs_llvm_emit_prologue(struct lp_build_tgsi_soa_context* bld)
 {
    swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)bld->tcs_iface;
 
-   // Iterate for all the vertices in the output patch
-   lp_build_for_loop_begin(&iface->loop_state, gallivm,
-                        lp_build_const_int32(gallivm, 0),
-                        LLVMIntULT,
-                        lp_build_const_int32(gallivm, iface->output_vertices),
-                        lp_build_const_int32(gallivm, 1));
+   Value* loop_var = ALLOCA(mSimdInt32Ty);
+   STORE(VBROADCAST(C(0)), loop_var);
+
+   iface->loop_var = wrap(loop_var);
+
+   lp_exec_bgnloop(&bld->exec_mask, true);
 
    IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
-   bld->system_values.invocation_id  = wrap(VBROADCAST(unwrap(iface->loop_state.counter)));
+   bld->system_values.invocation_id = wrap((LOAD(unwrap(iface->loop_var))));
 
-   if (verbose_shader) {
-      lp_build_printf(gallivm, "Prologue LOOP: Iteration %d BEGIN\n", iface->loop_state.counter);
-      lp_build_print_value(gallivm, "LOOP: InvocationId: \n", bld->system_values.invocation_id);
+   if (verbose_tcs_shader_loop) {
+      lp_build_print_value(gallivm, "Prologue LOOP Iteration BEGIN:", bld->system_values.invocation_id);
    }
+
 }
 
 void
@@ -952,10 +965,19 @@ BuilderSWR::swr_tcs_llvm_emit_epilogue(struct lp_build_tgsi_soa_context* bld)
 {
    swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)bld->tcs_iface;
 
-   if (verbose_shader) {
-      lp_build_printf(gallivm, "Epilogue LOOP: Iteration %d END\n", iface->loop_state.counter);
+   struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
+
+   STORE(ADD(LOAD(unwrap(iface->loop_var)), VBROADCAST(C(1))), unwrap(iface->loop_var));
+   if (verbose_tcs_shader_loop) {
+      lp_build_print_value(gallivm, "Epilogue LOOP: ", wrap(LOAD(unwrap(iface->loop_var))));
    }
-   lp_build_for_loop_end(&iface->loop_state);
+
+   LLVMValueRef tmp = lp_build_cmp(uint_bld, PIPE_FUNC_GEQUAL, wrap(LOAD(unwrap(iface->loop_var))),
+                                   wrap(VBROADCAST(C(iface->output_vertices))));
+   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
+   lp_exec_break(&bld->exec_mask, &bld->bld_base.pc, false);
+   lp_exec_mask_cond_pop(&bld->exec_mask);
+   lp_exec_endloop(bld->bld_base.base.gallivm, &bld->exec_mask);
 }
 
 LLVMValueRef
@@ -968,46 +990,75 @@ BuilderSWR::swr_tcs_llvm_fetch_input(const struct lp_build_tcs_iface *tcs_iface,
                                      LLVMValueRef swizzle_index)
 {
    swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface;
+
    Value *vert_index = unwrap(vertex_index);
    Value *attr_index = unwrap(attrib_index);
 
    IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
 
-   if (verbose_shader) {
-      lp_build_print_value(gallivm, "TCS: Vertex index: ", vertex_index);
-      lp_build_print_value(gallivm, "TCS: Attrib index: ", attrib_index);
-      lp_build_print_value(gallivm, "TCS: Swizzle index: ", swizzle_index);
+   if (verbose_tcs_shader_in) {
+      lp_build_printf(gallivm, "[TCS IN][VTX] ======================================\n");
+      lp_build_print_value(gallivm, "[TCS IN][VTX] vertex_index: ", vertex_index);
+      lp_build_print_value(gallivm, "[TCS IN][VTX] attrib_index: ", attrib_index);
+      lp_build_printf(gallivm, "[TCS IN][VTX] --------------------------------------\n");
    }
 
-   if (is_vindex_indirect) {
-      vert_index = VEXTRACT(vert_index, C(0));
-      if (verbose_shader) {
-         lp_build_print_value(gallivm, "TCS: Extracted vertex index: ", vertex_index);
-      }
-   }
+   Value *res = unwrap(bld_base->base.zero);
+   if (is_vindex_indirect || is_aindex_indirect) {
+      int i;
+      struct lp_type type = bld_base->base.type;
 
-   if (is_aindex_indirect) {
-      attr_index = VEXTRACT(attr_index, C(0));
-      if (verbose_shader) {
-         lp_build_print_value(gallivm, "TCS: Extracted attrib index: ", attrib_index);
-      }
-   }
+      for (i = 0; i < type.length; i++) {
+         Value *vert_chan_index = vert_index;
+         Value *attr_chan_index = attr_index;
 
-   Value *attrib = LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_index}));
-   if (verbose_shader) {
-      lp_build_print_value(gallivm, "TCS: Attrib index loaded from map: ", wrap(attrib));
-   }
+         if (is_vindex_indirect) {
+            vert_chan_index = VEXTRACT(vert_index, C(i));
+         }
+         if (is_aindex_indirect) {
+            attr_chan_index = VEXTRACT(attr_index, C(i));
+         }
 
-   Value *pBase = GEP(iface->pTcsCtx,
-                     { C(0), C(SWR_HS_CONTEXT_vert), vert_index,
-                     C(simdvertex_attrib), attrib /*attr_index*/, unwrap(swizzle_index) });
+         Value *attrib =
+            LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_chan_index}));
 
-   LLVMValueRef res = wrap(LOAD(pBase));
+         Value *pBase = GEP(iface->pTcsCtx,
+                        { C(0), C(SWR_HS_CONTEXT_vert), vert_chan_index,
+                        C(simdvertex_attrib), attrib, unwrap(swizzle_index), C(i) });
 
-   if (verbose_shader) {
-      lp_build_print_value(gallivm, "TCS input fetched: ", res);
+         Value *val = LOAD(pBase);
+
+         if (verbose_tcs_shader_in) {
+            lp_build_print_value(gallivm, "[TCS IN][VTX] vert_chan_index: ", wrap(vert_chan_index));
+            lp_build_print_value(gallivm, "[TCS IN][VTX] attrib_index: ", attrib_index);
+            lp_build_print_value(gallivm, "[TCS IN][VTX] attr_chan_index: ", wrap(attr_index));
+            lp_build_print_value(gallivm, "[TCS IN][VTX] attrib read from map: ", wrap(attrib));
+            lp_build_print_value(gallivm, "[TCS IN][VTX] swizzle_index: ", swizzle_index);
+            lp_build_print_value(gallivm, "[TCS IN][VTX] Loaded: ", wrap(val));
+         }
+         res = VINSERT(res, val, C(i));
+      }
+   } else {
+      Value *attrib = LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_index}));
+
+      Value *pBase = GEP(iface->pTcsCtx,
+                        { C(0), C(SWR_HS_CONTEXT_vert), vert_index,
+                        C(simdvertex_attrib), attrib, unwrap(swizzle_index) });
+
+      res = LOAD(pBase);
+
+      if (verbose_tcs_shader_in) {
+         lp_build_print_value(gallivm, "[TCS IN][VTX] attrib_index: ", attrib_index);
+         lp_build_print_value(gallivm, "[TCS IN][VTX] attr_chan_index: ", wrap(attr_index));
+         lp_build_print_value(gallivm, "[TCS IN][VTX] attrib read from map: ", wrap(attrib));
+         lp_build_print_value(gallivm, "[TCS IN][VTX] swizzle_index: ", swizzle_index);
+         lp_build_print_value(gallivm, "[TCS IN][VTX] Loaded: ", wrap(res));
+      }
    }
-   return res;
+   if (verbose_tcs_shader_in) {
+      lp_build_print_value(gallivm, "[TCS IN][VTX] returning: ", wrap(res));
+   }
+   return wrap(res);
 }
 
 LLVMValueRef
@@ -1027,25 +1078,10 @@ BuilderSWR::swr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface *tcs_iface
 
    IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
 
-   if (verbose_shader) {
-      lp_build_print_value(gallivm, "++TCSo: Vertex index: ", vertex_index);
-      lp_build_print_value(gallivm, "++TCSo: Attrib index: ", wrap(attr_index));
-      lp_build_print_value(gallivm, "++TCSo: Swizzle index: ", swizzle_index);
-   }
-
-   if (is_vindex_indirect) {
-      vert_index = VEXTRACT(vert_index, C(0));
-      if (verbose_shader)
-      {
-         lp_build_print_value(gallivm, "TCSo: Extracted vertex index: ", vertex_index);
-      }
-   }
-
-   if (is_aindex_indirect) {
-      attr_index = VEXTRACT(attr_index, C(0));
-      if (verbose_shader) {
-         lp_build_print_value(gallivm, "TCSo: Extracted attrib index: ", attrib_index);
-      }
+   if (verbose_tcs_shader_in) {
+      lp_build_print_value(gallivm, "[TCS INOUT] Vertex index: ", vertex_index);
+      lp_build_print_value(gallivm, "[TCS INOUT] Attrib index: ", wrap(attr_index));
+      lp_build_print_value(gallivm, "[TCS INOUT] Swizzle index: ", swizzle_index);
    }
 
    Value* res = unwrap(bld_base->base.zero);
@@ -1054,8 +1090,24 @@ BuilderSWR::swr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface *tcs_iface
       Value* p1 = LOAD(iface->pTcsCtx, {0, SWR_HS_CONTEXT_pCPout});
       Value* pCpOut = GEP(p1, {lane});
 
-      if (name == TGSI_SEMANTIC_TESSOUTER || name == TGSI_SEMANTIC_TESSINNER) {
+      Value *vert_chan_index = vert_index;
+      Value *attr_chan_index = attr_index;
+
+      if (is_vindex_indirect) {
+         vert_chan_index = VEXTRACT(vert_index, C(lane));
+         if (verbose_tcs_shader_in) {
+            lp_build_print_value(gallivm, "[TCS INOUT] Extracted vertex index: ", wrap(vert_chan_index));
+         }
+      }
+
+      if (is_aindex_indirect) {
+         attr_chan_index = VEXTRACT(attr_index, C(lane));
+         if (verbose_tcs_shader_in) {
+            lp_build_print_value(gallivm, "[TCS INOUT] Extracted attrib index: ", wrap(attr_chan_index));
+         }
+      }
 
+      if (name == TGSI_SEMANTIC_TESSOUTER || name == TGSI_SEMANTIC_TESSINNER) {
          Value* tessFactors = GEP(pCpOut, {(uint32_t)0, ScalarPatch_tessFactors});
          Value* tessFactorArray = nullptr;
          if (name == TGSI_SEMANTIC_TESSOUTER) {
@@ -1065,33 +1117,36 @@ BuilderSWR::swr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface *tcs_iface
          }
          Value* tessFactor = GEP(tessFactorArray, {C(0), unwrap(swizzle_index)});
          res = VINSERT(res, LOAD(tessFactor), C(lane));
-
+         if (verbose_tcs_shader_in) {
+            lp_build_print_value(gallivm, "[TCS INOUT][FACTOR] lane (patch-id): ", wrap(C(lane)));
+            lp_build_print_value(gallivm, "[TCS INOUT][FACTOR] loaded value: ", wrap(res));
+         }
       } else if (name == TGSI_SEMANTIC_PATCH) {
-         lp_build_print_value(gallivm, "bbbbb TCS per-patch attr_index: ", wrap(attr_index));
-         Value* attr = GEP(pCpOut, {C(0), C(ScalarPatch_patchData), C(ScalarCPoint_attrib), attr_index, unwrap(swizzle_index)});
-         res = VINSERT(res, LOAD(attr), C(lane));
-         if (verbose_shader) {
-            lp_build_print_value(gallivm, "++TCSo per-patch lane (patch-id): ", wrap(C(lane)));
-            lp_build_print_value(gallivm, "++TCSo per-patch loaded value: ", wrap(res));
+         Value* attr_index_from_map = LOAD(GEP(iface->pPatchOutputAttribMap, {C(0), attr_chan_index}));
+         Value* attr_value = GEP(pCpOut, {C(0), C(ScalarPatch_patchData), C(ScalarCPoint_attrib), attr_index_from_map, unwrap(swizzle_index)});
+         res = VINSERT(res, LOAD(attr_value), C(lane));
+         if (verbose_tcs_shader_in) {
+            lp_build_print_value(gallivm, "[TCS INOUT][PATCH] attr index loaded from map: ", wrap(attr_index_from_map));
+            lp_build_print_value(gallivm, "[TCS INOUT][PATCH] lane (patch-id): ", wrap(C(lane)));
+            lp_build_print_value(gallivm, "[TCS INOUT][PATCH] loaded value: ", wrap(res));
          }
       } else {
          // Generic attribute
          Value *attrib =
-             LOAD(GEP(iface->pVtxOutputAttribMap, {C(0), attr_index}));
-         if (verbose_shader)
-         {
-            lp_build_print_value(gallivm, "TCSo: Attrib index from map: ", wrap(attrib));
+             LOAD(GEP(iface->pVtxOutputAttribMap, {C(0), attr_chan_index}));
+         if (verbose_tcs_shader_in) {
+            lp_build_print_value(gallivm, "[TCS INOUT][VTX] Attrib index from map: ", wrap(attrib));
          }
-         Value* attr_chan = GEP(pCpOut, {C(0), C(ScalarPatch_cp), vert_index,
+         Value* attr_chan = GEP(pCpOut, {C(0), C(ScalarPatch_cp), vert_chan_index,
                                     C(ScalarCPoint_attrib), attrib, unwrap(swizzle_index)});
 
          res = VINSERT(res, LOAD(attr_chan), C(lane));
+         if (verbose_tcs_shader_in) {
+            lp_build_print_value(gallivm, "[TCS INOUT][VTX] loaded value: ", wrap(res));
+         }
       }
    }
 
-   if (verbose_shader) {
-      lp_build_print_value(gallivm, "TCSo: output fetched: ", wrap(res));
-   }
    return wrap(res);
 }
 
@@ -1104,18 +1159,19 @@ BuilderSWR::swr_tcs_llvm_store_output(const struct lp_build_tcs_iface *tcs_iface
                                       boolean is_aindex_indirect,
                                       LLVMValueRef attrib_index,
                                       LLVMValueRef swizzle_index,
-                                      LLVMValueRef value)
+                                      LLVMValueRef value,
+                                      LLVMValueRef mask_vec)
 {
    swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface;
    struct lp_build_tgsi_soa_context* bld = (struct lp_build_tgsi_soa_context*)bld_base;
 
    IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
 
-    if (verbose_shader) {
+    if (verbose_tcs_shader_out) {
       lp_build_printf(gallivm, "[TCS OUT] =============================================\n");
     }
 
-   if (verbose_shader) {
+   if (verbose_tcs_shader_out) {
       lp_build_print_value(gallivm, "[TCS OUT] Store mask: ", bld->exec_mask.exec_mask);
       lp_build_print_value(gallivm, "[TCS OUT] Store value: ", value);
    }
@@ -1123,9 +1179,7 @@ BuilderSWR::swr_tcs_llvm_store_output(const struct lp_build_tcs_iface *tcs_iface
    Value *vert_index = unwrap(vertex_index);
    Value *attr_index = unwrap(attrib_index);
 
-   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
-
-   if (verbose_shader) {
+   if (verbose_tcs_shader_out) {
       lp_build_print_value(gallivm, "[TCS OUT] Vertex index: ", vertex_index);
       lp_build_print_value(gallivm, "[TCS OUT] Attrib index: ", wrap(attr_index));
       lp_build_print_value(gallivm, "[TCS OUT] Swizzle index: ", swizzle_index);
@@ -1133,19 +1187,26 @@ BuilderSWR::swr_tcs_llvm_store_output(const struct lp_build_tcs_iface *tcs_iface
 
    if (is_vindex_indirect) {
       vert_index = VEXTRACT(vert_index, C(0));
-      if (verbose_shader)
-      {
+      if (verbose_tcs_shader_out) {
          lp_build_print_value(gallivm, "[TCS OUT] Extracted vertex index: ", vertex_index);
       }
    }
 
    if (is_aindex_indirect) {
       attr_index = VEXTRACT(attr_index, C(0));
-      if (verbose_shader) {
+      if (verbose_tcs_shader_out) {
          lp_build_print_value(gallivm, "[TCS OUT] Extracted attrib index: ", wrap(attr_index));
       }
    }
 
+   if (verbose_tcs_shader_out) {
+      if (bld->exec_mask.has_mask) {
+         lp_build_print_value(gallivm, "[TCS OUT] Exec mask: ", bld->exec_mask.exec_mask);
+      }
+      else {
+         lp_build_printf(gallivm, "[TCS OUT] has no mask\n");
+      }
+   }
    for (uint32_t lane = 0; lane < mVWidth; lane++) {
       Value* p1 = LOAD(iface->pTcsCtx, {0, SWR_HS_CONTEXT_pCPout});
       Value* pCpOut = GEP(p1, {lane});
@@ -1160,19 +1221,21 @@ BuilderSWR::swr_tcs_llvm_store_output(const struct lp_build_tcs_iface *tcs_iface
          }
          Value* tessFactor = GEP(tessFactorArray, {C(0), unwrap(swizzle_index)});
          Value* valueToStore = VEXTRACT(unwrap(value), C(lane));
-         struct lp_exec_mask *mask = &bld->exec_mask;
-         if (mask->has_mask) {
+         valueToStore = BITCAST(valueToStore, mFP32Ty);
+         if (mask_vec) {
             Value *originalVal = LOAD(tessFactor);
-            Value *vMask = TRUNC(VEXTRACT(unwrap(mask->exec_mask), C(lane)), mInt1Ty);
+            Value *vMask = TRUNC(VEXTRACT(unwrap(mask_vec), C(lane)), mInt1Ty);
             valueToStore = SELECT(vMask, valueToStore, originalVal);
          }
          STORE(valueToStore, tessFactor);
-         if (verbose_shader) {
+         if (verbose_tcs_shader_out)
+         {
+            lp_build_print_value(gallivm, "[TCS OUT][FACTOR] Mask_vec mask: ", mask_vec);
             lp_build_print_value(gallivm, "[TCS OUT][FACTOR] Stored value: ", wrap(valueToStore));
          }
       } else if (name == TGSI_SEMANTIC_PATCH) {
          Value* attrib = LOAD(GEP(iface->pPatchOutputAttribMap, {C(0), attr_index}));
-         if (verbose_shader) {
+         if (verbose_tcs_shader_out) {
             lp_build_print_value(gallivm, "[TCS OUT][PATCH] vert_index: ", wrap(vert_index));
             lp_build_print_value(gallivm, "[TCS OUT][PATCH] attr_index: ", wrap(attr_index));
             lp_build_print_value(gallivm, "[TCS OUT][PATCH] vert_index_indirect: ", wrap(C(is_vindex_indirect)));
@@ -1181,33 +1244,34 @@ BuilderSWR::swr_tcs_llvm_store_output(const struct lp_build_tcs_iface *tcs_iface
          }
          Value* attr = GEP(pCpOut, {C(0), C(ScalarPatch_patchData), C(ScalarCPoint_attrib), attrib});
          Value* value_to_store = VEXTRACT(unwrap(value), C(lane));
-         if (verbose_shader) {
+         if (verbose_tcs_shader_out) {
             lp_build_print_value(gallivm, "[TCS OUT][PATCH] lane (patch-id): ", wrap(C(lane)));
             lp_build_print_value(gallivm, "[TCS OUT][PATCH] value to store: ", value);
             lp_build_print_value(gallivm, "[TCS OUT][PATCH] per-patch value to store: ", wrap(value_to_store));
             lp_build_print_value(gallivm, "[TCS OUT][PATCH] chan_index: ", swizzle_index);
          }
-         struct lp_exec_mask *mask = &bld->exec_mask;
-         if (mask->has_mask) {
+         value_to_store = BITCAST(value_to_store, mFP32Ty);
+         if (mask_vec) {
             Value *originalVal = LOADV(attr, {C(0), unwrap(swizzle_index)});
-            Value *vMask = TRUNC(VEXTRACT(unwrap(mask->exec_mask), C(lane)), mInt1Ty);
-            value_to_store = SELECT(vMask, BITCAST(value_to_store, mFP32Ty), originalVal);
-            if (verbose_shader) {
-               lp_build_print_value(gallivm, "[TCS OUT][PATCH] store mask: ", bld->exec_mask.exec_mask);
+            Value *vMask = TRUNC(VEXTRACT(unwrap(mask_vec), C(lane)), mInt1Ty);
+            value_to_store = SELECT(vMask, value_to_store, originalVal);
+            if (verbose_tcs_shader_out) {
+               lp_build_print_value(gallivm, "[TCS OUT][PATCH] store mask: ", mask_vec);
                lp_build_print_value(gallivm, "[TCS OUT][PATCH] loaded original value: ", wrap(originalVal));
                lp_build_print_value(gallivm, "[TCS OUT][PATCH] vMask: ", wrap(vMask));
                lp_build_print_value(gallivm, "[TCS OUT][PATCH] selected value to store: ", wrap(value_to_store));
             }
          }
          STOREV(value_to_store, attr, {C(0), unwrap(swizzle_index)});
-         if (verbose_shader) {
+         if (verbose_tcs_shader_out) {
             lp_build_print_value(gallivm, "[TCS OUT][PATCH] stored value: ", wrap(value_to_store));
          }
       } else {
          Value* value_to_store = VEXTRACT(unwrap(value), C(lane));
          Value* attrib = LOAD(GEP(iface->pVtxOutputAttribMap, {C(0), attr_index}));
 
-         if (verbose_shader) {
+         if (verbose_tcs_shader_out) {
+            lp_build_printf(gallivm, "[TCS OUT] Writting attribute\n");
             lp_build_print_value(gallivm, "[TCS OUT][VTX] invocation_id: ", bld->system_values.invocation_id);
             lp_build_print_value(gallivm, "[TCS OUT][VTX] attribIndex: ", wrap(attr_index));
             lp_build_print_value(gallivm, "[TCS OUT][VTX] attrib read from map: ", wrap(attrib));
@@ -1221,23 +1285,21 @@ BuilderSWR::swr_tcs_llvm_store_output(const struct lp_build_tcs_iface *tcs_iface
                                     C(ScalarCPoint_attrib), attrib, unwrap(swizzle_index)});
 
          // Mask output values if needed
-         struct lp_exec_mask *mask = &bld->exec_mask;
-         if (mask->has_mask) {
+         value_to_store = BITCAST(value_to_store, mFP32Ty);
+         if (mask_vec) {
             Value *originalVal = LOAD(attr_chan);
-            Value *vMask = TRUNC(VEXTRACT(unwrap(mask->exec_mask), C(lane)), mInt1Ty);
-            // convert input to float before trying to store
-            value_to_store = SELECT(vMask, BITCAST(value_to_store, mFP32Ty), originalVal);
+            Value *vMask = TRUNC(VEXTRACT(unwrap(mask_vec), C(lane)), mInt1Ty);
+            value_to_store = SELECT(vMask, value_to_store, originalVal);
          }
          STORE(value_to_store, attr_chan);
-         if (verbose_shader) {
+         if (verbose_tcs_shader_out) {
+            lp_build_print_value(gallivm, "[TCS OUT][VTX] Mask_vec mask: ", mask_vec);
             lp_build_print_value(gallivm, "[TCS OUT][VTX] stored: ", wrap(value_to_store));
          }
       }
    }
 }
 
-
-
 void
 BuilderSWR::swr_tcs_llvm_emit_barrier(const struct lp_build_tcs_iface *tcs_iface,
                                       struct lp_build_tgsi_context *bld_base)
@@ -1245,28 +1307,34 @@ BuilderSWR::swr_tcs_llvm_emit_barrier(const struct lp_build_tcs_iface *tcs_iface
    swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface;
    struct lp_build_tgsi_soa_context* bld = (struct lp_build_tgsi_soa_context*)bld_base;
 
-   if (verbose_shader) {
-      lp_build_printf(gallivm, "Barrier LOOP: Iteration %d END\n", iface->loop_state.counter);
+   if (verbose_tcs_shader_loop) {
+      lp_build_print_value(gallivm, "Barrier LOOP: Iteration %d END\n", iface->loop_var);
    }
 
-   // End previous loop
-   lp_build_for_loop_end(&iface->loop_state);
+   struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
+
+   STORE(ADD(LOAD(unwrap(iface->loop_var)), VBROADCAST(C(1))), unwrap(iface->loop_var));
 
-   // Start new one
-   lp_build_for_loop_begin(&iface->loop_state, gallivm,
-                        lp_build_const_int32(gallivm, 0),
-                        LLVMIntULT,
-                        lp_build_const_int32(gallivm, iface->output_vertices),
-                        lp_build_const_int32(gallivm, 1));
+   LLVMValueRef tmp = lp_build_cmp(uint_bld, PIPE_FUNC_GEQUAL, wrap(LOAD(unwrap(iface->loop_var))),
+                                   wrap(VBROADCAST(C(iface->output_vertices))));
 
+   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
+   lp_exec_break(&bld->exec_mask, &bld->bld_base.pc, false);
+   lp_exec_mask_cond_pop(&bld->exec_mask);
+   lp_exec_endloop(bld->bld_base.base.gallivm, &bld->exec_mask);
 
    IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
 
-   bld->system_values.invocation_id  = wrap(VBROADCAST(unwrap(iface->loop_state.counter)));
+   STORE(VBROADCAST(C(0)), unwrap(iface->loop_var));
+   lp_exec_bgnloop(&bld->exec_mask, true);
 
-   if (verbose_shader) {
-      lp_build_printf(gallivm, "Barrier LOOP: Iteration %d BEGIN\n", iface->loop_state.counter);
-      lp_build_print_value(gallivm, "LOOP: InvocationId: \n", bld->system_values.invocation_id);
+   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
+
+   bld->system_values.invocation_id = wrap((LOAD(unwrap(iface->loop_var))));
+
+   if (verbose_tcs_shader_loop) {
+      lp_build_print_value(gallivm, "Barrier LOOP: Iteration BEGIN: ", iface->loop_var);
+      lp_build_print_value(gallivm, "Barrier LOOP: InvocationId: \n", bld->system_values.invocation_id);
    }
 }
 
@@ -1768,7 +1836,7 @@ BuilderSWR::CompileTES(struct swr_context *ctx, swr_jit_tes_key &key)
    Value* pTessFactors = GEP(pPatch, {C(0), C(ScalarPatch_tessFactors)});
 
    assert(SWR_NUM_OUTER_TESS_FACTORS == 4);
-   Value* sys_value_outer_factors = UndefValue::get(VectorType::get(mFP32Ty, 4));
+   Value* sys_value_outer_factors = UndefValue::get(getVectorType(mFP32Ty, 4));
    for (unsigned i = 0; i < SWR_NUM_OUTER_TESS_FACTORS; i++) {
       Value* v = LOAD(pTessFactors, {0, SWR_TESSELLATION_FACTORS_OuterTessFactors, i});
       sys_value_outer_factors = VINSERT(sys_value_outer_factors, v, i, "gl_TessLevelOuter");
@@ -1776,7 +1844,7 @@ BuilderSWR::CompileTES(struct swr_context *ctx, swr_jit_tes_key &key)
    system_values.tess_outer = wrap(sys_value_outer_factors);
 
    assert(SWR_NUM_INNER_TESS_FACTORS == 2);
-   Value* sys_value_inner_factors = UndefValue::get(VectorType::get(mFP32Ty, 4));
+   Value* sys_value_inner_factors = UndefValue::get(getVectorType(mFP32Ty, 4));
    for (unsigned i = 0; i < SWR_NUM_INNER_TESS_FACTORS; i++) {
       Value* v = LOAD(pTessFactors, {0, SWR_TESSELLATION_FACTORS_InnerTessFactors, i});
       sys_value_inner_factors = VINSERT(sys_value_inner_factors, v, i, "gl_TessLevelInner");
@@ -2048,15 +2116,7 @@ BuilderSWR::CompileTCS(struct swr_context *ctx, swr_jit_tcs_key &key)
    system_values.prim_id =
       wrap(LOAD(pTcsCtx, {0, SWR_HS_CONTEXT_PrimitiveID}));
 
-   Constant *vInvocationId;
-   if (mVWidth == 8) {
-      vInvocationId = C({0, 1, 2, 3, 4, 5, 6, 7});
-   } else {
-      vInvocationId =
-         C({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15});
-   }
-
-   system_values.invocation_id = wrap(vInvocationId);
+   system_values.invocation_id = wrap(VBROADCAST(C(0)));
    system_values.vertices_in = wrap(C(tcs->vertices_per_patch));
 
    if (verbose_shader) {
@@ -2243,7 +2303,7 @@ BuilderSWR::WriteVS(Value *pVal, Value *pVsContext, Value *pVtxOutput, unsigned
 #else
    Value *pOut = GEP(pVtxOutput, {0, 0, slot});
    STORE(pVal, pOut, {0, channel});
-   if (verbose_shader) {
+   if (verbose_vs_shader) {
       lp_build_printf(gallivm, "VS: Storing on slot %d, channel %d: ", C(slot), C(channel));
       lp_build_print_value(gallivm, "", wrap(pVal));
    }