gallivm/gs_iface: pass stream into end primitive interface.
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_nir_soa.c
index 05f16d81e56e554b0b15472ed5976dbcbeeb589e..a8b7d0dd86e2d5f5608c702136b3e393e8178d02 100644 (file)
@@ -69,8 +69,13 @@ emit_fetch_64bit(
    assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
 
    for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
+#if UTIL_ARCH_LITTLE_ENDIAN
       shuffles[i] = lp_build_const_int32(gallivm, i / 2);
       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
+#else
+      shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
+      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
+#endif
    }
    res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
 
@@ -78,40 +83,54 @@ emit_fetch_64bit(
 }
 
 static void
-emit_store_64bit_chan(struct lp_build_nir_context *bld_base,
-                      LLVMValueRef chan_ptr,
-                      LLVMValueRef chan_ptr2,
-                      LLVMValueRef value)
+emit_store_64bit_split(struct lp_build_nir_context *bld_base,
+                       LLVMValueRef value,
+                       LLVMValueRef split_values[2])
 {
-   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
    struct gallivm_state *gallivm = bld_base->base.gallivm;
    LLVMBuilderRef builder = gallivm->builder;
-   struct lp_build_context *float_bld = &bld_base->base;
    unsigned i;
-   LLVMValueRef temp, temp2;
    LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
    LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
    int len = bld_base->base.type.length * 2;
 
    value = LLVMBuildBitCast(gallivm->builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), len), "");
    for (i = 0; i < bld_base->base.type.length; i++) {
+#if UTIL_ARCH_LITTLE_ENDIAN
       shuffles[i] = lp_build_const_int32(gallivm, i * 2);
       shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
+#else
+      shuffles[i] = lp_build_const_int32(gallivm, i * 2 + 1);
+      shuffles2[i] = lp_build_const_int32(gallivm, i * 2);
+#endif
    }
 
-   temp = LLVMBuildShuffleVector(builder, value,
+   split_values[0] = LLVMBuildShuffleVector(builder, value,
                                  LLVMGetUndef(LLVMTypeOf(value)),
                                  LLVMConstVector(shuffles,
                                                  bld_base->base.type.length),
                                  "");
-   temp2 = LLVMBuildShuffleVector(builder, value,
+   split_values[1] = LLVMBuildShuffleVector(builder, value,
                                   LLVMGetUndef(LLVMTypeOf(value)),
                                   LLVMConstVector(shuffles2,
                                                   bld_base->base.type.length),
                                   "");
+}
+
+static void
+emit_store_64bit_chan(struct lp_build_nir_context *bld_base,
+                      LLVMValueRef chan_ptr,
+                      LLVMValueRef chan_ptr2,
+                      LLVMValueRef value)
+{
+   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+   struct lp_build_context *float_bld = &bld_base->base;
+   LLVMValueRef split_vals[2];
 
-   lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
-   lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
+   emit_store_64bit_split(bld_base, value, split_vals);
+
+   lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[0], chan_ptr);
+   lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[1], chan_ptr2);
 }
 
 static LLVMValueRef
@@ -280,6 +299,7 @@ static void emit_load_var(struct lp_build_nir_context *bld_base,
                            unsigned bit_size,
                            nir_variable *var,
                            unsigned vertex_index,
+                           LLVMValueRef indir_vertex_index,
                            unsigned const_index,
                            LLVMValueRef indir_index,
                            LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
@@ -288,7 +308,7 @@ static void emit_load_var(struct lp_build_nir_context *bld_base,
    struct gallivm_state *gallivm = bld_base->base.gallivm;
    int dmul = bit_size == 64 ? 2 : 1;
    switch (deref_mode) {
-   case nir_var_shader_in: {
+   case nir_var_shader_in:
       for (unsigned i = 0; i < num_components; i++) {
          int idx = (i * dmul) + var->data.location_frac;
          if (bld->gs_iface) {
@@ -304,6 +324,59 @@ static void emit_load_var(struct lp_build_nir_context *bld_base,
                                                     false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
                result[i] = emit_fetch_64bit(bld_base, result[i], result2);
             }
+         } else if (bld->tes_iface) {
+            LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
+            LLVMValueRef attrib_index_val;
+            LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
+            LLVMValueRef result2;
+
+            if (indir_index)
+               attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
+            else
+               attrib_index_val = lp_build_const_int32(gallivm, const_index + var->data.driver_location);
+            if (var->data.patch) {
+               result[i] = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
+                                                             indir_index ? true : false, attrib_index_val, swizzle_index_val);
+               if (bit_size == 64) {
+                  LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
+                  result2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
+                                                              indir_index ? true : false, attrib_index_val, swizzle_index_val);
+                  result[i] = emit_fetch_64bit(bld_base, result[i], result2);
+               }
+            }
+            else {
+               result[i] = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
+                                                              indir_vertex_index ? true : false,
+                                                              indir_vertex_index ? indir_vertex_index : vertex_index_val,
+                                                              indir_index ? true : false, attrib_index_val, swizzle_index_val);
+               if (bit_size == 64) {
+                  LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
+                  result2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
+                                                               indir_vertex_index ? true : false,
+                                                               indir_vertex_index ? indir_vertex_index : vertex_index_val,
+                                                               indir_index ? true : false, attrib_index_val, swizzle_index_val);
+                  result[i] = emit_fetch_64bit(bld_base, result[i], result2);
+               }
+            }
+         } else if (bld->tcs_iface) {
+            LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
+            LLVMValueRef attrib_index_val;
+            LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
+
+            if (indir_index)
+               attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
+            else
+               attrib_index_val = lp_build_const_int32(gallivm, const_index + var->data.driver_location);
+            result[i] = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
+                                                         indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
+                                                         indir_index ? true : false, attrib_index_val, swizzle_index_val);
+            if (bit_size == 64) {
+               LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
+               LLVMValueRef result2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
+                                                                       indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
+                                                                       indir_index ? true : false, attrib_index_val, swizzle_index_val);
+               result[i] = emit_fetch_64bit(bld_base, result[i], result2);
+            }
          } else {
             if (indir_index) {
                LLVMValueRef attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
@@ -350,7 +423,33 @@ static void emit_load_var(struct lp_build_nir_context *bld_base,
             }
          }
       }
-   }
+      break;
+   case nir_var_shader_out:
+      for (unsigned i = 0; i < num_components; i++) {
+         int idx = (i * dmul) + var->data.location_frac;
+         if (bld->tcs_iface) {
+            LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
+            LLVMValueRef attrib_index_val;
+            LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
+
+            if (indir_index)
+               attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
+            else
+               attrib_index_val = lp_build_const_int32(gallivm, const_index + var->data.driver_location);
+
+            result[i] = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
+                                                         indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
+                                                          indir_index ? true : false, attrib_index_val, swizzle_index_val, 0);
+            if (bit_size == 64) {
+               LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
+               LLVMValueRef result2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
+                                                                        indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
+                                                                        indir_index ? true : false, attrib_index_val, swizzle_index_val, 0);
+               result[i] = emit_fetch_64bit(bld_base, result[i], result2);
+            }
+         }
+      }
+      break;
    default:
       break;
    }
@@ -383,19 +482,79 @@ static void emit_store_chan(struct lp_build_nir_context *bld_base,
    }
 }
 
+static void emit_store_tcs_chan(struct lp_build_nir_context *bld_base,
+                                unsigned bit_size,
+                                unsigned location,
+                                unsigned const_index,
+                                LLVMValueRef indir_vertex_index,
+                                LLVMValueRef indir_index,
+                                unsigned comp,
+                                unsigned chan,
+                                LLVMValueRef chan_val)
+{
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
+   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
+   unsigned swizzle = chan;
+   if (bit_size == 64) {
+      swizzle *= 2;
+      swizzle += comp;
+      if (swizzle >= 4) {
+         swizzle -= 4;
+         location++;
+      }
+   } else
+      swizzle += comp;
+   LLVMValueRef attrib_index_val;
+   LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, swizzle);
+
+   if (indir_index)
+      attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, location));
+   else
+      attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
+   if (bit_size == 64) {
+      LLVMValueRef split_vals[2];
+      LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, swizzle + 1);
+      emit_store_64bit_split(bld_base, chan_val, split_vals);
+      bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
+                                        indir_vertex_index ? true : false,
+                                        indir_vertex_index,
+                                        indir_index ? true : false,
+                                        attrib_index_val, swizzle_index_val,
+                                        split_vals[0], mask_vec(bld_base));
+      bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
+                                        indir_vertex_index ? true : false,
+                                        indir_vertex_index,
+                                        indir_index ? true : false,
+                                        attrib_index_val, swizzle_index_val2,
+                                        split_vals[1], mask_vec(bld_base));
+   } else {
+      chan_val = LLVMBuildBitCast(builder, chan_val, bld_base->base.vec_type, "");
+      bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
+                                        indir_vertex_index ? true : false,
+                                        indir_vertex_index,
+                                        indir_index ? true : false,
+                                        attrib_index_val, swizzle_index_val,
+                                        chan_val, mask_vec(bld_base));
+   }
+}
+
 static void emit_store_var(struct lp_build_nir_context *bld_base,
                            nir_variable_mode deref_mode,
-                           unsigned bit_size,
                            unsigned num_components,
+                           unsigned bit_size,
+                           nir_variable *var,
                            unsigned writemask,
+                           LLVMValueRef indir_vertex_index,
                            unsigned const_index,
-                           nir_variable *var, LLVMValueRef dst)
+                           LLVMValueRef indir_index,
+                           LLVMValueRef dst)
 {
    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
    switch (deref_mode) {
    case nir_var_shader_out: {
-      unsigned location = var->data.driver_location + const_index;
+      unsigned location = var->data.driver_location;
       unsigned comp = var->data.location_frac;
       if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
          if (var->data.location == FRAG_RESULT_STENCIL)
@@ -403,10 +562,14 @@ static void emit_store_var(struct lp_build_nir_context *bld_base,
          else if (var->data.location == FRAG_RESULT_DEPTH)
             comp = 2;
       }
+
       for (unsigned chan = 0; chan < num_components; chan++) {
          if (writemask & (1u << chan)) {
             LLVMValueRef chan_val = (num_components == 1) ? dst : LLVMBuildExtractValue(builder, dst, chan, "");
-            emit_store_chan(bld_base, deref_mode, bit_size, location, comp, chan, chan_val);
+            if (bld->tcs_iface) {
+               emit_store_tcs_chan(bld_base, bit_size, location, const_index, indir_vertex_index, indir_index, comp, chan, chan_val);
+            } else
+               emit_store_chan(bld_base, deref_mode, bit_size, location + const_index, comp, chan, chan_val);
          }
       }
       break;
@@ -425,7 +588,7 @@ static LLVMValueRef emit_load_reg(struct lp_build_nir_context *bld_base,
    struct gallivm_state *gallivm = bld_base->base.gallivm;
    LLVMBuilderRef builder = gallivm->builder;
    int nc = reg->reg->num_components;
-   LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS];
+   LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS] = { NULL };
    struct lp_build_context *uint_bld = &bld_base->uint_bld;
    if (reg->reg->num_array_elems) {
       LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, reg->base_offset);
@@ -1049,10 +1212,12 @@ static void emit_barrier(struct lp_build_nir_context *bld_base)
 static LLVMValueRef emit_get_buffer_size(struct lp_build_nir_context *bld_base,
                                          LLVMValueRef index)
 {
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
    struct lp_build_context *bld_broad = &bld_base->uint_bld;
-   LLVMValueRef size_ptr = lp_build_array_get(bld_base->base.gallivm, bld->ssbo_sizes_ptr, LLVMBuildExtractElement(builder, index, bld_broad->zero, ""));
+   LLVMValueRef size_ptr = lp_build_array_get(bld_base->base.gallivm, bld->ssbo_sizes_ptr,
+                                              LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));
    return lp_build_broadcast_scalar(bld_broad, size_ptr);
 }
 
@@ -1060,10 +1225,17 @@ static void emit_image_op(struct lp_build_nir_context *bld_base,
                           struct lp_img_params *params)
 {
    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
+
    params->type = bld_base->base.type;
    params->context_ptr = bld->context_ptr;
    params->thread_data_ptr = bld->thread_data_ptr;
    params->exec_mask = mask_vec(bld_base);
+
+   if (params->image_index_offset)
+      params->image_index_offset = LLVMBuildExtractElement(gallivm->builder, params->image_index_offset,
+                                                           lp_build_const_int32(gallivm, 0), "");
+
    bld->image->emit_op(bld->image,
                        bld->bld_base.base.gallivm,
                        params);
@@ -1074,10 +1246,14 @@ static void emit_image_size(struct lp_build_nir_context *bld_base,
                             struct lp_sampler_size_query_params *params)
 {
    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
 
    params->int_type = bld_base->int_bld.type;
    params->context_ptr = bld->context_ptr;
 
+   if (params->texture_unit_offset)
+      params->texture_unit_offset = LLVMBuildExtractElement(gallivm->builder, params->texture_unit_offset,
+                                                            lp_build_const_int32(gallivm, 0), "");
    bld->image->emit_size_query(bld->image,
                                bld->bld_base.base.gallivm,
                                params);
@@ -1090,6 +1266,8 @@ static void init_var_slots(struct lp_build_nir_context *bld_base,
    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
    unsigned slots = glsl_count_attribute_slots(var->type, false) * 4;
 
+   if (!bld->outputs)
+     return;
    for (unsigned comp = sc; comp < slots + sc; comp++) {
       unsigned this_loc = var->data.driver_location + (comp / 4);
       unsigned this_chan = comp % 4;
@@ -1124,11 +1302,64 @@ static void emit_tex(struct lp_build_nir_context *bld_base,
                      struct lp_sampler_params *params)
 {
    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
 
    params->type = bld_base->base.type;
    params->context_ptr = bld->context_ptr;
    params->thread_data_ptr = bld->thread_data_ptr;
 
+   if (params->texture_index_offset && bld_base->shader->info.stage != MESA_SHADER_FRAGMENT) {
+      /* this is horrible but this can be dynamic */
+      LLVMValueRef coords[5];
+      LLVMValueRef *orig_texel_ptr;
+      struct lp_build_context *uint_bld = &bld_base->uint_bld;
+      LLVMValueRef result[4] = { LLVMGetUndef(bld_base->base.vec_type),
+                                 LLVMGetUndef(bld_base->base.vec_type),
+                                 LLVMGetUndef(bld_base->base.vec_type),
+                                 LLVMGetUndef(bld_base->base.vec_type) };
+      LLVMValueRef texel[4], orig_offset;
+      unsigned i;
+      orig_texel_ptr = params->texel;
+
+      for (i = 0; i < 5; i++) {
+         coords[i] = params->coords[i];
+      }
+      orig_offset = params->texture_index_offset;
+
+      for (unsigned v = 0; v < uint_bld->type.length; v++) {
+         LLVMValueRef idx = lp_build_const_int32(gallivm, v);
+         LLVMValueRef new_coords[5];
+         for (i = 0; i < 5; i++) {
+            new_coords[i] = LLVMBuildExtractElement(gallivm->builder,
+                                                    coords[i], idx, "");
+         }
+         params->coords = new_coords;
+         params->texture_index_offset = LLVMBuildExtractElement(gallivm->builder,
+                                                                orig_offset,
+                                                                idx, "");
+         params->type = lp_elem_type(bld_base->base.type);
+
+         params->texel = texel;
+         bld->sampler->emit_tex_sample(bld->sampler,
+                                       gallivm,
+                                       params);
+
+         for (i = 0; i < 4; i++) {
+            result[i] = LLVMBuildInsertElement(gallivm->builder, result[i], texel[i], idx, "");
+         }
+      }
+      for (i = 0; i < 4; i++) {
+         orig_texel_ptr[i] = result[i];
+      }
+      return;
+   }
+
+   if (params->texture_index_offset)
+      params->texture_index_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder,
+                                                             params->texture_index_offset,
+                                                             lp_build_const_int32(bld_base->base.gallivm, 0), "");
+
+   params->type = bld_base->base.type;
    bld->sampler->emit_tex_sample(bld->sampler,
                                  bld->bld_base.base.gallivm,
                                  params);
@@ -1142,6 +1373,10 @@ static void emit_tex_size(struct lp_build_nir_context *bld_base,
    params->int_type = bld_base->int_bld.type;
    params->context_ptr = bld->context_ptr;
 
+   if (params->texture_unit_offset)
+      params->texture_unit_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder,
+                                                             params->texture_unit_offset,
+                                                             lp_build_const_int32(bld_base->base.gallivm, 0), "");
    bld->sampler->emit_size_query(bld->sampler,
                                  bld->bld_base.base.gallivm,
                                  params);
@@ -1182,7 +1417,10 @@ static void emit_sysval_intrin(struct lp_build_nir_context *bld_base,
          result[i] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.grid_size, lp_build_const_int32(gallivm, i), ""));
       break;
    case nir_intrinsic_load_invocation_id:
-      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
+      if (bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL)
+         result[0] = bld->system_values.invocation_id;
+      else
+         result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
       break;
    case nir_intrinsic_load_front_face:
       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
@@ -1199,9 +1437,47 @@ static void emit_sysval_intrin(struct lp_build_nir_context *bld_base,
    case nir_intrinsic_load_work_dim:
       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.work_dim);
       break;
+   case nir_intrinsic_load_tess_coord:
+      for (unsigned i = 0; i < 3; i++) {
+        result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_coord, i, "");
+      }
+      break;
+   case nir_intrinsic_load_tess_level_outer:
+      for (unsigned i = 0; i < 4; i++)
+         result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_outer, i, ""));
+      break;
+   case nir_intrinsic_load_tess_level_inner:
+      for (unsigned i = 0; i < 2; i++)
+         result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_inner, i, ""));
+      break;
+   case nir_intrinsic_load_patch_vertices_in:
+      result[0] = bld->system_values.vertices_in;
+      break;
+   case nir_intrinsic_load_sample_id:
+      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.sample_id);
+      break;
+   case nir_intrinsic_load_sample_pos:
+      for (unsigned i = 0; i < 2; i++) {
+         LLVMValueRef idx = LLVMBuildMul(gallivm->builder, bld->system_values.sample_id, lp_build_const_int32(gallivm, 2), "");
+         idx = LLVMBuildAdd(gallivm->builder, idx, lp_build_const_int32(gallivm, i), "");
+         LLVMValueRef val = lp_build_array_get(gallivm, bld->system_values.sample_pos, idx);
+         result[i] = lp_build_broadcast_scalar(&bld_base->base, val);
+      }
+      break;
+   case nir_intrinsic_load_sample_mask_in:
+      result[0] = bld->system_values.sample_mask_in;
+      break;
    }
 }
 
+static void emit_helper_invocation(struct lp_build_nir_context *bld_base,
+                                   LLVMValueRef *dst)
+{
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
+   struct lp_build_context *uint_bld = &bld_base->uint_bld;
+   *dst = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, mask_vec(bld_base), lp_build_const_int_vec(gallivm, uint_bld->type, -1));
+}
+
 static void bgnloop(struct lp_build_nir_context *bld_base)
 {
    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
@@ -1357,7 +1633,7 @@ end_primitive_masked(struct lp_build_nir_context * bld_base,
    if (stream_id == 0)
       bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
                                    total_emitted_vertices_vec,
-                                   emitted_vertices_vec, emitted_prims_vec, mask_vec(bld_base));
+                                   emitted_vertices_vec, emitted_prims_vec, mask, 0);
    increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr[stream_id],
                              mask);
    clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
@@ -1366,7 +1642,7 @@ end_primitive_masked(struct lp_build_nir_context * bld_base,
 
 static void end_primitive(struct lp_build_nir_context *bld_base, uint32_t stream_id)
 {
-   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+   ASSERTED struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
 
    assert(bld->gs_iface->end_primitive);
 
@@ -1378,7 +1654,7 @@ static void
 emit_prologue(struct lp_build_nir_soa_context *bld)
 {
    struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
-   if (bld->indirects & nir_var_shader_in && !bld->gs_iface) {
+   if (bld->indirects & nir_var_shader_in && !bld->gs_iface && !bld->tcs_iface && !bld->tes_iface) {
       uint32_t num_inputs = util_bitcount64(bld->bld_base.shader->info.inputs_read);
       unsigned index, chan;
       LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
@@ -1413,7 +1689,7 @@ static void emit_vote(struct lp_build_nir_context *bld_base, LLVMValueRef src, n
    LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
 
    LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
-   LLVMValueRef init_val;
+   LLVMValueRef init_val = NULL;
    if (instr->intrinsic == nir_intrinsic_vote_ieq) {
       /* for equal we unfortunately have to loop and find the first valid one. */
       lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
@@ -1427,7 +1703,6 @@ static void emit_vote(struct lp_build_nir_context *bld_base, LLVMValueRef src, n
       lp_build_endif(&ifthen);
       lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                             NULL, LLVMIntUGE);
-      lp_build_print_value(gallivm, "init_val is ", LLVMBuildLoad(builder, res_store, ""));
       init_val = LLVMBuildLoad(builder, res_store, "");
    } else {
       LLVMBuildStore(builder, lp_build_const_int32(gallivm, instr->intrinsic == nir_intrinsic_vote_any ? 0 : -1), res_store);
@@ -1459,6 +1734,26 @@ static void emit_vote(struct lp_build_nir_context *bld_base, LLVMValueRef src, n
    result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildLoad(builder, res_store, ""));
 }
 
+static void
+emit_interp_at(struct lp_build_nir_context *bld_base,
+               unsigned num_components,
+               nir_variable *var,
+               bool centroid,
+               bool sample,
+               unsigned const_index,
+               LLVMValueRef indir_index,
+               LLVMValueRef offsets[2],
+               LLVMValueRef dst[4])
+{
+   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+
+   for (unsigned i = 0; i < num_components; i++) {
+      dst[i] = bld->fs_iface->interp_fn(bld->fs_iface, &bld_base->base,
+                                        const_index + var->data.driver_location, i + var->data.location_frac,
+                                        centroid, sample, indir_index, offsets);
+   }
+}
+
 void lp_build_nir_soa(struct gallivm_state *gallivm,
                       struct nir_shader *shader,
                       const struct lp_build_tgsi_params *params,
@@ -1554,6 +1849,8 @@ void lp_build_nir_soa(struct gallivm_state *gallivm,
    bld.bld_base.image_op = emit_image_op;
    bld.bld_base.image_size = emit_image_size;
    bld.bld_base.vote = emit_vote;
+   bld.bld_base.helper_invocation = emit_helper_invocation;
+   bld.bld_base.interp_at = emit_interp_at;
 
    bld.mask = params->mask;
    bld.inputs = params->inputs;
@@ -1576,12 +1873,16 @@ void lp_build_nir_soa(struct gallivm_state *gallivm,
       bld.indirects |= nir_var_shader_in;
 
    bld.gs_iface = params->gs_iface;
+   bld.tcs_iface = params->tcs_iface;
+   bld.tes_iface = params->tes_iface;
+   bld.fs_iface = params->fs_iface;
    if (bld.gs_iface) {
       struct lp_build_context *uint_bld = &bld.bld_base.uint_bld;
 
+      bld.gs_vertex_streams = params->gs_vertex_streams;
       bld.max_output_vertices_vec = lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
                                                            shader->info.gs.vertices_out);
-      for (int i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) {
+      for (int i = 0; i < params->gs_vertex_streams; i++) {
          bld.emitted_prims_vec_ptr[i] =
             lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_prims_ptr");
          bld.emitted_vertices_vec_ptr[i] =
@@ -1604,8 +1905,9 @@ void lp_build_nir_soa(struct gallivm_state *gallivm,
       LLVMValueRef total_emitted_vertices_vec;
       LLVMValueRef emitted_prims_vec;
 
-      end_primitive_masked(&bld.bld_base, lp_build_mask_value(bld.mask), 0);
-      for (int i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) {
+      for (int i = 0; i < params->gs_vertex_streams; i++) {
+         end_primitive_masked(&bld.bld_base, lp_build_mask_value(bld.mask), i);
+
          total_emitted_vertices_vec =
             LLVMBuildLoad(builder, bld.total_emitted_vertices_vec_ptr[i], "");