gallivm/nir: allow 64-bit arit ops
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_nir_soa.c
index c54df7e98a3a1d88cfb3483558955a78923f9b5a..b83acd0b58fda00a4ca07fef64ed5c917194307d 100644 (file)
@@ -69,8 +69,13 @@ emit_fetch_64bit(
    assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
 
    for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
+#if UTIL_ARCH_LITTLE_ENDIAN
       shuffles[i] = lp_build_const_int32(gallivm, i / 2);
       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
+#else
+      shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
+      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
+#endif
    }
    res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
 
@@ -91,8 +96,13 @@ emit_store_64bit_split(struct lp_build_nir_context *bld_base,
 
    value = LLVMBuildBitCast(gallivm->builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), len), "");
    for (i = 0; i < bld_base->base.type.length; i++) {
+#if UTIL_ARCH_LITTLE_ENDIAN
       shuffles[i] = lp_build_const_int32(gallivm, i * 2);
       shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
+#else
+      shuffles[i] = lp_build_const_int32(gallivm, i * 2 + 1);
+      shuffles2[i] = lp_build_const_int32(gallivm, i * 2);
+#endif
    }
 
    split_values[0] = LLVMBuildShuffleVector(builder, value,
@@ -297,15 +307,27 @@ static void emit_load_var(struct lp_build_nir_context *bld_base,
    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
    struct gallivm_state *gallivm = bld_base->base.gallivm;
    int dmul = bit_size == 64 ? 2 : 1;
+   unsigned location = var->data.driver_location;
+   unsigned location_frac = var->data.location_frac;
+
+   if (!var->data.compact && !indir_index)
+      location += const_index;
+   else if (var->data.compact) {
+      location += const_index / 4;
+      location_frac += const_index % 4;
+      const_index = 0;
+   }
    switch (deref_mode) {
    case nir_var_shader_in:
       for (unsigned i = 0; i < num_components; i++) {
-         int idx = (i * dmul) + var->data.location_frac;
+         int idx = (i * dmul) + location_frac;
+
          if (bld->gs_iface) {
             LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
-            LLVMValueRef attrib_index_val = lp_build_const_int32(gallivm, const_index + var->data.driver_location);
+            LLVMValueRef attrib_index_val = lp_build_const_int32(gallivm, location);
             LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
             LLVMValueRef result2;
+
             result[i] = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
                                                    false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
             if (bit_size == 64) {
@@ -320,10 +342,15 @@ static void emit_load_var(struct lp_build_nir_context *bld_base,
             LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
             LLVMValueRef result2;
 
-            if (indir_index)
-               attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
-            else
-               attrib_index_val = lp_build_const_int32(gallivm, const_index + var->data.driver_location);
+            if (indir_index) {
+               if (var->data.compact) {
+                  swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));
+                  attrib_index_val = lp_build_const_int32(gallivm, location);
+               } else
+                  attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
+            } else
+               attrib_index_val = lp_build_const_int32(gallivm, location);
+
             if (var->data.patch) {
                result[i] = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
                                                              indir_index ? true : false, attrib_index_val, swizzle_index_val);
@@ -338,13 +365,14 @@ static void emit_load_var(struct lp_build_nir_context *bld_base,
                result[i] = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
                                                               indir_vertex_index ? true : false,
                                                               indir_vertex_index ? indir_vertex_index : vertex_index_val,
-                                                              indir_index ? true : false, attrib_index_val, swizzle_index_val);
+                                                              (indir_index && !var->data.compact) ? true : false, attrib_index_val,
+                                                              (indir_index && var->data.compact) ? true : false, swizzle_index_val);
                if (bit_size == 64) {
                   LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
                   result2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
                                                                indir_vertex_index ? true : false,
                                                                indir_vertex_index ? indir_vertex_index : vertex_index_val,
-                                                               indir_index ? true : false, attrib_index_val, swizzle_index_val);
+                                                               indir_index ? true : false, attrib_index_val, false, swizzle_index_val);
                   result[i] = emit_fetch_64bit(bld_base, result[i], result2);
                }
             }
@@ -353,18 +381,24 @@ static void emit_load_var(struct lp_build_nir_context *bld_base,
             LLVMValueRef attrib_index_val;
             LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
 
-            if (indir_index)
-               attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
-            else
-               attrib_index_val = lp_build_const_int32(gallivm, const_index + var->data.driver_location);
+            if (indir_index) {
+               if (var->data.compact) {
+                  swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));
+                  attrib_index_val = lp_build_const_int32(gallivm, location);
+               } else
+                  attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
+            } else
+               attrib_index_val = lp_build_const_int32(gallivm, location);
             result[i] = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
                                                          indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
-                                                         indir_index ? true : false, attrib_index_val, swizzle_index_val);
+                                                         (indir_index && !var->data.compact) ? true : false, attrib_index_val,
+                                                         (indir_index && var->data.compact) ? true : false, swizzle_index_val);
             if (bit_size == 64) {
                LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
                LLVMValueRef result2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
                                                                        indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
-                                                                       indir_index ? true : false, attrib_index_val, swizzle_index_val);
+                                                                       indir_index ? true : false, attrib_index_val,
+                                                                       false, swizzle_index_val);
                result[i] = emit_fetch_64bit(bld_base, result[i], result2);
             }
          } else {
@@ -388,12 +422,12 @@ static void emit_load_var(struct lp_build_nir_context *bld_base,
             } else {
                if (bld->indirects & nir_var_shader_in) {
                   LLVMValueRef lindex = lp_build_const_int32(gallivm,
-                                                             var->data.driver_location * 4 + idx);
+                                                             location * 4 + idx);
                   LLVMValueRef input_ptr = lp_build_pointer_get(gallivm->builder,
                                                              bld->inputs_array, lindex);
                   if (bit_size == 64) {
                      LLVMValueRef lindex2 = lp_build_const_int32(gallivm,
-                                                                 var->data.driver_location * 4 + (idx + 1));
+                                                                 location * 4 + (idx + 1));
                      LLVMValueRef input_ptr2 = lp_build_pointer_get(gallivm->builder,
                                                                     bld->inputs_array, lindex2);
                      result[i] = emit_fetch_64bit(bld_base, input_ptr, input_ptr2);
@@ -403,11 +437,11 @@ static void emit_load_var(struct lp_build_nir_context *bld_base,
                } else {
                   if (bit_size == 64) {
                      LLVMValueRef tmp[2];
-                     tmp[0] = bld->inputs[var->data.driver_location + const_index][idx];
-                     tmp[1] = bld->inputs[var->data.driver_location + const_index][idx + 1];
+                     tmp[0] = bld->inputs[location][idx];
+                     tmp[1] = bld->inputs[location][idx + 1];
                      result[i] = emit_fetch_64bit(bld_base, tmp[0], tmp[1]);
                   } else {
-                     result[i] = bld->inputs[var->data.driver_location + const_index][idx];
+                     result[i] = bld->inputs[location][idx];
                   }
                }
             }
@@ -415,8 +449,12 @@ static void emit_load_var(struct lp_build_nir_context *bld_base,
       }
       break;
    case nir_var_shader_out:
+      if (bld->fs_iface && bld->fs_iface->fb_fetch) {
+         bld->fs_iface->fb_fetch(bld->fs_iface, &bld_base->base, var->data.driver_location, result);
+         return;
+      }
       for (unsigned i = 0; i < num_components; i++) {
-         int idx = (i * dmul) + var->data.location_frac;
+         int idx = (i * dmul) + location_frac;
          if (bld->tcs_iface) {
             LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
             LLVMValueRef attrib_index_val;
@@ -425,16 +463,18 @@ static void emit_load_var(struct lp_build_nir_context *bld_base,
             if (indir_index)
                attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
             else
-               attrib_index_val = lp_build_const_int32(gallivm, const_index + var->data.driver_location);
+               attrib_index_val = lp_build_const_int32(gallivm, location);
 
             result[i] = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
-                                                         indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
-                                                          indir_index ? true : false, attrib_index_val, swizzle_index_val, 0);
+                                                          indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
+                                                          (indir_index && !var->data.compact) ? true : false, attrib_index_val,
+                                                          (indir_index && var->data.compact) ? true : false, swizzle_index_val, 0);
             if (bit_size == 64) {
                LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
                LLVMValueRef result2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
                                                                         indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
-                                                                        indir_index ? true : false, attrib_index_val, swizzle_index_val, 0);
+                                                                        indir_index ? true : false, attrib_index_val,
+                                                                        false, swizzle_index_val, 0);
                result[i] = emit_fetch_64bit(bld_base, result[i], result2);
             }
          }
@@ -473,6 +513,7 @@ static void emit_store_chan(struct lp_build_nir_context *bld_base,
 }
 
 static void emit_store_tcs_chan(struct lp_build_nir_context *bld_base,
+                                bool is_compact,
                                 unsigned bit_size,
                                 unsigned location,
                                 unsigned const_index,
@@ -498,9 +539,13 @@ static void emit_store_tcs_chan(struct lp_build_nir_context *bld_base,
    LLVMValueRef attrib_index_val;
    LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, swizzle);
 
-   if (indir_index)
-      attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, location));
-   else
+   if (indir_index) {
+      if (is_compact) {
+         swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, swizzle));
+         attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
+      } else
+         attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, location));
+   } else
       attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
    if (bit_size == 64) {
       LLVMValueRef split_vals[2];
@@ -510,21 +555,25 @@ static void emit_store_tcs_chan(struct lp_build_nir_context *bld_base,
                                         indir_vertex_index ? true : false,
                                         indir_vertex_index,
                                         indir_index ? true : false,
-                                        attrib_index_val, swizzle_index_val,
+                                        attrib_index_val,
+                                        false, swizzle_index_val,
                                         split_vals[0], mask_vec(bld_base));
       bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
                                         indir_vertex_index ? true : false,
                                         indir_vertex_index,
                                         indir_index ? true : false,
-                                        attrib_index_val, swizzle_index_val2,
+                                        attrib_index_val,
+                                        false, swizzle_index_val2,
                                         split_vals[1], mask_vec(bld_base));
    } else {
       chan_val = LLVMBuildBitCast(builder, chan_val, bld_base->base.vec_type, "");
       bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
                                         indir_vertex_index ? true : false,
                                         indir_vertex_index,
-                                        indir_index ? true : false,
-                                        attrib_index_val, swizzle_index_val,
+                                        indir_index && !is_compact ? true : false,
+                                        attrib_index_val,
+                                        indir_index && is_compact ? true : false,
+                                        swizzle_index_val,
                                         chan_val, mask_vec(bld_base));
    }
 }
@@ -553,11 +602,17 @@ static void emit_store_var(struct lp_build_nir_context *bld_base,
             comp = 2;
       }
 
+      if (var->data.compact) {
+         location += const_index / 4;
+         comp += const_index % 4;
+         const_index = 0;
+      }
+
       for (unsigned chan = 0; chan < num_components; chan++) {
          if (writemask & (1u << chan)) {
             LLVMValueRef chan_val = (num_components == 1) ? dst : LLVMBuildExtractValue(builder, dst, chan, "");
             if (bld->tcs_iface) {
-               emit_store_tcs_chan(bld_base, bit_size, location, const_index, indir_vertex_index, indir_index, comp, chan, chan_val);
+               emit_store_tcs_chan(bld_base, var->data.compact, bit_size, location, const_index, indir_vertex_index, indir_index, comp, chan, chan_val);
             } else
                emit_store_chan(bld_base, deref_mode, bit_size, location + const_index, comp, chan, chan_val);
          }
@@ -915,13 +970,11 @@ static void emit_load_ubo(struct lp_build_nir_context *bld_base,
       LLVMValueRef overflow_mask;
       LLVMValueRef num_consts = lp_build_array_get(gallivm, bld->const_sizes_ptr, index);
 
-      num_consts = LLVMBuildShl(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 4), "");
       num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
       for (unsigned c = 0; c < nc; c++) {
          LLVMValueRef this_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
          overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
                                           this_offset, num_consts);
-
          result[c] = build_gather(bld_base, bld_broad, consts_ptr, this_offset, overflow_mask, NULL);
       }
    }
@@ -1215,10 +1268,17 @@ static void emit_image_op(struct lp_build_nir_context *bld_base,
                           struct lp_img_params *params)
 {
    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
+
    params->type = bld_base->base.type;
    params->context_ptr = bld->context_ptr;
    params->thread_data_ptr = bld->thread_data_ptr;
    params->exec_mask = mask_vec(bld_base);
+
+   if (params->image_index_offset)
+      params->image_index_offset = LLVMBuildExtractElement(gallivm->builder, params->image_index_offset,
+                                                           lp_build_const_int32(gallivm, 0), "");
+
    bld->image->emit_op(bld->image,
                        bld->bld_base.base.gallivm,
                        params);
@@ -1229,10 +1289,14 @@ static void emit_image_size(struct lp_build_nir_context *bld_base,
                             struct lp_sampler_size_query_params *params)
 {
    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
 
    params->int_type = bld_base->int_bld.type;
    params->context_ptr = bld->context_ptr;
 
+   if (params->texture_unit_offset)
+      params->texture_unit_offset = LLVMBuildExtractElement(gallivm->builder, params->texture_unit_offset,
+                                                            lp_build_const_int32(gallivm, 0), "");
    bld->image->emit_size_query(bld->image,
                                bld->bld_base.base.gallivm,
                                params);
@@ -1281,11 +1345,64 @@ static void emit_tex(struct lp_build_nir_context *bld_base,
                      struct lp_sampler_params *params)
 {
    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
 
    params->type = bld_base->base.type;
    params->context_ptr = bld->context_ptr;
    params->thread_data_ptr = bld->thread_data_ptr;
 
+   if (params->texture_index_offset && bld_base->shader->info.stage != MESA_SHADER_FRAGMENT) {
+      /* this is horrible but this can be dynamic */
+      LLVMValueRef coords[5];
+      LLVMValueRef *orig_texel_ptr;
+      struct lp_build_context *uint_bld = &bld_base->uint_bld;
+      LLVMValueRef result[4] = { LLVMGetUndef(bld_base->base.vec_type),
+                                 LLVMGetUndef(bld_base->base.vec_type),
+                                 LLVMGetUndef(bld_base->base.vec_type),
+                                 LLVMGetUndef(bld_base->base.vec_type) };
+      LLVMValueRef texel[4], orig_offset;
+      unsigned i;
+      orig_texel_ptr = params->texel;
+
+      for (i = 0; i < 5; i++) {
+         coords[i] = params->coords[i];
+      }
+      orig_offset = params->texture_index_offset;
+
+      for (unsigned v = 0; v < uint_bld->type.length; v++) {
+         LLVMValueRef idx = lp_build_const_int32(gallivm, v);
+         LLVMValueRef new_coords[5];
+         for (i = 0; i < 5; i++) {
+            new_coords[i] = LLVMBuildExtractElement(gallivm->builder,
+                                                    coords[i], idx, "");
+         }
+         params->coords = new_coords;
+         params->texture_index_offset = LLVMBuildExtractElement(gallivm->builder,
+                                                                orig_offset,
+                                                                idx, "");
+         params->type = lp_elem_type(bld_base->base.type);
+
+         params->texel = texel;
+         bld->sampler->emit_tex_sample(bld->sampler,
+                                       gallivm,
+                                       params);
+
+         for (i = 0; i < 4; i++) {
+            result[i] = LLVMBuildInsertElement(gallivm->builder, result[i], texel[i], idx, "");
+         }
+      }
+      for (i = 0; i < 4; i++) {
+         orig_texel_ptr[i] = result[i];
+      }
+      return;
+   }
+
+   if (params->texture_index_offset)
+      params->texture_index_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder,
+                                                             params->texture_index_offset,
+                                                             lp_build_const_int32(bld_base->base.gallivm, 0), "");
+
+   params->type = bld_base->base.type;
    bld->sampler->emit_tex_sample(bld->sampler,
                                  bld->bld_base.base.gallivm,
                                  params);
@@ -1299,6 +1416,10 @@ static void emit_tex_size(struct lp_build_nir_context *bld_base,
    params->int_type = bld_base->int_bld.type;
    params->context_ptr = bld->context_ptr;
 
+   if (params->texture_unit_offset)
+      params->texture_unit_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder,
+                                                             params->texture_unit_offset,
+                                                             lp_build_const_int32(bld_base->base.gallivm, 0), "");
    bld->sampler->emit_size_query(bld->sampler,
                                  bld->bld_base.base.gallivm,
                                  params);
@@ -1375,9 +1496,31 @@ static void emit_sysval_intrin(struct lp_build_nir_context *bld_base,
    case nir_intrinsic_load_patch_vertices_in:
       result[0] = bld->system_values.vertices_in;
       break;
+   case nir_intrinsic_load_sample_id:
+      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.sample_id);
+      break;
+   case nir_intrinsic_load_sample_pos:
+      for (unsigned i = 0; i < 2; i++) {
+         LLVMValueRef idx = LLVMBuildMul(gallivm->builder, bld->system_values.sample_id, lp_build_const_int32(gallivm, 2), "");
+         idx = LLVMBuildAdd(gallivm->builder, idx, lp_build_const_int32(gallivm, i), "");
+         LLVMValueRef val = lp_build_array_get(gallivm, bld->system_values.sample_pos, idx);
+         result[i] = lp_build_broadcast_scalar(&bld_base->base, val);
+      }
+      break;
+   case nir_intrinsic_load_sample_mask_in:
+      result[0] = bld->system_values.sample_mask_in;
+      break;
    }
 }
 
+static void emit_helper_invocation(struct lp_build_nir_context *bld_base,
+                                   LLVMValueRef *dst)
+{
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
+   struct lp_build_context *uint_bld = &bld_base->uint_bld;
+   *dst = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, mask_vec(bld_base), lp_build_const_int_vec(gallivm, uint_bld->type, -1));
+}
+
 static void bgnloop(struct lp_build_nir_context *bld_base)
 {
    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
@@ -1493,6 +1636,8 @@ static void emit_vertex(struct lp_build_nir_context *bld_base, uint32_t stream_i
    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
 
+   if (stream_id >= bld->gs_vertex_streams)
+      return;
    assert(bld->gs_iface->emit_vertex);
    LLVMValueRef total_emitted_vertices_vec =
       LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr[stream_id], "");
@@ -1502,6 +1647,7 @@ static void emit_vertex(struct lp_build_nir_context *bld_base, uint32_t stream_i
    bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
                               bld->outputs,
                               total_emitted_vertices_vec,
+                              mask,
                               lp_build_const_int_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, stream_id));
 
    increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
@@ -1517,6 +1663,8 @@ end_primitive_masked(struct lp_build_nir_context * bld_base,
    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
 
+   if (stream_id >= bld->gs_vertex_streams)
+      return;
    struct lp_build_context *uint_bld = &bld_base->uint_bld;
    LLVMValueRef emitted_vertices_vec =
       LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr[stream_id], "");
@@ -1530,10 +1678,9 @@ end_primitive_masked(struct lp_build_nir_context * bld_base,
                                             emitted_vertices_vec,
                                             uint_bld->zero);
    mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
-   if (stream_id == 0)
-      bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
-                                   total_emitted_vertices_vec,
-                                   emitted_vertices_vec, emitted_prims_vec, mask_vec(bld_base));
+   bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
+                               total_emitted_vertices_vec,
+                               emitted_vertices_vec, emitted_prims_vec, mask, stream_id);
    increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr[stream_id],
                              mask);
    clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
@@ -1634,6 +1781,26 @@ static void emit_vote(struct lp_build_nir_context *bld_base, LLVMValueRef src, n
    result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildLoad(builder, res_store, ""));
 }
 
+static void
+emit_interp_at(struct lp_build_nir_context *bld_base,
+               unsigned num_components,
+               nir_variable *var,
+               bool centroid,
+               bool sample,
+               unsigned const_index,
+               LLVMValueRef indir_index,
+               LLVMValueRef offsets[2],
+               LLVMValueRef dst[4])
+{
+   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+
+   for (unsigned i = 0; i < num_components; i++) {
+      dst[i] = bld->fs_iface->interp_fn(bld->fs_iface, &bld_base->base,
+                                        const_index + var->data.driver_location, i + var->data.location_frac,
+                                        centroid, sample, indir_index, offsets);
+   }
+}
+
 void lp_build_nir_soa(struct gallivm_state *gallivm,
                       struct nir_shader *shader,
                       const struct lp_build_tgsi_params *params,
@@ -1729,6 +1896,8 @@ void lp_build_nir_soa(struct gallivm_state *gallivm,
    bld.bld_base.image_op = emit_image_op;
    bld.bld_base.image_size = emit_image_size;
    bld.bld_base.vote = emit_vote;
+   bld.bld_base.helper_invocation = emit_helper_invocation;
+   bld.bld_base.interp_at = emit_interp_at;
 
    bld.mask = params->mask;
    bld.inputs = params->inputs;
@@ -1753,12 +1922,14 @@ void lp_build_nir_soa(struct gallivm_state *gallivm,
    bld.gs_iface = params->gs_iface;
    bld.tcs_iface = params->tcs_iface;
    bld.tes_iface = params->tes_iface;
+   bld.fs_iface = params->fs_iface;
    if (bld.gs_iface) {
       struct lp_build_context *uint_bld = &bld.bld_base.uint_bld;
 
+      bld.gs_vertex_streams = params->gs_vertex_streams;
       bld.max_output_vertices_vec = lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
                                                            shader->info.gs.vertices_out);
-      for (int i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) {
+      for (int i = 0; i < params->gs_vertex_streams; i++) {
          bld.emitted_prims_vec_ptr[i] =
             lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_prims_ptr");
          bld.emitted_vertices_vec_ptr[i] =
@@ -1781,8 +1952,9 @@ void lp_build_nir_soa(struct gallivm_state *gallivm,
       LLVMValueRef total_emitted_vertices_vec;
       LLVMValueRef emitted_prims_vec;
 
-      end_primitive_masked(&bld.bld_base, lp_build_mask_value(bld.mask), 0);
-      for (int i = 0; i < PIPE_MAX_VERTEX_STREAMS; i++) {
+      for (int i = 0; i < params->gs_vertex_streams; i++) {
+         end_primitive_masked(&bld.bld_base, lp_build_mask_value(bld.mask), i);
+
          total_emitted_vertices_vec =
             LLVMBuildLoad(builder, bld.total_emitted_vertices_vec_ptr[i], "");