radeonsi/nir: add support for packed inputs
author Timothy Arceri <tarceri@itsqueeze.com>
Fri, 17 Nov 2017 06:04:22 +0000 (17:04 +1100)
committer Timothy Arceri <tarceri@itsqueeze.com>
Sun, 3 Dec 2017 22:10:30 +0000 (09:10 +1100)
Because NIR can create non-vec4 variables when implementing component
packing, we need to make sure not to reprocess the same slot again.

Also, we can drop the fs_attr_idx counter and just use driver_location.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_shader_nir.c

index 5d82715f7a7c8aea226165992729de1d2965f873..12eb8763edb2cbd4bf0075e8e1fc914e5b448914 100644 (file)
@@ -444,21 +444,18 @@ si_lower_nir(struct si_shader_selector* sel)
 }
 
 static void declare_nir_input_vs(struct si_shader_context *ctx,
-                                struct nir_variable *variable, unsigned rel,
+                                struct nir_variable *variable,
                                 LLVMValueRef out[4])
 {
-       si_llvm_load_input_vs(ctx, variable->data.driver_location / 4 + rel, out);
+       si_llvm_load_input_vs(ctx, variable->data.driver_location / 4, out);
 }
 
 static void declare_nir_input_fs(struct si_shader_context *ctx,
-                                struct nir_variable *variable, unsigned rel,
-                                unsigned *fs_attr_idx,
+                                struct nir_variable *variable,
+                                unsigned input_index,
                                 LLVMValueRef out[4])
 {
-       unsigned slot = variable->data.location + rel;
-
-       assert(variable->data.location >= VARYING_SLOT_VAR0 || rel == 0);
-
+       unsigned slot = variable->data.location;
        if (slot == VARYING_SLOT_POS) {
                out[0] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT);
                out[1] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT);
@@ -468,8 +465,7 @@ static void declare_nir_input_fs(struct si_shader_context *ctx,
                return;
        }
 
-       si_llvm_load_input_fs(ctx, *fs_attr_idx, out);
-       (*fs_attr_idx)++;
+       si_llvm_load_input_fs(ctx, input_index, out);
 }
 
 static LLVMValueRef
@@ -523,25 +519,33 @@ bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir)
 {
        struct tgsi_shader_info *info = &ctx->shader->selector->info;
 
-       unsigned fs_attr_idx = 0;
+       uint64_t processed_inputs = 0;
        nir_foreach_variable(variable, &nir->inputs) {
                unsigned attrib_count = glsl_count_attribute_slots(variable->type,
                                                                   nir->info.stage == MESA_SHADER_VERTEX);
                unsigned input_idx = variable->data.driver_location;
 
-               for (unsigned i = 0; i < attrib_count; ++i) {
-                       LLVMValueRef data[4];
+               assert(attrib_count == 1);
 
-                       if (nir->info.stage == MESA_SHADER_VERTEX)
-                               declare_nir_input_vs(ctx, variable, i, data);
-                       else if (nir->info.stage == MESA_SHADER_FRAGMENT)
-                               declare_nir_input_fs(ctx, variable, i, &fs_attr_idx, data);
+               LLVMValueRef data[4];
+               unsigned loc = variable->data.location;
 
-                       for (unsigned chan = 0; chan < 4; chan++) {
-                               ctx->inputs[input_idx + chan] =
-                                       LLVMBuildBitCast(ctx->ac.builder, data[chan], ctx->ac.i32, "");
-                       }
+               /* Packed components share the same location so skip
+                * them if we have already processed the location.
+                */
+               if (processed_inputs & ((uint64_t)1 << loc))
+                       continue;
+
+               if (nir->info.stage == MESA_SHADER_VERTEX)
+                       declare_nir_input_vs(ctx, variable, data);
+               else if (nir->info.stage == MESA_SHADER_FRAGMENT)
+                       declare_nir_input_fs(ctx, variable, input_idx / 4, data);
+
+               for (unsigned chan = 0; chan < 4; chan++) {
+                       ctx->inputs[input_idx + chan] =
+                               LLVMBuildBitCast(ctx->ac.builder, data[chan], ctx->ac.i32, "");
                }
+               processed_inputs |= ((uint64_t)1 << loc);
        }
 
        ctx->abi.inputs = &ctx->inputs[0];