radeonsi/nir: Use NIR barycentric intrinsics
authorConnor Abbott <cwabbott0@gmail.com>
Tue, 14 May 2019 11:29:52 +0000 (13:29 +0200)
committerConnor Abbott <cwabbott0@gmail.com>
Mon, 8 Jul 2019 12:18:46 +0000 (14:18 +0200)
This is simpler than radv, since the driver_location is already assigned
for us.

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
src/amd/common/ac_nir_to_llvm.c
src/amd/common/ac_shader_abi.h
src/gallium/drivers/radeonsi/si_get.c
src/gallium/drivers/radeonsi/si_shader_nir.c

index 51d26e00c9770969e53dbd456915c46ccf841f46..d9332700105458e495d7e2ed96a2e2b8e624518d 100644 (file)
@@ -3341,6 +3341,12 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
        case nir_intrinsic_load_helper_invocation:
                result = ac_build_load_helper_invocation(&ctx->ac);
                break;
+       case nir_intrinsic_load_color0:
+               result = ctx->abi->color0;
+               break;
+       case nir_intrinsic_load_color1:
+               result = ctx->abi->color1;
+               break;
        case nir_intrinsic_load_instance_id:
                result = ctx->abi->instance_id;
                break;
index 2051f22d29bcb5c8ce3d4bddfa777a89b89af558..083ab6c2298bae7b943d192c6d717c3204fb5a63 100644 (file)
@@ -63,6 +63,8 @@ struct ac_shader_abi {
        LLVMValueRef ancillary;
        LLVMValueRef sample_coverage;
        LLVMValueRef prim_mask;
+       LLVMValueRef color0;
+       LLVMValueRef color1;
        /* CS */
        LLVMValueRef local_invocation_ids;
        LLVMValueRef num_work_groups;
index 59d07523fbe384ef2b01ec9a2b47e3b8aca60167..8e07cbe989f87e54daba3d5d3bc2baedcfe1e1b0 100644 (file)
@@ -510,6 +510,7 @@ static const struct nir_shader_compiler_options nir_options = {
        .lower_rotate = true,
        .optimize_sample_mask_in = true,
        .max_unroll_iterations = 32,
+       .use_interpolated_input_intrinsics = true,
 };
 
 static const void *
index 365c137f8fccf9d6e36c6203090c1e217fbfd4f2..9ce88df8056899b6cdaa75d3dded448019716144 100644 (file)
@@ -31,6 +31,7 @@
 
 #include "compiler/nir/nir.h"
 #include "compiler/nir_types.h"
+#include "compiler/nir/nir_builder.h"
 
 static nir_variable* tex_get_texture_var(nir_tex_instr *instr)
 {
@@ -461,51 +462,43 @@ void si_nir_scan_shader(const struct nir_shader *nir,
                        if (semantic_name == TGSI_SEMANTIC_PRIMID)
                                info->uses_primid = true;
 
-                       if (variable->data.sample)
-                               info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_SAMPLE;
-                       else if (variable->data.centroid)
-                               info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_CENTROID;
-                       else
-                               info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_CENTER;
+                        enum glsl_base_type base_type =
+                                glsl_get_base_type(glsl_without_array(variable->type));
 
-                       enum glsl_base_type base_type =
-                               glsl_get_base_type(glsl_without_array(variable->type));
+                        switch (variable->data.interpolation) {
+                        case INTERP_MODE_NONE:
+                                if (glsl_base_type_is_integer(base_type)) {
+                                        info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT;
+                                        break;
+                                }
 
-                       switch (variable->data.interpolation) {
-                       case INTERP_MODE_NONE:
-                               if (glsl_base_type_is_integer(base_type)) {
-                                       info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT;
-                                       break;
-                               }
+                                if (semantic_name == TGSI_SEMANTIC_COLOR) {
+                                        info->input_interpolate[i] = TGSI_INTERPOLATE_COLOR;
+                                        break;
+                                }
+                                /* fall-through */
 
-                               if (semantic_name == TGSI_SEMANTIC_COLOR) {
-                                       info->input_interpolate[i] = TGSI_INTERPOLATE_COLOR;
-                                       break;
-                               }
-                               /* fall-through */
+                        case INTERP_MODE_SMOOTH:
+                                assert(!glsl_base_type_is_integer(base_type));
 
-                       case INTERP_MODE_SMOOTH:
-                               assert(!glsl_base_type_is_integer(base_type));
+                                info->input_interpolate[i] = TGSI_INTERPOLATE_PERSPECTIVE;
+                                break;
 
-                               info->input_interpolate[i] = TGSI_INTERPOLATE_PERSPECTIVE;
-                               break;
+                        case INTERP_MODE_NOPERSPECTIVE:
+                                assert(!glsl_base_type_is_integer(base_type));
 
-                       case INTERP_MODE_NOPERSPECTIVE:
-                               assert(!glsl_base_type_is_integer(base_type));
+                                info->input_interpolate[i] = TGSI_INTERPOLATE_LINEAR;
+                                break;
 
-                               info->input_interpolate[i] = TGSI_INTERPOLATE_LINEAR;
-                               break;
-
-                       case INTERP_MODE_FLAT:
-                               info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT;
-                               break;
-                       }
+                        case INTERP_MODE_FLAT:
+                                info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT;
+                                break;
+                        }
                }
        }
 
        info->num_inputs = num_inputs;
 
-
        i = 0;
        uint64_t processed_outputs = 0;
        unsigned num_outputs = 0;
@@ -856,6 +849,53 @@ si_nir_opts(struct nir_shader *nir)
        } while (progress);
 }
 
+static int
+type_size_vec4(const struct glsl_type *type, bool bindless)
+{
+       return glsl_count_attribute_slots(type, false);
+}
+
+static void
+si_nir_lower_color(nir_shader *nir)
+{
+        nir_function_impl *entrypoint = nir_shader_get_entrypoint(nir);
+
+        nir_builder b;
+        nir_builder_init(&b, entrypoint);
+
+        nir_foreach_block(block, entrypoint) {
+                nir_foreach_instr_safe(instr, block) {
+                        if (instr->type != nir_instr_type_intrinsic)
+                                continue;
+
+                        nir_intrinsic_instr *intrin =
+                                nir_instr_as_intrinsic(instr);
+
+                        if (intrin->intrinsic != nir_intrinsic_load_deref)
+                                continue;
+
+                        nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+                        if (deref->mode != nir_var_shader_in)
+                                continue;
+
+                        b.cursor = nir_before_instr(instr);
+                        nir_variable *var = nir_deref_instr_get_variable(deref);
+                        nir_ssa_def *def;
+
+                        if (var->data.location == VARYING_SLOT_COL0) {
+                                def = nir_load_color0(&b);
+                        } else if (var->data.location == VARYING_SLOT_COL1) {
+                                def = nir_load_color1(&b);
+                        } else {
+                                continue;
+                        }
+
+                        nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(def));
+                        nir_instr_remove(instr);
+                }
+        }
+}
+
 /**
  * Perform "lowering" operations on the NIR that are run once when the shader
  * selector is created.
@@ -867,8 +907,29 @@ si_lower_nir(struct si_shader_selector* sel)
         * interprets them as slots, while the ac/nir backend interprets them
         * as individual components.
         */
-       nir_foreach_variable(variable, &sel->nir->inputs)
-               variable->data.driver_location *= 4;
+       if (sel->nir->info.stage != MESA_SHADER_FRAGMENT) {
+               nir_foreach_variable(variable, &sel->nir->inputs)
+                       variable->data.driver_location *= 4;
+       } else {
+               NIR_PASS_V(sel->nir, nir_lower_io_to_temporaries,
+                          nir_shader_get_entrypoint(sel->nir), false, true);
+
+               /* Since we're doing nir_lower_io_to_temporaries late, we need
+                * to lower all the copy_deref's introduced by
+                * lower_io_to_temporaries before calling nir_lower_io.
+                */
+               NIR_PASS_V(sel->nir, nir_split_var_copies);
+                NIR_PASS_V(sel->nir, nir_lower_var_copies);
+               NIR_PASS_V(sel->nir, nir_lower_global_vars_to_local);
+
+               si_nir_lower_color(sel->nir);
+               NIR_PASS_V(sel->nir, nir_lower_io, nir_var_shader_in, type_size_vec4, 0);
+
+                /* This pass needs actual constants */
+                NIR_PASS_V(sel->nir, nir_opt_constant_folding);
+                NIR_PASS_V(sel->nir, nir_io_add_const_offset_to_base,
+                           nir_var_shader_in);
+       }
 
        nir_foreach_variable(variable, &sel->nir->outputs) {
                variable->data.driver_location *= 4;
@@ -924,24 +985,6 @@ static void declare_nir_input_vs(struct si_shader_context *ctx,
        si_llvm_load_input_vs(ctx, input_index, out);
 }
 
-static void declare_nir_input_fs(struct si_shader_context *ctx,
-                                struct nir_variable *variable,
-                                unsigned input_index,
-                                LLVMValueRef out[4])
-{
-       unsigned slot = variable->data.location;
-       if (slot == VARYING_SLOT_POS) {
-               out[0] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT);
-               out[1] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT);
-               out[2] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Z_FLOAT);
-               out[3] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1,
-                               LLVMGetParam(ctx->main_fn, SI_PARAM_POS_W_FLOAT));
-               return;
-       }
-
-       si_llvm_load_input_fs(ctx, input_index, out);
-}
-
 LLVMValueRef
 si_nir_lookup_interp_param(struct ac_shader_abi *abi,
                           enum glsl_interp_mode interp, unsigned location)
@@ -1067,20 +1110,16 @@ bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir)
 {
        struct tgsi_shader_info *info = &ctx->shader->selector->info;
 
-       if (nir->info.stage == MESA_SHADER_VERTEX ||
-           nir->info.stage == MESA_SHADER_FRAGMENT) {
+       if (nir->info.stage == MESA_SHADER_VERTEX) {
                uint64_t processed_inputs = 0;
                nir_foreach_variable(variable, &nir->inputs) {
                        unsigned attrib_count = glsl_count_attribute_slots(variable->type,
-                                                                          nir->info.stage == MESA_SHADER_VERTEX);
+                                                                          true);
                        unsigned input_idx = variable->data.driver_location;
 
                        LLVMValueRef data[4];
                        unsigned loc = variable->data.location;
 
-                       if (loc >= VARYING_SLOT_VAR0 && nir->info.stage == MESA_SHADER_FRAGMENT)
-                               ctx->abi.fs_input_attr_indices[loc - VARYING_SLOT_VAR0] = input_idx / 4;
-
                        for (unsigned i = 0; i < attrib_count; i++) {
                                /* Packed components share the same location so skip
                                 * them if we have already processed the location.
@@ -1090,24 +1129,50 @@ bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir)
                                        continue;
                                }
 
-                               if (nir->info.stage == MESA_SHADER_VERTEX) {
+                               declare_nir_input_vs(ctx, variable, input_idx / 4, data);
+                               bitcast_inputs(ctx, data, input_idx);
+                               if (glsl_type_is_dual_slot(variable->type)) {
+                                       input_idx += 4;
                                        declare_nir_input_vs(ctx, variable, input_idx / 4, data);
                                        bitcast_inputs(ctx, data, input_idx);
-                                       if (glsl_type_is_dual_slot(variable->type)) {
-                                               input_idx += 4;
-                                               declare_nir_input_vs(ctx, variable, input_idx / 4, data);
-                                               bitcast_inputs(ctx, data, input_idx);
-                                       }
-                               } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
-                                       declare_nir_input_fs(ctx, variable, input_idx / 4, data);
-                                       bitcast_inputs(ctx, data, input_idx);
                                }
 
                                processed_inputs |= ((uint64_t)1 << (loc + i));
                                input_idx += 4;
                        }
                }
-       }
+       } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+                unsigned colors_read =
+                        ctx->shader->selector->info.colors_read;
+                LLVMValueRef main_fn = ctx->main_fn;
+
+                LLVMValueRef undef = LLVMGetUndef(ctx->f32);
+
+                unsigned offset = SI_PARAM_POS_FIXED_PT + 1;
+
+                if (colors_read & 0x0f) {
+                        unsigned mask = colors_read & 0x0f;
+                        LLVMValueRef values[4];
+                        values[0] = mask & 0x1 ? LLVMGetParam(main_fn, offset++) : undef;
+                        values[1] = mask & 0x2 ? LLVMGetParam(main_fn, offset++) : undef;
+                        values[2] = mask & 0x4 ? LLVMGetParam(main_fn, offset++) : undef;
+                        values[3] = mask & 0x8 ? LLVMGetParam(main_fn, offset++) : undef;
+                        ctx->abi.color0 =
+                                ac_to_integer(&ctx->ac,
+                                              ac_build_gather_values(&ctx->ac, values, 4));
+                }
+                if (colors_read & 0xf0) {
+                        unsigned mask = (colors_read & 0xf0) >> 4;
+                        LLVMValueRef values[4];
+                        values[0] = mask & 0x1 ? LLVMGetParam(main_fn, offset++) : undef;
+                        values[1] = mask & 0x2 ? LLVMGetParam(main_fn, offset++) : undef;
+                        values[2] = mask & 0x4 ? LLVMGetParam(main_fn, offset++) : undef;
+                        values[3] = mask & 0x8 ? LLVMGetParam(main_fn, offset++) : undef;
+                        ctx->abi.color1 =
+                                ac_to_integer(&ctx->ac,
+                                              ac_build_gather_values(&ctx->ac, values, 4));
+                }
+        }
 
        ctx->abi.inputs = &ctx->inputs[0];
        ctx->abi.load_sampler_desc = si_nir_load_sampler_desc;