radeonsi/nir: set input_interpolate_loc for color inputs
[mesa.git] / src / gallium / drivers / radeonsi / si_shader_nir.c
index 9ce88df8056899b6cdaa75d3dded448019716144..20031ad3f60321298e3ee2827a25146df6871b90 100644 (file)
@@ -52,10 +52,10 @@ static nir_variable* intrinsic_get_var(nir_intrinsic_instr *instr)
        return nir_deref_instr_get_variable(nir_src_as_deref(instr->src[0]));
 }
 
-static void gather_intrinsic_load_deref_info(const nir_shader *nir,
-                                            const nir_intrinsic_instr *instr,
-                                            nir_variable *var,
-                                            struct tgsi_shader_info *info)
+static void gather_intrinsic_load_deref_input_info(const nir_shader *nir,
+                                                  const nir_intrinsic_instr *instr,
+                                                  nir_variable *var,
+                                                  struct tgsi_shader_info *info)
 {
        assert(var && var->data.mode == nir_var_shader_in);
 
@@ -63,29 +63,105 @@ static void gather_intrinsic_load_deref_info(const nir_shader *nir,
        case MESA_SHADER_VERTEX: {
                unsigned i = var->data.driver_location;
                unsigned attrib_count = glsl_count_attribute_slots(var->type, false);
+               uint8_t mask = nir_ssa_def_components_read(&instr->dest.ssa);
 
                for (unsigned j = 0; j < attrib_count; j++, i++) {
                        if (glsl_type_is_64bit(glsl_without_array(var->type))) {
-                               /* TODO: set usage mask more accurately for doubles */
-                               info->input_usage_mask[i] = TGSI_WRITEMASK_XYZW;
+                               unsigned dmask = mask;
+
+                               if (glsl_type_is_dual_slot(glsl_without_array(var->type)) && j % 2)
+                                       dmask >>= 2;
+
+                               dmask <<= var->data.location_frac / 2;
+
+                               if (dmask & 0x1)
+                                       info->input_usage_mask[i] |= TGSI_WRITEMASK_XY;
+                               if (dmask & 0x2)
+                                       info->input_usage_mask[i] |= TGSI_WRITEMASK_ZW;
                        } else {
-                               uint8_t mask = nir_ssa_def_components_read(&instr->dest.ssa);
-                               info->input_usage_mask[i] |= mask << var->data.location_frac;
+                               info->input_usage_mask[i] |=
+                                       (mask << var->data.location_frac) & 0xf;
                        }
                }
                break;
        }
-       default: {
-               unsigned semantic_name, semantic_index;
-               tgsi_get_gl_varying_semantic(var->data.location, true,
-                                            &semantic_name, &semantic_index);
-
-               if (semantic_name == TGSI_SEMANTIC_COLOR) {
+       case MESA_SHADER_FRAGMENT:
+               if (var->data.location == VARYING_SLOT_COL0 ||
+                   var->data.location == VARYING_SLOT_COL1) {
+                       unsigned index = var->data.location == VARYING_SLOT_COL1;
                        uint8_t mask = nir_ssa_def_components_read(&instr->dest.ssa);
-                       info->colors_read |= mask << (semantic_index * 4);
+                       info->colors_read |= mask << (index * 4);
                }
                break;
+       default:;
        }
+}
+/*
+ * Record in `info` which kind of shader output a load_deref reads back.
+ *
+ * Called from scan_instruction() for a load_deref whose variable mode is
+ * nir_var_shader_out. `instr` is not consulted in this function; only
+ * nir->info.stage and var->data are.
+ *
+ *  - Tessellation control: sets exactly one of reads_tessfactor_outputs,
+ *    reads_perpatch_outputs or reads_pervertex_outputs.
+ *  - Fragment: sets uses_fbfetch when the variable is marked
+ *    fb_fetch_output.
+ *  - Any other stage: leaves `info` untouched.
+ *
+ * Flags are only ever set to true here, never cleared.
+ */
+static void gather_intrinsic_load_deref_output_info(const nir_shader *nir,
+                                                   const nir_intrinsic_instr *instr,
+                                                   nir_variable *var,
+                                                   struct tgsi_shader_info *info)
+{
+       assert(var && var->data.mode == nir_var_shader_out);
+
+       switch (nir->info.stage) {
+       case MESA_SHADER_TESS_CTRL:
+               if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
+                   var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
+                       info->reads_tessfactor_outputs = true;
+               else if (var->data.patch) /* per-patch output that is not a tess level */
+                       info->reads_perpatch_outputs = true;
+               else /* everything else is counted as a per-vertex output */
+                       info->reads_pervertex_outputs = true;
+               break;
+
+       case MESA_SHADER_FRAGMENT:
+               if (var->data.fb_fetch_output)
+                       info->uses_fbfetch = true;
+               break;
+       default:; /* other stages: nothing to record */
+       }
+}
+/*
+ * Accumulate into info->output_usagemask[] the channels written by a
+ * store_deref to a shader output.
+ *
+ * Called from scan_instruction() for a store_deref whose variable mode is
+ * nir_var_shader_out. Only vertex, tess-eval and geometry shaders are
+ * handled; every other stage leaves `info` untouched.
+ *
+ * Entries are indexed from var->data.driver_location, one per attribute
+ * slot of var->type. Masks are OR-ed in, so bits set by earlier stores to
+ * the same slot are kept.
+ */
+static void gather_intrinsic_store_deref_output_info(const nir_shader *nir,
+                                                    const nir_intrinsic_instr *instr,
+                                                    nir_variable *var,
+                                                    struct tgsi_shader_info *info)
+{
+       assert(var && var->data.mode == nir_var_shader_out);
+
+       switch (nir->info.stage) {
+       case MESA_SHADER_VERTEX: /* needed by LS, ES */
+       case MESA_SHADER_TESS_EVAL: /* needed by ES */
+       case MESA_SHADER_GEOMETRY: {
+               unsigned i = var->data.driver_location; /* first usage-mask entry to update */
+               unsigned attrib_count = glsl_count_attribute_slots(var->type, false);
+               unsigned mask = nir_intrinsic_write_mask(instr); /* write mask carried by the store */
+
+               assert(!var->data.compact); /* compact outputs are not expected on this path */
+
+               for (unsigned j = 0; j < attrib_count; j++, i++) {
+                       if (glsl_type_is_64bit(glsl_without_array(var->type))) {
+                               unsigned dmask = mask; /* below, one dmask bit stands for a channel pair */
+
+                               if (glsl_type_is_dual_slot(glsl_without_array(var->type)) && j % 2)
+                                       dmask >>= 2; /* odd slot of a dual-slot type: use mask bits 2-3 */
+
+                               dmask <<= var->data.location_frac / 2; /* NOTE(review): presumably location_frac counts 32-bit channels, hence the halving - confirm */
+
+                               if (dmask & 0x1)
+                                       info->output_usagemask[i] |= TGSI_WRITEMASK_XY;
+                               if (dmask & 0x2)
+                                       info->output_usagemask[i] |= TGSI_WRITEMASK_ZW;
+                       } else {
+                               info->output_usagemask[i] |=
+                                       (mask << var->data.location_frac) & 0xf; /* keep only the low four bits */
+                       }
+
+               }
+               break;
+       }
+       default:; /* other stages: nothing to record */
        }
 }
 
@@ -206,9 +282,11 @@ static void scan_instruction(const struct nir_shader *nir,
                                info->uses_bindless_image_store = true;
 
                        info->writes_memory = true;
+                       info->num_memory_instructions++; /* we only care about stores */
                        break;
                case nir_intrinsic_image_deref_store:
                        info->writes_memory = true;
+                       info->num_memory_instructions++; /* we only care about stores */
                        break;
                case nir_intrinsic_bindless_image_atomic_add:
                case nir_intrinsic_bindless_image_atomic_min:
@@ -226,6 +304,7 @@ static void scan_instruction(const struct nir_shader *nir,
                                info->uses_bindless_image_atomic = true;
 
                        info->writes_memory = true;
+                       info->num_memory_instructions++; /* we only care about stores */
                        break;
                case nir_intrinsic_image_deref_atomic_add:
                case nir_intrinsic_image_deref_atomic_min:
@@ -236,6 +315,7 @@ static void scan_instruction(const struct nir_shader *nir,
                case nir_intrinsic_image_deref_atomic_exchange:
                case nir_intrinsic_image_deref_atomic_comp_swap:
                        info->writes_memory = true;
+                       info->num_memory_instructions++; /* we only care about stores */
                        break;
                case nir_intrinsic_store_ssbo:
                case nir_intrinsic_ssbo_atomic_add:
@@ -249,6 +329,7 @@ static void scan_instruction(const struct nir_shader *nir,
                case nir_intrinsic_ssbo_atomic_exchange:
                case nir_intrinsic_ssbo_atomic_comp_swap:
                        info->writes_memory = true;
+                       info->num_memory_instructions++; /* we only care about stores */
                        break;
                case nir_intrinsic_load_deref: {
                        nir_variable *var = intrinsic_get_var(intr);
@@ -257,7 +338,7 @@ static void scan_instruction(const struct nir_shader *nir,
                                glsl_get_base_type(glsl_without_array(var->type));
 
                        if (mode == nir_var_shader_in) {
-                               gather_intrinsic_load_deref_info(nir, intr, var, info);
+                               gather_intrinsic_load_deref_input_info(nir, intr, var, info);
 
                                switch (var->data.interpolation) {
                                case INTERP_MODE_NONE:
@@ -283,9 +364,18 @@ static void scan_instruction(const struct nir_shader *nir,
                                                info->uses_linear_center = true;
                                        break;
                                }
+                       } else if (mode == nir_var_shader_out) {
+                               gather_intrinsic_load_deref_output_info(nir, intr, var, info);
                        }
                        break;
                }
+               case nir_intrinsic_store_deref: {
+                       nir_variable *var = intrinsic_get_var(intr);
+
+                       if (var->data.mode == nir_var_shader_out)
+                               gather_intrinsic_store_deref_output_info(nir, intr, var, info);
+                       break;
+               }
                case nir_intrinsic_interp_deref_at_centroid:
                case nir_intrinsic_interp_deref_at_sample:
                case nir_intrinsic_interp_deref_at_offset: {
@@ -462,6 +552,16 @@ void si_nir_scan_shader(const struct nir_shader *nir,
                        if (semantic_name == TGSI_SEMANTIC_PRIMID)
                                info->uses_primid = true;
 
+                       if (semantic_name == TGSI_SEMANTIC_COLOR) {
+                               /* We only need this for color inputs. */
+                               if (variable->data.sample)
+                                       info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_SAMPLE;
+                               else if (variable->data.centroid)
+                                       info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_CENTROID;
+                               else
+                                       info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_CENTER;
+                       }
+
                         enum glsl_base_type base_type =
                                 glsl_get_base_type(glsl_without_array(variable->type));
 
@@ -580,22 +680,18 @@ void si_nir_scan_shader(const struct nir_shader *nir,
                        unsigned streamw = (gs_out_streams >> 6) & 3;
 
                        if (usagemask & TGSI_WRITEMASK_X) {
-                               info->output_usagemask[i] |= TGSI_WRITEMASK_X;
                                info->output_streams[i] |= streamx;
                                info->num_stream_output_components[streamx]++;
                        }
                        if (usagemask & TGSI_WRITEMASK_Y) {
-                               info->output_usagemask[i] |= TGSI_WRITEMASK_Y;
                                info->output_streams[i] |= streamy << 2;
                                info->num_stream_output_components[streamy]++;
                        }
                        if (usagemask & TGSI_WRITEMASK_Z) {
-                               info->output_usagemask[i] |= TGSI_WRITEMASK_Z;
                                info->output_streams[i] |= streamz << 4;
                                info->num_stream_output_components[streamz]++;
                        }
                        if (usagemask & TGSI_WRITEMASK_W) {
-                               info->output_usagemask[i] |= TGSI_WRITEMASK_W;
                                info->output_streams[i] |= streamw << 6;
                                info->num_stream_output_components[streamw]++;
                        }
@@ -647,20 +743,6 @@ void si_nir_scan_shader(const struct nir_shader *nir,
                                        info->writes_position = true;
                                break;
                        }
-
-                       if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
-                               switch (semantic_name) {
-                               case TGSI_SEMANTIC_PATCH:
-                                       info->reads_perpatch_outputs = true;
-                               break;
-                               case TGSI_SEMANTIC_TESSINNER:
-                               case TGSI_SEMANTIC_TESSOUTER:
-                                       info->reads_tessfactor_outputs = true;
-                               break;
-                               default:
-                                       info->reads_pervertex_outputs = true;
-                               }
-                       }
                }
 
                unsigned loc = variable->data.location;
@@ -744,22 +826,12 @@ void si_nir_scan_shader(const struct nir_shader *nir,
                /* We rely on the fact that nir_lower_samplers_as_deref has
                 * eliminated struct dereferences.
                 */
-               if (base_type == GLSL_TYPE_SAMPLER) {
-                       if (variable->data.bindless) {
-                               info->const_buffers_declared |= 1;
-                               info->const_file_max[0] = max_slot;
-                       } else {
-                               info->samplers_declared |=
-                                       u_bit_consecutive(variable->data.binding, aoa_size);
-                       }
-               } else if (base_type == GLSL_TYPE_IMAGE) {
-                       if (variable->data.bindless) {
-                               info->const_buffers_declared |= 1;
-                               info->const_file_max[0] = max_slot;
-                       } else {
-                               info->images_declared |=
-                                       u_bit_consecutive(variable->data.binding, aoa_size);
-                       }
+               if (base_type == GLSL_TYPE_SAMPLER && !variable->data.bindless) {
+                       info->samplers_declared |=
+                               u_bit_consecutive(variable->data.binding, aoa_size);
+               } else if (base_type == GLSL_TYPE_IMAGE && !variable->data.bindless) {
+                       info->images_declared |=
+                               u_bit_consecutive(variable->data.binding, aoa_size);
                } else if (base_type != GLSL_TYPE_ATOMIC_UINT) {
                        info->const_buffers_declared |= 1;
                        info->const_file_max[0] = max_slot;
@@ -901,7 +973,7 @@ si_nir_lower_color(nir_shader *nir)
  * selector is created.
  */
 void
-si_lower_nir(struct si_shader_selector* sel)
+si_lower_nir(struct si_shader_selector* sel, unsigned wave_size)
 {
        /* Adjust the driver location of inputs and outputs. The state tracker
         * interprets them as slots, while the ac/nir backend interprets them
@@ -956,8 +1028,8 @@ si_lower_nir(struct si_shader_selector* sel)
        NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options);
 
        const nir_lower_subgroups_options subgroups_options = {
-               .subgroup_size = 64,
-               .ballot_bit_size = 64,
+               .subgroup_size = wave_size,
+               .ballot_bit_size = wave_size,
                .lower_to_scalar = true,
                .lower_subgroup_masks = true,
                .lower_vote_trivial = false,
@@ -1031,7 +1103,6 @@ si_nir_load_sampler_desc(struct ac_shader_abi *abi,
        struct si_shader_context *ctx = si_shader_context_from_abi(abi);
        LLVMBuilderRef builder = ctx->ac.builder;
        unsigned const_index = base_index + constant_index;
-       bool dcc_off = write;
 
        assert(!descriptor_set);
        assert(!image || desc_type == AC_DESC_IMAGE || desc_type == AC_DESC_BUFFER);
@@ -1049,7 +1120,7 @@ si_nir_load_sampler_desc(struct ac_shader_abi *abi,
                                             LLVMConstInt(ctx->i64, 2, 0), "");
 
                        return si_load_image_desc(ctx, list, dynamic_index, desc_type,
-                                                 dcc_off, true);
+                                                 write, true);
                }
 
                /* Since bindless handle arithmetic can contain an unsigned integer
@@ -1088,7 +1159,7 @@ si_nir_load_sampler_desc(struct ac_shader_abi *abi,
                index = LLVMBuildSub(ctx->ac.builder,
                                     LLVMConstInt(ctx->i32, SI_NUM_IMAGES - 1, 0),
                                     index, "");
-               return si_load_image_desc(ctx, list, index, desc_type, dcc_off, false);
+               return si_load_image_desc(ctx, list, index, desc_type, write, false);
        }
 
        index = LLVMBuildAdd(ctx->ac.builder, index,