if (var->data.compact) {
assert(instr->deref_type == nir_deref_type_array);
- nir_const_value *v = nir_src_as_const_value(instr->arr.index);
- assert(v);
- *const_out = v->u32[0];
+ *const_out = nir_src_as_uint(instr->arr.index);
return;
}
}
} else if(path.path[idx_lvl]->deref_type == nir_deref_type_array) {
unsigned size = glsl_count_attribute_slots(path.path[idx_lvl]->type, false);
- nir_const_value *v = nir_src_as_const_value(path.path[idx_lvl]->arr.index);
- if (v)
- const_offset += v->u32[0] * size;
+ if (nir_src_is_const(path.path[idx_lvl]->arr.index))
+ const_offset += nir_src_as_uint(path.path[idx_lvl]->arr.index) * size;
} else
unreachable("Uhandled deref type in get_deref_instr_offset");
}
case MESA_SHADER_VERTEX: {
nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
- if (var->data.mode == nir_var_shader_in) {
+ if (var && var->data.mode == nir_var_shader_in) {
unsigned idx = var->data.location;
uint8_t mask = nir_ssa_def_components_read(&instr->dest.ssa);
}
}
+/* Widen a 4-bit per-component writemask so that each component covers two
+ * channels: bit i of wrmask becomes bits 2*i and 2*i+1 of the result.
+ * Used for 64-bit output types, where one logical component occupies two
+ * 32-bit slots (see the glsl_type_is_64bit caller in set_output_usage_mask).
+ */
+static uint32_t
+widen_writemask(uint32_t wrmask)
+{
+ uint32_t new_wrmask = 0;
+ for(unsigned i = 0; i < 4; i++)
+ new_wrmask |= (wrmask & (1 << i) ? 0x3 : 0x0) << (i * 2);
+ return new_wrmask;
+}
+
static void
set_output_usage_mask(const nir_shader *nir, const nir_intrinsic_instr *instr,
uint8_t *output_usage_mask)
nir_deref_instr *deref_instr =
nir_instr_as_deref(instr->src[0].ssa->parent_instr);
nir_variable *var = nir_deref_instr_get_variable(deref_instr);
- unsigned attrib_count = glsl_count_attribute_slots(var->type, false);
+ unsigned attrib_count = glsl_count_attribute_slots(deref_instr->type, false);
unsigned idx = var->data.location;
unsigned comp = var->data.location_frac;
unsigned const_offset = 0;
get_deref_offset(deref_instr, &const_offset);
- if (idx == VARYING_SLOT_CLIP_DIST0) {
- /* Special case for clip/cull distances because there are
- * combined into a single array that contains both.
- */
- output_usage_mask[idx] |= 1 << const_offset;
+ if (var->data.compact) {
+ assert(!glsl_type_is_64bit(deref_instr->type));
+ const_offset += comp;
+ output_usage_mask[idx + const_offset / 4] |= 1 << (const_offset % 4);
return;
}
- for (unsigned i = 0; i < attrib_count; i++) {
+ uint32_t wrmask = nir_intrinsic_write_mask(instr);
+ if (glsl_type_is_64bit(deref_instr->type))
+ wrmask = widen_writemask(wrmask);
+
+ for (unsigned i = 0; i < attrib_count; i++)
output_usage_mask[idx + i + const_offset] |=
- instr->const_index[0] << comp;
- }
+ ((wrmask >> (i * 4)) & 0xf) << comp;
+}
+
+/* Record that this shader writes memory (image stores/atomics, SSBO or
+ * global stores/atomics — see the callers in gather_intrinsic_info).
+ * Only the fragment and geometry stages track this flag; other stages
+ * are ignored. */
+static void
+set_writes_memory(const nir_shader *nir, struct radv_shader_info *info)
+{
+ if (nir->info.stage == MESA_SHADER_FRAGMENT)
+ info->ps.writes_memory = true;
+ else if (nir->info.stage == MESA_SHADER_GEOMETRY)
+ info->gs.writes_memory = true;
}
static void
{
nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
- if (var->data.mode == nir_var_shader_out) {
+ if (var && var->data.mode == nir_var_shader_out) {
unsigned idx = var->data.location;
switch (nir->info.stage) {
type = glsl_get_array_element(var->type);
unsigned slots =
- var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)
+ var->data.compact ? DIV_ROUND_UP(var->data.location_frac + glsl_get_length(type), 4)
: glsl_count_attribute_slots(type, false);
- if (idx == VARYING_SLOT_CLIP_DIST0)
- slots = (nir->info.clip_distance_array_size +
- nir->info.cull_distance_array_size > 4) ? 2 : 1;
-
mark_tess_output(info, var->data.patch, param, slots);
break;
}
}
}
+/* Track push-constant usage for one nir_intrinsic_load_push_constant.
+ *
+ * With a constant source offset, grow the [min, max) byte range of push
+ * constants actually read (base and offset are in bytes).  A non-constant
+ * offset forces the conservative indirect path instead.  Any load that is
+ * not 32 bits wide clears has_only_32bit_push_constants.
+ */
+static void
+gather_push_constant_info(const nir_shader *nir,
+ const nir_intrinsic_instr *instr,
+ struct radv_shader_info *info)
+{
+ int base = nir_intrinsic_base(instr);
+
+ if (!nir_src_is_const(instr->src[0])) {
+ info->has_indirect_push_constants = true;
+ } else {
+ uint32_t min = base + nir_src_as_uint(instr->src[0]);
+ /* NOTE(review): assumes 4 bytes per component; for 64-bit loads
+ * this underestimates the range — confirm bit_size handling. */
+ uint32_t max = min + instr->num_components * 4;
+
+ info->max_push_constant_used =
+ MAX2(max, info->max_push_constant_used);
+ /* min_push_constant_used starts at UINT8_MAX (see
+ * radv_nir_shader_info_init) so MIN2 converges correctly. */
+ info->min_push_constant_used =
+ MIN2(min, info->min_push_constant_used);
+ }
+
+ if (instr->dest.ssa.bit_size != 32)
+ info->has_only_32bit_push_constants = false;
+
+ info->loads_push_constants = true;
+}
+
static void
gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
struct radv_shader_info *info)
{
switch (instr->intrinsic) {
- case nir_intrinsic_interp_deref_at_sample:
+ case nir_intrinsic_load_barycentric_at_sample:
info->ps.needs_sample_positions = true;
break;
case nir_intrinsic_load_draw_id:
if (nir->info.stage == MESA_SHADER_FRAGMENT)
info->ps.layer_input = true;
break;
+ case nir_intrinsic_load_layer_id:
+ if (nir->info.stage == MESA_SHADER_FRAGMENT)
+ info->ps.layer_input = true;
+ break;
case nir_intrinsic_load_invocation_id:
info->uses_invocation_id = true;
break;
info->uses_prim_id = true;
break;
case nir_intrinsic_load_push_constant:
- info->loads_push_constants = true;
+ gather_push_constant_info(nir, instr, info);
break;
case nir_intrinsic_vulkan_resource_index:
info->desc_set_used_mask |= (1 << nir_intrinsic_desc_set(instr));
case nir_intrinsic_image_deref_load:
case nir_intrinsic_image_deref_store:
case nir_intrinsic_image_deref_atomic_add:
- case nir_intrinsic_image_deref_atomic_min:
- case nir_intrinsic_image_deref_atomic_max:
+ case nir_intrinsic_image_deref_atomic_imin:
+ case nir_intrinsic_image_deref_atomic_umin:
+ case nir_intrinsic_image_deref_atomic_imax:
+ case nir_intrinsic_image_deref_atomic_umax:
case nir_intrinsic_image_deref_atomic_and:
case nir_intrinsic_image_deref_atomic_or:
case nir_intrinsic_image_deref_atomic_xor:
case nir_intrinsic_image_deref_atomic_comp_swap:
case nir_intrinsic_image_deref_size: {
nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
- const struct glsl_type *type = glsl_without_array(var->type);
-
- enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
- if (dim == GLSL_SAMPLER_DIM_SUBPASS ||
- dim == GLSL_SAMPLER_DIM_SUBPASS_MS) {
- info->ps.layer_input = true;
- info->ps.uses_input_attachments = true;
- }
mark_sampler_desc(var, info);
- if (nir_intrinsic_image_deref_store ||
- nir_intrinsic_image_deref_atomic_add ||
- nir_intrinsic_image_deref_atomic_min ||
- nir_intrinsic_image_deref_atomic_max ||
- nir_intrinsic_image_deref_atomic_and ||
- nir_intrinsic_image_deref_atomic_or ||
- nir_intrinsic_image_deref_atomic_xor ||
- nir_intrinsic_image_deref_atomic_exchange ||
- nir_intrinsic_image_deref_atomic_comp_swap) {
- if (nir->info.stage == MESA_SHADER_FRAGMENT)
- info->ps.writes_memory = true;
+ if (instr->intrinsic == nir_intrinsic_image_deref_store ||
+ instr->intrinsic == nir_intrinsic_image_deref_atomic_add ||
+ instr->intrinsic == nir_intrinsic_image_deref_atomic_imin ||
+ instr->intrinsic == nir_intrinsic_image_deref_atomic_umin ||
+ instr->intrinsic == nir_intrinsic_image_deref_atomic_imax ||
+ instr->intrinsic == nir_intrinsic_image_deref_atomic_umax ||
+ instr->intrinsic == nir_intrinsic_image_deref_atomic_and ||
+ instr->intrinsic == nir_intrinsic_image_deref_atomic_or ||
+ instr->intrinsic == nir_intrinsic_image_deref_atomic_xor ||
+ instr->intrinsic == nir_intrinsic_image_deref_atomic_exchange ||
+ instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) {
+ set_writes_memory(nir, info);
}
break;
}
case nir_intrinsic_ssbo_atomic_xor:
case nir_intrinsic_ssbo_atomic_exchange:
case nir_intrinsic_ssbo_atomic_comp_swap:
- if (nir->info.stage == MESA_SHADER_FRAGMENT)
- info->ps.writes_memory = true;
+ set_writes_memory(nir, info);
break;
case nir_intrinsic_load_deref:
gather_intrinsic_load_deref_info(nir, instr, info);
break;
case nir_intrinsic_store_deref:
gather_intrinsic_store_deref_info(nir, instr, info);
+ /* fallthrough */
+ case nir_intrinsic_deref_atomic_add:
+ case nir_intrinsic_deref_atomic_imin:
+ case nir_intrinsic_deref_atomic_umin:
+ case nir_intrinsic_deref_atomic_imax:
+ case nir_intrinsic_deref_atomic_umax:
+ case nir_intrinsic_deref_atomic_and:
+ case nir_intrinsic_deref_atomic_or:
+ case nir_intrinsic_deref_atomic_xor:
+ case nir_intrinsic_deref_atomic_exchange:
+ case nir_intrinsic_deref_atomic_comp_swap: {
+ if (nir_src_as_deref(instr->src[0])->mode & (nir_var_mem_global | nir_var_mem_ssbo))
+ set_writes_memory(nir, info);
break;
+ }
default:
break;
}
static void
gather_info_input_decl_vs(const nir_shader *nir, const nir_variable *var,
- struct radv_shader_info *info)
+ struct radv_shader_info *info,
+ const struct radv_shader_variant_key *key)
{
+ unsigned attrib_count = glsl_count_attribute_slots(var->type, true);
int idx = var->data.location;
if (idx >= VERT_ATTRIB_GENERIC0 && idx <= VERT_ATTRIB_GENERIC15)
info->vs.has_vertex_buffers = true;
+
+ /* The shader needs the instance ID if any slot occupied by this
+ * attribute is fed at instance rate according to the pipeline key. */
+ for (unsigned i = 0; i < attrib_count; ++i) {
+ unsigned attrib_index = var->data.location + i - VERT_ATTRIB_GENERIC0;
+
+ if (key->vs.instance_rate_inputs & (1u << attrib_index))
+ info->vs.needs_instance_id = true;
+ }
}
+/* Recursively mark which fragment-shader input slots hold 16-bit data.
+ *
+ * Scalars/vectors/matrices: if the type is 16-bit, set one bit per
+ * occupied attribute slot in float16_shaded_mask starting at 'location'.
+ * Arrays recurse per element using the element's slot stride; structs
+ * (and interface blocks) recurse per field, advancing 'location' by each
+ * field's slot count.
+ */
+static void
+mark_16bit_ps_input(struct radv_shader_info *info, const struct glsl_type *type,
+ int location)
+{
+ if (glsl_type_is_scalar(type) || glsl_type_is_vector(type) || glsl_type_is_matrix(type)) {
+ unsigned attrib_count = glsl_count_attribute_slots(type, false);
+ if (glsl_type_is_16bit(type)) {
+ info->ps.float16_shaded_mask |= ((1ull << attrib_count) - 1) << location;
+ }
+ } else if (glsl_type_is_array(type)) {
+ unsigned stride = glsl_count_attribute_slots(glsl_get_array_element(type), false);
+ for (unsigned i = 0; i < glsl_get_length(type); ++i) {
+ mark_16bit_ps_input(info, glsl_get_array_element(type), location + i * stride);
+ }
+ } else {
+ assert(glsl_type_is_struct_or_ifc(type));
+ for (unsigned i = 0; i < glsl_get_length(type); i++) {
+ mark_16bit_ps_input(info, glsl_get_struct_field(type, i), location);
+ location += glsl_count_attribute_slots(glsl_get_struct_field(type, i), false);
+ }
+ }
+}
static void
gather_info_input_decl_ps(const nir_shader *nir, const nir_variable *var,
struct radv_shader_info *info)
info->ps.layer_input = true;
break;
case VARYING_SLOT_CLIP_DIST0:
- info->ps.num_input_clips_culls = attrib_count;
+ case VARYING_SLOT_CLIP_DIST1:
+ info->ps.num_input_clips_culls += attrib_count;
break;
default:
break;
if (var->data.sample)
info->ps.force_persample = true;
}
+
+ if (var->data.compact) {
+ unsigned component_count = var->data.location_frac +
+ glsl_get_length(var->type);
+ attrib_count = (component_count + 3) / 4;
+ } else {
+ mark_16bit_ps_input(info, var->type, var->data.driver_location);
+ }
+
+ uint64_t mask = ((1ull << attrib_count) - 1);
+
+ if (var->data.interpolation == INTERP_MODE_FLAT)
+ info->ps.flat_shaded_mask |= mask << var->data.driver_location;
+ if (var->data.interpolation == INTERP_MODE_EXPLICIT)
+ info->ps.explicit_shaded_mask |= mask << var->data.driver_location;
+
+ if (var->data.location >= VARYING_SLOT_VAR0)
+ info->ps.input_mask |= mask << (var->data.location - VARYING_SLOT_VAR0);
}
static void
gather_info_input_decl(const nir_shader *nir, const nir_variable *var,
- struct radv_shader_info *info)
+ struct radv_shader_info *info,
+ const struct radv_shader_variant_key *key)
{
switch (nir->info.stage) {
case MESA_SHADER_VERTEX:
- gather_info_input_decl_vs(nir, var, info);
+ gather_info_input_decl_vs(nir, var, info, key);
break;
case MESA_SHADER_FRAGMENT:
gather_info_input_decl_ps(nir, var, info);
int idx = var->data.location;
unsigned param = shader_io_get_unique_index(idx);
int num_slots = glsl_count_attribute_slots(var->type, false);
- if (idx == VARYING_SLOT_CLIP_DIST0)
- num_slots = (nir->info.clip_distance_array_size + nir->info.cull_distance_array_size > 4) ? 2 : 1;
+ if (var->data.compact)
+ num_slots = DIV_ROUND_UP(var->data.location_frac + glsl_get_length(var->type), 4);
mark_ls_output(info, param, num_slots);
}
+/* Per-output-variable gather, dispatched on shader stage.  When this
+ * stage's outputs feed the export path (VS not as LS/ES, TES not as ES,
+ * or GS), vs_info points at its outinfo so clip/cull masks and
+ * pointsize/viewport/layer writes are recorded below. */
static void
gather_info_output_decl(const nir_shader *nir, const nir_variable *var,
struct radv_shader_info *info,
- const struct radv_nir_compiler_options *options)
+ const struct radv_shader_variant_key *key)
{
+ struct radv_vs_output_info *vs_info = NULL;
+
switch (nir->info.stage) {
case MESA_SHADER_FRAGMENT:
gather_info_output_decl_ps(nir, var, info);
break;
case MESA_SHADER_VERTEX:
- if (options->key.vs.as_ls)
+ if (!key->vs_common_out.as_ls &&
+ !key->vs_common_out.as_es)
+ vs_info = &info->vs.outinfo;
+
+ if (key->vs_common_out.as_ls)
gather_info_output_decl_ls(nir, var, info);
+ else if (key->vs_common_out.as_ngg)
+ gather_info_output_decl_gs(nir, var, info);
break;
case MESA_SHADER_GEOMETRY:
+ vs_info = &info->vs.outinfo;
gather_info_output_decl_gs(nir, var, info);
break;
+ case MESA_SHADER_TESS_EVAL:
+ if (!key->vs_common_out.as_es)
+ vs_info = &info->tes.outinfo;
+ break;
default:
break;
}
+
+ /* clip mask occupies the low bits, cull mask is shifted up past the
+ * clip distances, matching the combined clip/cull distance array. */
+ if (vs_info) {
+ switch (var->data.location) {
+ case VARYING_SLOT_CLIP_DIST0:
+ vs_info->clip_dist_mask =
+ (1 << nir->info.clip_distance_array_size) - 1;
+ vs_info->cull_dist_mask =
+ (1 << nir->info.cull_distance_array_size) - 1;
+ vs_info->cull_dist_mask <<= nir->info.clip_distance_array_size;
+ break;
+ case VARYING_SLOT_PSIZ:
+ vs_info->writes_pointsize = true;
+ break;
+ case VARYING_SLOT_VIEWPORT:
+ vs_info->writes_viewport_index = true;
+ break;
+ case VARYING_SLOT_LAYER:
+ vs_info->writes_layer = true;
+ break;
+ default:
+ break;
+ }
+ }
}
static void
}
for (unsigned i = 0; i < NIR_MAX_XFB_BUFFERS; i++) {
- so->strides[i] = xfb->strides[i] / 4;
+ so->strides[i] = xfb->buffers[i].stride / 4;
}
ralloc_free(xfb);
}
+/* Set the radv_shader_info fields that need non-zero defaults before
+ * radv_nir_shader_info_pass runs: min_push_constant_used starts at its
+ * maximum so the MIN2 in gather_push_constant_info converges. */
+void
+radv_nir_shader_info_init(struct radv_shader_info *info)
+{
+ /* Assume that shaders only have 32-bit push constants by default. */
+ info->min_push_constant_used = UINT8_MAX;
+ info->has_only_32bit_push_constants = true;
+}
+
+/*
+ * Walk one NIR shader and fill in radv_shader_info: declaration info for
+ * inputs/outputs, per-instruction info from every block, xfb info for
+ * VS/TES/GS, then key-driven fixups (LayerID / PrimitiveID exports,
+ * multiview) and a copy of stage-specific execution parameters.
+ */
void
radv_nir_shader_info_pass(const struct nir_shader *nir,
- const struct radv_nir_compiler_options *options,
+ const struct radv_pipeline_layout *layout,
+ const struct radv_shader_variant_key *key,
struct radv_shader_info *info)
{
struct nir_function *func =
(struct nir_function *)exec_list_get_head_const(&nir->functions);
+ /* Dynamic descriptor offsets are fetched through push constants, but
+ * only for stages that actually use dynamic descriptors. */
- if (options->layout && options->layout->dynamic_offset_count)
+ if (layout && layout->dynamic_offset_count &&
+ (layout->dynamic_shader_stages & mesa_to_vk_shader_stage(nir->info.stage))) {
info->loads_push_constants = true;
+ info->loads_dynamic_offsets = true;
+ }
nir_foreach_variable(variable, &nir->inputs)
- gather_info_input_decl(nir, variable, info);
+ gather_info_input_decl(nir, variable, info, key);
nir_foreach_block(block, func->impl) {
gather_info_block(nir, block, info);
}
nir_foreach_variable(variable, &nir->outputs)
- gather_info_output_decl(nir, variable, info, options);
+ gather_info_output_decl(nir, variable, info, key);
if (nir->info.stage == MESA_SHADER_VERTEX ||
nir->info.stage == MESA_SHADER_TESS_EVAL ||
nir->info.stage == MESA_SHADER_GEOMETRY)
gather_xfb_info(nir, info);
+
+ /* Make sure to export the LayerID if the fragment shader needs it. */
+ if (key->vs_common_out.export_layer_id) {
+ switch (nir->info.stage) {
+ case MESA_SHADER_VERTEX:
+ info->vs.output_usage_mask[VARYING_SLOT_LAYER] |= 0x1;
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ info->tes.output_usage_mask[VARYING_SLOT_LAYER] |= 0x1;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ info->gs.output_usage_mask[VARYING_SLOT_LAYER] |= 0x1;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* Make sure to export the LayerID if the subpass has multiviews. */
+ if (key->has_multiview_view_index) {
+ switch (nir->info.stage) {
+ case MESA_SHADER_VERTEX:
+ info->vs.outinfo.writes_layer = true;
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ info->tes.outinfo.writes_layer = true;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ /* NOTE(review): GS uses vs.outinfo here (and for
+ * export_prim_id below) — confirm GS shares the VS
+ * outinfo storage, matching gather_info_output_decl. */
+ info->vs.outinfo.writes_layer = true;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* Make sure to export the PrimitiveID if the fragment shader needs it. */
+ if (key->vs_common_out.export_prim_id) {
+ switch (nir->info.stage) {
+ case MESA_SHADER_VERTEX:
+ info->vs.outinfo.export_prim_id = true;
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ info->tes.outinfo.export_prim_id = true;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ info->vs.outinfo.export_prim_id = true;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (nir->info.stage == MESA_SHADER_FRAGMENT)
+ info->ps.num_interp = nir->num_inputs;
+
+ /* Copy stage-specific execution parameters out of the NIR shader info
+ * and the pipeline key. */
+ switch (nir->info.stage) {
+ case MESA_SHADER_COMPUTE:
+ for (int i = 0; i < 3; ++i)
+ info->cs.block_size[i] = nir->info.cs.local_size[i];
+ break;
+ case MESA_SHADER_FRAGMENT:
+ info->ps.can_discard = nir->info.fs.uses_discard;
+ info->ps.early_fragment_test = nir->info.fs.early_fragment_tests;
+ info->ps.post_depth_coverage = nir->info.fs.post_depth_coverage;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ info->gs.vertices_in = nir->info.gs.vertices_in;
+ info->gs.vertices_out = nir->info.gs.vertices_out;
+ info->gs.output_prim = nir->info.gs.output_primitive;
+ info->gs.invocations = nir->info.gs.invocations;
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ info->tes.primitive_mode = nir->info.tess.primitive_mode;
+ info->tes.spacing = nir->info.tess.spacing;
+ info->tes.ccw = nir->info.tess.ccw;
+ info->tes.point_mode = nir->info.tess.point_mode;
+ info->tes.as_es = key->vs_common_out.as_es;
+ info->tes.export_prim_id = key->vs_common_out.export_prim_id;
+ info->is_ngg = key->vs_common_out.as_ngg;
+ info->is_ngg_passthrough = key->vs_common_out.as_ngg_passthrough;
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ info->tcs.tcs_vertices_out = nir->info.tess.tcs_vertices_out;
+ break;
+ case MESA_SHADER_VERTEX:
+ info->vs.as_es = key->vs_common_out.as_es;
+ info->vs.as_ls = key->vs_common_out.as_ls;
+ info->vs.export_prim_id = key->vs_common_out.export_prim_id;
+ info->is_ngg = key->vs_common_out.as_ngg;
+ info->is_ngg_passthrough = key->vs_common_out.as_ngg_passthrough;
+ break;
+ default:
+ break;
+ }
+
+ /* 16 bytes per written vec4 output slot; clip+cull distances need one
+ * extra slot when they spill past 4 components. */
+ if (nir->info.stage == MESA_SHADER_GEOMETRY) {
+ unsigned add_clip = nir->info.clip_distance_array_size +
+ nir->info.cull_distance_array_size > 4;
+ info->gs.gsvs_vertex_size =
+ (util_bitcount64(nir->info.outputs_written) + add_clip) * 16;
+ info->gs.max_gsvs_emit_size =
+ info->gs.gsvs_vertex_size * nir->info.gs.vertices_out;
+ }
+
+ /* Compute the ESGS item size for VS or TES as ES. */
+ if ((nir->info.stage == MESA_SHADER_VERTEX ||
+ nir->info.stage == MESA_SHADER_TESS_EVAL) &&
+ key->vs_common_out.as_es) {
+ struct radv_es_output_info *es_info =
+ nir->info.stage == MESA_SHADER_VERTEX ? &info->vs.es_info : &info->tes.es_info;
+ uint32_t max_output_written = 0;
+
+ uint64_t output_mask = nir->info.outputs_written;
+ while (output_mask) {
+ const int i = u_bit_scan64(&output_mask);
+ unsigned param_index = shader_io_get_unique_index(i);
+
+ max_output_written = MAX2(param_index, max_output_written);
+ }
+
+ es_info->esgs_itemsize = (max_output_written + 1) * 16;
+ }
+
+ info->float_controls_mode = nir->info.float_controls_execution_mode;
}