static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr,
bool is_input)
{
- unsigned interp = TGSI_INTERPOLATE_CONSTANT; /* load_input uses flat shading */
+ unsigned interp = INTERP_MODE_FLAT; /* default: plain load_input uses flat shading */
if (intr->intrinsic == nir_intrinsic_load_interpolated_input) {
nir_intrinsic_instr *baryc = nir_instr_as_intrinsic(intr->src[0].ssa->parent_instr);
if (baryc) {
if (nir_intrinsic_infos[baryc->intrinsic].index_map[NIR_INTRINSIC_INTERP_MODE] > 0)
- interp = tgsi_get_interp_mode(nir_intrinsic_interp_mode(baryc), false);
+ interp = nir_intrinsic_interp_mode(baryc); /* store the NIR interp mode as-is, no TGSI translation */
else
unreachable("unknown barycentric intrinsic");
} else {
}
unsigned mask, bit_size;
- bool dual_slot;
+ bool dual_slot, is_output_load; /* is_output_load: set only for loads with !is_input */
if (nir_intrinsic_infos[intr->intrinsic].index_map[NIR_INTRINSIC_WRMASK] > 0) {
mask = nir_intrinsic_write_mask(intr); /* store */
bit_size = nir_src_bit_size(intr->src[0]);
dual_slot = bit_size == 64 && nir_src_num_components(intr->src[0]) >= 3;
+ is_output_load = false; /* an intrinsic with a write mask is a store */
} else {
mask = nir_ssa_def_components_read(&intr->dest.ssa); /* load */
bit_size = intr->dest.ssa.bit_size;
dual_slot = bit_size == 64 && intr->dest.ssa.num_components >= 3;
+ is_output_load = !is_input; /* a load that is not an input load reads an output */
}
/* Convert the 64-bit component mask to a 32-bit component mask. */
mask <<= nir_intrinsic_component(intr);
- unsigned name, index;
- if (info->processor == PIPE_SHADER_VERTEX && is_input) {
- /* VS doesn't have semantics. */
- name = 0;
- index = 0;
- } else if (info->processor == PIPE_SHADER_FRAGMENT && !is_input) {
- tgsi_get_gl_frag_result_semantic(nir_intrinsic_io_semantics(intr).location,
- &name, &index);
- /* Adjust for dual source blending. */
- if (nir_intrinsic_io_semantics(intr).dual_source_blend_index)
- index++;
- } else {
- tgsi_get_gl_varying_semantic(nir_intrinsic_io_semantics(intr).location,
- true, &name, &index);
- }
-
nir_src offset = *nir_get_io_offset_src(intr);
bool indirect = !nir_src_is_const(offset);
if (!indirect)
assert(nir_src_as_uint(offset) == 0);
+ unsigned semantic = 0;
+ /* VS inputs have no semantics, so "semantic" stays 0 for them. Every
+ * other case takes the location from the IO semantics of the intrinsic.
+ */
+ if (info->stage != MESA_SHADER_VERTEX || !is_input)
+ semantic = nir_intrinsic_io_semantics(intr).location;
+
+ if (info->stage == MESA_SHADER_FRAGMENT && !is_input) {
+ /* Never use FRAG_RESULT_COLOR directly: remap it to FRAG_RESULT_DATA0
+ * and record that color 0 writes all color buffers.
+ */
+ if (semantic == FRAG_RESULT_COLOR) {
+ semantic = FRAG_RESULT_DATA0;
+ info->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] = true;
+ }
+ semantic += nir_intrinsic_io_semantics(intr).dual_source_blend_index; /* adjust for dual source blending */
+ }
+
unsigned driver_location = nir_intrinsic_base(intr);
unsigned num_slots = indirect ? nir_intrinsic_io_semantics(intr).num_slots : (1 + dual_slot);
unsigned loc = driver_location + i;
unsigned slot_mask = (dual_slot && i % 2 ? mask >> 4 : mask) & 0xf;
- info->input_semantic_name[loc] = name;
- info->input_semantic_index[loc] = index + i;
+ info->input_semantic[loc] = semantic + i; /* one combined semantic value replaces the name/index pair */
info->input_interpolate[loc] = interp;
if (slot_mask) {
info->input_usage_mask[loc] |= slot_mask;
info->num_inputs = MAX2(info->num_inputs, loc + 1);
- if (name == TGSI_SEMANTIC_PRIMID)
+ if (semantic == VARYING_SLOT_PRIMITIVE_ID)
info->uses_primid = true;
}
}
} else {
/* Outputs. */
assert(driver_location + num_slots <= ARRAY_SIZE(info->output_usagemask));
+ assert(semantic + num_slots < ARRAY_SIZE(info->output_semantic_to_slot)); /* NOTE(review): the highest index
+ * written below is semantic + num_slots - 1, so this bound is one
+ * stricter than the loop requires; confirm that is intended. */
for (unsigned i = 0; i < num_slots; i++) {
unsigned loc = driver_location + i;
unsigned slot_mask = (dual_slot && i % 2 ? mask >> 4 : mask) & 0xf;
- info->output_semantic_name[loc] = name;
- info->output_semantic_index[loc] = index + i;
+ info->output_semantic[loc] = semantic + i;
+ info->output_semantic_to_slot[semantic + i] = loc; /* reverse map: semantic -> driver slot */
- if (slot_mask) {
- if (info->processor == PIPE_SHADER_GEOMETRY) {
+ if (is_output_load) {
+ /* Output loads have only a few things that we need to track: the
+ * components that are read and, in fragment shaders, whether the
+ * load is a framebuffer fetch.
+ */
+ info->output_readmask[loc] |= slot_mask;
+
+ if (info->stage == MESA_SHADER_FRAGMENT &&
+ nir_intrinsic_io_semantics(intr).fb_fetch_output)
+ info->uses_fbfetch = true;
+ } else if (slot_mask) {
+ /* Output stores. */
+ if (info->stage == MESA_SHADER_GEOMETRY) {
unsigned gs_streams = (uint32_t)nir_intrinsic_io_semantics(intr).gs_streams <<
(nir_intrinsic_component(intr) * 2);
unsigned new_mask = slot_mask & ~info->output_usagemask[loc];
info->output_usagemask[loc] |= slot_mask;
info->num_outputs = MAX2(info->num_outputs, loc + 1);
- switch (name) {
- case TGSI_SEMANTIC_PRIMID:
- info->writes_primid = true;
- break;
- case TGSI_SEMANTIC_VIEWPORT_INDEX:
- info->writes_viewport_index = true;
- break;
- case TGSI_SEMANTIC_LAYER:
- info->writes_layer = true;
- break;
- case TGSI_SEMANTIC_PSIZE:
- info->writes_psize = true;
- break;
- case TGSI_SEMANTIC_CLIPVERTEX:
- info->writes_clipvertex = true;
- break;
- case TGSI_SEMANTIC_COLOR:
- info->colors_written |= 1 << (index + i);
-
- if (info->processor == PIPE_SHADER_FRAGMENT &&
- nir_intrinsic_io_semantics(intr).location == FRAG_RESULT_COLOR)
- info->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] = true;
- break;
- case TGSI_SEMANTIC_STENCIL:
- info->writes_stencil = true;
- break;
- case TGSI_SEMANTIC_SAMPLEMASK:
- info->writes_samplemask = true;
- break;
- case TGSI_SEMANTIC_EDGEFLAG:
- info->writes_edgeflag = true;
- break;
- case TGSI_SEMANTIC_POSITION:
- if (info->processor == PIPE_SHADER_FRAGMENT)
+ if (info->stage == MESA_SHADER_FRAGMENT) { /* fragment outputs are matched against FRAG_RESULT_* */
+ switch (semantic) {
+ case FRAG_RESULT_DEPTH:
info->writes_z = true;
- else
+ break;
+ case FRAG_RESULT_STENCIL:
+ info->writes_stencil = true;
+ break;
+ case FRAG_RESULT_SAMPLE_MASK:
+ info->writes_samplemask = true;
+ break;
+ default:
+ if (semantic >= FRAG_RESULT_DATA0 && semantic <= FRAG_RESULT_DATA7) {
+ unsigned index = semantic - FRAG_RESULT_DATA0;
+ info->colors_written |= 1 << (index + i); /* one bit per written color output */
+ }
+ break;
+ }
+ } else { /* all other stages are matched against VARYING_SLOT_* */
+ switch (semantic) {
+ case VARYING_SLOT_PRIMITIVE_ID:
+ info->writes_primid = true;
+ break;
+ case VARYING_SLOT_VIEWPORT:
+ info->writes_viewport_index = true;
+ break;
+ case VARYING_SLOT_LAYER:
+ info->writes_layer = true;
+ break;
+ case VARYING_SLOT_PSIZ:
+ info->writes_psize = true;
+ break;
+ case VARYING_SLOT_CLIP_VERTEX:
+ info->writes_clipvertex = true;
+ break;
+ case VARYING_SLOT_EDGE:
+ info->writes_edgeflag = true;
+ break;
+ case VARYING_SLOT_POS:
info->writes_position = true;
- break;
+ break;
+ }
}
}
}
case nir_intrinsic_load_interpolated_input:
scan_io_usage(info, intr, true);
break;
+ case nir_intrinsic_load_output:
+ case nir_intrinsic_load_per_vertex_output:
case nir_intrinsic_store_output:
case nir_intrinsic_store_per_vertex_output:
scan_io_usage(info, intr, false);
break;
- case nir_intrinsic_load_output: {
- unsigned location = nir_intrinsic_io_semantics(intr).location;
-
- if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
- if (location == VARYING_SLOT_TESS_LEVEL_INNER ||
- location == VARYING_SLOT_TESS_LEVEL_OUTER)
- info->reads_tessfactor_outputs = true;
- else
- info->reads_perpatch_outputs = true;
- } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
- if (nir_intrinsic_io_semantics(intr).fb_fetch_output)
- info->uses_fbfetch = true;
- }
- break;
- }
- case nir_intrinsic_load_per_vertex_output:
- info->reads_pervertex_outputs = true;
- break;
case nir_intrinsic_load_deref:
case nir_intrinsic_store_deref:
case nir_intrinsic_interp_deref_at_centroid:
{
nir_function *func;
- info->processor = pipe_shader_type_from_mesa(nir->info.stage);
-
+ info->stage = nir->info.stage;
info->properties[TGSI_PROPERTY_NEXT_SHADER] = pipe_shader_type_from_mesa(nir->info.next_stage);
if (nir->info.stage == MESA_SHADER_VERTEX) {
}
}
- info->color_interpolate[0] = tgsi_get_interp_mode(nir->info.fs.color0_interp, true);
- info->color_interpolate[1] = tgsi_get_interp_mode(nir->info.fs.color1_interp, true);
+ info->color_interpolate[0] = nir->info.fs.color0_interp;
+ info->color_interpolate[1] = nir->info.fs.color1_interp;
+ for (unsigned i = 0; i < 2; i++) {
+ if (info->color_interpolate[i] == INTERP_MODE_NONE)
+ info->color_interpolate[i] = INTERP_MODE_COLOR;
+ }
+
info->color_interpolate_loc[0] = nir->info.fs.color0_sample ? TGSI_INTERPOLATE_LOC_SAMPLE :
nir->info.fs.color0_centroid ? TGSI_INTERPOLATE_LOC_CENTROID :
TGSI_INTERPOLATE_LOC_CENTER;
info->clipdist_writemask = u_bit_consecutive(0, info->num_written_clipdistance);
info->culldist_writemask = u_bit_consecutive(0, info->num_written_culldistance);
- if (info->processor == PIPE_SHADER_FRAGMENT)
+ if (info->stage == MESA_SHADER_FRAGMENT)
info->uses_kill = nir->info.fs.uses_discard;
if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
info->tessfactors_are_def_in_all_invocs = ac_are_tessfactors_def_in_all_invocs(nir);
}
+ memset(info->output_semantic_to_slot, -1, sizeof(info->output_semantic_to_slot));
+
func = (struct nir_function *)exec_list_get_head_const(&nir->functions);
nir_foreach_block (block, func->impl) {
nir_foreach_instr (instr, block)
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
for (unsigned i = 0; i < 2; i++) {
if ((info->colors_read >> (i * 4)) & 0xf) {
- info->input_semantic_name[info->num_inputs] = TGSI_SEMANTIC_COLOR;
- info->input_semantic_index[info->num_inputs] = i;
+ info->input_semantic[info->num_inputs] = VARYING_SLOT_COL0 + i;
info->input_interpolate[info->num_inputs] = info->color_interpolate[i];
info->input_usage_mask[info->num_inputs] = info->colors_read >> (i * 4);
info->num_inputs++;
}
}
}
+
+ /* Trim output read masks based on write masks. */
+ for (unsigned i = 0; i < info->num_outputs; i++)
+ info->output_readmask[i] &= info->output_usagemask[i];
}
static void si_nir_opts(struct nir_shader *nir, bool first)
NIR_PASS_V(nir, nir_opt_constant_folding);
NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in);
NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_out);
+
+ /* Run DCE to remove dead derefs, so that nir_validate doesn't fail. */
+ NIR_PASS_V(nir, nir_opt_dce);
+
+ /* Remove input and output nir_variables, because we don't need them
+ * anymore. Also remove uniforms, because those should have been lowered
+ * to UBOs already. Uniforms that contain images or samplers are the
+ * exception: the check inside the loop skips them, so they are kept.
+ */
+ unsigned modes = nir_var_shader_in | nir_var_shader_out | nir_var_uniform;
+ nir_foreach_variable_with_modes_safe(var, nir, modes) {
+ if (var->data.mode == nir_var_uniform &&
+ (glsl_type_get_image_count(var->type) ||
+ glsl_type_get_sampler_count(var->type)))
+ continue; /* keep image and sampler uniforms */
+
+ exec_node_remove(&var->node); /* unlinks the node while iterating; presumably why the _safe iterator is used */
+ }
}
/**