case nir_intrinsic_store_output: {
// src[] = { value, offset }.
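+ /* nir_lower_io_to_temporaries replaces all accesses to output
+ * variables with temp variables and then emits a nir_copy_var at
+ * the end of the shader. Thus, we should always get a full wrmask
+ * here; the assert below checks that the mask is a contiguous run
+ * of low bits, i.e. of the form (1 << n) - 1.
+ */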
+ assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));
+
b->cursor = nir_instr_remove(&intr->instr);
nir_ssa_def *vertex_id = build_vertex_id(b, state);
nir_intrinsic_instr *store =
nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_shared_ir3);
- nir_intrinsic_set_write_mask(store, MASK(intr->num_components));
store->src[0] = nir_src_for_ssa(intr->src[0].ssa);
store->src[1] = nir_src_for_ssa(offset);
-
store->num_components = intr->num_components;
nir_builder_instr_insert(b, &store->instr);
}
void
-ir3_nir_lower_to_explicit_output(nir_shader *shader, struct ir3_shader *s, unsigned topology)
+ir3_nir_lower_to_explicit_output(nir_shader *shader, struct ir3_shader_variant *v,
+ unsigned topology)
{
struct state state = { };
build_primitive_map(shader, &state.map, &shader->outputs);
- memcpy(s->output_loc, state.map.loc, sizeof(s->output_loc));
+ memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc));
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
assert(impl);
nir_builder_init(&b, impl);
b.cursor = nir_before_cf_list(&impl->body);
- if (s->type == MESA_SHADER_VERTEX && topology != IR3_TESS_NONE)
+ if (v->type == MESA_SHADER_VERTEX && topology != IR3_TESS_NONE)
state.header = nir_load_tcs_header_ir3(&b);
else
state.header = nir_load_gs_header_ir3(&b);
nir_metadata_preserve(impl, nir_metadata_block_index |
nir_metadata_dominance);
- s->output_size = state.map.stride;
+ v->output_size = state.map.stride;
}
b->cursor = nir_before_instr(&intr->instr);
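+ /* nir_lower_io_to_temporaries replaces all accesses to output
+ * variables with temp variables and then emits a nir_copy_var at
+ * the end of the shader. Thus, we should always get a full wrmask
+ * here; the assert below checks that the mask is a contiguous run
+ * of low bits, i.e. of the form (1 << n) - 1.
+ */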
+ assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));
+
nir_ssa_def *value = intr->src[0].ssa;
nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
nir_ssa_def *offset = build_per_vertex_offset(b, state,
intr->src[1].ssa, intr->src[2].ssa, var);
- nir_intrinsic_instr *store =
- replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, address,
- nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr))));
-
- nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intr));
+ replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, address,
+ nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr))));
break;
}
b->cursor = nir_before_instr(&intr->instr);
if (levels) {
- for (int i = 0; i < 4; i++)
- if (nir_intrinsic_write_mask(intr) & (1 << i))
- levels[i] = nir_channel(b, intr->src[0].ssa, i);
+ for (int i = 0; i < 4; i++) {
+ if (nir_intrinsic_write_mask(intr) & (1 << i)) {
+ uint32_t component = nir_intrinsic_component(intr);
+ levels[i + component] = nir_channel(b, intr->src[0].ssa, i);
+ }
+ }
nir_instr_remove(&intr->instr);
} else {
nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
debug_assert(nir_intrinsic_component(intr) == 0);
- nir_intrinsic_instr *store =
- replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
- intr->src[0].ssa, address, offset);
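+ /* nir_lower_io_to_temporaries replaces all accesses to output
+ * variables with temp variables and then emits a nir_copy_var at
+ * the end of the shader. Thus, we should always get a full wrmask
+ * here; the assert below checks that the mask is a contiguous run
+ * of low bits, i.e. of the form (1 << n) - 1.
+ */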
+ assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));
- nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intr));
+ replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
+ intr->src[0].ssa, address, offset);
}
break;
}
nir_ssa_def *tessfactor_address = nir_load_tess_factor_base_ir3(b);
nir_ssa_def *levels[2];
+ if (!state->outer_levels[0])
+ return;
+
/* Then emit the epilogue that actually writes out the tessellation levels
* to the BOs.
*/
store->src[2] = nir_src_for_ssa(offset);
nir_builder_instr_insert(b, &store->instr);
store->num_components = levels[0]->num_components;
- nir_intrinsic_set_write_mask(store, (1 << levels[0]->num_components) - 1);
if (levels[1]) {
store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_global_ir3);
store->src[2] = nir_src_for_ssa(offset);
nir_builder_instr_insert(b, &store->instr);
store->num_components = levels[1]->num_components;
- nir_intrinsic_set_write_mask(store, (1 << levels[1]->num_components) - 1);
}
/* Finally, Insert endpatch instruction:
}
void
-ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader *s, unsigned topology)
+ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader_variant *v,
+ unsigned topology)
{
struct state state = { .topology = topology };
}
build_primitive_map(shader, &state.map, &shader->outputs);
- memcpy(s->output_loc, state.map.loc, sizeof(s->output_loc));
- s->output_size = state.map.stride;
+ memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc));
+ v->output_size = state.map.stride;
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
assert(impl);
case nir_intrinsic_load_tess_level_inner:
case nir_intrinsic_load_tess_level_outer: {
+ unsigned dest_comp = nir_intrinsic_dest_components(intr);
b->cursor = nir_before_instr(&intr->instr);
gl_varying_slot slot;
* component individually.
*/
nir_ssa_def *levels[4];
- for (unsigned i = 0; i < intr->num_components; i++) {
+ for (unsigned i = 0; i < dest_comp; i++) {
nir_intrinsic_instr *new_intr =
nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_global_ir3);
levels[i] = &new_intr->dest.ssa;
}
- nir_ssa_def *v = nir_vec(b, levels, intr->num_components);
+ nir_ssa_def *v = nir_vec(b, levels, dest_comp);
nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(v));
nir_foreach_variable(out_var, &producer->shader->nir->outputs) {
if (in_var->data.location == out_var->data.location) {
locs[in_var->data.driver_location] =
- producer->shader->output_loc[out_var->data.driver_location] * factor;
+ producer->output_loc[out_var->data.driver_location] * factor;
debug_assert(num_loc <= in_var->data.driver_location + 1);
num_loc = in_var->data.driver_location + 1;