#include "compiler/nir/nir_builder.h"
struct state {
+ uint32_t topology;
+
struct primitive_map {
unsigned loc[32];
unsigned size[32];
nir_variable *vertex_flags_out;
nir_variable *output_vars[32];
+
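+ /* Tess level values captured from TCS output stores, written back to
+  * the tessfactor BO by the epilogue:
+  */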
+ nir_ssa_def *outer_levels[4];
+ nir_ssa_def *inner_levels[2];
};
static nir_ssa_def *
nir_ssa_def *attr_offset;
nir_ssa_def *vertex_stride;
- if (b->shader->info.stage == MESA_SHADER_VERTEX) {
+ switch (b->shader->info.stage) {
+ case MESA_SHADER_VERTEX:
+ case MESA_SHADER_TESS_EVAL:
vertex_stride = nir_imm_int(b, state->map.stride * 4);
attr_offset = nir_imm_int(b, state->map.loc[base] * 4);
- } else if (b->shader->info.stage == MESA_SHADER_GEOMETRY) {
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ case MESA_SHADER_GEOMETRY:
vertex_stride = nir_load_vs_vertex_stride_ir3(b);
attr_offset = nir_load_primitive_location_ir3(b, base);
- } else {
+ break;
+ default:
unreachable("bad shader stage");
}
}
void
-ir3_nir_lower_vs_to_explicit_io(nir_shader *shader, struct ir3_shader *s)
+ir3_nir_lower_to_explicit_io(nir_shader *shader, struct ir3_shader *s, unsigned topology)
{
struct state state = { };
nir_builder_init(&b, impl);
b.cursor = nir_before_cf_list(&impl->body);
- state.header = nir_load_gs_header_ir3(&b);
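+ /* The header carries the packed IDs this lowering uses for its offset
+  * math; a VS feeding the tessellation pipeline gets the TCS flavor,
+  * otherwise the GS flavor.
+  */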
+ if (s->type == MESA_SHADER_VERTEX && topology != IR3_TESS_NONE)
+ state.header = nir_load_tcs_header_ir3(&b);
+ else
+ state.header = nir_load_gs_header_ir3(&b);
nir_foreach_block_safe(block, impl)
lower_vs_block(block, &b, &state);
s->output_size = state.map.stride;
}
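+/* Compute the dword offset of a per-vertex attribute in the tess param
+ * BO: the patch's base (primitive_id * patch_stride), plus the attribute's
+ * location within the patch, plus the vertex's slot within the attribute,
+ * plus the incoming (vec4-unit) offset scaled to dwords.
+ */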
+static nir_ssa_def *
+build_per_vertex_offset(nir_builder *b, struct state *state,
+ nir_ssa_def *vertex, nir_ssa_def *offset, nir_variable *var)
+{
+ nir_ssa_def *primitive_id = nir_load_primitive_id(b);
+ nir_ssa_def *patch_stride = nir_load_hs_patch_stride_ir3(b);
+ nir_ssa_def *patch_offset = nir_imul24(b, primitive_id, patch_stride);
+ nir_ssa_def *attr_offset;
+ int loc = var->data.driver_location;
+
+ switch (b->shader->info.stage) {
+ case MESA_SHADER_TESS_CTRL:
+ attr_offset = nir_imm_int(b, state->map.loc[loc]);
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ attr_offset = nir_load_primitive_location_ir3(b, loc);
+ break;
+ default:
+ unreachable("bad shader state");
+ }
+
+ nir_ssa_def *attr_stride = nir_imm_int(b, state->map.size[loc]);
+ nir_ssa_def *vertex_offset = nir_imul24(b, vertex, attr_stride);
+
+ return nir_iadd(b, nir_iadd(b, patch_offset, attr_offset),
+ nir_iadd(b, vertex_offset, nir_ishl(b, offset, nir_imm_int(b, 2))));
+}
+
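+/* Per-patch variables use the same layout with no vertex term, so reuse
+ * the per-vertex path with vertex index 0.
+ */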
+static nir_ssa_def *
+build_patch_offset(nir_builder *b, struct state *state, nir_ssa_def *offset, nir_variable *var)
+{
+ debug_assert(var && var->data.patch);
+
+ return build_per_vertex_offset(b, state, nir_imm_int(b, 0), offset, var);
+}
+
+static nir_ssa_def *
+build_tessfactor_base(nir_builder *b, gl_varying_slot slot, struct state *state)
+{
+ uint32_t inner_levels, outer_levels;
+ switch (state->topology) {
+ case IR3_TESS_TRIANGLES:
+ inner_levels = 1;
+ outer_levels = 3;
+ break;
+ case IR3_TESS_QUADS:
+ inner_levels = 2;
+ outer_levels = 4;
+ break;
+ case IR3_TESS_ISOLINES:
+ inner_levels = 0;
+ outer_levels = 2;
+ break;
+ default:
+ unreachable("bad");
+ }
+
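+ /* Each patch occupies one header dword plus its tess levels in the
+  * tessfactor BO, e.g. for quads: 1 + 4 outer + 2 inner = 7 dwords.
+  */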
+ const uint32_t patch_stride = 1 + inner_levels + outer_levels;
+
+ nir_ssa_def *primitive_id = nir_load_primitive_id(b);
+
+ nir_ssa_def *patch_offset = nir_imul24(b, primitive_id, nir_imm_int(b, patch_stride));
+
+ uint32_t offset;
+ switch (slot) {
+ case VARYING_SLOT_TESS_LEVEL_OUTER:
+ /* There's some kind of header dword; the tess levels start at index 1. */
+ offset = 1;
+ break;
+ case VARYING_SLOT_TESS_LEVEL_INNER:
+ offset = 1 + outer_levels;
+ break;
+ default:
+ unreachable("bad");
+ }
+
+ return nir_iadd(b, patch_offset, nir_imm_int(b, offset));
+}
+
+static void
+lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
+{
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+ switch (intr->intrinsic) {
+ case nir_intrinsic_load_invocation_id:
+ b->cursor = nir_before_instr(&intr->instr);
+
+ nir_ssa_def *invocation_id = build_invocation_id(b, state);
+ nir_ssa_def_rewrite_uses(&intr->dest.ssa,
+ nir_src_for_ssa(invocation_id));
+ nir_instr_remove(&intr->instr);
+ break;
+
+ case nir_intrinsic_barrier:
+ /* Hull shaders dispatch 32 wide so an entire patch will always
+ * fit in a single warp and execute in lock-step. Consequently,
+ * we don't need to do anything for TCS barriers so just remove
+ * the intrinsic. Otherwise we'll emit an actual barrier
+ * instructions, which will deadlock.
+ */
+ nir_instr_remove(&intr->instr);
+ break;
+
+ case nir_intrinsic_load_per_vertex_output: {
+ // src[] = { vertex, offset }.
+
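+ /* TCS per-vertex outputs live in the tess param BO (also read by the
+  * TES), so this becomes a global load at base + per-vertex offset.
+  */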
+ b->cursor = nir_before_instr(&intr->instr);
+
+ nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
+ nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
+ nir_ssa_def *offset = build_per_vertex_offset(b, state,
+ intr->src[0].ssa, intr->src[1].ssa, var);
+
+ replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
+ break;
+ }
+
+ case nir_intrinsic_store_per_vertex_output: {
+ // src[] = { value, vertex, offset }.
+
+ b->cursor = nir_before_instr(&intr->instr);
+
+ nir_ssa_def *value = intr->src[0].ssa;
+ nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
+ nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
+ nir_ssa_def *offset = build_per_vertex_offset(b, state,
+ intr->src[1].ssa, intr->src[2].ssa, var);
+
+ nir_intrinsic_instr *store =
+ replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, address,
+ nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr))));
+
+ nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intr));
+
+ break;
+ }
+
+ case nir_intrinsic_load_per_vertex_input: {
+ // src[] = { vertex, offset }.
+
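+ /* TCS inputs still come from local (shared) memory, where the
+  * preceding stage wrote its outputs, not from the param BO.
+  */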
+ b->cursor = nir_before_instr(&intr->instr);
+
+ nir_ssa_def *offset = build_local_offset(b, state,
+ intr->src[0].ssa, // this is typically gl_InvocationID
+ nir_intrinsic_base(intr),
+ intr->src[1].ssa);
+
+ replace_intrinsic(b, intr, nir_intrinsic_load_shared_ir3, offset, NULL, NULL);
+ break;
+ }
+
+ case nir_intrinsic_load_tess_level_inner:
+ case nir_intrinsic_load_tess_level_outer: {
+ b->cursor = nir_before_instr(&intr->instr);
+
+ gl_varying_slot slot;
+ if (intr->intrinsic == nir_intrinsic_load_tess_level_inner)
+ slot = VARYING_SLOT_TESS_LEVEL_INNER;
+ else
+ slot = VARYING_SLOT_TESS_LEVEL_OUTER;
+
+ nir_ssa_def *address = nir_load_tess_factor_base_ir3(b);
+ nir_ssa_def *offset = build_tessfactor_base(b, slot, state);
+
+ replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
+ break;
+ }
+
+ case nir_intrinsic_load_output: {
+ // src[] = { offset }.
+
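+ /* Per-patch outputs are read back from the tess param BO as well. */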
+ nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
+
+ b->cursor = nir_before_instr(&intr->instr);
+
+ nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
+ nir_ssa_def *offset = build_patch_offset(b, state, intr->src[0].ssa, var);
+
+ replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
+ break;
+ }
+
+ case nir_intrinsic_store_output: {
+ // src[] = { value, offset }.
+
+ /* write patch output to bo */
+
+ nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
+
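+ /* Tess level writes don't go to memory here; stash the values so the
+  * epilogue can write them out from a single invocation.
+  */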
+ nir_ssa_def **levels = NULL;
+ if (var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
+ levels = state->outer_levels;
+ else if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER)
+ levels = state->inner_levels;
+
+ b->cursor = nir_before_instr(&intr->instr);
+
+ if (levels) {
+ for (int i = 0; i < 4; i++)
+ if (nir_intrinsic_write_mask(intr) & (1 << i))
+ levels[i] = nir_channel(b, intr->src[0].ssa, i);
+ nir_instr_remove(&intr->instr);
+ } else {
+ nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
+ nir_ssa_def *offset = build_patch_offset(b, state, intr->src[1].ssa, var);
+
+ debug_assert(nir_intrinsic_component(intr) == 0);
+
+ nir_intrinsic_instr *store =
+ replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
+ intr->src[0].ssa, address, offset);
+
+ nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intr));
+ }
+ break;
+ }
+
+ default:
+ break;
+ }
+ }
+}
+
+static void
+emit_tess_epilogue(nir_builder *b, struct state *state)
+{
+ nir_ssa_def *tessfactor_address = nir_load_tess_factor_base_ir3(b);
+ nir_ssa_def *levels[2];
+
+ /* Pack the tess levels gathered during lowering into vectors matching
+  * the tessfactor BO layout for this topology, then write them out.
+  */
+ switch (state->topology) {
+ case IR3_TESS_TRIANGLES:
+ levels[0] = nir_vec4(b, state->outer_levels[0], state->outer_levels[1],
+ state->outer_levels[2], state->inner_levels[0]);
+ levels[1] = NULL;
+ break;
+ case IR3_TESS_QUADS:
+ levels[0] = nir_vec4(b, state->outer_levels[0], state->outer_levels[1],
+ state->outer_levels[2], state->outer_levels[3]);
+ levels[1] = nir_vec2(b, state->inner_levels[0], state->inner_levels[1]);
+ break;
+ case IR3_TESS_ISOLINES:
+ levels[0] = nir_vec2(b, state->outer_levels[0], state->outer_levels[1]);
+ levels[1] = NULL;
+ break;
+ default:
+ unreachable("nope");
+ }
+
+ nir_ssa_def *offset = build_tessfactor_base(b, VARYING_SLOT_TESS_LEVEL_OUTER, state);
+
+ nir_intrinsic_instr *store =
+ nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_global_ir3);
+
+ store->src[0] = nir_src_for_ssa(levels[0]);
+ store->src[1] = nir_src_for_ssa(tessfactor_address);
+ store->src[2] = nir_src_for_ssa(offset);
+ nir_builder_instr_insert(b, &store->instr);
+ store->num_components = levels[0]->num_components;
+ nir_intrinsic_set_write_mask(store, (1 << levels[0]->num_components) - 1);
+
+ if (levels[1]) {
+ store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_global_ir3);
+ offset = nir_iadd(b, offset, nir_imm_int(b, levels[0]->num_components));
+
+ store->src[0] = nir_src_for_ssa(levels[1]);
+ store->src[1] = nir_src_for_ssa(tessfactor_address);
+ store->src[2] = nir_src_for_ssa(offset);
+ nir_builder_instr_insert(b, &store->instr);
+ store->num_components = levels[1]->num_components;
+ nir_intrinsic_set_write_mask(store, (1 << levels[1]->num_components) - 1);
+ }
+
+ /* Finally, insert the end-patch instruction, which may signal the tess
+  * engine that another primitive is ready.
+  */
+
+ nir_intrinsic_instr *end_patch =
+ nir_intrinsic_instr_create(b->shader, nir_intrinsic_end_patch_ir3);
+ nir_builder_instr_insert(b, &end_patch->instr);
+}
+
+void
+ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader *s, unsigned topology)
+{
+ struct state state = { .topology = topology };
+
+ if (shader_debug_enabled(shader->info.stage)) {
+ fprintf(stderr, "NIR (before tess lowering) for %s shader:\n",
+ _mesa_shader_stage_to_string(shader->info.stage));
+ nir_print_shader(shader, stderr);
+ }
+
+ build_primitive_map(shader, &state.map, &shader->outputs);
+ memcpy(s->output_loc, state.map.loc, sizeof(s->output_loc));
+ s->output_size = state.map.stride;
+
+ nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+ assert(impl);
+
+ nir_builder b;
+ nir_builder_init(&b, impl);
+ b.cursor = nir_before_cf_list(&impl->body);
+
+ state.header = nir_load_tcs_header_ir3(&b);
+
+ nir_foreach_block_safe(block, impl)
+ lower_tess_ctrl_block(block, &b, &state);
+
+ /* Now move the body of the TCS into a conditional:
+ *
+ * if (gl_InvocationID < num_vertices)
+ * // body
+ *
+ */
+
+ nir_cf_list body;
+ nir_cf_extract(&body, nir_before_cf_list(&impl->body),
+ nir_after_cf_list(&impl->body));
+
+ b.cursor = nir_after_cf_list(&impl->body);
+
+ /* Re-emit the header, since the old one got moved into the if branch */
+ state.header = nir_load_tcs_header_ir3(&b);
+ nir_ssa_def *iid = build_invocation_id(&b, &state);
+
+ const uint32_t nvertices = shader->info.tess.tcs_vertices_out;
+ nir_ssa_def *cond = nir_ult(&b, iid, nir_imm_int(&b, nvertices));
+
+ nir_if *nif = nir_push_if(&b, cond);
+
+ nir_cf_reinsert(&body, b.cursor);
+
+ b.cursor = nir_after_cf_list(&nif->then_list);
+
+ /* Insert a conditional exit for threads with invocation id != 0, so
+  * that only invocation 0 runs the tess factor epilogue.
+  */
+ nir_ssa_def *iid0_cond = nir_ieq(&b, iid, nir_imm_int(&b, 0));
+ nir_intrinsic_instr *cond_end =
+ nir_intrinsic_instr_create(shader, nir_intrinsic_cond_end_ir3);
+ cond_end->src[0] = nir_src_for_ssa(iid0_cond);
+ nir_builder_instr_insert(&b, &cond_end->instr);
+
+ emit_tess_epilogue(&b, &state);
+
+ nir_pop_if(&b, nif);
+
+ nir_metadata_preserve(impl, 0);
+}
+
+static void
+lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state)
+{
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+ switch (intr->intrinsic) {
+ case nir_intrinsic_load_tess_coord: {
+ b->cursor = nir_after_instr(&intr->instr);
+ nir_ssa_def *x = nir_channel(b, &intr->dest.ssa, 0);
+ nir_ssa_def *y = nir_channel(b, &intr->dest.ssa, 1);
+ nir_ssa_def *z;
+
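+ /* Only two tess coord components are supplied; for triangles derive
+  * the third barycentric coordinate as 1 - x - y, for the 2D domains
+  * it is simply 0.
+  */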
+ if (state->topology == IR3_TESS_TRIANGLES)
+ z = nir_fsub(b, nir_fsub(b, nir_imm_float(b, 1.0f), y), x);
+ else
+ z = nir_imm_float(b, 0.0f);
+
+ nir_ssa_def *coord = nir_vec3(b, x, y, z);
+
+ nir_ssa_def_rewrite_uses_after(&intr->dest.ssa,
+ nir_src_for_ssa(coord),
+ b->cursor.instr);
+ break;
+ }
+
+ case nir_intrinsic_load_per_vertex_input: {
+ // src[] = { vertex, offset }.
+
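+ /* TES per-vertex inputs are the TCS per-vertex outputs, read from the
+  * same tess param BO with the same offset computation.
+  */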
+ b->cursor = nir_before_instr(&intr->instr);
+
+ nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
+ nir_variable *var = get_var(&b->shader->inputs, nir_intrinsic_base(intr));
+ nir_ssa_def *offset = build_per_vertex_offset(b, state,
+ intr->src[0].ssa, intr->src[1].ssa, var);
+
+ replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
+ break;
+ }
+
+ case nir_intrinsic_load_tess_level_inner:
+ case nir_intrinsic_load_tess_level_outer: {
+ b->cursor = nir_before_instr(&intr->instr);
+
+ gl_varying_slot slot;
+ if (intr->intrinsic == nir_intrinsic_load_tess_level_inner)
+ slot = VARYING_SLOT_TESS_LEVEL_INNER;
+ else
+ slot = VARYING_SLOT_TESS_LEVEL_OUTER;
+
+ nir_ssa_def *address = nir_load_tess_factor_base_ir3(b);
+ nir_ssa_def *offset = build_tessfactor_base(b, slot, state);
+
+ /* Loading across a vec4 (16B) memory boundary is problematic
+ * if we don't use components from the second vec4. The tess
+ * levels aren't guaranteed to be vec4 aligned and we don't
+ * know which levels are actually used, so we load each
+ * component individually.
+ */
+ nir_ssa_def *levels[4];
+ for (unsigned i = 0; i < intr->num_components; i++) {
+ nir_intrinsic_instr *new_intr =
+ nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_global_ir3);
+
+ new_intr->src[0] = nir_src_for_ssa(address);
+ new_intr->src[1] = nir_src_for_ssa(nir_iadd(b, offset, nir_imm_int(b, i)));
+ new_intr->num_components = 1;
+ nir_ssa_dest_init(&new_intr->instr, &new_intr->dest, 1, 32, NULL);
+ nir_builder_instr_insert(b, &new_intr->instr);
+ levels[i] = &new_intr->dest.ssa;
+ }
+
+ nir_ssa_def *v = nir_vec(b, levels, intr->num_components);
+
+ nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(v));
+
+ nir_instr_remove(&intr->instr);
+ break;
+ }
+
+ case nir_intrinsic_load_input: {
+ // src[] = { offset }.
+
+ nir_variable *var = get_var(&b->shader->inputs, nir_intrinsic_base(intr));
+
+ debug_assert(var->data.patch);
+
+ b->cursor = nir_before_instr(&intr->instr);
+
+ nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
+ nir_ssa_def *offset = build_patch_offset(b, state, intr->src[0].ssa, var);
+
+ replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
+ break;
+ }
+
+ default:
+ break;
+ }
+ }
+}
+
+void
+ir3_nir_lower_tess_eval(nir_shader *shader, unsigned topology)
+{
+ struct state state = { .topology = topology };
+
+ if (shader_debug_enabled(shader->info.stage)) {
+ fprintf(stderr, "NIR (before tess lowering) for %s shader:\n",
+ _mesa_shader_stage_to_string(shader->info.stage));
+ nir_print_shader(shader, stderr);
+ }
+
+ /* Build map of inputs so we have the sizes. */
+ build_primitive_map(shader, &state.map, &shader->inputs);
+
+ nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+ assert(impl);
+
+ nir_builder b;
+ nir_builder_init(&b, impl);
+
+ nir_foreach_block_safe(block, impl)
+ lower_tess_eval_block(block, &b, &state);
+
+ nir_metadata_preserve(impl, 0);
+}
+
static void
lower_gs_block(nir_block *block, nir_builder *b, struct state *state)
{