nir_variable *vertex_count_var;
nir_variable *emitted_vertex_var;
- nir_variable *vertex_flags_var;
nir_variable *vertex_flags_out;
- nir_variable *output_vars[32];
+ struct exec_list old_outputs;
+ struct exec_list new_outputs;
+ struct exec_list emit_outputs;
- nir_ssa_def *outer_levels[4];
- nir_ssa_def *inner_levels[2];
+ /* tess ctrl shader on a650 gets the local primitive id at different bits: */
+ unsigned local_primitive_id_start;
};
static nir_ssa_def *
static nir_ssa_def *
build_local_primitive_id(nir_builder *b, struct state *state)
{
- return bitfield_extract(b, state->header, 0, 63);
+ return bitfield_extract(b, state->header, state->local_primitive_id_start, 63);
}
static nir_variable *
-get_var(struct exec_list *list, int driver_location)
+get_var(nir_shader *shader, nir_variable_mode mode, int driver_location)
{
- nir_foreach_variable(v, list) {
+ nir_foreach_variable_with_modes (v, shader, mode) {
if (v->data.driver_location == driver_location) {
return v;
}
return NULL;
}
+static bool
+is_tess_levels(nir_variable *var)
+{ /* True when var's location is the outer or the inner tess-level varying
+   * slot; any other location yields false. */
+ return (var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER ||
+ var->data.location == VARYING_SLOT_TESS_LEVEL_INNER);
+}
+
static nir_ssa_def *
build_local_offset(nir_builder *b, struct state *state,
nir_ssa_def *vertex, uint32_t base, nir_ssa_def *offset)
}
static void
-build_primitive_map(nir_shader *shader, struct primitive_map *map, struct exec_list *list)
+build_primitive_map(nir_shader *shader, nir_variable_mode mode, struct primitive_map *map)
{
- nir_foreach_variable(var, list) {
+ nir_foreach_variable_with_modes (var, shader, mode) {
switch (var->data.location) {
case VARYING_SLOT_TESS_LEVEL_OUTER:
case VARYING_SLOT_TESS_LEVEL_INNER:
for (uint32_t i = 0; i < ARRAY_SIZE(map->size); i++) {
if (map->size[i] == 0)
continue;
- nir_variable *var = get_var(list, i);
+ nir_variable *var = get_var(shader, mode, i);
map->loc[i] = loc;
loc += map->size[i];
}
static void
-lower_vs_block(nir_block *block, nir_builder *b, struct state *state)
+lower_block_to_explicit_output(nir_block *block, nir_builder *b, struct state *state)
{
- nir_foreach_instr_safe(instr, block) {
+ nir_foreach_instr_safe (instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
case nir_intrinsic_store_output: {
// src[] = { value, offset }.
+ /* nir_lower_io_to_temporaries replaces all access to output
+ * variables with temp variables and then emits a nir_copy_var at
+ * the end of the shader. Thus, we should always get a full wrmask
+ * here.
+ */
+ assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));
+
b->cursor = nir_instr_remove(&intr->instr);
nir_ssa_def *vertex_id = build_vertex_id(b, state);
nir_intrinsic_instr *store =
nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_shared_ir3);
- nir_intrinsic_set_write_mask(store, MASK(intr->num_components));
store->src[0] = nir_src_for_ssa(intr->src[0].ssa);
store->src[1] = nir_src_for_ssa(offset);
-
store->num_components = intr->num_components;
nir_builder_instr_insert(b, &store->instr);
}
void
-ir3_nir_lower_to_explicit_io(nir_shader *shader, struct ir3_shader *s, unsigned topology)
+ir3_nir_lower_to_explicit_output(nir_shader *shader, struct ir3_shader_variant *v,
+ unsigned topology)
{
struct state state = { };
- build_primitive_map(shader, &state.map, &shader->outputs);
- memcpy(s->output_loc, state.map.loc, sizeof(s->output_loc));
+ build_primitive_map(shader, nir_var_shader_out, &state.map);
+ memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc));
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
assert(impl);
nir_builder_init(&b, impl);
b.cursor = nir_before_cf_list(&impl->body);
- if (s->type == MESA_SHADER_VERTEX && topology != IR3_TESS_NONE)
+ if (v->type == MESA_SHADER_VERTEX && topology != IR3_TESS_NONE)
state.header = nir_load_tcs_header_ir3(&b);
else
state.header = nir_load_gs_header_ir3(&b);
- nir_foreach_block_safe(block, impl)
- lower_vs_block(block, &b, &state);
+ nir_foreach_block_safe (block, impl)
+ lower_block_to_explicit_output(block, &b, &state);
nir_metadata_preserve(impl, nir_metadata_block_index |
nir_metadata_dominance);
- s->output_size = state.map.stride;
+ v->output_size = state.map.stride;
+}
+
+/* Lower the per-vertex input loads and invocation-id reads found in one
+ * block. Each load_per_vertex_input is handed to replace_intrinsic() as a
+ * load_shared_ir3 whose offset comes from build_local_offset(); each
+ * load_invocation_id has its uses rewritten to the value returned by
+ * build_invocation_id() and is then removed. Every other instruction is
+ * left untouched.
+ *
+ * block: block whose instructions are scanned
+ * b:     builder; its cursor is moved in front of each lowered intrinsic
+ * state: lowering state passed through to the build_* helpers
+ */
+static void
+lower_block_to_explicit_input(nir_block *block, nir_builder *b, struct state *state)
+{
+ nir_foreach_instr_safe (instr, block) { // instructions are replaced/removed during the walk
+ if (instr->type != nir_instr_type_intrinsic)
+ continue; // only intrinsics are candidates for lowering
+
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+ switch (intr->intrinsic) {
+ case nir_intrinsic_load_per_vertex_input: {
+ // src[] = { vertex, offset }.
+
+ b->cursor = nir_before_instr(&intr->instr);
+
+ nir_ssa_def *offset = build_local_offset(b, state,
+ intr->src[0].ssa, // this is typically gl_InvocationID
+ nir_intrinsic_base(intr),
+ intr->src[1].ssa);
+
+ replace_intrinsic(b, intr, nir_intrinsic_load_shared_ir3, offset, NULL, NULL); // the computed offset is the only source passed on
+ break;
+ }
+
+ case nir_intrinsic_load_invocation_id: {
+ b->cursor = nir_before_instr(&intr->instr);
+
+ nir_ssa_def *iid = build_invocation_id(b, state);
+ nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(iid)); // point every user at the rebuilt id
+ nir_instr_remove(&intr->instr); // the original intrinsic is no longer referenced
+ break;
+ }
+
+ default: // all other intrinsics pass through unchanged
+ break;
+ }
+ }
+}
+
+void
+ir3_nir_lower_to_explicit_input(nir_shader *shader, struct ir3_compiler *compiler)
+{
+ struct state state = { };
+
+ /* Entry point of the explicit-input lowering: the stage header is loaded
+ * once at the top of the entrypoint and every block is then run through
+ * lower_block_to_explicit_input().
+ *
+ * when using stl/ldl (instead of stlw/ldlw) for linking VS and HS,
+ * HS uses a different primitive id, which starts at bit 16 in the header.
+ * In every other case local_primitive_id_start keeps the 0 it got from
+ * the zero-initialised state.
+ */
+ if (shader->info.stage == MESA_SHADER_TESS_CTRL && compiler->tess_use_shared)
+ state.local_primitive_id_start = 16;
+
+ nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+ assert(impl);
+
+ nir_builder b;
+ nir_builder_init(&b, impl);
+ b.cursor = nir_before_cf_list(&impl->body); // the header load below is emitted ahead of the existing body
+
+ if (shader->info.stage == MESA_SHADER_GEOMETRY) // GS reads the GS header; every other stage reads the TCS header
+ state.header = nir_load_gs_header_ir3(&b);
+ else
+ state.header = nir_load_tcs_header_ir3(&b);
+
+ nir_foreach_block_safe (block, impl)
+ lower_block_to_explicit_input(block, &b, &state); /* NOTE(review): no
+ * nir_metadata_preserve() follows this walk, unlike in
+ * ir3_nir_lower_to_explicit_output() -- confirm that is intended. */
}
+
static nir_ssa_def *
build_per_vertex_offset(nir_builder *b, struct state *state,
nir_ssa_def *vertex, nir_ssa_def *offset, nir_variable *var)
return build_per_vertex_offset(b, state, nir_imm_int(b, 0), offset, var);
}
-static nir_ssa_def *
-build_tessfactor_base(nir_builder *b, gl_varying_slot slot, struct state *state)
+static void
+tess_level_components(struct state *state, uint32_t *inner, uint32_t *outer)
{
- uint32_t inner_levels, outer_levels;
switch (state->topology) {
case IR3_TESS_TRIANGLES:
- inner_levels = 1;
- outer_levels = 3;
+ *inner = 1;
+ *outer = 3;
break;
case IR3_TESS_QUADS:
- inner_levels = 2;
- outer_levels = 4;
+ *inner = 2;
+ *outer = 4;
break;
case IR3_TESS_ISOLINES:
- inner_levels = 0;
- outer_levels = 2;
+ *inner = 0;
+ *outer = 2;
break;
default:
unreachable("bad");
}
+}
+
+static nir_ssa_def *
+build_tessfactor_base(nir_builder *b, gl_varying_slot slot, struct state *state)
+{
+ uint32_t inner_levels, outer_levels;
+ tess_level_components(state, &inner_levels, &outer_levels);
const uint32_t patch_stride = 1 + inner_levels + outer_levels;
static void
lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
{
- nir_foreach_instr_safe(instr, block) {
+ nir_foreach_instr_safe (instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
switch (intr->intrinsic) {
- case nir_intrinsic_load_invocation_id:
- b->cursor = nir_before_instr(&intr->instr);
-
- nir_ssa_def *invocation_id = build_invocation_id(b, state);
- nir_ssa_def_rewrite_uses(&intr->dest.ssa,
- nir_src_for_ssa(invocation_id));
- nir_instr_remove(&intr->instr);
- break;
-
- case nir_intrinsic_barrier:
+ case nir_intrinsic_control_barrier:
case nir_intrinsic_memory_barrier_tcs_patch:
/* Hull shaders dispatch 32 wide so an entire patch will always
* fit in a single warp and execute in lock-step. Consequently,
b->cursor = nir_before_instr(&intr->instr);
nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
- nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
+ nir_variable *var = get_var(b->shader, nir_var_shader_out, nir_intrinsic_base(intr));
nir_ssa_def *offset = build_per_vertex_offset(b, state,
intr->src[0].ssa, intr->src[1].ssa, var);
b->cursor = nir_before_instr(&intr->instr);
+ /* sparse writemask not supported */
+ assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));
+
nir_ssa_def *value = intr->src[0].ssa;
nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
- nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
+ nir_variable *var = get_var(b->shader, nir_var_shader_out, nir_intrinsic_base(intr));
nir_ssa_def *offset = build_per_vertex_offset(b, state,
intr->src[1].ssa, intr->src[2].ssa, var);
- nir_intrinsic_instr *store =
- replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, address,
- nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr))));
-
- nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intr));
+ replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value, address,
+ nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr))));
break;
}
- case nir_intrinsic_load_per_vertex_input: {
- // src[] = { vertex, offset }.
-
- b->cursor = nir_before_instr(&intr->instr);
-
- nir_ssa_def *offset = build_local_offset(b, state,
- intr->src[0].ssa, // this is typically gl_InvocationID
- nir_intrinsic_base(intr),
- intr->src[1].ssa);
+ case nir_intrinsic_load_output: {
+ // src[] = { offset }.
- replace_intrinsic(b, intr, nir_intrinsic_load_shared_ir3, offset, NULL, NULL);
- break;
- }
+ nir_variable *var = get_var(b->shader, nir_var_shader_out, nir_intrinsic_base(intr));
- case nir_intrinsic_load_tess_level_inner:
- case nir_intrinsic_load_tess_level_outer: {
b->cursor = nir_before_instr(&intr->instr);
- gl_varying_slot slot;
- if (intr->intrinsic == nir_intrinsic_load_tess_level_inner)
- slot = VARYING_SLOT_TESS_LEVEL_INNER;
- else
- slot = VARYING_SLOT_TESS_LEVEL_OUTER;
+ nir_ssa_def *address, *offset;
- nir_ssa_def *address = nir_load_tess_factor_base_ir3(b);
- nir_ssa_def *offset = build_tessfactor_base(b, slot, state);
+ /* note if vectorization of the tess level loads ever happens:
+ * "ldg" across 16-byte boundaries can behave incorrectly if results
+ * are never used. most likely some issue with (sy) not properly
+ * syncing with values coming from a second memory transaction.
+ */
+ if (is_tess_levels(var)) {
+ assert(intr->dest.ssa.num_components == 1);
+ address = nir_load_tess_factor_base_ir3(b);
+ offset = build_tessfactor_base(b, var->data.location, state);
+ } else {
+ address = nir_load_tess_param_base_ir3(b);
+ offset = build_patch_offset(b, state, intr->src[0].ssa, var);
+ }
replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
break;
}
- case nir_intrinsic_load_output: {
- // src[] = { offset }.
+ case nir_intrinsic_store_output: {
+ // src[] = { value, offset }.
+
+ /* write patch output to bo */
- nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
+ nir_variable *var = get_var(b->shader, nir_var_shader_out, nir_intrinsic_base(intr));
b->cursor = nir_before_instr(&intr->instr);
- nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
- nir_ssa_def *offset = build_patch_offset(b, state, intr->src[0].ssa, var);
+ /* sparse writemask not supported */
+ assert(util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));
- replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
- break;
- }
+ if (is_tess_levels(var)) {
+ /* tess levels are defined as float[4] and float[2],
+ * but tess factor BO has smaller sizes for tris/isolines,
+ * so we have to discard any writes beyond the number of
+ * components for inner/outer levels */
+ uint32_t inner_levels, outer_levels, levels;
+ tess_level_components(state, &inner_levels, &outer_levels);
- case nir_intrinsic_store_output: {
- // src[] = { value, offset }.
+ if (var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
+ levels = outer_levels;
+ else
+ levels = inner_levels;
- /* write patch output to bo */
+ assert(intr->src[0].ssa->num_components == 1);
- nir_variable *var = get_var(&b->shader->outputs, nir_intrinsic_base(intr));
+ nir_ssa_def *offset =
+ nir_iadd_imm(b, intr->src[1].ssa, nir_intrinsic_component(intr));
- nir_ssa_def **levels = NULL;
- if (var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
- levels = state->outer_levels;
- else if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER)
- levels = state->inner_levels;
+ nir_if *nif = nir_push_if(b, nir_ult(b, offset, nir_imm_int(b, levels)));
- b->cursor = nir_before_instr(&intr->instr);
+ replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
+ intr->src[0].ssa,
+ nir_load_tess_factor_base_ir3(b),
+ nir_iadd(b, offset, build_tessfactor_base(b, var->data.location, state)));
- if (levels) {
- for (int i = 0; i < 4; i++)
- if (nir_intrinsic_write_mask(intr) & (1 << i))
- levels[i] = nir_channel(b, intr->src[0].ssa, i);
- nir_instr_remove(&intr->instr);
+ nir_pop_if(b, nif);
} else {
nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
nir_ssa_def *offset = build_patch_offset(b, state, intr->src[1].ssa, var);
debug_assert(nir_intrinsic_component(intr) == 0);
- nir_intrinsic_instr *store =
- replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
- intr->src[0].ssa, address, offset);
-
- nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intr));
+ replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
+ intr->src[0].ssa, address, offset);
}
break;
}
static void
emit_tess_epilouge(nir_builder *b, struct state *state)
{
- nir_ssa_def *tessfactor_address = nir_load_tess_factor_base_ir3(b);
- nir_ssa_def *levels[2];
-
- /* Then emit the epilogue that actually writes out the tessellation levels
- * to the BOs.
- */
- switch (state->topology) {
- case IR3_TESS_TRIANGLES:
- levels[0] = nir_vec4(b, state->outer_levels[0], state->outer_levels[1],
- state->outer_levels[2], state->inner_levels[0]);
- levels[1] = NULL;
- break;
- case IR3_TESS_QUADS:
- levels[0] = nir_vec4(b, state->outer_levels[0], state->outer_levels[1],
- state->outer_levels[2], state->outer_levels[3]);
- levels[1] = nir_vec2(b, state->inner_levels[0], state->inner_levels[1]);
- break;
- case IR3_TESS_ISOLINES:
- levels[0] = nir_vec2(b, state->outer_levels[0], state->outer_levels[1]);
- levels[1] = NULL;
- break;
- default:
- unreachable("nope");
- }
-
- nir_ssa_def *offset = build_tessfactor_base(b, VARYING_SLOT_TESS_LEVEL_OUTER, state);
-
- nir_intrinsic_instr *store =
- nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_global_ir3);
-
- store->src[0] = nir_src_for_ssa(levels[0]);
- store->src[1] = nir_src_for_ssa(tessfactor_address);
- store->src[2] = nir_src_for_ssa(offset);
- nir_builder_instr_insert(b, &store->instr);
- store->num_components = levels[0]->num_components;
- nir_intrinsic_set_write_mask(store, (1 << levels[0]->num_components) - 1);
-
- if (levels[1]) {
- store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_global_ir3);
- offset = nir_iadd(b, offset, nir_imm_int(b, levels[0]->num_components));
-
- store->src[0] = nir_src_for_ssa(levels[1]);
- store->src[1] = nir_src_for_ssa(tessfactor_address);
- store->src[2] = nir_src_for_ssa(offset);
- nir_builder_instr_insert(b, &store->instr);
- store->num_components = levels[1]->num_components;
- nir_intrinsic_set_write_mask(store, (1 << levels[1]->num_components) - 1);
- }
-
- /* Finally, Insert endpatch instruction, maybe signalling the tess engine
- * that another primitive is ready?
+ /* Insert endpatch instruction:
+ *
+ * TODO we should re-work this to use normal flow control.
*/
nir_intrinsic_instr *end_patch =
}
void
-ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader *s, unsigned topology)
+ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader_variant *v,
+ unsigned topology)
{
struct state state = { .topology = topology };
nir_print_shader(shader, stderr);
}
- build_primitive_map(shader, &state.map, &shader->outputs);
- memcpy(s->output_loc, state.map.loc, sizeof(s->output_loc));
- s->output_size = state.map.stride;
+ build_primitive_map(shader, nir_var_shader_out, &state.map);
+ memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc));
+ v->output_size = state.map.stride;
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
assert(impl);
state.header = nir_load_tcs_header_ir3(&b);
- nir_foreach_block_safe(block, impl)
+ nir_foreach_block_safe (block, impl)
lower_tess_ctrl_block(block, &b, &state);
/* Now move the body of the TCS into a conditional:
static void
lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state)
{
- nir_foreach_instr_safe(instr, block) {
+ nir_foreach_instr_safe (instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
b->cursor = nir_before_instr(&intr->instr);
nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
- nir_variable *var = get_var(&b->shader->inputs, nir_intrinsic_base(intr));
+ nir_variable *var = get_var(b->shader, nir_var_shader_in, nir_intrinsic_base(intr));
nir_ssa_def *offset = build_per_vertex_offset(b, state,
intr->src[0].ssa, intr->src[1].ssa, var);
break;
}
- case nir_intrinsic_load_tess_level_inner:
- case nir_intrinsic_load_tess_level_outer: {
- b->cursor = nir_before_instr(&intr->instr);
-
- gl_varying_slot slot;
- if (intr->intrinsic == nir_intrinsic_load_tess_level_inner)
- slot = VARYING_SLOT_TESS_LEVEL_INNER;
- else
- slot = VARYING_SLOT_TESS_LEVEL_OUTER;
-
- nir_ssa_def *address = nir_load_tess_factor_base_ir3(b);
- nir_ssa_def *offset = build_tessfactor_base(b, slot, state);
-
- /* Loading across a vec4 (16b) memory boundary is problematic
- * if we don't use components from the second vec4. The tess
- * levels aren't guaranteed to be vec4 aligned and we don't
- * know which levels are actually used, so we load each
- * component individually.
- */
- nir_ssa_def *levels[4];
- for (unsigned i = 0; i < intr->num_components; i++) {
- nir_intrinsic_instr *new_intr =
- nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_global_ir3);
-
- new_intr->src[0] = nir_src_for_ssa(address);
- new_intr->src[1] = nir_src_for_ssa(nir_iadd(b, offset, nir_imm_int(b, i)));
- new_intr->num_components = 1;
- nir_ssa_dest_init(&new_intr->instr, &new_intr->dest, 1, 32, NULL);
- nir_builder_instr_insert(b, &new_intr->instr);
- levels[i] = &new_intr->dest.ssa;
- }
-
- nir_ssa_def *v = nir_vec(b, levels, intr->num_components);
-
- nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(v));
-
- nir_instr_remove(&intr->instr);
- break;
- }
-
case nir_intrinsic_load_input: {
// src[] = { offset }.
- nir_variable *var = get_var(&b->shader->inputs, nir_intrinsic_base(intr));
+ nir_variable *var = get_var(b->shader, nir_var_shader_in, nir_intrinsic_base(intr));
debug_assert(var->data.patch);
b->cursor = nir_before_instr(&intr->instr);
- nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
- nir_ssa_def *offset = build_patch_offset(b, state, intr->src[0].ssa, var);
+ nir_ssa_def *address, *offset;
+
+ /* note if vectorization of the tess level loads ever happens:
+ * "ldg" across 16-byte boundaries can behave incorrectly if results
+ * are never used. most likely some issue with (sy) not properly
+ * syncing with values coming from a second memory transaction.
+ */
+ if (is_tess_levels(var)) {
+ assert(intr->dest.ssa.num_components == 1);
+ address = nir_load_tess_factor_base_ir3(b);
+ offset = build_tessfactor_base(b, var->data.location, state);
+ } else {
+ address = nir_load_tess_param_base_ir3(b);
+ offset = build_patch_offset(b, state, intr->src[0].ssa, var);
+ }
+
+ offset = nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr)));
replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address, offset, NULL);
break;
}
/* Build map of inputs so we have the sizes. */
- build_primitive_map(shader, &state.map, &shader->inputs);
+ build_primitive_map(shader, nir_var_shader_in, &state.map);
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
assert(impl);
nir_builder b;
nir_builder_init(&b, impl);
- nir_foreach_block_safe(block, impl)
+ nir_foreach_block_safe (block, impl)
lower_tess_eval_block(block, &b, &state);
nir_metadata_preserve(impl, 0);
static void
lower_gs_block(nir_block *block, nir_builder *b, struct state *state)
{
- nir_intrinsic_instr *outputs[32] = {};
-
- nir_foreach_instr_safe(instr, block) {
+ nir_foreach_instr_safe (instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
switch (intr->intrinsic) {
- case nir_intrinsic_store_output: {
- // src[] = { value, offset }.
-
- uint32_t loc = nir_intrinsic_base(intr);
- outputs[loc] = intr;
- break;
- }
-
case nir_intrinsic_end_primitive: {
b->cursor = nir_before_instr(&intr->instr);
- nir_store_var(b, state->vertex_flags_var, nir_imm_int(b, 4), 0x1);
+ nir_store_var(b, state->vertex_flags_out, nir_imm_int(b, 4), 0x1);
nir_instr_remove(&intr->instr);
break;
}
case nir_intrinsic_emit_vertex: {
-
/* Load the vertex count */
b->cursor = nir_before_instr(&intr->instr);
nir_ssa_def *count = nir_load_var(b, state->vertex_count_var);
nir_push_if(b, nir_ieq(b, count, local_thread_id(b)));
- for (uint32_t i = 0; i < ARRAY_SIZE(outputs); i++) {
- if (outputs[i]) {
- nir_store_var(b, state->output_vars[i],
- outputs[i]->src[0].ssa,
- (1 << outputs[i]->num_components) - 1);
-
- nir_instr_remove(&outputs[i]->instr);
- }
- outputs[i] = NULL;
+ foreach_two_lists(dest_node, &state->emit_outputs, src_node, &state->old_outputs) {
+ nir_variable *dest = exec_node_data(nir_variable, dest_node, node);
+ nir_variable *src = exec_node_data(nir_variable, src_node, node);
+ nir_copy_var(b, dest, src);
}
nir_instr_remove(&intr->instr);
nir_store_var(b, state->emitted_vertex_var,
nir_iadd(b, nir_load_var(b, state->emitted_vertex_var), nir_imm_int(b, 1)), 0x1);
- nir_store_var(b, state->vertex_flags_out,
- nir_load_var(b, state->vertex_flags_var), 0x1);
-
nir_pop_if(b, NULL);
/* Increment the vertex count by 1 */
nir_store_var(b, state->vertex_count_var,
nir_iadd(b, count, nir_imm_int(b, 1)), 0x1); /* .x */
- nir_store_var(b, state->vertex_flags_var, nir_imm_int(b, 0), 0x1);
-
- break;
- }
-
- case nir_intrinsic_load_per_vertex_input: {
- // src[] = { vertex, offset }.
-
- b->cursor = nir_before_instr(&intr->instr);
-
- nir_ssa_def *offset = build_local_offset(b, state,
- intr->src[0].ssa, // this is typically gl_InvocationID
- nir_intrinsic_base(intr),
- intr->src[1].ssa);
-
- replace_intrinsic(b, intr, nir_intrinsic_load_shared_ir3, offset, NULL, NULL);
- break;
- }
+ nir_store_var(b, state->vertex_flags_out, nir_imm_int(b, 0), 0x1);
- case nir_intrinsic_load_invocation_id: {
- b->cursor = nir_before_instr(&intr->instr);
-
- nir_ssa_def *iid = build_invocation_id(b, state);
- nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(iid));
- nir_instr_remove(&intr->instr);
break;
}
}
}
-static void
-emit_store_outputs(nir_builder *b, struct state *state)
-{
- /* This also stores the internally added vertex_flags output. */
-
- for (uint32_t i = 0; i < ARRAY_SIZE(state->output_vars); i++) {
- if (!state->output_vars[i])
- continue;
-
- nir_intrinsic_instr *store =
- nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
-
- nir_intrinsic_set_base(store, i);
- store->src[0] = nir_src_for_ssa(nir_load_var(b, state->output_vars[i]));
- store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
- store->num_components = store->src[0].ssa->num_components;
-
- nir_builder_instr_insert(b, &store->instr);
- }
-}
-
-static void
-clean_up_split_vars(nir_shader *shader, struct exec_list *list)
-{
- uint32_t components[32] = {};
-
- nir_foreach_variable(var, list) {
- uint32_t mask =
- ((1 << glsl_get_components(glsl_without_array(var->type))) - 1) << var->data.location_frac;
- components[var->data.driver_location] |= mask;
- }
-
- nir_foreach_variable_safe(var, list) {
- uint32_t mask =
- ((1 << glsl_get_components(glsl_without_array(var->type))) - 1) << var->data.location_frac;
- bool subset =
- (components[var->data.driver_location] | mask) != mask;
- if (subset)
- exec_node_remove(&var->node);
- }
-}
-
void
-ir3_nir_lower_gs(nir_shader *shader, struct ir3_shader *s)
+ir3_nir_lower_gs(nir_shader *shader)
{
struct state state = { };
nir_print_shader(shader, stderr);
}
- clean_up_split_vars(shader, &shader->inputs);
- clean_up_split_vars(shader, &shader->outputs);
-
- build_primitive_map(shader, &state.map, &shader->inputs);
-
- uint32_t loc = 0;
- nir_foreach_variable(var, &shader->outputs) {
- uint32_t end = var->data.driver_location + glsl_count_attribute_slots(var->type, false);
- loc = MAX2(loc, end);
- }
+ build_primitive_map(shader, nir_var_shader_in, &state.map);
+ /* Create an output var for vertex_flags. This will be shadowed below,
+ * same way regular outputs get shadowed, and this variable will become a
+ * temporary.
+ */
state.vertex_flags_out = nir_variable_create(shader, nir_var_shader_out,
glsl_uint_type(), "vertex_flags");
- state.vertex_flags_out->data.driver_location = loc;
+ state.vertex_flags_out->data.driver_location = shader->num_outputs++;
state.vertex_flags_out->data.location = VARYING_SLOT_GS_VERTEX_FLAGS_IR3;
+ state.vertex_flags_out->data.interpolation = INTERP_MODE_NONE;
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
assert(impl);
state.header = nir_load_gs_header_ir3(&b);
- nir_foreach_variable(var, &shader->outputs) {
- state.output_vars[var->data.driver_location] =
- nir_local_variable_create(impl, var->type,
- ralloc_asprintf(var, "%s:gs-temp", var->name));
+ /* Generate two sets of shadow vars for the output variables. The first
+ * set replaces the real outputs and the second set (emit_outputs) we'll
+ * assign in the emit_vertex conditionals. Then at the end of the shader
+ * we copy the emit_outputs to the real outputs, so that we get
+ * store_output in uniform control flow.
+ */
+ exec_list_make_empty(&state.old_outputs);
+ nir_foreach_shader_out_variable_safe(var, shader) {
+ exec_node_remove(&var->node);
+ exec_list_push_tail(&state.old_outputs, &var->node);
+ }
+ exec_list_make_empty(&state.new_outputs);
+ exec_list_make_empty(&state.emit_outputs);
+ nir_foreach_variable_in_list(var, &state.old_outputs) {
+ /* Create a new output var by cloning the original output var and
+ * stealing the name.
+ */
+ nir_variable *output = nir_variable_clone(var, shader);
+ exec_list_push_tail(&state.new_outputs, &output->node);
+
+ /* Rewrite the original output to be a shadow variable. */
+ var->name = ralloc_asprintf(var, "%s@gs-temp", output->name);
+ var->data.mode = nir_var_shader_temp;
+
+ /* Clone the shadow variable to create the emit shadow variable that
+ * we'll assign in the emit conditionals.
+ */
+ nir_variable *emit_output = nir_variable_clone(var, shader);
+ emit_output->name = ralloc_asprintf(var, "%s@emit-temp", output->name);
+ exec_list_push_tail(&state.emit_outputs, &emit_output->node);
}
+ /* During the shader we'll keep track of which vertex we're currently
+ * emitting for the EmitVertex test and how many vertices we emitted so we
+ * know to discard if we didn't emit any. In most simple shaders, this can
+ * all be statically determined and gets optimized away.
+ */
state.vertex_count_var =
nir_local_variable_create(impl, glsl_uint_type(), "vertex_count");
state.emitted_vertex_var =
nir_local_variable_create(impl, glsl_uint_type(), "emitted_vertex");
- state.vertex_flags_var =
- nir_local_variable_create(impl, glsl_uint_type(), "vertex_flags");
- state.vertex_flags_out = state.output_vars[state.vertex_flags_out->data.driver_location];
- /* initialize to 0 */
+ /* Initialize to 0. */
b.cursor = nir_before_cf_list(&impl->body);
nir_store_var(&b, state.vertex_count_var, nir_imm_int(&b, 0), 0x1);
nir_store_var(&b, state.emitted_vertex_var, nir_imm_int(&b, 0), 0x1);
- nir_store_var(&b, state.vertex_flags_var, nir_imm_int(&b, 4), 0x1);
+ nir_store_var(&b, state.vertex_flags_out, nir_imm_int(&b, 4), 0x1);
- nir_foreach_block_safe(block, impl)
+ nir_foreach_block_safe (block, impl)
lower_gs_block(block, &b, &state);
set_foreach(impl->end_block->predecessors, block_entry) {
nir_builder_instr_insert(&b, &discard_if->instr);
- emit_store_outputs(&b, &state);
+ foreach_two_lists(dest_node, &state.new_outputs, src_node, &state.emit_outputs) {
+ nir_variable *dest = exec_node_data(nir_variable, dest_node, node);
+ nir_variable *src = exec_node_data(nir_variable, src_node, node);
+ nir_copy_var(&b, dest, src);
+ }
}
+ exec_list_append(&shader->variables, &state.old_outputs);
+ exec_list_append(&shader->variables, &state.emit_outputs);
+ exec_list_append(&shader->variables, &state.new_outputs);
+
nir_metadata_preserve(impl, 0);
+ nir_lower_global_vars_to_local(shader);
+ nir_split_var_copies(shader);
+ nir_lower_var_copies(shader);
+
+ nir_fixup_deref_modes(shader);
+
if (shader_debug_enabled(shader->info.stage)) {
fprintf(stderr, "NIR (after gs lowering):\n");
nir_print_shader(shader, stderr);
}
}
+
+uint32_t
+ir3_link_geometry_stages(const struct ir3_shader_variant *producer,
+ const struct ir3_shader_variant *consumer,
+ uint32_t *locs)
+{
+ uint32_t num_loc = 0, factor;
+ /* Overview: for every consumer input whose varying location matches a
+ * producer output, locs[] (indexed by the input's driver_location)
+ * receives the producer's output_loc entry multiplied by factor. The
+ * return value is driver_location + 1 of the last input matched, or 0
+ * when nothing matched. Entries of locs[] for unmatched inputs are not
+ * written, and nothing here bounds-checks locs[], so the caller has to
+ * size it for every driver_location that can match.
+ *
+ * The switch below picks factor from the consumer stage.
+ */
+ switch (consumer->type) {
+ case MESA_SHADER_TESS_CTRL:
+ case MESA_SHADER_GEOMETRY:
+ /* These stages load with ldlw, which expects byte offsets. */
+ factor = 4;
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ /* The tess eval shader uses ldg, which takes dword offsets. */
+ factor = 1;
+ break;
+ default:
+ unreachable("bad shader stage");
+ }
+
+ nir_foreach_shader_in_variable(in_var, consumer->shader->nir) {
+ nir_foreach_shader_out_variable(out_var, producer->shader->nir) {
+ if (in_var->data.location == out_var->data.location) { // link by varying slot, not by driver_location
+ locs[in_var->data.driver_location] =
+ producer->output_loc[out_var->data.driver_location] * factor;
+
+ debug_assert(num_loc <= in_var->data.driver_location + 1); // matches must not step back to a lower driver_location
+ num_loc = in_var->data.driver_location + 1;
+ }
+ }
+ }
+
+ return num_loc;
+}