From: Kenneth Graunke Date: Tue, 12 May 2015 08:05:29 +0000 (-0700) Subject: nir: Add new GS intrinsics that maintain a count of emitted vertices. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=542d40d698a698dc656c7a64ddcea07060707555;p=mesa.git nir: Add new GS intrinsics that maintain a count of emitted vertices. This patch also introduces a lowering pass to convert the simple GS intrinsics to the new ones. See the comments above that for the rationale behind the new intrinsics. This should be useful for i965; it's a generic enough mechanism that I could see other drivers potentially using it as well, so I don't feel too bad about putting it in the generic code. v2: - Use nir_after_block_before_jump for the cursor (caught by Jason Ekstrand - I'd mistakenly used nir_after_block when rebasing this code onto the new NIR control flow API). - Remove the old emit_vertex intrinsic at the end, rather than in the middle (requested by Jason). - Use state->... directly rather than locals (requested by Jason). - Report progress from nir_lower_gs_intrinsics() (requested by me). - Remove "Authors:" section from file comment (requested by Michael Schellenberger Costa). Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index f7c69f4fe3f..a8f4994cf34 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -37,6 +37,7 @@ NIR_FILES = \ nir/nir_lower_atomics.c \ nir/nir_lower_clip.c \ nir/nir_lower_global_vars_to_local.c \ + nir/nir_lower_gs_intrinsics.c \ nir/nir_lower_load_const_to_scalar.c \ nir/nir_lower_locals_to_regs.c \ nir/nir_lower_idiv.c \ diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 619a363b540..4f45770e02d 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1883,6 +1883,8 @@ void nir_lower_two_sided_color(nir_shader *shader); void nir_lower_atomics(nir_shader *shader); void nir_lower_to_source_mods(nir_shader *shader); +bool nir_lower_gs_intrinsics(nir_shader *shader); + bool nir_normalize_cubemap_coords(nir_shader *shader); void nir_live_variables_impl(nir_function_impl *impl); diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h index 70cae4256a3..b21460da5c0 100644 --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@ -79,9 +79,30 @@ BARRIER(memory_barrier) /** A conditional discard, with a single boolean source. */ INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0) +/** + * Basic Geometry Shader intrinsics. + * + * emit_vertex implements GLSL's EmitStreamVertex() built-in. It takes a single + * index, which is the stream ID to write to. + * + * end_primitive implements GLSL's EndPrimitive() built-in. + */ INTRINSIC(emit_vertex, 0, ARR(), false, 0, 0, 1, 0) INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, 0) +/** + * Geometry Shader intrinsics with a vertex count. + * + * Alternatively, drivers may implement these intrinsics, and use + * nir_lower_gs_intrinsics() to convert from the basic intrinsics. + * + * These maintain a count of the number of vertices emitted, as an additional + * unsigned integer source. + */ +INTRINSIC(emit_vertex_with_counter, 1, ARR(1), false, 0, 0, 1, 0) +INTRINSIC(end_primitive_with_counter, 1, ARR(1), false, 0, 0, 1, 0) +INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, 0) + /* * Atomic counters * diff --git a/src/glsl/nir/nir_lower_gs_intrinsics.c b/src/glsl/nir/nir_lower_gs_intrinsics.c new file mode 100644 index 00000000000..2ee4e5c45d6 --- /dev/null +++ b/src/glsl/nir/nir_lower_gs_intrinsics.c @@ -0,0 +1,218 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir.h" +#include "nir_builder.h" + +/** + * \file nir_lower_gs_intrinsics.c + * + * Geometry Shaders can call EmitVertex()/EmitStreamVertex() to output an + * arbitrary number of vertices. However, the shader must declare the maximum + * number of vertices that it will ever output - further attempts to emit + * vertices result in undefined behavior according to the GLSL specification. + * + * Drivers might use this maximum number of vertices to allocate enough space + * to hold the geometry shader's output. Some drivers (such as i965) need to + * implement "safety checks" which ensure that the shader hasn't emitted too + * many vertices, to avoid overflowing that space and trashing other memory. + * + * The count of emitted vertices can also be useful in buffer offset + * calculations, so drivers know where to write the GS output. + * + * However, for simple geometry shaders that emit a statically determinable + * number of vertices, this extra bookkeeping is unnecessary and inefficient. + * By tracking the vertex count in NIR, we allow constant folding/propagation + * and dead control flow optimizations to eliminate most of it where possible. + * + * This pass introduces a new global variable which stores the current vertex + * count (initialized to 0), and converts emit_vertex/end_primitive intrinsics + * to their *_with_counter variants. emit_vertex is also wrapped in a safety + * check to avoid buffer overflows. Finally, it adds a set_vertex_count + * intrinsic at the end of the program, informing the driver of the final + * vertex count. + */ + +struct state { + nir_builder *builder; + nir_variable *vertex_count_var; + bool progress; +}; + +/** + * Replace emit_vertex intrinsics with: + * + * if (vertex_count < max_vertices) { + * emit_vertex_with_counter vertex_count ... + * vertex_count += 1 + * } + */ +static void +rewrite_emit_vertex(nir_intrinsic_instr *intrin, struct state *state) +{ + nir_builder *b = state->builder; + + /* Load the vertex count */ + b->cursor = nir_before_instr(&intrin->instr); + nir_ssa_def *count = nir_load_var(b, state->vertex_count_var); + + nir_ssa_def *max_vertices = nir_imm_int(b, b->shader->gs.vertices_out); + + /* Create: if (vertex_count < max_vertices) and insert it. + * + * The new if statement needs to be hooked up to the control flow graph + * before we start inserting instructions into it. + */ + nir_if *if_stmt = nir_if_create(b->shader); + if_stmt->condition = nir_src_for_ssa(nir_ilt(b, count, max_vertices)); + nir_builder_cf_insert(b, &if_stmt->cf_node); + + /* Fill out the new then-block */ + b->cursor = nir_after_cf_list(&if_stmt->then_list); + + nir_intrinsic_instr *lowered = + nir_intrinsic_instr_create(b->shader, + nir_intrinsic_emit_vertex_with_counter); + lowered->const_index[0] = intrin->const_index[0]; + lowered->src[0] = nir_src_for_ssa(count); + nir_builder_instr_insert(b, &lowered->instr); + + /* Increment the vertex count by 1 */ + nir_store_var(b, state->vertex_count_var, + nir_iadd(b, count, nir_imm_int(b, 1))); + + nir_instr_remove(&intrin->instr); + + state->progress = true; +} + +/** + * Replace end_primitive with end_primitive_with_counter. + */ +static void +rewrite_end_primitive(nir_intrinsic_instr *intrin, struct state *state) +{ + nir_builder *b = state->builder; + + b->cursor = nir_before_instr(&intrin->instr); + nir_ssa_def *count = nir_load_var(b, state->vertex_count_var); + + nir_intrinsic_instr *lowered = + nir_intrinsic_instr_create(b->shader, + nir_intrinsic_end_primitive_with_counter); + lowered->const_index[0] = intrin->const_index[0]; + lowered->src[0] = nir_src_for_ssa(count); + nir_builder_instr_insert(b, &lowered->instr); + + nir_instr_remove(&intrin->instr); + + state->progress = true; +} + +static bool +rewrite_intrinsics(nir_block *block, void *closure) +{ + struct state *state = closure; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + switch (intrin->intrinsic) { + case nir_intrinsic_emit_vertex: + rewrite_emit_vertex(intrin, state); + break; + case nir_intrinsic_end_primitive: + rewrite_end_primitive(intrin, state); + break; + default: + /* not interesting; skip this */ + break; + } + } + + return true; +} + +/** + * Add a set_vertex_count intrinsic at the end of the program + * (representing the final vertex count). + */ +static void +append_set_vertex_count(nir_block *end_block, struct state *state) +{ + nir_builder *b = state->builder; + nir_shader *shader = state->builder->shader; + + /* Insert the new intrinsic in all of the predecessors of the end block, + * but before any jump instructions (return). + */ + struct set_entry *entry; + set_foreach(end_block->predecessors, entry) { + nir_block *pred = (nir_block *) entry->key; + b->cursor = nir_after_block_before_jump(pred); + + nir_ssa_def *count = nir_load_var(b, state->vertex_count_var); + + nir_intrinsic_instr *set_vertex_count = + nir_intrinsic_instr_create(shader, nir_intrinsic_set_vertex_count); + set_vertex_count->src[0] = nir_src_for_ssa(count); + + nir_builder_instr_insert(b, &set_vertex_count->instr); + } +} + +bool +nir_lower_gs_intrinsics(nir_shader *shader) +{ + struct state state; + state.progress = false; + + /* Create the counter variable */ + nir_variable *var = rzalloc(shader, nir_variable); + var->data.mode = nir_var_global; + var->type = glsl_uint_type(); + var->name = "vertex_count"; + var->constant_initializer = rzalloc(shader, nir_constant); /* initialize to 0 */ + + exec_list_push_tail(&shader->globals, &var->node); + state.vertex_count_var = var; + + nir_foreach_overload(shader, overload) { + if (overload->impl) { + nir_builder b; + nir_builder_init(&b, overload->impl); + state.builder = &b; + + nir_foreach_block(overload->impl, rewrite_intrinsics, &state); + + /* This only works because we have a single main() function. */ + append_set_vertex_count(overload->impl->end_block, &state); + + nir_metadata_preserve(overload->impl, 0); + } + } + + return state.progress; +}