From: Iago Toral Quiroga Date: Tue, 29 Oct 2019 09:12:28 +0000 (+0100) Subject: v3d: handle writes to gl_Layer from geometry shaders X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=6c7a2b69f8df5a9c0cb8c8f3b099694bbe1b0b53;p=mesa.git v3d: handle writes to gl_Layer from geometry shaders When geometry shaders write a value to gl_Layer that doesn't correspond to an existing layer in the target framebuffer the rendering behavior is undefined according to the spec, however, there are CTS tests that trigger this scenario on purpose, probably to ensure that nothing terrible happens. For V3D, this situation is problematic because the binner uses the layer index to select the offset to write into the tile state data, and we only allocate tile state for MAX2(num_layers, 1), so we want to make sure we don't produce values that would lead to out of bounds writes. The simulator has an assert to catch this, although we haven't observed issues in actual hardware it is probably best to play safe. Reviewed-by: Alejandro Piñeiro --- diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index e56632590d6..e2de77ddf05 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -2346,6 +2346,11 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) ntq_store_dest(c, &instr->dest, 0, vir_IID(c)); break; + case nir_intrinsic_load_fb_layers_v3d: + ntq_store_dest(c, &instr->dest, 0, + vir_uniform(c, QUNIFORM_FB_LAYERS, 0)); + break; + default: fprintf(stderr, "Unknown intrinsic: "); nir_print_instr(&instr->instr, stderr); diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index 4249c181bf1..0489ebdc12e 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -279,6 +279,14 @@ enum quniform_contents { * L2T cache will effectively be the shared memory area. */ QUNIFORM_SHARED_OFFSET, + + /** + * Returns the number of layers in the framebuffer. + * + * This is used to cap gl_Layer in geometry shaders to avoid + * out-of-bounds accesses into the tile state during binning. + */ + QUNIFORM_FB_LAYERS, }; static inline uint32_t v3d_unit_data_create(uint32_t unit, uint32_t value) diff --git a/src/broadcom/compiler/v3d_nir_lower_io.c b/src/broadcom/compiler/v3d_nir_lower_io.c index 9b7db65db7e..855b9c44b46 100644 --- a/src/broadcom/compiler/v3d_nir_lower_io.c +++ b/src/broadcom/compiler/v3d_nir_lower_io.c @@ -193,6 +193,46 @@ v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b, v3d_nir_store_output(b, state->psiz_vpm_offset, offset_reg, src); } + if (var->data.location == VARYING_SLOT_LAYER) { + assert(c->s->info.stage == MESA_SHADER_GEOMETRY); + nir_ssa_def *header = nir_load_var(b, state->gs.header_var); + header = nir_iand(b, header, nir_imm_int(b, 0xff00ffff)); + + /* From the GLES 3.2 spec: + * + * "When fragments are written to a layered framebuffer, the + * fragment’s layer number selects an image from the array + * of images at each attachment (...). If the fragment’s + * layer number is negative, or greater than or equal to + * the minimum number of layers of any attachment, the + * effects of the fragment on the framebuffer contents are + * undefined." + * + * This suggests we can just ignore that situation, however, + * for V3D an out-of-bounds layer index means that the binner + * might do out-of-bounds writes access to the tile state. The + * simulator has an assert to catch this, so we play safe here + * and we make sure that doesn't happen by setting gl_Layer + * to 0 in that case (we always allocate tile state for at + * least one layer). + */ + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b->shader, + nir_intrinsic_load_fb_layers_v3d); + load->num_components = 1; + nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL); + nir_builder_instr_insert(b, &load->instr); + nir_ssa_def *fb_layers = &load->dest.ssa; + + nir_ssa_def *cond = nir_ige(b, src, fb_layers); + nir_ssa_def *layer_id = + nir_bcsel(b, cond, + nir_imm_int(b, 0), + nir_ishl(b, src, nir_imm_int(b, 16))); + header = nir_ior(b, header, layer_id); + nir_store_var(b, state->gs.header_var, header, 0x1); + } + /* Scalarize outputs if it hasn't happened already, since we want to * schedule each VPM write individually. We can skip any outut * components not read by the FS. diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 3939f8ff510..c53babdde55 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -868,3 +868,7 @@ load("tlb_color_v3d", 1, [BASE, COMPONENT], []) # src[] = { value, render_target } # BASE = sample index store("tlb_sample_color_v3d", 2, [BASE, COMPONENT, TYPE], []) + +# V3D-specific intrinsic to load the number of layers attached to +# the target framebuffer +intrinsic("load_fb_layers_v3d", dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER]) diff --git a/src/gallium/drivers/v3d/v3d_uniforms.c b/src/gallium/drivers/v3d/v3d_uniforms.c index c94f6be4b76..ab57880f06f 100644 --- a/src/gallium/drivers/v3d/v3d_uniforms.c +++ b/src/gallium/drivers/v3d/v3d_uniforms.c @@ -373,6 +373,10 @@ v3d_write_uniforms(struct v3d_context *v3d, struct v3d_job *job, v3d->compute_shared_memory, 0); break; + case QUNIFORM_FB_LAYERS: + cl_aligned_u32(&uniforms, job->num_layers); + break; + default: assert(quniform_contents_is_texture_p0(uinfo->contents[i])); @@ -465,6 +469,10 @@ v3d_set_shader_uniform_dirty_flags(struct v3d_compiled_shader *shader) /* Compute always recalculates uniforms. */ break; + case QUNIFORM_FB_LAYERS: + dirty |= VC5_DIRTY_FRAMEBUFFER; + break; + default: assert(quniform_contents_is_texture_p0(shader->prog_data.base->uniforms.contents[i])); dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX |