v3d: handle writes to gl_Layer from geometry shaders
authorIago Toral Quiroga <itoral@igalia.com>
Tue, 29 Oct 2019 09:12:28 +0000 (10:12 +0100)
committerIago Toral Quiroga <itoral@igalia.com>
Mon, 16 Dec 2019 07:42:37 +0000 (08:42 +0100)
When geometry shaders write a value to gl_Layer that doesn't correspond to
an existing layer in the target framebuffer the rendering behavior is
undefined according to the spec, however, there are CTS tests that trigger
this scenario on purpose, probably to ensure that nothing terrible happens.

For V3D, this situation is problematic because the binner uses the layer
index to select the offset to write into the tile state data, and we only
allocate tile state for MAX2(num_layers, 1), so we want to make sure we
don't produce values that would lead to out of bounds writes. The simulator
has an assert to catch this, although we haven't observed issues in actual
hardware it is probably best to play safe.

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
src/broadcom/compiler/nir_to_vir.c
src/broadcom/compiler/v3d_compiler.h
src/broadcom/compiler/v3d_nir_lower_io.c
src/compiler/nir/nir_intrinsics.py
src/gallium/drivers/v3d/v3d_uniforms.c

index e56632590d6dad549b83b8cdea9eb17991af5703..e2de77ddf051e27c60691ff155337c4527b4444f 100644 (file)
@@ -2346,6 +2346,11 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
                 ntq_store_dest(c, &instr->dest, 0, vir_IID(c));
                 break;
 
+        case nir_intrinsic_load_fb_layers_v3d:
+                ntq_store_dest(c, &instr->dest, 0,
+                               vir_uniform(c, QUNIFORM_FB_LAYERS, 0));
+                break;
+
         default:
                 fprintf(stderr, "Unknown intrinsic: ");
                 nir_print_instr(&instr->instr, stderr);
index 4249c181bf113f57974dfcd934004e5ece46c718..0489ebdc12e5e866dbca273f5c1a71b991d305d2 100644 (file)
@@ -279,6 +279,14 @@ enum quniform_contents {
          * L2T cache will effectively be the shared memory area.
          */
         QUNIFORM_SHARED_OFFSET,
+
+        /**
+         * Returns the number of layers in the framebuffer.
+         *
+         * This is used to cap gl_Layer in geometry shaders to avoid
+         * out-of-bounds accesses into the tile state during binning.
+         */
+        QUNIFORM_FB_LAYERS,
 };
 
 static inline uint32_t v3d_unit_data_create(uint32_t unit, uint32_t value)
index 9b7db65db7e22561a8ea237ccee0125c97894519..855b9c44b46cebd15e0dfe0ca6d46e3da46b45d9 100644 (file)
@@ -193,6 +193,46 @@ v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b,
                 v3d_nir_store_output(b, state->psiz_vpm_offset, offset_reg, src);
         }
 
+        if (var->data.location == VARYING_SLOT_LAYER) {
+                assert(c->s->info.stage == MESA_SHADER_GEOMETRY);
+                nir_ssa_def *header = nir_load_var(b, state->gs.header_var);
+                header = nir_iand(b, header, nir_imm_int(b, 0xff00ffff));
+
+                /* From the GLES 3.2 spec:
+                 *
+                 *    "When fragments are written to a layered framebuffer, the
+                 *     fragment’s layer number selects an image from the array
+                 *     of images at each attachment (...). If the fragment’s
+                 *     layer number is negative, or greater than or equal to
+                 *     the minimum number of layers of any attachment, the
+                 *     effects of the fragment on the framebuffer contents are
+                 *     undefined."
+                 *
+                 * This suggests we can just ignore that situation, however,
+                 * for V3D an out-of-bounds layer index means that the binner
+                 * might do out-of-bounds writes access to the tile state. The
+                 * simulator has an assert to catch this, so we play safe here
+                 * and we make sure that doesn't happen by setting gl_Layer
+                 * to 0 in that case (we always allocate tile state for at
+                 * least one layer).
+                 */
+                nir_intrinsic_instr *load =
+                        nir_intrinsic_instr_create(b->shader,
+                                                   nir_intrinsic_load_fb_layers_v3d);
+                load->num_components = 1;
+                nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
+                nir_builder_instr_insert(b, &load->instr);
+                nir_ssa_def *fb_layers = &load->dest.ssa;
+
+                nir_ssa_def *cond = nir_ige(b, src, fb_layers);
+                nir_ssa_def *layer_id =
+                        nir_bcsel(b, cond,
+                                  nir_imm_int(b, 0),
+                                  nir_ishl(b, src, nir_imm_int(b, 16)));
+                header = nir_ior(b, header, layer_id);
+                nir_store_var(b, state->gs.header_var, header, 0x1);
+        }
+
         /* Scalarize outputs if it hasn't happened already, since we want to
          * schedule each VPM write individually.  We can skip any outut
          * components not read by the FS.
index 3939f8ff5107c201e43b225e40d5eb3396ba30c8..c53babdde552c7ce608a8241c599fcb777bdd4bd 100644 (file)
@@ -868,3 +868,7 @@ load("tlb_color_v3d", 1, [BASE, COMPONENT], [])
 # src[] = { value, render_target }
 # BASE = sample index
 store("tlb_sample_color_v3d", 2, [BASE, COMPONENT, TYPE], [])
+
+# V3D-specific intrinsic to load the number of layers attached to
+# the target framebuffer
+intrinsic("load_fb_layers_v3d", dest_comp=1, flags=[CAN_ELIMINATE, CAN_REORDER])
index c94f6be4b764f6ae8fe2e43d79ccecc6f434f6ca..ab57880f06f5b5dbb7d359e13b77b8ab580ae9cf 100644 (file)
@@ -373,6 +373,10 @@ v3d_write_uniforms(struct v3d_context *v3d, struct v3d_job *job,
                                          v3d->compute_shared_memory, 0);
                         break;
 
+                case QUNIFORM_FB_LAYERS:
+                        cl_aligned_u32(&uniforms, job->num_layers);
+                        break;
+
                 default:
                         assert(quniform_contents_is_texture_p0(uinfo->contents[i]));
 
@@ -465,6 +469,10 @@ v3d_set_shader_uniform_dirty_flags(struct v3d_compiled_shader *shader)
                         /* Compute always recalculates uniforms. */
                         break;
 
+                case QUNIFORM_FB_LAYERS:
+                        dirty |= VC5_DIRTY_FRAMEBUFFER;
+                        break;
+
                 default:
                         assert(quniform_contents_is_texture_p0(shader->prog_data.base->uniforms.contents[i]));
                         dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX |