v3d/compiler: handle compact varyings
[mesa.git] / src / broadcom / compiler / v3d_nir_lower_io.c
index 9b7db65db7e22561a8ea237ccee0125c97894519..4b616b054a72f856aa6dad198af8db0b63a9a535 100644 (file)
@@ -81,10 +81,17 @@ v3d_nir_store_output(nir_builder *b, int base, nir_ssa_def *offset,
         intr->num_components = 1;
 
         intr->src[0] = nir_src_for_ssa(chan);
-        if (offset)
-                intr->src[1] = nir_src_for_ssa(offset);
-        else
+        if (offset) {
+                /* When generating the VIR instruction, the base and the offset
+                 * are just going to get added together with an ADD instruction
+                 * so we might as well do the add here at the NIR level instead
+                 * and let the constant folding do its magic.
+                 */
+                intr->src[1] = nir_src_for_ssa(nir_iadd_imm(b, offset, base));
+                base = 0;
+        } else {
                 intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
+        }
 
         nir_intrinsic_set_base(intr, base);
         nir_intrinsic_set_write_mask(intr, 0x1);
@@ -165,13 +172,14 @@ v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b,
         int start_comp = nir_intrinsic_component(intr);
         nir_ssa_def *src = nir_ssa_for_src(b, intr->src[0],
                                            intr->num_components);
-
         nir_variable *var = NULL;
         nir_foreach_variable(scan_var, &c->s->outputs) {
+                int components = scan_var->data.compact ?
+                        glsl_get_length(scan_var->type) :
+                        glsl_get_components(scan_var->type);
                 if (scan_var->data.driver_location != nir_intrinsic_base(intr) ||
                     start_comp < scan_var->data.location_frac ||
-                    start_comp >= scan_var->data.location_frac +
-                    glsl_get_components(scan_var->type)) {
+                    start_comp >= scan_var->data.location_frac + components) {
                         continue;
                 }
                 var = scan_var;
@@ -193,6 +201,45 @@ v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b,
                 v3d_nir_store_output(b, state->psiz_vpm_offset, offset_reg, src);
         }
 
+        if (var->data.location == VARYING_SLOT_LAYER) {
+                assert(c->s->info.stage == MESA_SHADER_GEOMETRY);
+                nir_ssa_def *header = nir_load_var(b, state->gs.header_var);
+                header = nir_iand(b, header, nir_imm_int(b, 0xff00ffff));
+
+                /* From the GLES 3.2 spec:
+                 *
+                 *    "When fragments are written to a layered framebuffer, the
+                 *     fragment’s layer number selects an image from the array
+                 *     of images at each attachment (...). If the fragment’s
+                 *     layer number is negative, or greater than or equal to
+                 *     the minimum number of layers of any attachment, the
+                 *     effects of the fragment on the framebuffer contents are
+                 *     undefined."
+                 *
+                 * This suggests we can just ignore that situation, however,
+                 * for V3D an out-of-bounds layer index means that the binner
+                 * might do out-of-bounds writes to the tile state. The
+                 * simulator has an assert to catch this, so we play safe here
+                 * and we make sure that doesn't happen by setting gl_Layer
+                 * to 0 in that case (we always allocate tile state for at
+                 * least one layer).
+                 */
+                nir_intrinsic_instr *load =
+                        nir_intrinsic_instr_create(b->shader,
+                                                   nir_intrinsic_load_fb_layers_v3d);
+                nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
+                nir_builder_instr_insert(b, &load->instr);
+                nir_ssa_def *fb_layers = &load->dest.ssa;
+
+                nir_ssa_def *cond = nir_ige(b, src, fb_layers);
+                nir_ssa_def *layer_id =
+                        nir_bcsel(b, cond,
+                                  nir_imm_int(b, 0),
+                                  nir_ishl(b, src, nir_imm_int(b, 16)));
+                header = nir_ior(b, header, layer_id);
+                nir_store_var(b, state->gs.header_var, header, 0x1);
+        }
+
         /* Scalarize outputs if it hasn't happened already, since we want to
          * schedule each VPM write individually.  We can skip any outut
          * components not read by the FS.
@@ -207,6 +254,9 @@ v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b,
                 if (vpm_offset == -1)
                         continue;
 
+                if (var->data.compact)
+                    vpm_offset += nir_src_as_uint(intr->src[1]) * 4;
+
                 BITSET_SET(state->varyings_stored, vpm_offset);
 
                 v3d_nir_store_output(b, state->varyings_vpm_offset + vpm_offset,