broadcom/vc5: Add support for centroid varyings.
author: Eric Anholt <eric@anholt.net>
Thu, 26 Apr 2018 16:24:32 +0000 (09:24 -0700)
committer: Eric Anholt <eric@anholt.net>
Thu, 26 Apr 2018 18:30:22 +0000 (11:30 -0700)
It would be nice to share the flags packet emit logic with flat shade
flags, but I couldn't come up with a good way while still using our pack
macros.  We need to refactor this to shader record setup at compile time,
anyway.

Fixes ext_framebuffer_multisample-interpolation * centroid-*

src/broadcom/compiler/nir_to_vir.c
src/broadcom/compiler/v3d_compiler.h
src/broadcom/compiler/vir.c
src/gallium/drivers/vc5/vc5_context.h
src/gallium/drivers/vc5/vc5_draw.c
src/gallium/drivers/vc5/vc5_emit.c
src/gallium/drivers/vc5/vc5_program.c

index d6c2d1902b6b31ee68043ae9e310caa75893f311..ec8f22321f3bf22ec0b21c85f45ff9298e7ac63d 100644 (file)
@@ -436,6 +436,7 @@ emit_fragment_varying(struct v3d_compile *c, nir_variable *var,
                 /* FALLTHROUGH */
         case INTERP_MODE_SMOOTH:
                 if (var->data.centroid) {
+                        BITSET_SET(c->centroid_flags, i);
                         return vir_FADD(c, vir_FMUL(c, vary,
                                                     c->payload_w_centroid), r5);
                 } else {
@@ -1985,6 +1986,36 @@ vir_emit_last_thrsw(struct v3d_compile *c)
                 c->last_thrsw->is_last_thrsw = true;
 }
 
+/* The shader state has a "centroid W used in addition to center W" flag, so
+ * FS programs with centroid varyings are walked after VIR optimization: a
+ * source reading QFILE_REG 0 (presumably center W; TODO confirm) sets it.
+ */
+static void
+vir_check_payload_w(struct v3d_compile *c)
+{
+        if (c->s->info.stage != MESA_SHADER_FRAGMENT) /* FS only */
+                return;
+
+        bool any_centroid = false; /* is any centroid_flags word non-zero? */
+        for (int i = 0; i < ARRAY_SIZE(c->centroid_flags); i++) {
+                if (c->centroid_flags[i])
+                        any_centroid = true;
+        }
+        if (!any_centroid) /* no centroid varyings: flag is left untouched */
+                return;
+
+        vir_for_each_inst_inorder(inst, c) {
+                for (int i = 0; i < vir_get_nsrc(inst); i++) {
+                        if (inst->src[i].file == QFILE_REG &&
+                            inst->src[i].index == 0) {
+                                c->uses_centroid_and_center_w = true;
+                                return; /* one matching source is enough */
+                        }
+                }
+        }
+
+}
+
 void
 v3d_nir_to_vir(struct v3d_compile *c)
 {
@@ -2024,6 +2055,8 @@ v3d_nir_to_vir(struct v3d_compile *c)
         vir_optimize(c);
         vir_lower_uniforms(c);
 
+        vir_check_payload_w(c);
+
         /* XXX: vir_schedule_instructions(c); */
 
         if (V3D_DEBUG & (V3D_DEBUG_VIR |
index e89ea7be2173223f9bbc63d7779c49084554d2e8..4dba23c067e9340ff4daea388bfe2dc2a6400873 100644 (file)
@@ -478,6 +478,10 @@ struct v3d_compile {
          */
         uint32_t flat_shade_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)];
 
+        uint32_t centroid_flags[BITSET_WORDS(V3D_MAX_FS_INPUTS)];
+
+        bool uses_centroid_and_center_w;
+
         struct v3d_ubo_range *ubo_ranges;
         bool *ubo_range_used;
         uint32_t ubo_ranges_array_size;
@@ -657,8 +661,11 @@ struct v3d_fs_prog_data {
          */
         uint32_t flat_shade_flags[((V3D_MAX_FS_INPUTS - 1) / 24) + 1];
 
+        uint32_t centroid_flags[((V3D_MAX_FS_INPUTS - 1) / 24) + 1];
+
         bool writes_z;
         bool discard;
+        bool uses_centroid_and_center_w;
 };
 
 /* Special nir_load_input intrinsic index for loading the current TLB
index 93990ee806f61de98051e89204223da724782d52..0de5335d12a6b63b92d65cd8ef35cafa0e515397 100644 (file)
@@ -758,6 +758,9 @@ v3d_set_fs_prog_data_inputs(struct v3d_compile *c,
         for (int i = 0; i < V3D_MAX_FS_INPUTS; i++) {
                 if (BITSET_TEST(c->flat_shade_flags, i))
                         prog_data->flat_shade_flags[i / 24] |= 1 << (i % 24);
+
+                if (BITSET_TEST(c->centroid_flags, i))
+                        prog_data->centroid_flags[i / 24] |= 1 << (i % 24);
         }
 }
 
@@ -838,6 +841,7 @@ uint64_t *v3d_compile_fs(const struct v3d_compiler *compiler,
         prog_data->writes_z = (c->s->info.outputs_written &
                                (1 << FRAG_RESULT_DEPTH));
         prog_data->discard = c->s->info.fs.uses_discard;
+        prog_data->uses_centroid_and_center_w = c->uses_centroid_and_center_w;
 
         return v3d_return_qpu_insts(c, final_assembly_size);
 }
index e08a2a59909ee381801ce6552adad48f7447545b..9e55f80f9473aec6793401a79ccda900dd4e102a 100644 (file)
@@ -82,6 +82,7 @@ void vc5_job_add_bo(struct vc5_job *job, struct vc5_bo *bo);
 #define VC5_DIRTY_FS_INPUTS     (1 << 26)
 #define VC5_DIRTY_STREAMOUT     (1 << 27)
 #define VC5_DIRTY_OQ            (1 << 28)
+#define VC5_DIRTY_CENTROID_FLAGS (1 << 29)
 
 #define VC5_MAX_FS_INPUTS 64
 
index ff14d1c13584c3875f45fb120a61e9a428e5c2d9..ecb1aa37831f3883140fb6dc70b4a9cf3e985343 100644 (file)
@@ -177,6 +177,9 @@ vc5_emit_gl_shader_state(struct vc5_context *vc5,
                         (vc5->prog.fs->prog_data.fs->writes_z ||
                          vc5->prog.fs->prog_data.fs->discard);
 
+                shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 =
+                        vc5->prog.fs->prog_data.fs->uses_centroid_and_center_w;
+
                 shader.number_of_varyings_in_fragment_shader =
                         vc5->prog.fs->prog_data.base->num_inputs;
 
index 0d11d7e1ad14e30d54319b07db949da1912c154e..cb8af953300e4675b97eccd72cff314929ecb1d3 100644 (file)
@@ -589,6 +589,45 @@ v3dX(emit_state)(struct pipe_context *pctx)
                 }
         }
 
+#if V3D_VERSION >= 40
+        if (vc5->dirty & VC5_DIRTY_CENTROID_FLAGS) {
+                bool emitted_any = false;
+
+                for (int i = 0; i < ARRAY_SIZE(vc5->prog.fs->prog_data.fs->centroid_flags); i++) {
+                        if (!vc5->prog.fs->prog_data.fs->centroid_flags[i])
+                                continue;
+
+                        cl_emit(&job->bcl, CENTROID_FLAGS, flags) {
+                                flags.varying_offset_v0 = i;
+
+                                if (emitted_any) {
+                                        flags.action_for_centroid_flags_of_lower_numbered_varyings =
+                                                V3D_VARYING_FLAGS_ACTION_UNCHANGED;
+                                        flags.action_for_centroid_flags_of_higher_numbered_varyings =
+                                                V3D_VARYING_FLAGS_ACTION_UNCHANGED;
+                                } else {
+                                        flags.action_for_centroid_flags_of_lower_numbered_varyings =
+                                                ((i == 0) ?
+                                                 V3D_VARYING_FLAGS_ACTION_UNCHANGED :
+                                                 V3D_VARYING_FLAGS_ACTION_ZEROED);
+
+                                        flags.action_for_centroid_flags_of_higher_numbered_varyings =
+                                                V3D_VARYING_FLAGS_ACTION_ZEROED;
+                                }
+
+                                flags.centroid_flags_for_varyings_v024 =
+                                        vc5->prog.fs->prog_data.fs->centroid_flags[i];
+                        }
+
+                        emitted_any = true;
+                }
+
+                if (!emitted_any) {
+                        cl_emit(&job->bcl, ZERO_ALL_CENTROID_FLAGS, flags);
+                }
+        }
+#endif
+
         /* Set up the transform feedback data specs (which VPM entries to
          * output to which buffers).
          */
index d885cdf2975b6b21eb5ef94cdd59c520648cbec3..23d2d73a2dc11d92352e635b0bf5b32f70410d2e 100644 (file)
@@ -477,10 +477,16 @@ vc5_update_compiled_fs(struct vc5_context *vc5, uint8_t prim_mode)
 
         vc5->dirty |= VC5_DIRTY_COMPILED_FS;
 
-        if (old_fs &&
-            vc5->prog.fs->prog_data.fs->flat_shade_flags !=
-            old_fs->prog_data.fs->flat_shade_flags) {
-                vc5->dirty |= VC5_DIRTY_FLAT_SHADE_FLAGS;
+        if (old_fs) {
+                if (vc5->prog.fs->prog_data.fs->flat_shade_flags !=
+                    old_fs->prog_data.fs->flat_shade_flags) {
+                        vc5->dirty |= VC5_DIRTY_FLAT_SHADE_FLAGS;
+                }
+
+                if (vc5->prog.fs->prog_data.fs->centroid_flags !=
+                    old_fs->prog_data.fs->centroid_flags) {
+                        vc5->dirty |= VC5_DIRTY_CENTROID_FLAGS;
+                }
         }
 
         if (old_fs && memcmp(vc5->prog.fs->prog_data.fs->input_slots,