broadcom/vc5: Fix transform feedback in the presence of point size.
author Eric Anholt <eric@anholt.net>
Wed, 21 Mar 2018 22:07:19 +0000 (15:07 -0700)
committer Eric Anholt <eric@anholt.net>
Tue, 27 Mar 2018 00:46:19 +0000 (17:46 -0700)
I had this note to myself, and it turns out that a lot of CTS tests use
XFB with points to get data out without using a fragment shader.  Keep
track of two sets of precomputed TF specs (point size in VPM prologue or
not), and switch between them when we enable/disable point size.

src/gallium/drivers/vc5/vc5_context.h
src/gallium/drivers/vc5/vc5_emit.c
src/gallium/drivers/vc5/vc5_program.c

index 976fba90f8176015639a03693251ee2016d3d469..7272e045c4fcec4bd6da4dc0329b67713e79312a 100644 (file)
@@ -131,6 +131,7 @@ struct vc5_uncompiled_shader {
         uint32_t num_tf_outputs;
         struct v3d_varying_slot *tf_outputs;
         uint16_t tf_specs[16];
+        uint16_t tf_specs_psiz[16];
         uint32_t num_tf_specs;
 
         /**
index 1db97081df323db7a14313533d9225a910b4597b..061d6e7c9d9da5f64cfc42c1a20a877349543049 100644 (file)
@@ -572,10 +572,18 @@ v3dX(emit_state)(struct pipe_context *pctx)
         /* Set up the transform feedback data specs (which VPM entries to
          * output to which buffers).
          */
-        if (vc5->dirty & VC5_DIRTY_STREAMOUT) {
+        if (vc5->dirty & (VC5_DIRTY_STREAMOUT |
+                          VC5_DIRTY_RASTERIZER |
+                          VC5_DIRTY_PRIM_MODE)) {
                 struct vc5_streamout_stateobj *so = &vc5->streamout;
 
                 if (so->num_targets) {
+                        bool psiz_per_vertex = (vc5->prim_mode == PIPE_PRIM_POINTS &&
+                                                vc5->rasterizer->base.point_size_per_vertex);
+                        uint16_t *tf_specs = (psiz_per_vertex ?
+                                              vc5->prog.bind_vs->tf_specs_psiz :
+                                              vc5->prog.bind_vs->tf_specs);
+
 #if V3D_VERSION >= 40
                         cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) {
                                 tfe.number_of_16_bit_output_data_specs_following =
@@ -593,8 +601,7 @@ v3dX(emit_state)(struct pipe_context *pctx)
                         };
 #endif /* V3D_VERSION < 40 */
                         for (int i = 0; i < vc5->prog.bind_vs->num_tf_specs; i++) {
-                                cl_emit_prepacked(&job->bcl,
-                                                  &vc5->prog.bind_vs->tf_specs[i]);
+                                cl_emit_prepacked(&job->bcl, &tf_specs[i]);
                         }
                 }
         }
index a7a089510b22efbf55b3e5e77c60eef0ac55cc7f..7bad80a1684bece08340c61e4864b45f606435bc 100644 (file)
@@ -127,8 +127,19 @@ vc5_set_transform_feedback_outputs(struct vc5_uncompiled_shader *so,
 
                         assert(so->num_tf_specs != ARRAY_SIZE(so->tf_specs));
                         V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL,
-                                                                       (void *)&so->tf_specs[so->num_tf_specs++],
+                                                                       (void *)&so->tf_specs[so->num_tf_specs],
                                                                        &unpacked);
+
+                        /* If point size is being written by the shader, then
+                         * all the VPM start offsets are shifted up by one.
+                         * We won't know that until the variant is compiled,
+                         * though.
+                         */
+                        unpacked.first_shaded_vertex_value_to_output++;
+                        V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL,
+                                                                       (void *)&so->tf_specs_psiz[so->num_tf_specs],
+                                                                       &unpacked);
+                        so->num_tf_specs++;
                         vpm_start_offset += write_size;
                         vpm_size -= write_size;
                 }