broadcom/vc5: Limit each transform feedback data spec to 16 dwords.
author: Eric Anholt <eric@anholt.net>
Wed, 21 Mar 2018 22:18:34 +0000 (15:18 -0700)
committer: Eric Anholt <eric@anholt.net>
Tue, 27 Mar 2018 00:33:37 +0000 (17:33 -0700)
The "length minus 1" field only has 4 bits, so a single spec can cover at
most 16 dwords; we need to generate separate specs when there's more TF
output than that per buffer.

Fixes
GTF-GLES3.gtf.GL3Tests.transform_feedback.transform_feedback_builtin_type
and transform_feedback_max_interleaved.

src/gallium/drivers/vc5/vc5_context.h
src/gallium/drivers/vc5/vc5_program.c

index 1ab5a6b1532ce8eeca4c9c7d50bb807ee16eb2cf..976fba90f8176015639a03693251ee2016d3d469 100644 (file)
@@ -130,7 +130,7 @@ struct vc5_uncompiled_shader {
         struct pipe_shader_state base;
         uint32_t num_tf_outputs;
         struct v3d_varying_slot *tf_outputs;
-        uint16_t tf_specs[PIPE_MAX_SO_BUFFERS];
+        uint16_t tf_specs[16];
         uint32_t num_tf_specs;
 
         /**
index 87c21abe8b1236fabdc86583a5218a1be34b32a8..a7a089510b22efbf55b3e5e77c60eef0ac55cc7f 100644 (file)
@@ -49,6 +49,14 @@ vc5_get_slot_for_driver_location(nir_shader *s, uint32_t driver_location)
         return -1;
 }
 
+/**
+ * Precomputes the TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC array for the shader.
+ *
+ * A shader can have 16 of these specs, and each one of them can write up to
+ * 16 dwords.  Since we allow a total of 64 transform feedback output
+ * components (not 16 vectors), we have to group the writes of multiple
+ * varyings together in a single data spec.
+ */
 static void
 vc5_set_transform_feedback_outputs(struct vc5_uncompiled_shader *so,
                                    const struct pipe_stream_output_info *stream_output)
@@ -102,19 +110,28 @@ vc5_set_transform_feedback_outputs(struct vc5_uncompiled_shader *so,
                 if (!vpm_size)
                         continue;
 
-                struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked = {
-                        /* We need the offset from the coordinate shader's VPM
-                         * output block, which has the [X, Y, Z, W, Xs, Ys]
-                         * values at the start.  Note that this will need some
-                         * shifting when PSIZ is also present.
-                         */
-                        .first_shaded_vertex_value_to_output = vpm_start + 6,
-                        .number_of_consecutive_vertex_values_to_output_as_32_bit_values_minus_1 = vpm_size - 1,
-                        .output_buffer_to_write_to = buffer,
-                };
-                V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL,
-                                                               (void *)&so->tf_specs[so->num_tf_specs++],
-                                                               &unpacked);
+                uint32_t vpm_start_offset = vpm_start + 6;
+
+                while (vpm_size) {
+                        uint32_t write_size = MIN2(vpm_size, 1 << 4);
+
+                        struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked = {
+                                /* We need the offset from the coordinate shader's VPM
+                                 * output block, which has the [X, Y, Z, W, Xs, Ys]
+                                 * values at the start.
+                                 */
+                                .first_shaded_vertex_value_to_output = vpm_start_offset,
+                                .number_of_consecutive_vertex_values_to_output_as_32_bit_values_minus_1 = write_size - 1,
+                                .output_buffer_to_write_to = buffer,
+                        };
+
+                        assert(so->num_tf_specs != ARRAY_SIZE(so->tf_specs));
+                        V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL,
+                                                                       (void *)&so->tf_specs[so->num_tf_specs++],
+                                                                       &unpacked);
+                        vpm_start_offset += write_size;
+                        vpm_size -= write_size;
+                }
         }
 
         so->num_tf_outputs = slot_count;