From: Iago Toral Quiroga Date: Thu, 1 Aug 2019 10:30:34 +0000 (+0200) Subject: v3d: use the GPU to record primitives written to transform feedback X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=0f2d1dfe65bfe1ee8f02ce45f100a5508debdfd4;p=mesa.git v3d: use the GPU to record primitives written to transform feedback We can use the PRIMITIVE_COUNTS_FEEDBACK packet to write various primitive counts to a buffer, including the number of primitives written to transform feedback buffers, which will handle buffer overflow correctly. There are a couple of caveats with this: Primitive counters are reset when we emit a 'Tile Binning Mode Configuration' packet, which can happen in the middle of a primitives query, so we need to read the buffer when we submit a job and accumulate the counts in the context so we don't lose them. We also need to do the same when we switch primitive type during transform feedback so we can compute the correct number of recorded vertices from the number of primitives. This is necessary so we can provide an accurate vertex count for draw from transform feedback. v2: - When computing the number of vertices for a primitive, pass in the base primitive, since that is what the hardware will count. - No need to update primitive counts when switching primitive types if the base primitives are the same. - Log perf warning when mapping the primitive counts BO for readback (Eric). - Only emit the primitive counts packet once at job end (Eric). - Use u_upload mechanism for the primitive counts buffer (Eric). - Use the XML to generate indices into the primitive counters buffer (Eric). 
Fixes piglit tests: spec/ext_transform_feedback/overflow-edge-cases spec/ext_transform_feedback/query-primitives_written-bufferrange spec/ext_transform_feedback/query-primitives_written-bufferrange-discard spec/ext_transform_feedback/change-size base-shrink spec/ext_transform_feedback/change-size base-grow spec/ext_transform_feedback/change-size offset-shrink spec/ext_transform_feedback/change-size offset-grow spec/ext_transform_feedback/change-size range-shrink spec/ext_transform_feedback/change-size range-grow spec/ext_transform_feedback/intervening-read prims-written Reviewed-by: Eric Anholt --- diff --git a/src/broadcom/cle/v3d_packet_v33.xml b/src/broadcom/cle/v3d_packet_v33.xml index ebb7264f423..f40796612f9 100644 --- a/src/broadcom/cle/v3d_packet_v33.xml +++ b/src/broadcom/cle/v3d_packet_v33.xml @@ -262,6 +262,16 @@ + + + + + + + + + + diff --git a/src/gallium/drivers/v3d/v3d_context.c b/src/gallium/drivers/v3d/v3d_context.c index fcd2d5ec69b..8dc8dd63581 100644 --- a/src/gallium/drivers/v3d/v3d_context.c +++ b/src/gallium/drivers/v3d/v3d_context.c @@ -31,6 +31,7 @@ #include "util/u_memory.h" #include "util/u_blitter.h" #include "util/u_upload_mgr.h" +#include "util/u_prim.h" #include "indices/u_primconvert.h" #include "pipe/p_screen.h" @@ -109,6 +110,39 @@ v3d_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc) job->store &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL); } +/** + * Flushes the current job to get up-to-date primitive counts written to the + * primitive counts BO, then accumulates the transform feedback primitive count + * in the context and the corresponding vertex counts in the bound stream + * output targets. + */ +void +v3d_tf_update_counters(struct v3d_context *v3d) +{ + struct v3d_job *job = v3d_get_job_for_fbo(v3d); + if (job->draw_calls_queued == 0) + return; + + /* In order to get up-to-date primitive counts we need to submit + * the job for execution so we get the counts written to memory. 
+ * Notice that this will require a sync wait for the buffer write. + */ + uint32_t prims_before = v3d->tf_prims_generated; + v3d_job_submit(v3d, job); + uint32_t prims_after = v3d->tf_prims_generated; + if (prims_before == prims_after) + return; + + enum pipe_prim_type prim_type = u_base_prim_type(v3d->prim_mode); + uint32_t num_verts = u_vertices_for_prims(prim_type, + prims_after - prims_before); + for (int i = 0; i < v3d->streamout.num_targets; i++) { + struct v3d_stream_output_target *so = + v3d_stream_output_target(v3d->streamout.targets[i]); + so->recorded_vertex_count += num_verts; + } +} + static void v3d_context_destroy(struct pipe_context *pctx) { @@ -127,6 +161,9 @@ v3d_context_destroy(struct pipe_context *pctx) if (v3d->state_uploader) u_upload_destroy(v3d->state_uploader); + if (v3d->prim_counts) + pipe_resource_reference(&v3d->prim_counts, NULL); + slab_destroy_child(&v3d->transfer_pool); pipe_surface_reference(&v3d->framebuffer.cbufs[0], NULL); diff --git a/src/gallium/drivers/v3d/v3d_context.h b/src/gallium/drivers/v3d/v3d_context.h index 5f19504dbae..b2c917df240 100644 --- a/src/gallium/drivers/v3d/v3d_context.h +++ b/src/gallium/drivers/v3d/v3d_context.h @@ -498,6 +498,8 @@ struct v3d_context { struct v3d_vertexbuf_stateobj vertexbuf; struct v3d_streamout_stateobj streamout; struct v3d_bo *current_oq; + struct pipe_resource *prim_counts; + uint32_t prim_counts_offset; struct pipe_debug_callback debug; /** @} */ }; @@ -652,6 +654,8 @@ bool v3d_generate_mipmap(struct pipe_context *pctx, struct v3d_fence *v3d_fence_create(struct v3d_context *v3d); +void v3d_tf_update_counters(struct v3d_context *v3d); + #ifdef v3dX # include "v3dx_context.h" #else diff --git a/src/gallium/drivers/v3d/v3d_job.c b/src/gallium/drivers/v3d/v3d_job.c index 3226059161a..17fc41d317a 100644 --- a/src/gallium/drivers/v3d/v3d_job.c +++ b/src/gallium/drivers/v3d/v3d_job.c @@ -429,6 +429,19 @@ v3d_clif_dump(struct v3d_context *v3d, struct v3d_job *job) clif_dump_destroy(clif); 
} +static void +v3d_read_and_accumulate_primitive_counters(struct v3d_context *v3d) +{ + assert(v3d->prim_counts); + + perf_debug("stalling on TF counts readback"); + struct v3d_resource *rsc = v3d_resource(v3d->prim_counts); + if (v3d_bo_wait(rsc->bo, PIPE_TIMEOUT_INFINITE, "prim-counts")) { + uint32_t *map = v3d_bo_map(rsc->bo); + v3d->tf_prims_generated += map[V3D_PRIM_COUNTS_TF_WRITTEN]; + } +} + /** * Submits the job to the kernel and then reinitializes it. */ @@ -488,6 +501,14 @@ v3d_job_submit(struct v3d_context *v3d, struct v3d_job *job) "Expect corruption.\n", strerror(errno)); warned = true; } + + /* If we are submitting a job in the middle of transform + * feedback we need to read the primitive counts and accumulate + * them, otherwise they will be reset at the start of the next + * draw when we emit the Tile Binning Mode Configuration packet. + */ + if (v3d->streamout.num_targets) + v3d_read_and_accumulate_primitive_counters(v3d); } done: diff --git a/src/gallium/drivers/v3d/v3d_query.c b/src/gallium/drivers/v3d/v3d_query.c index d31b9dd896b..72bb2e43c51 100644 --- a/src/gallium/drivers/v3d/v3d_query.c +++ b/src/gallium/drivers/v3d/v3d_query.c @@ -75,6 +75,11 @@ v3d_begin_query(struct pipe_context *pctx, struct pipe_query *query) q->start = v3d->prims_generated; break; case PIPE_QUERY_PRIMITIVES_EMITTED: + /* If we are inside transform feedback we need to update the + * primitive counts to skip primitives recorded before this. + */ + if (v3d->streamout.num_targets > 0) + v3d_tf_update_counters(v3d); q->start = v3d->tf_prims_generated; break; case PIPE_QUERY_OCCLUSION_COUNTER: @@ -105,6 +110,12 @@ v3d_end_query(struct pipe_context *pctx, struct pipe_query *query) q->end = v3d->prims_generated; break; case PIPE_QUERY_PRIMITIVES_EMITTED: + /* If transform feedback has ended, then we have already + * updated the primitive counts at glEndTransformFeedback() + * time. Otherwise, we have to do it now. 
+ */ + if (v3d->streamout.num_targets > 0) + v3d_tf_update_counters(v3d); q->end = v3d->tf_prims_generated; break; case PIPE_QUERY_OCCLUSION_COUNTER: diff --git a/src/gallium/drivers/v3d/v3dx_draw.c b/src/gallium/drivers/v3d/v3dx_draw.c index c2b3ecd8a13..fec9d54c25b 100644 --- a/src/gallium/drivers/v3d/v3dx_draw.c +++ b/src/gallium/drivers/v3d/v3dx_draw.c @@ -545,29 +545,20 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, } /** - * Computes the various transform feedback statistics, since they can't be - * recorded by CL packets. + * Updates the number of primitives generated from the number of vertices + * to draw. We do this here instead of using PRIMITIVE_COUNTS_FEEDBACK because + * using the GPU packet for this might require sync waits and this is trivial + * to handle in the CPU instead. */ static void -v3d_tf_statistics_record(struct v3d_context *v3d, - const struct pipe_draw_info *info) +v3d_update_primitives_generated_counter(struct v3d_context *v3d, + const struct pipe_draw_info *info) { if (!v3d->active_queries) return; uint32_t prims = u_prims_for_vertices(info->mode, info->count); v3d->prims_generated += prims; - - if (v3d->streamout.num_targets <= 0) - return; - - /* XXX: Only count if we didn't overflow. */ - v3d->tf_prims_generated += prims; - for (int i = 0; i < v3d->streamout.num_targets; i++) { - struct v3d_stream_output_target *target = - v3d_stream_output_target(v3d->streamout.targets[i]); - target->recorded_vertex_count += info->count; - } } static void @@ -665,6 +656,17 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) v3d_predraw_check_outputs(pctx); + /* If transform feedback is active and we are switching primitive type + * we need to submit the job before drawing and update the vertex count + * written to TF based on the primitive type since we will need to + * know the exact vertex count if the application decides to call + * glDrawTransformFeedback() later. 
+ */ + if (v3d->streamout.num_targets > 0 && + u_base_prim_type(info->mode) != u_base_prim_type(v3d->prim_mode)) { + v3d_tf_update_counters(v3d); + } + struct v3d_job *job = v3d_get_job_for_fbo(v3d); /* If vertex texturing depends on the output of rendering, we need to @@ -762,7 +764,7 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) prim_tf_enable = (V3D_PRIM_POINTS_TF - V3D_PRIM_POINTS); #endif - v3d_tf_statistics_record(v3d, info); + v3d_update_primitives_generated_counter(v3d, info); /* Note that the primitive type fields match with OpenGL/gallium * definitions, up to but not including QUADS. diff --git a/src/gallium/drivers/v3d/v3dx_job.c b/src/gallium/drivers/v3d/v3dx_job.c index 1dbd20b2251..84228a48760 100644 --- a/src/gallium/drivers/v3d/v3dx_job.c +++ b/src/gallium/drivers/v3d/v3dx_job.c @@ -38,6 +38,20 @@ void v3dX(bcl_epilogue)(struct v3d_context *v3d, struct v3d_job *job) #endif cl_packet_length(FLUSH)); + if (job->tf_enabled) { + /* Write primitive counts to memory. */ + assert(v3d->prim_counts); + struct v3d_resource *rsc = + v3d_resource(v3d->prim_counts); + cl_emit(&job->bcl, PRIMITIVE_COUNTS_FEEDBACK, counter) { + counter.address = + cl_address(rsc->bo, + v3d->prim_counts_offset); + counter.read_write_64byte = false; + counter.op = 0; + } + } + /* Disable TF at the end of the CL, so that the TF block * cleans up and finishes before it gets reset by the next * frame's tile binning mode cfg packet. (SWVC5-718). diff --git a/src/gallium/drivers/v3d/v3dx_state.c b/src/gallium/drivers/v3d/v3dx_state.c index b6a57249044..c709b476f99 100644 --- a/src/gallium/drivers/v3d/v3dx_state.c +++ b/src/gallium/drivers/v3d/v3dx_state.c @@ -1222,6 +1222,14 @@ v3d_set_stream_output_targets(struct pipe_context *pctx, assert(num_targets <= ARRAY_SIZE(so->targets)); + /* Update recorded vertex counts when we are ending the recording of + * transform feedback. 
We do this when we switch primitive types + * at draw time, but if we haven't switched primitives in our last + * draw we need to do it here as well. + */ + if (num_targets == 0 && so->num_targets > 0) + v3d_tf_update_counters(ctx); + for (i = 0; i < num_targets; i++) { if (offsets[i] != -1) so->offsets[i] = offsets[i]; @@ -1234,6 +1242,15 @@ v3d_set_stream_output_targets(struct pipe_context *pctx, so->num_targets = num_targets; + /* Create primitive counters BO if needed */ + if (num_targets > 0 && !ctx->prim_counts) { + uint32_t zeroes[7] = { 0 }; /* Init all 7 counters to 0 */ + u_upload_data(ctx->uploader, + 0, sizeof(zeroes), 32, zeroes, + &ctx->prim_counts_offset, + &ctx->prim_counts); + } + ctx->dirty |= VC5_DIRTY_STREAMOUT; }