From 82a5ee6be4b4f5881d86f18d4b002d23c9e18ea5 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 6 Sep 2013 16:59:31 -0700 Subject: [PATCH] i965: Implement glDrawTransformFeedback(). Implementing the GetTransformFeedbackVertexCount() driver hook allows the VBO module to call us with the right number of vertices. The hardware doesn't directly count the number of vertices written by SOL, so we instead use the SO_NUM_PRIMS_WRITTEN(n) counters and multiply by the number of vertices per primitive. Unfortunately, counting the number of primitives generated is tricky: a program might pause a transform feedback operation, start a second one with a different object, then switch back and resume. Both transform feedback operations share the SO_NUM_PRIMS_WRITTEN counters. To work around this, we save the counter values at Begin, Pause, Resume, and End. This "bookends" each section where transform feedback is active for the current object. Adding up differences of pairs gives us the number of primitives generated. (This is similar to what we do for occlusion queries on platforms without hardware contexts.) v2: Fix missing parenthesis in assertion (caught by Eric Anholt). v3: Reuse prim_count_bo rather than freeing it and immediately allocating a new one (suggested by Topi Pohjolainen). 
Signed-off-by: Kenneth Graunke Reviewed-by: Ian Romanick Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/i965/brw_context.c | 2 + src/mesa/drivers/dri/i965/brw_context.h | 26 +++ src/mesa/drivers/dri/i965/gen6_sol.c | 3 + src/mesa/drivers/dri/i965/gen7_sol_state.c | 182 ++++++++++++++++++++- 4 files changed, 212 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 623273c260f..f4e04b69d47 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -252,6 +252,8 @@ brw_init_driver_functions(struct brw_context *brw, functions->NewTransformFeedback = brw_new_transform_feedback; functions->DeleteTransformFeedback = brw_delete_transform_feedback; + functions->GetTransformFeedbackVertexCount = + brw_get_transform_feedback_vertex_count; if (brw->gen >= 7) { functions->BeginTransformFeedback = gen7_begin_transform_feedback; functions->EndTransformFeedback = gen7_end_transform_feedback; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 23c27d825bc..d30c963ad48 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -887,11 +887,33 @@ struct intel_batchbuffer { } saved; }; +#define BRW_MAX_XFB_STREAMS 4 + struct brw_transform_feedback_object { struct gl_transform_feedback_object base; /** A buffer to hold SO_WRITE_OFFSET(n) values while paused. */ drm_intel_bo *offset_bo; + + /** The most recent primitive mode (GL_TRIANGLES/GL_POINTS/GL_LINES). */ + GLenum primitive_mode; + + /** + * Count of primitives generated during this transform feedback operation. + * @{ + */ + uint64_t prims_generated[BRW_MAX_XFB_STREAMS]; + drm_intel_bo *prim_count_bo; + unsigned prim_count_buffer_index; /**< in number of uint64_t units */ + /** @} */ + + /** + * Number of vertices written between last Begin/EndTransformFeedback(). + * + * Used to implement DrawTransformFeedback(). 
+ */ + uint64_t vertices_written[BRW_MAX_XFB_STREAMS]; + bool vertices_written_valid; }; /** @@ -1592,6 +1614,10 @@ brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode, void brw_end_transform_feedback(struct gl_context *ctx, struct gl_transform_feedback_object *obj); +GLsizei +brw_get_transform_feedback_vertex_count(struct gl_context *ctx, + struct gl_transform_feedback_object *obj, + GLuint stream); /* gen7_sol_state.c */ void diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c b/src/mesa/drivers/dri/i965/gen6_sol.c index cbc95f4b51a..eede109a285 100644 --- a/src/mesa/drivers/dri/i965/gen6_sol.c +++ b/src/mesa/drivers/dri/i965/gen6_sol.c @@ -146,6 +146,8 @@ brw_new_transform_feedback(struct gl_context *ctx, GLuint name) brw_obj->offset_bo = drm_intel_bo_alloc(brw->bufmgr, "transform feedback offsets", 16, 64); + brw_obj->prim_count_bo = + drm_intel_bo_alloc(brw->bufmgr, "xfb primitive counts", 4096, 64); return &brw_obj->base; } @@ -162,6 +164,7 @@ brw_delete_transform_feedback(struct gl_context *ctx, } drm_intel_bo_unreference(brw_obj->offset_bo); + drm_intel_bo_unreference(brw_obj->prim_count_bo); free(brw_obj); } diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c b/src/mesa/drivers/dri/i965/gen7_sol_state.c index bdb17e3b81d..de177e20645 100644 --- a/src/mesa/drivers/dri/i965/gen7_sol_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c @@ -249,14 +249,171 @@ const struct brw_tracked_state gen7_sol_state = { .emit = upload_sol_state, }; +/** + * Tally the number of primitives generated so far. + * + * The buffer contains a series of pairs: + * (<start0, start1, start2, start3>, <end0, end1, end2, end3>) ; + * (<start0, start1, start2, start3>, <end0, end1, end2, end3>) ; + * + * For each stream, we subtract the pair of values (end - start) to get the + * number of primitives generated during one section. We accumulate these + * values, adding them up to get the total number of primitives generated. 
+ */ +static void +gen7_tally_prims_generated(struct brw_context *brw, + struct brw_transform_feedback_object *obj) +{ + /* If the current batch is still contributing to the number of primitives + * generated, flush it now so the results will be present when mapped. + */ + if (drm_intel_bo_references(brw->batch.bo, obj->prim_count_bo)) + intel_batchbuffer_flush(brw); + + if (unlikely(brw->perf_debug && drm_intel_bo_busy(obj->prim_count_bo))) + perf_debug("Stalling for # of transform feedback primitives written.\n"); + + drm_intel_bo_map(obj->prim_count_bo, false); + uint64_t *prim_counts = obj->prim_count_bo->virtual; + + assert(obj->prim_count_buffer_index % (2 * BRW_MAX_XFB_STREAMS) == 0); + int pairs = obj->prim_count_buffer_index / (2 * BRW_MAX_XFB_STREAMS); + + for (int i = 0; i < pairs; i++) { + for (int s = 0; s < BRW_MAX_XFB_STREAMS; s++) { + obj->prims_generated[s] += + prim_counts[BRW_MAX_XFB_STREAMS + s] - prim_counts[s]; + } + prim_counts += 2 * BRW_MAX_XFB_STREAMS; /* move to the next pair */ + } + + drm_intel_bo_unmap(obj->prim_count_bo); + + /* We've already gathered up the old data; we can safely overwrite it now. */ + obj->prim_count_buffer_index = 0; +} + +/** + * Store the SO_NUM_PRIMS_WRITTEN counters for each stream (4 uint64_t values) + * to prim_count_bo. + * + * If prim_count_bo is out of space, gather up the results so far into + * prims_generated[] and reuse the buffer from the beginning. + * + * The number of primitives written is used to compute the number of vertices + * written to a transform feedback stream, which is required to implement + * DrawTransformFeedback(). + */ +static void +gen7_save_primitives_written_counters(struct brw_context *brw, + struct brw_transform_feedback_object *obj) +{ + const int streams = BRW_MAX_XFB_STREAMS; + + /* Check if there's enough space for a new pair of four values. 
*/ + if (obj->prim_count_bo != NULL && + obj->prim_count_buffer_index + 2 * streams >= 4096 / sizeof(uint64_t)) { + /* Gather up the results so far and rewind to the start of the BO. */ + gen7_tally_prims_generated(brw, obj); + } + + /* Flush any drawing so that the counters have the right values. */ + intel_batchbuffer_emit_mi_flush(brw); + + /* Emit MI_STORE_REGISTER_MEM commands to write the values. */ + for (int i = 0; i < streams; i++) { + brw_store_register_mem64(brw, obj->prim_count_bo, + GEN7_SO_NUM_PRIMS_WRITTEN(i), + obj->prim_count_buffer_index + i); + } + + /* Update where to write data to. */ + obj->prim_count_buffer_index += streams; +} + +/** + * Compute the number of vertices written by this transform feedback operation. + */ +static void +brw_compute_xfb_vertices_written(struct brw_context *brw, + struct brw_transform_feedback_object *obj) +{ + if (obj->vertices_written_valid || !obj->base.EndedAnytime) + return; + + unsigned vertices_per_prim = 0; + + switch (obj->primitive_mode) { + case GL_POINTS: + vertices_per_prim = 1; + break; + case GL_LINES: + vertices_per_prim = 2; + break; + case GL_TRIANGLES: + vertices_per_prim = 3; + break; + default: + assert(!"Invalid transform feedback primitive mode."); + } + + /* Get the number of primitives generated. */ + gen7_tally_prims_generated(brw, obj); + + for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) { + obj->vertices_written[i] = vertices_per_prim * obj->prims_generated[i]; + } + obj->vertices_written_valid = true; +} + +/** + * GetTransformFeedbackVertexCount() driver hook. + * + * Returns the number of vertices written to a particular stream by the last + * Begin/EndTransformFeedback block. Used to implement DrawTransformFeedback(). 
+ */ +GLsizei +brw_get_transform_feedback_vertex_count(struct gl_context *ctx, + struct gl_transform_feedback_object *obj, + GLuint stream) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_transform_feedback_object *brw_obj = + (struct brw_transform_feedback_object *) obj; + + assert(obj->EndedAnytime); + assert(stream < BRW_MAX_XFB_STREAMS); + + brw_compute_xfb_vertices_written(brw, brw_obj); + return brw_obj->vertices_written[stream]; +} + void gen7_begin_transform_feedback(struct gl_context *ctx, GLenum mode, struct gl_transform_feedback_object *obj) { struct brw_context *brw = brw_context(ctx); + struct brw_transform_feedback_object *brw_obj = + (struct brw_transform_feedback_object *) obj; intel_batchbuffer_flush(brw); brw->batch.needs_sol_reset = true; + + /* We're about to lose the information needed to compute the number of + * vertices written during the last Begin/EndTransformFeedback section, + * so we can't delay it any further. + */ + brw_compute_xfb_vertices_written(brw, brw_obj); + + /* No primitives have been generated yet. */ + for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) { + brw_obj->prims_generated[i] = 0; + } + + /* Store the starting value of the SO_NUM_PRIMS_WRITTEN counters. */ + gen7_save_primitives_written_counters(brw, brw_obj); + + brw_obj->primitive_mode = mode; } void @@ -270,8 +427,18 @@ gen7_end_transform_feedback(struct gl_context *ctx, * simplicity, just do a full flush. */ struct brw_context *brw = brw_context(ctx); + struct brw_transform_feedback_object *brw_obj = + (struct brw_transform_feedback_object *) obj; - intel_batchbuffer_emit_mi_flush(brw); + /* Store the ending value of the SO_NUM_PRIMS_WRITTEN counters. */ + gen7_save_primitives_written_counters(brw, brw_obj); + + /* EndTransformFeedback() means that we need to update the number of + * vertices written. 
Since it's only necessary if DrawTransformFeedback() + * is called and it means mapping a buffer object, we delay computing it + * until it's absolutely necessary to try and avoid stalls. + */ + brw_obj->vertices_written_valid = false; } void @@ -282,6 +449,9 @@ gen7_pause_transform_feedback(struct gl_context *ctx, struct brw_transform_feedback_object *brw_obj = (struct brw_transform_feedback_object *) obj; + /* Flush any drawing so that the counters have the right values. */ + intel_batchbuffer_emit_mi_flush(brw); + /* Save the SOL buffer offset register values. */ for (int i = 0; i < 4; i++) { BEGIN_BATCH(3); @@ -292,6 +462,13 @@ gen7_pause_transform_feedback(struct gl_context *ctx, i * sizeof(uint32_t)); ADVANCE_BATCH(); } + + /* Store the temporary ending value of the SO_NUM_PRIMS_WRITTEN counters. + * While this operation is paused, other transform feedback actions may + * occur, which will contribute to the counters. We need to exclude that + * from our counts. + */ + gen7_save_primitives_written_counters(brw, brw_obj); } void @@ -312,4 +489,7 @@ gen7_resume_transform_feedback(struct gl_context *ctx, i * sizeof(uint32_t)); ADVANCE_BATCH(); } + + /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */ + gen7_save_primitives_written_counters(brw, brw_obj); } -- 2.30.2