X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fgen6_queryobj.c;h=de71bb565f53cb35ff9c93ec4ab97fe8650dda77;hb=8776b1b14b229d110f283f5da8c3c36261068ede;hp=56e9d5db9374f6c4f8cc2c4e709b75923a840158;hpb=32a3f5f6d768e5828be1d1f46b1b3f819f55cba8;p=mesa.git

diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c b/src/mesa/drivers/dri/i965/gen6_queryobj.c
index 56e9d5db937..de71bb565f5 100644
--- a/src/mesa/drivers/dri/i965/gen6_queryobj.c
+++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c
@@ -39,61 +39,6 @@
 #include "intel_batchbuffer.h"
 #include "intel_reg.h"
 
-/**
- * Emit PIPE_CONTROLs to write the current GPU timestamp into a buffer.
- */
-static void
-write_timestamp(struct brw_context *brw, drm_intel_bo *query_bo, int idx)
-{
-   /* Emit workaround flushes: */
-   if (brw->gen == 6) {
-      /* The timestamp write below is a non-zero post-sync op, which on
-       * Gen6 necessitates a CS stall.  CS stalls need stall at scoreboard
-       * set.  See the comments for intel_emit_post_sync_nonzero_flush().
-       */
-      BEGIN_BATCH(4);
-      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
-      OUT_BATCH(PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   }
-
-   BEGIN_BATCH(5);
-   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
-   OUT_BATCH(PIPE_CONTROL_WRITE_TIMESTAMP);
-   OUT_RELOC(query_bo,
-             I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
-             PIPE_CONTROL_GLOBAL_GTT_WRITE |
-             idx * sizeof(uint64_t));
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   ADVANCE_BATCH();
-}
-
-/**
- * Emit PIPE_CONTROLs to write the PS_DEPTH_COUNT register into a buffer.
- */
-static void
-write_depth_count(struct brw_context *brw, drm_intel_bo *query_bo, int idx)
-{
-   /* Emit Sandybridge workaround flush: */
-   if (brw->gen == 6)
-      intel_emit_post_sync_nonzero_flush(brw);
-
-   BEGIN_BATCH(5);
-   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
-   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL |
-             PIPE_CONTROL_WRITE_DEPTH_COUNT);
-   OUT_RELOC(query_bo,
-             I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
-             PIPE_CONTROL_GLOBAL_GTT_WRITE |
-             (idx * sizeof(uint64_t)));
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   ADVANCE_BATCH();
-}
-
 /*
  * Write an arbitrary 64-bit register to a buffer via MI_STORE_REGISTER_MEM.
  *
@@ -112,38 +57,53 @@ brw_store_register_mem64(struct brw_context *brw,
    /* MI_STORE_REGISTER_MEM only stores a single 32-bit value, so to
     * read a full 64-bit register, we need to do two of them.
    */
-   BEGIN_BATCH(3);
-   OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
-   OUT_BATCH(reg);
-   OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
-             idx * sizeof(uint64_t));
-   ADVANCE_BATCH();
-
-   BEGIN_BATCH(3);
-   OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
-   OUT_BATCH(reg + sizeof(uint32_t));
-   OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
-             sizeof(uint32_t) + idx * sizeof(uint64_t));
-   ADVANCE_BATCH();
+   if (brw->gen >= 8) {
+      BEGIN_BATCH(8);
+      OUT_BATCH(MI_STORE_REGISTER_MEM | (4 - 2));
+      OUT_BATCH(reg);
+      OUT_RELOC64(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  idx * sizeof(uint64_t));
+      OUT_BATCH(MI_STORE_REGISTER_MEM | (4 - 2));
+      OUT_BATCH(reg + sizeof(uint32_t));
+      OUT_RELOC64(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  sizeof(uint32_t) + idx * sizeof(uint64_t));
+      ADVANCE_BATCH();
+   } else {
+      BEGIN_BATCH(6);
+      OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
+      OUT_BATCH(reg);
+      OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                idx * sizeof(uint64_t));
+      OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
+      OUT_BATCH(reg + sizeof(uint32_t));
+      OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                sizeof(uint32_t) + idx * sizeof(uint64_t));
+      ADVANCE_BATCH();
+   }
 }
 
 static void
 write_primitives_generated(struct brw_context *brw,
-                           drm_intel_bo *query_bo, int idx)
+                           drm_intel_bo *query_bo, int stream, int idx)
 {
    intel_batchbuffer_emit_mi_flush(brw);
 
-   brw_store_register_mem64(brw, query_bo, CL_INVOCATION_COUNT, idx);
+   if (brw->gen >= 7 && stream > 0) {
+      brw_store_register_mem64(brw, query_bo,
+                               GEN7_SO_PRIM_STORAGE_NEEDED(stream), idx);
+   } else {
+      brw_store_register_mem64(brw, query_bo, CL_INVOCATION_COUNT, idx);
+   }
 }
 
 static void
 write_xfb_primitives_written(struct brw_context *brw,
-                             drm_intel_bo *bo, int idx)
+                             drm_intel_bo *bo, int stream, int idx)
 {
    intel_batchbuffer_emit_mi_flush(brw);
 
    if (brw->gen >= 7) {
-      brw_store_register_mem64(brw, bo, GEN7_SO_NUM_PRIMS_WRITTEN(0), idx);
+      brw_store_register_mem64(brw, bo, GEN7_SO_NUM_PRIMS_WRITTEN(stream), idx);
    } else {
       brw_store_register_mem64(brw, bo, GEN6_SO_NUM_PRIMS_WRITTEN, idx);
    }
@@ -161,20 +121,7 @@ gen6_queryobj_get_results(struct gl_context *ctx,
    if (query->bo == NULL)
       return;
 
-   /* If the application has requested the query result, but this batch is
-    * still contributing to it, flush it now so the results will be present
-    * when mapped.
-    */
-   if (drm_intel_bo_references(brw->batch.bo, query->bo))
-      intel_batchbuffer_flush(brw);
-
-   if (unlikely(brw->perf_debug)) {
-      if (drm_intel_bo_busy(query->bo)) {
-         perf_debug("Stalling on the GPU waiting for a query object.\n");
-      }
-   }
-
-   drm_intel_bo_map(query->bo, false);
+   brw_bo_map(brw, query->bo, false, "query object");
    uint64_t *results = query->bo->virtual;
    switch (query->Base.Target) {
    case GL_TIME_ELAPSED:
@@ -226,8 +173,7 @@ gen6_queryobj_get_results(struct gl_context *ctx,
       break;
 
    default:
-      assert(!"Unrecognized query target in brw_queryobj_get_results()");
-      break;
+      unreachable("Unrecognized query target in brw_queryobj_get_results()");
    }
 
    drm_intel_bo_unmap(query->bo);
@@ -236,6 +182,8 @@ gen6_queryobj_get_results(struct gl_context *ctx,
     */
    drm_intel_bo_unreference(query->bo);
    query->bo = NULL;
+
+   query->Base.Ready = true;
 }
 
 /**
@@ -275,26 +223,25 @@ gen6_begin_query(struct gl_context *ctx, struct gl_query_object *q)
        * obtain the time elapsed.  Notably, this includes time elapsed while
        * the system was doing other work, such as running other applications.
       */
-      write_timestamp(brw, query->bo, 0);
+      brw_write_timestamp(brw, query->bo, 0);
       break;
 
    case GL_ANY_SAMPLES_PASSED:
    case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
    case GL_SAMPLES_PASSED_ARB:
-      write_depth_count(brw, query->bo, 0);
+      brw_write_depth_count(brw, query->bo, 0);
       break;
 
    case GL_PRIMITIVES_GENERATED:
-      write_primitives_generated(brw, query->bo, 0);
+      write_primitives_generated(brw, query->bo, query->Base.Stream, 0);
       break;
 
    case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
-      write_xfb_primitives_written(brw, query->bo, 0);
+      write_xfb_primitives_written(brw, query->bo, query->Base.Stream, 0);
       break;
 
    default:
-      assert(!"Unrecognized query target in brw_begin_query()");
-      break;
+      unreachable("Unrecognized query target in brw_begin_query()");
    }
 }
 
@@ -314,27 +261,47 @@ gen6_end_query(struct gl_context *ctx, struct gl_query_object *q)
 
    switch (query->Base.Target) {
    case GL_TIME_ELAPSED:
-      write_timestamp(brw, query->bo, 1);
+      brw_write_timestamp(brw, query->bo, 1);
       break;
 
    case GL_ANY_SAMPLES_PASSED:
    case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
    case GL_SAMPLES_PASSED_ARB:
-      write_depth_count(brw, query->bo, 1);
+      brw_write_depth_count(brw, query->bo, 1);
       break;
 
    case GL_PRIMITIVES_GENERATED:
-      write_primitives_generated(brw, query->bo, 1);
+      write_primitives_generated(brw, query->bo, query->Base.Stream, 1);
       break;
 
    case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
-      write_xfb_primitives_written(brw, query->bo, 1);
+      write_xfb_primitives_written(brw, query->bo, query->Base.Stream, 1);
      break;
 
    default:
-      assert(!"Unrecognized query target in brw_end_query()");
-      break;
+      unreachable("Unrecognized query target in brw_end_query()");
    }
+
+   /* The current batch contains the commands to handle EndQuery(),
+    * but they won't actually execute until it is flushed.
+    */
+   query->flushed = false;
+}
+
+/**
+ * Flush the batch if it still references the query object BO.
+ */
+static void
+flush_batch_if_needed(struct brw_context *brw, struct brw_query_object *query)
+{
+   /* If the batch doesn't reference the BO, it must have been flushed
+    * (for example, due to being full).  Record that it's been flushed.
+    */
+   query->flushed = query->flushed ||
+      !drm_intel_bo_references(brw->batch.bo, query->bo);
+
+   if (!query->flushed)
+      intel_batchbuffer_flush(brw);
 }
 
 /**
@@ -345,10 +312,16 @@ gen6_end_query(struct gl_context *ctx, struct gl_query_object *q)
  */
 static void gen6_wait_query(struct gl_context *ctx, struct gl_query_object *q)
 {
+   struct brw_context *brw = brw_context(ctx);
    struct brw_query_object *query = (struct brw_query_object *)q;
 
+   /* If the application has requested the query result, but this batch is
+    * still contributing to it, flush it now to finish that work so the
+    * result will become available (eventually).
+    */
+   flush_batch_if_needed(brw, query);
+
    gen6_queryobj_get_results(ctx, query);
-   query->Base.Ready = true;
 }
 
 /**
@@ -362,6 +335,12 @@ static void gen6_check_query(struct gl_context *ctx, struct gl_query_object *q)
    struct brw_context *brw = brw_context(ctx);
    struct brw_query_object *query = (struct brw_query_object *)q;
 
+   /* If query->bo is NULL, we've already gathered the results - this is a
+    * redundant CheckQuery call.  Ignore it.
+    */
+   if (query->bo == NULL)
+      return;
+
    /* From the GL_ARB_occlusion_query spec:
    *
    *      "Instead of allowing for an infinite loop, performing a
@@ -369,12 +348,10 @@ static void gen6_check_query(struct gl_context *ctx, struct gl_query_object *q)
    *       not ready yet on the first time it is queried.  This ensures that
    *       the async query will return true in finite time.
    */
-   if (query->bo && drm_intel_bo_references(brw->batch.bo, query->bo))
-      intel_batchbuffer_flush(brw);
+   flush_batch_if_needed(brw, query);
 
-   if (query->bo == NULL || !drm_intel_bo_busy(query->bo)) {
+   if (!drm_intel_bo_busy(query->bo)) {
       gen6_queryobj_get_results(ctx, query);
-      query->Base.Ready = true;
    }
 }
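
Note on the snapshot scheme the hunks above rely on: BeginQuery and EndQuery each write one 64-bit counter snapshot into the query buffer object (the idx * sizeof(uint64_t) offsets select slot 0 and slot 1), and gen6_queryobj_get_results() maps the BO and reduces each pair to a single value. A minimal sketch of that readback arithmetic follows; the helper name is hypothetical and not part of this diff:

/* Minimal sketch (hypothetical helper, not in this commit): the query BO
 * holds two uint64_t snapshots per counter -- slot 0 written at BeginQuery
 * (idx == 0), slot 1 at EndQuery (idx == 1).  For counter-delta queries
 * such as GL_PRIMITIVES_GENERATED, the result is their difference.
 */
static uint64_t
example_snapshot_delta(const uint64_t *results /* mapped query->bo->virtual */)
{
   uint64_t begin = results[0];   /* snapshot written with idx == 0 */
   uint64_t end   = results[1];   /* snapshot written with idx == 1 */
   return end - begin;
}

Because MI_STORE_REGISTER_MEM stores a single 32-bit value, each 64-bit slot is filled by two packets (low and high halves), so a slot is only coherent once the batch that wrote it has executed; the query->flushed tracking introduced above is what ensures that batch has been submitted before the BO is mapped.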