X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fgen6_queryobj.c;h=8e639cfeef4025acc1414b2cc6a7549991b8c2d4;hb=9a50fc7cfcc0b084f3644ce79bb5c16505cfb404;hp=96db5e95bb142f70036a278481b9de81a7a98d7a;hpb=b0629e6894513a2c49a018bc3342a4e55435a236;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c b/src/mesa/drivers/dri/i965/gen6_queryobj.c index 96db5e95bb1..8e639cfeef4 100644 --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c @@ -38,7 +38,6 @@ #include "brw_state.h" #include "intel_batchbuffer.h" #include "intel_buffer_objects.h" -#include "intel_reg.h" static inline void set_query_availability(struct brw_context *brw, struct brw_query_object *query, @@ -61,16 +60,25 @@ set_query_availability(struct brw_context *brw, struct brw_query_object *query, */ if (brw->ctx.Extensions.ARB_query_buffer_object && brw_is_query_pipelined(query)) { - brw_emit_pipe_control_write(brw, - PIPE_CONTROL_WRITE_IMMEDIATE, + unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE; + + if (available) { + /* Order available *after* the query results. */ + flags |= PIPE_CONTROL_FLUSH_ENABLE; + } else { + /* Make it unavailable *before* any pipelined reads. */ + flags |= PIPE_CONTROL_CS_STALL; + } + + brw_emit_pipe_control_write(brw, flags, query->bo, 2 * sizeof(uint64_t), - available, 0); + available); } } static void write_primitives_generated(struct brw_context *brw, - drm_intel_bo *query_bo, int stream, int idx) + struct brw_bo *query_bo, int stream, int idx) { brw_emit_mi_flush(brw); @@ -86,7 +94,7 @@ write_primitives_generated(struct brw_context *brw, static void write_xfb_primitives_written(struct brw_context *brw, - drm_intel_bo *bo, int stream, int idx) + struct brw_bo *bo, int stream, int idx) { brw_emit_mi_flush(brw); @@ -99,7 +107,55 @@ write_xfb_primitives_written(struct brw_context *brw, } } -static inline const int +static void +write_xfb_overflow_streams(struct gl_context *ctx, + struct brw_bo *bo, int stream, int count, + int idx) +{ + struct brw_context *brw = brw_context(ctx); + + brw_emit_mi_flush(brw); + + for (int i = 0; i < count; i++) { + int w_idx = 4 * i + idx; + int g_idx = 4 * i + idx + 2; + + if (brw->gen >= 7) { + brw_store_register_mem64(brw, bo, + GEN7_SO_NUM_PRIMS_WRITTEN(stream + i), + g_idx * sizeof(uint64_t)); + brw_store_register_mem64(brw, bo, + GEN7_SO_PRIM_STORAGE_NEEDED(stream + i), + w_idx * sizeof(uint64_t)); + } else { + brw_store_register_mem64(brw, bo, + GEN6_SO_NUM_PRIMS_WRITTEN, + g_idx * sizeof(uint64_t)); + brw_store_register_mem64(brw, bo, + GEN6_SO_PRIM_STORAGE_NEEDED, + w_idx * sizeof(uint64_t)); + } + } +} + +static bool +check_xfb_overflow_streams(uint64_t *results, int count) +{ + bool overflow = false; + + for (int i = 0; i < count; i++) { + uint64_t *result_i = &results[4 * i]; + + if ((result_i[3] - result_i[2]) != (result_i[1] - result_i[0])) { + overflow = true; + break; + } + } + + return overflow; +} + +static inline int pipeline_target_to_index(int target) { if (target == GL_GEOMETRY_SHADER_INVOCATIONS) @@ -109,7 +165,7 @@ pipeline_target_to_index(int target) } static void -emit_pipeline_stat(struct brw_context *brw, drm_intel_bo *bo, +emit_pipeline_stat(struct brw_context *brw, struct brw_bo *bo, int stream, int target, int idx) { /* One source of confusion is the tessellation shader statistics. The @@ -165,37 +221,24 @@ gen6_queryobj_get_results(struct gl_context *ctx, if (query->bo == NULL) return; - brw_bo_map(brw, query->bo, false, "query object"); - uint64_t *results = query->bo->virtual; + uint64_t *results = brw_bo_map(brw, query->bo, MAP_READ); switch (query->Base.Target) { case GL_TIME_ELAPSED: /* The query BO contains the starting and ending timestamps. * Subtract the two and convert to nanoseconds. */ - query->Base.Result += 80 * (results[1] - results[0]); + query->Base.Result = brw_raw_timestamp_delta(brw, results[0], results[1]); + query->Base.Result = brw_timebase_scale(brw, query->Base.Result); break; case GL_TIMESTAMP: - /* Our timer is a clock that increments every 80ns (regardless of - * other clock scaling in the system). The timestamp register we can - * read for glGetTimestamp() masks out the top 32 bits, so we do that - * here too to let the two counters be compared against each other. - * - * If we just multiplied that 32 bits of data by 80, it would roll - * over at a non-power-of-two, so an application couldn't use - * GL_QUERY_COUNTER_BITS to handle rollover correctly. Instead, we - * report 36 bits and truncate at that (rolling over 5 times as often - * as the HW counter), and when the 32-bit counter rolls over, it - * happens to also be at a rollover in the reported value from near - * (1<<36) to 0. - * - * The low 32 bits rolls over in ~343 seconds. Our 36-bit result - * rolls over every ~69 seconds. - * - * The query BO contains a single timestamp value in results[0]. + /* The query BO contains a single timestamp value in results[0]. */ + query->Base.Result = brw_timebase_scale(brw, results[0]); + + /* Ensure the scaled timestamp overflows according to + * GL_QUERY_COUNTER_BITS */ - query->Base.Result = 80 * (results[0] & 0xffffffff); - query->Base.Result &= (1ull << 36) - 1; + query->Base.Result &= (1ull << ctx->Const.QueryCounterBits.Timestamp) - 1; break; case GL_SAMPLES_PASSED_ARB: @@ -226,6 +269,14 @@ gen6_queryobj_get_results(struct gl_context *ctx, query->Base.Result = results[1] - results[0]; break; + case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB: + query->Base.Result = check_xfb_overflow_streams(results, 1); + break; + + case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB: + query->Base.Result = check_xfb_overflow_streams(results, MAX_VERTEX_STREAMS); + break; + case GL_FRAGMENT_SHADER_INVOCATIONS_ARB: query->Base.Result = (results[1] - results[0]); /* Implement the "WaDividePSInvocationCountBy4:HSW,BDW" workaround: @@ -245,12 +296,12 @@ gen6_queryobj_get_results(struct gl_context *ctx, default: unreachable("Unrecognized query target in brw_queryobj_get_results()"); } - drm_intel_bo_unmap(query->bo); + brw_bo_unmap(query->bo); /* Now that we've processed the data stored in the query's buffer object, * we can release it. */ - drm_intel_bo_unreference(query->bo); + brw_bo_unreference(query->bo); query->bo = NULL; query->Base.Ready = true; @@ -269,8 +320,8 @@ gen6_begin_query(struct gl_context *ctx, struct gl_query_object *q) struct brw_query_object *query = (struct brw_query_object *)q; /* Since we're starting a new query, we need to throw away old results. */ - drm_intel_bo_unreference(query->bo); - query->bo = drm_intel_bo_alloc(brw->bufmgr, "query results", 4096, 4096); + brw_bo_unreference(query->bo); + query->bo = brw_bo_alloc(brw->bufmgr, "query results", 4096, 4096); /* For ARB_query_buffer_object: The result is not available */ set_query_availability(brw, query, false); @@ -315,6 +366,14 @@ gen6_begin_query(struct gl_context *ctx, struct gl_query_object *q) write_xfb_primitives_written(brw, query->bo, query->Base.Stream, 0); break; + case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB: + write_xfb_overflow_streams(ctx, query->bo, query->Base.Stream, 1, 0); + break; + + case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB: + write_xfb_overflow_streams(ctx, query->bo, 0, MAX_VERTEX_STREAMS, 0); + break; + case GL_VERTICES_SUBMITTED_ARB: case GL_PRIMITIVES_SUBMITTED_ARB: case GL_VERTEX_SHADER_INVOCATIONS_ARB: @@ -369,6 +428,15 @@ gen6_end_query(struct gl_context *ctx, struct gl_query_object *q) write_xfb_primitives_written(brw, query->bo, query->Base.Stream, 1); break; + case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB: + write_xfb_overflow_streams(ctx, query->bo, query->Base.Stream, 1, 1); + break; + + case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB: + write_xfb_overflow_streams(ctx, query->bo, 0, MAX_VERTEX_STREAMS, 1); + break; + + /* calculate overflow here */ case GL_VERTICES_SUBMITTED_ARB: case GL_PRIMITIVES_SUBMITTED_ARB: case GL_VERTEX_SHADER_INVOCATIONS_ARB: @@ -407,7 +475,7 @@ flush_batch_if_needed(struct brw_context *brw, struct brw_query_object *query) * (for example, due to being full). Record that it's been flushed. */ query->flushed = query->flushed || - !drm_intel_bo_references(brw->batch.bo, query->bo); + !brw_batch_references(&brw->batch, query->bo); if (!query->flushed) intel_batchbuffer_flush(brw); @@ -459,7 +527,7 @@ static void gen6_check_query(struct gl_context *ctx, struct gl_query_object *q) */ flush_batch_if_needed(brw, query); - if (!drm_intel_bo_busy(query->bo)) { + if (!brw_bo_busy(query->bo)) { gen6_queryobj_get_results(ctx, query); } }