X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Firis%2Firis_query.c;h=6d9659080a7b182fed8652c95663b8d96a989278;hb=4f1319a17dc795cb76cd97853b15bfd1dfb02a14;hp=de579f25584a19fc3705fc0584a3c4bb51b278c1;hpb=df60241ff7d212eeecfdad7090a7a4aa9fc8a4a5;p=mesa.git

diff --git a/src/gallium/drivers/iris/iris_query.c b/src/gallium/drivers/iris/iris_query.c
index de579f25584..6d9659080a7 100644
--- a/src/gallium/drivers/iris/iris_query.c
+++ b/src/gallium/drivers/iris/iris_query.c
@@ -35,8 +35,10 @@
 #include "pipe/p_screen.h"
 #include "util/fast_idiv_by_const.h"
 #include "util/u_inlines.h"
+#include "util/u_upload_mgr.h"
 #include "iris_context.h"
 #include "iris_defines.h"
+#include "iris_fence.h"
 #include "iris_resource.h"
 #include "iris_screen.h"
 #include "vulkan/util/vk_util.h"
@@ -72,27 +74,12 @@
 #define MI_ALU_STORE 0x180
 #define MI_ALU_STOREINV 0x580
 
-#define MI_ALU_R0 0x00
-#define MI_ALU_R1 0x01
-#define MI_ALU_R2 0x02
-#define MI_ALU_R3 0x03
-#define MI_ALU_R4 0x04
 #define MI_ALU_SRCA 0x20
 #define MI_ALU_SRCB 0x21
 #define MI_ALU_ACCU 0x31
 #define MI_ALU_ZF 0x32
 #define MI_ALU_CF 0x33
 
-#define _MI_ALU(op, x, y) (((op) << 20) | ((x) << 10) | (y))
-
-#define _MI_ALU0(op) _MI_ALU(MI_ALU_##op, 0, 0)
-#define _MI_ALU1(op, x) _MI_ALU(MI_ALU_##op, x, 0)
-#define _MI_ALU2(op, x, y) _MI_ALU(MI_ALU_##op, x, y)
-
-#define MI_ALU0(op) _MI_ALU0(op)
-#define MI_ALU1(op, x) _MI_ALU1(op, MI_ALU_##x)
-#define MI_ALU2(op, x, y) _MI_ALU2(op, MI_ALU_##x, MI_ALU_##y)
-
 #define emit_lri32 ice->vtbl.load_register_imm32
 #define emit_lri64 ice->vtbl.load_register_imm64
 #define emit_lrr32 ice->vtbl.load_register_reg32
@@ -107,15 +94,16 @@ struct iris_query {
 
    uint64_t result;
 
-   struct iris_bo *bo;
+   struct iris_state_ref query_state_ref;
    struct iris_query_snapshots *map;
+   struct iris_syncpt *syncpt;
 
    int batch_idx;
 };
 
 struct iris_query_snapshots {
-   /** iris_render_condition's saved MI_PREDICATE_DATA value. */
-   uint64_t predicate_data;
+   /** iris_render_condition's saved MI_PREDICATE_RESULT value. */
+   uint64_t predicate_result;
 
    /** Have the start/end snapshots landed? */
    uint64_t snapshots_landed;
@@ -126,7 +114,7 @@ struct iris_query_snapshots {
 };
 
 struct iris_query_so_overflow {
-   uint64_t predicate_data;
+   uint64_t predicate_result;
    uint64_t snapshots_landed;
 
    struct {
@@ -161,13 +149,15 @@ mark_available(struct iris_context *ice, struct iris_query *q)
 {
    struct iris_batch *batch = &ice->batches[q->batch_idx];
    unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
    unsigned offset = offsetof(struct iris_query_snapshots, snapshots_landed);
+   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
+   offset += q->query_state_ref.offset;
 
    if (!iris_is_query_pipelined(q)) {
-      ice->vtbl.store_data_imm64(batch, q->bo, offset, true);
+      ice->vtbl.store_data_imm64(batch, bo, offset, true);
    } else {
       /* Order available *after* the query results. */
       flags |= PIPE_CONTROL_FLUSH_ENABLE;
-      iris_emit_pipe_control_write(batch, flags, q->bo, offset, true);
+      iris_emit_pipe_control_write(batch, flags, bo, offset, true);
    }
 }
@@ -183,9 +173,10 @@ iris_pipelined_write(struct iris_batch *batch,
    const struct gen_device_info *devinfo = &batch->screen->devinfo;
    const unsigned optional_cs_stall = devinfo->gen == 9 && devinfo->gt == 4 ?
       PIPE_CONTROL_CS_STALL : 0;
+   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
 
    iris_emit_pipe_control_write(batch, flags | optional_cs_stall,
-                                q->bo, offset, 0ull);
+                                bo, offset, 0ull);
 }
 
 static void
@@ -193,6 +184,7 @@ write_value(struct iris_context *ice, struct iris_query *q, unsigned offset)
 {
    struct iris_batch *batch = &ice->batches[q->batch_idx];
    const struct gen_device_info *devinfo = &batch->screen->devinfo;
+   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
 
    if (!iris_is_query_pipelined(q)) {
       iris_emit_pipe_control_flush(batch,
@@ -228,14 +220,14 @@ write_value(struct iris_context *ice, struct iris_query *q, unsigned offset)
       ice->vtbl.store_register_mem64(batch,
                                      q->index == 0 ? CL_INVOCATION_COUNT :
                                      SO_PRIM_STORAGE_NEEDED(q->index),
-                                     q->bo, offset, false);
+                                     bo, offset, false);
       break;
    case PIPE_QUERY_PRIMITIVES_EMITTED:
       ice->vtbl.store_register_mem64(batch,
                                      SO_NUM_PRIMS_WRITTEN(q->index),
-                                     q->bo, offset, false);
+                                     bo, offset, false);
       break;
-   case PIPE_QUERY_PIPELINE_STATISTICS: {
+   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE: {
       static const uint32_t index_to_reg[] = {
          IA_VERTICES_COUNT,
          IA_PRIMITIVES_COUNT,
@@ -251,7 +243,7 @@ write_value(struct iris_context *ice, struct iris_query *q, unsigned offset)
       };
       const uint32_t reg = index_to_reg[q->index];
 
-      ice->vtbl.store_register_mem64(batch, reg, q->bo, offset, false);
+      ice->vtbl.store_register_mem64(batch, reg, bo, offset, false);
       break;
    }
    default:
@@ -264,20 +256,22 @@ write_overflow_values(struct iris_context *ice, struct iris_query *q, bool end)
 {
    struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
    uint32_t count = q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ? 1 : 4;
+   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
+   uint32_t offset = q->query_state_ref.offset;
 
    iris_emit_pipe_control_flush(batch,
                                 PIPE_CONTROL_CS_STALL |
                                 PIPE_CONTROL_STALL_AT_SCOREBOARD);
    for (uint32_t i = 0; i < count; i++) {
       int s = q->index + i;
-      int g_idx = offsetof(struct iris_query_so_overflow,
+      int g_idx = offset + offsetof(struct iris_query_so_overflow,
                            stream[s].num_prims[end]);
-      int w_idx = offsetof(struct iris_query_so_overflow,
+      int w_idx = offset + offsetof(struct iris_query_so_overflow,
                            stream[s].prim_storage_needed[end]);
       ice->vtbl.store_register_mem64(batch, SO_NUM_PRIMS_WRITTEN(s),
-                                     q->bo, g_idx, false);
+                                     bo, g_idx, false);
       ice->vtbl.store_register_mem64(batch, SO_PRIM_STORAGE_NEEDED(s),
-                                     q->bo, w_idx, false);
+                                     bo, w_idx, false);
    }
 }
 
@@ -334,10 +328,16 @@ calculate_result_on_cpu(const struct gen_device_info *devinfo,
       for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
          q->result |= stream_overflowed((void *) q->map, i);
       break;
+   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
+      q->result = q->map->end - q->map->start;
+
+      /* WaDividePSInvocationCountBy4:HSW,BDW */
+      if (devinfo->gen == 8 && q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
+         q->result /= 4;
+      break;
    case PIPE_QUERY_OCCLUSION_COUNTER:
    case PIPE_QUERY_PRIMITIVES_GENERATED:
    case PIPE_QUERY_PRIMITIVES_EMITTED:
-   case PIPE_QUERY_PIPELINE_STATISTICS:
    default:
       q->result = q->map->end - q->map->start;
       break;
@@ -488,6 +488,15 @@ iris_math_div32_gpr0(struct iris_context *ice,
    }
 }
 
+void
+iris_math_add32_gpr0(struct iris_context *ice,
+                     struct iris_batch *batch,
+                     uint32_t x)
+{
+   emit_lri32(batch, CS_GPR(1), x);
+   emit_alu_add(batch, MI_ALU_R0, MI_ALU_R0, MI_ALU_R1);
+}
+
 /*
  * GPR0 = (GPR0 == 0) ? 0 : 1;
  */
@@ -518,18 +527,20 @@ load_overflow_data_to_cs_gprs(struct iris_context *ice,
                               int idx)
 {
    struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
+   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
+   uint32_t offset = q->query_state_ref.offset;
 
-   ice->vtbl.load_register_mem64(batch, CS_GPR(1), q->bo,
+   ice->vtbl.load_register_mem64(batch, CS_GPR(1), bo, offset +
                                  offsetof(struct iris_query_so_overflow,
                                           stream[idx].prim_storage_needed[0]));
-   ice->vtbl.load_register_mem64(batch, CS_GPR(2), q->bo,
+   ice->vtbl.load_register_mem64(batch, CS_GPR(2), bo, offset +
                                  offsetof(struct iris_query_so_overflow,
                                           stream[idx].prim_storage_needed[1]));
-   ice->vtbl.load_register_mem64(batch, CS_GPR(3), q->bo,
+   ice->vtbl.load_register_mem64(batch, CS_GPR(3), bo, offset +
                                  offsetof(struct iris_query_so_overflow,
                                           stream[idx].num_prims[0]));
-   ice->vtbl.load_register_mem64(batch, CS_GPR(4), q->bo,
+   ice->vtbl.load_register_mem64(batch, CS_GPR(4), bo, offset +
                                  offsetof(struct iris_query_so_overflow,
                                           stream[idx].num_prims[1]));
 }
 
@@ -661,6 +672,8 @@ calculate_result_on_gpu(struct iris_context *ice, struct iris_query *q)
    struct iris_batch *batch = &ice->batches[q->batch_idx];
    struct iris_screen *screen = (void *) ice->ctx.screen;
    const struct gen_device_info *devinfo = &batch->screen->devinfo;
+   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
+   uint32_t offset = q->query_state_ref.offset;
 
    if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
        q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
@@ -669,7 +682,8 @@ calculate_result_on_gpu(struct iris_context *ice, struct iris_query *q)
    }
 
    if (q->type == PIPE_QUERY_TIMESTAMP) {
-      ice->vtbl.load_register_mem64(batch, CS_GPR(0), q->bo,
+      ice->vtbl.load_register_mem64(batch, CS_GPR(0), bo,
+                                    offset +
                                     offsetof(struct iris_query_snapshots, start));
       /* TODO: This discards any fractional bits of the timebase scale.
        * We would need to do a bit of fixed point math on the CS ALU, or
@@ -680,9 +694,11 @@ calculate_result_on_gpu(struct iris_context *ice, struct iris_query *q)
       return;
    }
 
-   ice->vtbl.load_register_mem64(batch, CS_GPR(1), q->bo,
+   ice->vtbl.load_register_mem64(batch, CS_GPR(1), bo,
+                                 offset +
                                  offsetof(struct iris_query_snapshots, start));
-   ice->vtbl.load_register_mem64(batch, CS_GPR(2), q->bo,
+   ice->vtbl.load_register_mem64(batch, CS_GPR(2), bo,
+                                 offset +
                                  offsetof(struct iris_query_snapshots, end));
 
    static const uint32_t math[] = {
@@ -695,7 +711,9 @@ calculate_result_on_gpu(struct iris_context *ice, struct iris_query *q)
    iris_batch_emit(batch, math, sizeof(math));
 
    /* WaDividePSInvocationCountBy4:HSW,BDW */
-   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS && q->index == 7 && devinfo->gen == 8)
+   if (devinfo->gen == 8 &&
+       q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
+       q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
       shr_gpr0_by_2_bits(ice);
 
    if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE ||
@@ -718,7 +736,8 @@ iris_create_query(struct pipe_context *ctx,
    q->type = query_type;
    q->index = index;
 
-   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS && q->index == 10)
+   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
+       q->index == PIPE_STAT_QUERY_CS_INVOCATIONS)
       q->batch_idx = IRIS_BATCH_COMPUTE;
    else
       q->batch_idx = IRIS_BATCH_RENDER;
@@ -729,7 +748,8 @@
 static void
 iris_destroy_query(struct pipe_context *ctx, struct pipe_query *p_query)
 {
    struct iris_query *query = (void *) p_query;
-   iris_bo_unreference(query->bo);
+   struct iris_screen *screen = (void *) ctx->screen;
+   iris_syncpt_reference(screen, &query->syncpt, NULL);
    free(query);
 }
 
@@ -737,23 +757,31 @@ iris_destroy_query(struct pipe_context *ctx, struct pipe_query *p_query)
 static boolean
 iris_begin_query(struct pipe_context *ctx, struct pipe_query *query)
 {
-   struct iris_screen *screen = (void *) ctx->screen;
    struct iris_context *ice = (void *) ctx;
    struct iris_query *q = (void *) query;
+   void *ptr = NULL;
+   uint32_t size;
+
+   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
+       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
+      size = sizeof(struct iris_query_so_overflow);
+   else
+      size = sizeof(struct iris_query_snapshots);
 
-   iris_bo_unreference(q->bo);
-   q->bo = iris_bo_alloc(screen->bufmgr, "query object", 4096,
-                         IRIS_MEMZONE_OTHER);
-   if (!q->bo)
+   u_upload_alloc(ice->query_buffer_uploader, 0,
+                  size, size, &q->query_state_ref.offset,
+                  &q->query_state_ref.res, &ptr);
+
+   if (!iris_resource_bo(q->query_state_ref.res))
       return false;
 
-   q->map = iris_bo_map(&ice->dbg, q->bo, MAP_READ | MAP_WRITE | MAP_ASYNC);
+   q->map = ptr;
    if (!q->map)
      return false;
 
    q->result = 0ull;
    q->ready = false;
-   q->map->snapshots_landed = false;
+   WRITE_ONCE(q->map->snapshots_landed, false);
 
    if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
       ice->state.prims_generated_query_active = true;
@@ -764,7 +792,9 @@ iris_begin_query(struct pipe_context *ctx, struct pipe_query *query)
        q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
       write_overflow_values(ice, q, false);
    else
-      write_value(ice, q, offsetof(struct iris_query_snapshots, start));
+      write_value(ice, q,
+                  q->query_state_ref.offset +
+                  offsetof(struct iris_query_snapshots, start));
 
    return true;
 }
@@ -774,9 +804,11 @@ iris_end_query(struct pipe_context *ctx, struct pipe_query *query)
 {
    struct iris_context *ice = (void *) ctx;
    struct iris_query *q = (void *) query;
+   struct iris_batch *batch = &ice->batches[q->batch_idx];
 
    if (q->type == PIPE_QUERY_TIMESTAMP) {
       iris_begin_query(ctx, query);
+      iris_batch_reference_signal_syncpt(batch, &q->syncpt);
       mark_available(ice, q);
       return true;
    }
@@ -790,7 +822,11 @@ iris_end_query(struct pipe_context *ctx, struct pipe_query *query)
        q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
       write_overflow_values(ice, q, true);
    else
-      write_value(ice, q, offsetof(struct iris_query_snapshots, end));
+      write_value(ice, q,
+                  q->query_state_ref.offset +
+                  offsetof(struct iris_query_snapshots, end));
+
+   iris_batch_reference_signal_syncpt(batch, &q->syncpt);
 
    mark_available(ice, q);
    return true;
@@ -806,7 +842,7 @@ iris_check_query_no_flush(struct iris_context *ice, struct iris_query *q)
    struct iris_screen *screen = (void *) ice->ctx.screen;
    const struct gen_device_info *devinfo = &screen->devinfo;
 
-   if (!q->ready && q->map->snapshots_landed) {
+   if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
       calculate_result_on_cpu(devinfo, q);
    }
 }
@@ -821,69 +857,26 @@ iris_get_query_result(struct pipe_context *ctx,
    struct iris_query *q = (void *) query;
    struct iris_screen *screen = (void *) ctx->screen;
    const struct gen_device_info *devinfo = &screen->devinfo;
+   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
 
    if (!q->ready) {
-      if (iris_batch_references(&ice->batches[q->batch_idx], q->bo))
+      if (iris_batch_references(&ice->batches[q->batch_idx], bo))
         iris_batch_flush(&ice->batches[q->batch_idx]);
 
-      if (!q->map->snapshots_landed) {
+      while (!READ_ONCE(q->map->snapshots_landed)) {
         if (wait)
-            iris_bo_wait_rendering(q->bo);
+            iris_wait_syncpt(ctx->screen, q->syncpt, INT64_MAX);
         else
            return false;
      }
 
-      assert(q->map->snapshots_landed);
+      assert(READ_ONCE(q->map->snapshots_landed));
      calculate_result_on_cpu(devinfo, q);
   }
 
   assert(q->ready);
 
-   if (q->type == PIPE_QUERY_PIPELINE_STATISTICS) {
-      switch (q->index) {
-      case 0:
-         result->pipeline_statistics.ia_vertices = q->result;
-         break;
-      case 1:
-         result->pipeline_statistics.ia_primitives = q->result;
-         break;
-      case 2:
-         result->pipeline_statistics.vs_invocations = q->result;
-         break;
-      case 3:
-         result->pipeline_statistics.gs_invocations = q->result;
-         break;
-      case 4:
-         result->pipeline_statistics.gs_primitives = q->result;
-         break;
-      case 5:
-         result->pipeline_statistics.c_invocations = q->result;
-         break;
-      case 6:
-         result->pipeline_statistics.c_primitives = q->result;
-         break;
-      case 7:
-         result->pipeline_statistics.ps_invocations = q->result;
-         /* Implement the "WaDividePSInvocationCountBy4:HSW,BDW" workaround:
-          * "Invocation counter is 4 times actual. WA: SW to divide HW reported
-          *  PS Invocations value by 4."
-          */
-         if (screen->devinfo.gen == 8)
-            result->pipeline_statistics.ps_invocations /= 4;
-         break;
-      case 8:
-         result->pipeline_statistics.hs_invocations = q->result;
-         break;
-      case 9:
-         result->pipeline_statistics.ds_invocations = q->result;
-         break;
-      case 10:
-         result->pipeline_statistics.cs_invocations = q->result;
-         break;
-      }
-   } else {
-      result->u64 = q->result;
-   }
+   result->u64 = q->result;
 
    return true;
 }
@@ -902,6 +895,7 @@ iris_get_query_result_resource(struct pipe_context *ctx,
    struct iris_batch *batch = &ice->batches[q->batch_idx];
    const struct gen_device_info *devinfo = &batch->screen->devinfo;
    struct iris_resource *res = (void *) p_res;
+   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
    unsigned snapshots_landed_offset =
       offsetof(struct iris_query_snapshots, snapshots_landed);
 
@@ -913,16 +907,16 @@ iris_get_query_result_resource(struct pipe_context *ctx,
        * now so that progress happens.  Either way, copy the snapshots
       * landed field to the destination resource.
       */
-      if (iris_batch_references(batch, q->bo))
+      if (iris_batch_references(batch, bo))
         iris_batch_flush(batch);
 
      ice->vtbl.copy_mem_mem(batch, iris_resource_bo(p_res), offset,
-                             q->bo, snapshots_landed_offset,
+                             bo, snapshots_landed_offset,
                             result_type <= PIPE_QUERY_TYPE_U32 ? 4 : 8);
      return;
   }
 
-   if (!q->ready && q->map->snapshots_landed) {
+   if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
      /* The final snapshots happen to have landed, so let's just compute
       * the result on the CPU now...
       */
@@ -954,7 +948,7 @@ iris_get_query_result_resource(struct pipe_context *ctx,
 
    if (predicated) {
      ice->vtbl.load_register_imm64(batch, MI_PREDICATE_SRC1, 0ull);
-      ice->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC0, q->bo,
+      ice->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC0, bo,
                                    snapshots_landed_offset);
      uint32_t predicate = MI_PREDICATE |
                           MI_PREDICATE_LOADOP_LOADINV |
@@ -1010,6 +1004,7 @@ set_predicate_for_result(struct iris_context *ice,
                          bool inverted)
 {
    struct iris_batch *batch = &ice->batches[IRIS_BATCH_RENDER];
+   struct iris_bo *bo = iris_resource_bo(q->query_state_ref.res);
 
    /* The CPU doesn't have the query result yet; use hardware predication */
    ice->state.predicate = IRIS_PREDICATE_STATE_USE_BIT;
@@ -1028,10 +1023,12 @@ set_predicate_for_result(struct iris_context *ice,
      break;
    default:
      /* PIPE_QUERY_OCCLUSION_* */
-      ice->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC0, q->bo,
-                                    offsetof(struct iris_query_snapshots, start));
+      ice->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC0, bo,
+                                    offsetof(struct iris_query_snapshots, start) +
+                                    q->query_state_ref.offset);
-      ice->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC1, q->bo,
-                                    offsetof(struct iris_query_snapshots, end));
+      ice->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC1, bo,
+                                    offsetof(struct iris_query_snapshots, end) +
+                                    q->query_state_ref.offset);
      break;
    }
 
@@ -1045,13 +1042,14 @@ set_predicate_for_result(struct iris_context *ice,
    /* We immediately set the predicate on the render batch, as all the
     * counters come from 3D operations.  However, we may need to predicate
     * a compute dispatch, which executes in a different GEM context and has
-    * a different MI_PREDICATE_DATA register.  So, we save the result to
+    * a different MI_PREDICATE_RESULT register.  So, we save the result to
     * memory and reload it in iris_launch_grid.
     */
-   unsigned offset = offsetof(struct iris_query_snapshots, predicate_data);
-   ice->vtbl.store_register_mem64(batch, MI_PREDICATE_DATA,
-                                  q->bo, offset, false);
-   ice->state.compute_predicate = q->bo;
+   unsigned offset = q->query_state_ref.offset +
+      offsetof(struct iris_query_snapshots, predicate_result);
+   ice->vtbl.store_register_mem64(batch, MI_PREDICATE_RESULT,
+                                  bo, offset, false);
+   ice->state.compute_predicate = bo;
 }
 
 static void
@@ -1063,6 +1061,11 @@ iris_render_condition(struct pipe_context *ctx,
    struct iris_context *ice = (void *) ctx;
    struct iris_query *q = (void *) query;
 
+   /* The old condition isn't relevant; we'll update it if necessary */
+   ice->state.compute_predicate = NULL;
+   ice->condition.query = q;
+   ice->condition.condition = condition;
+
    if (!q) {
      ice->state.predicate = IRIS_PREDICATE_STATE_RENDER;
      return;
@@ -1082,6 +1085,23 @@ iris_render_condition(struct pipe_context *ctx,
    }
 }
 
+void
+iris_resolve_conditional_render(struct iris_context *ice)
+{
+   struct pipe_context *ctx = (void *) ice;
+   struct iris_query *q = ice->condition.query;
+   struct pipe_query *query = (void *) q;
+   union pipe_query_result result;
+
+   if (ice->state.predicate != IRIS_PREDICATE_STATE_USE_BIT)
+      return;
+
+   assert(q);
+
+   iris_get_query_result(ctx, query, true, &result);
+   set_predicate_enable(ice, (q->result != 0) ^ ice->condition.condition);
+}
+
 void
 iris_init_query_functions(struct pipe_context *ctx)
 {