From 69d7782b155b72707d95a2f6b0c0776afbb888e3 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 2 Oct 2019 15:09:33 -0400 Subject: [PATCH] intel/decoder: Make get_state_size take a full 64-bit address and a base i965 wants to use an offset from a base because everything is in a single buffer whose address may be relocated, and all base addresses are set to the start of that buffer. iris wants to use a full 64-bit address, because state lives in separate buffers which may be in the shader, surface, and dynamic memory zones, where addresses grow downward from the top of a 4GB zone. So it's very possible for a 32-bit offset to exist relative to multiple bases, leading to the wrong state size. --- src/gallium/drivers/iris/iris_batch.c | 15 +++-------- src/gallium/drivers/iris/iris_state.c | 13 ++++++---- src/intel/common/gen_batch_decoder.c | 25 ++++++++++++------- src/intel/common/gen_decoder.h | 6 +++-- src/mesa/drivers/dri/i965/intel_batchbuffer.c | 7 +++--- 5 files changed, 36 insertions(+), 30 deletions(-) diff --git a/src/gallium/drivers/iris/iris_batch.c b/src/gallium/drivers/iris/iris_batch.c index 9dbe4a51bde..a44715a9458 100644 --- a/src/gallium/drivers/iris/iris_batch.c +++ b/src/gallium/drivers/iris/iris_batch.c @@ -150,20 +150,13 @@ decode_get_bo(void *v_batch, bool ppgtt, uint64_t address) } static unsigned -decode_get_state_size(void *v_batch, uint32_t offset_from_base) +decode_get_state_size(void *v_batch, + uint64_t address, + UNUSED uint64_t base_address) { struct iris_batch *batch = v_batch; - - /* The decoder gives us offsets from a base address, which is not great. - * Binding tables are relative to surface state base address, and other - * state is relative to dynamic state base address. These could alias, - * but in practice it's unlikely because surface offsets are always in - * the [0, 64K) range, and we assign dynamic state addresses starting at - * the top of the 4GB range. We should fix this but it's likely good - * enough for now. 
- */ unsigned size = (uintptr_t) - _mesa_hash_table_u64_search(batch->state_sizes, offset_from_base); + _mesa_hash_table_u64_search(batch->state_sizes, address); return size; } diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 6e90d33502b..4e0fefa9251 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -346,9 +346,10 @@ stream_state(struct iris_batch *batch, struct iris_bo *bo = iris_resource_bo(*out_res); iris_use_pinned_bo(batch, bo, false); - *out_offset += iris_bo_offset_from_base_address(bo); + iris_record_state_size(batch->state_sizes, + bo->gtt_offset + *out_offset, size); - iris_record_state_size(batch->state_sizes, *out_offset, size); + *out_offset += iris_bo_offset_from_base_address(bo); return ptr; } @@ -1988,10 +1989,12 @@ iris_upload_sampler_states(struct iris_context *ice, gl_shader_stage stage) return; struct pipe_resource *res = shs->sampler_table.res; - shs->sampler_table.offset += - iris_bo_offset_from_base_address(iris_resource_bo(res)); + struct iris_bo *bo = iris_resource_bo(res); + + iris_record_state_size(ice->state.sizes, + bo->gtt_offset + shs->sampler_table.offset, size); - iris_record_state_size(ice->state.sizes, shs->sampler_table.offset, size); + shs->sampler_table.offset += iris_bo_offset_from_base_address(bo); /* Make sure all land in the same BO */ iris_border_color_pool_reserve(ice, IRIS_MAX_TEXTURE_SAMPLERS); diff --git a/src/intel/common/gen_batch_decoder.c b/src/intel/common/gen_batch_decoder.c index 41425e8bf68..2a5261b2f92 100644 --- a/src/intel/common/gen_batch_decoder.c +++ b/src/intel/common/gen_batch_decoder.c @@ -36,7 +36,8 @@ gen_batch_decode_ctx_init(struct gen_batch_decode_ctx *ctx, struct gen_batch_decode_bo (*get_bo)(void *, bool, uint64_t), - unsigned (*get_state_size)(void *, uint32_t), + unsigned (*get_state_size)(void *, uint64_t, + uint64_t), void *user_data) { memset(ctx, 0, sizeof(*ctx)); @@ -110,14 +111,15 @@ 
ctx_get_bo(struct gen_batch_decode_ctx *ctx, bool ppgtt, uint64_t addr) static int update_count(struct gen_batch_decode_ctx *ctx, - uint32_t offset_from_dsba, + uint64_t address, + uint64_t base_address, unsigned element_dwords, unsigned guess) { unsigned size = 0; if (ctx->get_state_size) - size = ctx->get_state_size(ctx->user_data, offset_from_dsba); + size = ctx->get_state_size(ctx->user_data, address, base_address); if (size > 0) return size / (sizeof(uint32_t) * element_dwords); @@ -249,8 +251,10 @@ dump_binding_table(struct gen_batch_decode_ctx *ctx, uint32_t offset, int count) return; } - if (count < 0) - count = update_count(ctx, offset, 1, 8); + if (count < 0) { + count = update_count(ctx, ctx->surface_base + offset, + ctx->surface_base, 1, 8); + } if (offset % 32 != 0 || offset >= UINT16_MAX) { fprintf(ctx->fp, " invalid binding table pointer\n"); @@ -289,11 +293,13 @@ static void dump_samplers(struct gen_batch_decode_ctx *ctx, uint32_t offset, int count) { struct gen_group *strct = gen_spec_find_struct(ctx->spec, "SAMPLER_STATE"); + uint64_t state_addr = ctx->dynamic_base + offset; - if (count < 0) - count = update_count(ctx, offset, strct->dw_length, 4); + if (count < 0) { + count = update_count(ctx, state_addr, ctx->dynamic_base, + strct->dw_length, 4); + } - uint64_t state_addr = ctx->dynamic_base + offset; struct gen_batch_decode_bo bo = ctx_get_bo(ctx, true, state_addr); const void *state_map = bo.map; @@ -765,7 +771,8 @@ decode_dynamic_state_pointers(struct gen_batch_decode_ctx *ctx, state = gen_spec_find_struct(ctx->spec, struct_type); } - count = update_count(ctx, state_offset, state->dw_length, count); + count = update_count(ctx, ctx->dynamic_base + state_offset, + ctx->dynamic_base, state->dw_length, count); for (int i = 0; i < count; i++) { fprintf(ctx->fp, "%s %d\n", struct_type, i); diff --git a/src/intel/common/gen_decoder.h b/src/intel/common/gen_decoder.h index 153e48d8e49..0b770ee3691 100644 --- a/src/intel/common/gen_decoder.h +++ 
b/src/intel/common/gen_decoder.h @@ -231,7 +231,8 @@ struct gen_batch_decode_ctx { */ struct gen_batch_decode_bo (*get_bo)(void *user_data, bool ppgtt, uint64_t address); unsigned (*get_state_size)(void *user_data, - uint32_t offset_from_dynamic_state_base_addr); + uint64_t address, + uint64_t base_address); void *user_data; FILE *fp; @@ -259,7 +260,8 @@ void gen_batch_decode_ctx_init(struct gen_batch_decode_ctx *ctx, bool, uint64_t), - unsigned (*get_state_size)(void *, uint32_t), + unsigned (*get_state_size)(void *, uint64_t, + uint64_t), void *user_data); void gen_batch_decode_ctx_finish(struct gen_batch_decode_ctx *ctx); diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index af076f65f0b..d5676e9cb9f 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -104,12 +104,13 @@ decode_get_bo(void *v_brw, bool ppgtt, uint64_t address) } static unsigned -decode_get_state_size(void *v_brw, uint32_t offset_from_dsba) +decode_get_state_size(void *v_brw, uint64_t address, uint64_t base_address) { struct brw_context *brw = v_brw; struct intel_batchbuffer *batch = &brw->batch; - unsigned size = (uintptr_t) _mesa_hash_table_u64_search( - batch->state_batch_sizes, offset_from_dsba); + unsigned size = (uintptr_t) + _mesa_hash_table_u64_search(batch->state_batch_sizes, + address - base_address); return size; } -- 2.30.2