From: Kenneth Graunke Date: Thu, 23 May 2019 01:14:38 +0000 (-0700) Subject: iris: Record state sizes for INTEL_DEBUG=bat decoding. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=7d2b54e3936ded025d5d75246a7c0a3f2712413f;p=mesa.git iris: Record state sizes for INTEL_DEBUG=bat decoding. Felix noticed a crash when using INTEL_DEBUG=bat decoding. It turned out that we were sometimes placing variable length data near the end of a buffer, and with the decoder guessing random lengths rather than having an actual count, it was walking off the end and crashing. So this does more than improve the decoder output. Unfortunately, this is a bit more complicated than i965's handling, because we don't have a single state buffer. Various places upload data via u_upload_mgr, and so there isn't a central place to record the size. We don't need to catch every single place, however, since it's only important to record variable length packets (like viewports and binding tables). State data also lives arbitrarily long, rather than being discarded on every batch like i965, so we don't know when to clear out old entries either. (We also don't have a callback when an upload buffer is released.) So, this tracking may space leak over time. That's probably okay though, as this is only a debugging feature and it's a slow leak. We may also get lucky and overwrite existing entries as we reuse BOs, though I find this unlikely to happen. The fact that the decoder works in terms of offsets from a state base address is also not ideal, as dynamic state base address and surface state base address differ for iris. However, because dynamic state addresses start from the top of a 4GB region, and binding tables start from addresses [0, 64K), it's highly unlikely that we'll get overlap. We can always improve this, but for now it's better than what we had. --- diff --git a/src/gallium/drivers/iris/iris_batch.c b/src/gallium/drivers/iris/iris_batch.c index bbcedf4bfc0..cb44994f717 100644 --- a/src/gallium/drivers/iris/iris_batch.c +++ b/src/gallium/drivers/iris/iris_batch.c @@ -147,6 +147,25 @@ decode_get_bo(void *v_batch, bool ppgtt, uint64_t address) return (struct gen_batch_decode_bo) { }; } +static unsigned +decode_get_state_size(void *v_batch, uint32_t offset_from_base) +{ + struct iris_batch *batch = v_batch; + + /* The decoder gives us offsets from a base address, which is not great. + * Binding tables are relative to surface state base address, and other + * state is relative to dynamic state base address. These could alias, + * but in practice it's unlikely because surface offsets are always in + * the [0, 64K) range, and we assign dynamic state addresses starting at + * the top of the 4GB range. We should fix this but it's likely good + * enough for now. + */ + unsigned size = (uintptr_t) + _mesa_hash_table_u64_search(batch->state_sizes, offset_from_base); + + return size; +} + /** * Decode the current batch. */ @@ -164,6 +183,7 @@ iris_init_batch(struct iris_batch *batch, struct iris_vtable *vtbl, struct pipe_debug_callback *dbg, struct pipe_device_reset_callback *reset, + struct hash_table_u64 *state_sizes, struct iris_batch *all_batches, enum iris_batch_name name, uint8_t engine, @@ -173,6 +193,7 @@ iris_init_batch(struct iris_batch *batch, batch->vtbl = vtbl; batch->dbg = dbg; batch->reset = reset; + batch->state_sizes = state_sizes; batch->name = name; /* engine should be one of I915_EXEC_RENDER, I915_EXEC_BLT, etc. */ @@ -214,10 +235,9 @@ iris_init_batch(struct iris_batch *batch, GEN_BATCH_DECODE_OFFSETS | GEN_BATCH_DECODE_FLOATS; - /* TODO: track state size so we can print the right # of entries */ gen_batch_decode_ctx_init(&batch->decoder, &screen->devinfo, stderr, decode_flags, NULL, - decode_get_bo, NULL, batch); + decode_get_bo, decode_get_state_size, batch); batch->decoder.max_vbo_decoded_lines = 32; } diff --git a/src/gallium/drivers/iris/iris_batch.h b/src/gallium/drivers/iris/iris_batch.h index 44e27e79623..00386418e7b 100644 --- a/src/gallium/drivers/iris/iris_batch.h +++ b/src/gallium/drivers/iris/iris_batch.h @@ -122,6 +122,7 @@ struct iris_batch { } cache; struct gen_batch_decode_ctx decoder; + struct hash_table_u64 *state_sizes; /** Have we emitted any draw calls to this batch? */ bool contains_draw; @@ -132,6 +133,7 @@ void iris_init_batch(struct iris_batch *batch, struct iris_vtable *vtbl, struct pipe_debug_callback *dbg, struct pipe_device_reset_callback *reset, + struct hash_table_u64 *state_sizes, struct iris_batch *all_batches, enum iris_batch_name name, uint8_t ring, @@ -216,4 +218,18 @@ iris_batch_reference_signal_syncpt(struct iris_batch *batch, iris_syncpt_reference(batch->screen, out_syncpt, syncpt); } +/** + * Record the size of a piece of state for use in INTEL_DEBUG=bat printing. + */ +static inline void +iris_record_state_size(struct hash_table_u64 *ht, + uint32_t offset_from_base, + uint32_t size) +{ + if (ht) { + _mesa_hash_table_u64_insert(ht, offset_from_base, + (void *)(uintptr_t) size); + } +} + #endif diff --git a/src/gallium/drivers/iris/iris_binder.c b/src/gallium/drivers/iris/iris_binder.c index 6d23de229bf..5bc0f764819 100644 --- a/src/gallium/drivers/iris/iris_binder.c +++ b/src/gallium/drivers/iris/iris_binder.c @@ -186,6 +186,8 @@ iris_binder_reserve_3d(struct iris_context *ice) for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { if (ice->state.dirty & (IRIS_DIRTY_BINDINGS_VS << stage)) { binder->bt_offset[stage] = sizes[stage] > 0 ? offset : 0; + iris_record_state_size(ice->state.sizes, + binder->bo->gtt_offset + offset, sizes[stage]); offset += sizes[stage]; } } diff --git a/src/gallium/drivers/iris/iris_blorp.c b/src/gallium/drivers/iris/iris_blorp.c index 46ecd2b8531..2f36c4a5705 100644 --- a/src/gallium/drivers/iris/iris_blorp.c +++ b/src/gallium/drivers/iris/iris_blorp.c @@ -68,6 +68,9 @@ stream_state(struct iris_batch *batch, struct iris_bo *bo = iris_resource_bo(res); iris_use_pinned_bo(batch, bo, false); + iris_record_state_size(batch->state_sizes, + bo->gtt_offset + *out_offset, size); + /* If the caller has asked for a BO, we leave them the responsibility of * adding bo->gtt_offset (say, by handing an address to genxml). If not, * we assume they want the offset from a base address. diff --git a/src/gallium/drivers/iris/iris_context.c b/src/gallium/drivers/iris/iris_context.c index b56e746ea76..9ffab8d20ee 100644 --- a/src/gallium/drivers/iris/iris_context.c +++ b/src/gallium/drivers/iris/iris_context.c @@ -296,9 +296,13 @@ iris_create_context(struct pipe_screen *pscreen, void *priv, unsigned flags) if (flags & PIPE_CONTEXT_LOW_PRIORITY) priority = GEN_CONTEXT_LOW_PRIORITY; + if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) + ice->state.sizes = _mesa_hash_table_u64_create(ice); + for (int i = 0; i < IRIS_BATCH_COUNT; i++) { iris_init_batch(&ice->batches[i], screen, &ice->vtbl, &ice->dbg, - &ice->reset, ice->batches, (enum iris_batch_name) i, + &ice->reset, ice->state.sizes, + ice->batches, (enum iris_batch_name) i, I915_EXEC_RENDER, priority); } diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h index 75e13778631..fb4423957ca 100644 --- a/src/gallium/drivers/iris/iris_context.h +++ b/src/gallium/drivers/iris/iris_context.h @@ -680,6 +680,9 @@ struct iris_context { struct pipe_resource *blend; struct pipe_resource *index_buffer; } last_res; + + /** Records the size of variable-length state for INTEL_DEBUG=bat */ + struct hash_table_u64 *sizes; } state; }; diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index f48bacf77e1..da6e4a31c85 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -426,6 +426,8 @@ stream_state(struct iris_batch *batch, *out_offset += iris_bo_offset_from_base_address(bo); + iris_record_state_size(batch->state_sizes, *out_offset, size); + return ptr; } @@ -1562,9 +1564,9 @@ iris_upload_sampler_states(struct iris_context *ice, gl_shader_stage stage) * in the dynamic state memory zone, so we can point to it via the * 3DSTATE_SAMPLER_STATE_POINTERS_* commands. */ + unsigned size = count * 4 * GENX(SAMPLER_STATE_length); uint32_t *map = - upload_state(ice->state.dynamic_uploader, &shs->sampler_table, - count * 4 * GENX(SAMPLER_STATE_length), 32); + upload_state(ice->state.dynamic_uploader, &shs->sampler_table, size, 32); if (unlikely(!map)) return; @@ -1572,6 +1574,8 @@ iris_upload_sampler_states(struct iris_context *ice, gl_shader_stage stage) shs->sampler_table.offset += iris_bo_offset_from_base_address(iris_resource_bo(res)); + iris_record_state_size(ice->state.sizes, shs->sampler_table.offset, size); + /* Make sure all land in the same BO */ iris_border_color_pool_reserve(ice, IRIS_MAX_TEXTURE_SAMPLERS);