#include "drm-uapi/i915_drm.h"
#include "util/hash_table.h"
+#include "util/set.h"
#include "main/macros.h"
#include <errno.h>
void
iris_init_batch(struct iris_batch *batch,
struct iris_screen *screen,
+ struct iris_vtable *vtbl,
struct pipe_debug_callback *dbg,
uint8_t ring)
{
batch->screen = screen;
+ batch->vtbl = vtbl;
batch->dbg = dbg;
/* ring should be one of I915_EXEC_RENDER, I915_EXEC_BLT, etc. */
batch->validation_list =
malloc(batch->exec_array_size * sizeof(batch->validation_list[0]));
+ batch->cache.render = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ batch->cache.depth = _mesa_set_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
if (unlikely(INTEL_DEBUG)) {
batch->state_sizes =
_mesa_hash_table_create(NULL, uint_key_hash, uint_key_compare);
}
+/**
+ * Reset the batch with iris_batch_reset() and then remove every entry from
+ * the batch's render/depth cache tracking via iris_cache_sets_clear().
+ */
static void
-iris_batch_reset_and_clear_render_cache(struct iris_batch *batch)
+iris_batch_reset_and_clear_caches(struct iris_batch *batch)
{
iris_batch_reset(batch);
- // XXX: iris_render_cache_set_clear(batch);
+ iris_cache_sets_clear(batch);
}
static void
iris_bo_unreference(batch->last_cmd_bo);
+ _mesa_hash_table_destroy(batch->cache.render, NULL);
+ _mesa_set_destroy(batch->cache.depth, NULL);
+
if (batch->state_sizes) {
_mesa_hash_table_destroy(batch->state_sizes, NULL);
gen_batch_decode_ctx_finish(&batch->decoder);
batch->aperture_space = 0;
/* Start a new batch buffer. */
- iris_batch_reset_and_clear_render_cache(batch);
+ iris_batch_reset_and_clear_caches(batch);
return 0;
}
struct iris_batch {
struct iris_screen *screen;
+ struct iris_vtable *vtbl;
struct pipe_debug_callback *dbg;
/** Current batchbuffer being queued up. */
/** The amount of aperture space (in bytes) used by all exec_bos */
int aperture_space;
+ struct {
+ /**
+ * Map from struct iris_bo * to a packed (format, aux usage) tuple for
+ * BOs that have been rendered to within this batchbuffer and would need
+ * flushing before being used from another cache domain that isn't
+ * coherent with it (e.g. the sampler).
+ */
+ struct hash_table *render;
+
+ /**
+ * Set of struct iris_bo * that have been used as a depth buffer within
+ * this batchbuffer and would need flushing before being used from
+ * another cache domain that isn't coherent with it (e.g. the sampler).
+ */
+ struct set *depth;
+ } cache;
+
#if DEBUG
/** Map from batch offset to iris_alloc_state data (with DEBUG_BATCH) */
// XXX: unused
void iris_init_batch(struct iris_batch *batch,
struct iris_screen *screen,
+ struct iris_vtable *vtbl,
struct pipe_debug_callback *dbg,
uint8_t ring);
void iris_batch_free(struct iris_batch *batch);
IRIS_RESOURCE_FLAG_DYNAMIC_MEMZONE);
genX_call(devinfo, init_state, ice);
- ice->state.init_render_context(screen, &ice->render_batch, &ice->dbg);
+ ice->vtbl.init_render_context(screen, &ice->render_batch, &ice->vtbl,
+ &ice->dbg);
return ctx;
}
#include "iris_screen.h"
struct iris_bo;
+struct iris_context;
#define IRIS_RESOURCE_FLAG_SHADER_MEMZONE (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
#define IRIS_RESOURCE_FLAG_SURFACE_MEMZONE (PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
unsigned const_size;
};
+/**
+ * Table of function pointers used to reach the state-emission code.
+ *
+ * An instance is embedded in struct iris_context as `vtbl`, and each
+ * iris_batch keeps a pointer to one, so batch-level helpers can dispatch
+ * without needing an iris_context.
+ *
+ * NOTE(review): presumably the entries differ per hardware generation
+ * (filled in by the genX init_state path) -- confirm.
+ */
+struct iris_vtable {
+ /* Hook for tearing down context state -- TODO confirm exact contract. */
+ void (*destroy_state)(struct iris_context *ice);
+ /* Initializes the render batch; receives the vtable so the batch can
+ * keep a pointer to it.
+ */
+ void (*init_render_context)(struct iris_screen *screen,
+ struct iris_batch *batch,
+ struct iris_vtable *vtbl,
+ struct pipe_debug_callback *dbg);
+ /* Invoked on the draw path after compiled shaders have been updated. */
+ void (*upload_render_state)(struct iris_context *ice,
+ struct iris_batch *batch,
+ const struct pipe_draw_info *draw);
+ /* Low-level PIPE_CONTROL emitter that the iris_emit_pipe_control_*
+ * helpers forward to.
+ */
+ void (*emit_raw_pipe_control)(struct iris_batch *batch, uint32_t flags,
+ struct iris_bo *bo, uint32_t offset,
+ uint64_t imm);
+ /* Number of extra bytes to allocate after struct iris_compiled_shader
+ * for the given program cache id.
+ */
+ unsigned (*derived_program_state_size)(enum iris_program_cache_id id);
+ /* Stores the 3DSTATE shader packets and other derived state into the
+ * compiled shader.
+ */
+ void (*set_derived_program_state)(const struct gen_device_info *devinfo,
+ enum iris_program_cache_id cache_id,
+ struct iris_compiled_shader *shader);
+ /* Per-stage hooks that fill in the program key used for the shader
+ * cache lookup.
+ */
+ void (*populate_vs_key)(const struct iris_context *ice,
+ struct brw_vs_prog_key *key);
+ void (*populate_tcs_key)(const struct iris_context *ice,
+ struct brw_tcs_prog_key *key);
+ void (*populate_tes_key)(const struct iris_context *ice,
+ struct brw_tes_prog_key *key);
+ void (*populate_gs_key)(const struct iris_context *ice,
+ struct brw_gs_prog_key *key);
+ void (*populate_fs_key)(const struct iris_context *ice,
+ struct brw_wm_prog_key *key);
+};
+
struct iris_context {
struct pipe_context ctx;
struct pipe_debug_callback dbg;
+ struct iris_vtable vtbl;
+
struct {
struct iris_uncompiled_shader *uncompiled[MESA_SHADER_STAGES];
struct iris_compiled_shader *prog[MESA_SHADER_STAGES];
// "I'm streaming this out at draw time and never want it again!"
struct u_upload_mgr *dynamic_uploader;
- void (*destroy_state)(struct iris_context *ice);
- void (*init_render_context)(struct iris_screen *screen,
- struct iris_batch *batch,
- struct pipe_debug_callback *dbg);
- void (*upload_render_state)(struct iris_context *ice,
- struct iris_batch *batch,
- const struct pipe_draw_info *draw);
- void (*emit_raw_pipe_control)(struct iris_batch *batch, uint32_t flags,
- struct iris_bo *bo, uint32_t offset,
- uint64_t imm);
- unsigned (*derived_program_state_size)(enum iris_program_cache_id id);
- void (*set_derived_program_state)(const struct gen_device_info *devinfo,
- enum iris_program_cache_id cache_id,
- struct iris_compiled_shader *shader);
- void (*populate_vs_key)(const struct iris_context *ice,
- struct brw_vs_prog_key *key);
- void (*populate_tcs_key)(const struct iris_context *ice,
- struct brw_tcs_prog_key *key);
- void (*populate_tes_key)(const struct iris_context *ice,
- struct brw_tes_prog_key *key);
- void (*populate_gs_key)(const struct iris_context *ice,
- struct brw_gs_prog_key *key);
- void (*populate_fs_key)(const struct iris_context *ice,
- struct brw_wm_prog_key *key);
} state;
};
/* iris_pipe_control.c */
-void iris_emit_pipe_control_flush(struct iris_context *ice,
- struct iris_batch *batch,
+void iris_emit_pipe_control_flush(struct iris_batch *batch,
uint32_t flags);
-void iris_emit_pipe_control_write(struct iris_context *ice,
- struct iris_batch *batch, uint32_t flags,
+void iris_emit_pipe_control_write(struct iris_batch *batch, uint32_t flags,
struct iris_bo *bo, uint32_t offset,
uint64_t imm);
-void iris_emit_end_of_pipe_sync(struct iris_context *ice,
- struct iris_batch *batch,
+void iris_emit_end_of_pipe_sync(struct iris_batch *batch,
uint32_t flags);
+/* Per-batch render/depth cache tracking.
+ *
+ * The iris_cache_flush_for_* functions emit a cache flush when the BO is
+ * already tracked in a way that conflicts with the upcoming use; the
+ * *_add_bo functions record a BO as rendered to / used as depth in this
+ * batch; iris_cache_sets_clear() forgets all tracked BOs.
+ */
+void iris_cache_sets_clear(struct iris_batch *batch);
+void iris_cache_flush_for_read(struct iris_batch *batch, struct iris_bo *bo);
+void iris_cache_flush_for_render(struct iris_batch *batch,
+ struct iris_bo *bo,
+ enum isl_format format,
+ enum isl_aux_usage aux_usage);
+void iris_render_cache_add_bo(struct iris_batch *batch,
+ struct iris_bo *bo,
+ enum isl_format format,
+ enum isl_aux_usage aux_usage);
+void iris_cache_flush_for_depth(struct iris_batch *batch, struct iris_bo *bo);
+void iris_depth_cache_add_bo(struct iris_batch *batch, struct iris_bo *bo);
+
/* iris_state.c */
void gen9_init_state(struct iris_context *ice);
struct iris_context *ice = (struct iris_context *) ctx;
iris_update_compiled_shaders(ice);
- ice->state.upload_render_state(ice, &ice->render_batch, info);
+ ice->vtbl.upload_render_state(ice, &ice->render_batch, info);
}
*/
#include "iris_context.h"
+#include "util/hash_table.h"
+#include "util/set.h"
/**
* Emit a PIPE_CONTROL with various flushing flags.
* given generation.
*/
void
-iris_emit_pipe_control_flush(struct iris_context *ice,
- struct iris_batch *batch,
- uint32_t flags)
+iris_emit_pipe_control_flush(struct iris_batch *batch, uint32_t flags)
{
if ((flags & PIPE_CONTROL_CACHE_FLUSH_BITS) &&
(flags & PIPE_CONTROL_CACHE_INVALIDATE_BITS)) {
* with any write cache flush, so this shouldn't be a concern. In order
* to ensure a full stall, we do an end-of-pipe sync.
*/
- iris_emit_end_of_pipe_sync(ice, batch,
- flags & PIPE_CONTROL_CACHE_FLUSH_BITS);
+ iris_emit_end_of_pipe_sync(batch, flags & PIPE_CONTROL_CACHE_FLUSH_BITS);
flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL);
}
- ice->state.emit_raw_pipe_control(batch, flags, NULL, 0, 0);
+ batch->vtbl->emit_raw_pipe_control(batch, flags, NULL, 0, 0);
}
/**
* - PIPE_CONTROL_WRITE_DEPTH_COUNT
*/
void
-iris_emit_pipe_control_write(struct iris_context *ice,
- struct iris_batch *batch, uint32_t flags,
+iris_emit_pipe_control_write(struct iris_batch *batch, uint32_t flags,
struct iris_bo *bo, uint32_t offset,
uint64_t imm)
{
- ice->state.emit_raw_pipe_control(batch, flags, bo, offset, imm);
+ /* Dispatch through the vtable pointer stored in the batch, so no
+ * iris_context is required here.
+ */
+ batch->vtbl->emit_raw_pipe_control(batch, flags, bo, offset, imm);
}
/*
* Data" in the PIPE_CONTROL command.
*/
void
-iris_emit_end_of_pipe_sync(struct iris_context *ice,
- struct iris_batch *batch,
- uint32_t flags)
+iris_emit_end_of_pipe_sync(struct iris_batch *batch, uint32_t flags)
{
/* From Sandybridge PRM, volume 2, "1.7.3.1 Writing a Value to Memory":
*
* Data, Required Write Cache Flush bits set)
* - Workload-2 (Can use the data produce or output by Workload-1)
*/
- iris_emit_pipe_control_write(ice, batch, flags | PIPE_CONTROL_CS_STALL |
+ iris_emit_pipe_control_write(batch, flags | PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_WRITE_IMMEDIATE,
batch->screen->workaround_bo, 0, 0);
}
+
+/**
+ * Forget every BO tracked for this batch.
+ *
+ * Removes each entry from batch->cache.render and batch->cache.depth one
+ * at a time; the containers themselves are kept for reuse.
+ */
+void
+iris_cache_sets_clear(struct iris_batch *batch)
+{
+   struct hash_table *render_bos = batch->cache.render;
+   struct set *depth_bos = batch->cache.depth;
+
+   struct hash_entry *he;
+   hash_table_foreach(render_bos, he) {
+      _mesa_hash_table_remove(render_bos, he);
+   }
+
+   struct set_entry *se;
+   set_foreach(depth_bos, se) {
+      _mesa_set_remove(depth_bos, se);
+   }
+}
+
+/**
+ * Flush the depth and render caches, invalidate the texture and constant
+ * caches, and then forget every BO tracked for this batch.
+ *
+ * This helper flushes unconditionally; the iris_cache_flush_for_*() callers
+ * decide whether a BO was written earlier in the same batchbuffer and so
+ * needs it.  The \p bo argument is accepted but not consulted here.
+ *
+ * Background: the GPU has separate, incoherent caches for the render cache
+ * and the sampler cache, along with other caches.  Usually data in the
+ * different caches don't interact (e.g. we don't render to our
+ * driver-generated immediate constant data), but for render-to-texture in
+ * FBOs we definitely do.  When a batchbuffer is flushed, the kernel will
+ * ensure that everything necessary is flushed before another use of that
+ * BO, but for reuse from different caches within a batchbuffer, it's all
+ * our responsibility.
+ */
+static void
+flush_depth_and_render_caches(struct iris_batch *batch, struct iris_bo *bo)
+{
+   const uint32_t write_flush_bits = PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+                                     PIPE_CONTROL_RENDER_TARGET_FLUSH |
+                                     PIPE_CONTROL_CS_STALL;
+   const uint32_t read_invalidate_bits =
+      PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
+      PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+
+   /* Write caches are flushed (with a CS stall) before the read caches
+    * are invalidated; the order of these two PIPE_CONTROLs matters.
+    */
+   iris_emit_pipe_control_flush(batch, write_flush_bits);
+   iris_emit_pipe_control_flush(batch, read_invalidate_bits);
+
+   /* Everything has been flushed, so nothing needs tracking any more. */
+   iris_cache_sets_clear(batch);
+}
+
+/**
+ * Prepare \p bo for being read within this batch.
+ *
+ * If the BO is currently tracked as rendered to or as used for depth, the
+ * depth/render caches are flushed first; otherwise nothing is emitted.
+ */
+void
+iris_cache_flush_for_read(struct iris_batch *batch,
+                          struct iris_bo *bo)
+{
+   /* Untracked in both containers: no flush is required. */
+   if (!_mesa_hash_table_search(batch->cache.render, bo) &&
+       !_mesa_set_search(batch->cache.depth, bo))
+      return;
+
+   flush_depth_and_render_caches(batch, bo);
+}
+
+/**
+ * Pack a surface format and aux usage into a pointer-sized value, with the
+ * format shifted left by 8 bits and the aux usage OR'd into the result, so
+ * the pair can be stored as the data of a hash table entry and compared
+ * with ==.
+ */
+static void *
+format_aux_tuple(enum isl_format format, enum isl_aux_usage aux_usage)
+{
+   const uint32_t shifted_format = (uint32_t) format << 8;
+   const uintptr_t packed = shifted_format | aux_usage;
+
+   return (void *) packed;
+}
+
+/**
+ * Prepare \p bo for being rendered to with the given format and aux usage.
+ *
+ * Flushes if the BO is tracked as a depth buffer, and flushes again if it
+ * is (still) tracked as a render target with a different format/aux tuple.
+ */
+void
+iris_cache_flush_for_render(struct iris_batch *batch,
+                            struct iris_bo *bo,
+                            enum isl_format format,
+                            enum isl_aux_usage aux_usage)
+{
+   /* Must come first: this flush also empties the render tracking table
+    * consulted below.
+    */
+   if (_mesa_set_search(batch->cache.depth, bo)) {
+      flush_depth_and_render_caches(batch, bo);
+   }
+
+   /* Check to see if this bo has been used by a previous rendering operation
+    * but with a different format or aux usage.  If it has, flush the render
+    * cache so we ensure that it's only in there with one format or aux usage
+    * at a time.
+    *
+    * Even though it's not obvious, this can easily happen in practice.
+    * Suppose a client is blending on a surface with sRGB encode enabled on
+    * gen9.  This implies that you get AUX_USAGE_CCS_D at best.  If the client
+    * then disables sRGB decode and continues blending we will flip on
+    * AUX_USAGE_CCS_E without doing any sort of resolve in-between (this is
+    * perfectly valid since CCS_E is a subset of CCS_D).  However, this means
+    * that we have fragments in-flight which are rendering with UNORM+CCS_E
+    * and other fragments in-flight with SRGB+CCS_D on the same surface at the
+    * same time and the pixel scoreboard and color blender are trying to sort
+    * it all out.  This ends badly (i.e. GPU hangs).
+    *
+    * To date, we have never observed GPU hangs or even corruption to be
+    * associated with switching the format, only the aux usage.  However,
+    * there are comments in various docs which indicate that the render cache
+    * isn't 100% resilient to format changes.  We may as well be conservative
+    * and flush on format changes too.  We can always relax this later if we
+    * find it to be a performance problem.
+    */
+   const struct hash_entry *tracked =
+      _mesa_hash_table_search(batch->cache.render, bo);
+   if (tracked == NULL)
+      return;
+
+   if (tracked->data == format_aux_tuple(format, aux_usage))
+      return;
+
+   flush_depth_and_render_caches(batch, bo);
+}
+
+/**
+ * Record that \p bo has been rendered to in this batch with the given
+ * format and aux usage.
+ *
+ * In builds with assertions enabled, an existing entry for the BO must
+ * already carry the same format/aux tuple.
+ */
+void
+iris_render_cache_add_bo(struct iris_batch *batch,
+                         struct iris_bo *bo,
+                         enum isl_format format,
+                         enum isl_aux_usage aux_usage)
+{
+   void *tuple = format_aux_tuple(format, aux_usage);
+
+#ifndef NDEBUG
+   const struct hash_entry *prior =
+      _mesa_hash_table_search(batch->cache.render, bo);
+   /* A mismatch here means someone didn't do a flush_for_render, and that
+    * would be very bad indeed.
+    */
+   assert(prior == NULL || prior->data == tuple);
+#endif
+
+   _mesa_hash_table_insert(batch->cache.render, bo, tuple);
+}
+
+/**
+ * Prepare \p bo for being used as a depth buffer within this batch.
+ *
+ * Flushes the depth/render caches only when the BO is currently tracked as
+ * a render target.
+ */
+void
+iris_cache_flush_for_depth(struct iris_batch *batch,
+                           struct iris_bo *bo)
+{
+   const struct hash_entry *as_render_target =
+      _mesa_hash_table_search(batch->cache.render, bo);
+
+   if (!as_render_target)
+      return;
+
+   flush_depth_and_render_caches(batch, bo);
+}
+
+/**
+ * Record that \p bo has been used as a depth buffer in this batch by
+ * adding it to batch->cache.depth.
+ */
+void
+iris_depth_cache_add_bo(struct iris_batch *batch, struct iris_bo *bo)
+{
+   struct set *depth_bos = batch->cache.depth;
+
+   _mesa_set_add(depth_bos, bo);
+}
iris_update_compiled_vs(struct iris_context *ice)
{
struct brw_vs_prog_key key;
- ice->state.populate_vs_key(ice, &key);
+ ice->vtbl.populate_vs_key(ice, &key);
if (iris_bind_cached_shader(ice, IRIS_CACHE_VS, &key))
return;
return;
struct brw_tes_prog_key key;
- ice->state.populate_tes_key(ice, &key);
+ ice->vtbl.populate_tes_key(ice, &key);
if (iris_bind_cached_shader(ice, IRIS_CACHE_TES, &key))
return;
iris_update_compiled_fs(struct iris_context *ice)
{
struct brw_wm_prog_key key;
- ice->state.populate_fs_key(ice, &key);
+ ice->vtbl.populate_fs_key(ice, &key);
if (iris_bind_cached_shader(ice, IRIS_CACHE_FS, &key))
return;
struct hash_table *cache = ice->shaders.cache;
struct iris_compiled_shader *shader =
ralloc_size(cache, sizeof(struct iris_compiled_shader) +
- ice->state.derived_program_state_size(cache_id));
+ ice->vtbl.derived_program_state_size(cache_id));
const struct iris_compiled_shader *existing =
find_existing_assembly(cache, assembly, prog_data->program_size);
ralloc_steal(shader->prog_data, prog_data->pull_param);
/* Store the 3DSTATE shader packets and other derived state. */
- ice->state.set_derived_program_state(devinfo, cache_id, shader);
+ ice->vtbl.set_derived_program_state(devinfo, cache_id, shader);
struct keybox *keybox = make_keybox(cache, cache_id, key);
_mesa_hash_table_insert(ice->shaders.cache, keybox, shader);
static void
iris_init_render_context(struct iris_screen *screen,
struct iris_batch *batch,
+ struct iris_vtable *vtbl,
struct pipe_debug_callback *dbg)
{
- iris_init_batch(batch, screen, dbg, I915_EXEC_RENDER);
+ iris_init_batch(batch, screen, vtbl, dbg, I915_EXEC_RENDER);
/* XXX: PIPE_CONTROLs */
ctx->stream_output_target_destroy = iris_stream_output_target_destroy;
ctx->set_stream_output_targets = iris_set_stream_output_targets;
- ice->state.destroy_state = iris_destroy_state;
- ice->state.init_render_context = iris_init_render_context;
- ice->state.upload_render_state = iris_upload_render_state;
- ice->state.emit_raw_pipe_control = iris_emit_raw_pipe_control;
- ice->state.derived_program_state_size = iris_derived_program_state_size;
- ice->state.set_derived_program_state = iris_set_derived_program_state;
- ice->state.populate_vs_key = iris_populate_vs_key;
- ice->state.populate_tcs_key = iris_populate_tcs_key;
- ice->state.populate_tes_key = iris_populate_tes_key;
- ice->state.populate_gs_key = iris_populate_gs_key;
- ice->state.populate_fs_key = iris_populate_fs_key;
+ ice->vtbl.destroy_state = iris_destroy_state;
+ ice->vtbl.init_render_context = iris_init_render_context;
+ ice->vtbl.upload_render_state = iris_upload_render_state;
+ ice->vtbl.emit_raw_pipe_control = iris_emit_raw_pipe_control;
+ ice->vtbl.derived_program_state_size = iris_derived_program_state_size;
+ ice->vtbl.set_derived_program_state = iris_set_derived_program_state;
+ ice->vtbl.populate_vs_key = iris_populate_vs_key;
+ ice->vtbl.populate_tcs_key = iris_populate_tcs_key;
+ ice->vtbl.populate_tes_key = iris_populate_tes_key;
+ ice->vtbl.populate_gs_key = iris_populate_gs_key;
+ ice->vtbl.populate_fs_key = iris_populate_fs_key;
ice->state.dirty = ~0ull;
}