const struct gen_l3_config *cfg)
{
uint32_t reg_val;
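+ /* On Gen12+, cfg may be NULL, which requests "full way" mode: the
+ * entire L3 cache is handed out in whole ways instead of being
+ * partitioned between clients (see L3FullWayAllocationEnable below).
+ */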
+ assert(cfg || GEN_GEN >= 12);
#if GEN_GEN >= 12
#define L3_ALLOCATION_REG GENX(L3ALLOC)
reg.ErrorDetectionBehaviorControl = true;
reg.UseFullWays = true;
#endif
- reg.URBAllocation = cfg->n[GEN_L3P_URB];
- reg.ROAllocation = cfg->n[GEN_L3P_RO];
- reg.DCAllocation = cfg->n[GEN_L3P_DC];
- reg.AllAllocation = cfg->n[GEN_L3P_ALL];
+ if (GEN_GEN < 12 || cfg) {
+ reg.URBAllocation = cfg->n[GEN_L3P_URB];
+ reg.ROAllocation = cfg->n[GEN_L3P_RO];
+ reg.DCAllocation = cfg->n[GEN_L3P_DC];
+ reg.AllAllocation = cfg->n[GEN_L3P_ALL];
+ } else {
+#if GEN_GEN >= 12
+ reg.L3FullWayAllocationEnable = true;
+#endif
+ }
}
_iris_emit_lri(batch, L3_ALLOCATION_REG_num, reg_val);
}
init_aux_map_state(struct iris_batch *batch);
#endif
+/**
+ * Upload initial GPU state for any kind of context.
+ *
+ * These need to happen for both render and compute.
+ */
+static void
+iris_init_common_context(struct iris_batch *batch)
+{
+#if GEN_GEN == 11
+ uint32_t reg_val;
+
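+ /* Use headerless sampler messages in preemptable contexts. SAMPLER_MODE
+ * is a masked register, so the Mask field must also be set for the LRI
+ * write to take effect.
+ */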
+ iris_pack_state(GENX(SAMPLER_MODE), &reg_val, reg) {
+ reg.HeaderlessMessageforPreemptableContexts = 1;
+ reg.HeaderlessMessageforPreemptableContextsMask = 1;
+ }
+ iris_emit_lri(batch, SAMPLER_MODE, reg_val);
+
+ /* Bit 1 must be set in HALF_SLICE_CHICKEN7. */
+ iris_pack_state(GENX(HALF_SLICE_CHICKEN7), &reg_val, reg) {
+ reg.EnabledTexelOffsetPrecisionFix = 1;
+ reg.EnabledTexelOffsetPrecisionFixMask = 1;
+ }
+ iris_emit_lri(batch, HALF_SLICE_CHICKEN7, reg_val);
+#endif
+}
+
/**
* Upload the initial GPU state for a render context.
*
init_state_base_address(batch);
+ iris_init_common_context(batch);
+
#if GEN_GEN >= 9
iris_pack_state(GENX(CS_DEBUG_MODE2), &reg_val, reg) {
reg.CONSTANT_BUFFERAddressOffsetDisable = true;
}
iris_emit_lri(batch, TCCNTLREG, reg_val);
- iris_pack_state(GENX(SAMPLER_MODE), &reg_val, reg) {
- reg.HeaderlessMessageforPreemptableContexts = 1;
- reg.HeaderlessMessageforPreemptableContextsMask = 1;
- }
- iris_emit_lri(batch, SAMPLER_MODE, reg_val);
-
- /* Bit 1 must be set in HALF_SLICE_CHICKEN7. */
- iris_pack_state(GENX(HALF_SLICE_CHICKEN7), &reg_val, reg) {
- reg.EnabledTexelOffsetPrecisionFix = 1;
- reg.EnabledTexelOffsetPrecisionFixMask = 1;
- }
- iris_emit_lri(batch, HALF_SLICE_CHICKEN7, reg_val);
-
/* The hardware specification recommends disabling repacking for
* compatibility with the decompression mechanism in the display controller.
*/
init_state_base_address(batch);
+ iris_init_common_context(batch);
+
#if GEN_GEN == 12
emit_pipeline_select(batch, GPGPU);
#endif
struct iris_blend_state *cso = state;
ice->state.cso_blend = cso;
- ice->state.blend_enables = cso ? cso->blend_enables : 0;
ice->state.dirty |= IRIS_DIRTY_PS_BLEND;
ice->state.dirty |= IRIS_DIRTY_BLEND_STATE;
- ice->state.dirty |= IRIS_DIRTY_RENDER_RESOLVES_AND_FLUSHES;
ice->state.stage_dirty |= ice->state.stage_dirty_for_nos[IRIS_NOS_BLEND];
if (GEN_GEN == 8)
: IRIS_DIRTY_RENDER_RESOLVES_AND_FLUSHES;
}
+static void
+iris_set_compute_resources(struct pipe_context *ctx,
+ unsigned start, unsigned count,
+ struct pipe_surface **resources)
+{
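+ /* iris has no use for bound compute resources, so only an empty set is
+ * accepted here.
+ */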
+ assert(count == 0);
+}
+
+static void
+iris_set_global_binding(struct pipe_context *ctx,
+ unsigned start_slot, unsigned count,
+ struct pipe_resource **resources,
+ uint32_t **handles)
+{
+ struct iris_context *ice = (struct iris_context *) ctx;
+
+ assert(start_slot + count <= IRIS_MAX_GLOBAL_BINDINGS);
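+ /* For each bound buffer, write its GPU virtual address into the
+ * caller-provided handle storage, so compute kernels can access the
+ * buffer through a raw pointer.
+ */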
+ for (unsigned i = 0; i < count; i++) {
+ if (resources && resources[i]) {
+ pipe_resource_reference(&ice->state.global_bindings[start_slot + i],
+ resources[i]);
+ struct iris_resource *res = (void *) resources[i];
+ uint64_t addr = res->bo->gtt_offset;
+ memcpy(handles[i], &addr, sizeof(addr));
+ } else {
+ pipe_resource_reference(&ice->state.global_bindings[start_slot + i],
+ NULL);
+ }
+ }
+
+ ice->state.stage_dirty |= IRIS_STAGE_DIRTY_BINDINGS_CS;
+}
+
/**
* The pipe->set_tess_state() driver hook.
*/
static void
upload_sysvals(struct iris_context *ice,
- gl_shader_stage stage)
+ gl_shader_stage stage,
+ const struct pipe_grid_info *grid)
{
UNUSED struct iris_genx_state *genx = ice->state.genx;
struct iris_shader_state *shs = &ice->state.shaders[stage];
struct iris_compiled_shader *shader = ice->shaders.prog[stage];
- if (!shader || shader->num_system_values == 0)
+ if (!shader || (shader->num_system_values == 0 &&
+ shader->kernel_input_size == 0))
return;
assert(shader->num_cbufs > 0);
unsigned sysval_cbuf_index = shader->num_cbufs - 1;
struct pipe_shader_buffer *cbuf = &shs->constbuf[sysval_cbuf_index];
- unsigned upload_size = shader->num_system_values * sizeof(uint32_t);
- uint32_t *map = NULL;
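+ /* The trailing constant buffer is laid out as the kernel inputs (if
+ * any), padded to a dword boundary, followed by the system values.
+ */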
+ unsigned system_values_start =
+ ALIGN(shader->kernel_input_size, sizeof(uint32_t));
+ unsigned upload_size = system_values_start +
+ shader->num_system_values * sizeof(uint32_t);
+ void *map = NULL;
assert(sysval_cbuf_index < PIPE_MAX_CONSTANT_BUFFERS);
u_upload_alloc(ice->ctx.const_uploader, 0, upload_size, 64,
- &cbuf->buffer_offset, &cbuf->buffer, (void **) &map);
+ &cbuf->buffer_offset, &cbuf->buffer, &map);
+
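+ /* grid (and thus grid->input) is only provided for compute dispatches;
+ * shaders with kernel inputs are never uploaded without one.
+ */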
+ if (shader->kernel_input_size > 0)
+ memcpy(map, grid->input, shader->kernel_input_size);
+ uint32_t *sysval_map = map + system_values_start;
for (int i = 0; i < shader->num_system_values; i++) {
uint32_t sysval = shader->system_values[i];
uint32_t value = 0;
assert(!"unhandled system value");
}
- *map++ = value;
+ *sysval_map++ = value;
}
cbuf->buffer_size = upload_size;
unsigned aux_modes,
enum isl_aux_usage aux_usage)
{
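+ /* The requested aux usage must be one of the modes this surface state
+ * set was allocated for, or the offset below would be meaningless.
+ */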
+ assert(aux_modes & (1 << aux_usage));
return SURFACE_STATE_ALIGNMENT *
util_bitcount(aux_modes & ((1 << aux_usage) - 1));
}
if (res->aux.bo) {
iris_use_pinned_bo(batch, res->aux.bo, writeable, access);
if (res->aux.clear_color_bo)
- iris_use_pinned_bo(batch, res->aux.clear_color_bo,
- false, IRIS_DOMAIN_OTHER_READ);
+ iris_use_pinned_bo(batch, res->aux.clear_color_bo, false, access);
if (memcmp(&res->aux.clear_color, &surf->clear_color,
sizeof(surf->clear_color)) != 0) {
continue;
if (shs->sysvals_need_upload)
- upload_sysvals(ice, stage);
+ upload_sysvals(ice, stage, NULL);
struct push_bos push_bos = {};
setup_constant_buffers(ice, batch, stage, &push_bos);
iris_use_pinned_bo(batch, ice->state.binder.bo, false,
IRIS_DOMAIN_NONE);
- if (!batch->contains_draw) {
+ if (!batch->contains_draw_with_next_seqno) {
iris_restore_render_saved_bos(ice, batch, draw);
- batch->contains_draw = true;
+ batch->contains_draw_with_next_seqno = batch->contains_draw = true;
}
iris_upload_dirty_render_state(ice, batch, draw);
}
static void
-iris_upload_compute_state(struct iris_context *ice,
- struct iris_batch *batch,
- const struct pipe_grid_info *grid)
+iris_load_indirect_location(struct iris_context *ice,
+ struct iris_batch *batch,
+ const struct pipe_grid_info *grid)
+{
+#define GPGPU_DISPATCHDIMX 0x2500
+#define GPGPU_DISPATCHDIMY 0x2504
+#define GPGPU_DISPATCHDIMZ 0x2508
+
+ assert(grid->indirect);
+
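+ /* Load the dispatch dimensions directly from the indirect buffer into
+ * the GPGPU_DISPATCHDIM{X,Y,Z} registers, where GPGPU_WALKER reads them
+ * when indirect dispatch is enabled.
+ */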
+ struct iris_state_ref *grid_size = &ice->state.grid_size;
+ struct iris_bo *bo = iris_resource_bo(grid_size->res);
+ iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
+ lrm.RegisterAddress = GPGPU_DISPATCHDIMX;
+ lrm.MemoryAddress = ro_bo(bo, grid_size->offset + 0);
+ }
+ iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
+ lrm.RegisterAddress = GPGPU_DISPATCHDIMY;
+ lrm.MemoryAddress = ro_bo(bo, grid_size->offset + 4);
+ }
+ iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
+ lrm.RegisterAddress = GPGPU_DISPATCHDIMZ;
+ lrm.MemoryAddress = ro_bo(bo, grid_size->offset + 8);
+ }
+}
+
+static void
+iris_upload_gpgpu_walker(struct iris_context *ice,
+ struct iris_batch *batch,
+ const struct pipe_grid_info *grid)
{
const uint64_t stage_dirty = ice->state.stage_dirty;
struct iris_screen *screen = batch->screen;
ice->shaders.prog[MESA_SHADER_COMPUTE];
struct brw_stage_prog_data *prog_data = shader->prog_data;
struct brw_cs_prog_data *cs_prog_data = (void *) prog_data;
-
const uint32_t group_size = grid->block[0] * grid->block[1] * grid->block[2];
const unsigned simd_size =
brw_cs_simd_size_for_group_size(devinfo, cs_prog_data, group_size);
const unsigned threads = DIV_ROUND_UP(group_size, simd_size);
- iris_batch_sync_region_start(batch);
-
- /* Always pin the binder. If we're emitting new binding table pointers,
- * we need it. If not, we're probably inheriting old tables via the
- * context, and need it anyway. Since true zero-bindings cases are
- * practically non-existent, just pin it and avoid last_res tracking.
- */
- iris_use_pinned_bo(batch, ice->state.binder.bo, false, IRIS_DOMAIN_NONE);
-
- if ((stage_dirty & IRIS_STAGE_DIRTY_CONSTANTS_CS) &&
- shs->sysvals_need_upload)
- upload_sysvals(ice, MESA_SHADER_COMPUTE);
-
- if (stage_dirty & IRIS_STAGE_DIRTY_BINDINGS_CS)
- iris_populate_binding_table(ice, batch, MESA_SHADER_COMPUTE, false);
-
- if (stage_dirty & IRIS_STAGE_DIRTY_SAMPLER_STATES_CS)
- iris_upload_sampler_states(ice, MESA_SHADER_COMPUTE);
-
- iris_use_optional_res(batch, shs->sampler_table.res, false,
- IRIS_DOMAIN_NONE);
- iris_use_pinned_bo(batch, iris_resource_bo(shader->assembly.res), false,
- IRIS_DOMAIN_NONE);
-
- if (ice->state.need_border_colors)
- iris_use_pinned_bo(batch, ice->state.border_color_pool.bo, false,
- IRIS_DOMAIN_NONE);
-
-#if GEN_GEN >= 12
- genX(invalidate_aux_map_state)(batch);
-#endif
if (stage_dirty & IRIS_STAGE_DIRTY_CS) {
/* The MEDIA_VFE_STATE documentation for Gen8+ says:
}
}
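+ /* Pin all global bindings: the kernel may dereference them through the
+ * raw GPU addresses handed out by iris_set_global_binding.
+ */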
+ for (unsigned i = 0; i < IRIS_MAX_GLOBAL_BINDINGS; i++) {
+ struct pipe_resource *res = ice->state.global_bindings[i];
+ if (!res)
+ continue;
+
+ iris_use_pinned_bo(batch, iris_resource_bo(res),
+ true, IRIS_DOMAIN_NONE);
+ }
+
if (stage_dirty & (IRIS_STAGE_DIRTY_SAMPLER_STATES_CS |
IRIS_STAGE_DIRTY_BINDINGS_CS |
IRIS_STAGE_DIRTY_CONSTANTS_CS |
}
}
-#define GPGPU_DISPATCHDIMX 0x2500
-#define GPGPU_DISPATCHDIMY 0x2504
-#define GPGPU_DISPATCHDIMZ 0x2508
-
- if (grid->indirect) {
- struct iris_state_ref *grid_size = &ice->state.grid_size;
- struct iris_bo *bo = iris_resource_bo(grid_size->res);
- iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
- lrm.RegisterAddress = GPGPU_DISPATCHDIMX;
- lrm.MemoryAddress = ro_bo(bo, grid_size->offset + 0);
- }
- iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
- lrm.RegisterAddress = GPGPU_DISPATCHDIMY;
- lrm.MemoryAddress = ro_bo(bo, grid_size->offset + 4);
- }
- iris_emit_cmd(batch, GENX(MI_LOAD_REGISTER_MEM), lrm) {
- lrm.RegisterAddress = GPGPU_DISPATCHDIMZ;
- lrm.MemoryAddress = ro_bo(bo, grid_size->offset + 8);
- }
- }
+ if (grid->indirect)
+ iris_load_indirect_location(ice, batch, grid);
const uint32_t right_mask = brw_cs_right_mask(group_size, simd_size);
}
iris_emit_cmd(batch, GENX(MEDIA_STATE_FLUSH), msf);
+}
- if (!batch->contains_draw) {
+static void
+iris_upload_compute_state(struct iris_context *ice,
+ struct iris_batch *batch,
+ const struct pipe_grid_info *grid)
+{
+ const uint64_t stage_dirty = ice->state.stage_dirty;
+ struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE];
+ struct iris_compiled_shader *shader =
+ ice->shaders.prog[MESA_SHADER_COMPUTE];
+
+ iris_batch_sync_region_start(batch);
+
+ /* Always pin the binder. If we're emitting new binding table pointers,
+ * we need it. If not, we're probably inheriting old tables via the
+ * context, and need it anyway. Since true zero-bindings cases are
+ * practically non-existent, just pin it and avoid last_res tracking.
+ */
+ iris_use_pinned_bo(batch, ice->state.binder.bo, false, IRIS_DOMAIN_NONE);
+
+ if ((stage_dirty & IRIS_STAGE_DIRTY_CONSTANTS_CS) &&
+ shs->sysvals_need_upload)
+ upload_sysvals(ice, MESA_SHADER_COMPUTE, grid);
+
+ if (stage_dirty & IRIS_STAGE_DIRTY_BINDINGS_CS)
+ iris_populate_binding_table(ice, batch, MESA_SHADER_COMPUTE, false);
+
+ if (stage_dirty & IRIS_STAGE_DIRTY_SAMPLER_STATES_CS)
+ iris_upload_sampler_states(ice, MESA_SHADER_COMPUTE);
+
+ iris_use_optional_res(batch, shs->sampler_table.res, false,
+ IRIS_DOMAIN_NONE);
+ iris_use_pinned_bo(batch, iris_resource_bo(shader->assembly.res), false,
+ IRIS_DOMAIN_NONE);
+
+ if (ice->state.need_border_colors)
+ iris_use_pinned_bo(batch, ice->state.border_color_pool.bo, false,
+ IRIS_DOMAIN_NONE);
+
+#if GEN_GEN >= 12
+ genX(invalidate_aux_map_state)(batch);
+#endif
+
+ iris_upload_gpgpu_walker(ice, batch, grid);
+
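+ /* Restore and re-pin saved BOs the first time we dispatch with the
+ * next sequence number; the flag is reset at each sync boundary.
+ */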
+ if (!batch->contains_draw_with_next_seqno) {
iris_restore_compute_saved_bos(ice, batch, grid);
- batch->contains_draw = true;
+ batch->contains_draw_with_next_seqno = batch->contains_draw = true;
}
iris_batch_sync_region_end(batch);
/* ------------------------------------------------------------------- */
+/**
+ * Introduce a batch synchronization boundary, and update its cache coherency
+ * status to reflect the execution of a PIPE_CONTROL command with the
+ * specified flags.
+ */
+static void
+batch_mark_sync_for_pipe_control(struct iris_batch *batch, uint32_t flags)
+{
+ iris_batch_sync_boundary(batch);
+
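+ /* Only a CS stall guarantees that earlier writes through these caches
+ * have completed, so treat the cache flush bits as flushes of the
+ * corresponding write domains only in that case.
+ */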
+ if ((flags & PIPE_CONTROL_CS_STALL)) {
+ if ((flags & PIPE_CONTROL_RENDER_TARGET_FLUSH))
+ iris_batch_mark_flush_sync(batch, IRIS_DOMAIN_RENDER_WRITE);
+
+ if ((flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH))
+ iris_batch_mark_flush_sync(batch, IRIS_DOMAIN_DEPTH_WRITE);
+
+ if ((flags & PIPE_CONTROL_FLUSH_ENABLE))
+ iris_batch_mark_flush_sync(batch, IRIS_DOMAIN_OTHER_WRITE);
+
+ if ((flags & (PIPE_CONTROL_CACHE_FLUSH_BITS |
+ PIPE_CONTROL_STALL_AT_SCOREBOARD)))
+ iris_batch_mark_flush_sync(batch, IRIS_DOMAIN_OTHER_READ);
+ }
+
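+ /* These flush bits also invalidate the respective caches for later
+ * reads, even without a CS stall.
+ */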
+ if ((flags & PIPE_CONTROL_RENDER_TARGET_FLUSH))
+ iris_batch_mark_invalidate_sync(batch, IRIS_DOMAIN_RENDER_WRITE);
+
+ if ((flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH))
+ iris_batch_mark_invalidate_sync(batch, IRIS_DOMAIN_DEPTH_WRITE);
+
+ if ((flags & PIPE_CONTROL_FLUSH_ENABLE))
+ iris_batch_mark_invalidate_sync(batch, IRIS_DOMAIN_OTHER_WRITE);
+
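+ /* IRIS_DOMAIN_OTHER_READ covers several read-only caches, so it only
+ * counts as invalidated once both the texture and constant caches are.
+ */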
+ if ((flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE) &&
+ (flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE))
+ iris_batch_mark_invalidate_sync(batch, IRIS_DOMAIN_OTHER_READ);
+}
+
static unsigned
flags_to_post_sync_op(uint32_t flags)
{
imm, reason);
}
+ batch_mark_sync_for_pipe_control(batch, flags);
iris_batch_sync_region_start(batch);
iris_emit_cmd(batch, GENX(PIPE_CONTROL), pc) {
ctx->set_shader_buffers = iris_set_shader_buffers;
ctx->set_shader_images = iris_set_shader_images;
ctx->set_sampler_views = iris_set_sampler_views;
+ ctx->set_compute_resources = iris_set_compute_resources;
+ ctx->set_global_binding = iris_set_global_binding;
ctx->set_tess_state = iris_set_tess_state;
ctx->set_framebuffer_state = iris_set_framebuffer_state;
ctx->set_polygon_stipple = iris_set_polygon_stipple;