init_aux_map_state(struct iris_batch *batch);
#endif
+/**
+ * Upload initial GPU state common to every kind of context.
+ *
+ * These register writes need to happen for both render and compute
+ * contexts, so they are factored out here and invoked from both the
+ * render and compute context initializers.
+ */
+static void
+iris_init_common_context(struct iris_batch *batch)
+{
+#if GEN_GEN == 11
+   uint32_t reg_val;
+
+   /* Use headerless sampler messages for pre-emptable contexts. */
+   iris_pack_state(GENX(SAMPLER_MODE), &reg_val, reg) {
+      reg.HeaderlessMessageforPreemptableContexts = 1;
+      reg.HeaderlessMessageforPreemptableContextsMask = 1;
+   }
+   iris_emit_lri(batch, SAMPLER_MODE, reg_val);
+
+   /* Bit 1 must be set in HALF_SLICE_CHICKEN7. */
+   iris_pack_state(GENX(HALF_SLICE_CHICKEN7), &reg_val, reg) {
+      reg.EnabledTexelOffsetPrecisionFix = 1;
+      reg.EnabledTexelOffsetPrecisionFixMask = 1;
+   }
+   iris_emit_lri(batch, HALF_SLICE_CHICKEN7, reg_val);
+#endif
+}
+
/**
* Upload the initial GPU state for a render context.
*
init_state_base_address(batch);
+ iris_init_common_context(batch);
+
#if GEN_GEN >= 9
 iris_pack_state(GENX(CS_DEBUG_MODE2), &reg_val, reg) {
reg.CONSTANT_BUFFERAddressOffsetDisable = true;
}
iris_emit_lri(batch, TCCNTLREG, reg_val);
- iris_pack_state(GENX(SAMPLER_MODE), &reg_val, reg) {
- reg.HeaderlessMessageforPreemptableContexts = 1;
- reg.HeaderlessMessageforPreemptableContextsMask = 1;
- }
- iris_emit_lri(batch, SAMPLER_MODE, reg_val);
-
- /* Bit 1 must be set in HALF_SLICE_CHICKEN7. */
- iris_pack_state(GENX(HALF_SLICE_CHICKEN7), &reg_val, reg) {
- reg.EnabledTexelOffsetPrecisionFix = 1;
- reg.EnabledTexelOffsetPrecisionFixMask = 1;
- }
- iris_emit_lri(batch, HALF_SLICE_CHICKEN7, reg_val);
-
/* Hardware specification recommends disabling repacking for the
* compatibility with decompression mechanism in display controller.
*/
init_state_base_address(batch);
+ iris_init_common_context(batch);
+
#if GEN_GEN == 12
emit_pipeline_select(batch, GPGPU);
#endif
static void
upload_sysvals(struct iris_context *ice,
- gl_shader_stage stage)
+ gl_shader_stage stage,
+ const struct pipe_grid_info *grid)
{
UNUSED struct iris_genx_state *genx = ice->state.genx;
struct iris_shader_state *shs = &ice->state.shaders[stage];
struct iris_compiled_shader *shader = ice->shaders.prog[stage];
- if (!shader || shader->num_system_values == 0)
+ if (!shader || (shader->num_system_values == 0 &&
+ shader->kernel_input_size == 0))
return;
assert(shader->num_cbufs > 0);
unsigned sysval_cbuf_index = shader->num_cbufs - 1;
struct pipe_shader_buffer *cbuf = &shs->constbuf[sysval_cbuf_index];
- unsigned upload_size = shader->num_system_values * sizeof(uint32_t);
- uint32_t *map = NULL;
+ unsigned system_values_start =
+ ALIGN(shader->kernel_input_size, sizeof(uint32_t));
+ unsigned upload_size = system_values_start +
+ shader->num_system_values * sizeof(uint32_t);
+ void *map = NULL;
assert(sysval_cbuf_index < PIPE_MAX_CONSTANT_BUFFERS);
u_upload_alloc(ice->ctx.const_uploader, 0, upload_size, 64,
- &cbuf->buffer_offset, &cbuf->buffer, (void **) &map);
+ &cbuf->buffer_offset, &cbuf->buffer, &map);
+
+ if (shader->kernel_input_size > 0)
+ memcpy(map, grid->input, shader->kernel_input_size);
+ uint32_t *sysval_map = map + system_values_start;
for (int i = 0; i < shader->num_system_values; i++) {
uint32_t sysval = shader->system_values[i];
uint32_t value = 0;
assert(!"unhandled system value");
}
- *map++ = value;
+ *sysval_map++ = value;
}
cbuf->buffer_size = upload_size;
continue;
if (shs->sysvals_need_upload)
- upload_sysvals(ice, stage);
+ upload_sysvals(ice, stage, NULL);
struct push_bos push_bos = {};
setup_constant_buffers(ice, batch, stage, &push_bos);
if ((stage_dirty & IRIS_STAGE_DIRTY_CONSTANTS_CS) &&
shs->sysvals_need_upload)
- upload_sysvals(ice, MESA_SHADER_COMPUTE);
+ upload_sysvals(ice, MESA_SHADER_COMPUTE, grid);
if (stage_dirty & IRIS_STAGE_DIRTY_BINDINGS_CS)
iris_populate_binding_table(ice, batch, MESA_SHADER_COMPUTE, false);