Clover doesn't upload a cbuf0; instead it provides the kernel inputs as
part of the pipe_grid_info passed to the launch_grid hook. The most
obvious thing to do is to upload them along with the system values,
placing the kernel input data at the start of that buffer and the
system values immediately after it (dword-aligned).
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6280>
/* Whether shader uses atomic operations. */
bool uses_atomic_load_store;
/* Whether shader uses atomic operations. */
bool uses_atomic_load_store;
+ /** Size (in bytes) of the kernel input data */
+ unsigned kernel_input_size;
+
/** Constant data scraped from the shader by nir_opt_large_constants */
struct pipe_resource *const_data;
/** Constant data scraped from the shader by nir_opt_large_constants */
struct pipe_resource *const_data;
* 2. Assembly code
* 3. Number of entries in the system value array
* 4. System value array
* 2. Assembly code
* 3. Number of entries in the system value array
* 4. System value array
- * 5. Legacy param array (only used for compute workgroup ID)
- * 6. Binding table
+ * 5. Size (in bytes) of kernel inputs
+ * 6. Legacy param array (only used for compute workgroup ID)
+ * 7. Binding table
*/
blob_write_bytes(&blob, shader->prog_data, brw_prog_data_size(stage));
blob_write_bytes(&blob, shader->map, shader->prog_data->program_size);
*/
blob_write_bytes(&blob, shader->prog_data, brw_prog_data_size(stage));
blob_write_bytes(&blob, shader->map, shader->prog_data->program_size);
if (num_cbufs || ish->nir->num_uniforms)
num_cbufs++;
if (num_cbufs || ish->nir->num_uniforms)
num_cbufs++;
+ if (num_system_values || kernel_input_size)
num_cbufs++;
assert(stage < ARRAY_SIZE(cache_id_for_stage));
num_cbufs++;
assert(stage < ARRAY_SIZE(cache_id_for_stage));
void *mem_ctx,
nir_shader *nir,
struct brw_stage_prog_data *prog_data,
void *mem_ctx,
nir_shader *nir,
struct brw_stage_prog_data *prog_data,
+ unsigned kernel_input_size,
enum brw_param_builtin **out_system_values,
unsigned *out_num_system_values,
unsigned *out_num_cbufs)
{
UNUSED const struct gen_device_info *devinfo = compiler->devinfo;
enum brw_param_builtin **out_system_values,
unsigned *out_num_system_values,
unsigned *out_num_cbufs)
{
UNUSED const struct gen_device_info *devinfo = compiler->devinfo;
+ unsigned system_values_start = ALIGN(kernel_input_size, sizeof(uint32_t));
+
const unsigned IRIS_MAX_SYSTEM_VALUES =
PIPE_MAX_SHADER_IMAGES * BRW_IMAGE_PARAM_SIZE;
enum brw_param_builtin *system_values =
const unsigned IRIS_MAX_SYSTEM_VALUES =
PIPE_MAX_SHADER_IMAGES * BRW_IMAGE_PARAM_SIZE;
enum brw_param_builtin *system_values =
}
b.cursor = nir_before_instr(instr);
}
b.cursor = nir_before_instr(instr);
- offset = nir_imm_int(&b, ucp_idx[ucp] * sizeof(uint32_t));
+ offset = nir_imm_int(&b, system_values_start +
+ ucp_idx[ucp] * sizeof(uint32_t));
break;
}
case nir_intrinsic_load_patch_vertices_in:
break;
}
case nir_intrinsic_load_patch_vertices_in:
BRW_PARAM_BUILTIN_PATCH_VERTICES_IN;
b.cursor = nir_before_instr(instr);
BRW_PARAM_BUILTIN_PATCH_VERTICES_IN;
b.cursor = nir_before_instr(instr);
- offset = nir_imm_int(&b, patch_vert_idx * sizeof(uint32_t));
+ offset = nir_imm_int(&b, system_values_start +
+ patch_vert_idx * sizeof(uint32_t));
break;
case nir_intrinsic_image_deref_load_param_intel: {
assert(devinfo->gen < 9);
break;
case nir_intrinsic_image_deref_load_param_intel: {
assert(devinfo->gen < 9);
b.cursor = nir_before_instr(instr);
offset = nir_iadd(&b,
get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4),
b.cursor = nir_before_instr(instr);
offset = nir_iadd(&b,
get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4),
- nir_imm_int(&b, img_idx[var->data.binding] * 4 +
+ nir_imm_int(&b, system_values_start +
+ img_idx[var->data.binding] * 4 +
nir_intrinsic_base(intrin) * 16));
break;
}
nir_intrinsic_base(intrin) * 16));
break;
}
}
b.cursor = nir_before_instr(instr);
}
b.cursor = nir_before_instr(instr);
- offset = nir_imm_int(&b, variable_group_size_idx * sizeof(uint32_t));
+ offset = nir_imm_int(&b, system_values_start +
+ variable_group_size_idx * sizeof(uint32_t));
+ break;
+ }
+ case nir_intrinsic_load_kernel_input: {
+ assert(nir_intrinsic_base(intrin) +
+ nir_intrinsic_range(intrin) <= kernel_input_size);
+ b.cursor = nir_before_instr(instr);
+ offset = nir_iadd_imm(&b, intrin->src[0].ssa,
+ nir_intrinsic_base(intrin));
num_cbufs++;
/* Place the new params in a new cbuf. */
num_cbufs++;
/* Place the new params in a new cbuf. */
- if (num_system_values > 0) {
+ if (num_system_values > 0 || kernel_input_size > 0) {
unsigned sysval_cbuf_index = num_cbufs;
num_cbufs++;
unsigned sysval_cbuf_index = num_cbufs;
num_cbufs++;
prog_data->use_alt_mode = ish->use_alt_mode;
prog_data->use_alt_mode = ish->use_alt_mode;
- iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
+ iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
&num_system_values, &num_cbufs);
struct iris_binding_table bt;
&num_system_values, &num_cbufs);
struct iris_binding_table bt;
if (ish) {
nir = nir_shader_clone(mem_ctx, ish->nir);
if (ish) {
nir = nir_shader_clone(mem_ctx, ish->nir);
- iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
+ iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
&num_system_values, &num_cbufs);
iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
num_system_values, num_cbufs);
&num_system_values, &num_cbufs);
iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
num_system_values, num_cbufs);
nir_shader_gather_info(nir, impl);
}
nir_shader_gather_info(nir, impl);
}
- iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
+ iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
&num_system_values, &num_cbufs);
struct iris_binding_table bt;
&num_system_values, &num_cbufs);
struct iris_binding_table bt;
nir_shader_gather_info(nir, impl);
}
nir_shader_gather_info(nir, impl);
}
- iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
+ iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
&num_system_values, &num_cbufs);
struct iris_binding_table bt;
&num_system_values, &num_cbufs);
struct iris_binding_table bt;
prog_data->use_alt_mode = ish->use_alt_mode;
prog_data->use_alt_mode = ish->use_alt_mode;
- iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
+ iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
&num_system_values, &num_cbufs);
/* Lower output variables to load_output intrinsics before setting up
&num_system_values, &num_cbufs);
/* Lower output variables to load_output intrinsics before setting up
NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics);
NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics);
- iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
- &num_system_values, &num_cbufs);
+ iris_setup_uniforms(compiler, mem_ctx, nir, prog_data,
+ ish->kernel_input_size,
+ &system_values, &num_system_values, &num_cbufs);
struct iris_binding_table bt;
iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
struct iris_binding_table bt;
iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
struct iris_compiled_shader *shader =
iris_upload_shader(ice, IRIS_CACHE_CS, sizeof(*key), key, program,
prog_data, NULL, system_values, num_system_values,
struct iris_compiled_shader *shader =
iris_upload_shader(ice, IRIS_CACHE_CS, sizeof(*key), key, program,
prog_data, NULL, system_values, num_system_values,
+ ish->kernel_input_size, num_cbufs, &bt);
iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
struct iris_uncompiled_shader *ish =
iris_create_uncompiled_shader(ctx, nir, NULL);
struct iris_uncompiled_shader *ish =
iris_create_uncompiled_shader(ctx, nir, NULL);
+ ish->kernel_input_size = state->req_input_mem;
// XXX: disallow more than 64KB of shared variables
// XXX: disallow more than 64KB of shared variables
static void
upload_sysvals(struct iris_context *ice,
static void
upload_sysvals(struct iris_context *ice,
+ gl_shader_stage stage,
+ const struct pipe_grid_info *grid)
{
UNUSED struct iris_genx_state *genx = ice->state.genx;
struct iris_shader_state *shs = &ice->state.shaders[stage];
struct iris_compiled_shader *shader = ice->shaders.prog[stage];
{
UNUSED struct iris_genx_state *genx = ice->state.genx;
struct iris_shader_state *shs = &ice->state.shaders[stage];
struct iris_compiled_shader *shader = ice->shaders.prog[stage];
- if (!shader || shader->num_system_values == 0)
+ if (!shader || (shader->num_system_values == 0 &&
+ shader->kernel_input_size == 0))
return;
assert(shader->num_cbufs > 0);
unsigned sysval_cbuf_index = shader->num_cbufs - 1;
struct pipe_shader_buffer *cbuf = &shs->constbuf[sysval_cbuf_index];
return;
assert(shader->num_cbufs > 0);
unsigned sysval_cbuf_index = shader->num_cbufs - 1;
struct pipe_shader_buffer *cbuf = &shs->constbuf[sysval_cbuf_index];
- unsigned upload_size = shader->num_system_values * sizeof(uint32_t);
- uint32_t *map = NULL;
+ unsigned system_values_start =
+ ALIGN(shader->kernel_input_size, sizeof(uint32_t));
+ unsigned upload_size = system_values_start +
+ shader->num_system_values * sizeof(uint32_t);
+ void *map = NULL;
assert(sysval_cbuf_index < PIPE_MAX_CONSTANT_BUFFERS);
u_upload_alloc(ice->ctx.const_uploader, 0, upload_size, 64,
assert(sysval_cbuf_index < PIPE_MAX_CONSTANT_BUFFERS);
u_upload_alloc(ice->ctx.const_uploader, 0, upload_size, 64,
- &cbuf->buffer_offset, &cbuf->buffer, (void **) &map);
+ &cbuf->buffer_offset, &cbuf->buffer, &map);
+ if (shader->kernel_input_size > 0)
+ memcpy(map, grid->input, shader->kernel_input_size);
+
+ uint32_t *sysval_map = map + system_values_start;
for (int i = 0; i < shader->num_system_values; i++) {
uint32_t sysval = shader->system_values[i];
uint32_t value = 0;
for (int i = 0; i < shader->num_system_values; i++) {
uint32_t sysval = shader->system_values[i];
uint32_t value = 0;
assert(!"unhandled system value");
}
assert(!"unhandled system value");
}
}
cbuf->buffer_size = upload_size;
}
cbuf->buffer_size = upload_size;
continue;
if (shs->sysvals_need_upload)
continue;
if (shs->sysvals_need_upload)
- upload_sysvals(ice, stage);
+ upload_sysvals(ice, stage, NULL);
struct push_bos push_bos = {};
setup_constant_buffers(ice, batch, stage, &push_bos);
struct push_bos push_bos = {};
setup_constant_buffers(ice, batch, stage, &push_bos);
if ((stage_dirty & IRIS_STAGE_DIRTY_CONSTANTS_CS) &&
shs->sysvals_need_upload)
if ((stage_dirty & IRIS_STAGE_DIRTY_CONSTANTS_CS) &&
shs->sysvals_need_upload)
- upload_sysvals(ice, MESA_SHADER_COMPUTE);
+ upload_sysvals(ice, MESA_SHADER_COMPUTE, grid);
if (stage_dirty & IRIS_STAGE_DIRTY_BINDINGS_CS)
iris_populate_binding_table(ice, batch, MESA_SHADER_COMPUTE, false);
if (stage_dirty & IRIS_STAGE_DIRTY_BINDINGS_CS)
iris_populate_binding_table(ice, batch, MESA_SHADER_COMPUTE, false);