const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
const struct anv_push_range *range = &pipeline->cs->bind_map.push_ranges[0];
- const uint32_t threads = anv_cs_threads(pipeline);
+ const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
const unsigned total_push_constants_size =
- brw_cs_push_const_total_size(cs_prog_data, threads);
+ brw_cs_push_const_total_size(cs_prog_data, cs_params.threads);
if (total_push_constants_size == 0)
return (struct anv_state) { .offset = 0 };
}
if (cs_prog_data->push.per_thread.size > 0) {
- for (unsigned t = 0; t < threads; t++) {
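+ /* Emit one copy of the per-thread push constants for each HW thread,
+  * patching that thread's subgroup ID into its copy.
+  */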
+ for (unsigned t = 0; t < cs_params.threads; t++) {
memcpy(dst, src, cs_prog_data->push.per_thread.size);
uint32_t *subgroup_id = dst +
return VK_SUCCESS;
}
-uint32_t
-anv_cs_workgroup_size(const struct anv_compute_pipeline *pipeline)
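+/* Gather the compute dispatch parameters in one place: total invocations
+ * per workgroup, the SIMD width chosen for that size, and the resulting
+ * number of HW threads per workgroup.
+ */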
+struct anv_cs_parameters
+anv_cs_parameters(const struct anv_compute_pipeline *pipeline)
{
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
- return cs_prog_data->local_size[0] *
- cs_prog_data->local_size[1] *
- cs_prog_data->local_size[2];
-}
-uint32_t
-anv_cs_threads(const struct anv_compute_pipeline *pipeline)
-{
- const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
- return DIV_ROUND_UP(anv_cs_workgroup_size(pipeline),
- cs_prog_data->simd_size);
+ struct anv_cs_parameters cs_params = {};
+
+ cs_params.group_size = cs_prog_data->local_size[0] *
+ cs_prog_data->local_size[1] *
+ cs_prog_data->local_size[2];
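+ /* Pick the SIMD width (8, 16, or 32) compiled for this group size; the
+  * kernel binary may contain more than one variant.
+  */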
+ cs_params.simd_size =
+ brw_cs_simd_size_for_group_size(&pipeline->base.device->info,
+ cs_prog_data, cs_params.group_size);
+ cs_params.threads = DIV_ROUND_UP(cs_params.group_size, cs_params.simd_size);
+
+ return cs_params;
}
/**
const char *entrypoint,
const VkSpecializationInfo *spec_info);
-uint32_t
-anv_cs_workgroup_size(const struct anv_compute_pipeline *pipeline);
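+/* Compute dispatch parameters derived from the shader: invocations per
+ * workgroup, SIMD width, and HW threads per workgroup.
+ */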
+struct anv_cs_parameters {
+ uint32_t group_size;
+ uint32_t simd_size;
+ uint32_t threads;
+};
-uint32_t
-anv_cs_threads(const struct anv_compute_pipeline *pipeline);
+struct anv_cs_parameters
+anv_cs_parameters(const struct anv_compute_pipeline *pipeline);
struct anv_format_plane {
enum isl_format isl_format:16;
if (cmd_buffer->state.conditional_render_enabled)
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
+ const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
+
anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), ggw) {
ggw.PredicateEnable = cmd_buffer->state.conditional_render_enabled;
- ggw.SIMDSize = prog_data->simd_size / 16;
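+ /* The SIMDSize field encodes SIMD8/16/32 as 0/1/2, hence the divide. */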
+ ggw.SIMDSize = cs_params.simd_size / 16;
ggw.ThreadDepthCounterMaximum = 0;
ggw.ThreadHeightCounterMaximum = 0;
- ggw.ThreadWidthCounterMaximum = anv_cs_threads(pipeline) - 1;
+ ggw.ThreadWidthCounterMaximum = cs_params.threads - 1;
ggw.ThreadGroupIDXDimension = groupCountX;
ggw.ThreadGroupIDYDimension = groupCountY;
ggw.ThreadGroupIDZDimension = groupCountZ;
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);
#endif
+ const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
+
anv_batch_emit(batch, GENX(GPGPU_WALKER), ggw) {
ggw.IndirectParameterEnable = true;
ggw.PredicateEnable = GEN_GEN <= 7 ||
cmd_buffer->state.conditional_render_enabled;
- ggw.SIMDSize = prog_data->simd_size / 16;
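+ /* Same SIMD8/16/32 -> 0/1/2 encoding as the direct dispatch path. */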
+ ggw.SIMDSize = cs_params.simd_size / 16;
ggw.ThreadDepthCounterMaximum = 0;
ggw.ThreadHeightCounterMaximum = 0;
- ggw.ThreadWidthCounterMaximum = anv_cs_threads(pipeline) - 1;
+ ggw.ThreadWidthCounterMaximum = cs_params.threads - 1;
ggw.RightExecutionMask = pipeline->cs_right_mask;
ggw.BottomExecutionMask = 0xffffffff;
}
anv_pipeline_setup_l3_config(&pipeline->base, cs_prog_data->base.total_shared > 0);
- uint32_t group_size = cs_prog_data->local_size[0] *
- cs_prog_data->local_size[1] * cs_prog_data->local_size[2];
- uint32_t remainder = group_size & (cs_prog_data->simd_size - 1);
+ const struct anv_cs_parameters cs_params = anv_cs_parameters(pipeline);
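+
+ /* Build the execution mask for the last, possibly partial, SIMD thread
+  * of each workgroup; simd_size is a power of two.
+  */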
+ uint32_t remainder = cs_params.group_size & (cs_params.simd_size - 1);
if (remainder > 0)
pipeline->cs_right_mask = ~0u >> (32 - remainder);
else
- pipeline->cs_right_mask = ~0u >> (32 - cs_prog_data->simd_size);
-
- const uint32_t threads = anv_cs_threads(pipeline);
+ pipeline->cs_right_mask = ~0u >> (32 - cs_params.simd_size);
const uint32_t vfe_curbe_allocation =
- ALIGN(cs_prog_data->push.per_thread.regs * threads +
+ ALIGN(cs_prog_data->push.per_thread.regs * cs_params.threads +
cs_prog_data->push.cross_thread.regs, 2);
const uint32_t subslices = MAX2(device->physical->subslice_total, 1);
}
struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
- .KernelStartPointer = cs_bin->kernel.offset,
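+ /* Offset to the entry point of the SIMD variant selected above. */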
+ .KernelStartPointer =
+ cs_bin->kernel.offset +
+ brw_cs_prog_data_prog_offset(cs_prog_data, cs_params.simd_size),
+
/* WA_1606682166 */
.SamplerCount = GEN_GEN == 11 ? 0 : get_sampler_count(cs_bin),
/* We add 1 because the CS indirect parameters buffer isn't accounted
.ThreadPreemptionDisable = true,
#endif
- .NumberofThreadsinGPGPUThreadGroup = threads,
+ .NumberofThreadsinGPGPUThreadGroup = cs_params.threads,
};
GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL,
pipeline->interface_descriptor_data,