const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
const struct anv_push_range *range = &pipeline->cs->bind_map.push_ranges[0];
+ const uint32_t threads = anv_cs_threads(pipeline);
const unsigned total_push_constants_size =
- brw_cs_push_const_total_size(cs_prog_data, cs_prog_data->threads);
+ brw_cs_push_const_total_size(cs_prog_data, threads);
if (total_push_constants_size == 0)
return (struct anv_state) { .offset = 0 };
}
if (cs_prog_data->push.per_thread.size > 0) {
- for (unsigned t = 0; t < cs_prog_data->threads; t++) {
+ for (unsigned t = 0; t < threads; t++) {
memcpy(dst, src, cs_prog_data->push.per_thread.size);
uint32_t *subgroup_id = dst +
return VK_SUCCESS;
}
+uint32_t
+anv_cs_workgroup_size(const struct anv_compute_pipeline *pipeline)
+{
+ const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
+ return cs_prog_data->local_size[0] *
+ cs_prog_data->local_size[1] *
+ cs_prog_data->local_size[2];
+}
+
+uint32_t
+anv_cs_threads(const struct anv_compute_pipeline *pipeline)
+{
+ const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
+ return DIV_ROUND_UP(anv_cs_workgroup_size(pipeline),
+ cs_prog_data->simd_size);
+}
+
/**
* Copy pipeline state not marked as dynamic.
* Dynamic state is pipeline state which hasn't been provided at pipeline
const char *entrypoint,
const VkSpecializationInfo *spec_info);
+uint32_t
+anv_cs_workgroup_size(const struct anv_compute_pipeline *pipeline);
+
+uint32_t
+anv_cs_threads(const struct anv_compute_pipeline *pipeline);
+
struct anv_format_plane {
enum isl_format isl_format:16;
struct isl_swizzle swizzle;
ggw.SIMDSize = prog_data->simd_size / 16;
ggw.ThreadDepthCounterMaximum = 0;
ggw.ThreadHeightCounterMaximum = 0;
- ggw.ThreadWidthCounterMaximum = prog_data->threads - 1;
+ ggw.ThreadWidthCounterMaximum = anv_cs_threads(pipeline) - 1;
ggw.ThreadGroupIDXDimension = groupCountX;
ggw.ThreadGroupIDYDimension = groupCountY;
ggw.ThreadGroupIDZDimension = groupCountZ;
ggw.SIMDSize = prog_data->simd_size / 16;
ggw.ThreadDepthCounterMaximum = 0;
ggw.ThreadHeightCounterMaximum = 0;
- ggw.ThreadWidthCounterMaximum = prog_data->threads - 1;
+ ggw.ThreadWidthCounterMaximum = anv_cs_threads(pipeline) - 1;
ggw.RightExecutionMask = pipeline->cs_right_mask;
ggw.BottomExecutionMask = 0xffffffff;
}
else
pipeline->cs_right_mask = ~0u >> (32 - cs_prog_data->simd_size);
+ const uint32_t threads = anv_cs_threads(pipeline);
+
const uint32_t vfe_curbe_allocation =
- ALIGN(cs_prog_data->push.per_thread.regs * cs_prog_data->threads +
+ ALIGN(cs_prog_data->push.per_thread.regs * threads +
cs_prog_data->push.cross_thread.regs, 2);
const uint32_t subslices = MAX2(device->physical->subslice_total, 1);
.ThreadPreemptionDisable = true,
#endif
- .NumberofThreadsinGPGPUThreadGroup = cs_prog_data->threads,
+ .NumberofThreadsinGPGPUThreadGroup = threads,
};
GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL,
pipeline->interface_descriptor_data,