struct anv_pipeline_layout *layout;
uint32_t color_count, bias, state_offset;
- if (stage == MESA_SHADER_COMPUTE)
- layout = cmd_buffer->state.compute_pipeline->layout;
- else
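+ /* Reserve fixed slots at the front of each stage's binding table:
+  * MAX_RTS render-target slots for the fragment shader, and one slot
+  * for the gl_NumWorkGroups surface in compute.
+  */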
+ switch (stage) {
+ case MESA_SHADER_FRAGMENT:
layout = cmd_buffer->state.pipeline->layout;
-
- if (stage == MESA_SHADER_FRAGMENT) {
bias = MAX_RTS;
color_count = subpass->color_count;
- } else {
+ break;
+ case MESA_SHADER_COMPUTE:
+ layout = cmd_buffer->state.compute_pipeline->layout;
+ bias = 1;
+ color_count = 0;
+ break;
+ default:
+ layout = cmd_buffer->state.pipeline->layout;
bias = 0;
color_count = 0;
+ break;
}
/* This is a little awkward: layout can be NULL but we still have to
iview->bo, iview->offset);
}
+ if (stage == MESA_SHADER_COMPUTE &&
+ cmd_buffer->state.compute_pipeline->cs_prog_data.uses_num_work_groups) {
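+ /* This shader reads gl_NumWorkGroups: bind a storage-buffer surface
+  * over the 12-byte (3 x uint32) workgroup-count buffer recorded at
+  * dispatch time.
+  */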
+ struct anv_bo *bo = cmd_buffer->state.num_workgroups_bo;
+ uint32_t bo_offset = cmd_buffer->state.num_workgroups_offset;
+
+ struct anv_state surface_state =
+ anv_cmd_buffer_alloc_surface_state(cmd_buffer);
+
+ fill_descriptor_buffer_surface_state(cmd_buffer->device,
+ surface_state.map, stage,
+ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ bo_offset, 12);
+
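+ /* The surface state was written through a CPU map; on non-LLC
+  * platforms it must be flushed before the GPU reads it.
+  */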
+ if (!cmd_buffer->device->info.has_llc)
+ anv_state_clflush(surface_state);
+
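+ /* Slot 0 matches binding_table.work_groups_start set at compile time. */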
+ bt_map[0] = surface_state.offset + state_offset;
+ add_surface_state_reloc(cmd_buffer, surface_state, bo, bo_offset);
+ }
+
if (layout == NULL)
goto out;
/* All binding table offsets provided by apply_pipeline_layout() are
* relative to the start of the binding table, i.e. past the per-stage
* bias chosen below (MAX_RTS for the fragment shader, 1 for compute).
*/
- unsigned bias = stage == MESA_SHADER_FRAGMENT ? MAX_RTS : 0;
+ unsigned bias;
+ switch (stage) {
+ case MESA_SHADER_FRAGMENT:
+ bias = MAX_RTS;
+ break;
+ case MESA_SHADER_COMPUTE:
+ bias = 1;
+ break;
+ default:
+ bias = 0;
+ break;
+ }
prog_data->binding_table.size_bytes = 0;
prog_data->binding_table.texture_start = bias;
prog_data->binding_table.ubo_start = bias;
memset(prog_data, 0, sizeof(*prog_data));
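+ /* Compute shaders that read gl_NumWorkGroups fetch it from binding
+  * table slot 0; the surface itself is set up at dispatch time.
+  */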
+ prog_data->binding_table.work_groups_start = 0;
+
nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint,
MESA_SHADER_COMPUTE,
&prog_data->base);
uint32_t vb_dirty;
anv_cmd_dirty_mask_t dirty;
anv_cmd_dirty_mask_t compute_dirty;
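+ /* Location of the 3 x uint32 workgroup-count buffer consumed by
+  * shaders that read gl_NumWorkGroups: filled by vkCmdDispatch, or
+  * aliased onto the indirect buffer by vkCmdDispatchIndirect.
+  */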
+ uint32_t num_workgroups_offset;
+ struct anv_bo *num_workgroups_bo;
VkShaderStageFlags descriptors_dirty;
VkShaderStageFlags push_constants_dirty;
uint32_t scratch_size;
struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
+ if (prog_data->uses_num_work_groups) {
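+ /* Direct dispatch: the workgroup counts exist only on the CPU, so
+  * write x/y/z into 12 bytes of dynamic state (4-byte aligned) for
+  * the binding table surface to reference.
+  */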
+ struct anv_state state =
+ anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 12, 4);
+ uint32_t *sizes = state.map;
+ sizes[0] = x;
+ sizes[1] = y;
+ sizes[2] = z;
+ if (!cmd_buffer->device->info.has_llc)
+ anv_state_clflush(state);
+ cmd_buffer->state.num_workgroups_offset = state.offset;
+ cmd_buffer->state.num_workgroups_bo =
+ &cmd_buffer->device->dynamic_state_block_pool.bo;
+ }
+
cmd_buffer_flush_compute_state(cmd_buffer);
anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER),
struct anv_bo *bo = buffer->bo;
uint32_t bo_offset = buffer->offset + offset;
+ if (prog_data->uses_num_work_groups) {
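+ /* Indirect dispatch: the counts already live in the indirect buffer,
+  * so point gl_NumWorkGroups at it directly instead of copying.
+  */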
+ cmd_buffer->state.num_workgroups_offset = bo_offset;
+ cmd_buffer->state.num_workgroups_bo = bo;
+ }
+
cmd_buffer_flush_compute_state(cmd_buffer);
emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset);