anv: Fix misc simple warnings
[mesa.git] / src / vulkan / genX_cmd_buffer.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26
27 #include "anv_private.h"
28
29 #if (ANV_GEN == 9)
30 # include "gen9_pack.h"
31 #elif (ANV_GEN == 8)
32 # include "gen8_pack.h"
33 #elif (ANV_IS_HASWELL)
34 # include "gen75_pack.h"
35 #elif (ANV_GEN == 7)
36 # include "gen7_pack.h"
37 #endif
38
39 void
40 genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
41 {
42 struct anv_device *device = cmd_buffer->device;
43 struct anv_bo *scratch_bo = NULL;
44
45 cmd_buffer->state.scratch_size =
46 anv_block_pool_size(&device->scratch_block_pool);
47 if (cmd_buffer->state.scratch_size > 0)
48 scratch_bo = &device->scratch_block_pool.bo;
49
50 /* XXX: Do we need this on more than just BDW? */
51 #if (ANV_GEN >= 8)
52 /* Emit a render target cache flush.
53 *
54 * This isn't documented anywhere in the PRM. However, it seems to be
55 * necessary prior to changing the surface state base adress. Without
56 * this, we get GPU hangs when using multi-level command buffers which
57 * clear depth, reset state base address, and then go render stuff.
58 */
59 anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
60 .RenderTargetCacheFlushEnable = true);
61 #endif
62
63 anv_batch_emit(&cmd_buffer->batch, GENX(STATE_BASE_ADDRESS),
64 .GeneralStateBaseAddress = { scratch_bo, 0 },
65 .GeneralStateMemoryObjectControlState = GENX(MOCS),
66 .GeneralStateBaseAddressModifyEnable = true,
67
68 .SurfaceStateBaseAddress = anv_cmd_buffer_surface_base_address(cmd_buffer),
69 .SurfaceStateMemoryObjectControlState = GENX(MOCS),
70 .SurfaceStateBaseAddressModifyEnable = true,
71
72 .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 },
73 .DynamicStateMemoryObjectControlState = GENX(MOCS),
74 .DynamicStateBaseAddressModifyEnable = true,
75
76 .IndirectObjectBaseAddress = { NULL, 0 },
77 .IndirectObjectMemoryObjectControlState = GENX(MOCS),
78 .IndirectObjectBaseAddressModifyEnable = true,
79
80 .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 },
81 .InstructionMemoryObjectControlState = GENX(MOCS),
82 .InstructionBaseAddressModifyEnable = true,
83
84 # if (ANV_GEN >= 8)
85 /* Broadwell requires that we specify a buffer size for a bunch of
86 * these fields. However, since we will be growing the BO's live, we
87 * just set them all to the maximum.
88 */
89 .GeneralStateBufferSize = 0xfffff,
90 .GeneralStateBufferSizeModifyEnable = true,
91 .DynamicStateBufferSize = 0xfffff,
92 .DynamicStateBufferSizeModifyEnable = true,
93 .IndirectObjectBufferSize = 0xfffff,
94 .IndirectObjectBufferSizeModifyEnable = true,
95 .InstructionBufferSize = 0xfffff,
96 .InstructionBuffersizeModifyEnable = true,
97 # endif
98 );
99
100 /* After re-setting the surface state base address, we have to do some
101 * cache flusing so that the sampler engine will pick up the new
102 * SURFACE_STATE objects and binding tables. From the Broadwell PRM,
103 * Shared Function > 3D Sampler > State > State Caching (page 96):
104 *
105 * Coherency with system memory in the state cache, like the texture
106 * cache is handled partially by software. It is expected that the
107 * command stream or shader will issue Cache Flush operation or
108 * Cache_Flush sampler message to ensure that the L1 cache remains
109 * coherent with system memory.
110 *
111 * [...]
112 *
113 * Whenever the value of the Dynamic_State_Base_Addr,
114 * Surface_State_Base_Addr are altered, the L1 state cache must be
115 * invalidated to ensure the new surface or sampler state is fetched
116 * from system memory.
117 *
118 * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit
119 * which, according the PIPE_CONTROL instruction documentation in the
120 * Broadwell PRM:
121 *
122 * Setting this bit is independent of any other bit in this packet.
123 * This bit controls the invalidation of the L1 and L2 state caches
124 * at the top of the pipe i.e. at the parsing time.
125 *
126 * Unfortunately, experimentation seems to indicate that state cache
127 * invalidation through a PIPE_CONTROL does nothing whatsoever in
128 * regards to surface state and binding tables. In stead, it seems that
129 * invalidating the texture cache is what is actually needed.
130 *
131 * XXX: As far as we have been able to determine through
132 * experimentation, shows that flush the texture cache appears to be
133 * sufficient. The theory here is that all of the sampling/rendering
134 * units cache the binding table in the texture cache. However, we have
135 * yet to be able to actually confirm this.
136 */
137 anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
138 .TextureCacheInvalidationEnable = true);
139 }
140
141 void genX(CmdPipelineBarrier)(
142 VkCommandBuffer commandBuffer,
143 VkPipelineStageFlags srcStageMask,
144 VkPipelineStageFlags destStageMask,
145 VkBool32 byRegion,
146 uint32_t memoryBarrierCount,
147 const VkMemoryBarrier* pMemoryBarriers,
148 uint32_t bufferMemoryBarrierCount,
149 const VkBufferMemoryBarrier* pBufferMemoryBarriers,
150 uint32_t imageMemoryBarrierCount,
151 const VkImageMemoryBarrier* pImageMemoryBarriers)
152 {
153 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
154 uint32_t b, *dw;
155
156 struct GENX(PIPE_CONTROL) cmd = {
157 GENX(PIPE_CONTROL_header),
158 .PostSyncOperation = NoWrite,
159 };
160
161 /* XXX: I think waitEvent is a no-op on our HW. We should verify that. */
162
163 if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) {
164 /* This is just what PIPE_CONTROL does */
165 }
166
167 if (anv_clear_mask(&srcStageMask,
168 VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
169 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
170 VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
171 VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
172 VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
173 VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
174 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
175 VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
176 VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
177 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) {
178 cmd.StallAtPixelScoreboard = true;
179 }
180
181 if (anv_clear_mask(&srcStageMask,
182 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
183 VK_PIPELINE_STAGE_TRANSFER_BIT)) {
184 cmd.CommandStreamerStallEnable = true;
185 }
186
187 if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_HOST_BIT)) {
188 anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT");
189 }
190
191 /* On our hardware, all stages will wait for execution as needed. */
192 (void)destStageMask;
193
194 /* We checked all known VkPipeEventFlags. */
195 anv_assert(srcStageMask == 0);
196
197 /* XXX: Right now, we're really dumb and just flush whatever categories
198 * the app asks for. One of these days we may make this a bit better
199 * but right now that's all the hardware allows for in most areas.
200 */
201 VkAccessFlags src_flags = 0;
202 VkAccessFlags dst_flags = 0;
203
204 for (uint32_t i = 0; i < memoryBarrierCount; i++) {
205 src_flags |= pMemoryBarriers[i].srcAccessMask;
206 dst_flags |= pMemoryBarriers[i].dstAccessMask;
207 }
208
209 for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) {
210 src_flags |= pBufferMemoryBarriers[i].srcAccessMask;
211 dst_flags |= pBufferMemoryBarriers[i].dstAccessMask;
212 }
213
214 for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
215 src_flags |= pImageMemoryBarriers[i].srcAccessMask;
216 dst_flags |= pImageMemoryBarriers[i].dstAccessMask;
217 }
218
219 /* The src flags represent how things were used previously. This is
220 * what we use for doing flushes.
221 */
222 for_each_bit(b, src_flags) {
223 switch ((VkAccessFlagBits)(1 << b)) {
224 case VK_ACCESS_SHADER_WRITE_BIT:
225 cmd.DCFlushEnable = true;
226 break;
227 case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
228 cmd.RenderTargetCacheFlushEnable = true;
229 break;
230 case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
231 cmd.DepthCacheFlushEnable = true;
232 break;
233 case VK_ACCESS_TRANSFER_WRITE_BIT:
234 cmd.RenderTargetCacheFlushEnable = true;
235 cmd.DepthCacheFlushEnable = true;
236 break;
237 default:
238 /* Doesn't require a flush */
239 break;
240 }
241 }
242
243 /* The dst flags represent how things will be used in the fugure. This
244 * is what we use for doing cache invalidations.
245 */
246 for_each_bit(b, dst_flags) {
247 switch ((VkAccessFlagBits)(1 << b)) {
248 case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
249 case VK_ACCESS_INDEX_READ_BIT:
250 case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
251 cmd.VFCacheInvalidationEnable = true;
252 break;
253 case VK_ACCESS_UNIFORM_READ_BIT:
254 cmd.ConstantCacheInvalidationEnable = true;
255 /* fallthrough */
256 case VK_ACCESS_SHADER_READ_BIT:
257 cmd.TextureCacheInvalidationEnable = true;
258 break;
259 case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT:
260 cmd.TextureCacheInvalidationEnable = true;
261 break;
262 case VK_ACCESS_TRANSFER_READ_BIT:
263 cmd.TextureCacheInvalidationEnable = true;
264 break;
265 case VK_ACCESS_MEMORY_READ_BIT:
266 break; /* XXX: What is this? */
267 default:
268 /* Doesn't require a flush */
269 break;
270 }
271 }
272
273 dw = anv_batch_emit_dwords(&cmd_buffer->batch, GENX(PIPE_CONTROL_length));
274 GENX(PIPE_CONTROL_pack)(&cmd_buffer->batch, dw, &cmd);
275 }
276
277 static void
278 emit_base_vertex_instance_bo(struct anv_cmd_buffer *cmd_buffer,
279 struct anv_bo *bo, uint32_t offset)
280 {
281 uint32_t *p = anv_batch_emitn(&cmd_buffer->batch, 5,
282 GENX(3DSTATE_VERTEX_BUFFERS));
283
284 GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, p + 1,
285 &(struct GENX(VERTEX_BUFFER_STATE)) {
286 .VertexBufferIndex = 32, /* Reserved for this */
287 .AddressModifyEnable = true,
288 .BufferPitch = 0,
289 #if (ANV_GEN >= 8)
290 .MemoryObjectControlState = GENX(MOCS),
291 .BufferStartingAddress = { bo, offset },
292 .BufferSize = 8
293 #else
294 .VertexBufferMemoryObjectControlState = GENX(MOCS),
295 .BufferStartingAddress = { bo, offset },
296 .EndAddress = { bo, offset + 8 },
297 #endif
298 });
299 }
300
301 static void
302 emit_base_vertex_instance(struct anv_cmd_buffer *cmd_buffer,
303 uint32_t base_vertex, uint32_t base_instance)
304 {
305 struct anv_state id_state =
306 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 8, 4);
307
308 ((uint32_t *)id_state.map)[0] = base_vertex;
309 ((uint32_t *)id_state.map)[1] = base_instance;
310
311 if (!cmd_buffer->device->info.has_llc)
312 anv_state_clflush(id_state);
313
314 emit_base_vertex_instance_bo(cmd_buffer,
315 &cmd_buffer->device->dynamic_state_block_pool.bo, id_state.offset);
316 }
317
318 void genX(CmdDraw)(
319 VkCommandBuffer commandBuffer,
320 uint32_t vertexCount,
321 uint32_t instanceCount,
322 uint32_t firstVertex,
323 uint32_t firstInstance)
324 {
325 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
326 struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
327
328 genX(cmd_buffer_flush_state)(cmd_buffer);
329
330 if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex ||
331 cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance)
332 emit_base_vertex_instance(cmd_buffer, firstVertex, firstInstance);
333
334 anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE),
335 .VertexAccessType = SEQUENTIAL,
336 .PrimitiveTopologyType = pipeline->topology,
337 .VertexCountPerInstance = vertexCount,
338 .StartVertexLocation = firstVertex,
339 .InstanceCount = instanceCount,
340 .StartInstanceLocation = firstInstance,
341 .BaseVertexLocation = 0);
342 }
343
344 void genX(CmdDrawIndexed)(
345 VkCommandBuffer commandBuffer,
346 uint32_t indexCount,
347 uint32_t instanceCount,
348 uint32_t firstIndex,
349 int32_t vertexOffset,
350 uint32_t firstInstance)
351 {
352 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
353 struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
354
355 genX(cmd_buffer_flush_state)(cmd_buffer);
356
357 if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex ||
358 cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance)
359 emit_base_vertex_instance(cmd_buffer, vertexOffset, firstInstance);
360
361 anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE),
362 .VertexAccessType = RANDOM,
363 .PrimitiveTopologyType = pipeline->topology,
364 .VertexCountPerInstance = indexCount,
365 .StartVertexLocation = firstIndex,
366 .InstanceCount = instanceCount,
367 .StartInstanceLocation = firstInstance,
368 .BaseVertexLocation = vertexOffset);
369 }
370
/* Auto-draw / indirect draw parameter registers.
 *
 * The indirect draw entry points load these from the application's buffer
 * (or with an immediate) right before emitting a 3DPRIMITIVE that has
 * IndirectParameterEnable set.
 */
#define GEN7_3DPRIM_END_OFFSET      0x2420
#define GEN7_3DPRIM_START_VERTEX    0x2430
#define GEN7_3DPRIM_VERTEX_COUNT    0x2434
#define GEN7_3DPRIM_INSTANCE_COUNT  0x2438
#define GEN7_3DPRIM_START_INSTANCE  0x243C
#define GEN7_3DPRIM_BASE_VERTEX     0x2440
378
379 static void
380 emit_lrm(struct anv_batch *batch,
381 uint32_t reg, struct anv_bo *bo, uint32_t offset)
382 {
383 anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM),
384 .RegisterAddress = reg,
385 .MemoryAddress = { bo, offset });
386 }
387
388 static void
389 emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm)
390 {
391 anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM),
392 .RegisterOffset = reg,
393 .DataDWord = imm);
394 }
395
396 void genX(CmdDrawIndirect)(
397 VkCommandBuffer commandBuffer,
398 VkBuffer _buffer,
399 VkDeviceSize offset,
400 uint32_t drawCount,
401 uint32_t stride)
402 {
403 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
404 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
405 struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
406 struct anv_bo *bo = buffer->bo;
407 uint32_t bo_offset = buffer->offset + offset;
408
409 genX(cmd_buffer_flush_state)(cmd_buffer);
410
411 if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex ||
412 cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance)
413 emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 8);
414
415 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
416 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
417 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
418 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12);
419 emit_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0);
420
421 anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE),
422 .IndirectParameterEnable = true,
423 .VertexAccessType = SEQUENTIAL,
424 .PrimitiveTopologyType = pipeline->topology);
425 }
426
427 void genX(CmdDrawIndexedIndirect)(
428 VkCommandBuffer commandBuffer,
429 VkBuffer _buffer,
430 VkDeviceSize offset,
431 uint32_t drawCount,
432 uint32_t stride)
433 {
434 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
435 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
436 struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
437 struct anv_bo *bo = buffer->bo;
438 uint32_t bo_offset = buffer->offset + offset;
439
440 genX(cmd_buffer_flush_state)(cmd_buffer);
441
442 /* TODO: We need to stomp base vertex to 0 somehow */
443 if (cmd_buffer->state.pipeline->vs_prog_data.uses_basevertex ||
444 cmd_buffer->state.pipeline->vs_prog_data.uses_baseinstance)
445 emit_base_vertex_instance_bo(cmd_buffer, bo, bo_offset + 12);
446
447 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
448 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
449 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
450 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12);
451 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16);
452
453 anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE),
454 .IndirectParameterEnable = true,
455 .VertexAccessType = RANDOM,
456 .PrimitiveTopologyType = pipeline->topology);
457 }
458
459
460 void genX(CmdDispatch)(
461 VkCommandBuffer commandBuffer,
462 uint32_t x,
463 uint32_t y,
464 uint32_t z)
465 {
466 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
467 struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
468 struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
469
470 if (prog_data->uses_num_work_groups) {
471 struct anv_state state =
472 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 12, 4);
473 uint32_t *sizes = state.map;
474 sizes[0] = x;
475 sizes[1] = y;
476 sizes[2] = z;
477 if (!cmd_buffer->device->info.has_llc)
478 anv_state_clflush(state);
479 cmd_buffer->state.num_workgroups_offset = state.offset;
480 cmd_buffer->state.num_workgroups_bo =
481 &cmd_buffer->device->dynamic_state_block_pool.bo;
482 }
483
484 genX(cmd_buffer_flush_compute_state)(cmd_buffer);
485
486 anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER),
487 .SIMDSize = prog_data->simd_size / 16,
488 .ThreadDepthCounterMaximum = 0,
489 .ThreadHeightCounterMaximum = 0,
490 .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1,
491 .ThreadGroupIDXDimension = x,
492 .ThreadGroupIDYDimension = y,
493 .ThreadGroupIDZDimension = z,
494 .RightExecutionMask = pipeline->cs_right_mask,
495 .BottomExecutionMask = 0xffffffff);
496
497 anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH));
498 }
499
/* Indirect dispatch dimension registers.  CmdDispatchIndirect loads them
 * from the application's buffer before emitting a GPGPU_WALKER that has
 * IndirectParameterEnable set.
 */
#define GPGPU_DISPATCHDIMX  0x2500
#define GPGPU_DISPATCHDIMY  0x2504
#define GPGPU_DISPATCHDIMZ  0x2508
503
504 void genX(CmdDispatchIndirect)(
505 VkCommandBuffer commandBuffer,
506 VkBuffer _buffer,
507 VkDeviceSize offset)
508 {
509 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
510 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
511 struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
512 struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
513 struct anv_bo *bo = buffer->bo;
514 uint32_t bo_offset = buffer->offset + offset;
515
516 if (prog_data->uses_num_work_groups) {
517 cmd_buffer->state.num_workgroups_offset = bo_offset;
518 cmd_buffer->state.num_workgroups_bo = bo;
519 }
520
521 genX(cmd_buffer_flush_compute_state)(cmd_buffer);
522
523 emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset);
524 emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4);
525 emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8);
526
527 anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER),
528 .IndirectParameterEnable = true,
529 .SIMDSize = prog_data->simd_size / 16,
530 .ThreadDepthCounterMaximum = 0,
531 .ThreadHeightCounterMaximum = 0,
532 .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1,
533 .RightExecutionMask = pipeline->cs_right_mask,
534 .BottomExecutionMask = 0xffffffff);
535
536 anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH));
537 }