gen7/8/cmd_buffer: Allocate the correct amount for COLOR_CALC_STATE
[mesa.git] / src / vulkan / gen8_cmd_buffer.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "anv_private.h"
31
32 #include "gen8_pack.h"
33 #include "gen9_pack.h"
34
35 static void
36 cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer)
37 {
38 static const uint32_t push_constant_opcodes[] = {
39 [MESA_SHADER_VERTEX] = 21,
40 [MESA_SHADER_TESS_CTRL] = 25, /* HS */
41 [MESA_SHADER_TESS_EVAL] = 26, /* DS */
42 [MESA_SHADER_GEOMETRY] = 22,
43 [MESA_SHADER_FRAGMENT] = 23,
44 [MESA_SHADER_COMPUTE] = 0,
45 };
46
47 VkShaderStageFlags flushed = 0;
48
49 anv_foreach_stage(stage, cmd_buffer->state.push_constants_dirty) {
50 if (stage == MESA_SHADER_COMPUTE)
51 continue;
52
53 struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage);
54
55 if (state.offset == 0)
56 continue;
57
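      /* The per-stage 3DSTATE_CONSTANT_* read lengths are programmed in
       * 32-byte (256-bit) units, hence the DIV_ROUND_UP by 32 below.
       */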
58 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_VS),
59 ._3DCommandSubOpcode = push_constant_opcodes[stage],
60 .ConstantBody = {
61 .PointerToConstantBuffer0 = { .offset = state.offset },
62 .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32),
63 });
64
65 flushed |= mesa_to_vk_shader_stage(stage);
66 }
67
68 cmd_buffer->state.push_constants_dirty &= ~flushed;
69 }
70
71 #if ANV_GEN == 8
72 static void
73 emit_viewport_state(struct anv_cmd_buffer *cmd_buffer,
74 uint32_t count, const VkViewport *viewports)
75 {
76 struct anv_state sf_clip_state =
77 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64);
78 struct anv_state cc_state =
79 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32);
80
81 for (uint32_t i = 0; i < count; i++) {
82 const VkViewport *vp = &viewports[i];
83
84 /* The gen7 state struct has just the matrix and guardband fields; the
85 * gen8 struct adds the min/max viewport fields. */
86 struct GENX(SF_CLIP_VIEWPORT) sf_clip_viewport = {
87 .ViewportMatrixElementm00 = vp->width / 2,
88 .ViewportMatrixElementm11 = vp->height / 2,
89 .ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) / 2,
90 .ViewportMatrixElementm30 = vp->x + vp->width / 2,
91 .ViewportMatrixElementm31 = vp->y + vp->height / 2,
92 .ViewportMatrixElementm32 = (vp->maxDepth + vp->minDepth) / 2,
93 .XMinClipGuardband = -1.0f,
94 .XMaxClipGuardband = 1.0f,
95 .YMinClipGuardband = -1.0f,
96 .YMaxClipGuardband = 1.0f,
97 .XMinViewPort = vp->x,
98 .XMaxViewPort = vp->x + vp->width - 1,
99 .YMinViewPort = vp->y,
100 .YMaxViewPort = vp->y + vp->height - 1,
101 };
102
103 struct GENX(CC_VIEWPORT) cc_viewport = {
104 .MinimumDepth = vp->minDepth,
105 .MaximumDepth = vp->maxDepth
106 };
107
108 GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64,
109 &sf_clip_viewport);
110 GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 32, &cc_viewport);
111 }
112
113 if (!cmd_buffer->device->info.has_llc) {
114 anv_state_clflush(sf_clip_state);
115 anv_state_clflush(cc_state);
116 }
117
118 anv_batch_emit(&cmd_buffer->batch,
119 GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC),
120 .CCViewportPointer = cc_state.offset);
121 anv_batch_emit(&cmd_buffer->batch,
122 GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP),
123 .SFClipViewportPointer = sf_clip_state.offset);
124 }
125
126 void
127 gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer)
128 {
129 if (cmd_buffer->state.dynamic.viewport.count > 0) {
130 emit_viewport_state(cmd_buffer, cmd_buffer->state.dynamic.viewport.count,
131 cmd_buffer->state.dynamic.viewport.viewports);
132 } else {
133 /* If viewport count is 0, this is taken to mean "use the default" */
134 emit_viewport_state(cmd_buffer, 1,
135 &(VkViewport) {
136 .x = 0.0f,
137 .y = 0.0f,
138 .width = cmd_buffer->state.framebuffer->width,
139 .height = cmd_buffer->state.framebuffer->height,
140 .minDepth = 0.0f,
141 .maxDepth = 1.0f,
142 });
143 }
144 }
145 #endif
146
147 static void
148 cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
149 {
150 struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
151 uint32_t *p;
152
153 uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used;
154
155 assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0);
156
157 if (cmd_buffer->state.current_pipeline != _3D) {
158 anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT),
159 #if ANV_GEN >= 9
160 .MaskBits = 3,
161 #endif
162 .PipelineSelection = _3D);
163 cmd_buffer->state.current_pipeline = _3D;
164 }
165
166 if (vb_emit) {
167 const uint32_t num_buffers = __builtin_popcount(vb_emit);
168 const uint32_t num_dwords = 1 + num_buffers * 4;
169
170 p = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
171 GENX(3DSTATE_VERTEX_BUFFERS));
172 uint32_t vb, i = 0;
173 for_each_bit(vb, vb_emit) {
174 struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer;
175 uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset;
176
177 struct GENX(VERTEX_BUFFER_STATE) state = {
178 .VertexBufferIndex = vb,
179 .MemoryObjectControlState = GENX(MOCS),
180 .AddressModifyEnable = true,
181 .BufferPitch = pipeline->binding_stride[vb],
182 .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
183 .BufferSize = buffer->size - offset
184 };
185
186 GENX(VERTEX_BUFFER_STATE_pack)(&cmd_buffer->batch, &p[1 + i * 4], &state);
187 i++;
188 }
189 }
190
191 if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) {
192 /* If somebody compiled a pipeline after starting a command buffer, the
193 * scratch bo may have grown since we started this cmd buffer (and
194 * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now,
195 * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */
196 if (cmd_buffer->state.scratch_size < pipeline->total_scratch)
197 anv_cmd_buffer_emit_state_base_address(cmd_buffer);
198
199 anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
200 }
201
202 #if ANV_GEN >= 9
203 /* On SKL+ the new constants don't take effect until the next corresponding
204 * 3DSTATE_BINDING_TABLE_POINTER_* command is parsed, so we need to ensure
205 * that is sent. As it is, we re-emit binding tables but we could hold on
206 * to the offset of the most recent binding table and only re-emit the
207 * 3DSTATE_BINDING_TABLE_POINTER_* command.
208 */
209 cmd_buffer->state.descriptors_dirty |=
210 cmd_buffer->state.push_constants_dirty &
211 cmd_buffer->state.pipeline->active_stages;
212 #endif
213
214 if (cmd_buffer->state.descriptors_dirty)
215 gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer);
216
217 if (cmd_buffer->state.push_constants_dirty)
218 cmd_buffer_flush_push_constants(cmd_buffer);
219
220 if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT)
221 gen8_cmd_buffer_emit_viewport(cmd_buffer);
222
223 if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR)
224 gen7_cmd_buffer_emit_scissor(cmd_buffer);
225
226 if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
227 ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)) {
228 uint32_t sf_dw[GENX(3DSTATE_SF_length)];
229 struct GENX(3DSTATE_SF) sf = {
230 GENX(3DSTATE_SF_header),
231 .LineWidth = cmd_buffer->state.dynamic.line_width,
232 };
233 GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf);
234 /* FIXME: gen9.fs */
235 anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gen8.sf);
236 }
237
238 if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
239 ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)){
240 bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f ||
241 cmd_buffer->state.dynamic.depth_bias.slope != 0.0f;
242
243 uint32_t raster_dw[GENX(3DSTATE_RASTER_length)];
244 struct GENX(3DSTATE_RASTER) raster = {
245 GENX(3DSTATE_RASTER_header),
246 .GlobalDepthOffsetEnableSolid = enable_bias,
247 .GlobalDepthOffsetEnableWireframe = enable_bias,
248 .GlobalDepthOffsetEnablePoint = enable_bias,
249 .GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias,
250 .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope,
251 .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp
252 };
253 GENX(3DSTATE_RASTER_pack)(NULL, raster_dw, &raster);
254 anv_batch_emit_merge(&cmd_buffer->batch, raster_dw,
255 pipeline->gen8.raster);
256 }
257
258 /* Stencil reference values moved from COLOR_CALC_STATE in gen8 to
259 * 3DSTATE_WM_DEPTH_STENCIL in gen9. That means the dirty bits get split
260 * across different state packets for gen8 and gen9. We handle that by
261 * using a big old #if switch here.
262 */
263 #if ANV_GEN == 8
264 if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS |
265 ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) {
266 struct anv_state cc_state =
267 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
268 GEN8_COLOR_CALC_STATE_length * 4,
269 64);
270 struct GEN8_COLOR_CALC_STATE cc = {
271 .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0],
272 .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1],
273 .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2],
274 .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3],
275 .StencilReferenceValue =
276 cmd_buffer->state.dynamic.stencil_reference.front,
277 .BackFaceStencilReferenceValue =
278 cmd_buffer->state.dynamic.stencil_reference.back,
279 };
280 GEN8_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc);
281
282 if (!cmd_buffer->device->info.has_llc)
283 anv_state_clflush(cc_state);
284
285 anv_batch_emit(&cmd_buffer->batch,
286 GEN8_3DSTATE_CC_STATE_POINTERS,
287 .ColorCalcStatePointer = cc_state.offset,
288 .ColorCalcStatePointerValid = true);
289 }
290
291 if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
292 ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |
293 ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) {
294 uint32_t wm_depth_stencil_dw[GEN8_3DSTATE_WM_DEPTH_STENCIL_length];
295
296 struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = {
297 GEN8_3DSTATE_WM_DEPTH_STENCIL_header,
298
299 /* Is this what we need to do? */
300 .StencilBufferWriteEnable =
301 cmd_buffer->state.dynamic.stencil_write_mask.front != 0,
302
303 .StencilTestMask =
304 cmd_buffer->state.dynamic.stencil_compare_mask.front & 0xff,
305 .StencilWriteMask =
306 cmd_buffer->state.dynamic.stencil_write_mask.front & 0xff,
307
308 .BackfaceStencilTestMask =
309 cmd_buffer->state.dynamic.stencil_compare_mask.back & 0xff,
310 .BackfaceStencilWriteMask =
311 cmd_buffer->state.dynamic.stencil_write_mask.back & 0xff,
312 };
313 GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, wm_depth_stencil_dw,
314 &wm_depth_stencil);
315
316 anv_batch_emit_merge(&cmd_buffer->batch, wm_depth_stencil_dw,
317 pipeline->gen8.wm_depth_stencil);
318 }
319 #else
320 if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS) {
321 struct anv_state cc_state =
322 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
323 GEN9_COLOR_CALC_STATE_length * 4,
324 64);
325 struct GEN9_COLOR_CALC_STATE cc = {
326 .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0],
327 .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1],
328 .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2],
329 .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3],
330 };
331 GEN9_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc);
332
333 if (!cmd_buffer->device->info.has_llc)
334 anv_state_clflush(cc_state);
335
336 anv_batch_emit(&cmd_buffer->batch,
337 GEN9_3DSTATE_CC_STATE_POINTERS,
338 .ColorCalcStatePointer = cc_state.offset,
339 .ColorCalcStatePointerValid = true);
340 }
341
342 if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
343 ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |
344 ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK |
345 ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) {
346 uint32_t dwords[GEN9_3DSTATE_WM_DEPTH_STENCIL_length];
347 struct anv_dynamic_state *d = &cmd_buffer->state.dynamic;
348 struct GEN9_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = {
349 GEN9_3DSTATE_WM_DEPTH_STENCIL_header,
350
351 .StencilBufferWriteEnable = d->stencil_write_mask.front != 0,
352
353 .StencilTestMask = d->stencil_compare_mask.front & 0xff,
354 .StencilWriteMask = d->stencil_write_mask.front & 0xff,
355
356 .BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff,
357 .BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff,
358
359 .StencilReferenceValue = d->stencil_reference.front,
360 .BackfaceStencilReferenceValue = d->stencil_reference.back
361 };
362 GEN9_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, dwords, &wm_depth_stencil);
363
364 anv_batch_emit_merge(&cmd_buffer->batch, dwords,
365 pipeline->gen9.wm_depth_stencil);
366 }
367 #endif
368
369 if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
370 ANV_CMD_DIRTY_INDEX_BUFFER)) {
371 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF),
372 .IndexedDrawCutIndexEnable = pipeline->primitive_restart,
373 .CutIndex = cmd_buffer->state.restart_index,
374 );
375 }
376
377 cmd_buffer->state.vb_dirty &= ~vb_emit;
378 cmd_buffer->state.dirty = 0;
379 }
380
381 void genX(CmdDraw)(
382 VkCommandBuffer commandBuffer,
383 uint32_t vertexCount,
384 uint32_t instanceCount,
385 uint32_t firstVertex,
386 uint32_t firstInstance)
387 {
388 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
389
390 cmd_buffer_flush_state(cmd_buffer);
391
392 anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE),
393 .VertexAccessType = SEQUENTIAL,
394 .VertexCountPerInstance = vertexCount,
395 .StartVertexLocation = firstVertex,
396 .InstanceCount = instanceCount,
397 .StartInstanceLocation = firstInstance,
398 .BaseVertexLocation = 0);
399 }
400
401 void genX(CmdDrawIndexed)(
402 VkCommandBuffer commandBuffer,
403 uint32_t indexCount,
404 uint32_t instanceCount,
405 uint32_t firstIndex,
406 int32_t vertexOffset,
407 uint32_t firstInstance)
408 {
409 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
410
411 cmd_buffer_flush_state(cmd_buffer);
412
413 anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE),
414 .VertexAccessType = RANDOM,
415 .VertexCountPerInstance = indexCount,
416 .StartVertexLocation = firstIndex,
417 .InstanceCount = instanceCount,
418 .StartInstanceLocation = firstInstance,
419 .BaseVertexLocation = vertexOffset);
420 }
421
422 static void
423 emit_lrm(struct anv_batch *batch,
424 uint32_t reg, struct anv_bo *bo, uint32_t offset)
425 {
426 anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM),
427 .RegisterAddress = reg,
428 .MemoryAddress = { bo, offset });
429 }
430
431 static void
432 emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm)
433 {
434 anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM),
435 .RegisterOffset = reg,
436 .DataDWord = imm);
437 }
438
439 /* Auto-Draw / Indirect Registers */
440 #define GEN7_3DPRIM_END_OFFSET 0x2420
441 #define GEN7_3DPRIM_START_VERTEX 0x2430
442 #define GEN7_3DPRIM_VERTEX_COUNT 0x2434
443 #define GEN7_3DPRIM_INSTANCE_COUNT 0x2438
444 #define GEN7_3DPRIM_START_INSTANCE 0x243C
445 #define GEN7_3DPRIM_BASE_VERTEX 0x2440
446
447 void genX(CmdDrawIndirect)(
448 VkCommandBuffer commandBuffer,
449 VkBuffer _buffer,
450 VkDeviceSize offset,
451 uint32_t drawCount,
452 uint32_t stride)
453 {
454 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
455 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
456 struct anv_bo *bo = buffer->bo;
457 uint32_t bo_offset = buffer->offset + offset;
458
459 cmd_buffer_flush_state(cmd_buffer);
460
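   /* VkDrawIndirectCommand is four tightly packed uint32_t fields:
    * vertexCount (+0), instanceCount (+4), firstVertex (+8) and
    * firstInstance (+12).  Load them into the 3DPRIM registers; the
    * non-indexed layout has no base vertex, so that register is zeroed
    * with an immediate load.
    */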
461 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
462 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
463 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
464 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12);
465 emit_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0);
466
467 anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE),
468 .IndirectParameterEnable = true,
469 .VertexAccessType = SEQUENTIAL);
470 }
471
472 void genX(CmdBindIndexBuffer)(
473 VkCommandBuffer commandBuffer,
474 VkBuffer _buffer,
475 VkDeviceSize offset,
476 VkIndexType indexType)
477 {
478 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
479 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
480
481 static const uint32_t vk_to_gen_index_type[] = {
482 [VK_INDEX_TYPE_UINT16] = INDEX_WORD,
483 [VK_INDEX_TYPE_UINT32] = INDEX_DWORD,
484 };
485
486 static const uint32_t restart_index_for_type[] = {
487 [VK_INDEX_TYPE_UINT16] = UINT16_MAX,
488 [VK_INDEX_TYPE_UINT32] = UINT32_MAX,
489 };
490
491 cmd_buffer->state.restart_index = restart_index_for_type[indexType];
492
493 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER),
494 .IndexFormat = vk_to_gen_index_type[indexType],
495 .MemoryObjectControlState = GENX(MOCS),
496 .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
497 .BufferSize = buffer->size - offset);
498
499 cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER;
500 }
501
502 static VkResult
503 flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
504 {
505 struct anv_device *device = cmd_buffer->device;
506 struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
507 struct anv_state surfaces = { 0, }, samplers = { 0, };
508 VkResult result;
509
510 result = anv_cmd_buffer_emit_samplers(cmd_buffer,
511 MESA_SHADER_COMPUTE, &samplers);
512 if (result != VK_SUCCESS)
513 return result;
514 result = anv_cmd_buffer_emit_binding_table(cmd_buffer,
515 MESA_SHADER_COMPUTE, &surfaces);
516 if (result != VK_SUCCESS)
517 return result;
518
519 struct anv_state push_state = anv_cmd_buffer_cs_push_constants(cmd_buffer);
520
521 const struct brw_cs_prog_data *cs_prog_data = &pipeline->cs_prog_data;
522 const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
523
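   /* nr_params and the local-invocation-ID payload are counted in dwords.
    * A GRF register holds 8 dwords (32 bytes), and the CURBE read length
    * below is programmed in whole 32-byte registers.
    */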
524 unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
525 unsigned push_constant_data_size =
526 (prog_data->nr_params + local_id_dwords) * 4;
527 unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
528 unsigned push_constant_regs = reg_aligned_constant_size / 32;
529
530 if (push_state.alloc_size) {
531 anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD),
532 .CURBETotalDataLength = push_state.alloc_size,
533 .CURBEDataStartAddress = push_state.offset);
534 }
535
536 struct anv_state state =
537 anv_state_pool_emit(&device->dynamic_state_pool,
538 GENX(INTERFACE_DESCRIPTOR_DATA), 64,
539 .KernelStartPointer = pipeline->cs_simd,
540 .KernelStartPointerHigh = 0,
541 .BindingTablePointer = surfaces.offset,
542 .BindingTableEntryCount = 0,
543 .SamplerStatePointer = samplers.offset,
544 .SamplerCount = 0,
545 .ConstantIndirectURBEntryReadLength = push_constant_regs,
546 .ConstantURBEntryReadOffset = 0,
547 .NumberofThreadsinGPGPUThreadGroup = 0);
548
549 uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);
550 anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD),
551 .InterfaceDescriptorTotalLength = size,
552 .InterfaceDescriptorDataStartAddress = state.offset);
553
554 return VK_SUCCESS;
555 }
556
557 static void
558 cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
559 {
560 struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
561 VkResult result;
562
563 assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
564
565 if (cmd_buffer->state.current_pipeline != GPGPU) {
566 anv_batch_emit(&cmd_buffer->batch, GENX(PIPELINE_SELECT),
567 #if ANV_GEN >= 9
568 .MaskBits = 3,
569 #endif
570 .PipelineSelection = GPGPU);
571 cmd_buffer->state.current_pipeline = GPGPU;
572 }
573
574 if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)
575 anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
576
577 if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
578 (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) {
579 result = flush_compute_descriptor_set(cmd_buffer);
580 assert(result == VK_SUCCESS);
581 cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
582 }
583
584 cmd_buffer->state.compute_dirty = 0;
585 }
586
587 void genX(CmdDrawIndexedIndirect)(
588 VkCommandBuffer commandBuffer,
589 VkBuffer _buffer,
590 VkDeviceSize offset,
591 uint32_t drawCount,
592 uint32_t stride)
593 {
594 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
595 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
596 struct anv_bo *bo = buffer->bo;
597 uint32_t bo_offset = buffer->offset + offset;
598
599 cmd_buffer_flush_state(cmd_buffer);
600
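   /* VkDrawIndexedIndirectCommand layout: indexCount (+0), instanceCount
    * (+4), firstIndex (+8), vertexOffset (+12, signed) and firstInstance
    * (+16), loaded into the corresponding 3DPRIM registers below.
    */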
601 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
602 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
603 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
604 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12);
605 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16);
606
607 anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE),
608 .IndirectParameterEnable = true,
609 .VertexAccessType = RANDOM);
610 }
611
612 void genX(CmdDispatch)(
613 VkCommandBuffer commandBuffer,
614 uint32_t x,
615 uint32_t y,
616 uint32_t z)
617 {
618 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
619 struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
620 struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
621
622 if (prog_data->uses_num_work_groups) {
623 struct anv_state state =
624 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 12, 4);
625 uint32_t *sizes = state.map;
626 sizes[0] = x;
627 sizes[1] = y;
628 sizes[2] = z;
629 if (!cmd_buffer->device->info.has_llc)
630 anv_state_clflush(state);
631 cmd_buffer->state.num_workgroups_offset = state.offset;
632 cmd_buffer->state.num_workgroups_bo =
633 &cmd_buffer->device->dynamic_state_block_pool.bo;
634 }
635
636 cmd_buffer_flush_compute_state(cmd_buffer);
637
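   /* The SIMDSize field encodes SIMD8/SIMD16/SIMD32 as 0/1/2, so integer
    * division of the dispatch width by 16 yields the right value.
    */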
638 anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER),
639 .SIMDSize = prog_data->simd_size / 16,
640 .ThreadDepthCounterMaximum = 0,
641 .ThreadHeightCounterMaximum = 0,
642 .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1,
643 .ThreadGroupIDXDimension = x,
644 .ThreadGroupIDYDimension = y,
645 .ThreadGroupIDZDimension = z,
646 .RightExecutionMask = pipeline->cs_right_mask,
647 .BottomExecutionMask = 0xffffffff);
648
649 anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH));
650 }
651
652 #define GPGPU_DISPATCHDIMX 0x2500
653 #define GPGPU_DISPATCHDIMY 0x2504
654 #define GPGPU_DISPATCHDIMZ 0x2508
655
656 void genX(CmdDispatchIndirect)(
657 VkCommandBuffer commandBuffer,
658 VkBuffer _buffer,
659 VkDeviceSize offset)
660 {
661 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
662 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
663 struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
664 struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
665 struct anv_bo *bo = buffer->bo;
666 uint32_t bo_offset = buffer->offset + offset;
667
668 if (prog_data->uses_num_work_groups) {
669 cmd_buffer->state.num_workgroups_offset = bo_offset;
670 cmd_buffer->state.num_workgroups_bo = bo;
671 }
672
673 cmd_buffer_flush_compute_state(cmd_buffer);
674
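   /* VkDispatchIndirectCommand is three uint32_t group counts (x, y, z),
    * loaded straight into the GPGPU_DISPATCHDIM* registers below.
    */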
675 emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset);
676 emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4);
677 emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8);
678
679 anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER),
680 .IndirectParameterEnable = true,
681 .SIMDSize = prog_data->simd_size / 16,
682 .ThreadDepthCounterMaximum = 0,
683 .ThreadHeightCounterMaximum = 0,
684 .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1,
685 .RightExecutionMask = pipeline->cs_right_mask,
686 .BottomExecutionMask = 0xffffffff);
687
688 anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_STATE_FLUSH));
689 }
690
691 static void
692 cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
693 {
694 const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
695 const struct anv_image_view *iview =
696 anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
697 const struct anv_image *image = iview ? iview->image : NULL;
698 const bool has_depth = iview && iview->format->depth_format;
699 const bool has_stencil = iview && iview->format->has_stencil;
700
701 /* FIXME: Implement the PMA stall W/A */
702 /* FIXME: Width and Height are wrong */
703
704 /* Emit 3DSTATE_DEPTH_BUFFER */
705 if (has_depth) {
706 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER),
707 .SurfaceType = SURFTYPE_2D,
708 .DepthWriteEnable = iview->format->depth_format,
709 .StencilWriteEnable = has_stencil,
710 .HierarchicalDepthBufferEnable = false,
711 .SurfaceFormat = iview->format->depth_format,
712 .SurfacePitch = image->depth_surface.isl.row_pitch - 1,
713 .SurfaceBaseAddress = {
714 .bo = image->bo,
715 .offset = image->depth_surface.offset,
716 },
717 .Height = fb->height - 1,
718 .Width = fb->width - 1,
719 .LOD = 0,
720 .Depth = 1 - 1,
721 .MinimumArrayElement = 0,
722 .DepthBufferObjectControlState = GENX(MOCS),
723 .RenderTargetViewExtent = 1 - 1,
724 .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->depth_surface.isl) >> 2);
725 } else {
726 /* Even when no depth buffer is present, the hardware requires that
727 * 3DSTATE_DEPTH_BUFFER be programmed correctly. The Broadwell PRM says:
728 *
729 * If a null depth buffer is bound, the driver must instead bind depth as:
730 * 3DSTATE_DEPTH.SurfaceType = SURFTYPE_2D
731 * 3DSTATE_DEPTH.Width = 1
732 * 3DSTATE_DEPTH.Height = 1
733 * 3DSTATE_DEPTH.SurfaceFormat = D16_UNORM
734 * 3DSTATE_DEPTH.SurfaceBaseAddress = 0
735 * 3DSTATE_DEPTH.HierarchicalDepthBufferEnable = 0
736 * 3DSTATE_WM_DEPTH_STENCIL.DepthTestEnable = 0
737 * 3DSTATE_WM_DEPTH_STENCIL.DepthBufferWriteEnable = 0
738 *
739 * The PRM is wrong, though. The width and height must be programmed to
740 * the actual framebuffer's width and height, even when neither depth buffer
741 * nor stencil buffer is present.
742 */
743 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BUFFER),
744 .SurfaceType = SURFTYPE_2D,
745 .SurfaceFormat = D16_UNORM,
746 .Width = fb->width - 1,
747 .Height = fb->height - 1,
748 .StencilWriteEnable = has_stencil);
749 }
750
751 /* Emit 3DSTATE_STENCIL_BUFFER */
752 if (has_stencil) {
753 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER),
754 .StencilBufferEnable = true,
755 .StencilBufferObjectControlState = GENX(MOCS),
756
757 /* Stencil buffers have strange pitch. The PRM says:
758 *
759 * The pitch must be set to 2x the value computed based on width,
760 * as the stencil buffer is stored with two rows interleaved.
761 */
762 .SurfacePitch = 2 * image->stencil_surface.isl.row_pitch - 1,
763
764 .SurfaceBaseAddress = {
765 .bo = image->bo,
766 .offset = image->offset + image->stencil_surface.offset,
767 },
768 .SurfaceQPitch = isl_surf_get_array_pitch_el_rows(&image->stencil_surface.isl) >> 2);
769 } else {
770 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER));
771 }
772
773 /* Disable hierarchical depth buffers. */
774 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_HIER_DEPTH_BUFFER));
775
776 /* Clear the clear params. */
777 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CLEAR_PARAMS));
778 }
779
780 void
781 genX(cmd_buffer_begin_subpass)(struct anv_cmd_buffer *cmd_buffer,
782 struct anv_subpass *subpass)
783 {
784 cmd_buffer->state.subpass = subpass;
785
786 cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
787
788 cmd_buffer_emit_depth_stencil(cmd_buffer);
789 }
790
791 void genX(CmdBeginRenderPass)(
792 VkCommandBuffer commandBuffer,
793 const VkRenderPassBeginInfo* pRenderPassBegin,
794 VkSubpassContents contents)
795 {
796 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
797 ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass);
798 ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer);
799
800 cmd_buffer->state.framebuffer = framebuffer;
801 cmd_buffer->state.pass = pass;
802
803 const VkRect2D *render_area = &pRenderPassBegin->renderArea;
804
805 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DRAWING_RECTANGLE),
806 .ClippedDrawingRectangleYMin = render_area->offset.y,
807 .ClippedDrawingRectangleXMin = render_area->offset.x,
808 .ClippedDrawingRectangleYMax =
809 render_area->offset.y + render_area->extent.height - 1,
810 .ClippedDrawingRectangleXMax =
811 render_area->offset.x + render_area->extent.width - 1,
812 .DrawingRectangleOriginY = 0,
813 .DrawingRectangleOriginX = 0);
814
815 anv_cmd_buffer_clear_attachments(cmd_buffer, pass,
816 pRenderPassBegin->pClearValues);
817
818 genX(cmd_buffer_begin_subpass)(cmd_buffer, pass->subpasses);
819 }
820
821 void genX(CmdNextSubpass)(
822 VkCommandBuffer commandBuffer,
823 VkSubpassContents contents)
824 {
825 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
826
827 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
828
829 genX(cmd_buffer_begin_subpass)(cmd_buffer, cmd_buffer->state.subpass + 1);
830 }
831
832 void genX(CmdEndRenderPass)(
833 VkCommandBuffer commandBuffer)
834 {
835 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
836
837 /* Emit a flushing pipe control at the end of a pass. This is kind of a
838 * hack but it ensures that render targets always actually get written.
839 * Eventually, we should do flushing based on image format transitions
840 * or something of that nature.
841 */
842 anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
843 .PostSyncOperation = NoWrite,
844 .RenderTargetCacheFlushEnable = true,
845 .InstructionCacheInvalidateEnable = true,
846 .DepthCacheFlushEnable = true,
847 .VFCacheInvalidationEnable = true,
848 .TextureCacheInvalidationEnable = true,
849 .CommandStreamerStallEnable = true);
850 }
851
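/* Occlusion queries are implemented with PIPE_CONTROL writes of
 * PS_DEPTH_COUNT: one at vkCmdBeginQuery and one 8 bytes further into the
 * query slot at vkCmdEndQuery.  CmdCopyQueryPoolResults computes the
 * difference between the two values.
 */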
852 static void
853 emit_ps_depth_count(struct anv_batch *batch,
854 struct anv_bo *bo, uint32_t offset)
855 {
856 anv_batch_emit(batch, GENX(PIPE_CONTROL),
857 .DestinationAddressType = DAT_PPGTT,
858 .PostSyncOperation = WritePSDepthCount,
859 .Address = { bo, offset });
860 }
861
862 void genX(CmdBeginQuery)(
863 VkCommandBuffer commandBuffer,
864 VkQueryPool queryPool,
865 uint32_t entry,
866 VkQueryControlFlags flags)
867 {
868 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
869 ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
870
871 switch (pool->type) {
872 case VK_QUERY_TYPE_OCCLUSION:
873 emit_ps_depth_count(&cmd_buffer->batch, &pool->bo,
874 entry * sizeof(struct anv_query_pool_slot));
875 break;
876
877 case VK_QUERY_TYPE_PIPELINE_STATISTICS:
878 default:
879 unreachable("");
880 }
881 }
882
883 void genX(CmdEndQuery)(
884 VkCommandBuffer commandBuffer,
885 VkQueryPool queryPool,
886 uint32_t entry)
887 {
888 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
889 ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
890
891 switch (pool->type) {
892 case VK_QUERY_TYPE_OCCLUSION:
893 emit_ps_depth_count(&cmd_buffer->batch, &pool->bo,
894 entry * sizeof(struct anv_query_pool_slot) + 8);
895 break;
896
897 case VK_QUERY_TYPE_PIPELINE_STATISTICS:
898 default:
899 unreachable("");
900 }
901 }
902
903 #define TIMESTAMP 0x2358
904
905 void genX(CmdWriteTimestamp)(
906 VkCommandBuffer commandBuffer,
907 VkPipelineStageFlagBits pipelineStage,
908 VkQueryPool queryPool,
909 uint32_t entry)
910 {
911 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
912 ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
913
914 assert(pool->type == VK_QUERY_TYPE_TIMESTAMP);
915
916 switch (pipelineStage) {
917 case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT:
918 anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM),
919 .RegisterAddress = TIMESTAMP,
920 .MemoryAddress = { &pool->bo, entry * 8 });
921 anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM),
922 .RegisterAddress = TIMESTAMP + 4,
923 .MemoryAddress = { &pool->bo, entry * 8 + 4 });
924 break;
925
926 default:
927 /* Everything else is bottom-of-pipe */
928 anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
929 .DestinationAddressType = DAT_PPGTT,
930 .PostSyncOperation = WriteTimestamp,
931 .Address = { &pool->bo, entry * 8 });
932 break;
933 }
934 }
935
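/* MI_MATH ALU instructions are one dword each: opcode in bits 31:20,
 * operand 1 in bits 19:10 and operand 2 in bits 9:0.  CS_GPR(n) is the
 * MMIO offset of the command streamer's 64-bit general purpose register n
 * (sixteen registers starting at 0x2600, 8 bytes apart).
 */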
936 #define alu_opcode(v) __gen_field((v), 20, 31)
937 #define alu_operand1(v) __gen_field((v), 10, 19)
938 #define alu_operand2(v) __gen_field((v), 0, 9)
939 #define alu(opcode, operand1, operand2) \
940 alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2)
941
942 #define OPCODE_NOOP 0x000
943 #define OPCODE_LOAD 0x080
944 #define OPCODE_LOADINV 0x480
945 #define OPCODE_LOAD0 0x081
946 #define OPCODE_LOAD1 0x481
947 #define OPCODE_ADD 0x100
948 #define OPCODE_SUB 0x101
949 #define OPCODE_AND 0x102
950 #define OPCODE_OR 0x103
951 #define OPCODE_XOR 0x104
952 #define OPCODE_STORE 0x180
953 #define OPCODE_STOREINV 0x580
954
955 #define OPERAND_R0 0x00
956 #define OPERAND_R1 0x01
957 #define OPERAND_R2 0x02
958 #define OPERAND_R3 0x03
959 #define OPERAND_R4 0x04
960 #define OPERAND_SRCA 0x20
961 #define OPERAND_SRCB 0x21
962 #define OPERAND_ACCU 0x31
963 #define OPERAND_ZF 0x32
964 #define OPERAND_CF 0x33
965
966 #define CS_GPR(n) (0x2600 + (n) * 8)
967
968 static void
969 emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg,
970 struct anv_bo *bo, uint32_t offset)
971 {
972 anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM),
973 .RegisterAddress = reg,
974 .MemoryAddress = { bo, offset });
975 anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_MEM),
976 .RegisterAddress = reg + 4,
977 .MemoryAddress = { bo, offset + 4 });
978 }
979
980 void genX(CmdCopyQueryPoolResults)(
981 VkCommandBuffer commandBuffer,
982 VkQueryPool queryPool,
983 uint32_t startQuery,
984 uint32_t queryCount,
985 VkBuffer destBuffer,
986 VkDeviceSize destOffset,
987 VkDeviceSize destStride,
988 VkQueryResultFlags flags)
989 {
990 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
991 ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
992 ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer);
993 uint32_t slot_offset, dst_offset;
994
995 if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
996 /* Where is the availability info supposed to go? */
997 anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT");
998 return;
999 }
1000
1001 assert(pool->type == VK_QUERY_TYPE_OCCLUSION);
1002
1003 /* FIXME: If we're not waiting, should we just do this on the CPU? */
1004 if (flags & VK_QUERY_RESULT_WAIT_BIT)
1005 anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
1006 .CommandStreamerStallEnable = true,
1007 .StallAtPixelScoreboard = true);
1008
1009 dst_offset = buffer->offset + destOffset;
1010 for (uint32_t i = 0; i < queryCount; i++) {
1011
1012 slot_offset = (startQuery + i) * sizeof(struct anv_query_pool_slot);
1013
1014 emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), &pool->bo, slot_offset);
1015 emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(1), &pool->bo, slot_offset + 8);
1016
1017 /* FIXME: We need to clamp the result for 32 bit. */
1018
1019 uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH));
1020 dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1);
1021 dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0);
1022 dw[3] = alu(OPCODE_SUB, 0, 0);
1023 dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU);
1024
1025 anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM),
1026 .RegisterAddress = CS_GPR(2),
1027 .MemoryAddress = { buffer->bo, dst_offset });
1028
1029 if (flags & VK_QUERY_RESULT_64_BIT)
1030 anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM),
1031 .RegisterAddress = CS_GPR(2) + 4,
1032 .MemoryAddress = { buffer->bo, dst_offset + 4 });
1033
1034 dst_offset += destStride;
1035 }
1036 }
1037
1038 void genX(CmdSetEvent)(
1039 VkCommandBuffer commandBuffer,
1040 VkEvent _event,
1041 VkPipelineStageFlags stageMask)
1042 {
1043 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1044 ANV_FROM_HANDLE(anv_event, event, _event);
1045
1046 anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
1047 .DestinationAddressType = DAT_PPGTT,
1048 .PostSyncOperation = WriteImmediateData,
1049 .Address = {
1050 &cmd_buffer->device->dynamic_state_block_pool.bo,
1051 event->state.offset
1052 },
1053 .ImmediateData = VK_EVENT_SET);
1054 }
1055
1056 void genX(CmdResetEvent)(
1057 VkCommandBuffer commandBuffer,
1058 VkEvent _event,
1059 VkPipelineStageFlags stageMask)
1060 {
1061 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1062 ANV_FROM_HANDLE(anv_event, event, _event);
1063
1064 anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
1065 .DestinationAddressType = DAT_PPGTT,
1066 .PostSyncOperation = WriteImmediateData,
1067 .Address = {
1068 &cmd_buffer->device->dynamic_state_block_pool.bo,
1069 event->state.offset
1070 },
1071 .ImmediateData = VK_EVENT_RESET);
1072 }
1073
1074 void genX(CmdWaitEvents)(
1075 VkCommandBuffer commandBuffer,
1076 uint32_t eventCount,
1077 const VkEvent* pEvents,
1078 VkPipelineStageFlags srcStageMask,
1079 VkPipelineStageFlags destStageMask,
1080 uint32_t memBarrierCount,
1081 const void* const* ppMemBarriers)
1082 {
1083 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1084 for (uint32_t i = 0; i < eventCount; i++) {
1085 ANV_FROM_HANDLE(anv_event, event, pEvents[i]);
1086
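      /* Stall the command streamer, polling the event's dword in the
       * dynamic state pool until it reads back VK_EVENT_SET (written by
       * genX(CmdSetEvent) above).
       */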
1087 anv_batch_emit(&cmd_buffer->batch, GENX(MI_SEMAPHORE_WAIT),
1088 .WaitMode = PollingMode,
1089 .CompareOperation = SAD_EQUAL_SDD,
1090 .SemaphoreDataDword = VK_EVENT_SET,
1091 .SemaphoreAddress = {
1092 &cmd_buffer->device->dynamic_state_block_pool.bo,
1093 event->state.offset
1094 });
1095 }
1096
1097 genX(CmdPipelineBarrier)(commandBuffer, srcStageMask, destStageMask,
1098 false, /* byRegion */
1099 memBarrierCount, ppMemBarriers);
1100 }