3b9e67fdd0f375ca0815e1096f23d2e675148539
[mesa.git] / src / vulkan / anv_cmd_emit.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "anv_private.h"
31
32 /** \file anv_cmd_buffer.c
33 *
34 * This file contains all of the stuff for emitting commands into a command
35 * buffer. This includes implementations of most of the vkCmd*
36 * entrypoints. This file is concerned entirely with state emission and
37 * not with the command buffer data structure itself. As far as this file
38 * is concerned, most of anv_cmd_buffer is magic.
39 */
40
41 static void
42 anv_cmd_state_init(struct anv_cmd_state *state)
43 {
44 state->rs_state = NULL;
45 state->vp_state = NULL;
46 state->cb_state = NULL;
47 state->ds_state = NULL;
48 memset(&state->state_vf, 0, sizeof(state->state_vf));
49 memset(&state->descriptors, 0, sizeof(state->descriptors));
50
51 state->dirty = 0;
52 state->vb_dirty = 0;
53 state->descriptors_dirty = 0;
54 state->pipeline = NULL;
55 state->vp_state = NULL;
56 state->rs_state = NULL;
57 state->ds_state = NULL;
58 }
59
60 VkResult anv_CreateCommandBuffer(
61 VkDevice _device,
62 const VkCmdBufferCreateInfo* pCreateInfo,
63 VkCmdBuffer* pCmdBuffer)
64 {
65 ANV_FROM_HANDLE(anv_device, device, _device);
66 ANV_FROM_HANDLE(anv_cmd_pool, pool, pCreateInfo->cmdPool);
67 struct anv_cmd_buffer *cmd_buffer;
68 VkResult result;
69
70 cmd_buffer = anv_device_alloc(device, sizeof(*cmd_buffer), 8,
71 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
72 if (cmd_buffer == NULL)
73 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
74
75 cmd_buffer->device = device;
76
77 result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer);
78 if (result != VK_SUCCESS)
79 goto fail;
80
81 anv_state_stream_init(&cmd_buffer->surface_state_stream,
82 &device->surface_state_block_pool);
83 anv_state_stream_init(&cmd_buffer->dynamic_state_stream,
84 &device->dynamic_state_block_pool);
85
86 cmd_buffer->level = pCreateInfo->level;
87 cmd_buffer->opt_flags = 0;
88
89 anv_cmd_state_init(&cmd_buffer->state);
90
91 list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
92
93 *pCmdBuffer = anv_cmd_buffer_to_handle(cmd_buffer);
94
95 return VK_SUCCESS;
96
97 fail: anv_device_free(device, cmd_buffer);
98
99 return result;
100 }
101
VkResult anv_DestroyCommandBuffer(
    VkDevice                                    _device,
    VkCmdBuffer                                 _cmd_buffer)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, _cmd_buffer);

   /* Unlink from the owning pool first so the pool never sees a
    * half-destroyed command buffer. */
   list_del(&cmd_buffer->pool_link);

   /* Release the batch buffer chain before the state streams it may
    * reference. */
   anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer);

   anv_state_stream_finish(&cmd_buffer->surface_state_stream);
   anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
   anv_device_free(device, cmd_buffer);

   return VK_SUCCESS;
}
119
VkResult anv_ResetCommandBuffer(
    VkCmdBuffer                                 cmdBuffer,
    VkCmdBufferResetFlags                       flags)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   /* Discard everything recorded so far... */
   anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer);

   /* ...and reset the CPU-side state tracking.  NOTE(review): `flags` is
    * currently ignored. */
   anv_cmd_state_init(&cmd_buffer->state);

   return VK_SUCCESS;
}
132
/* Emit STATE_BASE_ADDRESS pointing general state at the scratch bo (if any),
 * surface state at the command buffer's current surface bo, and dynamic and
 * instruction state at the device-wide block pools, then invalidate the
 * texture cache so the new surface state is actually picked up.
 */
void
anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_device *device = cmd_buffer->device;
   struct anv_bo *scratch_bo = NULL;

   /* Remember the scratch size we emitted against so flush_state can detect
    * when a later pipeline bind needs a bigger scratch space. */
   cmd_buffer->state.scratch_size = device->scratch_block_pool.size;
   if (cmd_buffer->state.scratch_size > 0)
      scratch_bo = &device->scratch_block_pool.bo;

   anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS,
      .GeneralStateBaseAddress = { scratch_bo, 0 },
      .GeneralStateMemoryObjectControlState = GEN8_MOCS,
      .GeneralStateBaseAddressModifyEnable = true,
      .GeneralStateBufferSize = 0xfffff,
      .GeneralStateBufferSizeModifyEnable = true,

      .SurfaceStateBaseAddress = { anv_cmd_buffer_current_surface_bo(cmd_buffer), 0 },
      .SurfaceStateMemoryObjectControlState = GEN8_MOCS,
      .SurfaceStateBaseAddressModifyEnable = true,

      .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 },
      .DynamicStateMemoryObjectControlState = GEN8_MOCS,
      .DynamicStateBaseAddressModifyEnable = true,
      .DynamicStateBufferSize = 0xfffff,
      .DynamicStateBufferSizeModifyEnable = true,

      .IndirectObjectBaseAddress = { NULL, 0 },
      .IndirectObjectMemoryObjectControlState = GEN8_MOCS,
      .IndirectObjectBaseAddressModifyEnable = true,
      .IndirectObjectBufferSize = 0xfffff,
      .IndirectObjectBufferSizeModifyEnable = true,

      .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 },
      .InstructionMemoryObjectControlState = GEN8_MOCS,
      .InstructionBaseAddressModifyEnable = true,
      .InstructionBufferSize = 0xfffff,
      .InstructionBuffersizeModifyEnable = true);

   /* After re-setting the surface state base address, we have to do some
    * cache flushing so that the sampler engine will pick up the new
    * SURFACE_STATE objects and binding tables.  From the Broadwell PRM,
    * Shared Function > 3D Sampler > State > State Caching (page 96):
    *
    *    Coherency with system memory in the state cache, like the texture
    *    cache is handled partially by software.  It is expected that the
    *    command stream or shader will issue Cache Flush operation or
    *    Cache_Flush sampler message to ensure that the L1 cache remains
    *    coherent with system memory.
    *
    *    [...]
    *
    *    Whenever the value of the Dynamic_State_Base_Addr,
    *    Surface_State_Base_Addr are altered, the L1 state cache must be
    *    invalidated to ensure the new surface or sampler state is fetched
    *    from system memory.
    *
    * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit
    * which, according the PIPE_CONTROL instruction documentation in the
    * Broadwell PRM:
    *
    *    Setting this bit is independent of any other bit in this packet.
    *    This bit controls the invalidation of the L1 and L2 state caches
    *    at the top of the pipe i.e. at the parsing time.
    *
    * Unfortunately, experimentation seems to indicate that state cache
    * invalidation through a PIPE_CONTROL does nothing whatsoever in
    * regards to surface state and binding tables.  Instead, it seems that
    * invalidating the texture cache is what is actually needed.
    *
    * XXX: As far as we have been able to determine through
    * experimentation, flushing the texture cache appears to be
    * sufficient.  The theory here is that all of the sampling/rendering
    * units cache the binding table in the texture cache.  However, we have
    * yet to be able to actually confirm this.
    */
   anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
                  .TextureCacheInvalidationEnable = true);
}
212
VkResult anv_BeginCommandBuffer(
    VkCmdBuffer                                 cmdBuffer,
    const VkCmdBufferBeginInfo*                 pBeginInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   cmd_buffer->opt_flags = pBeginInfo->flags;

   /* Secondary command buffers inherit the render pass / framebuffer from
    * the begin info rather than from vkCmdBeginRenderPass. */
   if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_SECONDARY) {
      cmd_buffer->state.framebuffer =
         anv_framebuffer_from_handle(pBeginInfo->framebuffer);
      cmd_buffer->state.pass =
         anv_render_pass_from_handle(pBeginInfo->renderPass);

      /* FIXME: We shouldn't be starting on the first subpass */
      anv_cmd_buffer_begin_subpass(cmd_buffer,
                                   &cmd_buffer->state.pass->subpasses[0]);
   }

   anv_cmd_buffer_emit_state_base_address(cmd_buffer);
   /* Force a PIPELINE_SELECT on the first flush_state call. */
   cmd_buffer->state.current_pipeline = UINT32_MAX;

   return VK_SUCCESS;
}
237
VkResult anv_EndCommandBuffer(
    VkCmdBuffer                                 cmdBuffer)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   struct anv_device *device = cmd_buffer->device;

   /* Terminate the batch (e.g. MI_BATCH_BUFFER_END) before building the
    * execbuf data. */
   anv_cmd_buffer_end_batch_buffer(cmd_buffer);

   if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY) {
      /* The algorithm used to compute the validate list is not threadsafe as
       * it uses the bo->index field. We have to lock the device around it.
       * Fortunately, the chances for contention here are probably very low.
       */
      pthread_mutex_lock(&device->mutex);
      anv_cmd_buffer_prepare_execbuf(cmd_buffer);
      pthread_mutex_unlock(&device->mutex);
   }

   return VK_SUCCESS;
}
258
259 void anv_CmdBindPipeline(
260 VkCmdBuffer cmdBuffer,
261 VkPipelineBindPoint pipelineBindPoint,
262 VkPipeline _pipeline)
263 {
264 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
265 ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
266
267 switch (pipelineBindPoint) {
268 case VK_PIPELINE_BIND_POINT_COMPUTE:
269 cmd_buffer->state.compute_pipeline = pipeline;
270 cmd_buffer->state.compute_dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY;
271 break;
272
273 case VK_PIPELINE_BIND_POINT_GRAPHICS:
274 cmd_buffer->state.pipeline = pipeline;
275 cmd_buffer->state.vb_dirty |= pipeline->vb_used;
276 cmd_buffer->state.dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY;
277 break;
278
279 default:
280 assert(!"invalid bind point");
281 break;
282 }
283 }
284
/* Bind dynamic viewport/scissor state; actual packets are emitted lazily in
 * flush_state via the VP dirty bit. */
void anv_CmdBindDynamicViewportState(
    VkCmdBuffer                                 cmdBuffer,
    VkDynamicViewportState                      dynamicViewportState)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_dynamic_vp_state, vp_state, dynamicViewportState);

   cmd_buffer->state.vp_state = vp_state;
   cmd_buffer->state.dirty |= ANV_CMD_BUFFER_VP_DIRTY;
}
295
/* Bind dynamic raster state; merged with the pipeline's SF/RASTER packets in
 * flush_state via the RS dirty bit. */
void anv_CmdBindDynamicRasterState(
    VkCmdBuffer                                 cmdBuffer,
    VkDynamicRasterState                        dynamicRasterState)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_dynamic_rs_state, rs_state, dynamicRasterState);

   cmd_buffer->state.rs_state = rs_state;
   cmd_buffer->state.dirty |= ANV_CMD_BUFFER_RS_DIRTY;
}
306
/* Bind dynamic color-blend state; consumed by flush_state when emitting
 * COLOR_CALC_STATE via the CB dirty bit. */
void anv_CmdBindDynamicColorBlendState(
    VkCmdBuffer                                 cmdBuffer,
    VkDynamicColorBlendState                    dynamicColorBlendState)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_dynamic_cb_state, cb_state, dynamicColorBlendState);

   cmd_buffer->state.cb_state = cb_state;
   cmd_buffer->state.dirty |= ANV_CMD_BUFFER_CB_DIRTY;
}
317
/* Bind dynamic depth-stencil state; merged with the pipeline's
 * WM_DEPTH_STENCIL packet in flush_state via the DS dirty bit. */
void anv_CmdBindDynamicDepthStencilState(
    VkCmdBuffer                                 cmdBuffer,
    VkDynamicDepthStencilState                  dynamicDepthStencilState)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_dynamic_ds_state, ds_state, dynamicDepthStencilState);

   cmd_buffer->state.ds_state = ds_state;
   cmd_buffer->state.dirty |= ANV_CMD_BUFFER_DS_DIRTY;
}
328
329 void anv_CmdBindDescriptorSets(
330 VkCmdBuffer cmdBuffer,
331 VkPipelineBindPoint pipelineBindPoint,
332 VkPipelineLayout _layout,
333 uint32_t firstSet,
334 uint32_t setCount,
335 const VkDescriptorSet* pDescriptorSets,
336 uint32_t dynamicOffsetCount,
337 const uint32_t* pDynamicOffsets)
338 {
339 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
340 ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout);
341 struct anv_descriptor_set_layout *set_layout;
342
343 assert(firstSet + setCount < MAX_SETS);
344
345 uint32_t dynamic_slot = 0;
346 for (uint32_t i = 0; i < setCount; i++) {
347 ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]);
348 set_layout = layout->set[firstSet + i].layout;
349
350 cmd_buffer->state.descriptors[firstSet + i].set = set;
351
352 assert(set_layout->num_dynamic_buffers <
353 ARRAY_SIZE(cmd_buffer->state.descriptors[0].dynamic_offsets));
354 memcpy(cmd_buffer->state.descriptors[firstSet + i].dynamic_offsets,
355 pDynamicOffsets + dynamic_slot,
356 set_layout->num_dynamic_buffers * sizeof(*pDynamicOffsets));
357
358 cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages;
359
360 dynamic_slot += set_layout->num_dynamic_buffers;
361 }
362 }
363
void anv_CmdBindIndexBuffer(
    VkCmdBuffer                                 cmdBuffer,
    VkBuffer                                    _buffer,
    VkDeviceSize                                offset,
    VkIndexType                                 indexType)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);

   /* Translate the Vulkan index type to the hardware index format. */
   static const uint32_t vk_to_gen_index_type[] = {
      [VK_INDEX_TYPE_UINT16] = INDEX_WORD,
      [VK_INDEX_TYPE_UINT32] = INDEX_DWORD,
   };

   /* The cut index depends on the index size, so re-pack 3DSTATE_VF now and
    * mark it dirty; flush_state merges it with the pipeline's copy. */
   struct GEN8_3DSTATE_VF vf = {
      GEN8_3DSTATE_VF_header,
      .CutIndex = (indexType == VK_INDEX_TYPE_UINT16) ? UINT16_MAX : UINT32_MAX,
   };
   GEN8_3DSTATE_VF_pack(NULL, cmd_buffer->state.state_vf, &vf);

   cmd_buffer->state.dirty |= ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY;

   /* The index buffer packet itself can be emitted immediately. */
   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER,
                  .IndexFormat = vk_to_gen_index_type[indexType],
                  .MemoryObjectControlState = GEN8_MOCS,
                  .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
                  .BufferSize = buffer->size - offset);
}
392
393 void anv_CmdBindVertexBuffers(
394 VkCmdBuffer cmdBuffer,
395 uint32_t startBinding,
396 uint32_t bindingCount,
397 const VkBuffer* pBuffers,
398 const VkDeviceSize* pOffsets)
399 {
400 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
401 struct anv_vertex_binding *vb = cmd_buffer->state.vertex_bindings;
402
403 /* We have to defer setting up vertex buffer since we need the buffer
404 * stride from the pipeline. */
405
406 assert(startBinding + bindingCount < MAX_VBS);
407 for (uint32_t i = 0; i < bindingCount; i++) {
408 vb[startBinding + i].buffer = anv_buffer_from_handle(pBuffers[i]);
409 vb[startBinding + i].offset = pOffsets[i];
410 cmd_buffer->state.vb_dirty |= 1 << (startBinding + i);
411 }
412 }
413
/* Build the binding table for one shader stage: render-target surfaces first
 * (fragment stage only, biased by MAX_RTS), then the descriptor-set surfaces.
 * Returns VK_ERROR_OUT_OF_DEVICE_MEMORY if the current surface state bo is
 * full; the caller is expected to get a fresh bo and retry.
 */
static VkResult
cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
                              unsigned stage, struct anv_state *bt_state)
{
   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   struct anv_subpass *subpass = cmd_buffer->state.subpass;
   struct anv_pipeline_layout *layout;
   uint32_t attachments, bias, size;

   if (stage == VK_SHADER_STAGE_COMPUTE)
      layout = cmd_buffer->state.compute_pipeline->layout;
   else
      layout = cmd_buffer->state.pipeline->layout;

   /* Only the fragment stage gets render-target entries; they occupy the
    * first MAX_RTS slots of its binding table. */
   if (stage == VK_SHADER_STAGE_FRAGMENT) {
      bias = MAX_RTS;
      attachments = subpass->color_count;
   } else {
      bias = 0;
      attachments = 0;
   }

   /* This is a little awkward: layout can be NULL but we still have to
    * allocate and set a binding table for the PS stage for render
    * targets. */
   uint32_t surface_count = layout ? layout->stage[stage].surface_count : 0;

   if (attachments + surface_count == 0)
      return VK_SUCCESS;

   size = (bias + surface_count) * sizeof(uint32_t);
   *bt_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32);
   uint32_t *bt_map = bt_state->map;

   if (bt_state->map == NULL)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   /* This is highly annoying.  The Vulkan spec puts the depth-stencil
    * attachments in with the color attachments.  Unfortunately, thanks to
    * other aspects of the API, we can't really separate them before this
    * point.  Therefore, we have to walk all of the attachments but only
    * put the color attachments into the binding table.
    */
   for (uint32_t a = 0; a < attachments; a++) {
      const struct anv_attachment_view *attachment =
         fb->attachments[subpass->color_attachments[a]];

      assert(attachment->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_COLOR);
      const struct anv_color_attachment_view *view =
         (const struct anv_color_attachment_view *)attachment;

      /* Copy the view's pre-baked SURFACE_STATE into the cmd buffer's
       * surface state bo so it is addressable from this binding table. */
      struct anv_state state =
         anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64);

      if (state.map == NULL)
         return VK_ERROR_OUT_OF_DEVICE_MEMORY;

      memcpy(state.map, view->view.surface_state.map, 64);

      /* The address goes in dwords 8 and 9 of the SURFACE_STATE */
      *(uint64_t *)(state.map + 8 * 4) =
         anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer),
                            cmd_buffer->device,
                            state.offset + 8 * 4,
                            view->view.bo, view->view.offset);

      bt_map[a] = state.offset;
   }

   if (layout == NULL)
      return VK_SUCCESS;

   /* Now the descriptor-set surfaces, placed after the render targets. */
   for (uint32_t set = 0; set < layout->num_sets; set++) {
      struct anv_descriptor_set_binding *d = &cmd_buffer->state.descriptors[set];
      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
      struct anv_descriptor_slot *surface_slots =
         set_layout->stage[stage].surface_start;

      uint32_t start = bias + layout->set[set].surface_start[stage];

      for (uint32_t b = 0; b < set_layout->stage[stage].surface_count; b++) {
         struct anv_surface_view *view =
            d->set->descriptors[surface_slots[b].index].view;

         /* Unwritten descriptors simply leave their slot untouched. */
         if (!view)
            continue;

         struct anv_state state =
            anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64);

         if (state.map == NULL)
            return VK_ERROR_OUT_OF_DEVICE_MEMORY;

         uint32_t offset;
         if (surface_slots[b].dynamic_slot >= 0) {
            /* Dynamic buffers: rebuild the surface state with the bound
             * dynamic offset applied to the base and the range shrunk
             * accordingly. */
            uint32_t dynamic_offset =
               d->dynamic_offsets[surface_slots[b].dynamic_slot];

            offset = view->offset + dynamic_offset;
            anv_fill_buffer_surface_state(state.map, view->format, offset,
                                          view->range - dynamic_offset);
         } else {
            offset = view->offset;
            memcpy(state.map, view->surface_state.map, 64);
         }

         /* The address goes in dwords 8 and 9 of the SURFACE_STATE */
         *(uint64_t *)(state.map + 8 * 4) =
            anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer),
                               cmd_buffer->device,
                               state.offset + 8 * 4,
                               view->bo, offset);

         bt_map[start + b] = state.offset;
      }
   }

   return VK_SUCCESS;
}
533
/* Build the SAMPLER_STATE array (16 bytes per sampler) for one shader stage
 * from the bound descriptor sets.  Returns VK_ERROR_OUT_OF_DEVICE_MEMORY if
 * dynamic state allocation fails.
 */
static VkResult
cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer,
                         unsigned stage, struct anv_state *state)
{
   struct anv_pipeline_layout *layout;
   uint32_t sampler_count;

   if (stage == VK_SHADER_STAGE_COMPUTE)
      layout = cmd_buffer->state.compute_pipeline->layout;
   else
      layout = cmd_buffer->state.pipeline->layout;

   sampler_count = layout ? layout->stage[stage].sampler_count : 0;
   if (sampler_count == 0)
      return VK_SUCCESS;

   /* Each SAMPLER_STATE entry is 16 bytes. */
   uint32_t size = sampler_count * 16;
   *state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 32);

   if (state->map == NULL)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   for (uint32_t set = 0; set < layout->num_sets; set++) {
      struct anv_descriptor_set_binding *d = &cmd_buffer->state.descriptors[set];
      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
      struct anv_descriptor_slot *sampler_slots =
         set_layout->stage[stage].sampler_start;

      uint32_t start = layout->set[set].sampler_start[stage];

      for (uint32_t b = 0; b < set_layout->stage[stage].sampler_count; b++) {
         struct anv_sampler *sampler =
            d->set->descriptors[sampler_slots[b].index].sampler;

         /* Unwritten descriptors leave their slot untouched. */
         if (!sampler)
            continue;

         memcpy(state->map + (start + b) * 16,
                sampler->state, sizeof(sampler->state));
      }
   }

   return VK_SUCCESS;
}
578
/* Emit the sampler state and binding table for one stage, then point the
 * hardware at them.  Propagates OOM from the emit helpers so the caller can
 * switch surface state bos and retry.
 */
static VkResult
flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, uint32_t stage)
{
   struct anv_state surfaces = { 0, }, samplers = { 0, };
   VkResult result;

   result = cmd_buffer_emit_samplers(cmd_buffer, stage, &samplers);
   if (result != VK_SUCCESS)
      return result;
   result = cmd_buffer_emit_binding_table(cmd_buffer, stage, &surfaces);
   if (result != VK_SUCCESS)
      return result;

   /* Per-stage 3D command sub-opcodes for
    * 3DSTATE_SAMPLER_STATE_POINTERS_{VS,HS,DS,GS,PS}.  We reuse the VS
    * packet layout and patch the sub-opcode. */
   static const uint32_t sampler_state_opcodes[] = {
      [VK_SHADER_STAGE_VERTEX] = 43,
      [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */
      [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */
      [VK_SHADER_STAGE_GEOMETRY] = 46,
      [VK_SHADER_STAGE_FRAGMENT] = 47,
      [VK_SHADER_STAGE_COMPUTE] = 0,
   };

   /* Same trick for 3DSTATE_BINDING_TABLE_POINTERS_{VS..PS}. */
   static const uint32_t binding_table_opcodes[] = {
      [VK_SHADER_STAGE_VERTEX] = 38,
      [VK_SHADER_STAGE_TESS_CONTROL] = 39,
      [VK_SHADER_STAGE_TESS_EVALUATION] = 40,
      [VK_SHADER_STAGE_GEOMETRY] = 41,
      [VK_SHADER_STAGE_FRAGMENT] = 42,
      [VK_SHADER_STAGE_COMPUTE] = 0,
   };

   if (samplers.alloc_size > 0) {
      anv_batch_emit(&cmd_buffer->batch,
                     GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS,
                     ._3DCommandSubOpcode = sampler_state_opcodes[stage],
                     .PointertoVSSamplerState = samplers.offset);
   }

   if (surfaces.alloc_size > 0) {
      anv_batch_emit(&cmd_buffer->batch,
                     GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS,
                     ._3DCommandSubOpcode = binding_table_opcodes[stage],
                     .PointertoVSBindingTable = surfaces.offset);
   }

   return VK_SUCCESS;
}
626
/* Flush descriptor state for every dirty graphics stage.  If the surface
 * state bo fills up mid-flush, switch to a fresh bo, re-emit
 * STATE_BASE_ADDRESS, and re-emit *all* active stages (earlier tables point
 * into the old bo and are now invalid).
 */
static void
flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer)
{
   uint32_t s, dirty = cmd_buffer->state.descriptors_dirty &
                       cmd_buffer->state.pipeline->active_stages;

   VkResult result = VK_SUCCESS;
   for_each_bit(s, dirty) {
      result = flush_descriptor_set(cmd_buffer, s);
      if (result != VK_SUCCESS)
         break;
   }

   if (result != VK_SUCCESS) {
      /* The only recoverable failure is running out of surface state. */
      assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY);

      result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer);
      assert(result == VK_SUCCESS);

      /* Re-emit state base addresses so we get the new surface state base
       * address before we start emitting binding tables etc.
       */
      anv_cmd_buffer_emit_state_base_address(cmd_buffer);

      /* Re-emit all active binding tables */
      for_each_bit(s, cmd_buffer->state.pipeline->active_stages) {
         result = flush_descriptor_set(cmd_buffer, s);

         /* It had better succeed this time */
         assert(result == VK_SUCCESS);
      }
   }

   cmd_buffer->state.descriptors_dirty &= ~cmd_buffer->state.pipeline->active_stages;
}
662
663 static struct anv_state
664 anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
665 uint32_t *a, uint32_t dwords, uint32_t alignment)
666 {
667 struct anv_state state;
668
669 state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
670 dwords * 4, alignment);
671 memcpy(state.map, a, dwords * 4);
672
673 VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, dwords * 4));
674
675 return state;
676 }
677
678 static struct anv_state
679 anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
680 uint32_t *a, uint32_t *b,
681 uint32_t dwords, uint32_t alignment)
682 {
683 struct anv_state state;
684 uint32_t *p;
685
686 state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
687 dwords * 4, alignment);
688 p = state.map;
689 for (uint32_t i = 0; i < dwords; i++)
690 p[i] = a[i] | b[i];
691
692 VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4));
693
694 return state;
695 }
696
/* Emit the compute stage's samplers and binding table, then load an
 * INTERFACE_DESCRIPTOR pointing at them and the compute kernel.
 */
static VkResult
flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_device *device = cmd_buffer->device;
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   struct anv_state surfaces = { 0, }, samplers = { 0, };
   VkResult result;

   result = cmd_buffer_emit_samplers(cmd_buffer,
                                     VK_SHADER_STAGE_COMPUTE, &samplers);
   if (result != VK_SUCCESS)
      return result;
   result = cmd_buffer_emit_binding_table(cmd_buffer,
                                          VK_SHADER_STAGE_COMPUTE, &surfaces);
   if (result != VK_SUCCESS)
      return result;

   struct GEN8_INTERFACE_DESCRIPTOR_DATA desc = {
      .KernelStartPointer = pipeline->cs_simd,
      .KernelStartPointerHigh = 0,
      .BindingTablePointer = surfaces.offset,
      .BindingTableEntryCount = 0,
      .SamplerStatePointer = samplers.offset,
      .SamplerCount = 0,
      .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? */
   };

   /* NOTE(review): this allocates from the device-wide pool rather than the
    * cmd buffer's dynamic state stream and appears never to be freed —
    * verify the intended lifetime. */
   uint32_t size = GEN8_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t);
   struct anv_state state =
      anv_state_pool_alloc(&device->dynamic_state_pool, size, 64);

   GEN8_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc);

   anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD,
                  .InterfaceDescriptorTotalLength = size,
                  .InterfaceDescriptorDataStartAddress = state.offset);

   return VK_SUCCESS;
}
736
737 static void
738 anv_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
739 {
740 struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
741 VkResult result;
742
743 assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
744
745 if (cmd_buffer->state.current_pipeline != GPGPU) {
746 anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
747 .PipelineSelection = GPGPU);
748 cmd_buffer->state.current_pipeline = GPGPU;
749 }
750
751 if (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)
752 anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
753
754 if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
755 (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) {
756 result = flush_compute_descriptor_set(cmd_buffer);
757 assert(result == VK_SUCCESS);
758 cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE;
759 }
760
761 cmd_buffer->state.compute_dirty = 0;
762 }
763
/* Flush all pending 3D state before a draw: pipeline select, dirty vertex
 * buffers, pipeline packets, descriptors, viewport/scissor, and the merged
 * dynamic-state packets (SF/RASTER, WM_DEPTH_STENCIL, COLOR_CALC, VF).
 * Emission order matters; do not reorder the sections below.
 */
static void
anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
   uint32_t *p;

   /* Only emit vertex buffers that are both dirty and used by the current
    * pipeline (we need the pipeline's per-binding stride). */
   uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used;

   assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0);

   if (cmd_buffer->state.current_pipeline != _3D) {
      anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
                     .PipelineSelection = _3D);
      cmd_buffer->state.current_pipeline = _3D;
   }

   if (vb_emit) {
      /* One header dword plus 4 dwords of VERTEX_BUFFER_STATE per buffer. */
      const uint32_t num_buffers = __builtin_popcount(vb_emit);
      const uint32_t num_dwords = 1 + num_buffers * 4;

      p = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
                          GEN8_3DSTATE_VERTEX_BUFFERS);
      uint32_t vb, i = 0;
      for_each_bit(vb, vb_emit) {
         struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer;
         uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset;

         struct GEN8_VERTEX_BUFFER_STATE state = {
            .VertexBufferIndex = vb,
            .MemoryObjectControlState = GEN8_MOCS,
            .AddressModifyEnable = true,
            .BufferPitch = pipeline->binding_stride[vb],
            .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
            .BufferSize = buffer->size - offset
         };

         GEN8_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state);
         i++;
      }
   }

   if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) {
      /* If somebody compiled a pipeline after starting a command buffer the
       * scratch bo may have grown since we started this cmd buffer (and
       * emitted STATE_BASE_ADDRESS).  If we're binding that pipeline now,
       * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */
      if (cmd_buffer->state.scratch_size < pipeline->total_scratch)
         anv_cmd_buffer_emit_state_base_address(cmd_buffer);

      anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
   }

   if (cmd_buffer->state.descriptors_dirty)
      flush_descriptor_sets(cmd_buffer);

   if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_VP_DIRTY) {
      struct anv_dynamic_vp_state *vp_state = cmd_buffer->state.vp_state;
      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS,
                     .ScissorRectPointer = vp_state->scissor.offset);
      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC,
                     .CCViewportPointer = vp_state->cc_vp.offset);
      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP,
                     .SFClipViewportPointer = vp_state->sf_clip_vp.offset);
   }

   /* SF/RASTER are split between the pipeline and dynamic raster state;
    * OR the two partial packets together. */
   if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY |
                                  ANV_CMD_BUFFER_RS_DIRTY)) {
      anv_batch_emit_merge(&cmd_buffer->batch,
                           cmd_buffer->state.rs_state->state_sf,
                           pipeline->state_sf);
      anv_batch_emit_merge(&cmd_buffer->batch,
                           cmd_buffer->state.rs_state->state_raster,
                           pipeline->state_raster);
   }

   if (cmd_buffer->state.ds_state &&
       (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY |
                                   ANV_CMD_BUFFER_DS_DIRTY))) {
      anv_batch_emit_merge(&cmd_buffer->batch,
                           cmd_buffer->state.ds_state->state_wm_depth_stencil,
                           pipeline->state_wm_depth_stencil);
   }

   /* COLOR_CALC_STATE has contributions from both CB (blend constants) and
    * DS (stencil refs) dynamic state; merge whichever are bound. */
   if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_CB_DIRTY |
                                  ANV_CMD_BUFFER_DS_DIRTY)) {
      struct anv_state state;
      if (cmd_buffer->state.ds_state == NULL)
         state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
                                             cmd_buffer->state.cb_state->state_color_calc,
                                             GEN8_COLOR_CALC_STATE_length, 64);
      else if (cmd_buffer->state.cb_state == NULL)
         state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
                                             cmd_buffer->state.ds_state->state_color_calc,
                                             GEN8_COLOR_CALC_STATE_length, 64);
      else
         state = anv_cmd_buffer_merge_dynamic(cmd_buffer,
                                              cmd_buffer->state.ds_state->state_color_calc,
                                              cmd_buffer->state.cb_state->state_color_calc,
                                              GEN8_COLOR_CALC_STATE_length, 64);

      anv_batch_emit(&cmd_buffer->batch,
                     GEN8_3DSTATE_CC_STATE_POINTERS,
                     .ColorCalcStatePointer = state.offset,
                     .ColorCalcStatePointerValid = true);
   }

   /* 3DSTATE_VF carries the cut index from vkCmdBindIndexBuffer merged with
    * the pipeline's primitive-restart setting. */
   if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY |
                                  ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY)) {
      anv_batch_emit_merge(&cmd_buffer->batch,
                           cmd_buffer->state.state_vf, pipeline->state_vf);
   }

   cmd_buffer->state.vb_dirty &= ~vb_emit;
   cmd_buffer->state.dirty = 0;
}
879
void anv_CmdDraw(
    VkCmdBuffer                                 cmdBuffer,
    uint32_t                                    firstVertex,
    uint32_t                                    vertexCount,
    uint32_t                                    firstInstance,
    uint32_t                                    instanceCount)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   /* Flush all dirty 3D state before issuing the primitive. */
   anv_cmd_buffer_flush_state(cmd_buffer);

   /* SEQUENTIAL = non-indexed vertex fetch. */
   anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
                  .VertexAccessType = SEQUENTIAL,
                  .VertexCountPerInstance = vertexCount,
                  .StartVertexLocation = firstVertex,
                  .InstanceCount = instanceCount,
                  .StartInstanceLocation = firstInstance,
                  .BaseVertexLocation = 0);
}
899
void anv_CmdDrawIndexed(
    VkCmdBuffer                                 cmdBuffer,
    uint32_t                                    firstIndex,
    uint32_t                                    indexCount,
    int32_t                                     vertexOffset,
    uint32_t                                    firstInstance,
    uint32_t                                    instanceCount)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   /* Flush all dirty 3D state before issuing the primitive. */
   anv_cmd_buffer_flush_state(cmd_buffer);

   /* RANDOM = indexed vertex fetch; StartVertexLocation doubles as the
    * first index in this mode. */
   anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
                  .VertexAccessType = RANDOM,
                  .VertexCountPerInstance = indexCount,
                  .StartVertexLocation = firstIndex,
                  .InstanceCount = instanceCount,
                  .StartInstanceLocation = firstInstance,
                  .BaseVertexLocation = vertexOffset);
}
920
/* Emit MI_LOAD_REGISTER_MEM: load MMIO register `reg` from `bo` + `offset`. */
static void
anv_batch_lrm(struct anv_batch *batch,
              uint32_t reg, struct anv_bo *bo, uint32_t offset)
{
   anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM,
                  .RegisterAddress = reg,
                  .MemoryAddress = { bo, offset });
}
929
/* Emit MI_LOAD_REGISTER_IMM: load MMIO register `reg` with immediate `imm`. */
static void
anv_batch_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm)
{
   anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_IMM,
                  .RegisterOffset = reg,
                  .DataDWord = imm);
}
937
938 /* Auto-Draw / Indirect Registers */
939 #define GEN7_3DPRIM_END_OFFSET 0x2420
940 #define GEN7_3DPRIM_START_VERTEX 0x2430
941 #define GEN7_3DPRIM_VERTEX_COUNT 0x2434
942 #define GEN7_3DPRIM_INSTANCE_COUNT 0x2438
943 #define GEN7_3DPRIM_START_INSTANCE 0x243C
944 #define GEN7_3DPRIM_BASE_VERTEX 0x2440
945
946 void anv_CmdDrawIndirect(
947 VkCmdBuffer cmdBuffer,
948 VkBuffer _buffer,
949 VkDeviceSize offset,
950 uint32_t count,
951 uint32_t stride)
952 {
953 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
954 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
955 struct anv_bo *bo = buffer->bo;
956 uint32_t bo_offset = buffer->offset + offset;
957
958 anv_cmd_buffer_flush_state(cmd_buffer);
959
960 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
961 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
962 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
963 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12);
964 anv_batch_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0);
965
966 anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
967 .IndirectParameterEnable = true,
968 .VertexAccessType = SEQUENTIAL);
969 }
970
971 void anv_CmdDrawIndexedIndirect(
972 VkCmdBuffer cmdBuffer,
973 VkBuffer _buffer,
974 VkDeviceSize offset,
975 uint32_t count,
976 uint32_t stride)
977 {
978 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
979 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
980 struct anv_bo *bo = buffer->bo;
981 uint32_t bo_offset = buffer->offset + offset;
982
983 anv_cmd_buffer_flush_state(cmd_buffer);
984
985 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
986 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
987 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
988 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12);
989 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16);
990
991 anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
992 .IndirectParameterEnable = true,
993 .VertexAccessType = RANDOM);
994 }
995
996 void anv_CmdDispatch(
997 VkCmdBuffer cmdBuffer,
998 uint32_t x,
999 uint32_t y,
1000 uint32_t z)
1001 {
1002 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
1003 struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
1004 struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
1005
1006 anv_cmd_buffer_flush_compute_state(cmd_buffer);
1007
1008 anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
1009 .SIMDSize = prog_data->simd_size / 16,
1010 .ThreadDepthCounterMaximum = 0,
1011 .ThreadHeightCounterMaximum = 0,
1012 .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
1013 .ThreadGroupIDXDimension = x,
1014 .ThreadGroupIDYDimension = y,
1015 .ThreadGroupIDZDimension = z,
1016 .RightExecutionMask = pipeline->cs_right_mask,
1017 .BottomExecutionMask = 0xffffffff);
1018
1019 anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
1020 }
1021
1022 #define GPGPU_DISPATCHDIMX 0x2500
1023 #define GPGPU_DISPATCHDIMY 0x2504
1024 #define GPGPU_DISPATCHDIMZ 0x2508
1025
1026 void anv_CmdDispatchIndirect(
1027 VkCmdBuffer cmdBuffer,
1028 VkBuffer _buffer,
1029 VkDeviceSize offset)
1030 {
1031 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
1032 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
1033 struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
1034 struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
1035 struct anv_bo *bo = buffer->bo;
1036 uint32_t bo_offset = buffer->offset + offset;
1037
1038 anv_cmd_buffer_flush_compute_state(cmd_buffer);
1039
1040 anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset);
1041 anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4);
1042 anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8);
1043
1044 anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
1045 .IndirectParameterEnable = true,
1046 .SIMDSize = prog_data->simd_size / 16,
1047 .ThreadDepthCounterMaximum = 0,
1048 .ThreadHeightCounterMaximum = 0,
1049 .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
1050 .RightExecutionMask = pipeline->cs_right_mask,
1051 .BottomExecutionMask = 0xffffffff);
1052
1053 anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
1054 }
1055
/* Not yet implemented: vkCmdSetEvent. */
void anv_CmdSetEvent(
    VkCmdBuffer                                 cmdBuffer,
    VkEvent                                     event,
    VkPipelineStageFlags                        stageMask)
{
   stub();
}
1063
/* Not yet implemented: vkCmdResetEvent. */
void anv_CmdResetEvent(
    VkCmdBuffer                                 cmdBuffer,
    VkEvent                                     event,
    VkPipelineStageFlags                        stageMask)
{
   stub();
}
1071
/* Not yet implemented: vkCmdWaitEvents. */
void anv_CmdWaitEvents(
    VkCmdBuffer                                 cmdBuffer,
    uint32_t                                    eventCount,
    const VkEvent*                              pEvents,
    VkPipelineStageFlags                        srcStageMask,
    VkPipelineStageFlags                        destStageMask,
    uint32_t                                    memBarrierCount,
    const void* const*                          ppMemBarriers)
{
   stub();
}
1083
1084 void anv_CmdPipelineBarrier(
1085 VkCmdBuffer cmdBuffer,
1086 VkPipelineStageFlags srcStageMask,
1087 VkPipelineStageFlags destStageMask,
1088 VkBool32 byRegion,
1089 uint32_t memBarrierCount,
1090 const void* const* ppMemBarriers)
1091 {
1092 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
1093 uint32_t b, *dw;
1094
1095 struct GEN8_PIPE_CONTROL cmd = {
1096 GEN8_PIPE_CONTROL_header,
1097 .PostSyncOperation = NoWrite,
1098 };
1099
1100 /* XXX: I think waitEvent is a no-op on our HW. We should verify that. */
1101
1102 if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) {
1103 /* This is just what PIPE_CONTROL does */
1104 }
1105
1106 if (anv_clear_mask(&srcStageMask,
1107 VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
1108 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
1109 VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
1110 VK_PIPELINE_STAGE_TESS_CONTROL_SHADER_BIT |
1111 VK_PIPELINE_STAGE_TESS_EVALUATION_SHADER_BIT |
1112 VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
1113 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
1114 VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
1115 VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
1116 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) {
1117 cmd.StallAtPixelScoreboard = true;
1118 }
1119
1120
1121 if (anv_clear_mask(&srcStageMask,
1122 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
1123 VK_PIPELINE_STAGE_TRANSFER_BIT |
1124 VK_PIPELINE_STAGE_TRANSITION_BIT)) {
1125 cmd.CommandStreamerStallEnable = true;
1126 }
1127
1128 if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_HOST_BIT)) {
1129 anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT");
1130 }
1131
1132 /* On our hardware, all stages will wait for execution as needed. */
1133 (void)destStageMask;
1134
1135 /* We checked all known VkPipeEventFlags. */
1136 anv_assert(srcStageMask == 0);
1137
1138 /* XXX: Right now, we're really dumb and just flush whatever categories
1139 * the app asks for. One of these days we may make this a bit better
1140 * but right now that's all the hardware allows for in most areas.
1141 */
1142 VkMemoryOutputFlags out_flags = 0;
1143 VkMemoryInputFlags in_flags = 0;
1144
1145 for (uint32_t i = 0; i < memBarrierCount; i++) {
1146 const struct anv_common *common = ppMemBarriers[i];
1147 switch (common->sType) {
1148 case VK_STRUCTURE_TYPE_MEMORY_BARRIER: {
1149 ANV_COMMON_TO_STRUCT(VkMemoryBarrier, barrier, common);
1150 out_flags |= barrier->outputMask;
1151 in_flags |= barrier->inputMask;
1152 break;
1153 }
1154 case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: {
1155 ANV_COMMON_TO_STRUCT(VkBufferMemoryBarrier, barrier, common);
1156 out_flags |= barrier->outputMask;
1157 in_flags |= barrier->inputMask;
1158 break;
1159 }
1160 case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: {
1161 ANV_COMMON_TO_STRUCT(VkImageMemoryBarrier, barrier, common);
1162 out_flags |= barrier->outputMask;
1163 in_flags |= barrier->inputMask;
1164 break;
1165 }
1166 default:
1167 unreachable("Invalid memory barrier type");
1168 }
1169 }
1170
1171 for_each_bit(b, out_flags) {
1172 switch ((VkMemoryOutputFlags)(1 << b)) {
1173 case VK_MEMORY_OUTPUT_HOST_WRITE_BIT:
1174 break; /* FIXME: Little-core systems */
1175 case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT:
1176 cmd.DCFlushEnable = true;
1177 break;
1178 case VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT:
1179 cmd.RenderTargetCacheFlushEnable = true;
1180 break;
1181 case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
1182 cmd.DepthCacheFlushEnable = true;
1183 break;
1184 case VK_MEMORY_OUTPUT_TRANSFER_BIT:
1185 cmd.RenderTargetCacheFlushEnable = true;
1186 cmd.DepthCacheFlushEnable = true;
1187 break;
1188 default:
1189 unreachable("Invalid memory output flag");
1190 }
1191 }
1192
1193 for_each_bit(b, out_flags) {
1194 switch ((VkMemoryInputFlags)(1 << b)) {
1195 case VK_MEMORY_INPUT_HOST_READ_BIT:
1196 break; /* FIXME: Little-core systems */
1197 case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT:
1198 case VK_MEMORY_INPUT_INDEX_FETCH_BIT:
1199 case VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT:
1200 cmd.VFCacheInvalidationEnable = true;
1201 break;
1202 case VK_MEMORY_INPUT_UNIFORM_READ_BIT:
1203 cmd.ConstantCacheInvalidationEnable = true;
1204 /* fallthrough */
1205 case VK_MEMORY_INPUT_SHADER_READ_BIT:
1206 cmd.DCFlushEnable = true;
1207 cmd.TextureCacheInvalidationEnable = true;
1208 break;
1209 case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT:
1210 case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
1211 break; /* XXX: Hunh? */
1212 case VK_MEMORY_INPUT_TRANSFER_BIT:
1213 cmd.TextureCacheInvalidationEnable = true;
1214 break;
1215 }
1216 }
1217
1218 dw = anv_batch_emit_dwords(&cmd_buffer->batch, GEN8_PIPE_CONTROL_length);
1219 GEN8_PIPE_CONTROL_pack(&cmd_buffer->batch, dw, &cmd);
1220 }
1221
/* Not yet implemented: vkCmdPushConstants. */
void anv_CmdPushConstants(
    VkCmdBuffer                                 cmdBuffer,
    VkPipelineLayout                            layout,
    VkShaderStageFlags                          stageFlags,
    uint32_t                                    start,
    uint32_t                                    length,
    const void*                                 values)
{
   stub();
}
1232
/* Emit 3DSTATE_DEPTH_BUFFER / 3DSTATE_STENCIL_BUFFER (plus the mandatory
 * HIER_DEPTH_BUFFER and CLEAR_PARAMS packets) for the current subpass's
 * depth/stencil attachment, or for a "null" surface when the subpass has
 * no depth/stencil attachment.
 */
static void
anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_subpass *subpass = cmd_buffer->state.subpass;
   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   const struct anv_depth_stencil_view *view;

   /* Zero strides disable depth and stencil writes below, making this a
    * do-nothing surface; D16_UNORM is just a legal placeholder format.
    */
   static const struct anv_depth_stencil_view null_view =
      { .depth_format = D16_UNORM, .depth_stride = 0, .stencil_stride = 0 };

   if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) {
      const struct anv_attachment_view *aview =
         fb->attachments[subpass->depth_stencil_attachment];
      assert(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL);
      view = (const struct anv_depth_stencil_view *)aview;
   } else {
      view = &null_view;
   }

   /* FIXME: Implement the PMA stall W/A */
   /* FIXME: Width and Height are wrong */

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER,
      .SurfaceType = SURFTYPE_2D,
      .DepthWriteEnable = view->depth_stride > 0,
      .StencilWriteEnable = view->stencil_stride > 0,
      .HierarchicalDepthBufferEnable = false,
      .SurfaceFormat = view->depth_format,
      /* Pitch, Height, Width, Depth and RenderTargetViewExtent are all
       * programmed minus one, per the packet definition.
       */
      .SurfacePitch = view->depth_stride > 0 ? view->depth_stride - 1 : 0,
      .SurfaceBaseAddress = { view->bo, view->depth_offset },
      .Height = cmd_buffer->state.framebuffer->height - 1,
      .Width = cmd_buffer->state.framebuffer->width - 1,
      .LOD = 0,
      .Depth = 1 - 1,
      .MinimumArrayElement = 0,
      .DepthBufferObjectControlState = GEN8_MOCS,
      .RenderTargetViewExtent = 1 - 1,
      .SurfaceQPitch = view->depth_qpitch >> 2);

   /* Disable hierarchial depth buffers. */
   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HIER_DEPTH_BUFFER);

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER,
      .StencilBufferEnable = view->stencil_stride > 0,
      .StencilBufferObjectControlState = GEN8_MOCS,
      .SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0,
      .SurfaceBaseAddress = { view->bo, view->stencil_offset },
      .SurfaceQPitch = view->stencil_qpitch >> 2);

   /* Clear the clear params. */
   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLEAR_PARAMS);
}
1285
/* Switch the command buffer to the given subpass: record it as current,
 * mark fragment descriptors dirty (attachment surface state may have
 * changed), and emit the subpass's depth/stencil buffer state.
 */
void
anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
                             struct anv_subpass *subpass)
{
   cmd_buffer->state.subpass = subpass;

   cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;

   anv_cmd_buffer_emit_depth_stencil(cmd_buffer);
}
1296
1297 void anv_CmdBeginRenderPass(
1298 VkCmdBuffer cmdBuffer,
1299 const VkRenderPassBeginInfo* pRenderPassBegin,
1300 VkRenderPassContents contents)
1301 {
1302 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
1303 ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass);
1304 ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer);
1305
1306 cmd_buffer->state.framebuffer = framebuffer;
1307 cmd_buffer->state.pass = pass;
1308
1309 const VkRect2D *render_area = &pRenderPassBegin->renderArea;
1310
1311 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE,
1312 .ClippedDrawingRectangleYMin = render_area->offset.y,
1313 .ClippedDrawingRectangleXMin = render_area->offset.x,
1314 .ClippedDrawingRectangleYMax =
1315 render_area->offset.y + render_area->extent.height - 1,
1316 .ClippedDrawingRectangleXMax =
1317 render_area->offset.x + render_area->extent.width - 1,
1318 .DrawingRectangleOriginY = 0,
1319 .DrawingRectangleOriginX = 0);
1320
1321 anv_cmd_buffer_clear_attachments(cmd_buffer, pass,
1322 pRenderPassBegin->pAttachmentClearValues);
1323
1324 anv_cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses);
1325 }
1326
/* vkCmdNextSubpass: advance to the next subpass of the current render
 * pass.  Subpasses are stored contiguously, so "next" is just pointer
 * arithmetic on the current one.
 */
void anv_CmdNextSubpass(
    VkCmdBuffer                                 cmdBuffer,
    VkRenderPassContents                        contents)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   /* Render passes may only be begun/advanced on primary command buffers. */
   assert(cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY);

   anv_cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->state.subpass + 1);
}
1337
1338 void anv_CmdEndRenderPass(
1339 VkCmdBuffer cmdBuffer)
1340 {
1341 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
1342
1343 /* Emit a flushing pipe control at the end of a pass. This is kind of a
1344 * hack but it ensures that render targets always actually get written.
1345 * Eventually, we should do flushing based on image format transitions
1346 * or something of that nature.
1347 */
1348 anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
1349 .PostSyncOperation = NoWrite,
1350 .RenderTargetCacheFlushEnable = true,
1351 .InstructionCacheInvalidateEnable = true,
1352 .DepthCacheFlushEnable = true,
1353 .VFCacheInvalidationEnable = true,
1354 .TextureCacheInvalidationEnable = true,
1355 .CommandStreamerStallEnable = true);
1356 }
1357
1358 void anv_CmdExecuteCommands(
1359 VkCmdBuffer cmdBuffer,
1360 uint32_t cmdBuffersCount,
1361 const VkCmdBuffer* pCmdBuffers)
1362 {
1363 ANV_FROM_HANDLE(anv_cmd_buffer, primary, cmdBuffer);
1364
1365 assert(primary->level == VK_CMD_BUFFER_LEVEL_PRIMARY);
1366
1367 anv_assert(primary->state.subpass == &primary->state.pass->subpasses[0]);
1368
1369 for (uint32_t i = 0; i < cmdBuffersCount; i++) {
1370 ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]);
1371
1372 assert(secondary->level == VK_CMD_BUFFER_LEVEL_SECONDARY);
1373
1374 anv_cmd_buffer_add_secondary(primary, secondary);
1375 }
1376 }
1377
1378 VkResult anv_CreateCommandPool(
1379 VkDevice _device,
1380 const VkCmdPoolCreateInfo* pCreateInfo,
1381 VkCmdPool* pCmdPool)
1382 {
1383 ANV_FROM_HANDLE(anv_device, device, _device);
1384 struct anv_cmd_pool *pool;
1385
1386 pool = anv_device_alloc(device, sizeof(*pool), 8,
1387 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1388 if (pool == NULL)
1389 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1390
1391 list_inithead(&pool->cmd_buffers);
1392
1393 *pCmdPool = anv_cmd_pool_to_handle(pool);
1394
1395 return VK_SUCCESS;
1396 }
1397
1398 VkResult anv_DestroyCommandPool(
1399 VkDevice _device,
1400 VkCmdPool cmdPool)
1401 {
1402 ANV_FROM_HANDLE(anv_device, device, _device);
1403 ANV_FROM_HANDLE(anv_cmd_pool, pool, cmdPool);
1404
1405 anv_ResetCommandPool(_device, cmdPool, 0);
1406
1407 anv_device_free(device, pool);
1408
1409 return VK_SUCCESS;
1410 }
1411
/* vkResetCommandPool: destroy every command buffer allocated from the
 * pool.  The _safe list iterator is used because destroying a command
 * buffer presumably unlinks it from pool->cmd_buffers while we walk the
 * list — NOTE(review): confirm against anv_DestroyCommandBuffer.
 */
VkResult anv_ResetCommandPool(
    VkDevice                                    device,
    VkCmdPool                                   cmdPool,
    VkCmdPoolResetFlags                         flags)
{
   ANV_FROM_HANDLE(anv_cmd_pool, pool, cmdPool);

   list_for_each_entry_safe(struct anv_cmd_buffer, cmd_buffer,
                            &pool->cmd_buffers, pool_link) {
      anv_DestroyCommandBuffer(device, anv_cmd_buffer_to_handle(cmd_buffer));
   }

   return VK_SUCCESS;
}