/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "anv_private.h"

/** \file anv_cmd_buffer.c
 *
 * This file contains all of the stuff for emitting commands into a command
 * buffer.  This includes implementations of most of the vkCmd* entrypoints.
 * This file is concerned entirely with state emission and not with the
 * command buffer data structure itself.  As far as this file is concerned,
 * most of anv_cmd_buffer is magic.
 */

static void
anv_cmd_state_init(struct anv_cmd_state *state)
{
   state->pipeline = NULL;
   state->vp_state = NULL;
   state->rs_state = NULL;
   state->cb_state = NULL;
   state->ds_state = NULL;
   memset(&state->state_vf, 0, sizeof(state->state_vf));
   memset(&state->descriptors, 0, sizeof(state->descriptors));

   state->dirty = 0;
   state->vb_dirty = 0;
   state->descriptors_dirty = 0;
}

VkResult anv_CreateCommandBuffer(
    VkDevice                                    _device,
    const VkCmdBufferCreateInfo*                pCreateInfo,
    VkCmdBuffer*                                pCmdBuffer)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_cmd_pool, pool, pCreateInfo->cmdPool);
   struct anv_cmd_buffer *cmd_buffer;
   VkResult result;

   cmd_buffer = anv_device_alloc(device, sizeof(*cmd_buffer), 8,
                                 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
   if (cmd_buffer == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   cmd_buffer->device = device;

   result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer);
   if (result != VK_SUCCESS)
      goto fail;

   anv_state_stream_init(&cmd_buffer->surface_state_stream,
                         &device->surface_state_block_pool);
   anv_state_stream_init(&cmd_buffer->dynamic_state_stream,
                         &device->dynamic_state_block_pool);

   cmd_buffer->level = pCreateInfo->level;
   cmd_buffer->opt_flags = 0;

   anv_cmd_state_init(&cmd_buffer->state);

   if (pool) {
      list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
   } else {
      /* Init the pool_link so we can safely call list_del when we destroy
       * the command buffer.
       */
      list_inithead(&cmd_buffer->pool_link);
   }

   *pCmdBuffer = anv_cmd_buffer_to_handle(cmd_buffer);

   return VK_SUCCESS;

 fail:
   anv_device_free(device, cmd_buffer);

   return result;
}

VkResult anv_DestroyCommandBuffer(
    VkDevice                                    _device,
    VkCmdBuffer                                 _cmd_buffer)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, _cmd_buffer);

   list_del(&cmd_buffer->pool_link);

   anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer);

   anv_state_stream_finish(&cmd_buffer->surface_state_stream);
   anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
   anv_device_free(device, cmd_buffer);

   return VK_SUCCESS;
}

VkResult anv_ResetCommandBuffer(
    VkCmdBuffer                                 cmdBuffer,
    VkCmdBufferResetFlags                       flags)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer);

   anv_cmd_state_init(&cmd_buffer->state);

   return VK_SUCCESS;
}

void
anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_device *device = cmd_buffer->device;
   struct anv_bo *scratch_bo = NULL;

   cmd_buffer->state.scratch_size =
      anv_block_pool_size(&device->scratch_block_pool);
   if (cmd_buffer->state.scratch_size > 0)
      scratch_bo = &device->scratch_block_pool.bo;

   anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS,
                  .GeneralStateBaseAddress = { scratch_bo, 0 },
                  .GeneralStateMemoryObjectControlState = GEN8_MOCS,
                  .GeneralStateBaseAddressModifyEnable = true,
                  .GeneralStateBufferSize = 0xfffff,
                  .GeneralStateBufferSizeModifyEnable = true,

                  .SurfaceStateBaseAddress = { anv_cmd_buffer_current_surface_bo(cmd_buffer), 0 },
                  .SurfaceStateMemoryObjectControlState = GEN8_MOCS,
                  .SurfaceStateBaseAddressModifyEnable = true,

                  .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 },
                  .DynamicStateMemoryObjectControlState = GEN8_MOCS,
                  .DynamicStateBaseAddressModifyEnable = true,
                  .DynamicStateBufferSize = 0xfffff,
                  .DynamicStateBufferSizeModifyEnable = true,

                  .IndirectObjectBaseAddress = { NULL, 0 },
                  .IndirectObjectMemoryObjectControlState = GEN8_MOCS,
                  .IndirectObjectBaseAddressModifyEnable = true,
                  .IndirectObjectBufferSize = 0xfffff,
                  .IndirectObjectBufferSizeModifyEnable = true,

                  .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 },
                  .InstructionMemoryObjectControlState = GEN8_MOCS,
                  .InstructionBaseAddressModifyEnable = true,
                  .InstructionBufferSize = 0xfffff,
                  .InstructionBuffersizeModifyEnable = true);

   /* After re-setting the surface state base address, we have to do some
    * cache flushing so that the sampler engine will pick up the new
    * SURFACE_STATE objects and binding tables.  From the Broadwell PRM,
    * Shared Function > 3D Sampler > State > State Caching (page 96):
    *
    *    Coherency with system memory in the state cache, like the texture
    *    cache is handled partially by software.  It is expected that the
    *    command stream or shader will issue Cache Flush operation or
    *    Cache_Flush sampler message to ensure that the L1 cache remains
    *    coherent with system memory.
    *
    *    [...]
    *
    *    Whenever the value of the Dynamic_State_Base_Addr,
    *    Surface_State_Base_Addr are altered, the L1 state cache must be
    *    invalidated to ensure the new surface or sampler state is fetched
    *    from system memory.
    *
    * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit
    * which, according to the PIPE_CONTROL instruction documentation in the
    * Broadwell PRM:
    *
    *    Setting this bit is independent of any other bit in this packet.
    *    This bit controls the invalidation of the L1 and L2 state caches
    *    at the top of the pipe i.e. at the parsing time.
    *
    * Unfortunately, experimentation seems to indicate that state cache
    * invalidation through a PIPE_CONTROL does nothing whatsoever in
    * regards to surface state and binding tables.  Instead, it seems that
    * invalidating the texture cache is what is actually needed.
    *
    * XXX: As far as we have been able to determine through
    * experimentation, flushing the texture cache appears to be
    * sufficient.  The theory here is that all of the sampling/rendering
    * units cache the binding table in the texture cache.  However, we
    * have yet to be able to actually confirm this.
    */
   anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
                  .TextureCacheInvalidationEnable = true);
}

VkResult anv_BeginCommandBuffer(
    VkCmdBuffer                                 cmdBuffer,
    const VkCmdBufferBeginInfo*                 pBeginInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   cmd_buffer->opt_flags = pBeginInfo->flags;

   if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_SECONDARY) {
      cmd_buffer->state.framebuffer =
         anv_framebuffer_from_handle(pBeginInfo->framebuffer);
      cmd_buffer->state.pass =
         anv_render_pass_from_handle(pBeginInfo->renderPass);

      /* FIXME: We shouldn't be starting on the first subpass */
      anv_cmd_buffer_begin_subpass(cmd_buffer,
                                   &cmd_buffer->state.pass->subpasses[0]);
   }

   anv_cmd_buffer_emit_state_base_address(cmd_buffer);
   cmd_buffer->state.current_pipeline = UINT32_MAX;

   return VK_SUCCESS;
}

VkResult anv_EndCommandBuffer(
    VkCmdBuffer                                 cmdBuffer)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   struct anv_device *device = cmd_buffer->device;

   anv_cmd_buffer_end_batch_buffer(cmd_buffer);

   if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY) {
      /* The algorithm used to compute the validate list is not threadsafe
       * as it uses the bo->index field.  We have to lock the device around
       * it.  Fortunately, the chances for contention here are probably
       * very low.
       */
      pthread_mutex_lock(&device->mutex);
      anv_cmd_buffer_prepare_execbuf(cmd_buffer);
      pthread_mutex_unlock(&device->mutex);
   }

   return VK_SUCCESS;
}

void anv_CmdBindPipeline(
    VkCmdBuffer                                 cmdBuffer,
    VkPipelineBindPoint                         pipelineBindPoint,
    VkPipeline                                  _pipeline)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);

   switch (pipelineBindPoint) {
   case VK_PIPELINE_BIND_POINT_COMPUTE:
      cmd_buffer->state.compute_pipeline = pipeline;
      cmd_buffer->state.compute_dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY;
      break;

   case VK_PIPELINE_BIND_POINT_GRAPHICS:
      cmd_buffer->state.pipeline = pipeline;
      cmd_buffer->state.vb_dirty |= pipeline->vb_used;
      cmd_buffer->state.dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY;
      break;

   default:
      assert(!"invalid bind point");
      break;
   }
}

void anv_CmdBindDynamicViewportState(
    VkCmdBuffer                                 cmdBuffer,
    VkDynamicViewportState                      dynamicViewportState)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_dynamic_vp_state, vp_state, dynamicViewportState);

   cmd_buffer->state.vp_state = vp_state;
   cmd_buffer->state.dirty |= ANV_CMD_BUFFER_VP_DIRTY;
}

void anv_CmdBindDynamicRasterState(
    VkCmdBuffer                                 cmdBuffer,
    VkDynamicRasterState                        dynamicRasterState)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_dynamic_rs_state, rs_state, dynamicRasterState);

   cmd_buffer->state.rs_state = rs_state;
   cmd_buffer->state.dirty |= ANV_CMD_BUFFER_RS_DIRTY;
}

void anv_CmdBindDynamicColorBlendState(
    VkCmdBuffer                                 cmdBuffer,
    VkDynamicColorBlendState                    dynamicColorBlendState)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_dynamic_cb_state, cb_state, dynamicColorBlendState);

   cmd_buffer->state.cb_state = cb_state;
   cmd_buffer->state.dirty |= ANV_CMD_BUFFER_CB_DIRTY;
}

void anv_CmdBindDynamicDepthStencilState(
    VkCmdBuffer                                 cmdBuffer,
    VkDynamicDepthStencilState                  dynamicDepthStencilState)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_dynamic_ds_state, ds_state, dynamicDepthStencilState);

   cmd_buffer->state.ds_state = ds_state;
   cmd_buffer->state.dirty |= ANV_CMD_BUFFER_DS_DIRTY;
}

void anv_CmdBindDescriptorSets(
    VkCmdBuffer                                 cmdBuffer,
    VkPipelineBindPoint                         pipelineBindPoint,
    VkPipelineLayout                            _layout,
    uint32_t                                    firstSet,
    uint32_t                                    setCount,
    const VkDescriptorSet*                      pDescriptorSets,
    uint32_t                                    dynamicOffsetCount,
    const uint32_t*                             pDynamicOffsets)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout);
   struct anv_descriptor_set_layout *set_layout;

   assert(firstSet + setCount < MAX_SETS);

   uint32_t dynamic_slot = 0;
   for (uint32_t i = 0; i < setCount; i++) {
      ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]);
      set_layout = layout->set[firstSet + i].layout;

      cmd_buffer->state.descriptors[firstSet + i].set = set;

      assert(set_layout->num_dynamic_buffers <
             ARRAY_SIZE(cmd_buffer->state.descriptors[0].dynamic_offsets));
      memcpy(cmd_buffer->state.descriptors[firstSet + i].dynamic_offsets,
             pDynamicOffsets + dynamic_slot,
             set_layout->num_dynamic_buffers * sizeof(*pDynamicOffsets));

      cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages;

      dynamic_slot += set_layout->num_dynamic_buffers;
   }
}

void anv_CmdBindIndexBuffer(
    VkCmdBuffer                                 cmdBuffer,
    VkBuffer                                    _buffer,
    VkDeviceSize                                offset,
    VkIndexType                                 indexType)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);

   static const uint32_t vk_to_gen_index_type[] = {
      [VK_INDEX_TYPE_UINT16] = INDEX_WORD,
      [VK_INDEX_TYPE_UINT32] = INDEX_DWORD,
   };

   struct GEN8_3DSTATE_VF vf = {
      GEN8_3DSTATE_VF_header,
      .CutIndex = (indexType == VK_INDEX_TYPE_UINT16) ? UINT16_MAX : UINT32_MAX,
   };
   GEN8_3DSTATE_VF_pack(NULL, cmd_buffer->state.state_vf, &vf);

   cmd_buffer->state.dirty |= ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY;

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER,
                  .IndexFormat = vk_to_gen_index_type[indexType],
                  .MemoryObjectControlState = GEN8_MOCS,
                  .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
                  .BufferSize = buffer->size - offset);
}

void anv_CmdBindVertexBuffers(
    VkCmdBuffer                                 cmdBuffer,
    uint32_t                                    startBinding,
    uint32_t                                    bindingCount,
    const VkBuffer*                             pBuffers,
    const VkDeviceSize*                         pOffsets)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   struct anv_vertex_binding *vb = cmd_buffer->state.vertex_bindings;

   /* We have to defer setting up vertex buffers since we need the buffer
    * stride from the pipeline. */

   assert(startBinding + bindingCount < MAX_VBS);
   for (uint32_t i = 0; i < bindingCount; i++) {
      vb[startBinding + i].buffer = anv_buffer_from_handle(pBuffers[i]);
      vb[startBinding + i].offset = pOffsets[i];
      cmd_buffer->state.vb_dirty |= 1 << (startBinding + i);
   }
}

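/* Build the binding table for one shader stage and write it out as surface
 * state.  For the fragment stage, the first MAX_RTS slots are reserved for
 * the subpass color attachments; descriptor-set surfaces are placed after
 * that bias.  Each entry gets its own copy of the 64-byte SURFACE_STATE
 * plus a relocation for the buffer address in dwords 8 and 9.
 */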
static VkResult
cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
                              unsigned stage, struct anv_state *bt_state)
{
   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   struct anv_subpass *subpass = cmd_buffer->state.subpass;
   struct anv_pipeline_layout *layout;
   uint32_t attachments, bias, size;

   if (stage == VK_SHADER_STAGE_COMPUTE)
      layout = cmd_buffer->state.compute_pipeline->layout;
   else
      layout = cmd_buffer->state.pipeline->layout;

   if (stage == VK_SHADER_STAGE_FRAGMENT) {
      bias = MAX_RTS;
      attachments = subpass->color_count;
   } else {
      bias = 0;
      attachments = 0;
   }

   /* This is a little awkward: layout can be NULL but we still have to
    * allocate and set a binding table for the PS stage for render
    * targets. */
   uint32_t surface_count = layout ? layout->stage[stage].surface_count : 0;

   if (attachments + surface_count == 0)
      return VK_SUCCESS;

   size = (bias + surface_count) * sizeof(uint32_t);
   *bt_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32);
   uint32_t *bt_map = bt_state->map;

   if (bt_state->map == NULL)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   /* This is highly annoying.  The Vulkan spec puts the depth-stencil
    * attachments in with the color attachments.  Unfortunately, thanks to
    * other aspects of the API, we can't really separate them before this
    * point.  Therefore, we have to walk all of the attachments but only
    * put the color attachments into the binding table.
    */
   for (uint32_t a = 0; a < attachments; a++) {
      const struct anv_attachment_view *attachment =
         fb->attachments[subpass->color_attachments[a]];

      assert(attachment->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_COLOR);
      const struct anv_color_attachment_view *view =
         (const struct anv_color_attachment_view *)attachment;

      struct anv_state state =
         anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64);

      if (state.map == NULL)
         return VK_ERROR_OUT_OF_DEVICE_MEMORY;

      memcpy(state.map, view->view.surface_state.map, 64);

      /* The address goes in dwords 8 and 9 of the SURFACE_STATE */
      *(uint64_t *)(state.map + 8 * 4) =
         anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer),
                            cmd_buffer->device,
                            state.offset + 8 * 4,
                            view->view.bo, view->view.offset);

      bt_map[a] = state.offset;
   }

   if (layout == NULL)
      return VK_SUCCESS;

   for (uint32_t set = 0; set < layout->num_sets; set++) {
      struct anv_descriptor_set_binding *d = &cmd_buffer->state.descriptors[set];
      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
      struct anv_descriptor_slot *surface_slots =
         set_layout->stage[stage].surface_start;

      uint32_t start = bias + layout->set[set].surface_start[stage];

      for (uint32_t b = 0; b < set_layout->stage[stage].surface_count; b++) {
         struct anv_surface_view *view =
            d->set->descriptors[surface_slots[b].index].view;

         if (!view)
            continue;

         struct anv_state state =
            anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64);

         if (state.map == NULL)
            return VK_ERROR_OUT_OF_DEVICE_MEMORY;

         uint32_t offset;
         if (surface_slots[b].dynamic_slot >= 0) {
            uint32_t dynamic_offset =
               d->dynamic_offsets[surface_slots[b].dynamic_slot];

            offset = view->offset + dynamic_offset;
            anv_fill_buffer_surface_state(state.map, view->format, offset,
                                          view->range - dynamic_offset);
         } else {
            offset = view->offset;
            memcpy(state.map, view->surface_state.map, 64);
         }

         /* The address goes in dwords 8 and 9 of the SURFACE_STATE */
         *(uint64_t *)(state.map + 8 * 4) =
            anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer),
                               cmd_buffer->device,
                               state.offset + 8 * 4,
                               view->bo, offset);

         bt_map[start + b] = state.offset;
      }
   }

   return VK_SUCCESS;
}

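/* Gather the sampler state for one shader stage into a single contiguous
 * block of dynamic state.  Each entry is 16 bytes, copied from the
 * pre-packed state stored in anv_sampler.
 */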
static VkResult
cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer,
                         unsigned stage, struct anv_state *state)
{
   struct anv_pipeline_layout *layout;
   uint32_t sampler_count;

   if (stage == VK_SHADER_STAGE_COMPUTE)
      layout = cmd_buffer->state.compute_pipeline->layout;
   else
      layout = cmd_buffer->state.pipeline->layout;

   sampler_count = layout ? layout->stage[stage].sampler_count : 0;
   if (sampler_count == 0)
      return VK_SUCCESS;

   uint32_t size = sampler_count * 16;
   *state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 32);

   if (state->map == NULL)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   for (uint32_t set = 0; set < layout->num_sets; set++) {
      struct anv_descriptor_set_binding *d = &cmd_buffer->state.descriptors[set];
      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
      struct anv_descriptor_slot *sampler_slots =
         set_layout->stage[stage].sampler_start;

      uint32_t start = layout->set[set].sampler_start[stage];

      for (uint32_t b = 0; b < set_layout->stage[stage].sampler_count; b++) {
         struct anv_sampler *sampler =
            d->set->descriptors[sampler_slots[b].index].sampler;

         if (!sampler)
            continue;

         memcpy(state->map + (start + b) * 16,
                sampler->state, sizeof(sampler->state));
      }
   }

   return VK_SUCCESS;
}

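/* Emit the sampler state and binding table for a single stage and point
 * the hardware at them.  The 3DSTATE_SAMPLER_STATE_POINTERS_* and
 * 3DSTATE_BINDING_TABLE_POINTERS_* packets share a layout across stages,
 * so we emit the VS variant and override the 3D command sub-opcode with
 * the per-stage value from the tables below.
 */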
static VkResult
flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, uint32_t stage)
{
   struct anv_state surfaces = { 0, }, samplers = { 0, };
   VkResult result;

   result = cmd_buffer_emit_samplers(cmd_buffer, stage, &samplers);
   if (result != VK_SUCCESS)
      return result;
   result = cmd_buffer_emit_binding_table(cmd_buffer, stage, &surfaces);
   if (result != VK_SUCCESS)
      return result;

   static const uint32_t sampler_state_opcodes[] = {
      [VK_SHADER_STAGE_VERTEX]          = 43,
      [VK_SHADER_STAGE_TESS_CONTROL]    = 44, /* HS */
      [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */
      [VK_SHADER_STAGE_GEOMETRY]        = 46,
      [VK_SHADER_STAGE_FRAGMENT]        = 47,
      [VK_SHADER_STAGE_COMPUTE]         = 0,
   };

   static const uint32_t binding_table_opcodes[] = {
      [VK_SHADER_STAGE_VERTEX]          = 38,
      [VK_SHADER_STAGE_TESS_CONTROL]    = 39,
      [VK_SHADER_STAGE_TESS_EVALUATION] = 40,
      [VK_SHADER_STAGE_GEOMETRY]        = 41,
      [VK_SHADER_STAGE_FRAGMENT]        = 42,
      [VK_SHADER_STAGE_COMPUTE]         = 0,
   };

   if (samplers.alloc_size > 0) {
      anv_batch_emit(&cmd_buffer->batch,
                     GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS,
                     ._3DCommandSubOpcode = sampler_state_opcodes[stage],
                     .PointertoVSSamplerState = samplers.offset);
   }

   if (surfaces.alloc_size > 0) {
      anv_batch_emit(&cmd_buffer->batch,
                     GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS,
                     ._3DCommandSubOpcode = binding_table_opcodes[stage],
                     .PointertoVSBindingTable = surfaces.offset);
   }

   return VK_SUCCESS;
}

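/* Flush descriptor state for every dirty stage used by the current
 * graphics pipeline.  If we run out of surface state space, grab a fresh
 * surface state BO, re-emit STATE_BASE_ADDRESS, and then re-emit the
 * binding tables for all active stages against the new base.
 */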
static void
flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer)
{
   uint32_t s, dirty = cmd_buffer->state.descriptors_dirty &
                       cmd_buffer->state.pipeline->active_stages;

   VkResult result = VK_SUCCESS;
   for_each_bit(s, dirty) {
      result = flush_descriptor_set(cmd_buffer, s);
      if (result != VK_SUCCESS)
         break;
   }

   if (result != VK_SUCCESS) {
      assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY);

      result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer);
      assert(result == VK_SUCCESS);

      /* Re-emit state base addresses so we get the new surface state base
       * address before we start emitting binding tables etc.
       */
      anv_cmd_buffer_emit_state_base_address(cmd_buffer);

      /* Re-emit all active binding tables */
      for_each_bit(s, cmd_buffer->state.pipeline->active_stages) {
         result = flush_descriptor_set(cmd_buffer, s);

         /* It had better succeed this time */
         assert(result == VK_SUCCESS);
      }
   }

   cmd_buffer->state.descriptors_dirty &= ~cmd_buffer->state.pipeline->active_stages;
}

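/* Helpers for pointer-based dynamic state.  emit_dynamic copies a block of
 * pre-packed dwords into the dynamic state stream; merge_dynamic ORs two
 * pre-packed blocks together before emitting, which is how two partially
 * packed copies of the same structure (e.g. COLOR_CALC_STATE from the
 * depth-stencil and color-blend objects) are combined.
 */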
static struct anv_state
anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
                            uint32_t *a, uint32_t dwords, uint32_t alignment)
{
   struct anv_state state;

   state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                              dwords * 4, alignment);
   memcpy(state.map, a, dwords * 4);

   VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, dwords * 4));

   return state;
}

static struct anv_state
anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
                             uint32_t *a, uint32_t *b,
                             uint32_t dwords, uint32_t alignment)
{
   struct anv_state state;
   uint32_t *p;

   state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                              dwords * 4, alignment);
   p = state.map;
   for (uint32_t i = 0; i < dwords; i++)
      p[i] = a[i] | b[i];

   VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4));

   return state;
}

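/* Set up the compute stage: emit samplers and the binding table, pack an
 * INTERFACE_DESCRIPTOR_DATA pointing at them and at the kernel, then load
 * it with MEDIA_INTERFACE_DESCRIPTOR_LOAD so the GPGPU walker can find its
 * resources.
 */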
static VkResult
flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_device *device = cmd_buffer->device;
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   struct anv_state surfaces = { 0, }, samplers = { 0, };
   VkResult result;

   result = cmd_buffer_emit_samplers(cmd_buffer,
                                     VK_SHADER_STAGE_COMPUTE, &samplers);
   if (result != VK_SUCCESS)
      return result;
   result = cmd_buffer_emit_binding_table(cmd_buffer,
                                          VK_SHADER_STAGE_COMPUTE, &surfaces);
   if (result != VK_SUCCESS)
      return result;

   struct GEN8_INTERFACE_DESCRIPTOR_DATA desc = {
      .KernelStartPointer = pipeline->cs_simd,
      .KernelStartPointerHigh = 0,
      .BindingTablePointer = surfaces.offset,
      .BindingTableEntryCount = 0,
      .SamplerStatePointer = samplers.offset,
      .SamplerCount = 0,
      .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? */
   };

   uint32_t size = GEN8_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t);
   struct anv_state state =
      anv_state_pool_alloc(&device->dynamic_state_pool, size, 64);

   GEN8_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc);

   anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD,
                  .InterfaceDescriptorTotalLength = size,
                  .InterfaceDescriptorDataStartAddress = state.offset);

   return VK_SUCCESS;
}

static void
anv_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   VkResult result;

   assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);

   if (cmd_buffer->state.current_pipeline != GPGPU) {
      anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
                     .PipelineSelection = GPGPU);
      cmd_buffer->state.current_pipeline = GPGPU;
   }

   if (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)
      anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);

   if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
       (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) {
      result = flush_compute_descriptor_set(cmd_buffer);
      assert(result == VK_SUCCESS);
      cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
   }

   cmd_buffer->state.compute_dirty = 0;
}

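/* Emit all dirty 3D state ahead of a draw: select the 3D pipeline, upload
 * dirty vertex buffers, re-emit the pipeline batch, flush descriptor sets,
 * and emit viewport, raster, depth-stencil, color-calc, and VF state as
 * needed.  Called at the top of every vkCmdDraw* entrypoint.
 */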
static void
anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
   uint32_t *p;

   uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used;

   assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0);

   if (cmd_buffer->state.current_pipeline != _3D) {
      anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
                     .PipelineSelection = _3D);
      cmd_buffer->state.current_pipeline = _3D;
   }

   if (vb_emit) {
      const uint32_t num_buffers = __builtin_popcount(vb_emit);
      const uint32_t num_dwords = 1 + num_buffers * 4;

      p = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
                          GEN8_3DSTATE_VERTEX_BUFFERS);
      uint32_t vb, i = 0;
      for_each_bit(vb, vb_emit) {
         struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer;
         uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset;

         struct GEN8_VERTEX_BUFFER_STATE state = {
            .VertexBufferIndex = vb,
            .MemoryObjectControlState = GEN8_MOCS,
            .AddressModifyEnable = true,
            .BufferPitch = pipeline->binding_stride[vb],
            .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
            .BufferSize = buffer->size - offset
         };

         GEN8_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state);
         i++;
      }
   }

   if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) {
      /* If somebody compiled a pipeline after starting a command buffer the
       * scratch bo may have grown since we started this cmd buffer (and
       * emitted STATE_BASE_ADDRESS).  If we're binding that pipeline now,
       * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */
      if (cmd_buffer->state.scratch_size < pipeline->total_scratch)
         anv_cmd_buffer_emit_state_base_address(cmd_buffer);

      anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
   }

   if (cmd_buffer->state.descriptors_dirty)
      flush_descriptor_sets(cmd_buffer);

   if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_VP_DIRTY) {
      struct anv_dynamic_vp_state *vp_state = cmd_buffer->state.vp_state;
      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS,
                     .ScissorRectPointer = vp_state->scissor.offset);
      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC,
                     .CCViewportPointer = vp_state->cc_vp.offset);
      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP,
                     .SFClipViewportPointer = vp_state->sf_clip_vp.offset);
   }

   if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY |
                                  ANV_CMD_BUFFER_RS_DIRTY)) {
      anv_batch_emit_merge(&cmd_buffer->batch,
                           cmd_buffer->state.rs_state->state_sf,
                           pipeline->state_sf);
      anv_batch_emit_merge(&cmd_buffer->batch,
                           cmd_buffer->state.rs_state->state_raster,
                           pipeline->state_raster);
   }

   if (cmd_buffer->state.ds_state &&
       (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY |
                                   ANV_CMD_BUFFER_DS_DIRTY))) {
      anv_batch_emit_merge(&cmd_buffer->batch,
                           cmd_buffer->state.ds_state->state_wm_depth_stencil,
                           pipeline->state_wm_depth_stencil);
   }

   if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_CB_DIRTY |
                                  ANV_CMD_BUFFER_DS_DIRTY)) {
      struct anv_state state;
      if (cmd_buffer->state.ds_state == NULL)
         state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
                                             cmd_buffer->state.cb_state->state_color_calc,
                                             GEN8_COLOR_CALC_STATE_length, 64);
      else if (cmd_buffer->state.cb_state == NULL)
         state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
                                             cmd_buffer->state.ds_state->state_color_calc,
                                             GEN8_COLOR_CALC_STATE_length, 64);
      else
         state = anv_cmd_buffer_merge_dynamic(cmd_buffer,
                                              cmd_buffer->state.ds_state->state_color_calc,
                                              cmd_buffer->state.cb_state->state_color_calc,
                                              GEN8_COLOR_CALC_STATE_length, 64);

      anv_batch_emit(&cmd_buffer->batch,
                     GEN8_3DSTATE_CC_STATE_POINTERS,
                     .ColorCalcStatePointer = state.offset,
                     .ColorCalcStatePointerValid = true);
   }

   if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY |
                                  ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY)) {
      anv_batch_emit_merge(&cmd_buffer->batch,
                           cmd_buffer->state.state_vf, pipeline->state_vf);
   }

   cmd_buffer->state.vb_dirty &= ~vb_emit;
   cmd_buffer->state.dirty = 0;
}

void anv_CmdDraw(
    VkCmdBuffer                                 cmdBuffer,
    uint32_t                                    firstVertex,
    uint32_t                                    vertexCount,
    uint32_t                                    firstInstance,
    uint32_t                                    instanceCount)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   anv_cmd_buffer_flush_state(cmd_buffer);

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
                  .VertexAccessType = SEQUENTIAL,
                  .VertexCountPerInstance = vertexCount,
                  .StartVertexLocation = firstVertex,
                  .InstanceCount = instanceCount,
                  .StartInstanceLocation = firstInstance,
                  .BaseVertexLocation = 0);
}

void anv_CmdDrawIndexed(
    VkCmdBuffer                                 cmdBuffer,
    uint32_t                                    firstIndex,
    uint32_t                                    indexCount,
    int32_t                                     vertexOffset,
    uint32_t                                    firstInstance,
    uint32_t                                    instanceCount)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   anv_cmd_buffer_flush_state(cmd_buffer);

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
                  .VertexAccessType = RANDOM,
                  .VertexCountPerInstance = indexCount,
                  .StartVertexLocation = firstIndex,
                  .InstanceCount = instanceCount,
                  .StartInstanceLocation = firstInstance,
                  .BaseVertexLocation = vertexOffset);
}

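/* Small helpers around MI_LOAD_REGISTER_MEM and MI_LOAD_REGISTER_IMM: load
 * an MMIO register either from a dword in a BO or from an immediate.  The
 * indirect draw and dispatch paths use these to feed the 3DPRIM_* and
 * GPGPU_DISPATCHDIM* registers from application-provided buffers.
 */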
static void
anv_batch_lrm(struct anv_batch *batch,
              uint32_t reg, struct anv_bo *bo, uint32_t offset)
{
   anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM,
                  .RegisterAddress = reg,
                  .MemoryAddress = { bo, offset });
}

static void
anv_batch_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm)
{
   anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_IMM,
                  .RegisterOffset = reg,
                  .DataDWord = imm);
}

/* Auto-Draw / Indirect Registers */
#define GEN7_3DPRIM_END_OFFSET          0x2420
#define GEN7_3DPRIM_START_VERTEX        0x2430
#define GEN7_3DPRIM_VERTEX_COUNT        0x2434
#define GEN7_3DPRIM_INSTANCE_COUNT      0x2438
#define GEN7_3DPRIM_START_INSTANCE      0x243C
#define GEN7_3DPRIM_BASE_VERTEX         0x2440

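/* Indirect draw: the buffer is expected to hold the four consecutive
 * dwords of a non-indexed indirect command (vertex count, instance count,
 * first vertex, first instance).  We load them straight into the 3DPRIM
 * registers and let 3DPRIMITIVE pull its parameters from there via
 * IndirectParameterEnable.
 */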
void anv_CmdDrawIndirect(
    VkCmdBuffer                                 cmdBuffer,
    VkBuffer                                    _buffer,
    VkDeviceSize                                offset,
    uint32_t                                    count,
    uint32_t                                    stride)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
   struct anv_bo *bo = buffer->bo;
   uint32_t bo_offset = buffer->offset + offset;

   anv_cmd_buffer_flush_state(cmd_buffer);

   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12);
   anv_batch_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0);

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
                  .IndirectParameterEnable = true,
                  .VertexAccessType = SEQUENTIAL);
}

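/* Indexed indirect draw: same idea as above, but the command is five
 * dwords (index count, instance count, first index, vertex offset, first
 * instance), so the base vertex is loaded from the buffer instead of an
 * immediate zero.
 */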
void anv_CmdDrawIndexedIndirect(
    VkCmdBuffer                                 cmdBuffer,
    VkBuffer                                    _buffer,
    VkDeviceSize                                offset,
    uint32_t                                    count,
    uint32_t                                    stride)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
   struct anv_bo *bo = buffer->bo;
   uint32_t bo_offset = buffer->offset + offset;

   anv_cmd_buffer_flush_state(cmd_buffer);

   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12);
   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16);

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
                  .IndirectParameterEnable = true,
                  .VertexAccessType = RANDOM);
}

void anv_CmdDispatch(
    VkCmdBuffer                                 cmdBuffer,
    uint32_t                                    x,
    uint32_t                                    y,
    uint32_t                                    z)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;

   anv_cmd_buffer_flush_compute_state(cmd_buffer);

   anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
                  .SIMDSize = prog_data->simd_size / 16,
                  .ThreadDepthCounterMaximum = 0,
                  .ThreadHeightCounterMaximum = 0,
                  .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
                  .ThreadGroupIDXDimension = x,
                  .ThreadGroupIDYDimension = y,
                  .ThreadGroupIDZDimension = z,
                  .RightExecutionMask = pipeline->cs_right_mask,
                  .BottomExecutionMask = 0xffffffff);

   anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
}

#define GPGPU_DISPATCHDIMX 0x2500
#define GPGPU_DISPATCHDIMY 0x2504
#define GPGPU_DISPATCHDIMZ 0x2508

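/* Indirect dispatch: load the x/y/z workgroup counts from the buffer into
 * the GPGPU_DISPATCHDIM* registers and issue a GPGPU_WALKER with
 * IndirectParameterEnable set.
 */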
void anv_CmdDispatchIndirect(
    VkCmdBuffer                                 cmdBuffer,
    VkBuffer                                    _buffer,
    VkDeviceSize                                offset)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
   struct anv_bo *bo = buffer->bo;
   uint32_t bo_offset = buffer->offset + offset;

   anv_cmd_buffer_flush_compute_state(cmd_buffer);

   anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset);
   anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4);
   anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8);

   anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
                  .IndirectParameterEnable = true,
                  .SIMDSize = prog_data->simd_size / 16,
                  .ThreadDepthCounterMaximum = 0,
                  .ThreadHeightCounterMaximum = 0,
                  .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
                  .RightExecutionMask = pipeline->cs_right_mask,
                  .BottomExecutionMask = 0xffffffff);

   anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
}

void anv_CmdSetEvent(
    VkCmdBuffer                                 cmdBuffer,
    VkEvent                                     event,
    VkPipelineStageFlags                        stageMask)
{
   stub();
}

void anv_CmdResetEvent(
    VkCmdBuffer                                 cmdBuffer,
    VkEvent                                     event,
    VkPipelineStageFlags                        stageMask)
{
   stub();
}

void anv_CmdWaitEvents(
    VkCmdBuffer                                 cmdBuffer,
    uint32_t                                    eventCount,
    const VkEvent*                              pEvents,
    VkPipelineStageFlags                        srcStageMask,
    VkPipelineStageFlags                        destStageMask,
    uint32_t                                    memBarrierCount,
    const void* const*                          ppMemBarriers)
{
   stub();
}

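/* Translate a pipeline barrier into a single PIPE_CONTROL: the source
 * stage mask selects stall bits, and the memory barriers' output/input
 * masks are folded into the appropriate cache flush and invalidation
 * bits.  Everything is accumulated into one packet and emitted at the
 * end.
 */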
void anv_CmdPipelineBarrier(
    VkCmdBuffer                                 cmdBuffer,
    VkPipelineStageFlags                        srcStageMask,
    VkPipelineStageFlags                        destStageMask,
    VkBool32                                    byRegion,
    uint32_t                                    memBarrierCount,
    const void* const*                          ppMemBarriers)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   uint32_t b, *dw;

   struct GEN8_PIPE_CONTROL cmd = {
      GEN8_PIPE_CONTROL_header,
      .PostSyncOperation = NoWrite,
   };

   /* XXX: I think waitEvent is a no-op on our HW.  We should verify that. */

   if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) {
      /* This is just what PIPE_CONTROL does */
   }

   if (anv_clear_mask(&srcStageMask,
                      VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
                      VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
                      VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
                      VK_PIPELINE_STAGE_TESS_CONTROL_SHADER_BIT |
                      VK_PIPELINE_STAGE_TESS_EVALUATION_SHADER_BIT |
                      VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
                      VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
                      VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
                      VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
                      VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) {
      cmd.StallAtPixelScoreboard = true;
   }

   if (anv_clear_mask(&srcStageMask,
                      VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
                      VK_PIPELINE_STAGE_TRANSFER_BIT |
                      VK_PIPELINE_STAGE_TRANSITION_BIT)) {
      cmd.CommandStreamerStallEnable = true;
   }

   if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_HOST_BIT)) {
      anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT");
   }

   /* On our hardware, all stages will wait for execution as needed. */
   (void)destStageMask;

   /* We checked all known VkPipeEventFlags. */
   anv_assert(srcStageMask == 0);

   /* XXX: Right now, we're really dumb and just flush whatever categories
    * the app asks for.  One of these days we may make this a bit better
    * but right now that's all the hardware allows for in most areas.
    */
   VkMemoryOutputFlags out_flags = 0;
   VkMemoryInputFlags in_flags = 0;

   for (uint32_t i = 0; i < memBarrierCount; i++) {
      const struct anv_common *common = ppMemBarriers[i];
      switch (common->sType) {
      case VK_STRUCTURE_TYPE_MEMORY_BARRIER: {
         ANV_COMMON_TO_STRUCT(VkMemoryBarrier, barrier, common);
         out_flags |= barrier->outputMask;
         in_flags |= barrier->inputMask;
         break;
      }
      case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: {
         ANV_COMMON_TO_STRUCT(VkBufferMemoryBarrier, barrier, common);
         out_flags |= barrier->outputMask;
         in_flags |= barrier->inputMask;
         break;
      }
      case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: {
         ANV_COMMON_TO_STRUCT(VkImageMemoryBarrier, barrier, common);
         out_flags |= barrier->outputMask;
         in_flags |= barrier->inputMask;
         break;
      }
      default:
         unreachable("Invalid memory barrier type");
      }
   }

   for_each_bit(b, out_flags) {
      switch ((VkMemoryOutputFlags)(1 << b)) {
      case VK_MEMORY_OUTPUT_HOST_WRITE_BIT:
         break; /* FIXME: Little-core systems */
      case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT:
         cmd.DCFlushEnable = true;
         break;
      case VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT:
         cmd.RenderTargetCacheFlushEnable = true;
         break;
      case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
         cmd.DepthCacheFlushEnable = true;
         break;
      case VK_MEMORY_OUTPUT_TRANSFER_BIT:
         cmd.RenderTargetCacheFlushEnable = true;
         cmd.DepthCacheFlushEnable = true;
         break;
      default:
         unreachable("Invalid memory output flag");
      }
   }

   for_each_bit(b, in_flags) {
      switch ((VkMemoryInputFlags)(1 << b)) {
      case VK_MEMORY_INPUT_HOST_READ_BIT:
         break; /* FIXME: Little-core systems */
      case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT:
      case VK_MEMORY_INPUT_INDEX_FETCH_BIT:
      case VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT:
         cmd.VFCacheInvalidationEnable = true;
         break;
      case VK_MEMORY_INPUT_UNIFORM_READ_BIT:
         cmd.ConstantCacheInvalidationEnable = true;
         /* fallthrough */
      case VK_MEMORY_INPUT_SHADER_READ_BIT:
         cmd.DCFlushEnable = true;
         cmd.TextureCacheInvalidationEnable = true;
         break;
      case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT:
      case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
         break; /* XXX: Hunh? */
      case VK_MEMORY_INPUT_TRANSFER_BIT:
         cmd.TextureCacheInvalidationEnable = true;
         break;
      }
   }

   dw = anv_batch_emit_dwords(&cmd_buffer->batch, GEN8_PIPE_CONTROL_length);
   GEN8_PIPE_CONTROL_pack(&cmd_buffer->batch, dw, &cmd);
}

void anv_CmdPushConstants(
    VkCmdBuffer                                 cmdBuffer,
    VkPipelineLayout                            layout,
    VkShaderStageFlags                          stageFlags,
    uint32_t                                    start,
    uint32_t                                    length,
    const void*                                 values)
{
   stub();
}

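/* Emit 3DSTATE_DEPTH_BUFFER / 3DSTATE_STENCIL_BUFFER for the current
 * subpass.  When the subpass has no depth-stencil attachment we still emit
 * the packets, pointing at a null view with zero strides so depth and
 * stencil writes are disabled.
 */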
static void
anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_subpass *subpass = cmd_buffer->state.subpass;
   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   const struct anv_depth_stencil_view *view;

   static const struct anv_depth_stencil_view null_view =
      { .depth_format = D16_UNORM, .depth_stride = 0, .stencil_stride = 0 };

   if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) {
      const struct anv_attachment_view *aview =
         fb->attachments[subpass->depth_stencil_attachment];
      assert(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL);
      view = (const struct anv_depth_stencil_view *)aview;
   } else {
      view = &null_view;
   }

   /* FIXME: Implement the PMA stall W/A */
   /* FIXME: Width and Height are wrong */

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER,
                  .SurfaceType = SURFTYPE_2D,
                  .DepthWriteEnable = view->depth_stride > 0,
                  .StencilWriteEnable = view->stencil_stride > 0,
                  .HierarchicalDepthBufferEnable = false,
                  .SurfaceFormat = view->depth_format,
                  .SurfacePitch = view->depth_stride > 0 ? view->depth_stride - 1 : 0,
                  .SurfaceBaseAddress = { view->bo, view->depth_offset },
                  .Height = cmd_buffer->state.framebuffer->height - 1,
                  .Width = cmd_buffer->state.framebuffer->width - 1,
                  .LOD = 0,
                  .Depth = 1 - 1,
                  .MinimumArrayElement = 0,
                  .DepthBufferObjectControlState = GEN8_MOCS,
                  .RenderTargetViewExtent = 1 - 1,
                  .SurfaceQPitch = view->depth_qpitch >> 2);

   /* Disable hierarchical depth buffers. */
   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HIER_DEPTH_BUFFER);

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER,
                  .StencilBufferEnable = view->stencil_stride > 0,
                  .StencilBufferObjectControlState = GEN8_MOCS,
                  .SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0,
                  .SurfaceBaseAddress = { view->bo, view->stencil_offset },
                  .SurfaceQPitch = view->stencil_qpitch >> 2);

   /* Clear the clear params. */
   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLEAR_PARAMS);
}

void
anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
                             struct anv_subpass *subpass)
{
   cmd_buffer->state.subpass = subpass;

   cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;

   anv_cmd_buffer_emit_depth_stencil(cmd_buffer);
}

void anv_CmdBeginRenderPass(
    VkCmdBuffer                                 cmdBuffer,
    const VkRenderPassBeginInfo*                pRenderPassBegin,
    VkRenderPassContents                        contents)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass);
   ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer);

   cmd_buffer->state.framebuffer = framebuffer;
   cmd_buffer->state.pass = pass;

   const VkRect2D *render_area = &pRenderPassBegin->renderArea;

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE,
                  .ClippedDrawingRectangleYMin = render_area->offset.y,
                  .ClippedDrawingRectangleXMin = render_area->offset.x,
                  .ClippedDrawingRectangleYMax =
                     render_area->offset.y + render_area->extent.height - 1,
                  .ClippedDrawingRectangleXMax =
                     render_area->offset.x + render_area->extent.width - 1,
                  .DrawingRectangleOriginY = 0,
                  .DrawingRectangleOriginX = 0);

   anv_cmd_buffer_clear_attachments(cmd_buffer, pass,
                                    pRenderPassBegin->pAttachmentClearValues);

   anv_cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses);
}

void anv_CmdNextSubpass(
    VkCmdBuffer                                 cmdBuffer,
    VkRenderPassContents                        contents)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   assert(cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY);

   anv_cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->state.subpass + 1);
}

void anv_CmdEndRenderPass(
    VkCmdBuffer                                 cmdBuffer)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   /* Emit a flushing pipe control at the end of a pass.  This is kind of a
    * hack but it ensures that render targets always actually get written.
    * Eventually, we should do flushing based on image format transitions
    * or something of that nature.
    */
   anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
                  .PostSyncOperation = NoWrite,
                  .RenderTargetCacheFlushEnable = true,
                  .InstructionCacheInvalidateEnable = true,
                  .DepthCacheFlushEnable = true,
                  .VFCacheInvalidationEnable = true,
                  .TextureCacheInvalidationEnable = true,
                  .CommandStreamerStallEnable = true);
}

void anv_CmdExecuteCommands(
    VkCmdBuffer                                 cmdBuffer,
    uint32_t                                    cmdBuffersCount,
    const VkCmdBuffer*                          pCmdBuffers)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, primary, cmdBuffer);

   assert(primary->level == VK_CMD_BUFFER_LEVEL_PRIMARY);

   anv_assert(primary->state.subpass == &primary->state.pass->subpasses[0]);

   for (uint32_t i = 0; i < cmdBuffersCount; i++) {
      ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]);

      assert(secondary->level == VK_CMD_BUFFER_LEVEL_SECONDARY);

      anv_cmd_buffer_add_secondary(primary, secondary);
   }
}

VkResult anv_CreateCommandPool(
    VkDevice                                    _device,
    const VkCmdPoolCreateInfo*                  pCreateInfo,
    VkCmdPool*                                  pCmdPool)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_cmd_pool *pool;

   pool = anv_device_alloc(device, sizeof(*pool), 8,
                           VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
   if (pool == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   list_inithead(&pool->cmd_buffers);

   *pCmdPool = anv_cmd_pool_to_handle(pool);

   return VK_SUCCESS;
}

VkResult anv_DestroyCommandPool(
    VkDevice                                    _device,
    VkCmdPool                                   cmdPool)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_cmd_pool, pool, cmdPool);

   anv_ResetCommandPool(_device, cmdPool, 0);

   anv_device_free(device, pool);

   return VK_SUCCESS;
}

VkResult anv_ResetCommandPool(
    VkDevice                                    device,
    VkCmdPool                                   cmdPool,
    VkCmdPoolResetFlags                         flags)
{
   ANV_FROM_HANDLE(anv_cmd_pool, pool, cmdPool);

   list_for_each_entry_safe(struct anv_cmd_buffer, cmd_buffer,
                            &pool->cmd_buffers, pool_link) {
      anv_DestroyCommandBuffer(device, anv_cmd_buffer_to_handle(cmd_buffer));
   }

   return VK_SUCCESS;
}