anv: Delay allocation of relocation lists
[mesa.git] src/intel/vulkan/anv_batch_chain.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "anv_private.h"
31
32 #include "genxml/gen8_pack.h"
33
34 #include "util/debug.h"
35
36 /** \file anv_batch_chain.c
37 *
38 * This file contains functions related to anv_cmd_buffer as a data
39 * structure. This involves everything required to create and destroy
40 * the actual batch buffers as well as link them together and handle
41 * relocations and surface state. It specifically does *not* contain any
42 * handling of actual vkCmd calls beyond vkCmdExecuteCommands.
43 */
44
45 /*-----------------------------------------------------------------------*
46 * Functions related to anv_reloc_list
47 *-----------------------------------------------------------------------*/
48
49 VkResult
50 anv_reloc_list_init(struct anv_reloc_list *list,
51 const VkAllocationCallbacks *alloc)
52 {
53 memset(list, 0, sizeof(*list));
54 return VK_SUCCESS;
55 }
56
57 static VkResult
58 anv_reloc_list_init_clone(struct anv_reloc_list *list,
59 const VkAllocationCallbacks *alloc,
60 const struct anv_reloc_list *other_list)
61 {
62 list->num_relocs = other_list->num_relocs;
63 list->array_length = other_list->array_length;
64
65 if (list->num_relocs > 0) {
66 list->relocs =
67 vk_alloc(alloc, list->array_length * sizeof(*list->relocs), 8,
68 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
69 if (list->relocs == NULL)
70 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
71
72 list->reloc_bos =
73 vk_alloc(alloc, list->array_length * sizeof(*list->reloc_bos), 8,
74 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
75 if (list->reloc_bos == NULL) {
76 vk_free(alloc, list->relocs);
77 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
78 }
79
80 memcpy(list->relocs, other_list->relocs,
81 list->array_length * sizeof(*list->relocs));
82 memcpy(list->reloc_bos, other_list->reloc_bos,
83 list->array_length * sizeof(*list->reloc_bos));
84 } else {
85 list->relocs = NULL;
86 list->reloc_bos = NULL;
87 }
88
89 if (other_list->deps) {
90 list->deps = _mesa_set_clone(other_list->deps, NULL);
91 if (!list->deps) {
92 vk_free(alloc, list->relocs);
93 vk_free(alloc, list->reloc_bos);
94 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
95 }
96 } else {
97 list->deps = NULL;
98 }
99
100 return VK_SUCCESS;
101 }
102
103 void
104 anv_reloc_list_finish(struct anv_reloc_list *list,
105 const VkAllocationCallbacks *alloc)
106 {
107 vk_free(alloc, list->relocs);
108 vk_free(alloc, list->reloc_bos);
109 if (list->deps != NULL)
110 _mesa_set_destroy(list->deps, NULL);
111 }
112
113 static VkResult
114 anv_reloc_list_grow(struct anv_reloc_list *list,
115 const VkAllocationCallbacks *alloc,
116 size_t num_additional_relocs)
117 {
118 if (list->num_relocs + num_additional_relocs <= list->array_length)
119 return VK_SUCCESS;
120
121 size_t new_length = MAX2(256, list->array_length * 2);
122 while (new_length < list->num_relocs + num_additional_relocs)
123 new_length *= 2;
124
125 struct drm_i915_gem_relocation_entry *new_relocs =
126 vk_realloc(alloc, list->relocs,
127 new_length * sizeof(*list->relocs), 8,
128 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
129 if (new_relocs == NULL)
130 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
131 list->relocs = new_relocs;
132
133 struct anv_bo **new_reloc_bos =
134 vk_realloc(alloc, list->reloc_bos,
135 new_length * sizeof(*list->reloc_bos), 8,
136 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
137 if (new_reloc_bos == NULL)
138 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
139 list->reloc_bos = new_reloc_bos;
140
141 list->array_length = new_length;
142
143 return VK_SUCCESS;
144 }
145
146 VkResult
147 anv_reloc_list_add(struct anv_reloc_list *list,
148 const VkAllocationCallbacks *alloc,
149 uint32_t offset, struct anv_bo *target_bo, uint32_t delta)
150 {
151 struct drm_i915_gem_relocation_entry *entry;
152 int index;
153
154 if (target_bo->flags & EXEC_OBJECT_PINNED) {
155 if (list->deps == NULL) {
156 list->deps = _mesa_pointer_set_create(NULL);
157 if (unlikely(list->deps == NULL))
158 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
159 }
160 _mesa_set_add(list->deps, target_bo);
161 return VK_SUCCESS;
162 }
163
164 VkResult result = anv_reloc_list_grow(list, alloc, 1);
165 if (result != VK_SUCCESS)
166 return result;
167
168 /* XXX: Can we use I915_EXEC_HANDLE_LUT? */
169 index = list->num_relocs++;
170 list->reloc_bos[index] = target_bo;
171 entry = &list->relocs[index];
172 entry->target_handle = target_bo->gem_handle;
173 entry->delta = delta;
174 entry->offset = offset;
175 entry->presumed_offset = target_bo->offset;
176 entry->read_domains = 0;
177 entry->write_domain = 0;
178 VG(VALGRIND_CHECK_MEM_IS_DEFINED(entry, sizeof(*entry)));
179
180 return VK_SUCCESS;
181 }
182
183 static VkResult
184 anv_reloc_list_append(struct anv_reloc_list *list,
185 const VkAllocationCallbacks *alloc,
186 struct anv_reloc_list *other, uint32_t offset)
187 {
188 VkResult result = anv_reloc_list_grow(list, alloc, other->num_relocs);
189 if (result != VK_SUCCESS)
190 return result;
191
192 if (other->num_relocs > 0) {
193 memcpy(&list->relocs[list->num_relocs], &other->relocs[0],
194 other->num_relocs * sizeof(other->relocs[0]));
195 memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0],
196 other->num_relocs * sizeof(other->reloc_bos[0]));
197
198 for (uint32_t i = 0; i < other->num_relocs; i++)
199 list->relocs[i + list->num_relocs].offset += offset;
200
201 list->num_relocs += other->num_relocs;
202 }
203
204 if (other->deps) {
205 if (list->deps == NULL) {
206 list->deps = _mesa_pointer_set_create(NULL);
207 if (unlikely(list->deps == NULL))
208 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
209 }
210 set_foreach(other->deps, entry)
211 _mesa_set_add_pre_hashed(list->deps, entry->hash, entry->key);
212 }
213
214 return VK_SUCCESS;
215 }
216
217 /*-----------------------------------------------------------------------*
218 * Functions related to anv_batch
219 *-----------------------------------------------------------------------*/
220
221 void *
222 anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords)
223 {
224 if (batch->next + num_dwords * 4 > batch->end) {
225 VkResult result = batch->extend_cb(batch, batch->user_data);
226 if (result != VK_SUCCESS) {
227 anv_batch_set_error(batch, result);
228 return NULL;
229 }
230 }
231
232 void *p = batch->next;
233
234 batch->next += num_dwords * 4;
235 assert(batch->next <= batch->end);
236
237 return p;
238 }
239
240 uint64_t
241 anv_batch_emit_reloc(struct anv_batch *batch,
242 void *location, struct anv_bo *bo, uint32_t delta)
243 {
244 VkResult result = anv_reloc_list_add(batch->relocs, batch->alloc,
245 location - batch->start, bo, delta);
246 if (result != VK_SUCCESS) {
247 anv_batch_set_error(batch, result);
248 return 0;
249 }
250
251 return bo->offset + delta;
252 }
253
254 void
255 anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other)
256 {
257 uint32_t size, offset;
258
259 size = other->next - other->start;
260 assert(size % 4 == 0);
261
262 if (batch->next + size > batch->end) {
263 VkResult result = batch->extend_cb(batch, batch->user_data);
264 if (result != VK_SUCCESS) {
265 anv_batch_set_error(batch, result);
266 return;
267 }
268 }
269
270 assert(batch->next + size <= batch->end);
271
272 VG(VALGRIND_CHECK_MEM_IS_DEFINED(other->start, size));
273 memcpy(batch->next, other->start, size);
274
275 offset = batch->next - batch->start;
276 VkResult result = anv_reloc_list_append(batch->relocs, batch->alloc,
277 other->relocs, offset);
278 if (result != VK_SUCCESS) {
279 anv_batch_set_error(batch, result);
280 return;
281 }
282
283 batch->next += size;
284 }
285
286 /*-----------------------------------------------------------------------*
287 * Functions related to anv_batch_bo
288 *-----------------------------------------------------------------------*/
289
290 static VkResult
291 anv_batch_bo_create(struct anv_cmd_buffer *cmd_buffer,
292 struct anv_batch_bo **bbo_out)
293 {
294 VkResult result;
295
296 struct anv_batch_bo *bbo = vk_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo),
297 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
298 if (bbo == NULL)
299 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
300
301 result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo,
302 ANV_CMD_BUFFER_BATCH_SIZE);
303 if (result != VK_SUCCESS)
304 goto fail_alloc;
305
306 result = anv_reloc_list_init(&bbo->relocs, &cmd_buffer->pool->alloc);
307 if (result != VK_SUCCESS)
308 goto fail_bo_alloc;
309
310 *bbo_out = bbo;
311
312 return VK_SUCCESS;
313
314 fail_bo_alloc:
315 anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
316 fail_alloc:
317 vk_free(&cmd_buffer->pool->alloc, bbo);
318
319 return result;
320 }
321
322 static VkResult
323 anv_batch_bo_clone(struct anv_cmd_buffer *cmd_buffer,
324 const struct anv_batch_bo *other_bbo,
325 struct anv_batch_bo **bbo_out)
326 {
327 VkResult result;
328
329 struct anv_batch_bo *bbo = vk_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo),
330 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
331 if (bbo == NULL)
332 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
333
334 result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo,
335 other_bbo->bo.size);
336 if (result != VK_SUCCESS)
337 goto fail_alloc;
338
339 result = anv_reloc_list_init_clone(&bbo->relocs, &cmd_buffer->pool->alloc,
340 &other_bbo->relocs);
341 if (result != VK_SUCCESS)
342 goto fail_bo_alloc;
343
344 bbo->length = other_bbo->length;
345 memcpy(bbo->bo.map, other_bbo->bo.map, other_bbo->length);
346
347 *bbo_out = bbo;
348
349 return VK_SUCCESS;
350
351 fail_bo_alloc:
352 anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
353 fail_alloc:
354 vk_free(&cmd_buffer->pool->alloc, bbo);
355
356 return result;
357 }
358
359 static void
360 anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch,
361 size_t batch_padding)
362 {
363 batch->next = batch->start = bbo->bo.map;
364 batch->end = bbo->bo.map + bbo->bo.size - batch_padding;
365 batch->relocs = &bbo->relocs;
366 bbo->relocs.num_relocs = 0;
367 _mesa_set_clear(bbo->relocs.deps, NULL);
368 }
369
370 static void
371 anv_batch_bo_continue(struct anv_batch_bo *bbo, struct anv_batch *batch,
372 size_t batch_padding)
373 {
374 batch->start = bbo->bo.map;
375 batch->next = bbo->bo.map + bbo->length;
376 batch->end = bbo->bo.map + bbo->bo.size - batch_padding;
377 batch->relocs = &bbo->relocs;
378 }
379
380 static void
381 anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch)
382 {
383 assert(batch->start == bbo->bo.map);
384 bbo->length = batch->next - batch->start;
385 VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length));
386 }
387
388 static VkResult
389 anv_batch_bo_grow(struct anv_cmd_buffer *cmd_buffer, struct anv_batch_bo *bbo,
390 struct anv_batch *batch, size_t additional,
391 size_t batch_padding)
392 {
393 assert(batch->start == bbo->bo.map);
394 bbo->length = batch->next - batch->start;
395
396 size_t new_size = bbo->bo.size;
397 while (new_size <= bbo->length + additional + batch_padding)
398 new_size *= 2;
399
400 if (new_size == bbo->bo.size)
401 return VK_SUCCESS;
402
403 struct anv_bo new_bo;
404 VkResult result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool,
405 &new_bo, new_size);
406 if (result != VK_SUCCESS)
407 return result;
408
409 memcpy(new_bo.map, bbo->bo.map, bbo->length);
410
411 anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
412
413 bbo->bo = new_bo;
414 anv_batch_bo_continue(bbo, batch, batch_padding);
415
416 return VK_SUCCESS;
417 }
418
419 static void
420 anv_batch_bo_link(struct anv_cmd_buffer *cmd_buffer,
421 struct anv_batch_bo *prev_bbo,
422 struct anv_batch_bo *next_bbo,
423 uint32_t next_bbo_offset)
424 {
425 const uint32_t bb_start_offset =
426 prev_bbo->length - GEN8_MI_BATCH_BUFFER_START_length * 4;
427 ASSERTED const uint32_t *bb_start = prev_bbo->bo.map + bb_start_offset;
428
429 /* Make sure we're looking at a MI_BATCH_BUFFER_START */
430 assert(((*bb_start >> 29) & 0x07) == 0);
431 assert(((*bb_start >> 23) & 0x3f) == 49);
432
433 if (cmd_buffer->device->instance->physicalDevice.use_softpin) {
434 assert(prev_bbo->bo.flags & EXEC_OBJECT_PINNED);
435 assert(next_bbo->bo.flags & EXEC_OBJECT_PINNED);
436
437 write_reloc(cmd_buffer->device,
438 prev_bbo->bo.map + bb_start_offset + 4,
439 next_bbo->bo.offset + next_bbo_offset, true);
440 } else {
441 uint32_t reloc_idx = prev_bbo->relocs.num_relocs - 1;
442 assert(prev_bbo->relocs.relocs[reloc_idx].offset == bb_start_offset + 4);
443
444 prev_bbo->relocs.reloc_bos[reloc_idx] = &next_bbo->bo;
445 prev_bbo->relocs.relocs[reloc_idx].delta = next_bbo_offset;
446
447 /* Use a bogus presumed offset to force a relocation */
448 prev_bbo->relocs.relocs[reloc_idx].presumed_offset = -1;
449 }
450 }
451
452 static void
453 anv_batch_bo_destroy(struct anv_batch_bo *bbo,
454 struct anv_cmd_buffer *cmd_buffer)
455 {
456 anv_reloc_list_finish(&bbo->relocs, &cmd_buffer->pool->alloc);
457 anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
458 vk_free(&cmd_buffer->pool->alloc, bbo);
459 }
460
461 static VkResult
462 anv_batch_bo_list_clone(const struct list_head *list,
463 struct anv_cmd_buffer *cmd_buffer,
464 struct list_head *new_list)
465 {
466 VkResult result = VK_SUCCESS;
467
468 list_inithead(new_list);
469
470 struct anv_batch_bo *prev_bbo = NULL;
471 list_for_each_entry(struct anv_batch_bo, bbo, list, link) {
472 struct anv_batch_bo *new_bbo = NULL;
473 result = anv_batch_bo_clone(cmd_buffer, bbo, &new_bbo);
474 if (result != VK_SUCCESS)
475 break;
476 list_addtail(&new_bbo->link, new_list);
477
478 if (prev_bbo)
479 anv_batch_bo_link(cmd_buffer, prev_bbo, new_bbo, 0);
480
481 prev_bbo = new_bbo;
482 }
483
484 if (result != VK_SUCCESS) {
485 list_for_each_entry_safe(struct anv_batch_bo, bbo, new_list, link)
486 anv_batch_bo_destroy(bbo, cmd_buffer);
487 }
488
489 return result;
490 }
491
492 /*-----------------------------------------------------------------------*
493 * Functions related to anv_cmd_buffer
494 *-----------------------------------------------------------------------*/
495
496 static struct anv_batch_bo *
497 anv_cmd_buffer_current_batch_bo(struct anv_cmd_buffer *cmd_buffer)
498 {
499 return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->batch_bos.prev, link);
500 }
501
502 struct anv_address
503 anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer)
504 {
505 struct anv_state *bt_block = u_vector_head(&cmd_buffer->bt_block_states);
506 return (struct anv_address) {
507 .bo = anv_binding_table_pool(cmd_buffer->device)->block_pool.bo,
508 .offset = bt_block->offset,
509 };
510 }
511
512 static void
513 emit_batch_buffer_start(struct anv_cmd_buffer *cmd_buffer,
514 struct anv_bo *bo, uint32_t offset)
515 {
516 /* In gen8+ the address field grew to two dwords to accommodate 48-bit
517 * offsets. The high 16 bits are in the last dword, so we can use the gen8
518 * version in either case, as long as we set the instruction length in the
519 * header accordingly. This means that we always emit three dwords here
520 * and all the padding and adjustment we do in this file works for all
521 * gens.
522 */
523
524 #define GEN7_MI_BATCH_BUFFER_START_length 2
525 #define GEN7_MI_BATCH_BUFFER_START_length_bias 2
526
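/* For reference (worked out from the genxml packet sizes; treat the numbers
 * as an illustration): the gen8+ MI_BATCH_BUFFER_START packet is 3 dwords
 * and the gen7 packet is 2, both with a length bias of 2, so gen8_length
 * evaluates to 1 and gen7_length to 0 below.  Only the DWordLength field in
 * the header differs between the two encodings.
 */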
527 const uint32_t gen7_length =
528 GEN7_MI_BATCH_BUFFER_START_length - GEN7_MI_BATCH_BUFFER_START_length_bias;
529 const uint32_t gen8_length =
530 GEN8_MI_BATCH_BUFFER_START_length - GEN8_MI_BATCH_BUFFER_START_length_bias;
531
532 anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START, bbs) {
533 bbs.DWordLength = cmd_buffer->device->info.gen < 8 ?
534 gen7_length : gen8_length;
535 bbs.SecondLevelBatchBuffer = Firstlevelbatch;
536 bbs.AddressSpaceIndicator = ASI_PPGTT;
537 bbs.BatchBufferStartAddress = (struct anv_address) { bo, offset };
538 }
539 }
540
541 static void
542 cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer,
543 struct anv_batch_bo *bbo)
544 {
545 struct anv_batch *batch = &cmd_buffer->batch;
546 struct anv_batch_bo *current_bbo =
547 anv_cmd_buffer_current_batch_bo(cmd_buffer);
548
549 /* We set the end of the batch a little short so we would be sure we
550 * have room for the chaining command. Since we're about to emit the
551 * chaining command, let's set it back where it should go.
552 */
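/* (Illustrative note: with GEN8_MI_BATCH_BUFFER_START_length == 3 in the
 * genxml, this restores the 12 bytes that anv_batch_bo_start() reserved at
 * the end of the BO, which is exactly the size of the chaining packet
 * emitted below.)
 */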
553 batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4;
554 assert(batch->end == current_bbo->bo.map + current_bbo->bo.size);
555
556 emit_batch_buffer_start(cmd_buffer, &bbo->bo, 0);
557
558 anv_batch_bo_finish(current_bbo, batch);
559 }
560
561 static VkResult
562 anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data)
563 {
564 struct anv_cmd_buffer *cmd_buffer = _data;
565 struct anv_batch_bo *new_bbo;
566
567 VkResult result = anv_batch_bo_create(cmd_buffer, &new_bbo);
568 if (result != VK_SUCCESS)
569 return result;
570
571 struct anv_batch_bo **seen_bbo = u_vector_add(&cmd_buffer->seen_bbos);
572 if (seen_bbo == NULL) {
573 anv_batch_bo_destroy(new_bbo, cmd_buffer);
574 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
575 }
576 *seen_bbo = new_bbo;
577
578 cmd_buffer_chain_to_batch_bo(cmd_buffer, new_bbo);
579
580 list_addtail(&new_bbo->link, &cmd_buffer->batch_bos);
581
582 anv_batch_bo_start(new_bbo, batch, GEN8_MI_BATCH_BUFFER_START_length * 4);
583
584 return VK_SUCCESS;
585 }
586
587 static VkResult
588 anv_cmd_buffer_grow_batch(struct anv_batch *batch, void *_data)
589 {
590 struct anv_cmd_buffer *cmd_buffer = _data;
591 struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer);
592
593 anv_batch_bo_grow(cmd_buffer, bbo, &cmd_buffer->batch, 4096,
594 GEN8_MI_BATCH_BUFFER_START_length * 4);
595
596 return VK_SUCCESS;
597 }
598
599 /** Allocate a binding table
600 *
601 * This function allocates a binding table. This is a bit more complicated
602 * than one would think due to a combination of Vulkan driver design and some
603 * unfortunate hardware restrictions.
604 *
605 * The 3DSTATE_BINDING_TABLE_POINTERS_* packets only have a 16-bit field for
606 * the binding table pointer which means that all binding tables need to live
607 * in the bottom 64k of surface state base address. The way the GL driver has
608 * classically dealt with this restriction is to emit all surface states
609 * on-the-fly into the batch and have a batch buffer smaller than 64k. This
610 * isn't really an option in Vulkan for a couple of reasons:
611 *
612 * 1) In Vulkan, we have growing (or chaining) batches so surface states have
613 * to live in their own buffer and we have to be able to re-emit
614 * STATE_BASE_ADDRESS as needed which requires a full pipeline stall. In
615 * order to avoid emitting STATE_BASE_ADDRESS any more often than needed
616 * (it's not that hard to hit 64k of just binding tables), we allocate
617 * surface state objects up-front when VkImageView is created. In order
618 * for this to work, surface state objects need to be allocated from a
619 * global buffer.
620 *
621 * 2) We tried to design the surface state system in such a way that it's
622 * already ready for bindless texturing. The way bindless texturing works
623 * on our hardware is that you have a big pool of surface state objects
624 * (with its own state base address) and the bindless handles are simply
625 * offsets into that pool. With the architecture we chose, we already
626 * have that pool and it's exactly the same pool that we use for regular
627 * surface states so we should already be ready for bindless.
628 *
629 * 3) For render targets, we need to be able to fill out the surface states
630 * later in vkBeginRenderPass so that we can assign clear colors
631 * correctly. One way to do this would be to just create the surface
632 * state data and then repeatedly copy it into the surface state BO every
633 * time we have to re-emit STATE_BASE_ADDRESS. While this works, it's
634 * rather annoying; it's much simpler to just allocate them up-front and
635 * re-use them for the entire render pass.
636 *
637 * While none of these are technically blockers for emitting state on the fly
638 * like we do in GL, the ability to have a single surface state pool
639 * simplifies things greatly. Unfortunately, it comes at a cost...
640 *
641 * Because of the 64k limitation of 3DSTATE_BINDING_TABLE_POINTERS_*, we can't
642 * place the binding tables just anywhere in surface state base address.
643 * Because 64k isn't a whole lot of space, we can't simply restrict the
644 * surface state buffer to 64k; we have to be more clever. The solution we've
645 * chosen is to have a block pool with a maximum size of 2G that starts at
646 * zero and grows in both directions. All surface states are allocated from
647 * the top of the pool (positive offsets) and we allocate blocks (< 64k) of
648 * binding tables from the bottom of the pool (negative offsets). Every time
649 * we allocate a new binding table block, we set surface state base address to
650 * point to the bottom of the binding table block. This way all of the
651 * binding tables in the block are in the bottom 64k of surface state base
652 * address. When we fill out the binding table, we add the distance between
653 * the bottom of our binding table block and zero of the block pool to the
654 * surface state offsets so that they are correct relative to our new surface
655 * state base address at the bottom of the binding table block.
656 *
657 * \see adjust_relocations_from_state_pool()
658 * \see adjust_relocations_to_state_pool()
659 *
660 * \param[in] entries The number of surface state entries the binding
661 * table should be able to hold.
662 *
663 * \param[out] state_offset The offset from surface state base address
664 * where the surface states live. This must be
665 * added to the surface state offset when it is
666 * written into the binding table entry.
667 *
668 * \return An anv_state representing the binding table
669 */
670 struct anv_state
671 anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer,
672 uint32_t entries, uint32_t *state_offset)
673 {
674 struct anv_device *device = cmd_buffer->device;
675 struct anv_state_pool *state_pool = &device->surface_state_pool;
676 struct anv_state *bt_block = u_vector_head(&cmd_buffer->bt_block_states);
677 struct anv_state state;
678
679 state.alloc_size = align_u32(entries * 4, 32);
680
681 if (cmd_buffer->bt_next + state.alloc_size > state_pool->block_size)
682 return (struct anv_state) { 0 };
683
684 state.offset = cmd_buffer->bt_next;
685 state.map = anv_block_pool_map(&anv_binding_table_pool(device)->block_pool,
686 bt_block->offset + state.offset);
687
688 cmd_buffer->bt_next += state.alloc_size;
689
690 if (device->instance->physicalDevice.use_softpin) {
691 assert(bt_block->offset >= 0);
692 *state_offset = device->surface_state_pool.block_pool.start_address -
693 device->binding_table_pool.block_pool.start_address - bt_block->offset;
694 } else {
695 assert(bt_block->offset < 0);
696 *state_offset = -bt_block->offset;
697 }
698
699 return state;
700 }
701
702 struct anv_state
703 anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer)
704 {
705 struct isl_device *isl_dev = &cmd_buffer->device->isl_dev;
706 return anv_state_stream_alloc(&cmd_buffer->surface_state_stream,
707 isl_dev->ss.size, isl_dev->ss.align);
708 }
709
710 struct anv_state
711 anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer,
712 uint32_t size, uint32_t alignment)
713 {
714 return anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream,
715 size, alignment);
716 }
717
718 VkResult
719 anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer)
720 {
721 struct anv_state *bt_block = u_vector_add(&cmd_buffer->bt_block_states);
722 if (bt_block == NULL) {
723 anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_HOST_MEMORY);
724 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
725 }
726
727 *bt_block = anv_binding_table_pool_alloc(cmd_buffer->device);
728 cmd_buffer->bt_next = 0;
729
730 return VK_SUCCESS;
731 }
732
733 VkResult
734 anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
735 {
736 struct anv_batch_bo *batch_bo;
737 VkResult result;
738
739 list_inithead(&cmd_buffer->batch_bos);
740
741 result = anv_batch_bo_create(cmd_buffer, &batch_bo);
742 if (result != VK_SUCCESS)
743 return result;
744
745 list_addtail(&batch_bo->link, &cmd_buffer->batch_bos);
746
747 cmd_buffer->batch.alloc = &cmd_buffer->pool->alloc;
748 cmd_buffer->batch.user_data = cmd_buffer;
749
750 if (cmd_buffer->device->can_chain_batches) {
751 cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch;
752 } else {
753 cmd_buffer->batch.extend_cb = anv_cmd_buffer_grow_batch;
754 }
755
756 anv_batch_bo_start(batch_bo, &cmd_buffer->batch,
757 GEN8_MI_BATCH_BUFFER_START_length * 4);
758
759 int success = u_vector_init(&cmd_buffer->seen_bbos,
760 sizeof(struct anv_bo *),
761 8 * sizeof(struct anv_bo *));
762 if (!success)
763 goto fail_batch_bo;
764
765 *(struct anv_batch_bo **)u_vector_add(&cmd_buffer->seen_bbos) = batch_bo;
766
767 /* u_vector requires power-of-two size elements */
768 unsigned pow2_state_size = util_next_power_of_two(sizeof(struct anv_state));
769 success = u_vector_init(&cmd_buffer->bt_block_states,
770 pow2_state_size, 8 * pow2_state_size);
771 if (!success)
772 goto fail_seen_bbos;
773
774 result = anv_reloc_list_init(&cmd_buffer->surface_relocs,
775 &cmd_buffer->pool->alloc);
776 if (result != VK_SUCCESS)
777 goto fail_bt_blocks;
778 cmd_buffer->last_ss_pool_center = 0;
779
780 result = anv_cmd_buffer_new_binding_table_block(cmd_buffer);
781 if (result != VK_SUCCESS)
782 goto fail_bt_blocks;
783
784 return VK_SUCCESS;
785
786 fail_bt_blocks:
787 u_vector_finish(&cmd_buffer->bt_block_states);
788 fail_seen_bbos:
789 u_vector_finish(&cmd_buffer->seen_bbos);
790 fail_batch_bo:
791 anv_batch_bo_destroy(batch_bo, cmd_buffer);
792
793 return result;
794 }
795
796 void
797 anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
798 {
799 struct anv_state *bt_block;
800 u_vector_foreach(bt_block, &cmd_buffer->bt_block_states)
801 anv_binding_table_pool_free(cmd_buffer->device, *bt_block);
802 u_vector_finish(&cmd_buffer->bt_block_states);
803
804 anv_reloc_list_finish(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc);
805
806 u_vector_finish(&cmd_buffer->seen_bbos);
807
808 /* Destroy all of the batch buffers */
809 list_for_each_entry_safe(struct anv_batch_bo, bbo,
810 &cmd_buffer->batch_bos, link) {
811 anv_batch_bo_destroy(bbo, cmd_buffer);
812 }
813 }
814
815 void
816 anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
817 {
818 /* Delete all but the first batch bo */
819 assert(!list_is_empty(&cmd_buffer->batch_bos));
820 while (cmd_buffer->batch_bos.next != cmd_buffer->batch_bos.prev) {
821 struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer);
822 list_del(&bbo->link);
823 anv_batch_bo_destroy(bbo, cmd_buffer);
824 }
825 assert(!list_is_empty(&cmd_buffer->batch_bos));
826
827 anv_batch_bo_start(anv_cmd_buffer_current_batch_bo(cmd_buffer),
828 &cmd_buffer->batch,
829 GEN8_MI_BATCH_BUFFER_START_length * 4);
830
831 while (u_vector_length(&cmd_buffer->bt_block_states) > 1) {
832 struct anv_state *bt_block = u_vector_remove(&cmd_buffer->bt_block_states);
833 anv_binding_table_pool_free(cmd_buffer->device, *bt_block);
834 }
835 assert(u_vector_length(&cmd_buffer->bt_block_states) == 1);
836 cmd_buffer->bt_next = 0;
837
838 cmd_buffer->surface_relocs.num_relocs = 0;
839 _mesa_set_clear(cmd_buffer->surface_relocs.deps, NULL);
840 cmd_buffer->last_ss_pool_center = 0;
841
842 /* Reset the list of seen buffers */
843 cmd_buffer->seen_bbos.head = 0;
844 cmd_buffer->seen_bbos.tail = 0;
845
846 *(struct anv_batch_bo **)u_vector_add(&cmd_buffer->seen_bbos) =
847 anv_cmd_buffer_current_batch_bo(cmd_buffer);
848 }
849
850 void
851 anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
852 {
853 struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer);
854
855 if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
856 /* When we start a batch buffer, we subtract a certain amount of
857 * padding from the end to ensure that we always have room to emit a
858 * BATCH_BUFFER_START to chain to the next BO. We need to remove
859 * that padding before we end the batch; otherwise, we may end up
860 * with our BATCH_BUFFER_END in another BO.
861 */
862 cmd_buffer->batch.end += GEN8_MI_BATCH_BUFFER_START_length * 4;
863 assert(cmd_buffer->batch.end == batch_bo->bo.map + batch_bo->bo.size);
864
865 anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_END, bbe);
866
867 /* Round batch up to an even number of dwords. */
868 if ((cmd_buffer->batch.next - cmd_buffer->batch.start) & 4)
869 anv_batch_emit(&cmd_buffer->batch, GEN8_MI_NOOP, noop);
870
871 cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_PRIMARY;
872 } else {
873 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
874 /* If this is a secondary command buffer, we need to determine the
875 * mode in which it will be executed with vkExecuteCommands. We
876 * determine this statically here so that this stays in sync with the
877 * actual ExecuteCommands implementation.
878 */
879 const uint32_t length = cmd_buffer->batch.next - cmd_buffer->batch.start;
880 if (!cmd_buffer->device->can_chain_batches) {
881 cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT;
882 } else if ((cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) &&
883 (length < ANV_CMD_BUFFER_BATCH_SIZE / 2)) {
884 /* If the secondary has exactly one batch buffer in its list *and*
885 * that batch buffer is less than half of the maximum size, we're
886 * probably better off simply copying it into our batch.
887 */
888 cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_EMIT;
889 } else if (!(cmd_buffer->usage_flags &
890 VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT)) {
891 cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CHAIN;
892
893 /* In order to chain, we need this command buffer to contain an
894 * MI_BATCH_BUFFER_START which will jump back to the calling batch.
895 * It doesn't matter where it points now so long as it has a valid
896 * relocation. We'll adjust it later as part of the chaining
897 * process.
898 *
899 * We set the end of the batch a little short so we would be sure we
900 * have room for the chaining command. Since we're about to emit the
901 * chaining command, let's set it back where it should go.
902 */
903 cmd_buffer->batch.end += GEN8_MI_BATCH_BUFFER_START_length * 4;
904 assert(cmd_buffer->batch.start == batch_bo->bo.map);
905 assert(cmd_buffer->batch.end == batch_bo->bo.map + batch_bo->bo.size);
906
907 emit_batch_buffer_start(cmd_buffer, &batch_bo->bo, 0);
908 assert(cmd_buffer->batch.start == batch_bo->bo.map);
909 } else {
910 cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN;
911 }
912 }
913
914 anv_batch_bo_finish(batch_bo, &cmd_buffer->batch);
915 }
916
917 static VkResult
918 anv_cmd_buffer_add_seen_bbos(struct anv_cmd_buffer *cmd_buffer,
919 struct list_head *list)
920 {
921 list_for_each_entry(struct anv_batch_bo, bbo, list, link) {
922 struct anv_batch_bo **bbo_ptr = u_vector_add(&cmd_buffer->seen_bbos);
923 if (bbo_ptr == NULL)
924 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
925
926 *bbo_ptr = bbo;
927 }
928
929 return VK_SUCCESS;
930 }
931
932 void
933 anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
934 struct anv_cmd_buffer *secondary)
935 {
936 switch (secondary->exec_mode) {
937 case ANV_CMD_BUFFER_EXEC_MODE_EMIT:
938 anv_batch_emit_batch(&primary->batch, &secondary->batch);
939 break;
940 case ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT: {
941 struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(primary);
942 unsigned length = secondary->batch.end - secondary->batch.start;
943 anv_batch_bo_grow(primary, bbo, &primary->batch, length,
944 GEN8_MI_BATCH_BUFFER_START_length * 4);
945 anv_batch_emit_batch(&primary->batch, &secondary->batch);
946 break;
947 }
948 case ANV_CMD_BUFFER_EXEC_MODE_CHAIN: {
949 struct anv_batch_bo *first_bbo =
950 list_first_entry(&secondary->batch_bos, struct anv_batch_bo, link);
951 struct anv_batch_bo *last_bbo =
952 list_last_entry(&secondary->batch_bos, struct anv_batch_bo, link);
953
954 emit_batch_buffer_start(primary, &first_bbo->bo, 0);
955
956 struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary);
957 assert(primary->batch.start == this_bbo->bo.map);
958 uint32_t offset = primary->batch.next - primary->batch.start;
959
960 /* Make the tail of the secondary point back to right after the
961 * MI_BATCH_BUFFER_START in the primary batch.
962 */
963 anv_batch_bo_link(primary, last_bbo, this_bbo, offset);
964
965 anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos);
966 break;
967 }
968 case ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN: {
969 struct list_head copy_list;
970 VkResult result = anv_batch_bo_list_clone(&secondary->batch_bos,
971 secondary,
972 &copy_list);
973 if (result != VK_SUCCESS)
974 return; /* FIXME */
975
976 anv_cmd_buffer_add_seen_bbos(primary, &copy_list);
977
978 struct anv_batch_bo *first_bbo =
979 list_first_entry(&copy_list, struct anv_batch_bo, link);
980 struct anv_batch_bo *last_bbo =
981 list_last_entry(&copy_list, struct anv_batch_bo, link);
982
983 cmd_buffer_chain_to_batch_bo(primary, first_bbo);
984
985 list_splicetail(&copy_list, &primary->batch_bos);
986
987 anv_batch_bo_continue(last_bbo, &primary->batch,
988 GEN8_MI_BATCH_BUFFER_START_length * 4);
989 break;
990 }
991 default:
992 assert(!"Invalid execution mode");
993 }
994
995 anv_reloc_list_append(&primary->surface_relocs, &primary->pool->alloc,
996 &secondary->surface_relocs, 0);
997 }
998
999 struct anv_execbuf {
1000 struct drm_i915_gem_execbuffer2 execbuf;
1001
1002 struct drm_i915_gem_exec_object2 * objects;
1003 uint32_t bo_count;
1004 struct anv_bo ** bos;
1005
1006 /* Allocated length of the 'objects' and 'bos' arrays */
1007 uint32_t array_length;
1008
1009 bool has_relocs;
1010
1011 uint32_t fence_count;
1012 uint32_t fence_array_length;
1013 struct drm_i915_gem_exec_fence * fences;
1014 struct anv_syncobj ** syncobjs;
1015 };
1016
1017 static void
1018 anv_execbuf_init(struct anv_execbuf *exec)
1019 {
1020 memset(exec, 0, sizeof(*exec));
1021 }
1022
1023 static void
1024 anv_execbuf_finish(struct anv_execbuf *exec,
1025 const VkAllocationCallbacks *alloc)
1026 {
1027 vk_free(alloc, exec->objects);
1028 vk_free(alloc, exec->bos);
1029 vk_free(alloc, exec->fences);
1030 vk_free(alloc, exec->syncobjs);
1031 }
1032
1033 static int
1034 _compare_bo_handles(const void *_bo1, const void *_bo2)
1035 {
1036 struct anv_bo * const *bo1 = _bo1;
1037 struct anv_bo * const *bo2 = _bo2;
1038
1039 return (*bo1)->gem_handle - (*bo2)->gem_handle;
1040 }
1041
1042 static VkResult
1043 anv_execbuf_add_bo_set(struct anv_execbuf *exec,
1044 struct set *deps,
1045 uint32_t extra_flags,
1046 const VkAllocationCallbacks *alloc);
1047
1048 static VkResult
1049 anv_execbuf_add_bo(struct anv_execbuf *exec,
1050 struct anv_bo *bo,
1051 struct anv_reloc_list *relocs,
1052 uint32_t extra_flags,
1053 const VkAllocationCallbacks *alloc)
1054 {
1055 struct drm_i915_gem_exec_object2 *obj = NULL;
1056
1057 if (bo->index < exec->bo_count && exec->bos[bo->index] == bo)
1058 obj = &exec->objects[bo->index];
1059
1060 if (obj == NULL) {
1061 /* We've never seen this one before. Add it to the list and assign
1062 * an id that we can use later.
1063 */
1064 if (exec->bo_count >= exec->array_length) {
1065 uint32_t new_len = exec->objects ? exec->array_length * 2 : 64;
1066
1067 struct drm_i915_gem_exec_object2 *new_objects =
1068 vk_alloc(alloc, new_len * sizeof(*new_objects),
1069 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1070 if (new_objects == NULL)
1071 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1072
1073 struct anv_bo **new_bos =
1074 vk_alloc(alloc, new_len * sizeof(*new_bos),
1075 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1076 if (new_bos == NULL) {
1077 vk_free(alloc, new_objects);
1078 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1079 }
1080
1081 if (exec->objects) {
1082 memcpy(new_objects, exec->objects,
1083 exec->bo_count * sizeof(*new_objects));
1084 memcpy(new_bos, exec->bos,
1085 exec->bo_count * sizeof(*new_bos));
1086 }
1087
1088 vk_free(alloc, exec->objects);
1089 vk_free(alloc, exec->bos);
1090
1091 exec->objects = new_objects;
1092 exec->bos = new_bos;
1093 exec->array_length = new_len;
1094 }
1095
1096 assert(exec->bo_count < exec->array_length);
1097
1098 bo->index = exec->bo_count++;
1099 obj = &exec->objects[bo->index];
1100 exec->bos[bo->index] = bo;
1101
1102 obj->handle = bo->gem_handle;
1103 obj->relocation_count = 0;
1104 obj->relocs_ptr = 0;
1105 obj->alignment = 0;
1106 obj->offset = bo->offset;
1107 obj->flags = (bo->flags & ~ANV_BO_FLAG_MASK) | extra_flags;
1108 obj->rsvd1 = 0;
1109 obj->rsvd2 = 0;
1110 }
1111
1112 if (relocs != NULL) {
1113 assert(obj->relocation_count == 0);
1114
1115 if (relocs->num_relocs > 0) {
1116 /* This is the first time we've ever seen a list of relocations for
1117 * this BO. Go ahead and set the relocations and then walk the list
1118 * of relocations and add them all.
1119 */
1120 exec->has_relocs = true;
1121 obj->relocation_count = relocs->num_relocs;
1122 obj->relocs_ptr = (uintptr_t) relocs->relocs;
1123
1124 for (size_t i = 0; i < relocs->num_relocs; i++) {
1125 VkResult result;
1126
1127 /* A quick sanity check on relocations */
1128 assert(relocs->relocs[i].offset < bo->size);
1129 result = anv_execbuf_add_bo(exec, relocs->reloc_bos[i], NULL,
1130 extra_flags, alloc);
1131
1132 if (result != VK_SUCCESS)
1133 return result;
1134 }
1135 }
1136
1137 return anv_execbuf_add_bo_set(exec, relocs->deps, extra_flags, alloc);
1138 }
1139
1140 return VK_SUCCESS;
1141 }
1142
1143 /* Add BO dependencies to execbuf */
1144 static VkResult
1145 anv_execbuf_add_bo_set(struct anv_execbuf *exec,
1146 struct set *deps,
1147 uint32_t extra_flags,
1148 const VkAllocationCallbacks *alloc)
1149 {
1150 if (!deps || deps->entries <= 0)
1151 return VK_SUCCESS;
1152
1153 const uint32_t entries = deps->entries;
1154 struct anv_bo **bos =
1155 vk_alloc(alloc, entries * sizeof(*bos),
1156 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1157 if (bos == NULL)
1158 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1159
1160 struct anv_bo **bo = bos;
1161 set_foreach(deps, entry) {
1162 *bo++ = (void *)entry->key;
1163 }
1164
1165 qsort(bos, entries, sizeof(struct anv_bo*), _compare_bo_handles);
1166
1167 VkResult result = VK_SUCCESS;
1168 for (bo = bos; bo < bos + entries; bo++) {
1169 result = anv_execbuf_add_bo(exec, *bo, NULL, extra_flags, alloc);
1170 if (result != VK_SUCCESS)
1171 break;
1172 }
1173
1174 vk_free(alloc, bos);
1175
1176 return result;
1177 }
1178
1179 static VkResult
1180 anv_execbuf_add_syncobj(struct anv_execbuf *exec,
1181 uint32_t handle, uint32_t flags,
1182 const VkAllocationCallbacks *alloc)
1183 {
1184 assert(flags != 0);
1185
1186 if (exec->fence_count >= exec->fence_array_length) {
1187 uint32_t new_len = MAX2(exec->fence_array_length * 2, 64);
1188
1189 exec->fences = vk_realloc(alloc, exec->fences,
1190 new_len * sizeof(*exec->fences),
1191 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1192 if (exec->fences == NULL)
1193 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1194
1195 exec->fence_array_length = new_len;
1196 }
1197
1198 exec->fences[exec->fence_count] = (struct drm_i915_gem_exec_fence) {
1199 .handle = handle,
1200 .flags = flags,
1201 };
1202
1203 exec->fence_count++;
1204
1205 return VK_SUCCESS;
1206 }
1207
1208 static void
1209 anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer,
1210 struct anv_reloc_list *list)
1211 {
1212 for (size_t i = 0; i < list->num_relocs; i++)
1213 list->relocs[i].target_handle = list->reloc_bos[i]->index;
1214 }
1215
1216 static void
1217 adjust_relocations_from_state_pool(struct anv_state_pool *pool,
1218 struct anv_reloc_list *relocs,
1219 uint32_t last_pool_center_bo_offset)
1220 {
1221 assert(last_pool_center_bo_offset <= pool->block_pool.center_bo_offset);
1222 uint32_t delta = pool->block_pool.center_bo_offset - last_pool_center_bo_offset;
1223
1224 for (size_t i = 0; i < relocs->num_relocs; i++) {
1225 /* All of the relocations from this block pool to other BO's should
1226 * have been emitted relative to the surface block pool center. We
1227 * need to add the center offset to make them relative to the
1228 * beginning of the actual GEM bo.
1229 */
1230 relocs->relocs[i].offset += delta;
1231 }
1232 }
1233
1234 static void
1235 adjust_relocations_to_state_pool(struct anv_state_pool *pool,
1236 struct anv_bo *from_bo,
1237 struct anv_reloc_list *relocs,
1238 uint32_t last_pool_center_bo_offset)
1239 {
1240 assert(last_pool_center_bo_offset <= pool->block_pool.center_bo_offset);
1241 uint32_t delta = pool->block_pool.center_bo_offset - last_pool_center_bo_offset;
1242
1243 /* When we initially emit relocations into a block pool, we don't
1244 * actually know what the final center_bo_offset will be so we just emit
1245 * it as if center_bo_offset == 0. Now that we know what the center
1246 * offset is, we need to walk the list of relocations and adjust any
1247 * relocations that point to the pool bo with the correct offset.
1248 */
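/* Illustrative example with made-up numbers: say the pool's center_bo_offset
 * was 0 at the last adjustment and is now 32768, so delta == 32768.  A
 * relocation that was emitted with delta == -8192 (a binding table block
 * 8192 bytes below the pool's center, stored as a wrapped uint32_t) becomes
 * 24576, which is its correct offset from the start of the resized pool GEM
 * BO; write_reloc() below then re-writes the batch so the address matches
 * this new presumed value.
 */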
1249 for (size_t i = 0; i < relocs->num_relocs; i++) {
1250 if (relocs->reloc_bos[i] == pool->block_pool.bo) {
1251 /* Adjust the delta value in the relocation to correctly
1252 * correspond to the new delta. Initially, this value may have
1253 * been negative (if treated as unsigned), but we trust in
1254 * uint32_t roll-over to fix that for us at this point.
1255 */
1256 relocs->relocs[i].delta += delta;
1257
1258 /* Since the delta has changed, we need to update the actual
1259 * relocated value with the new presumed value. This function
1260 * should only be called on batch buffers, so we know it isn't in
1261 * use by the GPU at the moment.
1262 */
1263 assert(relocs->relocs[i].offset < from_bo->size);
1264 write_reloc(pool->block_pool.device,
1265 from_bo->map + relocs->relocs[i].offset,
1266 relocs->relocs[i].presumed_offset +
1267 relocs->relocs[i].delta, false);
1268 }
1269 }
1270 }
1271
1272 static void
1273 anv_reloc_list_apply(struct anv_device *device,
1274 struct anv_reloc_list *list,
1275 struct anv_bo *bo,
1276 bool always_relocate)
1277 {
1278 for (size_t i = 0; i < list->num_relocs; i++) {
1279 struct anv_bo *target_bo = list->reloc_bos[i];
1280 if (list->relocs[i].presumed_offset == target_bo->offset &&
1281 !always_relocate)
1282 continue;
1283
1284 void *p = bo->map + list->relocs[i].offset;
1285 write_reloc(device, p, target_bo->offset + list->relocs[i].delta, true);
1286 list->relocs[i].presumed_offset = target_bo->offset;
1287 }
1288 }
1289
1290 /**
1291 * This function applies the relocation for a command buffer and writes the
1292 * actual addresses into the buffers as per what we were told by the kernel on
1293 * the previous execbuf2 call. This should be safe to do because, for each
1294 * relocated address, we have two cases:
1295 *
1296 * 1) The target BO is inactive (as seen by the kernel). In this case, it is
1297 * not in use by the GPU so updating the address is 100% ok. It won't be
1298 * in-use by the GPU (from our context) again until the next execbuf2
1299 * happens. If the kernel decides to move it in the next execbuf2, it
1300 * will have to do the relocations itself, but that's ok because it should
1301 * have all of the information needed to do so.
1302 *
1303 * 2) The target BO is active (as seen by the kernel). In this case, it
1304 * hasn't moved since the last execbuffer2 call because GTT shuffling
1305 * *only* happens when the BO is idle. (From our perspective, it only
1306 * happens inside the execbuffer2 ioctl, but the shuffling may be
1307 * triggered by another ioctl, with full-ppgtt this is limited to only
1308 * execbuffer2 ioctls on the same context, or memory pressure.) Since the
1309 * target BO hasn't moved, our anv_bo::offset exactly matches the BO's GTT
1310 * address and the relocated value we are writing into the BO will be the
1311 * same as the value that is already there.
1312 *
1313 * There is also a possibility that the target BO is active but the exact
1314 * RENDER_SURFACE_STATE object we are writing the relocation into isn't in
1315 * use. In this case, the address currently in the RENDER_SURFACE_STATE
1316 * may be stale but it's still safe to write the relocation because that
1317 * particular RENDER_SURFACE_STATE object isn't in-use by the GPU and
1318 * won't be until the next execbuf2 call.
1319 *
1320 * By doing relocations on the CPU, we can tell the kernel that it doesn't
1321 * need to bother. We want to do this because the surface state buffer is
1322 * used by every command buffer so, if the kernel does the relocations, it
1323 * will always be busy and the kernel will always stall. This is also
1324 * probably the fastest mechanism for doing relocations since the kernel would
1325 * have to make a full copy of all the relocation lists.
1326 */
1327 static bool
1328 relocate_cmd_buffer(struct anv_cmd_buffer *cmd_buffer,
1329 struct anv_execbuf *exec)
1330 {
1331 if (!exec->has_relocs)
1332 return true;
1333
1334 static int userspace_relocs = -1;
1335 if (userspace_relocs < 0)
1336 userspace_relocs = env_var_as_boolean("ANV_USERSPACE_RELOCS", true);
1337 if (!userspace_relocs)
1338 return false;
1339
1340 /* First, we have to check to see whether or not we can even do the
1341 * relocation. New buffers which have never been submitted to the kernel
1342 * don't have a valid offset so we need to let the kernel do relocations so
1343 * that we can get offsets for them. On future execbuf2 calls, those
1344 * buffers will have offsets and we will be able to skip relocating.
1345 * Invalid offsets are indicated by anv_bo::offset == (uint64_t)-1.
1346 */
1347 for (uint32_t i = 0; i < exec->bo_count; i++) {
1348 if (exec->bos[i]->offset == (uint64_t)-1)
1349 return false;
1350 }
1351
1352 /* Since surface states are shared between command buffers and we don't
1353 * know what order they will be submitted to the kernel, we don't know
1354 * what address is actually written in the surface state object at any
1355 * given time. The only option is to always relocate them.
1356 */
1357 anv_reloc_list_apply(cmd_buffer->device, &cmd_buffer->surface_relocs,
1358 cmd_buffer->device->surface_state_pool.block_pool.bo,
1359 true /* always relocate surface states */);
1360
1361 /* Since we own all of the batch buffers, we know what values are stored
1362 * in the relocated addresses and only have to update them if the offsets
1363 * have changed.
1364 */
1365 struct anv_batch_bo **bbo;
1366 u_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
1367 anv_reloc_list_apply(cmd_buffer->device,
1368 &(*bbo)->relocs, &(*bbo)->bo, false);
1369 }
1370
1371 for (uint32_t i = 0; i < exec->bo_count; i++)
1372 exec->objects[i].offset = exec->bos[i]->offset;
1373
1374 return true;
1375 }
1376
1377 static VkResult
1378 setup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf,
1379 struct anv_cmd_buffer *cmd_buffer)
1380 {
1381 struct anv_batch *batch = &cmd_buffer->batch;
1382 struct anv_state_pool *ss_pool =
1383 &cmd_buffer->device->surface_state_pool;
1384
1385 adjust_relocations_from_state_pool(ss_pool, &cmd_buffer->surface_relocs,
1386 cmd_buffer->last_ss_pool_center);
1387 VkResult result;
1388 struct anv_bo *bo;
1389 if (cmd_buffer->device->instance->physicalDevice.use_softpin) {
1390 anv_block_pool_foreach_bo(bo, &ss_pool->block_pool) {
1391 result = anv_execbuf_add_bo(execbuf, bo, NULL, 0,
1392 &cmd_buffer->device->alloc);
1393 if (result != VK_SUCCESS)
1394 return result;
1395 }
1396 /* Add surface dependencies (BOs) to the execbuf */
1397 anv_execbuf_add_bo_set(execbuf, cmd_buffer->surface_relocs.deps, 0,
1398 &cmd_buffer->device->alloc);
1399
1400 /* Add the BOs for all memory objects */
1401 list_for_each_entry(struct anv_device_memory, mem,
1402 &cmd_buffer->device->memory_objects, link) {
1403 result = anv_execbuf_add_bo(execbuf, mem->bo, NULL, 0,
1404 &cmd_buffer->device->alloc);
1405 if (result != VK_SUCCESS)
1406 return result;
1407 }
1408
1409 struct anv_block_pool *pool;
1410 pool = &cmd_buffer->device->dynamic_state_pool.block_pool;
1411 anv_block_pool_foreach_bo(bo, pool) {
1412 result = anv_execbuf_add_bo(execbuf, bo, NULL, 0,
1413 &cmd_buffer->device->alloc);
1414 if (result != VK_SUCCESS)
1415 return result;
1416 }
1417
1418 pool = &cmd_buffer->device->instruction_state_pool.block_pool;
1419 anv_block_pool_foreach_bo(bo, pool) {
1420 result = anv_execbuf_add_bo(execbuf, bo, NULL, 0,
1421 &cmd_buffer->device->alloc);
1422 if (result != VK_SUCCESS)
1423 return result;
1424 }
1425
1426 pool = &cmd_buffer->device->binding_table_pool.block_pool;
1427 anv_block_pool_foreach_bo(bo, pool) {
1428 result = anv_execbuf_add_bo(execbuf, bo, NULL, 0,
1429 &cmd_buffer->device->alloc);
1430 if (result != VK_SUCCESS)
1431 return result;
1432 }
1433 } else {
1434 /* Since we aren't in the softpin case, all of our STATE_BASE_ADDRESS BOs
1435 * will get added automatically by processing relocations on the batch
1436 * buffer. We have to add the surface state BO manually because it has
1437 * relocations of its own that we need to be sure are processed.
1438 */
1439 result = anv_execbuf_add_bo(execbuf, ss_pool->block_pool.bo,
1440 &cmd_buffer->surface_relocs, 0,
1441 &cmd_buffer->device->alloc);
1442 if (result != VK_SUCCESS)
1443 return result;
1444 }
1445
1446 /* First, we walk over all of the bos we've seen and add them and their
1447 * relocations to the validate list.
1448 */
1449 struct anv_batch_bo **bbo;
1450 u_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
1451 adjust_relocations_to_state_pool(ss_pool, &(*bbo)->bo, &(*bbo)->relocs,
1452 cmd_buffer->last_ss_pool_center);
1453
1454 result = anv_execbuf_add_bo(execbuf, &(*bbo)->bo, &(*bbo)->relocs, 0,
1455 &cmd_buffer->device->alloc);
1456 if (result != VK_SUCCESS)
1457 return result;
1458 }
1459
1460 /* Now that we've adjusted all of the surface state relocations, we need to
1461 * record the surface state pool center so future executions of the command
1462 * buffer can adjust correctly.
1463 */
1464 cmd_buffer->last_ss_pool_center = ss_pool->block_pool.center_bo_offset;
1465
1466 struct anv_batch_bo *first_batch_bo =
1467 list_first_entry(&cmd_buffer->batch_bos, struct anv_batch_bo, link);
1468
1469 /* The kernel requires that the last entry in the validation list be the
1470 * batch buffer to execute. We can simply swap the element
1471 * corresponding to the first batch_bo in the chain with the last
1472 * element in the list.
1473 */
1474 if (first_batch_bo->bo.index != execbuf->bo_count - 1) {
1475 uint32_t idx = first_batch_bo->bo.index;
1476 uint32_t last_idx = execbuf->bo_count - 1;
1477
1478 struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx];
1479 assert(execbuf->bos[idx] == &first_batch_bo->bo);
1480
1481 execbuf->objects[idx] = execbuf->objects[last_idx];
1482 execbuf->bos[idx] = execbuf->bos[last_idx];
1483 execbuf->bos[idx]->index = idx;
1484
1485 execbuf->objects[last_idx] = tmp_obj;
1486 execbuf->bos[last_idx] = &first_batch_bo->bo;
1487 first_batch_bo->bo.index = last_idx;
1488 }
1489
1490 /* If we are pinning our BOs, we shouldn't have to relocate anything */
1491 if (cmd_buffer->device->instance->physicalDevice.use_softpin)
1492 assert(!execbuf->has_relocs);
1493
1494 /* Now we go through and fixup all of the relocation lists to point to
1495 * the correct indices in the object array. We have to do this after we
1496 * reorder the list above as some of the indices may have changed.
1497 */
1498 if (execbuf->has_relocs) {
1499 u_vector_foreach(bbo, &cmd_buffer->seen_bbos)
1500 anv_cmd_buffer_process_relocs(cmd_buffer, &(*bbo)->relocs);
1501
1502 anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs);
1503 }
1504
1505 if (!cmd_buffer->device->info.has_llc) {
1506 __builtin_ia32_mfence();
1507 u_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
1508 for (uint32_t i = 0; i < (*bbo)->length; i += CACHELINE_SIZE)
1509 __builtin_ia32_clflush((*bbo)->bo.map + i);
1510 }
1511 }
1512
1513 execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
1514 .buffers_ptr = (uintptr_t) execbuf->objects,
1515 .buffer_count = execbuf->bo_count,
1516 .batch_start_offset = 0,
1517 .batch_len = batch->next - batch->start,
1518 .cliprects_ptr = 0,
1519 .num_cliprects = 0,
1520 .DR1 = 0,
1521 .DR4 = 0,
1522 .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER,
1523 .rsvd1 = cmd_buffer->device->context_id,
1524 .rsvd2 = 0,
1525 };
1526
1527 if (relocate_cmd_buffer(cmd_buffer, execbuf)) {
1528 /* If we were able to successfully relocate everything, tell the kernel
1529 * that it can skip doing relocations. The requirement for using
1530 * NO_RELOC is:
1531 *
1532 * 1) The addresses written in the objects must match the corresponding
1533 * reloc.presumed_offset which in turn must match the corresponding
1534 * execobject.offset.
1535 *
1536 * 2) To avoid stalling, execobject.offset should match the current
1537 * address of that object within the active context.
1538 *
1539 * In order to satisfy all of the invariants that make userspace
1540 * relocations safe (see relocate_cmd_buffer()), we need to
1541 * further ensure that the addresses we use match those used by the
1542 * kernel for the most recent execbuf2.
1543 *
1544 * The kernel may still choose to do relocations anyway if something has
1545 * moved in the GTT. In this case, the relocation list still needs to be
1546 * valid. All relocations on the batch buffers are already valid and
1547 * kept up-to-date. For surface state relocations, by applying the
1548 * relocations in relocate_cmd_buffer, we ensured that the address in
1549 * the RENDER_SURFACE_STATE matches presumed_offset, so it should be
1550 * safe for the kernel to relocate them as needed.
1551 */
1552 execbuf->execbuf.flags |= I915_EXEC_NO_RELOC;
1553 } else {
1554 /* In the case where we fall back to doing kernel relocations, we need
1555 * to ensure that the relocation list is valid. All relocations on the
1556 * batch buffers are already valid and kept up-to-date. Since surface
1557 * states are shared between command buffers and we don't know what
1558 * order they will be submitted to the kernel, we don't know what
1559 * address is actually written in the surface state object at any given
1560 * time. The only option is to set a bogus presumed offset and let the
1561 * kernel relocate them.
1562 */
1563 for (size_t i = 0; i < cmd_buffer->surface_relocs.num_relocs; i++)
1564 cmd_buffer->surface_relocs.relocs[i].presumed_offset = -1;
1565 }
1566
1567 return VK_SUCCESS;
1568 }
1569
1570 static VkResult
1571 setup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_device *device)
1572 {
1573 VkResult result = anv_execbuf_add_bo(execbuf, &device->trivial_batch_bo,
1574 NULL, 0, &device->alloc);
1575 if (result != VK_SUCCESS)
1576 return result;
1577
1578 execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
1579 .buffers_ptr = (uintptr_t) execbuf->objects,
1580 .buffer_count = execbuf->bo_count,
1581 .batch_start_offset = 0,
1582 .batch_len = 8, /* GEN7_MI_BATCH_BUFFER_END and NOOP */
1583 .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER,
1584 .rsvd1 = device->context_id,
1585 .rsvd2 = 0,
1586 };
1587
1588 return VK_SUCCESS;
1589 }
1590
1591 VkResult
1592 anv_cmd_buffer_execbuf(struct anv_device *device,
1593 struct anv_cmd_buffer *cmd_buffer,
1594 const VkSemaphore *in_semaphores,
1595 uint32_t num_in_semaphores,
1596 const VkSemaphore *out_semaphores,
1597 uint32_t num_out_semaphores,
1598 VkFence _fence)
1599 {
1600 ANV_FROM_HANDLE(anv_fence, fence, _fence);
1601 UNUSED struct anv_physical_device *pdevice = &device->instance->physicalDevice;
1602
1603 struct anv_execbuf execbuf;
1604 anv_execbuf_init(&execbuf);
1605
1606 int in_fence = -1;
1607 VkResult result = VK_SUCCESS;
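   /* Gather the wait side first: each wait semaphore contributes either a BO,
    * a sync-file fd, or a syncobj to the execbuf.  A temporary payload (from a
    * temporary import) takes precedence over the permanent one.
    */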
1608 for (uint32_t i = 0; i < num_in_semaphores; i++) {
1609 ANV_FROM_HANDLE(anv_semaphore, semaphore, in_semaphores[i]);
1610 struct anv_semaphore_impl *impl =
1611 semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
1612 &semaphore->temporary : &semaphore->permanent;
1613
1614 switch (impl->type) {
1615 case ANV_SEMAPHORE_TYPE_BO:
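         /* BO semaphores are built on the kernel's implicit synchronization:
          * the signal operation added this BO to an earlier execbuf with
          * EXEC_OBJECT_WRITE, so listing the same BO here orders this batch
          * after that write.
          */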
1616 assert(!pdevice->has_syncobj);
1617 result = anv_execbuf_add_bo(&execbuf, impl->bo, NULL,
1618 0, &device->alloc);
1619 if (result != VK_SUCCESS)
1620 return result;
1621 break;
1622
1623 case ANV_SEMAPHORE_TYPE_SYNC_FILE:
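         /* execbuf2 takes at most one input fence fd, so multiple sync-file
          * waits are merged into a single fd which is passed below via
          * I915_EXEC_FENCE_IN.  The semaphore's fd is consumed either way.
          */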
1624 assert(!pdevice->has_syncobj);
1625 if (in_fence == -1) {
1626 in_fence = impl->fd;
1627 } else {
1628 int merge = anv_gem_sync_file_merge(device, in_fence, impl->fd);
1629 if (merge == -1)
1630 return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE);
1631
1632 close(impl->fd);
1633 close(in_fence);
1634 in_fence = merge;
1635 }
1636
1637 impl->fd = -1;
1638 break;
1639
1640 case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ:
1641 result = anv_execbuf_add_syncobj(&execbuf, impl->syncobj,
1642 I915_EXEC_FENCE_WAIT,
1643 &device->alloc);
1644 if (result != VK_SUCCESS)
1645 return result;
1646 break;
1647
1648 default:
1649 break;
1650 }
1651 }
1652
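   /* Now the signal side: BO semaphores are added for write, sync-file
    * semaphores ask for an out fence, and syncobjs go into the fence array
    * with I915_EXEC_FENCE_SIGNAL.
    */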
1653 bool need_out_fence = false;
1654 for (uint32_t i = 0; i < num_out_semaphores; i++) {
1655 ANV_FROM_HANDLE(anv_semaphore, semaphore, out_semaphores[i]);
1656
1657 /* Under most circumstances, out semaphores won't be temporary. However,
1658 * the spec does allow it for opaque_fd. From the Vulkan 1.0.53 spec:
1659 *
1660 * "If the import is temporary, the implementation must restore the
1661 * semaphore to its prior permanent state after submitting the next
1662 * semaphore wait operation."
1663 *
1664 * The spec says nothing whatsoever about signal operations on
1665 * temporarily imported semaphores so it appears they are allowed.
1666 * There are also CTS tests that require this to work.
1667 */
1668 struct anv_semaphore_impl *impl =
1669 semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
1670 &semaphore->temporary : &semaphore->permanent;
1671
1672 switch (impl->type) {
1673 case ANV_SEMAPHORE_TYPE_BO:
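         /* Signaling a BO semaphore just means adding the BO to this execbuf
          * with EXEC_OBJECT_WRITE; a subsequent wait picks up the dependency
          * through the kernel's implicit sync.
          */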
1674 assert(!pdevice->has_syncobj);
1675 result = anv_execbuf_add_bo(&execbuf, impl->bo, NULL,
1676 EXEC_OBJECT_WRITE, &device->alloc);
1677 if (result != VK_SUCCESS)
1678 return result;
1679 break;
1680
1681 case ANV_SEMAPHORE_TYPE_SYNC_FILE:
1682 assert(!pdevice->has_syncobj);
1683 need_out_fence = true;
1684 break;
1685
1686 case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ:
1687 result = anv_execbuf_add_syncobj(&execbuf, impl->syncobj,
1688 I915_EXEC_FENCE_SIGNAL,
1689 &device->alloc);
1690 if (result != VK_SUCCESS)
1691 return result;
1692 break;
1693
1694 default:
1695 break;
1696 }
1697 }
1698
1699 if (fence) {
1700 /* Under most circumstances, out fences won't be temporary. However,
1701 * the spec does allow it for opaque_fd. From the Vulkan 1.0.53 spec:
1702 *
1703 * "If the import is temporary, the implementation must restore the
1704 * semaphore to its prior permanent state after submitting the next
1705 * semaphore wait operation."
1706 *
1707 * The spec says nothing whatsoever about signal operations on
1708 * temporarily imported semaphores so it appears they are allowed.
1709 * There are also CTS tests that require this to work.
1710 */
1711 struct anv_fence_impl *impl =
1712 fence->temporary.type != ANV_FENCE_TYPE_NONE ?
1713 &fence->temporary : &fence->permanent;
1714
1715 switch (impl->type) {
1716 case ANV_FENCE_TYPE_BO:
1717 assert(!pdevice->has_syncobj_wait);
1718 result = anv_execbuf_add_bo(&execbuf, &impl->bo.bo, NULL,
1719 EXEC_OBJECT_WRITE, &device->alloc);
1720 if (result != VK_SUCCESS)
1721 return result;
1722 break;
1723
1724 case ANV_FENCE_TYPE_SYNCOBJ:
1725 result = anv_execbuf_add_syncobj(&execbuf, impl->syncobj,
1726 I915_EXEC_FENCE_SIGNAL,
1727 &device->alloc);
1728 if (result != VK_SUCCESS)
1729 return result;
1730 break;
1731
1732 default:
1733 unreachable("Invalid fence type");
1734 }
1735 }
1736
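   /* If we have a command buffer, optionally decode and dump its batch
    * (INTEL_DEBUG=bat) and then build its execbuf; otherwise submit the
    * trivial empty batch instead.
    */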
1737 if (cmd_buffer) {
1738 if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {
1739 struct anv_batch_bo **bo = u_vector_tail(&cmd_buffer->seen_bbos);
1740
1741 device->cmd_buffer_being_decoded = cmd_buffer;
1742 gen_print_batch(&device->decoder_ctx, (*bo)->bo.map,
1743 (*bo)->bo.size, (*bo)->bo.offset, false);
1744 device->cmd_buffer_being_decoded = NULL;
1745 }
1746
1747 result = setup_execbuf_for_cmd_buffer(&execbuf, cmd_buffer);
1748 } else {
1749 result = setup_empty_execbuf(&execbuf, device);
1750 }
1751
1752 if (result != VK_SUCCESS)
1753 return result;
1754
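   /* With I915_EXEC_FENCE_ARRAY, the otherwise-unused cliprects fields carry
    * the array of drm_i915_gem_exec_fence entries describing the syncobjs to
    * wait on and signal.
    */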
1755 if (execbuf.fence_count > 0) {
1756 assert(device->instance->physicalDevice.has_syncobj);
1757 execbuf.execbuf.flags |= I915_EXEC_FENCE_ARRAY;
1758 execbuf.execbuf.num_cliprects = execbuf.fence_count;
1759 execbuf.execbuf.cliprects_ptr = (uintptr_t) execbuf.fences;
1760 }
1761
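   /* I915_EXEC_FENCE_IN passes the input sync-file fd in the low 32 bits of
    * rsvd2; with I915_EXEC_FENCE_OUT the kernel returns a new sync-file fd in
    * the upper 32 bits.
    */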
1762 if (in_fence != -1) {
1763 execbuf.execbuf.flags |= I915_EXEC_FENCE_IN;
1764 execbuf.execbuf.rsvd2 |= (uint32_t)in_fence;
1765 }
1766
1767 if (need_out_fence)
1768 execbuf.execbuf.flags |= I915_EXEC_FENCE_OUT;
1769
1770 result = anv_device_execbuf(device, &execbuf.execbuf, execbuf.bos);
1771
1772 /* Execbuf does not consume the in_fence. It's our job to close it. */
1773 if (in_fence != -1)
1774 close(in_fence);
1775
1776 for (uint32_t i = 0; i < num_in_semaphores; i++) {
1777 ANV_FROM_HANDLE(anv_semaphore, semaphore, in_semaphores[i]);
1778 /* From the Vulkan 1.0.53 spec:
1779 *
1780 * "If the import is temporary, the implementation must restore the
1781 * semaphore to its prior permanent state after submitting the next
1782 * semaphore wait operation."
1783 *
1784 * This has to happen after the execbuf in case we close any syncobjs in
1785 * the process.
1786 */
1787 anv_semaphore_reset_temporary(device, semaphore);
1788 }
1789
1790 if (fence && fence->permanent.type == ANV_FENCE_TYPE_BO) {
1791 assert(!pdevice->has_syncobj_wait);
1792 /* BO fences can't be shared, so they can't be temporary. */
1793 assert(fence->temporary.type == ANV_FENCE_TYPE_NONE);
1794
1795 /* Once the execbuf has returned, we need to set the fence state to
1796 * SUBMITTED. We can't do this before calling execbuf because
1797 * anv_GetFenceStatus does not take the global device lock, so a status
1798 * query could otherwise see SUBMITTED before the BO was actually submitted.
1799 *
1800 * We set the fence state to SUBMITTED regardless of whether or not the
1801 * execbuf succeeds because we need to ensure that vkWaitForFences() and
1802 * vkGetFenceStatus() return a valid result (VK_ERROR_DEVICE_LOST or
1803 * VK_SUCCESS) in a finite amount of time even if execbuf fails.
1804 */
1805 fence->permanent.bo.state = ANV_BO_FENCE_STATE_SUBMITTED;
1806 }
1807
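   /* The kernel returned a single out fence; give each sync-file semaphore we
    * signaled its own dup of it, then drop the original.  Each semaphore owns
    * its fd so it can be exported or waited on independently.
    */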
1808 if (result == VK_SUCCESS && need_out_fence) {
1809 assert(!pdevice->has_syncobj_wait);
1810 int out_fence = execbuf.execbuf.rsvd2 >> 32;
1811 for (uint32_t i = 0; i < num_out_semaphores; i++) {
1812 ANV_FROM_HANDLE(anv_semaphore, semaphore, out_semaphores[i]);
1813 /* Out fences can't have temporary state because that would imply
1814 * that we imported a sync file and are trying to signal it.
1815 */
1816 assert(semaphore->temporary.type == ANV_SEMAPHORE_TYPE_NONE);
1817 struct anv_semaphore_impl *impl = &semaphore->permanent;
1818
1819 if (impl->type == ANV_SEMAPHORE_TYPE_SYNC_FILE) {
1820 assert(impl->fd == -1);
1821 impl->fd = dup(out_fence);
1822 }
1823 }
1824 close(out_fence);
1825 }
1826
1827 anv_execbuf_finish(&execbuf, &device->alloc);
1828
1829 return result;
1830 }