anv: Add an anv_execbuf helper struct
src/intel/vulkan/anv_batch_chain.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "anv_private.h"
31
32 #include "genxml/gen7_pack.h"
33 #include "genxml/gen8_pack.h"
34
35 /** \file anv_batch_chain.c
36 *
37 * This file contains functions related to anv_cmd_buffer as a data
38 * structure. This involves everything required to create and destroy
39 * the actual batch buffers as well as link them together and handle
40 * relocations and surface state. It specifically does *not* contain any
41 * handling of actual vkCmd calls beyond vkCmdExecuteCommands.
42 */
43
44 /*-----------------------------------------------------------------------*
45 * Functions related to anv_reloc_list
46 *-----------------------------------------------------------------------*/
47
48 static VkResult
49 anv_reloc_list_init_clone(struct anv_reloc_list *list,
50 const VkAllocationCallbacks *alloc,
51 const struct anv_reloc_list *other_list)
52 {
53 if (other_list) {
54 list->num_relocs = other_list->num_relocs;
55 list->array_length = other_list->array_length;
56 } else {
57 list->num_relocs = 0;
58 list->array_length = 256;
59 }
60
61 list->relocs =
62 vk_alloc(alloc, list->array_length * sizeof(*list->relocs), 8,
63 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
64
65 if (list->relocs == NULL)
66 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
67
68 list->reloc_bos =
69 vk_alloc(alloc, list->array_length * sizeof(*list->reloc_bos), 8,
70 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
71
72 if (list->reloc_bos == NULL) {
73 vk_free(alloc, list->relocs);
74 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
75 }
76
77 if (other_list) {
78 memcpy(list->relocs, other_list->relocs,
79 list->array_length * sizeof(*list->relocs));
80 memcpy(list->reloc_bos, other_list->reloc_bos,
81 list->array_length * sizeof(*list->reloc_bos));
82 }
83
84 return VK_SUCCESS;
85 }
86
87 VkResult
88 anv_reloc_list_init(struct anv_reloc_list *list,
89 const VkAllocationCallbacks *alloc)
90 {
91 return anv_reloc_list_init_clone(list, alloc, NULL);
92 }
93
94 void
95 anv_reloc_list_finish(struct anv_reloc_list *list,
96 const VkAllocationCallbacks *alloc)
97 {
98 vk_free(alloc, list->relocs);
99 vk_free(alloc, list->reloc_bos);
100 }
101
102 static VkResult
103 anv_reloc_list_grow(struct anv_reloc_list *list,
104 const VkAllocationCallbacks *alloc,
105 size_t num_additional_relocs)
106 {
107 if (list->num_relocs + num_additional_relocs <= list->array_length)
108 return VK_SUCCESS;
109
110 size_t new_length = list->array_length * 2;
111 while (new_length < list->num_relocs + num_additional_relocs)
112 new_length *= 2;
113
114 struct drm_i915_gem_relocation_entry *new_relocs =
115 vk_alloc(alloc, new_length * sizeof(*list->relocs), 8,
116 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
117 if (new_relocs == NULL)
118 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
119
120 struct anv_bo **new_reloc_bos =
121 vk_alloc(alloc, new_length * sizeof(*list->reloc_bos), 8,
122 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
123 if (new_reloc_bos == NULL) {
124 vk_free(alloc, new_relocs);
125 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
126 }
127
128 memcpy(new_relocs, list->relocs, list->num_relocs * sizeof(*list->relocs));
129 memcpy(new_reloc_bos, list->reloc_bos,
130 list->num_relocs * sizeof(*list->reloc_bos));
131
132 vk_free(alloc, list->relocs);
133 vk_free(alloc, list->reloc_bos);
134
135 list->array_length = new_length;
136 list->relocs = new_relocs;
137 list->reloc_bos = new_reloc_bos;
138
139 return VK_SUCCESS;
140 }
141
142 uint64_t
143 anv_reloc_list_add(struct anv_reloc_list *list,
144 const VkAllocationCallbacks *alloc,
145 uint32_t offset, struct anv_bo *target_bo, uint32_t delta)
146 {
147 struct drm_i915_gem_relocation_entry *entry;
148 int index;
149
150 const uint32_t domain =
151 target_bo->is_winsys_bo ? I915_GEM_DOMAIN_RENDER : 0;
152
153 anv_reloc_list_grow(list, alloc, 1);
154 /* TODO: Handle failure */
155
156 /* XXX: Can we use I915_EXEC_HANDLE_LUT? */
157 index = list->num_relocs++;
158 list->reloc_bos[index] = target_bo;
159 entry = &list->relocs[index];
160 entry->target_handle = target_bo->gem_handle;
161 entry->delta = delta;
162 entry->offset = offset;
163 entry->presumed_offset = target_bo->offset;
164 entry->read_domains = domain;
165 entry->write_domain = domain;
166 VG(VALGRIND_CHECK_MEM_IS_DEFINED(entry, sizeof(*entry)));
167
168 return target_bo->offset + delta;
169 }
170
171 static void
172 anv_reloc_list_append(struct anv_reloc_list *list,
173 const VkAllocationCallbacks *alloc,
174 struct anv_reloc_list *other, uint32_t offset)
175 {
176 anv_reloc_list_grow(list, alloc, other->num_relocs);
177 /* TODO: Handle failure */
178
179 memcpy(&list->relocs[list->num_relocs], &other->relocs[0],
180 other->num_relocs * sizeof(other->relocs[0]));
181 memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0],
182 other->num_relocs * sizeof(other->reloc_bos[0]));
183
184 for (uint32_t i = 0; i < other->num_relocs; i++)
185 list->relocs[i + list->num_relocs].offset += offset;
186
187 list->num_relocs += other->num_relocs;
188 }
189
190 /*-----------------------------------------------------------------------*
191 * Functions related to anv_batch
192 *-----------------------------------------------------------------------*/
193
194 void *
195 anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords)
196 {
197 if (batch->next + num_dwords * 4 > batch->end)
198 batch->extend_cb(batch, batch->user_data);
199
200 void *p = batch->next;
201
202 batch->next += num_dwords * 4;
203 assert(batch->next <= batch->end);
204
205 return p;
206 }
207
208 uint64_t
209 anv_batch_emit_reloc(struct anv_batch *batch,
210 void *location, struct anv_bo *bo, uint32_t delta)
211 {
212 return anv_reloc_list_add(batch->relocs, batch->alloc,
213 location - batch->start, bo, delta);
214 }
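/* Illustrative sketch of a typical caller, roughly what the genxml
 * address-packing helpers arrange (variable names here are hypothetical):
 * emit the dwords, record a relocation for the address about to be written,
 * and store the presumed address so the kernel only patches it if the target
 * BO actually moves.
 *
 *    uint32_t *dw = anv_batch_emit_dwords(batch, 2);
 *    uint64_t presumed = anv_batch_emit_reloc(batch, dw, target_bo, delta);
 *    *(uint64_t *)dw = presumed;
 */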
215
216 void
217 anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other)
218 {
219 uint32_t size, offset;
220
221 size = other->next - other->start;
222 assert(size % 4 == 0);
223
224 if (batch->next + size > batch->end)
225 batch->extend_cb(batch, batch->user_data);
226
227 assert(batch->next + size <= batch->end);
228
229 VG(VALGRIND_CHECK_MEM_IS_DEFINED(other->start, size));
230 memcpy(batch->next, other->start, size);
231
232 offset = batch->next - batch->start;
233 anv_reloc_list_append(batch->relocs, batch->alloc,
234 other->relocs, offset);
235
236 batch->next += size;
237 }
238
239 /*-----------------------------------------------------------------------*
240 * Functions related to anv_batch_bo
241 *-----------------------------------------------------------------------*/
242
243 static VkResult
244 anv_batch_bo_create(struct anv_cmd_buffer *cmd_buffer,
245 struct anv_batch_bo **bbo_out)
246 {
247 VkResult result;
248
249 struct anv_batch_bo *bbo = vk_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo),
250 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
251 if (bbo == NULL)
252 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
253
254 result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo,
255 ANV_CMD_BUFFER_BATCH_SIZE);
256 if (result != VK_SUCCESS)
257 goto fail_alloc;
258
259 result = anv_reloc_list_init(&bbo->relocs, &cmd_buffer->pool->alloc);
260 if (result != VK_SUCCESS)
261 goto fail_bo_alloc;
262
263 *bbo_out = bbo;
264
265 return VK_SUCCESS;
266
267 fail_bo_alloc:
268 anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
269 fail_alloc:
270 vk_free(&cmd_buffer->pool->alloc, bbo);
271
272 return result;
273 }
274
275 static VkResult
276 anv_batch_bo_clone(struct anv_cmd_buffer *cmd_buffer,
277 const struct anv_batch_bo *other_bbo,
278 struct anv_batch_bo **bbo_out)
279 {
280 VkResult result;
281
282 struct anv_batch_bo *bbo = vk_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo),
283 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
284 if (bbo == NULL)
285 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
286
287 result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo,
288 other_bbo->bo.size);
289 if (result != VK_SUCCESS)
290 goto fail_alloc;
291
292 result = anv_reloc_list_init_clone(&bbo->relocs, &cmd_buffer->pool->alloc,
293 &other_bbo->relocs);
294 if (result != VK_SUCCESS)
295 goto fail_bo_alloc;
296
297 bbo->length = other_bbo->length;
298 memcpy(bbo->bo.map, other_bbo->bo.map, other_bbo->length);
299
300 bbo->last_ss_pool_bo_offset = other_bbo->last_ss_pool_bo_offset;
301
302 *bbo_out = bbo;
303
304 return VK_SUCCESS;
305
306 fail_bo_alloc:
307 anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
308 fail_alloc:
309 vk_free(&cmd_buffer->pool->alloc, bbo);
310
311 return result;
312 }
313
314 static void
315 anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch,
316 size_t batch_padding)
317 {
318 batch->next = batch->start = bbo->bo.map;
319 batch->end = bbo->bo.map + bbo->bo.size - batch_padding;
320 batch->relocs = &bbo->relocs;
321 bbo->last_ss_pool_bo_offset = 0;
322 bbo->relocs.num_relocs = 0;
323 }
324
325 static void
326 anv_batch_bo_continue(struct anv_batch_bo *bbo, struct anv_batch *batch,
327 size_t batch_padding)
328 {
329 batch->start = bbo->bo.map;
330 batch->next = bbo->bo.map + bbo->length;
331 batch->end = bbo->bo.map + bbo->bo.size - batch_padding;
332 batch->relocs = &bbo->relocs;
333 }
334
335 static void
336 anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch)
337 {
338 assert(batch->start == bbo->bo.map);
339 bbo->length = batch->next - batch->start;
340 VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length));
341 }
342
343 static VkResult
344 anv_batch_bo_grow(struct anv_cmd_buffer *cmd_buffer, struct anv_batch_bo *bbo,
345                   struct anv_batch *batch, size_t additional,
346 size_t batch_padding)
347 {
348 assert(batch->start == bbo->bo.map);
349 bbo->length = batch->next - batch->start;
350
351 size_t new_size = bbo->bo.size;
352    while (new_size <= bbo->length + additional + batch_padding)
353 new_size *= 2;
354
355 if (new_size == bbo->bo.size)
356 return VK_SUCCESS;
357
358 struct anv_bo new_bo;
359 VkResult result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool,
360 &new_bo, new_size);
361 if (result != VK_SUCCESS)
362 return result;
363
364 memcpy(new_bo.map, bbo->bo.map, bbo->length);
365
366 anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
367
368 bbo->bo = new_bo;
369 anv_batch_bo_continue(bbo, batch, batch_padding);
370
371 return VK_SUCCESS;
372 }
373
374 static void
375 anv_batch_bo_destroy(struct anv_batch_bo *bbo,
376 struct anv_cmd_buffer *cmd_buffer)
377 {
378 anv_reloc_list_finish(&bbo->relocs, &cmd_buffer->pool->alloc);
379 anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
380 vk_free(&cmd_buffer->pool->alloc, bbo);
381 }
382
383 static VkResult
384 anv_batch_bo_list_clone(const struct list_head *list,
385 struct anv_cmd_buffer *cmd_buffer,
386 struct list_head *new_list)
387 {
388 VkResult result = VK_SUCCESS;
389
390 list_inithead(new_list);
391
392 struct anv_batch_bo *prev_bbo = NULL;
393 list_for_each_entry(struct anv_batch_bo, bbo, list, link) {
394 struct anv_batch_bo *new_bbo = NULL;
395 result = anv_batch_bo_clone(cmd_buffer, bbo, &new_bbo);
396 if (result != VK_SUCCESS)
397 break;
398 list_addtail(&new_bbo->link, new_list);
399
400 if (prev_bbo) {
401 /* As we clone this list of batch_bo's, they chain one to the
402 * other using MI_BATCH_BUFFER_START commands. We need to fix up
403 * those relocations as we go. Fortunately, this is pretty easy
404 * as it will always be the last relocation in the list.
405 */
406 uint32_t last_idx = prev_bbo->relocs.num_relocs - 1;
407 assert(prev_bbo->relocs.reloc_bos[last_idx] == &bbo->bo);
408 prev_bbo->relocs.reloc_bos[last_idx] = &new_bbo->bo;
409 }
410
411 prev_bbo = new_bbo;
412 }
413
414 if (result != VK_SUCCESS) {
415 list_for_each_entry_safe(struct anv_batch_bo, bbo, new_list, link)
416 anv_batch_bo_destroy(bbo, cmd_buffer);
417 }
418
419 return result;
420 }
421
422 /*-----------------------------------------------------------------------*
423  * Functions related to anv_cmd_buffer
424 *-----------------------------------------------------------------------*/
425
426 static inline struct anv_batch_bo *
427 anv_cmd_buffer_current_batch_bo(struct anv_cmd_buffer *cmd_buffer)
428 {
429 return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->batch_bos.prev, link);
430 }
431
432 struct anv_address
433 anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer)
434 {
435 return (struct anv_address) {
436 .bo = &cmd_buffer->device->surface_state_block_pool.bo,
437 .offset = *(int32_t *)u_vector_head(&cmd_buffer->bt_blocks),
438 };
439 }
440
441 static void
442 emit_batch_buffer_start(struct anv_cmd_buffer *cmd_buffer,
443 struct anv_bo *bo, uint32_t offset)
444 {
445    /* In gen8+ the address field grew to two dwords to accommodate 48-bit
446 * offsets. The high 16 bits are in the last dword, so we can use the gen8
447 * version in either case, as long as we set the instruction length in the
448 * header accordingly. This means that we always emit three dwords here
449 * and all the padding and adjustment we do in this file works for all
450 * gens.
451 */
452
453 const uint32_t gen7_length =
454 GEN7_MI_BATCH_BUFFER_START_length - GEN7_MI_BATCH_BUFFER_START_length_bias;
455 const uint32_t gen8_length =
456 GEN8_MI_BATCH_BUFFER_START_length - GEN8_MI_BATCH_BUFFER_START_length_bias;
457
458 anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START, bbs) {
459 bbs.DWordLength = cmd_buffer->device->info.gen < 8 ?
460 gen7_length : gen8_length;
461 bbs._2ndLevelBatchBuffer = _1stlevelbatch;
462 bbs.AddressSpaceIndicator = ASI_PPGTT;
463 bbs.BatchBufferStartAddress = (struct anv_address) { bo, offset };
464 }
465 }
466
467 static void
468 cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer,
469 struct anv_batch_bo *bbo)
470 {
471 struct anv_batch *batch = &cmd_buffer->batch;
472 struct anv_batch_bo *current_bbo =
473 anv_cmd_buffer_current_batch_bo(cmd_buffer);
474
475 /* We set the end of the batch a little short so we would be sure we
476 * have room for the chaining command. Since we're about to emit the
477 * chaining command, let's set it back where it should go.
478 */
479 batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4;
480 assert(batch->end == current_bbo->bo.map + current_bbo->bo.size);
481
482 emit_batch_buffer_start(cmd_buffer, &bbo->bo, 0);
483
484 anv_batch_bo_finish(current_bbo, batch);
485 }
486
487 static VkResult
488 anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data)
489 {
490 struct anv_cmd_buffer *cmd_buffer = _data;
491 struct anv_batch_bo *new_bbo;
492
493 VkResult result = anv_batch_bo_create(cmd_buffer, &new_bbo);
494 if (result != VK_SUCCESS)
495 return result;
496
497 struct anv_batch_bo **seen_bbo = u_vector_add(&cmd_buffer->seen_bbos);
498 if (seen_bbo == NULL) {
499 anv_batch_bo_destroy(new_bbo, cmd_buffer);
500 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
501 }
502 *seen_bbo = new_bbo;
503
504 cmd_buffer_chain_to_batch_bo(cmd_buffer, new_bbo);
505
506 list_addtail(&new_bbo->link, &cmd_buffer->batch_bos);
507
508 anv_batch_bo_start(new_bbo, batch, GEN8_MI_BATCH_BUFFER_START_length * 4);
509
510 return VK_SUCCESS;
511 }
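/* Rough picture of the chain this callback builds up (each arrow is the
 * MI_BATCH_BUFFER_START emitted by cmd_buffer_chain_to_batch_bo):
 *
 *    batch_bo[0] --> batch_bo[1] --> batch_bo[2] --> ... --> BATCH_BUFFER_END
 *
 * Every batch_bo keeps GEN8_MI_BATCH_BUFFER_START_length * 4 bytes of padding
 * at its end (see anv_batch_bo_start) so the chaining command is guaranteed
 * to fit in the current BO when the batch runs out of space.
 */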
512
513 static VkResult
514 anv_cmd_buffer_grow_batch(struct anv_batch *batch, void *_data)
515 {
516 struct anv_cmd_buffer *cmd_buffer = _data;
517 struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer);
518
519 anv_batch_bo_grow(cmd_buffer, bbo, &cmd_buffer->batch, 4096,
520 GEN8_MI_BATCH_BUFFER_START_length * 4);
521
522 return VK_SUCCESS;
523 }
524
525 /** Allocate a binding table
526 *
527 * This function allocates a binding table. This is a bit more complicated
528 * than one would think due to a combination of Vulkan driver design and some
529 * unfortunate hardware restrictions.
530 *
531 * The 3DSTATE_BINDING_TABLE_POINTERS_* packets only have a 16-bit field for
532 * the binding table pointer which means that all binding tables need to live
533 * in the bottom 64k of surface state base address. The way the GL driver has
534 * classically dealt with this restriction is to emit all surface states
535 * on-the-fly into the batch and have a batch buffer smaller than 64k. This
536 * isn't really an option in Vulkan for a couple of reasons:
537 *
538 * 1) In Vulkan, we have growing (or chaining) batches so surface states have
539 * to live in their own buffer and we have to be able to re-emit
540 * STATE_BASE_ADDRESS as needed which requires a full pipeline stall. In
541 * order to avoid emitting STATE_BASE_ADDRESS any more often than needed
542 * (it's not that hard to hit 64k of just binding tables), we allocate
543 * surface state objects up-front when VkImageView is created. In order
544 * for this to work, surface state objects need to be allocated from a
545 * global buffer.
546 *
547 * 2) We tried to design the surface state system in such a way that it's
548 * already ready for bindless texturing. The way bindless texturing works
549 * on our hardware is that you have a big pool of surface state objects
550 * (with its own state base address) and the bindless handles are simply
551 * offsets into that pool. With the architecture we chose, we already
552 * have that pool and it's exactly the same pool that we use for regular
553 * surface states so we should already be ready for bindless.
554 *
555 * 3) For render targets, we need to be able to fill out the surface states
556  *    later in vkCmdBeginRenderPass so that we can assign clear colors
557 * correctly. One way to do this would be to just create the surface
558 * state data and then repeatedly copy it into the surface state BO every
559 * time we have to re-emit STATE_BASE_ADDRESS. While this works, it's
560  *    rather annoying, and it's much simpler to just allocate them up-front
561  *    and re-use them for the entire render pass.
562 *
563 * While none of these are technically blockers for emitting state on the fly
564  * like we do in GL, the ability to have a single surface state pool
565  * simplifies things greatly.  Unfortunately, it comes at a cost...
566 *
567 * Because of the 64k limitation of 3DSTATE_BINDING_TABLE_POINTERS_*, we can't
568 * place the binding tables just anywhere in surface state base address.
569  * Because 64k isn't a whole lot of space and we can't simply restrict the
570  * surface state buffer to 64k, we have to be more clever.  The solution we've
571 * chosen is to have a block pool with a maximum size of 2G that starts at
572 * zero and grows in both directions. All surface states are allocated from
573 * the top of the pool (positive offsets) and we allocate blocks (< 64k) of
574 * binding tables from the bottom of the pool (negative offsets). Every time
575 * we allocate a new binding table block, we set surface state base address to
576 * point to the bottom of the binding table block. This way all of the
577 * binding tables in the block are in the bottom 64k of surface state base
578 * address. When we fill out the binding table, we add the distance between
579 * the bottom of our binding table block and zero of the block pool to the
580  * surface state offsets so that they are correct relative to our new surface
581 * state base address at the bottom of the binding table block.
582 *
583  * \see adjust_relocations_from_state_pool()
584  * \see adjust_relocations_to_state_pool()
585 *
586 * \param[in] entries The number of surface state entries the binding
587 * table should be able to hold.
588 *
589  * \param[out] state_offset   The offset from surface state base address
590 * where the surface states live. This must be
591 * added to the surface state offset when it is
592 * written into the binding table entry.
593 *
594 * \return An anv_state representing the binding table
595 */
596 struct anv_state
597 anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer,
598 uint32_t entries, uint32_t *state_offset)
599 {
600 struct anv_block_pool *block_pool =
601 &cmd_buffer->device->surface_state_block_pool;
602 int32_t *bt_block = u_vector_head(&cmd_buffer->bt_blocks);
603 struct anv_state state;
604
605 state.alloc_size = align_u32(entries * 4, 32);
606
607 if (cmd_buffer->bt_next + state.alloc_size > block_pool->block_size)
608 return (struct anv_state) { 0 };
609
610 state.offset = cmd_buffer->bt_next;
611 state.map = block_pool->map + *bt_block + state.offset;
612
613 cmd_buffer->bt_next += state.alloc_size;
614
615 assert(*bt_block < 0);
616 *state_offset = -(*bt_block);
617
618 return state;
619 }
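/* Worked example with made-up numbers: suppose the current binding table
 * block was allocated at *bt_block == -65536, i.e. 64k below the block
 * pool's zero point.  Surface state base address then points at the bottom
 * of that block and *state_offset == 65536.  A surface state allocated at
 * positive pool offset 1024 is therefore written into the binding table as
 * 1024 + 65536, which is its offset relative to that base address.
 */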
620
621 struct anv_state
622 anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer)
623 {
624 return anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64);
625 }
626
627 struct anv_state
628 anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer,
629 uint32_t size, uint32_t alignment)
630 {
631 return anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream,
632 size, alignment);
633 }
634
635 VkResult
636 anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer)
637 {
638 struct anv_block_pool *block_pool =
639 &cmd_buffer->device->surface_state_block_pool;
640
641 int32_t *offset = u_vector_add(&cmd_buffer->bt_blocks);
642 if (offset == NULL)
643 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
644
645 *offset = anv_block_pool_alloc_back(block_pool);
646 cmd_buffer->bt_next = 0;
647
648 return VK_SUCCESS;
649 }
650
651 static void
652 anv_execbuf_init(struct anv_execbuf *exec)
653 {
654 memset(exec, 0, sizeof(*exec));
655 }
656
657 static void
658 anv_execbuf_finish(struct anv_execbuf *exec,
659 const VkAllocationCallbacks *alloc)
660 {
661 vk_free(alloc, exec->objects);
662 vk_free(alloc, exec->bos);
663 }
664
665 VkResult
666 anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
667 {
668 struct anv_batch_bo *batch_bo;
669 VkResult result;
670
671 list_inithead(&cmd_buffer->batch_bos);
672
673 result = anv_batch_bo_create(cmd_buffer, &batch_bo);
674 if (result != VK_SUCCESS)
675 return result;
676
677 list_addtail(&batch_bo->link, &cmd_buffer->batch_bos);
678
679 cmd_buffer->batch.alloc = &cmd_buffer->pool->alloc;
680 cmd_buffer->batch.user_data = cmd_buffer;
681
682 if (cmd_buffer->device->can_chain_batches) {
683 cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch;
684 } else {
685 cmd_buffer->batch.extend_cb = anv_cmd_buffer_grow_batch;
686 }
687
688 anv_batch_bo_start(batch_bo, &cmd_buffer->batch,
689 GEN8_MI_BATCH_BUFFER_START_length * 4);
690
691 int success = u_vector_init(&cmd_buffer->seen_bbos,
692 sizeof(struct anv_bo *),
693 8 * sizeof(struct anv_bo *));
694 if (!success)
695 goto fail_batch_bo;
696
697 *(struct anv_batch_bo **)u_vector_add(&cmd_buffer->seen_bbos) = batch_bo;
698
699 success = u_vector_init(&cmd_buffer->bt_blocks, sizeof(int32_t),
700 8 * sizeof(int32_t));
701 if (!success)
702 goto fail_seen_bbos;
703
704 result = anv_reloc_list_init(&cmd_buffer->surface_relocs,
705 &cmd_buffer->pool->alloc);
706 if (result != VK_SUCCESS)
707 goto fail_bt_blocks;
708
709 anv_cmd_buffer_new_binding_table_block(cmd_buffer);
710
711 anv_execbuf_init(&cmd_buffer->execbuf2);
712
713 return VK_SUCCESS;
714
715 fail_bt_blocks:
716 u_vector_finish(&cmd_buffer->bt_blocks);
717 fail_seen_bbos:
718 u_vector_finish(&cmd_buffer->seen_bbos);
719 fail_batch_bo:
720 anv_batch_bo_destroy(batch_bo, cmd_buffer);
721
722 return result;
723 }
724
725 void
726 anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
727 {
728 int32_t *bt_block;
729 u_vector_foreach(bt_block, &cmd_buffer->bt_blocks) {
730 anv_block_pool_free(&cmd_buffer->device->surface_state_block_pool,
731 *bt_block);
732 }
733 u_vector_finish(&cmd_buffer->bt_blocks);
734
735 anv_reloc_list_finish(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc);
736
737 u_vector_finish(&cmd_buffer->seen_bbos);
738
739 /* Destroy all of the batch buffers */
740 list_for_each_entry_safe(struct anv_batch_bo, bbo,
741 &cmd_buffer->batch_bos, link) {
742 anv_batch_bo_destroy(bbo, cmd_buffer);
743 }
744
745 anv_execbuf_finish(&cmd_buffer->execbuf2, &cmd_buffer->pool->alloc);
746 }
747
748 void
749 anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
750 {
751 /* Delete all but the first batch bo */
752 assert(!list_empty(&cmd_buffer->batch_bos));
753 while (cmd_buffer->batch_bos.next != cmd_buffer->batch_bos.prev) {
754 struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer);
755 list_del(&bbo->link);
756 anv_batch_bo_destroy(bbo, cmd_buffer);
757 }
758 assert(!list_empty(&cmd_buffer->batch_bos));
759
760 anv_batch_bo_start(anv_cmd_buffer_current_batch_bo(cmd_buffer),
761 &cmd_buffer->batch,
762 GEN8_MI_BATCH_BUFFER_START_length * 4);
763
764 while (u_vector_length(&cmd_buffer->bt_blocks) > 1) {
765 int32_t *bt_block = u_vector_remove(&cmd_buffer->bt_blocks);
766 anv_block_pool_free(&cmd_buffer->device->surface_state_block_pool,
767 *bt_block);
768 }
769 assert(u_vector_length(&cmd_buffer->bt_blocks) == 1);
770 cmd_buffer->bt_next = 0;
771
772 cmd_buffer->surface_relocs.num_relocs = 0;
773
774 /* Reset the list of seen buffers */
775 cmd_buffer->seen_bbos.head = 0;
776 cmd_buffer->seen_bbos.tail = 0;
777
778 *(struct anv_batch_bo **)u_vector_add(&cmd_buffer->seen_bbos) =
779 anv_cmd_buffer_current_batch_bo(cmd_buffer);
780 }
781
782 void
783 anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
784 {
785 struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer);
786
787 if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
788 /* When we start a batch buffer, we subtract a certain amount of
789 * padding from the end to ensure that we always have room to emit a
790 * BATCH_BUFFER_START to chain to the next BO. We need to remove
791 * that padding before we end the batch; otherwise, we may end up
792 * with our BATCH_BUFFER_END in another BO.
793 */
794 cmd_buffer->batch.end += GEN8_MI_BATCH_BUFFER_START_length * 4;
795 assert(cmd_buffer->batch.end == batch_bo->bo.map + batch_bo->bo.size);
796
797 anv_batch_emit(&cmd_buffer->batch, GEN7_MI_BATCH_BUFFER_END, bbe);
798
799 /* Round batch up to an even number of dwords. */
800 if ((cmd_buffer->batch.next - cmd_buffer->batch.start) & 4)
801 anv_batch_emit(&cmd_buffer->batch, GEN7_MI_NOOP, noop);
802
803 cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_PRIMARY;
804 }
805
806 anv_batch_bo_finish(batch_bo, &cmd_buffer->batch);
807
808 if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
809 /* If this is a secondary command buffer, we need to determine the
810 * mode in which it will be executed with vkExecuteCommands. We
811 * determine this statically here so that this stays in sync with the
812 * actual ExecuteCommands implementation.
813 */
814 if (!cmd_buffer->device->can_chain_batches) {
815 cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT;
816 } else if ((cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) &&
817 (batch_bo->length < ANV_CMD_BUFFER_BATCH_SIZE / 2)) {
818 /* If the secondary has exactly one batch buffer in its list *and*
819 * that batch buffer is less than half of the maximum size, we're
820        * probably better off simply copying it into our batch.
821 */
822 cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_EMIT;
823 } else if (!(cmd_buffer->usage_flags &
824 VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT)) {
825 cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CHAIN;
826
827 /* When we chain, we need to add an MI_BATCH_BUFFER_START command
828 * with its relocation. In order to handle this we'll increment here
829 * so we can unconditionally decrement right before adding the
830 * MI_BATCH_BUFFER_START command.
831 */
832 batch_bo->relocs.num_relocs++;
833 cmd_buffer->batch.next += GEN8_MI_BATCH_BUFFER_START_length * 4;
834 } else {
835 cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN;
836 }
837 }
838 }
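/* For reference, the secondary execution modes selected above boil down to:
 *
 *    GROW_AND_EMIT  - chaining unsupported: grow the primary's batch_bo and
 *                     copy the secondary into it at execute time.
 *    EMIT           - a single, small batch_bo: just copy it into the primary.
 *    CHAIN          - patch the secondary's trailing MI_BATCH_BUFFER_START to
 *                     jump back into the primary (only safe for single use).
 *    COPY_AND_CHAIN - SIMULTANEOUS_USE is set: clone the whole batch_bo list
 *                     and chain through the copy instead.
 */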
839
840 static inline VkResult
841 anv_cmd_buffer_add_seen_bbos(struct anv_cmd_buffer *cmd_buffer,
842 struct list_head *list)
843 {
844 list_for_each_entry(struct anv_batch_bo, bbo, list, link) {
845 struct anv_batch_bo **bbo_ptr = u_vector_add(&cmd_buffer->seen_bbos);
846 if (bbo_ptr == NULL)
847 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
848
849 *bbo_ptr = bbo;
850 }
851
852 return VK_SUCCESS;
853 }
854
855 void
856 anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
857 struct anv_cmd_buffer *secondary)
858 {
859 switch (secondary->exec_mode) {
860 case ANV_CMD_BUFFER_EXEC_MODE_EMIT:
861 anv_batch_emit_batch(&primary->batch, &secondary->batch);
862 break;
863 case ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT: {
864 struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(primary);
865 unsigned length = secondary->batch.end - secondary->batch.start;
866 anv_batch_bo_grow(primary, bbo, &primary->batch, length,
867 GEN8_MI_BATCH_BUFFER_START_length * 4);
868 anv_batch_emit_batch(&primary->batch, &secondary->batch);
869 break;
870 }
871 case ANV_CMD_BUFFER_EXEC_MODE_CHAIN: {
872 struct anv_batch_bo *first_bbo =
873 list_first_entry(&secondary->batch_bos, struct anv_batch_bo, link);
874 struct anv_batch_bo *last_bbo =
875 list_last_entry(&secondary->batch_bos, struct anv_batch_bo, link);
876
877 emit_batch_buffer_start(primary, &first_bbo->bo, 0);
878
879 struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary);
880 assert(primary->batch.start == this_bbo->bo.map);
881 uint32_t offset = primary->batch.next - primary->batch.start;
882 const uint32_t inst_size = GEN8_MI_BATCH_BUFFER_START_length * 4;
883
884 /* Roll back the previous MI_BATCH_BUFFER_START and its relocation so we
885 * can emit a new command and relocation for the current splice. In
886 * order to handle the initial-use case, we incremented next and
887        * num_relocs in end_batch_buffer() so we can always just subtract
888 * here.
889 */
890 last_bbo->relocs.num_relocs--;
891 secondary->batch.next -= inst_size;
892 emit_batch_buffer_start(secondary, &this_bbo->bo, offset);
893 anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos);
894
895 /* After patching up the secondary buffer, we need to clflush the
896 * modified instruction in case we're on a !llc platform. We use a
897 * little loop to handle the case where the instruction crosses a cache
898 * line boundary.
899 */
900 if (!primary->device->info.has_llc) {
901 void *inst = secondary->batch.next - inst_size;
902 void *p = (void *) (((uintptr_t) inst) & ~CACHELINE_MASK);
903 __builtin_ia32_mfence();
904 while (p < secondary->batch.next) {
905 __builtin_ia32_clflush(p);
906 p += CACHELINE_SIZE;
907 }
908 }
909 break;
910 }
911 case ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN: {
912 struct list_head copy_list;
913 VkResult result = anv_batch_bo_list_clone(&secondary->batch_bos,
914 secondary,
915 &copy_list);
916 if (result != VK_SUCCESS)
917 return; /* FIXME */
918
919 anv_cmd_buffer_add_seen_bbos(primary, &copy_list);
920
921 struct anv_batch_bo *first_bbo =
922 list_first_entry(&copy_list, struct anv_batch_bo, link);
923 struct anv_batch_bo *last_bbo =
924 list_last_entry(&copy_list, struct anv_batch_bo, link);
925
926 cmd_buffer_chain_to_batch_bo(primary, first_bbo);
927
928 list_splicetail(&copy_list, &primary->batch_bos);
929
930 anv_batch_bo_continue(last_bbo, &primary->batch,
931 GEN8_MI_BATCH_BUFFER_START_length * 4);
932 break;
933 }
934 default:
935 assert(!"Invalid execution mode");
936 }
937
938 anv_reloc_list_append(&primary->surface_relocs, &primary->pool->alloc,
939 &secondary->surface_relocs, 0);
940 }
941
942 static VkResult
943 anv_execbuf_add_bo(struct anv_execbuf *exec,
944 struct anv_bo *bo,
945 struct anv_reloc_list *relocs,
946 const VkAllocationCallbacks *alloc)
947 {
948 struct drm_i915_gem_exec_object2 *obj = NULL;
949
950 if (bo->index < exec->bo_count && exec->bos[bo->index] == bo)
951 obj = &exec->objects[bo->index];
952
953 if (obj == NULL) {
954 /* We've never seen this one before. Add it to the list and assign
955 * an id that we can use later.
956 */
957 if (exec->bo_count >= exec->array_length) {
958 uint32_t new_len = exec->objects ? exec->array_length * 2 : 64;
959
960 struct drm_i915_gem_exec_object2 *new_objects =
961 vk_alloc(alloc, new_len * sizeof(*new_objects),
962 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
963 if (new_objects == NULL)
964 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
965
966 struct anv_bo **new_bos =
967 vk_alloc(alloc, new_len * sizeof(*new_bos),
968 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
969 if (new_bos == NULL) {
970 vk_free(alloc, new_objects);
971 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
972 }
973
974 if (exec->objects) {
975 memcpy(new_objects, exec->objects,
976 exec->bo_count * sizeof(*new_objects));
977 memcpy(new_bos, exec->bos,
978 exec->bo_count * sizeof(*new_bos));
979 }
980
981 vk_free(alloc, exec->objects);
982 vk_free(alloc, exec->bos);
983
984 exec->objects = new_objects;
985 exec->bos = new_bos;
986 exec->array_length = new_len;
987 }
988
989 assert(exec->bo_count < exec->array_length);
990
991 bo->index = exec->bo_count++;
992 obj = &exec->objects[bo->index];
993 exec->bos[bo->index] = bo;
994
995 obj->handle = bo->gem_handle;
996 obj->relocation_count = 0;
997 obj->relocs_ptr = 0;
998 obj->alignment = 0;
999 obj->offset = bo->offset;
1000 obj->flags = bo->is_winsys_bo ? EXEC_OBJECT_WRITE : 0;
1001 obj->rsvd1 = 0;
1002 obj->rsvd2 = 0;
1003 }
1004
1005 if (relocs != NULL && obj->relocation_count == 0) {
1006 /* This is the first time we've ever seen a list of relocations for
1007 * this BO. Go ahead and set the relocations and then walk the list
1008 * of relocations and add them all.
1009 */
1010 obj->relocation_count = relocs->num_relocs;
1011 obj->relocs_ptr = (uintptr_t) relocs->relocs;
1012
1013 for (size_t i = 0; i < relocs->num_relocs; i++) {
1014 /* A quick sanity check on relocations */
1015 assert(relocs->relocs[i].offset < bo->size);
1016 anv_execbuf_add_bo(exec, relocs->reloc_bos[i], NULL, alloc);
1017 }
1018 }
1019
1020 return VK_SUCCESS;
1021 }
1022
1023 static void
1024 anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer,
1025 struct anv_reloc_list *list)
1026 {
1027 for (size_t i = 0; i < list->num_relocs; i++)
1028 list->relocs[i].target_handle = list->reloc_bos[i]->index;
1029 }
1030
1031 static void
1032 write_reloc(const struct anv_device *device, void *p, uint64_t v, bool flush)
1033 {
1034 unsigned reloc_size = 0;
1035 if (device->info.gen >= 8) {
1036 /* From the Broadwell PRM Vol. 2a, MI_LOAD_REGISTER_MEM::MemoryAddress:
1037 *
1038 * "This field specifies the address of the memory location where the
1039 * register value specified in the DWord above will read from. The
1040 * address specifies the DWord location of the data. Range =
1041 * GraphicsVirtualAddress[63:2] for a DWord register GraphicsAddress
1042 * [63:48] are ignored by the HW and assumed to be in correct
1043 * canonical form [63:48] == [47]."
1044 */
1045 const int shift = 63 - 47;
1046 reloc_size = sizeof(uint64_t);
1047 *(uint64_t *)p = (((int64_t)v) << shift) >> shift;
1048 } else {
1049 reloc_size = sizeof(uint32_t);
1050 *(uint32_t *)p = v;
1051 }
1052
1053 if (flush && !device->info.has_llc)
1054 anv_clflush_range(p, reloc_size);
1055 }
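/* Example of the sign extension above, assuming shift == 16: an address of
 * 0x0000900000001000 has bit 47 set, so
 *
 *    (((int64_t)0x0000900000001000) << 16) >> 16 == 0xffff900000001000
 *
 * i.e. bits [63:48] become copies of bit 47, which is the canonical form the
 * PRM quote asks for.  Addresses with bit 47 clear pass through unchanged.
 */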
1056
1057 static void
1058 adjust_relocations_from_state_pool(struct anv_block_pool *pool,
1059 struct anv_reloc_list *relocs)
1060 {
1061 for (size_t i = 0; i < relocs->num_relocs; i++) {
1062 /* All of the relocations from this block pool to other BO's should
1063 * have been emitted relative to the surface block pool center. We
1064 * need to add the center offset to make them relative to the
1065 * beginning of the actual GEM bo.
1066 */
1067 relocs->relocs[i].offset += pool->center_bo_offset;
1068 }
1069 }
1070
1071 static void
1072 adjust_relocations_to_state_pool(struct anv_block_pool *pool,
1073 struct anv_bo *from_bo,
1074 struct anv_reloc_list *relocs,
1075 uint32_t *last_pool_center_bo_offset)
1076 {
1077 assert(*last_pool_center_bo_offset <= pool->center_bo_offset);
1078 uint32_t delta = pool->center_bo_offset - *last_pool_center_bo_offset;
1079
1080 /* When we initially emit relocations into a block pool, we don't
1081 * actually know what the final center_bo_offset will be so we just emit
1082 * it as if center_bo_offset == 0. Now that we know what the center
1083 * offset is, we need to walk the list of relocations and adjust any
1084 * relocations that point to the pool bo with the correct offset.
1085 */
1086 for (size_t i = 0; i < relocs->num_relocs; i++) {
1087 if (relocs->reloc_bos[i] == &pool->bo) {
1088 /* Adjust the delta value in the relocation to correctly
1089 * correspond to the new delta. Initially, this value may have
1090 * been negative (if treated as unsigned), but we trust in
1091 * uint32_t roll-over to fix that for us at this point.
1092 */
1093 relocs->relocs[i].delta += delta;
1094
1095 /* Since the delta has changed, we need to update the actual
1096 * relocated value with the new presumed value. This function
1097 * should only be called on batch buffers, so we know it isn't in
1098 * use by the GPU at the moment.
1099 */
1100 assert(relocs->relocs[i].offset < from_bo->size);
1101 write_reloc(pool->device, from_bo->map + relocs->relocs[i].offset,
1102 relocs->relocs[i].presumed_offset +
1103 relocs->relocs[i].delta, false);
1104 }
1105 }
1106
1107 *last_pool_center_bo_offset = pool->center_bo_offset;
1108 }
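/* Concrete example with made-up numbers: a relocation into the surface state
 * pool recorded while center_bo_offset was still 0 might carry
 * delta == -8192 (stored as the uint32_t 0xffffe000).  If the pool's center
 * has since moved to center_bo_offset == 12288, the loop above adds 12288,
 * giving 4096: the target's real offset from the start of the pool's GEM BO,
 * which write_reloc() then combines with the BO's presumed GPU address to
 * refresh the value in the batch.
 */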
1109
1110 void
1111 anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer)
1112 {
1113 struct anv_batch *batch = &cmd_buffer->batch;
1114 struct anv_block_pool *ss_pool =
1115 &cmd_buffer->device->surface_state_block_pool;
1116
1117 cmd_buffer->execbuf2.bo_count = 0;
1118
1119 adjust_relocations_from_state_pool(ss_pool, &cmd_buffer->surface_relocs);
1120 anv_execbuf_add_bo(&cmd_buffer->execbuf2, &ss_pool->bo,
1121 &cmd_buffer->surface_relocs,
1122 &cmd_buffer->pool->alloc);
1123
1124 /* First, we walk over all of the bos we've seen and add them and their
1125 * relocations to the validate list.
1126 */
1127 struct anv_batch_bo **bbo;
1128 u_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
1129 adjust_relocations_to_state_pool(ss_pool, &(*bbo)->bo, &(*bbo)->relocs,
1130 &(*bbo)->last_ss_pool_bo_offset);
1131
1132 anv_execbuf_add_bo(&cmd_buffer->execbuf2, &(*bbo)->bo, &(*bbo)->relocs,
1133 &cmd_buffer->pool->alloc);
1134 }
1135
1136 struct anv_batch_bo *first_batch_bo =
1137 list_first_entry(&cmd_buffer->batch_bos, struct anv_batch_bo, link);
1138
1139 /* The kernel requires that the last entry in the validation list be the
1140 * batch buffer to execute. We can simply swap the element
1141 * corresponding to the first batch_bo in the chain with the last
1142 * element in the list.
1143 */
1144 if (first_batch_bo->bo.index != cmd_buffer->execbuf2.bo_count - 1) {
1145 uint32_t idx = first_batch_bo->bo.index;
1146 uint32_t last_idx = cmd_buffer->execbuf2.bo_count - 1;
1147
1148 struct drm_i915_gem_exec_object2 tmp_obj =
1149 cmd_buffer->execbuf2.objects[idx];
1150 assert(cmd_buffer->execbuf2.bos[idx] == &first_batch_bo->bo);
1151
1152 cmd_buffer->execbuf2.objects[idx] = cmd_buffer->execbuf2.objects[last_idx];
1153 cmd_buffer->execbuf2.bos[idx] = cmd_buffer->execbuf2.bos[last_idx];
1154 cmd_buffer->execbuf2.bos[idx]->index = idx;
1155
1156 cmd_buffer->execbuf2.objects[last_idx] = tmp_obj;
1157 cmd_buffer->execbuf2.bos[last_idx] = &first_batch_bo->bo;
1158 first_batch_bo->bo.index = last_idx;
1159 }
1160
1161 /* Now we go through and fixup all of the relocation lists to point to
1162 * the correct indices in the object array. We have to do this after we
1163 * reorder the list above as some of the indices may have changed.
1164 */
1165 u_vector_foreach(bbo, &cmd_buffer->seen_bbos)
1166 anv_cmd_buffer_process_relocs(cmd_buffer, &(*bbo)->relocs);
1167
1168 anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs);
1169
1170 if (!cmd_buffer->device->info.has_llc) {
1171 __builtin_ia32_mfence();
1172 u_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
1173 for (uint32_t i = 0; i < (*bbo)->length; i += CACHELINE_SIZE)
1174 __builtin_ia32_clflush((*bbo)->bo.map + i);
1175 }
1176 }
1177
1178 cmd_buffer->execbuf2.execbuf = (struct drm_i915_gem_execbuffer2) {
1179 .buffers_ptr = (uintptr_t) cmd_buffer->execbuf2.objects,
1180 .buffer_count = cmd_buffer->execbuf2.bo_count,
1181 .batch_start_offset = 0,
1182 .batch_len = batch->next - batch->start,
1183 .cliprects_ptr = 0,
1184 .num_cliprects = 0,
1185 .DR1 = 0,
1186 .DR4 = 0,
1187 .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER |
1188 I915_EXEC_CONSTANTS_REL_GENERAL,
1189 .rsvd1 = cmd_buffer->device->context_id,
1190 .rsvd2 = 0,
1191 };
1192 }
1193
1194 VkResult
1195 anv_cmd_buffer_execbuf(struct anv_device *device,
1196 struct anv_cmd_buffer *cmd_buffer)
1197 {
1198 /* Since surface states are shared between command buffers and we don't
1199 * know what order they will be submitted to the kernel, we don't know what
1200 * address is actually written in the surface state object at any given
1201 * time. The only option is to set a bogus presumed offset and let
1202 * relocations do their job.
1203 */
1204 for (size_t i = 0; i < cmd_buffer->surface_relocs.num_relocs; i++)
1205 cmd_buffer->surface_relocs.relocs[i].presumed_offset = -1;
1206
1207 return anv_device_execbuf(device, &cmd_buffer->execbuf2.execbuf,
1208 cmd_buffer->execbuf2.bos);
1209 }
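/* A minimal sketch of the submission step underneath this (illustrative;
 * anv_device_execbuf() in anv_device.c is the real wrapper and also copies
 * the kernel's chosen offsets back into each anv_bo for future
 * presumed_offset values):
 *
 *    int ret = drmIoctl(device->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2,
 *                       &cmd_buffer->execbuf2.execbuf);
 *    if (ret == 0) {
 *       for (uint32_t i = 0; i < cmd_buffer->execbuf2.bo_count; i++)
 *          cmd_buffer->execbuf2.bos[i]->offset =
 *             cmd_buffer->execbuf2.objects[i].offset;
 *    }
 */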