/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "anv_private.h"

#include "gen7_pack.h"
#include "gen8_pack.h"

/** \file anv_batch_chain.c
 *
 * This file contains functions related to anv_cmd_buffer as a data
 * structure. This involves everything required to create and destroy
 * the actual batch buffers as well as link them together and handle
 * relocations and surface state. It specifically does *not* contain any
 * handling of actual vkCmd calls beyond vkCmdExecuteCommands.
 */

/*-----------------------------------------------------------------------*
 * Functions related to anv_reloc_list
 *-----------------------------------------------------------------------*/

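/* Initialize a relocation list, optionally copying the contents of another
 * list. When other_list is NULL, an empty list with a default capacity of
 * 256 entries is created; otherwise the new list gets the same length and a
 * copy of the other list's relocation entries and target BOs.
 */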
static VkResult
anv_reloc_list_init_clone(struct anv_reloc_list *list,
                          const VkAllocationCallbacks *alloc,
                          const struct anv_reloc_list *other_list)
{
   if (other_list) {
      list->num_relocs = other_list->num_relocs;
      list->array_length = other_list->array_length;
   } else {
      list->num_relocs = 0;
      list->array_length = 256;
   }

   list->relocs =
      anv_alloc(alloc, list->array_length * sizeof(*list->relocs), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

   if (list->relocs == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   list->reloc_bos =
      anv_alloc(alloc, list->array_length * sizeof(*list->reloc_bos), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

   if (list->reloc_bos == NULL) {
      anv_free(alloc, list->relocs);
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   if (other_list) {
      memcpy(list->relocs, other_list->relocs,
             list->array_length * sizeof(*list->relocs));
      memcpy(list->reloc_bos, other_list->reloc_bos,
             list->array_length * sizeof(*list->reloc_bos));
   }

   return VK_SUCCESS;
}

VkResult
anv_reloc_list_init(struct anv_reloc_list *list,
                    const VkAllocationCallbacks *alloc)
{
   return anv_reloc_list_init_clone(list, alloc, NULL);
}

void
anv_reloc_list_finish(struct anv_reloc_list *list,
                      const VkAllocationCallbacks *alloc)
{
   anv_free(alloc, list->relocs);
   anv_free(alloc, list->reloc_bos);
}

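/* Ensure the list has room for at least num_additional_relocs more entries.
 * The backing arrays grow by doubling so repeated adds stay cheap.
 */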
static VkResult
anv_reloc_list_grow(struct anv_reloc_list *list,
                    const VkAllocationCallbacks *alloc,
                    size_t num_additional_relocs)
{
   if (list->num_relocs + num_additional_relocs <= list->array_length)
      return VK_SUCCESS;

   size_t new_length = list->array_length * 2;
   while (new_length < list->num_relocs + num_additional_relocs)
      new_length *= 2;

   struct drm_i915_gem_relocation_entry *new_relocs =
      anv_alloc(alloc, new_length * sizeof(*list->relocs), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (new_relocs == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   struct anv_bo **new_reloc_bos =
      anv_alloc(alloc, new_length * sizeof(*list->reloc_bos), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (new_reloc_bos == NULL) {
      anv_free(alloc, new_relocs);
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   memcpy(new_relocs, list->relocs, list->num_relocs * sizeof(*list->relocs));
   memcpy(new_reloc_bos, list->reloc_bos,
          list->num_relocs * sizeof(*list->reloc_bos));

   anv_free(alloc, list->relocs);
   anv_free(alloc, list->reloc_bos);

   list->array_length = new_length;
   list->relocs = new_relocs;
   list->reloc_bos = new_reloc_bos;

   return VK_SUCCESS;
}

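/* Append a single relocation to the list. The relocation is recorded at
 * byte offset 'offset' within the batch, pointing at target_bo plus 'delta'
 * bytes. Returns the presumed address of the target
 * (target_bo->offset + delta) so the caller can write it into the batch.
 */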
uint64_t
anv_reloc_list_add(struct anv_reloc_list *list,
                   const VkAllocationCallbacks *alloc,
                   uint32_t offset, struct anv_bo *target_bo, uint32_t delta)
{
   struct drm_i915_gem_relocation_entry *entry;
   int index;

   anv_reloc_list_grow(list, alloc, 1);
   /* TODO: Handle failure */

   /* XXX: Can we use I915_EXEC_HANDLE_LUT? */
   index = list->num_relocs++;
   list->reloc_bos[index] = target_bo;
   entry = &list->relocs[index];
   entry->target_handle = target_bo->gem_handle;
   entry->delta = delta;
   entry->offset = offset;
   entry->presumed_offset = target_bo->offset;
   entry->read_domains = 0;
   entry->write_domain = 0;
   VG(VALGRIND_CHECK_MEM_IS_DEFINED(entry, sizeof(*entry)));

   return target_bo->offset + delta;
}

static void
anv_reloc_list_append(struct anv_reloc_list *list,
                      const VkAllocationCallbacks *alloc,
                      struct anv_reloc_list *other, uint32_t offset)
{
   anv_reloc_list_grow(list, alloc, other->num_relocs);
   /* TODO: Handle failure */

   memcpy(&list->relocs[list->num_relocs], &other->relocs[0],
          other->num_relocs * sizeof(other->relocs[0]));
   memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0],
          other->num_relocs * sizeof(other->reloc_bos[0]));

   for (uint32_t i = 0; i < other->num_relocs; i++)
      list->relocs[i + list->num_relocs].offset += offset;

   list->num_relocs += other->num_relocs;
}

/*-----------------------------------------------------------------------*
 * Functions related to anv_batch
 *-----------------------------------------------------------------------*/

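/* Reserve space for num_dwords dwords in the batch and return a pointer to
 * the start of the reserved space. If the batch is full, extend_cb is
 * invoked first to chain to (or grow into) a new buffer.
 */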
void *
anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords)
{
   if (batch->next + num_dwords * 4 > batch->end)
      batch->extend_cb(batch, batch->user_data);

   void *p = batch->next;

   batch->next += num_dwords * 4;
   assert(batch->next <= batch->end);

   return p;
}

uint64_t
anv_batch_emit_reloc(struct anv_batch *batch,
                     void *location, struct anv_bo *bo, uint32_t delta)
{
   return anv_reloc_list_add(batch->relocs, batch->alloc,
                             location - batch->start, bo, delta);
}

void
anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other)
{
   uint32_t size, offset;

   size = other->next - other->start;
   assert(size % 4 == 0);

   if (batch->next + size > batch->end)
      batch->extend_cb(batch, batch->user_data);

   assert(batch->next + size <= batch->end);

   VG(VALGRIND_CHECK_MEM_IS_DEFINED(other->start, size));
   memcpy(batch->next, other->start, size);

   offset = batch->next - batch->start;
   anv_reloc_list_append(batch->relocs, batch->alloc,
                         other->relocs, offset);

   batch->next += size;
}

/*-----------------------------------------------------------------------*
 * Functions related to anv_batch_bo
 *-----------------------------------------------------------------------*/

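/* Allocate a new anv_batch_bo: the host-side struct comes from the command
 * pool allocator, the GEM buffer from the device's batch BO pool, and the
 * relocation list starts out empty.
 */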
static VkResult
anv_batch_bo_create(struct anv_cmd_buffer *cmd_buffer,
                    struct anv_batch_bo **bbo_out)
{
   VkResult result;

   struct anv_batch_bo *bbo = anv_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo),
                                        8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (bbo == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
   if (result != VK_SUCCESS)
      goto fail_alloc;

   result = anv_reloc_list_init(&bbo->relocs, &cmd_buffer->pool->alloc);
   if (result != VK_SUCCESS)
      goto fail_bo_alloc;

   *bbo_out = bbo;

   return VK_SUCCESS;

 fail_bo_alloc:
   anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
 fail_alloc:
   anv_free(&cmd_buffer->pool->alloc, bbo);

   return result;
}

static VkResult
anv_batch_bo_clone(struct anv_cmd_buffer *cmd_buffer,
                   const struct anv_batch_bo *other_bbo,
                   struct anv_batch_bo **bbo_out)
{
   VkResult result;

   struct anv_batch_bo *bbo = anv_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo),
                                        8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (bbo == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
   if (result != VK_SUCCESS)
      goto fail_alloc;

   result = anv_reloc_list_init_clone(&bbo->relocs, &cmd_buffer->pool->alloc,
                                      &other_bbo->relocs);
   if (result != VK_SUCCESS)
      goto fail_bo_alloc;

   bbo->length = other_bbo->length;
   memcpy(bbo->bo.map, other_bbo->bo.map, other_bbo->length);

   bbo->last_ss_pool_bo_offset = other_bbo->last_ss_pool_bo_offset;

   *bbo_out = bbo;

   return VK_SUCCESS;

 fail_bo_alloc:
   anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
 fail_alloc:
   anv_free(&cmd_buffer->pool->alloc, bbo);

   return result;
}

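/* Point an anv_batch at the start of this batch_bo and reset its relocation
 * list. batch_padding bytes are held back at the end of the buffer so there
 * is always room to emit the MI_BATCH_BUFFER_START (or BATCH_BUFFER_END)
 * that terminates it.
 */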
static void
anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch,
                   size_t batch_padding)
{
   batch->next = batch->start = bbo->bo.map;
   batch->end = bbo->bo.map + bbo->bo.size - batch_padding;
   batch->relocs = &bbo->relocs;
   bbo->last_ss_pool_bo_offset = 0;
   bbo->relocs.num_relocs = 0;
}

static void
anv_batch_bo_continue(struct anv_batch_bo *bbo, struct anv_batch *batch,
                      size_t batch_padding)
{
   batch->start = bbo->bo.map;
   batch->next = bbo->bo.map + bbo->length;
   batch->end = bbo->bo.map + bbo->bo.size - batch_padding;
   batch->relocs = &bbo->relocs;
}

static void
anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch)
{
   assert(batch->start == bbo->bo.map);
   bbo->length = batch->next - batch->start;
   VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length));
}

static void
anv_batch_bo_destroy(struct anv_batch_bo *bbo,
                     struct anv_cmd_buffer *cmd_buffer)
{
   anv_reloc_list_finish(&bbo->relocs, &cmd_buffer->pool->alloc);
   anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
   anv_free(&cmd_buffer->pool->alloc, bbo);
}

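/* Deep-copy a chain of batch_bos into new_list. On failure, any clones
 * created so far are destroyed and the error is returned.
 */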
static VkResult
anv_batch_bo_list_clone(const struct list_head *list,
                        struct anv_cmd_buffer *cmd_buffer,
                        struct list_head *new_list)
{
   VkResult result = VK_SUCCESS;

   list_inithead(new_list);

   struct anv_batch_bo *prev_bbo = NULL;
   list_for_each_entry(struct anv_batch_bo, bbo, list, link) {
      struct anv_batch_bo *new_bbo;
      result = anv_batch_bo_clone(cmd_buffer, bbo, &new_bbo);
      if (result != VK_SUCCESS)
         break;
      list_addtail(&new_bbo->link, new_list);

      if (prev_bbo) {
         /* As we clone this list of batch_bos, they chain one to the
          * other using MI_BATCH_BUFFER_START commands. We need to fix up
          * those relocations as we go. Fortunately, this is pretty easy
          * as it will always be the last relocation in the list.
          */
         uint32_t last_idx = prev_bbo->relocs.num_relocs - 1;
         assert(prev_bbo->relocs.reloc_bos[last_idx] == &bbo->bo);
         prev_bbo->relocs.reloc_bos[last_idx] = &new_bbo->bo;
      }

      prev_bbo = new_bbo;
   }

   if (result != VK_SUCCESS) {
      list_for_each_entry_safe(struct anv_batch_bo, bbo, new_list, link)
         anv_batch_bo_destroy(bbo, cmd_buffer);
   }

   return result;
}

/*-----------------------------------------------------------------------*
 * Functions related to anv_cmd_buffer
 *-----------------------------------------------------------------------*/

static inline struct anv_batch_bo *
anv_cmd_buffer_current_batch_bo(struct anv_cmd_buffer *cmd_buffer)
{
   return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->batch_bos.prev, link);
}

struct anv_address
anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer)
{
   return (struct anv_address) {
      .bo = &cmd_buffer->device->surface_state_block_pool.bo,
      .offset = *(int32_t *)anv_vector_head(&cmd_buffer->bt_blocks),
   };
}

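/* Emit an MI_BATCH_BUFFER_START that jumps to 'offset' bytes into 'bo'.
 * This is used both to chain batch_bos together and to call into secondary
 * command buffers.
 */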
static void
emit_batch_buffer_start(struct anv_cmd_buffer *cmd_buffer,
                        struct anv_bo *bo, uint32_t offset)
{
   /* In gen8+ the address field grew to two dwords to accommodate 48-bit
    * offsets. The high 16 bits are in the last dword, so we can use the
    * gen8 version in either case, as long as we set the instruction length
    * in the header accordingly. This means that we always emit three
    * dwords here and all the padding and adjustment we do in this file
    * works for all gens.
    */

   const uint32_t gen7_length =
      GEN7_MI_BATCH_BUFFER_START_length - GEN7_MI_BATCH_BUFFER_START_length_bias;
   const uint32_t gen8_length =
      GEN8_MI_BATCH_BUFFER_START_length - GEN8_MI_BATCH_BUFFER_START_length_bias;

   anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START,
                  .DwordLength = cmd_buffer->device->info.gen < 8 ?
                                 gen7_length : gen8_length,
                  ._2ndLevelBatchBuffer = _1stlevelbatch,
                  .AddressSpaceIndicator = ASI_PPGTT,
                  .BatchBufferStartAddress = { bo, offset });
}

static void
cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer,
                             struct anv_batch_bo *bbo)
{
   struct anv_batch *batch = &cmd_buffer->batch;
   struct anv_batch_bo *current_bbo =
      anv_cmd_buffer_current_batch_bo(cmd_buffer);

   /* We set the end of the batch a little short so that we're sure we have
    * room for the chaining command. Since we're about to emit the chaining
    * command, let's set it back where it should go.
    */
   batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4;
   assert(batch->end == current_bbo->bo.map + current_bbo->bo.size);

   emit_batch_buffer_start(cmd_buffer, &bbo->bo, 0);

   anv_batch_bo_finish(current_bbo, batch);
}

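/* anv_batch extend callback for command buffers: allocate a fresh batch_bo,
 * emit an MI_BATCH_BUFFER_START in the current one that jumps to it, and
 * make the new BO the current batch target.
 */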
static VkResult
anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data)
{
   struct anv_cmd_buffer *cmd_buffer = _data;
   struct anv_batch_bo *new_bbo;

   VkResult result = anv_batch_bo_create(cmd_buffer, &new_bbo);
   if (result != VK_SUCCESS)
      return result;

   struct anv_batch_bo **seen_bbo = anv_vector_add(&cmd_buffer->seen_bbos);
   if (seen_bbo == NULL) {
      anv_batch_bo_destroy(new_bbo, cmd_buffer);
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
   }
   *seen_bbo = new_bbo;

   cmd_buffer_chain_to_batch_bo(cmd_buffer, new_bbo);

   list_addtail(&new_bbo->link, &cmd_buffer->batch_bos);

   anv_batch_bo_start(new_bbo, batch, GEN8_MI_BATCH_BUFFER_START_length * 4);

   return VK_SUCCESS;
}

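/* Allocate space for a binding table with 'entries' surface state pointers
 * from the current binding table block. Binding table blocks are carved off
 * the back of the surface state block pool, so the current block offset
 * (*bt_block) is negative; *state_offset returns its magnitude for callers
 * that need to bias surface state offsets relative to the block. Returns a
 * zeroed anv_state if the current block is full.
 */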
struct anv_state
anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer,
                                   uint32_t entries, uint32_t *state_offset)
{
   struct anv_block_pool *block_pool =
      &cmd_buffer->device->surface_state_block_pool;
   int32_t *bt_block = anv_vector_head(&cmd_buffer->bt_blocks);
   struct anv_state state;

   state.alloc_size = align_u32(entries * 4, 32);

   if (cmd_buffer->bt_next + state.alloc_size > block_pool->block_size)
      return (struct anv_state) { 0 };

   state.offset = cmd_buffer->bt_next;
   state.map = block_pool->map + *bt_block + state.offset;

   cmd_buffer->bt_next += state.alloc_size;

   assert(*bt_block < 0);
   *state_offset = -(*bt_block);

   return state;
}

struct anv_state
anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer)
{
   return anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64);
}

struct anv_state
anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer,
                                   uint32_t size, uint32_t alignment)
{
   return anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream,
                                 size, alignment);
}

VkResult
anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_block_pool *block_pool =
      &cmd_buffer->device->surface_state_block_pool;

   int32_t *offset = anv_vector_add(&cmd_buffer->bt_blocks);
   if (offset == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   *offset = anv_block_pool_alloc_back(block_pool);
   cmd_buffer->bt_next = 0;

   return VK_SUCCESS;
}

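/* Set up the per-command-buffer batch state: the first batch_bo, the
 * growable batch that chains into new BOs on overflow, the seen_bbos and
 * bt_blocks vectors, the surface relocation list, and an initial binding
 * table block. Everything allocated so far is torn down again on failure.
 */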
VkResult
anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_batch_bo *batch_bo;
   VkResult result;

   list_inithead(&cmd_buffer->batch_bos);

   result = anv_batch_bo_create(cmd_buffer, &batch_bo);
   if (result != VK_SUCCESS)
      return result;

   list_addtail(&batch_bo->link, &cmd_buffer->batch_bos);

   cmd_buffer->batch.alloc = &cmd_buffer->pool->alloc;
   cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch;
   cmd_buffer->batch.user_data = cmd_buffer;

   anv_batch_bo_start(batch_bo, &cmd_buffer->batch,
                      GEN8_MI_BATCH_BUFFER_START_length * 4);

   int success = anv_vector_init(&cmd_buffer->seen_bbos,
                                 sizeof(struct anv_bo *),
                                 8 * sizeof(struct anv_bo *));
   if (!success)
      goto fail_batch_bo;

   *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = batch_bo;

   success = anv_vector_init(&cmd_buffer->bt_blocks, sizeof(int32_t),
                             8 * sizeof(int32_t));
   if (!success)
      goto fail_seen_bbos;

   result = anv_reloc_list_init(&cmd_buffer->surface_relocs,
                                &cmd_buffer->pool->alloc);
   if (result != VK_SUCCESS)
      goto fail_bt_blocks;

   anv_cmd_buffer_new_binding_table_block(cmd_buffer);

   cmd_buffer->execbuf2.objects = NULL;
   cmd_buffer->execbuf2.bos = NULL;
   cmd_buffer->execbuf2.array_length = 0;

   return VK_SUCCESS;

 fail_bt_blocks:
   anv_vector_finish(&cmd_buffer->bt_blocks);
 fail_seen_bbos:
   anv_vector_finish(&cmd_buffer->seen_bbos);
 fail_batch_bo:
   anv_batch_bo_destroy(batch_bo, cmd_buffer);

   return result;
}

void
anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
{
   int32_t *bt_block;
   anv_vector_foreach(bt_block, &cmd_buffer->bt_blocks) {
      anv_block_pool_free(&cmd_buffer->device->surface_state_block_pool,
                          *bt_block);
   }
   anv_vector_finish(&cmd_buffer->bt_blocks);

   anv_reloc_list_finish(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc);

   anv_vector_finish(&cmd_buffer->seen_bbos);

   /* Destroy all of the batch buffers */
   list_for_each_entry_safe(struct anv_batch_bo, bbo,
                            &cmd_buffer->batch_bos, link) {
      anv_batch_bo_destroy(bbo, cmd_buffer);
   }

   anv_free(&cmd_buffer->pool->alloc, cmd_buffer->execbuf2.objects);
   anv_free(&cmd_buffer->pool->alloc, cmd_buffer->execbuf2.bos);
}

void
anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
{
   /* Delete all but the first batch bo */
   assert(!list_empty(&cmd_buffer->batch_bos));
   while (cmd_buffer->batch_bos.next != cmd_buffer->batch_bos.prev) {
      struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer);
      list_del(&bbo->link);
      anv_batch_bo_destroy(bbo, cmd_buffer);
   }
   assert(!list_empty(&cmd_buffer->batch_bos));

   anv_batch_bo_start(anv_cmd_buffer_current_batch_bo(cmd_buffer),
                      &cmd_buffer->batch,
                      GEN8_MI_BATCH_BUFFER_START_length * 4);

   while (anv_vector_length(&cmd_buffer->bt_blocks) > 1) {
      int32_t *bt_block = anv_vector_remove(&cmd_buffer->bt_blocks);
      anv_block_pool_free(&cmd_buffer->device->surface_state_block_pool,
                          *bt_block);
   }
   assert(anv_vector_length(&cmd_buffer->bt_blocks) == 1);
   cmd_buffer->bt_next = 0;

   cmd_buffer->surface_relocs.num_relocs = 0;

   /* Reset the list of seen buffers */
   cmd_buffer->seen_bbos.head = 0;
   cmd_buffer->seen_bbos.tail = 0;

   *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) =
      anv_cmd_buffer_current_batch_bo(cmd_buffer);
}

void
anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer);

   if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
      /* When we start a batch buffer, we subtract a certain amount of
       * padding from the end to ensure that we always have room to emit a
       * BATCH_BUFFER_START to chain to the next BO. We need to remove
       * that padding before we end the batch; otherwise, we may end up
       * with our BATCH_BUFFER_END in another BO.
       */
      cmd_buffer->batch.end += GEN8_MI_BATCH_BUFFER_START_length * 4;
      assert(cmd_buffer->batch.end == batch_bo->bo.map + batch_bo->bo.size);

      anv_batch_emit(&cmd_buffer->batch, GEN7_MI_BATCH_BUFFER_END);

      /* Round batch up to an even number of dwords. */
      if ((cmd_buffer->batch.next - cmd_buffer->batch.start) & 4)
         anv_batch_emit(&cmd_buffer->batch, GEN7_MI_NOOP);

      cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_PRIMARY;
   }

   anv_batch_bo_finish(batch_bo, &cmd_buffer->batch);

   if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
      /* If this is a secondary command buffer, we need to determine the
       * mode in which it will be executed with vkExecuteCommands. We
       * determine this statically here so that this stays in sync with the
       * actual ExecuteCommands implementation.
       */
      if ((cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) &&
          (batch_bo->length < ANV_CMD_BUFFER_BATCH_SIZE / 2)) {
         /* If the secondary has exactly one batch buffer in its list *and*
          * that batch buffer is less than half of the maximum size, we're
          * probably better off simply copying it into our batch.
          */
         cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_EMIT;
      } else if (!(cmd_buffer->usage_flags &
                   VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT)) {
         cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CHAIN;

         /* When we chain, we need to add an MI_BATCH_BUFFER_START command
          * with its relocation. In order to handle this we'll increment
          * here so we can unconditionally decrement right before adding
          * the MI_BATCH_BUFFER_START command.
          */
         batch_bo->relocs.num_relocs++;
         cmd_buffer->batch.next += GEN8_MI_BATCH_BUFFER_START_length * 4;
      } else {
         cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN;
      }
   }
}


static inline VkResult
anv_cmd_buffer_add_seen_bbos(struct anv_cmd_buffer *cmd_buffer,
                             struct list_head *list)
{
   list_for_each_entry(struct anv_batch_bo, bbo, list, link) {
      struct anv_batch_bo **bbo_ptr = anv_vector_add(&cmd_buffer->seen_bbos);
      if (bbo_ptr == NULL)
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

      *bbo_ptr = bbo;
   }

   return VK_SUCCESS;
}

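/* Splice a secondary command buffer into a primary at vkCmdExecuteCommands
 * time. Depending on the exec_mode chosen in end_batch_buffer() this either
 * copies the secondary's commands inline (EMIT), patches the secondary's
 * final MI_BATCH_BUFFER_START to jump back into the primary (CHAIN), or
 * clones the secondary's batch_bo list and chains into the copy
 * (COPY_AND_CHAIN). In all cases STATE_BASE_ADDRESS is re-emitted
 * afterwards and the secondary's surface relocations are appended to the
 * primary's.
 */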
void
anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
                             struct anv_cmd_buffer *secondary)
{
   switch (secondary->exec_mode) {
   case ANV_CMD_BUFFER_EXEC_MODE_EMIT:
      anv_batch_emit_batch(&primary->batch, &secondary->batch);
      anv_cmd_buffer_emit_state_base_address(primary);
      break;
   case ANV_CMD_BUFFER_EXEC_MODE_CHAIN: {
      struct anv_batch_bo *first_bbo =
         list_first_entry(&secondary->batch_bos, struct anv_batch_bo, link);
      struct anv_batch_bo *last_bbo =
         list_last_entry(&secondary->batch_bos, struct anv_batch_bo, link);

      emit_batch_buffer_start(primary, &first_bbo->bo, 0);

      struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary);
      assert(primary->batch.start == this_bbo->bo.map);
      uint32_t offset = primary->batch.next - primary->batch.start;
      const uint32_t inst_size = GEN8_MI_BATCH_BUFFER_START_length * 4;

      /* Roll back the previous MI_BATCH_BUFFER_START and its relocation so
       * we can emit a new command and relocation for the current splice.
       * In order to handle the initial-use case, we incremented next and
       * num_relocs in end_batch_buffer() so we can always just subtract
       * here.
       */
      last_bbo->relocs.num_relocs--;
      secondary->batch.next -= inst_size;
      emit_batch_buffer_start(secondary, &this_bbo->bo, offset);
      anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos);

      /* After patching up the secondary buffer, we need to clflush the
       * modified instruction in case we're on a !llc platform. We use a
       * little loop to handle the case where the instruction crosses a
       * cache line boundary.
       */
      if (!primary->device->info.has_llc) {
         void *inst = secondary->batch.next - inst_size;
         void *p = (void *) (((uintptr_t) inst) & ~CACHELINE_MASK);
         __builtin_ia32_sfence();
         while (p < secondary->batch.next) {
            __builtin_ia32_clflush(p);
            p += CACHELINE_SIZE;
         }
      }

      anv_cmd_buffer_emit_state_base_address(primary);
      break;
   }
   case ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN: {
      struct list_head copy_list;
      VkResult result = anv_batch_bo_list_clone(&secondary->batch_bos,
                                                secondary,
                                                &copy_list);
      if (result != VK_SUCCESS)
         return; /* FIXME */

      anv_cmd_buffer_add_seen_bbos(primary, &copy_list);

      struct anv_batch_bo *first_bbo =
         list_first_entry(&copy_list, struct anv_batch_bo, link);
      struct anv_batch_bo *last_bbo =
         list_last_entry(&copy_list, struct anv_batch_bo, link);

      cmd_buffer_chain_to_batch_bo(primary, first_bbo);

      list_splicetail(&copy_list, &primary->batch_bos);

      anv_batch_bo_continue(last_bbo, &primary->batch,
                            GEN8_MI_BATCH_BUFFER_START_length * 4);

      anv_cmd_buffer_emit_state_base_address(primary);
      break;
   }
   default:
      assert(!"Invalid execution mode");
   }

   anv_reloc_list_append(&primary->surface_relocs, &primary->pool->alloc,
                         &secondary->surface_relocs, 0);
}

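/* Add a BO to the execbuf2 validation list (growing it as needed) and, the
 * first time a relocation list is supplied for that BO, attach the list and
 * add every relocation target as well.
 */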
static VkResult
anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer,
                      struct anv_bo *bo,
                      struct anv_reloc_list *relocs)
{
   struct drm_i915_gem_exec_object2 *obj = NULL;

   if (bo->index < cmd_buffer->execbuf2.bo_count &&
       cmd_buffer->execbuf2.bos[bo->index] == bo)
      obj = &cmd_buffer->execbuf2.objects[bo->index];

   if (obj == NULL) {
      /* We've never seen this one before. Add it to the list and assign
       * an id that we can use later.
       */
      if (cmd_buffer->execbuf2.bo_count >= cmd_buffer->execbuf2.array_length) {
         uint32_t new_len = cmd_buffer->execbuf2.objects ?
                            cmd_buffer->execbuf2.array_length * 2 : 64;

         struct drm_i915_gem_exec_object2 *new_objects =
            anv_alloc(&cmd_buffer->pool->alloc, new_len * sizeof(*new_objects),
                      8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
         if (new_objects == NULL)
            return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

         struct anv_bo **new_bos =
            anv_alloc(&cmd_buffer->pool->alloc, new_len * sizeof(*new_bos),
                      8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
         if (new_bos == NULL) {
            anv_free(&cmd_buffer->pool->alloc, new_objects);
            return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
         }

         if (cmd_buffer->execbuf2.objects) {
            memcpy(new_objects, cmd_buffer->execbuf2.objects,
                   cmd_buffer->execbuf2.bo_count * sizeof(*new_objects));
            memcpy(new_bos, cmd_buffer->execbuf2.bos,
                   cmd_buffer->execbuf2.bo_count * sizeof(*new_bos));
         }

         cmd_buffer->execbuf2.objects = new_objects;
         cmd_buffer->execbuf2.bos = new_bos;
         cmd_buffer->execbuf2.array_length = new_len;
      }

      assert(cmd_buffer->execbuf2.bo_count < cmd_buffer->execbuf2.array_length);

      bo->index = cmd_buffer->execbuf2.bo_count++;
      obj = &cmd_buffer->execbuf2.objects[bo->index];
      cmd_buffer->execbuf2.bos[bo->index] = bo;

      obj->handle = bo->gem_handle;
      obj->relocation_count = 0;
      obj->relocs_ptr = 0;
      obj->alignment = 0;
      obj->offset = bo->offset;
      obj->flags = 0;
      obj->rsvd1 = 0;
      obj->rsvd2 = 0;
   }

   if (relocs != NULL && obj->relocation_count == 0) {
      /* This is the first time we've ever seen a list of relocations for
       * this BO. Go ahead and set the relocations and then walk the list
       * of relocations and add them all.
       */
      obj->relocation_count = relocs->num_relocs;
      obj->relocs_ptr = (uintptr_t) relocs->relocs;

      for (size_t i = 0; i < relocs->num_relocs; i++) {
         /* A quick sanity check on relocations */
         assert(relocs->relocs[i].offset < bo->size);
         anv_cmd_buffer_add_bo(cmd_buffer, relocs->reloc_bos[i], NULL);
      }
   }

   return VK_SUCCESS;
}


static void
anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer,
                              struct anv_reloc_list *list)
{
   struct anv_bo *bo;

   /* If the kernel supports I915_EXEC_NO_RELOC, it will compare the offset
    * in struct drm_i915_gem_exec_object2 against the bo's current offset
    * and, if none of the bos have moved, it will skip relocation processing
    * altogether. If I915_EXEC_NO_RELOC is not supported, the kernel ignores
    * the incoming value of offset so we can set it either way. For that to
    * work we need to make sure all relocs use the same presumed offset.
    */

   for (size_t i = 0; i < list->num_relocs; i++) {
      bo = list->reloc_bos[i];
      if (bo->offset != list->relocs[i].presumed_offset)
         cmd_buffer->execbuf2.need_reloc = true;

      list->relocs[i].target_handle = bo->index;
   }
}

static uint64_t
read_reloc(const struct anv_device *device, const void *p)
{
   if (device->info.gen >= 8)
      return *(uint64_t *)p;
   else
      return *(uint32_t *)p;
}

static void
write_reloc(const struct anv_device *device, void *p, uint64_t v)
{
   if (device->info.gen >= 8)
      *(uint64_t *)p = v;
   else
      *(uint32_t *)p = v;
}

static void
adjust_relocations_from_block_pool(struct anv_block_pool *pool,
                                   struct anv_reloc_list *relocs)
{
   for (size_t i = 0; i < relocs->num_relocs; i++) {
      /* In general, we don't know how stale the relocated value is. It
       * may have been used last time or it may not. Since we don't want
       * to stomp it while the GPU may be accessing it, we haven't updated
       * it anywhere else in the code. Instead, we just set the presumed
       * offset to what it is now based on the delta and the data in the
       * block pool. Then the kernel will update it for us if needed.
       */
      assert(relocs->relocs[i].offset < pool->state.end);
      const void *p = pool->map + relocs->relocs[i].offset;

      /* We're reading back the relocated value from potentially incoherent
       * memory here. However, any change to the value will be from the
       * kernel writing out relocations, which will keep the CPU cache up
       * to date.
       */
      relocs->relocs[i].presumed_offset =
         read_reloc(pool->device, p) - relocs->relocs[i].delta;

      /* All of the relocations from this block pool to other BOs should
       * have been emitted relative to the surface block pool center. We
       * need to add the center offset to make them relative to the
       * beginning of the actual GEM bo.
       */
      relocs->relocs[i].offset += pool->center_bo_offset;
   }
}

static void
adjust_relocations_to_block_pool(struct anv_block_pool *pool,
                                 struct anv_bo *from_bo,
                                 struct anv_reloc_list *relocs,
                                 uint32_t *last_pool_center_bo_offset)
{
   assert(*last_pool_center_bo_offset <= pool->center_bo_offset);
   uint32_t delta = pool->center_bo_offset - *last_pool_center_bo_offset;

   /* When we initially emit relocations into a block pool, we don't
    * actually know what the final center_bo_offset will be so we just emit
    * it as if center_bo_offset == 0. Now that we know what the center
    * offset is, we need to walk the list of relocations and adjust any
    * relocations that point to the pool bo with the correct offset.
    */
   for (size_t i = 0; i < relocs->num_relocs; i++) {
      if (relocs->reloc_bos[i] == &pool->bo) {
         /* Adjust the delta value in the relocation to correctly
          * correspond to the new delta. Initially, this value may have
          * been negative (if treated as unsigned), but we trust in
          * uint32_t roll-over to fix that for us at this point.
          */
         relocs->relocs[i].delta += delta;

         /* Since the delta has changed, we need to update the actual
          * relocated value with the new presumed value. This function
          * should only be called on batch buffers, so we know it isn't in
          * use by the GPU at the moment.
          */
         assert(relocs->relocs[i].offset < from_bo->size);
         write_reloc(pool->device, from_bo->map + relocs->relocs[i].offset,
                     relocs->relocs[i].presumed_offset +
                     relocs->relocs[i].delta);
      }
   }

   *last_pool_center_bo_offset = pool->center_bo_offset;
}

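/* Build the drm_i915_gem_execbuffer2 for this command buffer: fix up the
 * surface-state and batch relocations, add every BO we have seen to the
 * validation list with the first batch_bo last (the kernel executes the
 * last entry), rewrite relocation target handles to list indices, and
 * flush the batch contents on non-LLC platforms.
 */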
void
anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_batch *batch = &cmd_buffer->batch;
   struct anv_block_pool *ss_pool =
      &cmd_buffer->device->surface_state_block_pool;

   cmd_buffer->execbuf2.bo_count = 0;
   cmd_buffer->execbuf2.need_reloc = false;

   adjust_relocations_from_block_pool(ss_pool, &cmd_buffer->surface_relocs);
   anv_cmd_buffer_add_bo(cmd_buffer, &ss_pool->bo, &cmd_buffer->surface_relocs);

   /* First, we walk over all of the bos we've seen and add them and their
    * relocations to the validate list.
    */
   struct anv_batch_bo **bbo;
   anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
      adjust_relocations_to_block_pool(ss_pool, &(*bbo)->bo, &(*bbo)->relocs,
                                       &(*bbo)->last_ss_pool_bo_offset);

      anv_cmd_buffer_add_bo(cmd_buffer, &(*bbo)->bo, &(*bbo)->relocs);
   }

   struct anv_batch_bo *first_batch_bo =
      list_first_entry(&cmd_buffer->batch_bos, struct anv_batch_bo, link);

   /* The kernel requires that the last entry in the validation list be the
    * batch buffer to execute. We can simply swap the element
    * corresponding to the first batch_bo in the chain with the last
    * element in the list.
    */
   if (first_batch_bo->bo.index != cmd_buffer->execbuf2.bo_count - 1) {
      uint32_t idx = first_batch_bo->bo.index;
      uint32_t last_idx = cmd_buffer->execbuf2.bo_count - 1;

      struct drm_i915_gem_exec_object2 tmp_obj =
         cmd_buffer->execbuf2.objects[idx];
      assert(cmd_buffer->execbuf2.bos[idx] == &first_batch_bo->bo);

      cmd_buffer->execbuf2.objects[idx] = cmd_buffer->execbuf2.objects[last_idx];
      cmd_buffer->execbuf2.bos[idx] = cmd_buffer->execbuf2.bos[last_idx];
      cmd_buffer->execbuf2.bos[idx]->index = idx;

      cmd_buffer->execbuf2.objects[last_idx] = tmp_obj;
      cmd_buffer->execbuf2.bos[last_idx] = &first_batch_bo->bo;
      first_batch_bo->bo.index = last_idx;
   }

   /* Now we go through and fixup all of the relocation lists to point to
    * the correct indices in the object array. We have to do this after we
    * reorder the list above as some of the indices may have changed.
    */
   anv_vector_foreach(bbo, &cmd_buffer->seen_bbos)
      anv_cmd_buffer_process_relocs(cmd_buffer, &(*bbo)->relocs);

   anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs);

   if (!cmd_buffer->device->info.has_llc) {
      __builtin_ia32_sfence();
      anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
         for (uint32_t i = 0; i < (*bbo)->length; i += CACHELINE_SIZE)
            __builtin_ia32_clflush((*bbo)->bo.map + i);
      }
   }

   cmd_buffer->execbuf2.execbuf = (struct drm_i915_gem_execbuffer2) {
      .buffers_ptr = (uintptr_t) cmd_buffer->execbuf2.objects,
      .buffer_count = cmd_buffer->execbuf2.bo_count,
      .batch_start_offset = 0,
      .batch_len = batch->next - batch->start,
      .cliprects_ptr = 0,
      .num_cliprects = 0,
      .DR1 = 0,
      .DR4 = 0,
      .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER |
               I915_EXEC_CONSTANTS_REL_GENERAL,
      .rsvd1 = cmd_buffer->device->context_id,
      .rsvd2 = 0,
   };

   if (!cmd_buffer->execbuf2.need_reloc)
      cmd_buffer->execbuf2.execbuf.flags |= I915_EXEC_NO_RELOC;
1074 }