/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
30 #include "anv_private.h"
32 #include "gen7_pack.h"
33 #include "gen8_pack.h"
/** \file anv_batch_chain.c
 *
 * This file contains functions related to anv_cmd_buffer as a data
 * structure.  This involves everything required to create and destroy
 * the actual batch buffers as well as link them together and handle
 * relocations and surface state.  It specifically does *not* contain any
 * handling of actual vkCmd calls beyond vkCmdExecuteCommands.
 */
/*-----------------------------------------------------------------------*
 * Functions related to anv_reloc_list
 *-----------------------------------------------------------------------*/
49 anv_reloc_list_init_clone(struct anv_reloc_list
*list
,
50 struct anv_device
*device
,
51 const struct anv_reloc_list
*other_list
)
54 list
->num_relocs
= other_list
->num_relocs
;
55 list
->array_length
= other_list
->array_length
;
58 list
->array_length
= 256;
62 anv_device_alloc(device
, list
->array_length
* sizeof(*list
->relocs
), 8,
63 VK_SYSTEM_ALLOC_TYPE_INTERNAL
);
65 if (list
->relocs
== NULL
)
66 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
);
69 anv_device_alloc(device
, list
->array_length
* sizeof(*list
->reloc_bos
), 8,
70 VK_SYSTEM_ALLOC_TYPE_INTERNAL
);
72 if (list
->reloc_bos
== NULL
) {
73 anv_device_free(device
, list
->relocs
);
74 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
);
78 memcpy(list
->relocs
, other_list
->relocs
,
79 list
->array_length
* sizeof(*list
->relocs
));
80 memcpy(list
->reloc_bos
, other_list
->reloc_bos
,
81 list
->array_length
* sizeof(*list
->reloc_bos
));
88 anv_reloc_list_init(struct anv_reloc_list
*list
, struct anv_device
*device
)
90 return anv_reloc_list_init_clone(list
, device
, NULL
);
94 anv_reloc_list_finish(struct anv_reloc_list
*list
, struct anv_device
*device
)
96 anv_device_free(device
, list
->relocs
);
97 anv_device_free(device
, list
->reloc_bos
);
101 anv_reloc_list_grow(struct anv_reloc_list
*list
, struct anv_device
*device
,
102 size_t num_additional_relocs
)
104 if (list
->num_relocs
+ num_additional_relocs
<= list
->array_length
)
107 size_t new_length
= list
->array_length
* 2;
108 while (new_length
< list
->num_relocs
+ num_additional_relocs
)
111 struct drm_i915_gem_relocation_entry
*new_relocs
=
112 anv_device_alloc(device
, new_length
* sizeof(*list
->relocs
), 8,
113 VK_SYSTEM_ALLOC_TYPE_INTERNAL
);
114 if (new_relocs
== NULL
)
115 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
);
117 struct anv_bo
**new_reloc_bos
=
118 anv_device_alloc(device
, new_length
* sizeof(*list
->reloc_bos
), 8,
119 VK_SYSTEM_ALLOC_TYPE_INTERNAL
);
120 if (new_relocs
== NULL
) {
121 anv_device_free(device
, new_relocs
);
122 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
);
125 memcpy(new_relocs
, list
->relocs
, list
->num_relocs
* sizeof(*list
->relocs
));
126 memcpy(new_reloc_bos
, list
->reloc_bos
,
127 list
->num_relocs
* sizeof(*list
->reloc_bos
));
129 anv_device_free(device
, list
->relocs
);
130 anv_device_free(device
, list
->reloc_bos
);
132 list
->array_length
= new_length
;
133 list
->relocs
= new_relocs
;
134 list
->reloc_bos
= new_reloc_bos
;
140 anv_reloc_list_add(struct anv_reloc_list
*list
, struct anv_device
*device
,
141 uint32_t offset
, struct anv_bo
*target_bo
, uint32_t delta
)
143 struct drm_i915_gem_relocation_entry
*entry
;
146 anv_reloc_list_grow(list
, device
, 1);
147 /* TODO: Handle failure */
149 /* XXX: Can we use I915_EXEC_HANDLE_LUT? */
150 index
= list
->num_relocs
++;
151 list
->reloc_bos
[index
] = target_bo
;
152 entry
= &list
->relocs
[index
];
153 entry
->target_handle
= target_bo
->gem_handle
;
154 entry
->delta
= delta
;
155 entry
->offset
= offset
;
156 entry
->presumed_offset
= target_bo
->offset
;
157 entry
->read_domains
= 0;
158 entry
->write_domain
= 0;
160 return target_bo
->offset
+ delta
;
164 anv_reloc_list_append(struct anv_reloc_list
*list
, struct anv_device
*device
,
165 struct anv_reloc_list
*other
, uint32_t offset
)
167 anv_reloc_list_grow(list
, device
, other
->num_relocs
);
168 /* TODO: Handle failure */
170 memcpy(&list
->relocs
[list
->num_relocs
], &other
->relocs
[0],
171 other
->num_relocs
* sizeof(other
->relocs
[0]));
172 memcpy(&list
->reloc_bos
[list
->num_relocs
], &other
->reloc_bos
[0],
173 other
->num_relocs
* sizeof(other
->reloc_bos
[0]));
175 for (uint32_t i
= 0; i
< other
->num_relocs
; i
++)
176 list
->relocs
[i
+ list
->num_relocs
].offset
+= offset
;
178 list
->num_relocs
+= other
->num_relocs
;
/*-----------------------------------------------------------------------*
 * Functions related to anv_batch
 *-----------------------------------------------------------------------*/
186 anv_batch_emit_dwords(struct anv_batch
*batch
, int num_dwords
)
188 if (batch
->next
+ num_dwords
* 4 > batch
->end
)
189 batch
->extend_cb(batch
, batch
->user_data
);
191 void *p
= batch
->next
;
193 batch
->next
+= num_dwords
* 4;
194 assert(batch
->next
<= batch
->end
);
200 anv_batch_emit_reloc(struct anv_batch
*batch
,
201 void *location
, struct anv_bo
*bo
, uint32_t delta
)
203 return anv_reloc_list_add(batch
->relocs
, batch
->device
,
204 location
- batch
->start
, bo
, delta
);
208 anv_batch_emit_batch(struct anv_batch
*batch
, struct anv_batch
*other
)
210 uint32_t size
, offset
;
212 size
= other
->next
- other
->start
;
213 assert(size
% 4 == 0);
215 if (batch
->next
+ size
> batch
->end
)
216 batch
->extend_cb(batch
, batch
->user_data
);
218 assert(batch
->next
+ size
<= batch
->end
);
220 VG(VALGRIND_CHECK_MEM_IS_DEFINED(other
->start
, size
));
221 memcpy(batch
->next
, other
->start
, size
);
223 offset
= batch
->next
- batch
->start
;
224 anv_reloc_list_append(batch
->relocs
, batch
->device
,
225 other
->relocs
, offset
);
/*-----------------------------------------------------------------------*
 * Functions related to anv_batch_bo
 *-----------------------------------------------------------------------*/
235 anv_batch_bo_create(struct anv_device
*device
, struct anv_batch_bo
**bbo_out
)
239 struct anv_batch_bo
*bbo
=
240 anv_device_alloc(device
, sizeof(*bbo
), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL
);
242 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
);
244 result
= anv_bo_pool_alloc(&device
->batch_bo_pool
, &bbo
->bo
);
245 if (result
!= VK_SUCCESS
)
248 result
= anv_reloc_list_init(&bbo
->relocs
, device
);
249 if (result
!= VK_SUCCESS
)
257 anv_bo_pool_free(&device
->batch_bo_pool
, &bbo
->bo
);
259 anv_device_free(device
, bbo
);
265 anv_batch_bo_clone(struct anv_device
*device
,
266 const struct anv_batch_bo
*other_bbo
,
267 struct anv_batch_bo
**bbo_out
)
271 struct anv_batch_bo
*bbo
=
272 anv_device_alloc(device
, sizeof(*bbo
), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL
);
274 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
);
276 result
= anv_bo_pool_alloc(&device
->batch_bo_pool
, &bbo
->bo
);
277 if (result
!= VK_SUCCESS
)
280 result
= anv_reloc_list_init_clone(&bbo
->relocs
, device
, &other_bbo
->relocs
);
281 if (result
!= VK_SUCCESS
)
284 bbo
->length
= other_bbo
->length
;
285 memcpy(bbo
->bo
.map
, other_bbo
->bo
.map
, other_bbo
->length
);
287 bbo
->last_ss_pool_bo_offset
= other_bbo
->last_ss_pool_bo_offset
;
294 anv_bo_pool_free(&device
->batch_bo_pool
, &bbo
->bo
);
296 anv_device_free(device
, bbo
);
302 anv_batch_bo_start(struct anv_batch_bo
*bbo
, struct anv_batch
*batch
,
303 size_t batch_padding
)
305 batch
->next
= batch
->start
= bbo
->bo
.map
;
306 batch
->end
= bbo
->bo
.map
+ bbo
->bo
.size
- batch_padding
;
307 batch
->relocs
= &bbo
->relocs
;
308 bbo
->last_ss_pool_bo_offset
= 0;
309 bbo
->relocs
.num_relocs
= 0;
313 anv_batch_bo_continue(struct anv_batch_bo
*bbo
, struct anv_batch
*batch
,
314 size_t batch_padding
)
316 batch
->start
= bbo
->bo
.map
;
317 batch
->next
= bbo
->bo
.map
+ bbo
->length
;
318 batch
->end
= bbo
->bo
.map
+ bbo
->bo
.size
- batch_padding
;
319 batch
->relocs
= &bbo
->relocs
;
323 anv_batch_bo_finish(struct anv_batch_bo
*bbo
, struct anv_batch
*batch
)
325 assert(batch
->start
== bbo
->bo
.map
);
326 bbo
->length
= batch
->next
- batch
->start
;
327 VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch
->start
, bbo
->length
));
331 anv_batch_bo_destroy(struct anv_batch_bo
*bbo
, struct anv_device
*device
)
333 anv_reloc_list_finish(&bbo
->relocs
, device
);
334 anv_bo_pool_free(&device
->batch_bo_pool
, &bbo
->bo
);
335 anv_device_free(device
, bbo
);
339 anv_batch_bo_list_clone(const struct list_head
*list
, struct anv_device
*device
,
340 struct list_head
*new_list
)
342 VkResult result
= VK_SUCCESS
;
344 list_inithead(new_list
);
346 struct anv_batch_bo
*prev_bbo
= NULL
;
347 list_for_each_entry(struct anv_batch_bo
, bbo
, list
, link
) {
348 struct anv_batch_bo
*new_bbo
;
349 result
= anv_batch_bo_clone(device
, bbo
, &new_bbo
);
350 if (result
!= VK_SUCCESS
)
352 list_addtail(&new_bbo
->link
, new_list
);
355 /* As we clone this list of batch_bo's, they chain one to the
356 * other using MI_BATCH_BUFFER_START commands. We need to fix up
357 * those relocations as we go. Fortunately, this is pretty easy
358 * as it will always be the last relocation in the list.
360 uint32_t last_idx
= prev_bbo
->relocs
.num_relocs
- 1;
361 assert(prev_bbo
->relocs
.reloc_bos
[last_idx
] == &bbo
->bo
);
362 prev_bbo
->relocs
.reloc_bos
[last_idx
] = &new_bbo
->bo
;
368 if (result
!= VK_SUCCESS
) {
369 list_for_each_entry_safe(struct anv_batch_bo
, bbo
, new_list
, link
)
370 anv_batch_bo_destroy(bbo
, device
);
/*-----------------------------------------------------------------------*
 * Functions related to anv_cmd_buffer
 *-----------------------------------------------------------------------*/
380 static inline struct anv_batch_bo
*
381 anv_cmd_buffer_current_batch_bo(struct anv_cmd_buffer
*cmd_buffer
)
383 return LIST_ENTRY(struct anv_batch_bo
, cmd_buffer
->batch_bos
.prev
, link
);
387 anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer
*cmd_buffer
)
389 return (struct anv_address
) {
390 .bo
= &cmd_buffer
->device
->surface_state_block_pool
.bo
,
391 .offset
= *(int32_t *)anv_vector_head(&cmd_buffer
->bt_blocks
),
396 emit_batch_buffer_start(struct anv_batch
*batch
, struct anv_bo
*bo
, uint32_t offset
)
398 /* In gen8+ the address field grew to two dwords to accomodate 48 bit
399 * offsets. The high 16 bits are in the last dword, so we can use the gen8
400 * version in either case, as long as we set the instruction length in the
401 * header accordingly. This means that we always emit three dwords here
402 * and all the padding and adjustment we do in this file works for all
406 const uint32_t gen7_length
=
407 GEN7_MI_BATCH_BUFFER_START_length
- GEN7_MI_BATCH_BUFFER_START_length_bias
;
408 const uint32_t gen8_length
=
409 GEN8_MI_BATCH_BUFFER_START_length
- GEN8_MI_BATCH_BUFFER_START_length_bias
;
411 anv_batch_emit(batch
, GEN8_MI_BATCH_BUFFER_START
,
412 .DwordLength
= batch
->device
->info
.gen
< 8 ? gen7_length
: gen8_length
,
413 ._2ndLevelBatchBuffer
= _1stlevelbatch
,
414 .AddressSpaceIndicator
= ASI_PPGTT
,
415 .BatchBufferStartAddress
= { bo
, offset
});
419 cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer
*cmd_buffer
,
420 struct anv_batch_bo
*bbo
)
422 struct anv_batch
*batch
= &cmd_buffer
->batch
;
423 struct anv_batch_bo
*current_bbo
=
424 anv_cmd_buffer_current_batch_bo(cmd_buffer
);
426 /* We set the end of the batch a little short so we would be sure we
427 * have room for the chaining command. Since we're about to emit the
428 * chaining command, let's set it back where it should go.
430 batch
->end
+= GEN8_MI_BATCH_BUFFER_START_length
* 4;
431 assert(batch
->end
== current_bbo
->bo
.map
+ current_bbo
->bo
.size
);
433 emit_batch_buffer_start(batch
, &bbo
->bo
, 0);
435 anv_batch_bo_finish(current_bbo
, batch
);
439 anv_cmd_buffer_chain_batch(struct anv_batch
*batch
, void *_data
)
441 struct anv_cmd_buffer
*cmd_buffer
= _data
;
442 struct anv_batch_bo
*new_bbo
;
444 VkResult result
= anv_batch_bo_create(cmd_buffer
->device
, &new_bbo
);
445 if (result
!= VK_SUCCESS
)
448 struct anv_batch_bo
**seen_bbo
= anv_vector_add(&cmd_buffer
->seen_bbos
);
449 if (seen_bbo
== NULL
) {
450 anv_batch_bo_destroy(new_bbo
, cmd_buffer
->device
);
451 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
);
455 cmd_buffer_chain_to_batch_bo(cmd_buffer
, new_bbo
);
457 list_addtail(&new_bbo
->link
, &cmd_buffer
->batch_bos
);
459 anv_batch_bo_start(new_bbo
, batch
, GEN8_MI_BATCH_BUFFER_START_length
* 4);
465 anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer
*cmd_buffer
,
466 uint32_t entries
, uint32_t *state_offset
)
468 struct anv_block_pool
*block_pool
=
469 &cmd_buffer
->device
->surface_state_block_pool
;
470 int32_t *bt_block
= anv_vector_head(&cmd_buffer
->bt_blocks
);
471 struct anv_state state
;
473 state
.alloc_size
= align_u32(entries
* 4, 32);
475 if (cmd_buffer
->bt_next
+ state
.alloc_size
> block_pool
->block_size
)
476 return (struct anv_state
) { 0 };
478 state
.offset
= cmd_buffer
->bt_next
;
479 state
.map
= block_pool
->map
+ *bt_block
+ state
.offset
;
481 cmd_buffer
->bt_next
+= state
.alloc_size
;
483 assert(*bt_block
< 0);
484 *state_offset
= -(*bt_block
);
490 anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer
*cmd_buffer
)
492 return anv_state_stream_alloc(&cmd_buffer
->surface_state_stream
, 64, 64);
496 anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer
*cmd_buffer
,
497 uint32_t size
, uint32_t alignment
)
499 return anv_state_stream_alloc(&cmd_buffer
->dynamic_state_stream
,
504 anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer
*cmd_buffer
)
506 struct anv_block_pool
*block_pool
=
507 &cmd_buffer
->device
->surface_state_block_pool
;
509 int32_t *offset
= anv_vector_add(&cmd_buffer
->bt_blocks
);
511 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
);
513 *offset
= anv_block_pool_alloc_back(block_pool
);
514 cmd_buffer
->bt_next
= 0;
520 anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer
*cmd_buffer
)
522 struct anv_batch_bo
*batch_bo
;
523 struct anv_device
*device
= cmd_buffer
->device
;
526 list_inithead(&cmd_buffer
->batch_bos
);
528 result
= anv_batch_bo_create(device
, &batch_bo
);
529 if (result
!= VK_SUCCESS
)
532 list_addtail(&batch_bo
->link
, &cmd_buffer
->batch_bos
);
534 cmd_buffer
->batch
.device
= device
;
535 cmd_buffer
->batch
.extend_cb
= anv_cmd_buffer_chain_batch
;
536 cmd_buffer
->batch
.user_data
= cmd_buffer
;
538 anv_batch_bo_start(batch_bo
, &cmd_buffer
->batch
,
539 GEN8_MI_BATCH_BUFFER_START_length
* 4);
541 int success
= anv_vector_init(&cmd_buffer
->seen_bbos
,
542 sizeof(struct anv_bo
*),
543 8 * sizeof(struct anv_bo
*));
547 *(struct anv_batch_bo
**)anv_vector_add(&cmd_buffer
->seen_bbos
) = batch_bo
;
549 success
= anv_vector_init(&cmd_buffer
->bt_blocks
, sizeof(int32_t),
550 8 * sizeof(int32_t));
554 result
= anv_reloc_list_init(&cmd_buffer
->surface_relocs
,
556 if (result
!= VK_SUCCESS
)
559 anv_cmd_buffer_new_binding_table_block(cmd_buffer
);
561 cmd_buffer
->execbuf2
.objects
= NULL
;
562 cmd_buffer
->execbuf2
.bos
= NULL
;
563 cmd_buffer
->execbuf2
.array_length
= 0;
568 anv_vector_finish(&cmd_buffer
->bt_blocks
);
570 anv_vector_finish(&cmd_buffer
->seen_bbos
);
572 anv_batch_bo_destroy(batch_bo
, device
);
578 anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer
*cmd_buffer
)
580 struct anv_device
*device
= cmd_buffer
->device
;
583 anv_vector_foreach(bt_block
, &cmd_buffer
->bt_blocks
) {
584 anv_block_pool_free(&cmd_buffer
->device
->surface_state_block_pool
,
587 anv_vector_finish(&cmd_buffer
->bt_blocks
);
589 anv_reloc_list_finish(&cmd_buffer
->surface_relocs
, cmd_buffer
->device
);
591 anv_vector_finish(&cmd_buffer
->seen_bbos
);
593 /* Destroy all of the batch buffers */
594 list_for_each_entry_safe(struct anv_batch_bo
, bbo
,
595 &cmd_buffer
->batch_bos
, link
) {
596 anv_batch_bo_destroy(bbo
, device
);
599 anv_device_free(device
, cmd_buffer
->execbuf2
.objects
);
600 anv_device_free(device
, cmd_buffer
->execbuf2
.bos
);
604 anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer
*cmd_buffer
)
606 struct anv_device
*device
= cmd_buffer
->device
;
608 /* Delete all but the first batch bo */
609 assert(!list_empty(&cmd_buffer
->batch_bos
));
610 while (cmd_buffer
->batch_bos
.next
!= cmd_buffer
->batch_bos
.prev
) {
611 struct anv_batch_bo
*bbo
= anv_cmd_buffer_current_batch_bo(cmd_buffer
);
612 list_del(&bbo
->link
);
613 anv_batch_bo_destroy(bbo
, device
);
615 assert(!list_empty(&cmd_buffer
->batch_bos
));
617 anv_batch_bo_start(anv_cmd_buffer_current_batch_bo(cmd_buffer
),
619 GEN8_MI_BATCH_BUFFER_START_length
* 4);
621 while (anv_vector_length(&cmd_buffer
->bt_blocks
) > 1) {
622 int32_t *bt_block
= anv_vector_remove(&cmd_buffer
->bt_blocks
);
623 anv_block_pool_free(&cmd_buffer
->device
->surface_state_block_pool
,
626 assert(anv_vector_length(&cmd_buffer
->bt_blocks
) == 1);
627 cmd_buffer
->bt_next
= 0;
629 cmd_buffer
->surface_relocs
.num_relocs
= 0;
631 /* Reset the list of seen buffers */
632 cmd_buffer
->seen_bbos
.head
= 0;
633 cmd_buffer
->seen_bbos
.tail
= 0;
635 *(struct anv_batch_bo
**)anv_vector_add(&cmd_buffer
->seen_bbos
) =
636 anv_cmd_buffer_current_batch_bo(cmd_buffer
);
640 anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer
*cmd_buffer
)
642 struct anv_batch_bo
*batch_bo
= anv_cmd_buffer_current_batch_bo(cmd_buffer
);
644 if (cmd_buffer
->level
== VK_COMMAND_BUFFER_LEVEL_PRIMARY
) {
645 anv_batch_emit(&cmd_buffer
->batch
, GEN7_MI_BATCH_BUFFER_END
);
647 /* Round batch up to an even number of dwords. */
648 if ((cmd_buffer
->batch
.next
- cmd_buffer
->batch
.start
) & 4)
649 anv_batch_emit(&cmd_buffer
->batch
, GEN7_MI_NOOP
);
651 cmd_buffer
->exec_mode
= ANV_CMD_BUFFER_EXEC_MODE_PRIMARY
;
654 anv_batch_bo_finish(batch_bo
, &cmd_buffer
->batch
);
656 if (cmd_buffer
->level
== VK_COMMAND_BUFFER_LEVEL_SECONDARY
) {
657 /* If this is a secondary command buffer, we need to determine the
658 * mode in which it will be executed with vkExecuteCommands. We
659 * determine this statically here so that this stays in sync with the
660 * actual ExecuteCommands implementation.
662 if ((cmd_buffer
->batch_bos
.next
== cmd_buffer
->batch_bos
.prev
) &&
663 (anv_cmd_buffer_current_batch_bo(cmd_buffer
)->length
<
664 ANV_CMD_BUFFER_BATCH_SIZE
/ 2)) {
665 /* If the secondary has exactly one batch buffer in its list *and*
666 * that batch buffer is less than half of the maximum size, we're
667 * probably better of simply copying it into our batch.
669 cmd_buffer
->exec_mode
= ANV_CMD_BUFFER_EXEC_MODE_EMIT
;
670 } else if (cmd_buffer
->opt_flags
&
671 VK_CMD_BUFFER_OPTIMIZE_NO_SIMULTANEOUS_USE_BIT
) {
672 cmd_buffer
->exec_mode
= ANV_CMD_BUFFER_EXEC_MODE_CHAIN
;
674 /* When we chain, we need to add an MI_BATCH_BUFFER_START command
675 * with its relocation. In order to handle this we'll increment here
676 * so we can unconditionally decrement right before adding the
677 * MI_BATCH_BUFFER_START command.
679 anv_cmd_buffer_current_batch_bo(cmd_buffer
)->relocs
.num_relocs
++;
680 cmd_buffer
->batch
.next
+= GEN8_MI_BATCH_BUFFER_START_length
* 4;
682 cmd_buffer
->exec_mode
= ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN
;
687 static inline VkResult
688 anv_cmd_buffer_add_seen_bbos(struct anv_cmd_buffer
*cmd_buffer
,
689 struct list_head
*list
)
691 list_for_each_entry(struct anv_batch_bo
, bbo
, list
, link
) {
692 struct anv_batch_bo
**bbo_ptr
= anv_vector_add(&cmd_buffer
->seen_bbos
);
694 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
);
703 anv_cmd_buffer_add_secondary(struct anv_cmd_buffer
*primary
,
704 struct anv_cmd_buffer
*secondary
)
706 switch (secondary
->exec_mode
) {
707 case ANV_CMD_BUFFER_EXEC_MODE_EMIT
:
708 anv_batch_emit_batch(&primary
->batch
, &secondary
->batch
);
710 case ANV_CMD_BUFFER_EXEC_MODE_CHAIN
: {
711 struct anv_batch_bo
*first_bbo
=
712 list_first_entry(&secondary
->batch_bos
, struct anv_batch_bo
, link
);
713 struct anv_batch_bo
*last_bbo
=
714 list_last_entry(&secondary
->batch_bos
, struct anv_batch_bo
, link
);
716 emit_batch_buffer_start(&primary
->batch
, &first_bbo
->bo
, 0);
718 struct anv_batch_bo
*this_bbo
= anv_cmd_buffer_current_batch_bo(primary
);
719 assert(primary
->batch
.start
== this_bbo
->bo
.map
);
720 uint32_t offset
= primary
->batch
.next
- primary
->batch
.start
;
722 /* Roll back the previous MI_BATCH_BUFFER_START and its relocation so we
723 * can emit a new command and relocation for the current splice. In
724 * order to handle the initial-use case, we incremented next and
725 * num_relocs in end_batch_buffer() so we can alyways just subtract
728 last_bbo
->relocs
.num_relocs
--;
729 secondary
->batch
.next
-= GEN8_MI_BATCH_BUFFER_START_length
* 4;
730 emit_batch_buffer_start(&secondary
->batch
, &this_bbo
->bo
, offset
);
731 anv_cmd_buffer_add_seen_bbos(primary
, &secondary
->batch_bos
);
734 case ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN
: {
735 struct list_head copy_list
;
736 VkResult result
= anv_batch_bo_list_clone(&secondary
->batch_bos
,
739 if (result
!= VK_SUCCESS
)
742 anv_cmd_buffer_add_seen_bbos(primary
, ©_list
);
744 struct anv_batch_bo
*first_bbo
=
745 list_first_entry(©_list
, struct anv_batch_bo
, link
);
746 struct anv_batch_bo
*last_bbo
=
747 list_last_entry(©_list
, struct anv_batch_bo
, link
);
749 cmd_buffer_chain_to_batch_bo(primary
, first_bbo
);
751 list_splicetail(©_list
, &primary
->batch_bos
);
753 anv_batch_bo_continue(last_bbo
, &primary
->batch
,
754 GEN8_MI_BATCH_BUFFER_START_length
* 4);
756 anv_cmd_buffer_emit_state_base_address(primary
);
760 assert(!"Invalid execution mode");
763 anv_reloc_list_append(&primary
->surface_relocs
, primary
->device
,
764 &secondary
->surface_relocs
, 0);
768 anv_cmd_buffer_add_bo(struct anv_cmd_buffer
*cmd_buffer
,
770 struct anv_reloc_list
*relocs
)
772 struct drm_i915_gem_exec_object2
*obj
= NULL
;
774 if (bo
->index
< cmd_buffer
->execbuf2
.bo_count
&&
775 cmd_buffer
->execbuf2
.bos
[bo
->index
] == bo
)
776 obj
= &cmd_buffer
->execbuf2
.objects
[bo
->index
];
779 /* We've never seen this one before. Add it to the list and assign
780 * an id that we can use later.
782 if (cmd_buffer
->execbuf2
.bo_count
>= cmd_buffer
->execbuf2
.array_length
) {
783 uint32_t new_len
= cmd_buffer
->execbuf2
.objects
?
784 cmd_buffer
->execbuf2
.array_length
* 2 : 64;
786 struct drm_i915_gem_exec_object2
*new_objects
=
787 anv_device_alloc(cmd_buffer
->device
, new_len
* sizeof(*new_objects
),
788 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL
);
789 if (new_objects
== NULL
)
790 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
);
792 struct anv_bo
**new_bos
=
793 anv_device_alloc(cmd_buffer
->device
, new_len
* sizeof(*new_bos
),
794 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL
);
795 if (new_objects
== NULL
) {
796 anv_device_free(cmd_buffer
->device
, new_objects
);
797 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
);
800 if (cmd_buffer
->execbuf2
.objects
) {
801 memcpy(new_objects
, cmd_buffer
->execbuf2
.objects
,
802 cmd_buffer
->execbuf2
.bo_count
* sizeof(*new_objects
));
803 memcpy(new_bos
, cmd_buffer
->execbuf2
.bos
,
804 cmd_buffer
->execbuf2
.bo_count
* sizeof(*new_bos
));
807 cmd_buffer
->execbuf2
.objects
= new_objects
;
808 cmd_buffer
->execbuf2
.bos
= new_bos
;
809 cmd_buffer
->execbuf2
.array_length
= new_len
;
812 assert(cmd_buffer
->execbuf2
.bo_count
< cmd_buffer
->execbuf2
.array_length
);
814 bo
->index
= cmd_buffer
->execbuf2
.bo_count
++;
815 obj
= &cmd_buffer
->execbuf2
.objects
[bo
->index
];
816 cmd_buffer
->execbuf2
.bos
[bo
->index
] = bo
;
818 obj
->handle
= bo
->gem_handle
;
819 obj
->relocation_count
= 0;
822 obj
->offset
= bo
->offset
;
828 if (relocs
!= NULL
&& obj
->relocation_count
== 0) {
829 /* This is the first time we've ever seen a list of relocations for
830 * this BO. Go ahead and set the relocations and then walk the list
831 * of relocations and add them all.
833 obj
->relocation_count
= relocs
->num_relocs
;
834 obj
->relocs_ptr
= (uintptr_t) relocs
->relocs
;
836 for (size_t i
= 0; i
< relocs
->num_relocs
; i
++) {
837 /* A quick sanity check on relocations */
838 assert(relocs
->relocs
[i
].offset
< bo
->size
);
839 anv_cmd_buffer_add_bo(cmd_buffer
, relocs
->reloc_bos
[i
], NULL
);
847 anv_cmd_buffer_process_relocs(struct anv_cmd_buffer
*cmd_buffer
,
848 struct anv_reloc_list
*list
)
852 /* If the kernel supports I915_EXEC_NO_RELOC, it will compare offset in
853 * struct drm_i915_gem_exec_object2 against the bos current offset and if
854 * all bos haven't moved it will skip relocation processing alltogether.
855 * If I915_EXEC_NO_RELOC is not supported, the kernel ignores the incoming
856 * value of offset so we can set it either way. For that to work we need
857 * to make sure all relocs use the same presumed offset.
860 for (size_t i
= 0; i
< list
->num_relocs
; i
++) {
861 bo
= list
->reloc_bos
[i
];
862 if (bo
->offset
!= list
->relocs
[i
].presumed_offset
)
863 cmd_buffer
->execbuf2
.need_reloc
= true;
865 list
->relocs
[i
].target_handle
= bo
->index
;
870 adjust_relocations_from_block_pool(struct anv_block_pool
*pool
,
871 struct anv_reloc_list
*relocs
)
873 for (size_t i
= 0; i
< relocs
->num_relocs
; i
++) {
874 /* In general, we don't know how stale the relocated value is. It
875 * may have been used last time or it may not. Since we don't want
876 * to stomp it while the GPU may be accessing it, we haven't updated
877 * it anywhere else in the code. Instead, we just set the presumed
878 * offset to what it is now based on the delta and the data in the
879 * block pool. Then the kernel will update it for us if needed.
881 assert(relocs
->relocs
[i
].offset
< pool
->state
.end
);
882 uint32_t *reloc_data
= pool
->map
+ relocs
->relocs
[i
].offset
;
883 relocs
->relocs
[i
].presumed_offset
= *reloc_data
- relocs
->relocs
[i
].delta
;
885 /* All of the relocations from this block pool to other BO's should
886 * have been emitted relative to the surface block pool center. We
887 * need to add the center offset to make them relative to the
888 * beginning of the actual GEM bo.
890 relocs
->relocs
[i
].offset
+= pool
->center_bo_offset
;
895 adjust_relocations_to_block_pool(struct anv_block_pool
*pool
,
896 struct anv_bo
*from_bo
,
897 struct anv_reloc_list
*relocs
,
898 uint32_t *last_pool_center_bo_offset
)
900 assert(*last_pool_center_bo_offset
<= pool
->center_bo_offset
);
901 uint32_t delta
= pool
->center_bo_offset
- *last_pool_center_bo_offset
;
903 /* When we initially emit relocations into a block pool, we don't
904 * actually know what the final center_bo_offset will be so we just emit
905 * it as if center_bo_offset == 0. Now that we know what the center
906 * offset is, we need to walk the list of relocations and adjust any
907 * relocations that point to the pool bo with the correct offset.
909 for (size_t i
= 0; i
< relocs
->num_relocs
; i
++) {
910 if (relocs
->reloc_bos
[i
] == &pool
->bo
) {
911 /* Adjust the delta value in the relocation to correctly
912 * correspond to the new delta. Initially, this value may have
913 * been negative (if treated as unsigned), but we trust in
914 * uint32_t roll-over to fix that for us at this point.
916 relocs
->relocs
[i
].delta
+= delta
;
918 /* Since the delta has changed, we need to update the actual
919 * relocated value with the new presumed value. This function
920 * should only be called on batch buffers, so we know it isn't in
921 * use by the GPU at the moment.
923 assert(relocs
->relocs
[i
].offset
< from_bo
->size
);
924 uint32_t *reloc_data
= from_bo
->map
+ relocs
->relocs
[i
].offset
;
925 *reloc_data
= relocs
->relocs
[i
].presumed_offset
+
926 relocs
->relocs
[i
].delta
;
930 *last_pool_center_bo_offset
= pool
->center_bo_offset
;
934 anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer
*cmd_buffer
)
936 struct anv_batch
*batch
= &cmd_buffer
->batch
;
937 struct anv_block_pool
*ss_pool
=
938 &cmd_buffer
->device
->surface_state_block_pool
;
940 cmd_buffer
->execbuf2
.bo_count
= 0;
941 cmd_buffer
->execbuf2
.need_reloc
= false;
943 adjust_relocations_from_block_pool(ss_pool
, &cmd_buffer
->surface_relocs
);
944 anv_cmd_buffer_add_bo(cmd_buffer
, &ss_pool
->bo
, &cmd_buffer
->surface_relocs
);
946 /* First, we walk over all of the bos we've seen and add them and their
947 * relocations to the validate list.
949 struct anv_batch_bo
**bbo
;
950 anv_vector_foreach(bbo
, &cmd_buffer
->seen_bbos
) {
951 adjust_relocations_to_block_pool(ss_pool
, &(*bbo
)->bo
, &(*bbo
)->relocs
,
952 &(*bbo
)->last_ss_pool_bo_offset
);
954 anv_cmd_buffer_add_bo(cmd_buffer
, &(*bbo
)->bo
, &(*bbo
)->relocs
);
957 struct anv_batch_bo
*first_batch_bo
=
958 list_first_entry(&cmd_buffer
->batch_bos
, struct anv_batch_bo
, link
);
960 /* The kernel requires that the last entry in the validation list be the
961 * batch buffer to execute. We can simply swap the element
962 * corresponding to the first batch_bo in the chain with the last
963 * element in the list.
965 if (first_batch_bo
->bo
.index
!= cmd_buffer
->execbuf2
.bo_count
- 1) {
966 uint32_t idx
= first_batch_bo
->bo
.index
;
967 uint32_t last_idx
= cmd_buffer
->execbuf2
.bo_count
- 1;
969 struct drm_i915_gem_exec_object2 tmp_obj
=
970 cmd_buffer
->execbuf2
.objects
[idx
];
971 assert(cmd_buffer
->execbuf2
.bos
[idx
] == &first_batch_bo
->bo
);
973 cmd_buffer
->execbuf2
.objects
[idx
] = cmd_buffer
->execbuf2
.objects
[last_idx
];
974 cmd_buffer
->execbuf2
.bos
[idx
] = cmd_buffer
->execbuf2
.bos
[last_idx
];
975 cmd_buffer
->execbuf2
.bos
[idx
]->index
= idx
;
977 cmd_buffer
->execbuf2
.objects
[last_idx
] = tmp_obj
;
978 cmd_buffer
->execbuf2
.bos
[last_idx
] = &first_batch_bo
->bo
;
979 first_batch_bo
->bo
.index
= last_idx
;
982 /* Now we go through and fixup all of the relocation lists to point to
983 * the correct indices in the object array. We have to do this after we
984 * reorder the list above as some of the indices may have changed.
986 anv_vector_foreach(bbo
, &cmd_buffer
->seen_bbos
)
987 anv_cmd_buffer_process_relocs(cmd_buffer
, &(*bbo
)->relocs
);
989 anv_cmd_buffer_process_relocs(cmd_buffer
, &cmd_buffer
->surface_relocs
);
991 cmd_buffer
->execbuf2
.execbuf
= (struct drm_i915_gem_execbuffer2
) {
992 .buffers_ptr
= (uintptr_t) cmd_buffer
->execbuf2
.objects
,
993 .buffer_count
= cmd_buffer
->execbuf2
.bo_count
,
994 .batch_start_offset
= 0,
995 .batch_len
= batch
->next
- batch
->start
,
1000 .flags
= I915_EXEC_HANDLE_LUT
| I915_EXEC_RENDER
|
1001 I915_EXEC_CONSTANTS_REL_GENERAL
,
1002 .rsvd1
= cmd_buffer
->device
->context_id
,
1006 if (!cmd_buffer
->execbuf2
.need_reloc
)
1007 cmd_buffer
->execbuf2
.execbuf
.flags
|= I915_EXEC_NO_RELOC
;