/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "anv_private.h"

#include "gen7_pack.h"
#include "gen8_pack.h"
/** \file anv_batch_chain.c
 *
 * This file contains functions related to anv_cmd_buffer as a data
 * structure.  This involves everything required to create and destroy
 * the actual batch buffers as well as link them together and handle
 * relocations and surface state.  It specifically does *not* contain any
 * handling of actual vkCmd calls beyond vkCmdExecuteCommands.
 */
/*-----------------------------------------------------------------------*
 * Functions related to anv_reloc_list
 *-----------------------------------------------------------------------*/
static VkResult
anv_reloc_list_init_clone(struct anv_reloc_list *list,
                          const VkAllocationCallbacks *alloc,
                          const struct anv_reloc_list *other_list)
{
   if (other_list) {
      list->num_relocs = other_list->num_relocs;
      list->array_length = other_list->array_length;
   } else {
      list->num_relocs = 0;
      list->array_length = 256;
   }

   list->relocs =
      anv_alloc(alloc, list->array_length * sizeof(*list->relocs), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

   if (list->relocs == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   list->reloc_bos =
      anv_alloc(alloc, list->array_length * sizeof(*list->reloc_bos), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

   if (list->reloc_bos == NULL) {
      anv_free(alloc, list->relocs);
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   if (other_list) {
      memcpy(list->relocs, other_list->relocs,
             list->array_length * sizeof(*list->relocs));
      memcpy(list->reloc_bos, other_list->reloc_bos,
             list->array_length * sizeof(*list->reloc_bos));
   }

   return VK_SUCCESS;
}
VkResult
anv_reloc_list_init(struct anv_reloc_list *list,
                    const VkAllocationCallbacks *alloc)
{
   return anv_reloc_list_init_clone(list, alloc, NULL);
}
void
anv_reloc_list_finish(struct anv_reloc_list *list,
                      const VkAllocationCallbacks *alloc)
{
   anv_free(alloc, list->relocs);
   anv_free(alloc, list->reloc_bos);
}
static VkResult
anv_reloc_list_grow(struct anv_reloc_list *list,
                    const VkAllocationCallbacks *alloc,
                    size_t num_additional_relocs)
{
   if (list->num_relocs + num_additional_relocs <= list->array_length)
      return VK_SUCCESS;

   size_t new_length = list->array_length * 2;
   while (new_length < list->num_relocs + num_additional_relocs)
      new_length *= 2;

   struct drm_i915_gem_relocation_entry *new_relocs =
      anv_alloc(alloc, new_length * sizeof(*list->relocs), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (new_relocs == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   struct anv_bo **new_reloc_bos =
      anv_alloc(alloc, new_length * sizeof(*list->reloc_bos), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (new_reloc_bos == NULL) {
      anv_free(alloc, new_relocs);
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   memcpy(new_relocs, list->relocs, list->num_relocs * sizeof(*list->relocs));
   memcpy(new_reloc_bos, list->reloc_bos,
          list->num_relocs * sizeof(*list->reloc_bos));

   anv_free(alloc, list->relocs);
   anv_free(alloc, list->reloc_bos);

   list->array_length = new_length;
   list->relocs = new_relocs;
   list->reloc_bos = new_reloc_bos;

   return VK_SUCCESS;
}
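
/* The arrays grow geometrically (array_length doubles until the request
 * fits), so a long run of anv_reloc_list_add() calls performs amortized
 * O(1) copy work per relocation.  Growing relocs and reloc_bos together
 * keeps the two arrays parallel: entry i of relocs always targets
 * reloc_bos[i].
 */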
uint64_t
anv_reloc_list_add(struct anv_reloc_list *list,
                   const VkAllocationCallbacks *alloc,
                   uint32_t offset, struct anv_bo *target_bo, uint32_t delta)
{
   struct drm_i915_gem_relocation_entry *entry;
   int index;

   anv_reloc_list_grow(list, alloc, 1);
   /* TODO: Handle failure */

   /* XXX: Can we use I915_EXEC_HANDLE_LUT? */
   index = list->num_relocs++;
   list->reloc_bos[index] = target_bo;
   entry = &list->relocs[index];
   entry->target_handle = target_bo->gem_handle;
   entry->delta = delta;
   entry->offset = offset;
   entry->presumed_offset = target_bo->offset;
   entry->read_domains = 0;
   entry->write_domain = 0;
   VG(VALGRIND_CHECK_MEM_IS_DEFINED(entry, sizeof(*entry)));

   return target_bo->offset + delta;
}
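
/* The return value is the presumed 64-bit address of the relocation target
 * (target_bo->offset + delta), which the caller writes into the batch at
 * `offset` so the kernel can skip the fixup whenever the presumed address
 * turns out to be correct.  A rough sketch of a typical call site (this is
 * essentially what anv_batch_emit_reloc() below does):
 *
 *    uint64_t addr = anv_reloc_list_add(batch->relocs, batch->alloc,
 *                                       location - batch->start, bo, delta);
 *    // then store addr (low dword, and high dword on gen8+) at location
 */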
static void
anv_reloc_list_append(struct anv_reloc_list *list,
                      const VkAllocationCallbacks *alloc,
                      struct anv_reloc_list *other, uint32_t offset)
{
   anv_reloc_list_grow(list, alloc, other->num_relocs);
   /* TODO: Handle failure */

   memcpy(&list->relocs[list->num_relocs], &other->relocs[0],
          other->num_relocs * sizeof(other->relocs[0]));
   memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0],
          other->num_relocs * sizeof(other->reloc_bos[0]));

   for (uint32_t i = 0; i < other->num_relocs; i++)
      list->relocs[i + list->num_relocs].offset += offset;

   list->num_relocs += other->num_relocs;
}
/*-----------------------------------------------------------------------*
 * Functions related to anv_batch
 *-----------------------------------------------------------------------*/
void *
anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords)
{
   if (batch->next + num_dwords * 4 > batch->end)
      batch->extend_cb(batch, batch->user_data);

   void *p = batch->next;

   batch->next += num_dwords * 4;
   assert(batch->next <= batch->end);

   return p;
}
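
/* This is the low-level allocator behind the anv_batch_emit() macro used
 * throughout this file: the macro reserves the command's dword count here
 * and then packs the command struct into the returned pointer.  If the
 * batch is full, extend_cb (anv_cmd_buffer_chain_batch below) supplies a
 * fresh batch_bo before the write happens.
 */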
uint64_t
anv_batch_emit_reloc(struct anv_batch *batch,
                     void *location, struct anv_bo *bo, uint32_t delta)
{
   return anv_reloc_list_add(batch->relocs, batch->alloc,
                             location - batch->start, bo, delta);
}
void
anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other)
{
   uint32_t size, offset;

   size = other->next - other->start;
   assert(size % 4 == 0);

   if (batch->next + size > batch->end)
      batch->extend_cb(batch, batch->user_data);

   assert(batch->next + size <= batch->end);

   VG(VALGRIND_CHECK_MEM_IS_DEFINED(other->start, size));
   memcpy(batch->next, other->start, size);

   offset = batch->next - batch->start;
   anv_reloc_list_append(batch->relocs, batch->alloc,
                         other->relocs, offset);

   batch->next += size;
}
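
/* Copying `other` wholesale only works because its relocations are stored
 * batch-relative: anv_reloc_list_append() rebases each copied entry by
 * `offset`, the position at which the secondary's bytes landed in this
 * batch.  This is the mechanism behind ANV_CMD_BUFFER_EXEC_MODE_EMIT for
 * small secondary command buffers (see anv_cmd_buffer_add_secondary).
 */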
/*-----------------------------------------------------------------------*
 * Functions related to anv_batch_bo
 *-----------------------------------------------------------------------*/
static VkResult
anv_batch_bo_create(struct anv_cmd_buffer *cmd_buffer,
                    struct anv_batch_bo **bbo_out)
{
   VkResult result;

   struct anv_batch_bo *bbo = anv_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo),
                                        8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (bbo == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
   if (result != VK_SUCCESS)
      goto fail_alloc;

   result = anv_reloc_list_init(&bbo->relocs, &cmd_buffer->pool->alloc);
   if (result != VK_SUCCESS)
      goto fail_bo_alloc;

   *bbo_out = bbo;

   return VK_SUCCESS;

 fail_bo_alloc:
   anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
 fail_alloc:
   anv_free(&cmd_buffer->pool->alloc, bbo);

   return result;
}
static VkResult
anv_batch_bo_clone(struct anv_cmd_buffer *cmd_buffer,
                   const struct anv_batch_bo *other_bbo,
                   struct anv_batch_bo **bbo_out)
{
   VkResult result;

   struct anv_batch_bo *bbo = anv_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo),
                                        8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (bbo == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
   if (result != VK_SUCCESS)
      goto fail_alloc;

   result = anv_reloc_list_init_clone(&bbo->relocs, &cmd_buffer->pool->alloc,
                                      &other_bbo->relocs);
   if (result != VK_SUCCESS)
      goto fail_bo_alloc;

   bbo->length = other_bbo->length;
   memcpy(bbo->bo.map, other_bbo->bo.map, other_bbo->length);

   bbo->last_ss_pool_bo_offset = other_bbo->last_ss_pool_bo_offset;

   *bbo_out = bbo;

   return VK_SUCCESS;

 fail_bo_alloc:
   anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
 fail_alloc:
   anv_free(&cmd_buffer->pool->alloc, bbo);

   return result;
}
static void
anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch,
                   size_t batch_padding)
{
   batch->next = batch->start = bbo->bo.map;
   batch->end = bbo->bo.map + bbo->bo.size - batch_padding;
   batch->relocs = &bbo->relocs;
   bbo->last_ss_pool_bo_offset = 0;
   bbo->relocs.num_relocs = 0;
}
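
/* Note that batch->end is pulled in by batch_padding bytes.  Callers pass
 * GEN8_MI_BATCH_BUFFER_START_length * 4 so that, even when a command fills
 * the buffer right up to the (shortened) end, there is always room left to
 * emit the MI_BATCH_BUFFER_START that chains to the next batch_bo.
 */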
static void
anv_batch_bo_continue(struct anv_batch_bo *bbo, struct anv_batch *batch,
                      size_t batch_padding)
{
   batch->start = bbo->bo.map;
   batch->next = bbo->bo.map + bbo->length;
   batch->end = bbo->bo.map + bbo->bo.size - batch_padding;
   batch->relocs = &bbo->relocs;
}
static void
anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch)
{
   assert(batch->start == bbo->bo.map);
   bbo->length = batch->next - batch->start;
   VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length));
}
static void
anv_batch_bo_destroy(struct anv_batch_bo *bbo,
                     struct anv_cmd_buffer *cmd_buffer)
{
   anv_reloc_list_finish(&bbo->relocs, &cmd_buffer->pool->alloc);
   anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
   anv_free(&cmd_buffer->pool->alloc, bbo);
}
static VkResult
anv_batch_bo_list_clone(const struct list_head *list,
                        struct anv_cmd_buffer *cmd_buffer,
                        struct list_head *new_list)
{
   VkResult result = VK_SUCCESS;

   list_inithead(new_list);

   struct anv_batch_bo *prev_bbo = NULL;
   list_for_each_entry(struct anv_batch_bo, bbo, list, link) {
      struct anv_batch_bo *new_bbo;
      result = anv_batch_bo_clone(cmd_buffer, bbo, &new_bbo);
      if (result != VK_SUCCESS)
         break;
      list_addtail(&new_bbo->link, new_list);

      if (prev_bbo) {
         /* As we clone this list of batch_bo's, they chain one to the
          * other using MI_BATCH_BUFFER_START commands.  We need to fix up
          * those relocations as we go.  Fortunately, this is pretty easy
          * as it will always be the last relocation in the list.
          */
         uint32_t last_idx = prev_bbo->relocs.num_relocs - 1;
         assert(prev_bbo->relocs.reloc_bos[last_idx] == &bbo->bo);
         prev_bbo->relocs.reloc_bos[last_idx] = &new_bbo->bo;
      }

      prev_bbo = new_bbo;
   }

   if (result != VK_SUCCESS) {
      list_for_each_entry_safe(struct anv_batch_bo, bbo, new_list, link)
         anv_batch_bo_destroy(bbo, cmd_buffer);
   }

   return result;
}
/*-----------------------------------------------------------------------*
 * Functions related to anv_cmd_buffer
 *-----------------------------------------------------------------------*/
static inline struct anv_batch_bo *
anv_cmd_buffer_current_batch_bo(struct anv_cmd_buffer *cmd_buffer)
{
   return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->batch_bos.prev, link);
}
struct anv_address
anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer)
{
   return (struct anv_address) {
      .bo = &cmd_buffer->device->surface_state_block_pool.bo,
      .offset = *(int32_t *)anv_vector_head(&cmd_buffer->bt_blocks),
   };
}
static void
emit_batch_buffer_start(struct anv_cmd_buffer *cmd_buffer,
                        struct anv_bo *bo, uint32_t offset)
{
   /* In gen8+ the address field grew to two dwords to accommodate 48 bit
    * offsets.  The high 16 bits are in the last dword, so we can use the gen8
    * version in either case, as long as we set the instruction length in the
    * header accordingly.  This means that we always emit three dwords here
    * and all the padding and adjustment we do in this file works for all
    * gen versions.
    */

   const uint32_t gen7_length =
      GEN7_MI_BATCH_BUFFER_START_length - GEN7_MI_BATCH_BUFFER_START_length_bias;
   const uint32_t gen8_length =
      GEN8_MI_BATCH_BUFFER_START_length - GEN8_MI_BATCH_BUFFER_START_length_bias;

   anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START,
                  .DwordLength = cmd_buffer->device->info.gen < 8 ?
                                 gen7_length : gen8_length,
                  ._2ndLevelBatchBuffer = _1stlevelbatch,
                  .AddressSpaceIndicator = ASI_PPGTT,
                  .BatchBufferStartAddress = { bo, offset });
}
static void
cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer,
                             struct anv_batch_bo *bbo)
{
   struct anv_batch *batch = &cmd_buffer->batch;
   struct anv_batch_bo *current_bbo =
      anv_cmd_buffer_current_batch_bo(cmd_buffer);

   /* We set the end of the batch a little short so we would be sure we
    * have room for the chaining command.  Since we're about to emit the
    * chaining command, let's set it back where it should go.
    */
   batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4;
   assert(batch->end == current_bbo->bo.map + current_bbo->bo.size);

   emit_batch_buffer_start(cmd_buffer, &bbo->bo, 0);

   anv_batch_bo_finish(current_bbo, batch);
}
static VkResult
anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data)
{
   struct anv_cmd_buffer *cmd_buffer = _data;
   struct anv_batch_bo *new_bbo;

   VkResult result = anv_batch_bo_create(cmd_buffer, &new_bbo);
   if (result != VK_SUCCESS)
      return result;

   struct anv_batch_bo **seen_bbo = anv_vector_add(&cmd_buffer->seen_bbos);
   if (seen_bbo == NULL) {
      anv_batch_bo_destroy(new_bbo, cmd_buffer);
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
   }
   *seen_bbo = new_bbo;

   cmd_buffer_chain_to_batch_bo(cmd_buffer, new_bbo);

   list_addtail(&new_bbo->link, &cmd_buffer->batch_bos);

   anv_batch_bo_start(new_bbo, batch, GEN8_MI_BATCH_BUFFER_START_length * 4);

   return VK_SUCCESS;
}
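
/* Chaining flow in summary: when anv_batch_emit_dwords() runs out of room
 * it invokes this callback, which (1) allocates a fresh batch_bo,
 * (2) records it in seen_bbos for execbuf bookkeeping, (3) emits an
 * MI_BATCH_BUFFER_START in the old batch_bo pointing at the new one, and
 * (4) repoints the anv_batch at the new bo, again reserving padding for
 * the next chaining command.
 */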
struct anv_state
anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer,
                                   uint32_t entries, uint32_t *state_offset)
{
   struct anv_block_pool *block_pool =
      &cmd_buffer->device->surface_state_block_pool;
   int32_t *bt_block = anv_vector_head(&cmd_buffer->bt_blocks);
   struct anv_state state;

   state.alloc_size = align_u32(entries * 4, 32);

   if (cmd_buffer->bt_next + state.alloc_size > block_pool->block_size)
      return (struct anv_state) { 0 };

   state.offset = cmd_buffer->bt_next;
   state.map = block_pool->map + *bt_block + state.offset;

   cmd_buffer->bt_next += state.alloc_size;

   assert(*bt_block < 0);
   *state_offset = -(*bt_block);

   return state;
}
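
/* Binding-table blocks come from the back half of the surface state block
 * pool (anv_block_pool_alloc_back below), so *bt_block is a negative
 * offset from the pool's center.  Since anv_cmd_buffer_surface_base_address()
 * points Surface State Base at the bt_block itself, a surface state at
 * center-relative offset S sits at S + *state_offset relative to that
 * base; callers add *state_offset to each surface-state offset they write
 * into the table.
 */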
struct anv_state
anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer)
{
   return anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 64, 64);
}
struct anv_state
anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer,
                                   uint32_t size, uint32_t alignment)
{
   return anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream,
                                 size, alignment);
}
VkResult
anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_block_pool *block_pool =
      &cmd_buffer->device->surface_state_block_pool;

   int32_t *offset = anv_vector_add(&cmd_buffer->bt_blocks);
   if (offset == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   *offset = anv_block_pool_alloc_back(block_pool);
   cmd_buffer->bt_next = 0;

   return VK_SUCCESS;
}
VkResult
anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_batch_bo *batch_bo;
   VkResult result;

   list_inithead(&cmd_buffer->batch_bos);

   result = anv_batch_bo_create(cmd_buffer, &batch_bo);
   if (result != VK_SUCCESS)
      return result;

   list_addtail(&batch_bo->link, &cmd_buffer->batch_bos);

   cmd_buffer->batch.alloc = &cmd_buffer->pool->alloc;
   cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch;
   cmd_buffer->batch.user_data = cmd_buffer;

   anv_batch_bo_start(batch_bo, &cmd_buffer->batch,
                      GEN8_MI_BATCH_BUFFER_START_length * 4);

   int success = anv_vector_init(&cmd_buffer->seen_bbos,
                                 sizeof(struct anv_bo *),
                                 8 * sizeof(struct anv_bo *));
   if (!success)
      goto fail_batch_bo;

   *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) = batch_bo;

   success = anv_vector_init(&cmd_buffer->bt_blocks, sizeof(int32_t),
                             8 * sizeof(int32_t));
   if (!success)
      goto fail_seen_bbos;

   result = anv_reloc_list_init(&cmd_buffer->surface_relocs,
                                &cmd_buffer->pool->alloc);
   if (result != VK_SUCCESS)
      goto fail_bt_blocks;

   anv_cmd_buffer_new_binding_table_block(cmd_buffer);

   cmd_buffer->execbuf2.objects = NULL;
   cmd_buffer->execbuf2.bos = NULL;
   cmd_buffer->execbuf2.array_length = 0;

   return VK_SUCCESS;

 fail_bt_blocks:
   anv_vector_finish(&cmd_buffer->bt_blocks);
 fail_seen_bbos:
   anv_vector_finish(&cmd_buffer->seen_bbos);
 fail_batch_bo:
   anv_batch_bo_destroy(batch_bo, cmd_buffer);

   return result;
}
void
anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
{
   int32_t *bt_block;
   anv_vector_foreach(bt_block, &cmd_buffer->bt_blocks) {
      anv_block_pool_free(&cmd_buffer->device->surface_state_block_pool,
                          *bt_block);
   }
   anv_vector_finish(&cmd_buffer->bt_blocks);

   anv_reloc_list_finish(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc);

   anv_vector_finish(&cmd_buffer->seen_bbos);

   /* Destroy all of the batch buffers */
   list_for_each_entry_safe(struct anv_batch_bo, bbo,
                            &cmd_buffer->batch_bos, link) {
      anv_batch_bo_destroy(bbo, cmd_buffer);
   }

   anv_free(&cmd_buffer->pool->alloc, cmd_buffer->execbuf2.objects);
   anv_free(&cmd_buffer->pool->alloc, cmd_buffer->execbuf2.bos);
}
void
anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
{
   /* Delete all but the first batch bo */
   assert(!list_empty(&cmd_buffer->batch_bos));
   while (cmd_buffer->batch_bos.next != cmd_buffer->batch_bos.prev) {
      struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer);
      list_del(&bbo->link);
      anv_batch_bo_destroy(bbo, cmd_buffer);
   }
   assert(!list_empty(&cmd_buffer->batch_bos));

   anv_batch_bo_start(anv_cmd_buffer_current_batch_bo(cmd_buffer),
                      &cmd_buffer->batch,
                      GEN8_MI_BATCH_BUFFER_START_length * 4);

   while (anv_vector_length(&cmd_buffer->bt_blocks) > 1) {
      int32_t *bt_block = anv_vector_remove(&cmd_buffer->bt_blocks);
      anv_block_pool_free(&cmd_buffer->device->surface_state_block_pool,
                          *bt_block);
   }
   assert(anv_vector_length(&cmd_buffer->bt_blocks) == 1);
   cmd_buffer->bt_next = 0;

   cmd_buffer->surface_relocs.num_relocs = 0;

   /* Reset the list of seen buffers */
   cmd_buffer->seen_bbos.head = 0;
   cmd_buffer->seen_bbos.tail = 0;

   *(struct anv_batch_bo **)anv_vector_add(&cmd_buffer->seen_bbos) =
      anv_cmd_buffer_current_batch_bo(cmd_buffer);
}
void
anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer);

   if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
      /* When we start a batch buffer, we subtract a certain amount of
       * padding from the end to ensure that we always have room to emit a
       * BATCH_BUFFER_START to chain to the next BO.  We need to remove
       * that padding before we end the batch; otherwise, we may end up
       * with our BATCH_BUFFER_END in another BO.
       */
      cmd_buffer->batch.end += GEN8_MI_BATCH_BUFFER_START_length * 4;
      assert(cmd_buffer->batch.end == batch_bo->bo.map + batch_bo->bo.size);

      anv_batch_emit(&cmd_buffer->batch, GEN7_MI_BATCH_BUFFER_END);

      /* Round batch up to an even number of dwords. */
      if ((cmd_buffer->batch.next - cmd_buffer->batch.start) & 4)
         anv_batch_emit(&cmd_buffer->batch, GEN7_MI_NOOP);

      cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_PRIMARY;
   }

   anv_batch_bo_finish(batch_bo, &cmd_buffer->batch);

   if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
      /* If this is a secondary command buffer, we need to determine the
       * mode in which it will be executed with vkExecuteCommands.  We
       * determine this statically here so that this stays in sync with the
       * actual ExecuteCommands implementation.
       */
      if ((cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) &&
          (batch_bo->length < ANV_CMD_BUFFER_BATCH_SIZE / 2)) {
         /* If the secondary has exactly one batch buffer in its list *and*
          * that batch buffer is less than half of the maximum size, we're
          * probably better off simply copying it into our batch.
          */
         cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_EMIT;
      } else if (!(cmd_buffer->usage_flags &
                   VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT)) {
         cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CHAIN;

         /* When we chain, we need to add an MI_BATCH_BUFFER_START command
          * with its relocation.  In order to handle this we'll increment here
          * so we can unconditionally decrement right before adding the
          * MI_BATCH_BUFFER_START command.
          */
         batch_bo->relocs.num_relocs++;
         cmd_buffer->batch.next += GEN8_MI_BATCH_BUFFER_START_length * 4;
      } else {
         cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN;
      }
   }
}
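
/* Secondary execution modes in summary:
 *  - EMIT: the secondary is small enough to memcpy straight into the
 *    primary's batch (anv_batch_emit_batch).
 *  - CHAIN: the primary jumps into the secondary's batch_bos and the
 *    secondary's final MI_BATCH_BUFFER_START is patched to jump back; only
 *    usable when the secondary won't be executed simultaneously.
 *  - COPY_AND_CHAIN: like CHAIN, but the secondary's batch_bo list is
 *    cloned first so the same secondary can be spliced into many primaries.
 */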
static inline VkResult
anv_cmd_buffer_add_seen_bbos(struct anv_cmd_buffer *cmd_buffer,
                             struct list_head *list)
{
   list_for_each_entry(struct anv_batch_bo, bbo, list, link) {
      struct anv_batch_bo **bbo_ptr = anv_vector_add(&cmd_buffer->seen_bbos);
      if (bbo_ptr == NULL)
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

      *bbo_ptr = bbo;
   }

   return VK_SUCCESS;
}
void
anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
                             struct anv_cmd_buffer *secondary)
{
   switch (secondary->exec_mode) {
   case ANV_CMD_BUFFER_EXEC_MODE_EMIT:
      anv_batch_emit_batch(&primary->batch, &secondary->batch);
      anv_cmd_buffer_emit_state_base_address(primary);
      break;
   case ANV_CMD_BUFFER_EXEC_MODE_CHAIN: {
      struct anv_batch_bo *first_bbo =
         list_first_entry(&secondary->batch_bos, struct anv_batch_bo, link);
      struct anv_batch_bo *last_bbo =
         list_last_entry(&secondary->batch_bos, struct anv_batch_bo, link);

      emit_batch_buffer_start(primary, &first_bbo->bo, 0);

      struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary);
      assert(primary->batch.start == this_bbo->bo.map);
      uint32_t offset = primary->batch.next - primary->batch.start;
      const uint32_t inst_size = GEN8_MI_BATCH_BUFFER_START_length * 4;

      /* Roll back the previous MI_BATCH_BUFFER_START and its relocation so we
       * can emit a new command and relocation for the current splice.  In
       * order to handle the initial-use case, we incremented next and
       * num_relocs in end_batch_buffer() so we can always just subtract
       * here.
       */
      last_bbo->relocs.num_relocs--;
      secondary->batch.next -= inst_size;
      emit_batch_buffer_start(secondary, &this_bbo->bo, offset);
      anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos);

      /* After patching up the secondary buffer, we need to clflush the
       * modified instruction in case we're on a !llc platform.  We use a
       * little loop to handle the case where the instruction crosses a cache
       * line boundary.
       */
      if (!primary->device->info.has_llc) {
         void *inst = secondary->batch.next - inst_size;
         void *p = (void *) (((uintptr_t) inst) & ~CACHELINE_MASK);
         __builtin_ia32_sfence();
         while (p < secondary->batch.next) {
            __builtin_ia32_clflush(p);
            p += CACHELINE_SIZE;
         }
      }

      anv_cmd_buffer_emit_state_base_address(primary);
      break;
   }
   case ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN: {
      struct list_head copy_list;
      VkResult result = anv_batch_bo_list_clone(&secondary->batch_bos,
                                                primary,
                                                &copy_list);
      if (result != VK_SUCCESS)
         return; /* FIXME */

      anv_cmd_buffer_add_seen_bbos(primary, &copy_list);

      struct anv_batch_bo *first_bbo =
         list_first_entry(&copy_list, struct anv_batch_bo, link);
      struct anv_batch_bo *last_bbo =
         list_last_entry(&copy_list, struct anv_batch_bo, link);

      cmd_buffer_chain_to_batch_bo(primary, first_bbo);

      list_splicetail(&copy_list, &primary->batch_bos);

      anv_batch_bo_continue(last_bbo, &primary->batch,
                            GEN8_MI_BATCH_BUFFER_START_length * 4);

      anv_cmd_buffer_emit_state_base_address(primary);
      break;
   }
   default:
      assert(!"Invalid execution mode");
   }

   anv_reloc_list_append(&primary->surface_relocs, &primary->pool->alloc,
                         &secondary->surface_relocs, 0);
}
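
/* The clflush loop in the CHAIN case matters on non-LLC parts (e.g. the
 * Atom-derived platforms of this era): the CPU writes that patched the
 * MI_BATCH_BUFFER_START go through the cache, but the GPU reads the batch
 * from memory, so the dirtied lines must be written back (sfence orders
 * the stores, then one clflush per cache line) before submission.
 */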
static VkResult
anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer,
                      struct anv_bo *bo,
                      struct anv_reloc_list *relocs)
{
   struct drm_i915_gem_exec_object2 *obj = NULL;

   if (bo->index < cmd_buffer->execbuf2.bo_count &&
       cmd_buffer->execbuf2.bos[bo->index] == bo)
      obj = &cmd_buffer->execbuf2.objects[bo->index];

   if (obj == NULL) {
      /* We've never seen this one before.  Add it to the list and assign
       * an id that we can use later.
       */
      if (cmd_buffer->execbuf2.bo_count >= cmd_buffer->execbuf2.array_length) {
         uint32_t new_len = cmd_buffer->execbuf2.objects ?
                            cmd_buffer->execbuf2.array_length * 2 : 64;

         struct drm_i915_gem_exec_object2 *new_objects =
            anv_alloc(&cmd_buffer->pool->alloc, new_len * sizeof(*new_objects),
                      8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
         if (new_objects == NULL)
            return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

         struct anv_bo **new_bos =
            anv_alloc(&cmd_buffer->pool->alloc, new_len * sizeof(*new_bos),
                      8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
         if (new_bos == NULL) {
            anv_free(&cmd_buffer->pool->alloc, new_objects);
            return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
         }

         if (cmd_buffer->execbuf2.objects) {
            memcpy(new_objects, cmd_buffer->execbuf2.objects,
                   cmd_buffer->execbuf2.bo_count * sizeof(*new_objects));
            memcpy(new_bos, cmd_buffer->execbuf2.bos,
                   cmd_buffer->execbuf2.bo_count * sizeof(*new_bos));
         }

         cmd_buffer->execbuf2.objects = new_objects;
         cmd_buffer->execbuf2.bos = new_bos;
         cmd_buffer->execbuf2.array_length = new_len;
      }

      assert(cmd_buffer->execbuf2.bo_count < cmd_buffer->execbuf2.array_length);

      bo->index = cmd_buffer->execbuf2.bo_count++;
      obj = &cmd_buffer->execbuf2.objects[bo->index];
      cmd_buffer->execbuf2.bos[bo->index] = bo;

      obj->handle = bo->gem_handle;
      obj->relocation_count = 0;
      obj->relocs_ptr = 0;
      obj->alignment = 0;
      obj->offset = bo->offset;
      obj->flags = 0;
      obj->rsvd1 = 0;
      obj->rsvd2 = 0;
   }

   if (relocs != NULL && obj->relocation_count == 0) {
      /* This is the first time we've ever seen a list of relocations for
       * this BO.  Go ahead and set the relocations and then walk the list
       * of relocations and add them all.
       */
      obj->relocation_count = relocs->num_relocs;
      obj->relocs_ptr = (uintptr_t) relocs->relocs;

      for (size_t i = 0; i < relocs->num_relocs; i++) {
         /* A quick sanity check on relocations */
         assert(relocs->relocs[i].offset < bo->size);
         anv_cmd_buffer_add_bo(cmd_buffer, relocs->reloc_bos[i], NULL);
      }
   }

   return VK_SUCCESS;
}
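
/* add_bo is effectively a memoized depth-first walk: bo->index doubles as
 * a cache key (valid only while execbuf2.bos[bo->index] still points back
 * at this bo), and each relocation target is added with relocs == NULL so
 * the recursion terminates after one level per BO.
 */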
static void
anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer,
                              struct anv_reloc_list *list)
{
   struct anv_bo *bo;

   /* If the kernel supports I915_EXEC_NO_RELOC, it will compare offset in
    * struct drm_i915_gem_exec_object2 against the bo's current offset and if
    * all bos haven't moved it will skip relocation processing altogether.
    * If I915_EXEC_NO_RELOC is not supported, the kernel ignores the incoming
    * value of offset so we can set it either way.  For that to work we need
    * to make sure all relocs use the same presumed offset.
    */

   for (size_t i = 0; i < list->num_relocs; i++) {
      bo = list->reloc_bos[i];
      if (bo->offset != list->relocs[i].presumed_offset)
         cmd_buffer->execbuf2.need_reloc = true;

      list->relocs[i].target_handle = bo->index;
   }
}
static uint64_t
read_reloc(const struct anv_device *device, const void *p)
{
   if (device->info.gen >= 8)
      return *(uint64_t *)p;
   else
      return *(uint32_t *)p;
}
static void
write_reloc(const struct anv_device *device, void *p, uint64_t v)
{
   if (device->info.gen >= 8)
      *(uint64_t *)p = v;
   else
      *(uint32_t *)p = v;
}
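
/* Relocated addresses are 64 bits wide on gen8+ (48-bit PPGTT addresses
 * split across two dwords) and 32 bits wide on gen7, which is why these
 * helpers switch on the hardware generation; they mirror the dword-count
 * logic in emit_batch_buffer_start().
 */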
static void
adjust_relocations_from_block_pool(struct anv_block_pool *pool,
                                   struct anv_reloc_list *relocs)
{
   for (size_t i = 0; i < relocs->num_relocs; i++) {
      /* In general, we don't know how stale the relocated value is.  It
       * may have been used last time or it may not.  Since we don't want
       * to stomp it while the GPU may be accessing it, we haven't updated
       * it anywhere else in the code.  Instead, we just set the presumed
       * offset to what it is now based on the delta and the data in the
       * block pool.  Then the kernel will update it for us if needed.
       */
      assert(relocs->relocs[i].offset < pool->state.end);
      const void *p = pool->map + relocs->relocs[i].offset;

      /* We're reading back the relocated value from potentially incoherent
       * memory here. However, any change to the value will be from the kernel
       * writing out relocations, which will keep the CPU cache up to date.
       */
      relocs->relocs[i].presumed_offset =
         read_reloc(pool->device, p) - relocs->relocs[i].delta;

      /* All of the relocations from this block pool to other BO's should
       * have been emitted relative to the surface block pool center.  We
       * need to add the center offset to make them relative to the
       * beginning of the actual GEM bo.
       */
      relocs->relocs[i].offset += pool->center_bo_offset;
   }
}
static void
adjust_relocations_to_block_pool(struct anv_block_pool *pool,
                                 struct anv_bo *from_bo,
                                 struct anv_reloc_list *relocs,
                                 uint32_t *last_pool_center_bo_offset)
{
   assert(*last_pool_center_bo_offset <= pool->center_bo_offset);
   uint32_t delta = pool->center_bo_offset - *last_pool_center_bo_offset;

   /* When we initially emit relocations into a block pool, we don't
    * actually know what the final center_bo_offset will be so we just emit
    * it as if center_bo_offset == 0.  Now that we know what the center
    * offset is, we need to walk the list of relocations and adjust any
    * relocations that point to the pool bo with the correct offset.
    */
   for (size_t i = 0; i < relocs->num_relocs; i++) {
      if (relocs->reloc_bos[i] == &pool->bo) {
         /* Adjust the delta value in the relocation to correctly
          * correspond to the new delta.  Initially, this value may have
          * been negative (if treated as unsigned), but we trust in
          * uint32_t roll-over to fix that for us at this point.
          */
         relocs->relocs[i].delta += delta;

         /* Since the delta has changed, we need to update the actual
          * relocated value with the new presumed value.  This function
          * should only be called on batch buffers, so we know it isn't in
          * use by the GPU at the moment.
          */
         assert(relocs->relocs[i].offset < from_bo->size);
         write_reloc(pool->device, from_bo->map + relocs->relocs[i].offset,
                     relocs->relocs[i].presumed_offset +
                     relocs->relocs[i].delta);
      }
   }

   *last_pool_center_bo_offset = pool->center_bo_offset;
}
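
/* The assert above encodes an invariant of the block pool: it only ever
 * grows, so center_bo_offset is monotonically non-decreasing and the delta
 * applied to each reloc is exactly the amount the center has moved since
 * this batch_bo was last adjusted (tracked in last_ss_pool_bo_offset).
 */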
void
anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_batch *batch = &cmd_buffer->batch;
   struct anv_block_pool *ss_pool =
      &cmd_buffer->device->surface_state_block_pool;

   cmd_buffer->execbuf2.bo_count = 0;
   cmd_buffer->execbuf2.need_reloc = false;

   adjust_relocations_from_block_pool(ss_pool, &cmd_buffer->surface_relocs);
   anv_cmd_buffer_add_bo(cmd_buffer, &ss_pool->bo, &cmd_buffer->surface_relocs);

   /* First, we walk over all of the bos we've seen and add them and their
    * relocations to the validate list.
    */
   struct anv_batch_bo **bbo;
   anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
      adjust_relocations_to_block_pool(ss_pool, &(*bbo)->bo, &(*bbo)->relocs,
                                       &(*bbo)->last_ss_pool_bo_offset);

      anv_cmd_buffer_add_bo(cmd_buffer, &(*bbo)->bo, &(*bbo)->relocs);
   }

   struct anv_batch_bo *first_batch_bo =
      list_first_entry(&cmd_buffer->batch_bos, struct anv_batch_bo, link);

   /* The kernel requires that the last entry in the validation list be the
    * batch buffer to execute.  We can simply swap the element
    * corresponding to the first batch_bo in the chain with the last
    * element in the list.
    */
   if (first_batch_bo->bo.index != cmd_buffer->execbuf2.bo_count - 1) {
      uint32_t idx = first_batch_bo->bo.index;
      uint32_t last_idx = cmd_buffer->execbuf2.bo_count - 1;

      struct drm_i915_gem_exec_object2 tmp_obj =
         cmd_buffer->execbuf2.objects[idx];
      assert(cmd_buffer->execbuf2.bos[idx] == &first_batch_bo->bo);

      cmd_buffer->execbuf2.objects[idx] = cmd_buffer->execbuf2.objects[last_idx];
      cmd_buffer->execbuf2.bos[idx] = cmd_buffer->execbuf2.bos[last_idx];
      cmd_buffer->execbuf2.bos[idx]->index = idx;

      cmd_buffer->execbuf2.objects[last_idx] = tmp_obj;
      cmd_buffer->execbuf2.bos[last_idx] = &first_batch_bo->bo;
      first_batch_bo->bo.index = last_idx;
   }

   /* Now we go through and fixup all of the relocation lists to point to
    * the correct indices in the object array.  We have to do this after we
    * reorder the list above as some of the indices may have changed.
    */
   anv_vector_foreach(bbo, &cmd_buffer->seen_bbos)
      anv_cmd_buffer_process_relocs(cmd_buffer, &(*bbo)->relocs);

   anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs);

   if (!cmd_buffer->device->info.has_llc) {
      __builtin_ia32_sfence();
      anv_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
         for (uint32_t i = 0; i < (*bbo)->length; i += CACHELINE_SIZE)
            __builtin_ia32_clflush((*bbo)->bo.map + i);
      }
   }

   cmd_buffer->execbuf2.execbuf = (struct drm_i915_gem_execbuffer2) {
      .buffers_ptr = (uintptr_t) cmd_buffer->execbuf2.objects,
      .buffer_count = cmd_buffer->execbuf2.bo_count,
      .batch_start_offset = 0,
      .batch_len = batch->next - batch->start,
      .cliprects_ptr = 0,
      .num_cliprects = 0,
      .DR1 = 0,
      .DR4 = 0,
      .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER |
               I915_EXEC_CONSTANTS_REL_GENERAL,
      .rsvd1 = cmd_buffer->device->context_id,
      .rsvd2 = 0,
   };

   if (!cmd_buffer->execbuf2.need_reloc)
      cmd_buffer->execbuf2.execbuf.flags |= I915_EXEC_NO_RELOC;
}
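
/* At this point cmd_buffer->execbuf2.execbuf is ready to hand to the
 * kernel; queue submission presumably wraps it in a
 * DRM_IOCTL_I915_GEM_EXECBUFFER2 ioctl via the driver's anv_gem_* helpers.
 * I915_EXEC_NO_RELOC is only set when every presumed offset matched the
 * bo's current offset, letting the kernel skip relocation processing
 * entirely.
 */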