2 * Copyright © 2017 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 * Batchbuffer and command submission module.
30 * Every API draw call results in a number of GPU commands, which we
31 * collect into a "batch buffer". Typically, many draw calls are grouped
32 * into a single batch to amortize command submission overhead.
34 * We submit batches to the kernel using the I915_GEM_EXECBUFFER2 ioctl.
35 * One critical piece of data is the "validation list", which contains a
36 * list of the buffer objects (BOs) which the GPU commands need.
37 * The kernel will make sure these are resident and pinned at the correct
38 * virtual memory address before executing our batch. If a BO is not in
39 * the validation list, it effectively does not exist, so take care.
42 #include "iris_batch.h"
43 #include "iris_binder.h"
44 #include "iris_bufmgr.h"
45 #include "iris_context.h"
47 #include "drm-uapi/i915_drm.h"
49 #include "util/hash_table.h"
51 #include "main/macros.h"
56 #define FILE_DEBUG_FLAG DEBUG_BUFMGR
58 #define BATCH_SZ (20 * 1024)
60 /* Terminating the batch takes either 4 bytes for MI_BATCH_BUFFER_END
61 * or 12 bytes for MI_BATCH_BUFFER_START (when chaining). Plus, we may
62 * need an extra 4 bytes to pad out to the nearest QWord. So reserve 16.
64 #define BATCH_RESERVED 16
/* Forward declaration: resets a batch to a fresh state (defined later in
 * this file); needed because iris_init_batch() calls it.
 */
67 iris_batch_reset(struct iris_batch
*batch
);
70 * Debugging code to dump the validation list, used by INTEL_DEBUG=submit.
/* Prints one line per validation-list entry: its GEM handle, name, address,
 * size, refcount, and whether the entry is flagged for GPU writes.
 * NOTE(review): this extraction has dropped some original lines (braces and
 * a couple of fprintf arguments) — verify the full argument list upstream.
 */
73 dump_validation_list(struct iris_batch
*batch
)
75 fprintf(stderr
, "Validation list (length %d):\n", batch
->exec_count
);
77 for (int i
= 0; i
< batch
->exec_count
; i
++) {
78 uint64_t flags
= batch
->validation_list
[i
].flags
;
/* Invariant: validation_list[i] and exec_bos[i] describe the same BO. */
79 assert(batch
->validation_list
[i
].handle
==
80 batch
->exec_bos
[i
]->gem_handle
);
81 fprintf(stderr
, "[%2d]: %2d %-14s %p %-7s @ 0x%016llx (%"PRIu64
"B) - %d refs\n",
83 batch
->validation_list
[i
].handle
,
84 batch
->exec_bos
[i
]->name
,
86 (flags
& EXEC_OBJECT_WRITE
) ? "(write)" : "",
87 batch
->validation_list
[i
].offset
,
88 batch
->exec_bos
[i
]->size
,
89 batch
->exec_bos
[i
]->refcount
);
94 * Return BO information to the batch decoder (for debugging).
/* Callback handed to gen_batch_decode_ctx_init(): given a GPU virtual
 * address, find the BO in the validation list that contains it, and return
 * a CPU mapping offset to that address. Returns an empty (zeroed) struct
 * when no BO covers the address.
 */
96 static struct gen_batch_decode_bo
97 decode_get_bo(void *v_batch
, uint64_t address
)
99 struct iris_batch
*batch
= v_batch
;
/* Linear scan of the validation list; fine for a debug-only path. */
101 for (int i
= 0; i
< batch
->exec_count
; i
++) {
102 struct iris_bo
*bo
= batch
->exec_bos
[i
];
103 /* The decoder zeroes out the top 16 bits, so we need to as well */
104 uint64_t bo_address
= bo
->gtt_offset
& (~0ull >> 16);
106 if (address
>= bo_address
&& address
< bo_address
+ bo
->size
) {
107 return (struct gen_batch_decode_bo
) {
110 .map
= iris_bo_map(batch
->dbg
, bo
, MAP_READ
) +
111 (address
- bo_address
),
/* Not found: return a zeroed descriptor so the decoder skips it. */
116 return (struct gen_batch_decode_bo
) { };
120 * Decode the current batch.
123 decode_batch(struct iris_batch
*batch
)
125 void *map
= iris_bo_map(batch
->dbg
, batch
->exec_bos
[0], MAP_READ
);
126 gen_print_batch(&batch
->decoder
, map
, batch
->primary_batch_size
,
127 batch
->exec_bos
[0]->gtt_offset
);
/**
 * Key-equality callback for hash tables keyed by small integers that are
 * stored directly in the pointer value: two keys match iff the pointer
 * bit patterns are identical.
 */
static bool
uint_key_compare(const void *a, const void *b)
{
   return a == b;
}
/**
 * Hash callback for integer-in-pointer keys: the key value itself serves
 * as the hash (truncated to 32 bits on 64-bit platforms).
 */
static uint32_t
uint_key_hash(const void *key)
{
   const uintptr_t bits = (uintptr_t) key;
   return bits;
}
/* Initialize an iris_batch: record screen/vtable/debug pointers, allocate
 * the exec_bos and validation_list arrays, create the render/depth cache
 * tracking sets, and (under INTEL_DEBUG) set up the batch decoder.
 * Finishes by calling iris_batch_reset() to allocate the first batch BO.
 * NOTE(review): several original lines are missing from this extraction
 * (e.g. the exec_bos malloc's left-hand side) — verify against upstream.
 */
143 iris_init_batch(struct iris_batch
*batch
,
144 struct iris_screen
*screen
,
145 struct iris_vtable
*vtbl
,
146 struct pipe_debug_callback
*dbg
,
149 batch
->screen
= screen
;
153 /* ring should be one of I915_EXEC_RENDER, I915_EXEC_BLT, etc. */
154 assert((ring
& ~I915_EXEC_RING_MASK
) == 0);
155 assert(util_bitcount(ring
) == 1);
/* Start with room for 100 validation-list entries; grown by add_exec_bo. */
158 batch
->exec_count
= 0;
159 batch
->exec_array_size
= 100;
161 malloc(batch
->exec_array_size
* sizeof(batch
->exec_bos
[0]));
162 batch
->validation_list
=
163 malloc(batch
->exec_array_size
* sizeof(batch
->validation_list
[0]));
165 batch
->binder
.bo
= NULL
;
/* Render/depth cache tracking: keyed on BO pointers. */
167 batch
->cache
.render
= _mesa_hash_table_create(NULL
, _mesa_hash_pointer
,
168 _mesa_key_pointer_equal
);
169 batch
->cache
.depth
= _mesa_set_create(NULL
, _mesa_hash_pointer
,
170 _mesa_key_pointer_equal
);
/* Debug-only: state-size table and batch decoder for INTEL_DEBUG dumps. */
171 if (unlikely(INTEL_DEBUG
)) {
173 _mesa_hash_table_create(NULL
, uint_key_hash
, uint_key_compare
);
175 const unsigned decode_flags
=
176 GEN_BATCH_DECODE_FULL
|
177 ((INTEL_DEBUG
& DEBUG_COLOR
) ? GEN_BATCH_DECODE_IN_COLOR
: 0) |
178 GEN_BATCH_DECODE_OFFSETS
|
179 GEN_BATCH_DECODE_FLOATS
;
181 gen_batch_decode_ctx_init(&batch
->decoder
, &screen
->devinfo
,
182 stderr
, decode_flags
, NULL
,
183 decode_get_bo
, NULL
, batch
);
184 batch
->decoder
.max_vbo_decoded_lines
= 32;
187 iris_batch_reset(batch
);
/* Single read of a possibly-shared field; prevents the compiler from
 * re-reading bo->index, which other batches may update concurrently.
 */
190 #define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
/* Add a BO to this batch's validation list (if not already present) and
 * return its index in that list. Takes a reference on newly-added BOs and
 * accounts their size toward the batch's aperture usage.
 */
193 add_exec_bo(struct iris_batch
*batch
, struct iris_bo
*bo
)
/* Fast path: bo->index caches the slot from a previous add to some batch;
 * valid only if it still points at this BO in *this* batch's list.
 */
195 unsigned index
= READ_ONCE(bo
->index
);
197 if (index
< batch
->exec_count
&& batch
->exec_bos
[index
] == bo
)
200 /* May have been shared between multiple active batches */
201 for (index
= 0; index
< batch
->exec_count
; index
++) {
202 if (batch
->exec_bos
[index
] == bo
)
206 iris_bo_reference(bo
);
/* Grow both parallel arrays together (doubling). */
208 if (batch
->exec_count
== batch
->exec_array_size
) {
209 batch
->exec_array_size
*= 2;
211 realloc(batch
->exec_bos
,
212 batch
->exec_array_size
* sizeof(batch
->exec_bos
[0]));
213 batch
->validation_list
=
214 realloc(batch
->validation_list
,
215 batch
->exec_array_size
* sizeof(batch
->validation_list
[0]));
/* New validation-list entry describing this BO for execbuffer2. */
218 batch
->validation_list
[batch
->exec_count
] =
219 (struct drm_i915_gem_exec_object2
) {
220 .handle
= bo
->gem_handle
,
221 .offset
= bo
->gtt_offset
,
225 bo
->index
= batch
->exec_count
;
226 batch
->exec_bos
[batch
->exec_count
] = bo
;
227 batch
->aperture_space
+= bo
->size
;
229 return batch
->exec_count
++;
/* Allocate a fresh command-buffer BO (batch size plus reserved tail space
 * for the terminating commands), map it CPU read/write, and make it the
 * first entry in the validation list via add_exec_bo().
 */
233 create_batch(struct iris_batch
*batch
)
235 struct iris_screen
*screen
= batch
->screen
;
236 struct iris_bufmgr
*bufmgr
= screen
->bufmgr
;
238 batch
->bo
= iris_bo_alloc(bufmgr
, "command buffer",
239 BATCH_SZ
+ BATCH_RESERVED
, IRIS_MEMZONE_OTHER
);
/* EXEC_OBJECT_CAPTURE: include this BO in GPU error-state dumps. */
240 batch
->bo
->kflags
|= EXEC_OBJECT_CAPTURE
;
241 batch
->map
= iris_bo_map(NULL
, batch
->bo
, MAP_READ
| MAP_WRITE
);
242 batch
->map_next
= batch
->map
;
243 batch
->contains_draw
= false;
245 add_exec_bo(batch
, batch
->bo
);
/* Reset the batch for reuse after a flush: drop the previous last_bo
 * reference, remember the current BO as last_bo, allocate a fresh command
 * buffer and binder, and clear per-batch caches and debug state.
 * NOTE(review): the create_batch() call itself appears to be on a line
 * missing from this extraction — verify upstream.
 */
249 iris_batch_reset(struct iris_batch
*batch
)
251 if (batch
->last_bo
!= NULL
) {
252 iris_bo_unreference(batch
->last_bo
);
253 batch
->last_bo
= NULL
;
/* Keep a reference to the just-submitted BO as last_bo. */
255 batch
->last_bo
= batch
->bo
;
256 batch
->primary_batch_size
= 0;
/* The batch BO must be entry 0 of the validation list. */
259 assert(batch
->bo
->index
== 0);
261 iris_destroy_binder(&batch
->binder
);
262 iris_init_binder(&batch
->binder
, batch
->bo
->bufmgr
);
264 if (batch
->state_sizes
)
265 _mesa_hash_table_clear(batch
->state_sizes
, NULL
);
267 iris_cache_sets_clear(batch
);
/* Tear down a batch: release every validation-list BO reference, free the
 * parallel arrays, drop the batch and last BOs, destroy cache sets, the
 * binder, and (if debug was enabled) the state-size table and decoder.
 */
271 iris_batch_free(struct iris_batch
*batch
)
273 for (int i
= 0; i
< batch
->exec_count
; i
++) {
274 iris_bo_unreference(batch
->exec_bos
[i
]);
276 free(batch
->exec_bos
);
277 free(batch
->validation_list
);
278 iris_bo_unreference(batch
->bo
);
281 batch
->map_next
= NULL
;
283 iris_bo_unreference(batch
->last_bo
);
285 _mesa_hash_table_destroy(batch
->cache
.render
, NULL
);
286 _mesa_set_destroy(batch
->cache
.depth
, NULL
);
288 iris_destroy_binder(&batch
->binder
);
/* state_sizes is only allocated under INTEL_DEBUG, alongside the decoder. */
290 if (batch
->state_sizes
) {
291 _mesa_hash_table_destroy(batch
->state_sizes
, NULL
);
292 gen_batch_decode_ctx_finish(&batch
->decoder
);
/* Number of bytes of commands written into the current batch BO so far
 * (distance from the start of the mapping to the write cursor).
 */
297 batch_bytes_used(struct iris_batch
*batch
)
299 return batch
->map_next
- batch
->map
;
303 * If we've chained to a secondary batch, or are getting near to the end,
304 * then flush. This should only be called between draws.
/* \param estimate  upper bound on bytes the next draw may emit; flush now
 * if that much space is no longer guaranteed in the primary batch.
 */
307 iris_batch_maybe_flush(struct iris_batch
*batch
, unsigned estimate
)
/* bo != exec_bos[0] means we already chained to a secondary batch. */
309 if (batch
->bo
!= batch
->exec_bos
[0] ||
310 batch_bytes_used(batch
) + estimate
>= BATCH_SZ
) {
311 iris_batch_flush(batch
);
316 * Ensure the current command buffer has \param size bytes of space
317 * remaining. If not, this creates a secondary batch buffer and emits
318 * a jump from the primary batch to the start of the secondary.
320 * Most callers want iris_get_command_space() instead.
/* NOTE(review): lines creating the secondary batch (create_batch()) and
 * writing the new BO's address appear to be missing from this extraction.
 */
323 iris_require_command_space(struct iris_batch
*batch
, unsigned size
)
325 const unsigned required_bytes
= batch_bytes_used(batch
) + size
;
327 if (required_bytes
>= BATCH_SZ
) {
328 /* We only support chaining a single time. */
329 assert(batch
->bo
== batch
->exec_bos
[0]);
/* Carve out 12 bytes at the tail: 4B opcode + 8B address for the jump. */
331 uint32_t *cmd
= batch
->map_next
;
332 uint64_t *addr
= batch
->map_next
+ 4;
333 uint32_t *noop
= batch
->map_next
+ 12;
334 batch
->map_next
+= 12;
336 /* No longer held by batch->bo, still held by validation list */
337 iris_bo_unreference(batch
->bo
);
338 batch
->primary_batch_size
= ALIGN(batch_bytes_used(batch
), 8);
341 /* Emit MI_BATCH_BUFFER_START to chain to another batch. */
342 *cmd
= (0x31 << 23) | (1 << 8) | (3 - 2);
343 *addr
= batch
->bo
->gtt_offset
;
349 * Allocate space in the current command buffer, and return a pointer
350 * to the mapped area so the caller can write commands there.
352 * This should be called whenever emitting commands.
/* NOTE(review): the trailing "return map;" line appears to be missing
 * from this extraction.
 */
355 iris_get_command_space(struct iris_batch
*batch
, unsigned bytes
)
357 iris_require_command_space(batch
, bytes
);
358 void *map
= batch
->map_next
;
359 batch
->map_next
+= bytes
;
364 * Helper to emit GPU commands - allocates space, copies them there.
/* Convenience wrapper over iris_get_command_space(): reserves 'size' bytes
 * and memcpy's pre-built command data into the batch.
 */
367 iris_batch_emit(struct iris_batch
*batch
, const void *data
, unsigned size
)
369 void *map
= iris_get_command_space(batch
, size
);
370 memcpy(map
, data
, size
);
374 * Terminate a batch with MI_BATCH_BUFFER_END.
/* NOTE(review): the line that writes the padding MI_NOOP (map[1]) when
 * unaligned appears to be missing from this extraction.
 */
377 iris_finish_batch(struct iris_batch
*batch
)
381 /* Emit MI_BATCH_BUFFER_END to finish our batch. Note that execbuf2
382 * requires our batch size to be QWord aligned, so we pad it out if
383 * necessary by emitting an extra MI_NOOP after the end.
385 const bool qword_aligned
= (batch_bytes_used(batch
) % 8) == 0;
386 uint32_t *map
= batch
->map_next
;
/* 0xA << 23 is the MI_BATCH_BUFFER_END opcode. */
388 map
[0] = (0xA << 23);
391 batch
->map_next
+= qword_aligned
? 8 : 4;
/* If we never chained, the primary batch size is simply the bytes used. */
393 if (batch
->bo
== batch
->exec_bos
[0])
394 batch
->primary_batch_size
= batch_bytes_used(batch
);
398 * Submit the batch to the GPU via execbuffer2.
/* Unmaps the batch BO, fills in a drm_i915_gem_execbuffer2 request from
 * the validation list, wires up optional in/out sync-file fences, and
 * issues the ioctl. Returns the ioctl result (0 on success).
 */
401 submit_batch(struct iris_batch
*batch
, int in_fence_fd
, int *out_fence_fd
)
403 iris_bo_unmap(batch
->bo
);
405 /* The requirement for using I915_EXEC_NO_RELOC are:
407 * The addresses written in the objects must match the corresponding
408 * reloc.gtt_offset which in turn must match the corresponding
411 * Any render targets written to in the batch must be flagged with
414 * To avoid stalling, execobject.offset should match the current
415 * address of that object within the active context.
417 struct drm_i915_gem_execbuffer2 execbuf
= {
418 .buffers_ptr
= (uintptr_t) batch
->validation_list
,
419 .buffer_count
= batch
->exec_count
,
420 .batch_start_offset
= 0,
421 .batch_len
= batch
->primary_batch_size
,
422 .flags
= batch
->ring
|
424 I915_EXEC_BATCH_FIRST
|
425 I915_EXEC_HANDLE_LUT
,
426 .rsvd1
= batch
->hw_ctx_id
, /* rsvd1 is actually the context ID */
429 unsigned long cmd
= DRM_IOCTL_I915_GEM_EXECBUFFER2
;
/* Wait on an incoming sync-file fence before executing, if given. */
431 if (in_fence_fd
!= -1) {
432 execbuf
.rsvd2
= in_fence_fd
;
433 execbuf
.flags
|= I915_EXEC_FENCE_IN
;
/* Requesting an out-fence needs the _WR variant of the ioctl. */
436 if (out_fence_fd
!= NULL
) {
437 cmd
= DRM_IOCTL_I915_GEM_EXECBUFFER2_WR
;
439 execbuf
.flags
|= I915_EXEC_FENCE_OUT
;
442 int ret
= drm_ioctl(batch
->screen
->fd
, cmd
, &execbuf
);
445 DBG("execbuf FAILED: errno = %d\n", -ret
);
447 DBG("execbuf succeeded\n");
/* NOTE(review): the loop body updating each BO from the kernel-returned
 * offsets appears to be missing from this extraction.
 */
450 for (int i
= 0; i
< batch
->exec_count
; i
++) {
451 struct iris_bo
*bo
= batch
->exec_bos
[i
];
/* The out-fence fd is returned in the upper 32 bits of rsvd2. */
457 if (ret
== 0 && out_fence_fd
!= NULL
)
458 *out_fence_fd
= execbuf
.rsvd2
>> 32;
464 * Flush the batch buffer, submitting it to the GPU and resetting it so
465 * we're ready to emit the next batch.
467 * \param in_fence_fd is ignored if -1. Otherwise, this function takes
468 * ownership of the fd.
470 * \param out_fence_fd is ignored if NULL. Otherwise, the caller must
471 * take ownership of the returned fd.
/* \param file, line  caller location, used in the debug flush report. */
474 _iris_batch_flush_fence(struct iris_batch
*batch
,
475 int in_fence_fd
, int *out_fence_fd
,
476 const char *file
, int line
)
/* Nothing emitted since the last flush: nothing to submit. */
478 if (batch_bytes_used(batch
) == 0)
481 iris_finish_batch(batch
);
/* Debug report: batch/binder utilization and validation-list dump. */
483 if (unlikely(INTEL_DEBUG
& (DEBUG_BATCH
| DEBUG_SUBMIT
))) {
484 int bytes_for_commands
= batch_bytes_used(batch
);
485 int bytes_for_binder
= batch
->binder
.insert_point
;
486 int second_bytes
= 0;
487 if (batch
->bo
!= batch
->exec_bos
[0]) {
488 second_bytes
= bytes_for_commands
;
489 bytes_for_commands
+= batch
->primary_batch_size
;
491 fprintf(stderr
, "%19s:%-3d: Batchbuffer flush with %5d+%5db (%0.1f%%) "
492 "(cmds), %5db (%0.1f%%) (binder), %4d BOs (%0.1fMb aperture)\n",
494 batch
->primary_batch_size
, second_bytes
,
495 100.0f
* bytes_for_commands
/ BATCH_SZ
,
496 bytes_for_binder
, 100.0f
* bytes_for_binder
/ IRIS_BINDER_SIZE
,
498 (float) batch
->aperture_space
/ (1024 * 1024));
499 dump_validation_list(batch
);
502 if (unlikely(INTEL_DEBUG
& DEBUG_BATCH
)) {
506 int ret
= submit_batch(batch
, in_fence_fd
, out_fence_fd
);
511 //if (iris->ctx.Const.ResetStrategy == GL_LOSE_CONTEXT_ON_RESET_ARB)
512 //iris_check_for_reset(ice);
/* INTEL_DEBUG=sync: serialize by waiting for the GPU after each submit. */
514 if (unlikely(INTEL_DEBUG
& DEBUG_SYNC
)) {
515 dbg_printf("waiting for idle\n");
516 iris_bo_wait_rendering(batch
->bo
);
/* Submission failure path: print a (possibly colorized) error. */
520 const bool color
= INTEL_DEBUG
& DEBUG_COLOR
;
521 fprintf(stderr
, "%siris: Failed to submit batchbuffer: %-80s%s\n",
522 color
? "\e[1;41m" : "", strerror(-ret
), color
? "\e[0m" : "");
527 /* Clean up after the batch we submitted and prepare for a new one. */
528 for (int i
= 0; i
< batch
->exec_count
; i
++) {
529 iris_bo_unreference(batch
->exec_bos
[i
]);
530 batch
->exec_bos
[i
] = NULL
;
532 batch
->exec_count
= 0;
533 batch
->aperture_space
= 0;
535 /* Start a new batch buffer. */
536 iris_batch_reset(batch
);
542 * Does the current batch refer to the given BO?
544 * (In other words, is the BO in the current batch's validation list?)
/* Same fast-path/slow-path lookup as add_exec_bo(), but without adding.
 * NOTE(review): the return statements appear to be on lines missing from
 * this extraction.
 */
547 iris_batch_references(struct iris_batch
*batch
, struct iris_bo
*bo
)
/* Fast path: bo->index may cache this BO's slot in the validation list. */
549 unsigned index
= READ_ONCE(bo
->index
);
550 if (index
< batch
->exec_count
&& batch
->exec_bos
[index
] == bo
)
/* Slow path: linear scan. */
553 for (int i
= 0; i
< batch
->exec_count
; i
++) {
554 if (batch
->exec_bos
[i
] == bo
)
561 * Add a buffer to the current batch's validation list.
563 * You must call this on any BO you wish to use in this batch, to ensure
564 * that it's resident when the GPU commands execute.
/* NOTE(review): the parameter list is truncated in this extraction;
 * presumably a "bool writable" parameter guards the EXEC_OBJECT_WRITE
 * flag below — confirm against upstream.
 */
567 iris_use_pinned_bo(struct iris_batch
*batch
,
/* Only BOs with fixed (pinned) addresses may skip relocations. */
571 assert(bo
->kflags
& EXEC_OBJECT_PINNED
);
572 unsigned index
= add_exec_bo(batch
, bo
);
574 batch
->validation_list
[index
].flags
|= EXEC_OBJECT_WRITE
;