/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include "iris_batch.h"
#include "iris_bufmgr.h"
#include "iris_context.h"

#include "drm-uapi/i915_drm.h"

#include "util/hash_table.h"
#include "util/set.h"
#include "main/macros.h"

#include <errno.h>
#define FILE_DEBUG_FLAG DEBUG_BUFMGR

#define BATCH_SZ (20 * 1024)
/* Terminating the batch takes either 4 bytes for MI_BATCH_BUFFER_END
 * or 12 bytes for MI_BATCH_BUFFER_START (when chaining).  Plus, we may
 * need an extra 4 bytes to pad out to the nearest QWord.  So reserve 16.
 */
#define BATCH_RESERVED 16
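
/* The reservation is carved out of the allocation itself: create_batch()
 * below allocates BATCH_SZ + BATCH_RESERVED bytes, so commands can fill a
 * full BATCH_SZ and the worst-case terminator (a 12-byte
 * MI_BATCH_BUFFER_START plus 4 bytes of MI_NOOP padding) still fits.
 */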
static void decode_batch(struct iris_batch *batch);

static void iris_batch_reset(struct iris_batch *batch);
static void
dump_validation_list(struct iris_batch *batch)
{
   fprintf(stderr, "Validation list (length %d):\n", batch->exec_count);

   for (int i = 0; i < batch->exec_count; i++) {
      uint64_t flags = batch->validation_list[i].flags;
      assert(batch->validation_list[i].handle ==
             batch->exec_bos[i]->gem_handle);
      fprintf(stderr, "[%2d]: %2d %-14s %p %-7s @ 0x%016llx (%"PRIu64"B)\n",
              i,
              batch->validation_list[i].handle,
              batch->exec_bos[i]->name,
              batch->exec_bos[i],
              (flags & EXEC_OBJECT_WRITE) ? "(write)" : "",
              batch->validation_list[i].offset,
              batch->exec_bos[i]->size);
   }
}
static struct gen_batch_decode_bo
decode_get_bo(void *v_batch, uint64_t address)
{
   struct iris_batch *batch = v_batch;

   for (int i = 0; i < batch->exec_count; i++) {
      struct iris_bo *bo = batch->exec_bos[i];
      /* The decoder zeroes out the top 16 bits, so we need to as well */
      uint64_t bo_address = bo->gtt_offset & (~0ull >> 16);

      if (address >= bo_address && address < bo_address + bo->size) {
         return (struct gen_batch_decode_bo) {
            .addr = address,
            .size = bo->size,
            .map = iris_bo_map(batch->dbg, bo, MAP_READ) +
                   (address - bo_address),
         };
      }
   }

   return (struct gen_batch_decode_bo) { };
}
static bool
uint_key_compare(const void *a, const void *b)
{
   return a == b;
}

static uint32_t
uint_key_hash(const void *key)
{
   return (uintptr_t) key;
}
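
/* Illustrative use (hypothetical offset/size values, not code from this
 * file): these helpers let a table be keyed directly by a 32-bit state
 * offset smuggled through the key pointer, e.g.
 *
 *    _mesa_hash_table_insert(batch->state_sizes,
 *                            (void *)(uintptr_t) offset,
 *                            (void *)(uintptr_t) size);
 */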
void
iris_init_batch(struct iris_batch *batch,
                struct iris_screen *screen,
                struct iris_vtable *vtbl,
                struct pipe_debug_callback *dbg,
                uint8_t ring)
{
   batch->screen = screen;
   batch->vtbl = vtbl;
   batch->dbg = dbg;

   /* ring should be one of I915_EXEC_RENDER, I915_EXEC_BLT, etc. */
   assert((ring & ~I915_EXEC_RING_MASK) == 0);
   assert(util_bitcount(ring) == 1);
   batch->ring = ring;

   batch->exec_count = 0;
   batch->exec_array_size = 100;
   batch->exec_bos =
      malloc(batch->exec_array_size * sizeof(batch->exec_bos[0]));
   batch->validation_list =
      malloc(batch->exec_array_size * sizeof(batch->validation_list[0]));

   batch->cache.render = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                                 _mesa_key_pointer_equal);
   batch->cache.depth = _mesa_set_create(NULL, _mesa_hash_pointer,
                                         _mesa_key_pointer_equal);

   if (unlikely(INTEL_DEBUG)) {
      batch->state_sizes =
         _mesa_hash_table_create(NULL, uint_key_hash, uint_key_compare);

      const unsigned decode_flags =
         GEN_BATCH_DECODE_FULL |
         ((INTEL_DEBUG & DEBUG_COLOR) ? GEN_BATCH_DECODE_IN_COLOR : 0) |
         GEN_BATCH_DECODE_OFFSETS |
         GEN_BATCH_DECODE_FLOATS;

      gen_batch_decode_ctx_init(&batch->decoder, &screen->devinfo,
                                stderr, decode_flags, NULL,
                                decode_get_bo, NULL, batch);
   }

   iris_batch_reset(batch);
}
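
/* Force a fresh, single load of x through a volatile lvalue so the compiler
 * cannot cache it across the unsynchronized checks below (bo->index may be
 * rewritten at any time by another batch that also references the BO).
 */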
#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
static unsigned
add_exec_bo(struct iris_batch *batch, struct iris_bo *bo)
{
   unsigned index = READ_ONCE(bo->index);

   if (index < batch->exec_count && batch->exec_bos[index] == bo)
      return index;

   /* May have been shared between multiple active batches */
   for (index = 0; index < batch->exec_count; index++) {
      if (batch->exec_bos[index] == bo)
         return index;
   }

   iris_bo_reference(bo);

   if (batch->exec_count == batch->exec_array_size) {
      batch->exec_array_size *= 2;
      batch->exec_bos =
         realloc(batch->exec_bos,
                 batch->exec_array_size * sizeof(batch->exec_bos[0]));
      batch->validation_list =
         realloc(batch->validation_list,
                 batch->exec_array_size * sizeof(batch->validation_list[0]));
   }

   batch->validation_list[batch->exec_count] =
      (struct drm_i915_gem_exec_object2) {
         .handle = bo->gem_handle,
         .offset = bo->gtt_offset,
         .flags = bo->kflags,
      };

   bo->index = batch->exec_count;
   batch->exec_bos[batch->exec_count] = bo;
   batch->aperture_space += bo->size;

   return batch->exec_count++;
}
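
/* exec_bos[i] and validation_list[i] are parallel arrays: the index
 * returned above identifies the BO in both, and is cached in bo->index so
 * the fast-path check at the top usually succeeds.
 */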
static void
create_batch(struct iris_batch *batch)
{
   struct iris_screen *screen = batch->screen;
   struct iris_bufmgr *bufmgr = screen->bufmgr;

   batch->bo = iris_bo_alloc(bufmgr, "command buffer",
                             BATCH_SZ + BATCH_RESERVED, IRIS_MEMZONE_OTHER);
   batch->bo->kflags |= EXEC_OBJECT_CAPTURE;
   batch->map = iris_bo_map(NULL, batch->bo, MAP_READ | MAP_WRITE);
   batch->map_next = batch->map;

   add_exec_bo(batch, batch->bo);
}
static void
iris_batch_reset(struct iris_batch *batch)
{
   if (batch->last_bo != NULL) {
      iris_bo_unreference(batch->last_bo);
      batch->last_bo = NULL;
   }
   batch->last_bo = batch->bo;
   batch->primary_batch_size = 0;

   create_batch(batch);
   assert(batch->bo->index == 0);

   if (batch->state_sizes)
      _mesa_hash_table_clear(batch->state_sizes, NULL);

   iris_cache_sets_clear(batch);
}
void
iris_batch_free(struct iris_batch *batch)
{
   for (int i = 0; i < batch->exec_count; i++) {
      iris_bo_unreference(batch->exec_bos[i]);
   }
   free(batch->exec_bos);
   free(batch->validation_list);

   iris_bo_unreference(batch->bo);
   batch->bo = NULL;
   batch->map = NULL;
   batch->map_next = NULL;

   iris_bo_unreference(batch->last_bo);

   _mesa_hash_table_destroy(batch->cache.render, NULL);
   _mesa_set_destroy(batch->cache.depth, NULL);

   if (batch->state_sizes) {
      _mesa_hash_table_destroy(batch->state_sizes, NULL);
      gen_batch_decode_ctx_finish(&batch->decoder);
   }
}
static unsigned
batch_bytes_used(struct iris_batch *batch)
{
   return batch->map_next - batch->map;
}
/**
 * If we've chained to a secondary batch, or are getting near to the end,
 * then flush.  This should only be called between draws.
 */
void
iris_batch_maybe_flush(struct iris_batch *batch, unsigned estimate)
{
   if (batch->bo != batch->exec_bos[0] ||
       batch_bytes_used(batch) + estimate >= BATCH_SZ) {
      iris_batch_flush(batch);
   }
}
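
/* Illustrative call (the 1500 is a hypothetical worst-case estimate of one
 * draw's command bytes):
 *
 *    iris_batch_maybe_flush(batch, 1500);
 *
 * Flushing between draws, rather than wrapping mid-draw, keeps each draw's
 * commands in a single buffer.
 */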
void
iris_require_command_space(struct iris_batch *batch, unsigned size)
{
   const unsigned required_bytes = batch_bytes_used(batch) + size;

   if (required_bytes >= BATCH_SZ) {
      /* No longer held by batch->bo, still held by validation list */
      iris_bo_unreference(batch->bo);
      batch->primary_batch_size = batch_bytes_used(batch);

      const uint32_t MI_BATCH_BUFFER_START = (0x31 << 23) | (1 << 8);
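      /* Bits 28:23 (0x31) are the MI_BATCH_BUFFER_START opcode; bit 8 is
       * the address space indicator, selecting a PPGTT (per-process)
       * address rather than GGTT.
       */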
      /* Reserve 12 bytes at the tail for the chaining command; we can't
       * write it until create_batch() gives us the new buffer's address.
       */
      uint32_t *cmd = batch->map_next;
      uint64_t *addr = batch->map_next + 4;
      batch->map_next += 12;

      create_batch(batch);

      /* Emit MI_BATCH_BUFFER_START to chain to the new batch. */
      *cmd = MI_BATCH_BUFFER_START;
      *addr = batch->bo->gtt_offset;
   }
}
void *
iris_get_command_space(struct iris_batch *batch, unsigned bytes)
{
   iris_require_command_space(batch, bytes);
   void *map = batch->map_next;
   batch->map_next += bytes;
   return map;
}
void
iris_batch_emit(struct iris_batch *batch, const void *data, unsigned size)
{
   void *map = iris_get_command_space(batch, size);
   memcpy(map, data, size);
}
/**
 * Called from iris_batch_flush before emitting MI_BATCHBUFFER_END and
 * sending it off.
 *
 * This function can emit state (say, to preserve registers that aren't saved
 * between batches).
 */
static void
iris_finish_batch(struct iris_batch *batch)
{
   /* Emit MI_BATCH_BUFFER_END to finish our batch.  Note that execbuf2
    * requires our batch size to be QWord aligned, so we pad it out if
    * necessary by emitting an extra MI_NOOP after the end.
    */
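   /* (0xA << 23) is the MI_BATCH_BUFFER_END opcode; the second dword is
    * MI_NOOP (all zeroes) and is only emitted when padding is needed.
    */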
   const uint32_t MI_BATCH_BUFFER_END_AND_NOOP[2] = { (0xA << 23), 0 };
   const bool qword_aligned = (batch_bytes_used(batch) % 8) == 0;
   iris_batch_emit(batch, MI_BATCH_BUFFER_END_AND_NOOP, qword_aligned ? 8 : 4);
}
static int
submit_batch(struct iris_batch *batch, int in_fence_fd, int *out_fence_fd)
{
   iris_bo_unmap(batch->bo);

   /* The requirements for using I915_EXEC_NO_RELOC are:
    *
    *   The addresses written in the objects must match the corresponding
    *   reloc.gtt_offset which in turn must match the corresponding
    *   execobject.offset.
    *
    *   Any render targets written to in the batch must be flagged with
    *   EXEC_OBJECT_WRITE.
    *
    *   To avoid stalling, execobject.offset should match the current
    *   address of that object within the active context.
    */
   struct drm_i915_gem_execbuffer2 execbuf = {
      .buffers_ptr = (uintptr_t) batch->validation_list,
      .buffer_count = batch->exec_count,
      .batch_start_offset = 0,
      .batch_len = batch->bo == batch->exec_bos[0] ? batch_bytes_used(batch)
                                                   : batch->primary_batch_size,
      .flags = batch->ring |
               I915_EXEC_NO_RELOC |
               I915_EXEC_BATCH_FIRST |
               I915_EXEC_HANDLE_LUT,
      .rsvd1 = batch->hw_ctx_id, /* rsvd1 is actually the context ID */
   };
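
   /* Flag notes: I915_EXEC_NO_RELOC tells the kernel to trust the offsets
    * above instead of processing relocations; I915_EXEC_BATCH_FIRST says
    * the batch is validation_list[0] (add_exec_bo puts batch->bo there
    * first) rather than the last entry; I915_EXEC_HANDLE_LUT makes any
    * relocation handles indices into the validation list instead of GEM
    * handles.
    */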
   unsigned long cmd = DRM_IOCTL_I915_GEM_EXECBUFFER2;

   if (in_fence_fd != -1) {
      execbuf.rsvd2 = in_fence_fd;
      execbuf.flags |= I915_EXEC_FENCE_IN;
   }

   if (out_fence_fd != NULL) {
      cmd = DRM_IOCTL_I915_GEM_EXECBUFFER2_WR;
      *out_fence_fd = -1;
      execbuf.flags |= I915_EXEC_FENCE_OUT;
   }

   int ret = drm_ioctl(batch->screen->fd, cmd, &execbuf);
   if (ret != 0) {
      ret = -errno;
      DBG("execbuf FAILED: errno = %d\n", -ret);
   } else {
      DBG("execbuf succeeded\n");
   }
   for (int i = 0; i < batch->exec_count; i++) {
      struct iris_bo *bo = batch->exec_bos[i];

      bo->idle = false;
      bo->index = -1;
   }
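
   /* With I915_EXEC_FENCE_OUT, the kernel hands back the new fence fd in
    * the upper 32 bits of rsvd2 (the lower 32 carried our input fence in).
    */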
   if (ret == 0 && out_fence_fd != NULL)
      *out_fence_fd = execbuf.rsvd2 >> 32;

   return ret;
}
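
/* A sketch of the expected convenience wrapper, assuming iris_batch.h
 * defines it this way (the underscore-prefixed function below takes the
 * call site so the debug output can report it):
 *
 *    #define iris_batch_flush(batch) \
 *       _iris_batch_flush_fence((batch), -1, NULL, __FILE__, __LINE__)
 */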
/**
 * The in_fence_fd is ignored if -1.  Otherwise this function takes ownership
 * of the fd.
 *
 * The out_fence_fd is ignored if NULL.  Otherwise, the caller takes ownership
 * of the returned fd.
 */
int
_iris_batch_flush_fence(struct iris_batch *batch,
                        int in_fence_fd, int *out_fence_fd,
                        const char *file, int line)
{
   if (batch_bytes_used(batch) == 0)
      return 0;

   iris_finish_batch(batch);
   if (unlikely(INTEL_DEBUG & (DEBUG_BATCH | DEBUG_SUBMIT))) {
      int bytes_for_commands = batch_bytes_used(batch);
      if (batch->bo != batch->exec_bos[0])
         bytes_for_commands += batch->primary_batch_size;
      fprintf(stderr, "%19s:%-3d: Batchbuffer flush with %5db (%0.1f%%), "
              "%4d BOs (%0.1fMb aperture)\n",
              file, line,
              bytes_for_commands, 100.0f * bytes_for_commands / BATCH_SZ,
              batch->exec_count,
              (float) batch->aperture_space / (1024 * 1024));
      dump_validation_list(batch);
   }
   if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {
      decode_batch(batch);
   }
   int ret = submit_batch(batch, in_fence_fd, out_fence_fd);
   //if (iris->ctx.Const.ResetStrategy == GL_LOSE_CONTEXT_ON_RESET_ARB)
   //   iris_check_for_reset(ice);
   if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) {
      dbg_printf("waiting for idle\n");
      iris_bo_wait_rendering(batch->bo);
   }
   /* Clean up after the batch we submitted and prepare for a new one. */
   for (int i = 0; i < batch->exec_count; i++) {
      iris_bo_unreference(batch->exec_bos[i]);
      batch->exec_bos[i] = NULL;
   }
   batch->exec_count = 0;
   batch->aperture_space = 0;

   /* Start a new batch buffer. */
   iris_batch_reset(batch);

   return 0;
}
bool
iris_batch_references(struct iris_batch *batch, struct iris_bo *bo)
{
   unsigned index = READ_ONCE(bo->index);
   if (index < batch->exec_count && batch->exec_bos[index] == bo)
      return true;

   for (int i = 0; i < batch->exec_count; i++) {
      if (batch->exec_bos[i] == bo)
         return true;
   }
   return false;
}
/* This is the only way buffers get added to the validate list.
 */
void
iris_use_pinned_bo(struct iris_batch *batch,
                   struct iris_bo *bo,
                   bool writable)
{
   assert(bo->kflags & EXEC_OBJECT_PINNED);
   unsigned index = add_exec_bo(batch, bo);
   if (writable)
      batch->validation_list[index].flags |= EXEC_OBJECT_WRITE;
}
static void
decode_batch(struct iris_batch *batch)
{
   if (batch->bo != batch->exec_bos[0]) {
      void *map = iris_bo_map(batch->dbg, batch->exec_bos[0], MAP_READ);
      gen_print_batch(&batch->decoder, map, batch->primary_batch_size,
                      batch->exec_bos[0]->gtt_offset);
   }

   gen_print_batch(&batch->decoder, batch->map, batch_bytes_used(batch),
                   batch->bo->gtt_offset);
}