/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "iris_batch.h"
#include "iris_binder.h"
#include "iris_bufmgr.h"
#include "iris_context.h"

#include "drm-uapi/i915_drm.h"

#include "util/hash_table.h"
#include "util/set.h"
#include "main/macros.h"

#include <errno.h>

#define FILE_DEBUG_FLAG DEBUG_BUFMGR

#define BATCH_SZ (20 * 1024)

/* Terminating the batch takes either 4 bytes for MI_BATCH_BUFFER_END
 * or 12 bytes for MI_BATCH_BUFFER_START (when chaining).  Plus, we may
 * need an extra 4 bytes to pad out to the nearest QWord.  So reserve 16.
 */
#define BATCH_RESERVED 16

static void decode_batch(struct iris_batch *batch);

static void
iris_batch_reset(struct iris_batch *batch);

static void
dump_validation_list(struct iris_batch *batch)
{
   fprintf(stderr, "Validation list (length %d):\n", batch->exec_count);

   for (int i = 0; i < batch->exec_count; i++) {
      uint64_t flags = batch->validation_list[i].flags;
      assert(batch->validation_list[i].handle ==
             batch->exec_bos[i]->gem_handle);
      fprintf(stderr, "[%2d]: %2d %-14s %p %-7s @ 0x%016llx (%"PRIu64"B) - %d refs\n",
              i,
              batch->validation_list[i].handle,
              batch->exec_bos[i]->name,
              batch->exec_bos[i],
              (flags & EXEC_OBJECT_WRITE) ? "(write)" : "",
              batch->validation_list[i].offset,
              batch->exec_bos[i]->size,
              batch->exec_bos[i]->refcount);
   }
}

static struct gen_batch_decode_bo
decode_get_bo(void *v_batch, uint64_t address)
{
   struct iris_batch *batch = v_batch;

   for (int i = 0; i < batch->exec_count; i++) {
      struct iris_bo *bo = batch->exec_bos[i];
      /* The decoder zeroes out the top 16 bits, so we need to as well */
      uint64_t bo_address = bo->gtt_offset & (~0ull >> 16);

      if (address >= bo_address && address < bo_address + bo->size) {
         return (struct gen_batch_decode_bo) {
            .addr = address,
            .size = bo->size,
            .map = iris_bo_map(batch->dbg, bo, MAP_READ) +
                   (address - bo_address),
         };
      }
   }

   return (struct gen_batch_decode_bo) { };
}

static bool
uint_key_compare(const void *a, const void *b)
{
   return a == b;
}

static uint32_t
uint_key_hash(const void *key)
{
   return (uintptr_t) key;
}
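
/* These two callbacks back the state_sizes table created in iris_init_batch();
 * the keys there are presumably plain integer offsets stored directly in the
 * pointer value, so hashing the pointer and comparing for identity suffices.
 */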

void
iris_init_batch(struct iris_batch *batch,
                struct iris_screen *screen,
                struct iris_vtable *vtbl,
                struct pipe_debug_callback *dbg,
                uint8_t ring)
{
   batch->screen = screen;
   batch->vtbl = vtbl;
   batch->dbg = dbg;

   /* ring should be one of I915_EXEC_RENDER, I915_EXEC_BLT, etc. */
   assert((ring & ~I915_EXEC_RING_MASK) == 0);
   assert(util_bitcount(ring) == 1);
   batch->ring = ring;

   batch->exec_count = 0;
   batch->exec_array_size = 100;
   batch->exec_bos =
      malloc(batch->exec_array_size * sizeof(batch->exec_bos[0]));
   batch->validation_list =
      malloc(batch->exec_array_size * sizeof(batch->validation_list[0]));

   batch->binder.bo = NULL;

   batch->cache.render = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                                 _mesa_key_pointer_equal);
   batch->cache.depth = _mesa_set_create(NULL, _mesa_hash_pointer,
                                         _mesa_key_pointer_equal);

   if (unlikely(INTEL_DEBUG)) {
      batch->state_sizes =
         _mesa_hash_table_create(NULL, uint_key_hash, uint_key_compare);

      const unsigned decode_flags =
         GEN_BATCH_DECODE_FULL |
         ((INTEL_DEBUG & DEBUG_COLOR) ? GEN_BATCH_DECODE_IN_COLOR : 0) |
         GEN_BATCH_DECODE_OFFSETS |
         GEN_BATCH_DECODE_FLOATS;

      gen_batch_decode_ctx_init(&batch->decoder, &screen->devinfo,
                                stderr, decode_flags, NULL,
                                decode_get_bo, NULL, batch);
      batch->decoder.max_vbo_decoded_lines = 32;
   }

   iris_batch_reset(batch);
}

#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
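
/* bo->index is only a hint: it may be stale if the BO was last added to a
 * different batch, so add_exec_bo() reads it once (preventing the compiler
 * from re-loading it) and verifies the slot actually holds this BO before
 * trusting it.
 */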

static unsigned
add_exec_bo(struct iris_batch *batch, struct iris_bo *bo)
{
   unsigned index = READ_ONCE(bo->index);

   if (index < batch->exec_count && batch->exec_bos[index] == bo)
      return index;

   /* May have been shared between multiple active batches */
   for (index = 0; index < batch->exec_count; index++) {
      if (batch->exec_bos[index] == bo)
         return index;
   }

   iris_bo_reference(bo);

   if (batch->exec_count == batch->exec_array_size) {
      batch->exec_array_size *= 2;
      batch->exec_bos =
         realloc(batch->exec_bos,
                 batch->exec_array_size * sizeof(batch->exec_bos[0]));
      batch->validation_list =
         realloc(batch->validation_list,
                 batch->exec_array_size * sizeof(batch->validation_list[0]));
   }

   batch->validation_list[batch->exec_count] =
      (struct drm_i915_gem_exec_object2) {
         .handle = bo->gem_handle,
         .offset = bo->gtt_offset,
         .flags = bo->kflags,
      };

   bo->index = batch->exec_count;
   batch->exec_bos[batch->exec_count] = bo;
   batch->aperture_space += bo->size;

   return batch->exec_count++;
}

static void
create_batch(struct iris_batch *batch)
{
   struct iris_screen *screen = batch->screen;
   struct iris_bufmgr *bufmgr = screen->bufmgr;

   batch->bo = iris_bo_alloc(bufmgr, "command buffer",
                             BATCH_SZ + BATCH_RESERVED, IRIS_MEMZONE_OTHER);
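
   /* EXEC_OBJECT_CAPTURE asks the kernel to include this BO's contents in
    * any GPU error state it captures, which makes hang debugging easier.
    */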
   batch->bo->kflags |= EXEC_OBJECT_CAPTURE;
   batch->map = iris_bo_map(NULL, batch->bo, MAP_READ | MAP_WRITE);
   batch->map_next = batch->map;
   batch->contains_draw = false;

   add_exec_bo(batch, batch->bo);
}

static void
iris_batch_reset(struct iris_batch *batch)
{
   if (batch->last_bo != NULL) {
      iris_bo_unreference(batch->last_bo);
      batch->last_bo = NULL;
   }
   batch->last_bo = batch->bo;
   batch->primary_batch_size = 0;

   create_batch(batch);
   assert(batch->bo->index == 0);

   iris_destroy_binder(&batch->binder);
   iris_init_binder(&batch->binder, batch->bo->bufmgr);

   if (batch->state_sizes)
      _mesa_hash_table_clear(batch->state_sizes, NULL);

   iris_cache_sets_clear(batch);
}

void
iris_batch_free(struct iris_batch *batch)
{
   for (int i = 0; i < batch->exec_count; i++) {
      iris_bo_unreference(batch->exec_bos[i]);
   }
   free(batch->exec_bos);
   free(batch->validation_list);
   iris_bo_unreference(batch->bo);
   batch->map_next = NULL;

   iris_bo_unreference(batch->last_bo);

   _mesa_hash_table_destroy(batch->cache.render, NULL);
   _mesa_set_destroy(batch->cache.depth, NULL);

   iris_destroy_binder(&batch->binder);

   if (batch->state_sizes) {
      _mesa_hash_table_destroy(batch->state_sizes, NULL);
      gen_batch_decode_ctx_finish(&batch->decoder);
   }
}

static unsigned
batch_bytes_used(struct iris_batch *batch)
{
   return batch->map_next - batch->map;
}

/**
 * If we've chained to a secondary batch, or are getting near to the end,
 * then flush.  This should only be called between draws.
 */
void
iris_batch_maybe_flush(struct iris_batch *batch, unsigned estimate)
{
   if (batch->bo != batch->exec_bos[0] ||
       batch_bytes_used(batch) + estimate >= BATCH_SZ) {
      iris_batch_flush(batch);
   }
}
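
/* A typical caller (sketch; the real draw path lives elsewhere) passes a
 * worst-case estimate of the command bytes it is about to emit, e.g.
 *
 *    iris_batch_maybe_flush(batch, 1500);
 *
 * so that the packets which follow are guaranteed to fit in this batch.
 */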

void
iris_require_command_space(struct iris_batch *batch, unsigned size)
{
   const unsigned required_bytes = batch_bytes_used(batch) + size;

   if (required_bytes >= BATCH_SZ) {
      /* We only support chaining a single time. */
      assert(batch->bo == batch->exec_bos[0]);

      uint32_t *cmd = batch->map_next;
      uint64_t *addr = batch->map_next + 4;
      uint32_t *noop = batch->map_next + 12;
      batch->map_next += 12;

      /* No longer held by batch->bo, still held by validation list */
      iris_bo_unreference(batch->bo);
      batch->primary_batch_size = ALIGN(batch_bytes_used(batch), 8);
      create_batch(batch);
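
      /* The MI_BATCH_BUFFER_START emitted below encodes (descriptive note;
       * see the hardware docs for the authoritative layout): opcode 0x31 in
       * bits 28:23, bit 8 selecting the PPGTT address space, and a DWord
       * Length of (3 - 2) for the 3-DWord packet.
       */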
      /* Emit MI_BATCH_BUFFER_START to chain to another batch. */
      *cmd = (0x31 << 23) | (1 << 8) | (3 - 2);
      *addr = batch->bo->gtt_offset;
      *noop = 0;
   }
}

void *
iris_get_command_space(struct iris_batch *batch, unsigned bytes)
{
   iris_require_command_space(batch, bytes);
   void *map = batch->map_next;
   batch->map_next += bytes;
   return map;
}
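
/* Copy a pre-packed block of commands (or data) into the batch. */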
void
iris_batch_emit(struct iris_batch *batch, const void *data, unsigned size)
{
   void *map = iris_get_command_space(batch, size);
   memcpy(map, data, size);
}

/**
 * Called from iris_batch_flush before emitting MI_BATCH_BUFFER_END and
 * submitting the batch.
 *
 * This function can emit state (say, to preserve registers that aren't saved
 * between batches).
 */
static void
iris_finish_batch(struct iris_batch *batch)
{
   if (batch->bo == batch->exec_bos[0])
      batch->primary_batch_size = batch_bytes_used(batch);

   /* Emit MI_BATCH_BUFFER_END to finish our batch.  Note that execbuf2
    * requires our batch size to be QWord aligned, so we pad it out if
    * necessary by emitting an extra MI_NOOP after the end.
    */
   const bool qword_aligned = (batch_bytes_used(batch) % 8) == 0;
   uint32_t *map = batch->map_next;

   map[0] = (0xA << 23); /* MI_BATCH_BUFFER_END */
   map[1] = 0;           /* MI_NOOP, used only when QWord padding is needed */

   batch->map_next += qword_aligned ? 8 : 4;
}

static int
submit_batch(struct iris_batch *batch, int in_fence_fd, int *out_fence_fd)
{
   iris_bo_unmap(batch->bo);

   /* The requirement for using I915_EXEC_NO_RELOC are:
    *
    *   The addresses written in the objects must match the corresponding
    *   reloc.gtt_offset which in turn must match the corresponding
    *   execobject.offset.
    *
    *   Any render targets written to in the batch must be flagged with
    *   EXEC_OBJECT_WRITE.
    *
    *   To avoid stalling, execobject.offset should match the current
    *   address of that object within the active context.
    */
   struct drm_i915_gem_execbuffer2 execbuf = {
      .buffers_ptr = (uintptr_t) batch->validation_list,
      .buffer_count = batch->exec_count,
      .batch_start_offset = 0,
      .batch_len = batch->primary_batch_size,
      .flags = batch->ring |
               I915_EXEC_NO_RELOC |
               I915_EXEC_BATCH_FIRST |
               I915_EXEC_HANDLE_LUT,
      .rsvd1 = batch->hw_ctx_id, /* rsvd1 is actually the context ID */
   };

   unsigned long cmd = DRM_IOCTL_I915_GEM_EXECBUFFER2;

   if (in_fence_fd != -1) {
      execbuf.rsvd2 = in_fence_fd;
      execbuf.flags |= I915_EXEC_FENCE_IN;
   }

   if (out_fence_fd != NULL) {
      cmd = DRM_IOCTL_I915_GEM_EXECBUFFER2_WR;
      *out_fence_fd = -1;
      execbuf.flags |= I915_EXEC_FENCE_OUT;
   }

   int ret = drm_ioctl(batch->screen->fd, cmd, &execbuf);
   if (ret != 0) {
      ret = -errno;
      DBG("execbuf FAILED: errno = %d\n", -ret);
   } else {
      DBG("execbuf succeeded\n");
   }

   for (int i = 0; i < batch->exec_count; i++) {
      struct iris_bo *bo = batch->exec_bos[i];

      bo->idle = false;
      bo->index = -1;
   }
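
   /* With I915_EXEC_FENCE_OUT, the kernel returns the new fence fd in the
    * upper 32 bits of rsvd2; the lower half carried our input fence (if any).
    */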
   if (ret == 0 && out_fence_fd != NULL)
      *out_fence_fd = execbuf.rsvd2 >> 32;

   return ret;
}

/**
 * The in_fence_fd is ignored if -1.  Otherwise this function takes ownership
 * of the fd.
 *
 * The out_fence_fd is ignored if NULL.  Otherwise, the caller takes ownership
 * of the returned fd.
 */
int
_iris_batch_flush_fence(struct iris_batch *batch,
                        int in_fence_fd, int *out_fence_fd,
                        const char *file, int line)
{
   if (batch_bytes_used(batch) == 0)
      return 0;

   iris_finish_batch(batch);

   if (unlikely(INTEL_DEBUG & (DEBUG_BATCH | DEBUG_SUBMIT))) {
      int bytes_for_commands = batch_bytes_used(batch);
      int bytes_for_binder = batch->binder.insert_point;
      int second_bytes = 0;
      if (batch->bo != batch->exec_bos[0]) {
         second_bytes = bytes_for_commands;
         bytes_for_commands += batch->primary_batch_size;
      }
      fprintf(stderr, "%19s:%-3d: Batchbuffer flush with %5d+%5db (%0.1f%%) "
              "(cmds), %5db (%0.1f%%) (binder), %4d BOs (%0.1fMb aperture)\n",
              file, line,
              batch->primary_batch_size, second_bytes,
              100.0f * bytes_for_commands / BATCH_SZ,
              bytes_for_binder, 100.0f * bytes_for_binder / IRIS_BINDER_SIZE,
              batch->exec_count,
              (float) batch->aperture_space / (1024 * 1024));
      dump_validation_list(batch);
   }

   if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {
      decode_batch(batch);
   }

   int ret = submit_batch(batch, in_fence_fd, out_fence_fd);

   //if (iris->ctx.Const.ResetStrategy == GL_LOSE_CONTEXT_ON_RESET_ARB)
   //   iris_check_for_reset(ice);

   if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) {
      dbg_printf("waiting for idle\n");
      iris_bo_wait_rendering(batch->bo);
   }

   /* Clean up after the batch we submitted and prepare for a new one. */
   for (int i = 0; i < batch->exec_count; i++) {
      iris_bo_unreference(batch->exec_bos[i]);
      batch->exec_bos[i] = NULL;
   }
   batch->exec_count = 0;
   batch->aperture_space = 0;

   /* Start a new batch buffer. */
   iris_batch_reset(batch);

   return ret;
}

bool
iris_batch_references(struct iris_batch *batch, struct iris_bo *bo)
{
   unsigned index = READ_ONCE(bo->index);
   if (index < batch->exec_count && batch->exec_bos[index] == bo)
      return true;

   for (int i = 0; i < batch->exec_count; i++) {
      if (batch->exec_bos[i] == bo)
         return true;
   }
   return false;
}

/**
 * This is the only way buffers get added to the validate list.
 */
void
iris_use_pinned_bo(struct iris_batch *batch,
                   struct iris_bo *bo,
                   bool writable)
{
   assert(bo->kflags & EXEC_OBJECT_PINNED);
   unsigned index = add_exec_bo(batch, bo);

   if (writable)
      batch->validation_list[index].flags |= EXEC_OBJECT_WRITE;
}
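
/* Callers that write to a buffer are expected to mark it writable, e.g.
 * (sketch; "surf_bo" is a hypothetical render target BO):
 *
 *    iris_use_pinned_bo(batch, surf_bo, true);
 *
 * which also satisfies the EXEC_OBJECT_WRITE requirement described in
 * submit_batch() for I915_EXEC_NO_RELOC.
 */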

static void
decode_batch(struct iris_batch *batch)
{
   //if (batch->bo != batch->exec_bos[0]) {
   void *map = iris_bo_map(batch->dbg, batch->exec_bos[0], MAP_READ);
   gen_print_batch(&batch->decoder, map, batch->primary_batch_size,
                   batch->exec_bos[0]->gtt_offset);

   //fprintf(stderr, "Secondary batch...\n");
   //gen_print_batch(&batch->decoder, batch->map, batch_bytes_used(batch),
   //                batch->bo->gtt_offset);
   //}
}