iris: comment everything
src/gallium/drivers/iris/iris_batch.c (mesa.git)
/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
/**
 * @file iris_batch.c
 *
 * Batchbuffer and command submission module.
 *
 * Every API draw call results in a number of GPU commands, which we
 * collect into a "batch buffer". Typically, many draw calls are grouped
 * into a single batch to amortize command submission overhead.
 *
 * We submit batches to the kernel using the I915_GEM_EXECBUFFER2 ioctl.
 * One critical piece of data is the "validation list", which contains a
 * list of the buffer objects (BOs) that the batch's commands need.
 * The kernel will make sure these are resident and pinned at the correct
 * virtual memory address before executing our batch. If a BO is not in
 * the validation list, it effectively does not exist, so take care.
 */
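
/*
 * A batch may also "chain" to a second command buffer: when the primary
 * buffer runs out of space, iris_require_command_space() below emits an
 * MI_BATCH_BUFFER_START jumping to a freshly allocated secondary buffer.
 * We only chain once per batch before flushing.
 */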

#include "iris_batch.h"
#include "iris_binder.h"
#include "iris_bufmgr.h"
#include "iris_context.h"

#include "drm-uapi/i915_drm.h"

#include "util/hash_table.h"
#include "util/set.h"
#include "main/macros.h"

#include <errno.h>
#include <xf86drm.h>

#define FILE_DEBUG_FLAG DEBUG_BUFMGR

#define BATCH_SZ (20 * 1024)

/* Terminating the batch takes either 4 bytes for MI_BATCH_BUFFER_END
 * or 12 bytes for MI_BATCH_BUFFER_START (when chaining). Plus, we may
 * need an extra 4 bytes to pad out to the nearest QWord. So reserve 16.
 */
#define BATCH_RESERVED 16

static void
iris_batch_reset(struct iris_batch *batch);

/**
 * Debugging code to dump the validation list, used by INTEL_DEBUG=submit.
 */
static void
dump_validation_list(struct iris_batch *batch)
{
   fprintf(stderr, "Validation list (length %d):\n", batch->exec_count);

   for (int i = 0; i < batch->exec_count; i++) {
      uint64_t flags = batch->validation_list[i].flags;
      assert(batch->validation_list[i].handle ==
             batch->exec_bos[i]->gem_handle);
      fprintf(stderr, "[%2d]: %2d %-14s %p %-7s @ 0x%016llx (%"PRIu64"B) - %d refs\n",
              i,
              batch->validation_list[i].handle,
              batch->exec_bos[i]->name,
              batch->exec_bos[i],
              (flags & EXEC_OBJECT_WRITE) ? "(write)" : "",
              batch->validation_list[i].offset,
              batch->exec_bos[i]->size,
              batch->exec_bos[i]->refcount);
   }
}

/**
 * Return BO information to the batch decoder (for debugging).
 */
static struct gen_batch_decode_bo
decode_get_bo(void *v_batch, uint64_t address)
{
   struct iris_batch *batch = v_batch;

   for (int i = 0; i < batch->exec_count; i++) {
      struct iris_bo *bo = batch->exec_bos[i];
      /* The decoder zeroes out the top 16 bits, so we need to as well */
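      /* (The BO's gtt_offset may be in 48-bit canonical form, i.e. with
       * bit 47 sign-extended into the upper bits; the mask discards them.)
       */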
      uint64_t bo_address = bo->gtt_offset & (~0ull >> 16);

      if (address >= bo_address && address < bo_address + bo->size) {
         return (struct gen_batch_decode_bo) {
            .addr = address,
            .size = bo->size,
            .map = iris_bo_map(batch->dbg, bo, MAP_READ) +
                   (address - bo_address),
         };
      }
   }

   return (struct gen_batch_decode_bo) { };
}

/**
 * Decode the current batch.
 */
static void
decode_batch(struct iris_batch *batch)
{
   void *map = iris_bo_map(batch->dbg, batch->exec_bos[0], MAP_READ);
   gen_print_batch(&batch->decoder, map, batch->primary_batch_size,
                   batch->exec_bos[0]->gtt_offset);
}

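/* Hash table helpers for tables keyed by plain integers stuffed into
 * void pointers (used for the state_sizes table in iris_init_batch).
 */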
static bool
uint_key_compare(const void *a, const void *b)
{
   return a == b;
}

static uint32_t
uint_key_hash(const void *key)
{
   return (uintptr_t) key;
}

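/**
 * Initialize a batch for the given ring, allocating the exec_bos and
 * validation_list arrays, the render/depth cache sets, and (when
 * INTEL_DEBUG is set) the state-size table and batch decoder.
 */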
void
iris_init_batch(struct iris_batch *batch,
                struct iris_screen *screen,
                struct iris_vtable *vtbl,
                struct pipe_debug_callback *dbg,
                uint8_t ring)
{
   batch->screen = screen;
   batch->vtbl = vtbl;
   batch->dbg = dbg;

   /* ring should be one of I915_EXEC_RENDER, I915_EXEC_BLT, etc. */
   assert((ring & ~I915_EXEC_RING_MASK) == 0);
   assert(util_bitcount(ring) == 1);
   batch->ring = ring;

   batch->exec_count = 0;
   batch->exec_array_size = 100;
   batch->exec_bos =
      malloc(batch->exec_array_size * sizeof(batch->exec_bos[0]));
   batch->validation_list =
      malloc(batch->exec_array_size * sizeof(batch->validation_list[0]));

   batch->binder.bo = NULL;

   batch->cache.render = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                                 _mesa_key_pointer_equal);
   batch->cache.depth = _mesa_set_create(NULL, _mesa_hash_pointer,
                                         _mesa_key_pointer_equal);
   if (unlikely(INTEL_DEBUG)) {
      batch->state_sizes =
         _mesa_hash_table_create(NULL, uint_key_hash, uint_key_compare);

      const unsigned decode_flags =
         GEN_BATCH_DECODE_FULL |
         ((INTEL_DEBUG & DEBUG_COLOR) ? GEN_BATCH_DECODE_IN_COLOR : 0) |
         GEN_BATCH_DECODE_OFFSETS |
         GEN_BATCH_DECODE_FLOATS;

      gen_batch_decode_ctx_init(&batch->decoder, &screen->devinfo,
                                stderr, decode_flags, NULL,
                                decode_get_bo, NULL, batch);
      batch->decoder.max_vbo_decoded_lines = 32;
   }

   iris_batch_reset(batch);
}

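/* Force a single volatile read of a field that other threads may be
 * writing (bo->index is updated by whichever batch last used the BO),
 * so the compiler can't reload it between the check and the use.
 */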
#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))

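/**
 * Add a BO to the batch's validation list, returning its index.
 *
 * Takes a reference the first time a BO is added, and grows the
 * exec_bos and validation_list arrays if they're full.
 */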
static unsigned
add_exec_bo(struct iris_batch *batch, struct iris_bo *bo)
{
   unsigned index = READ_ONCE(bo->index);

   if (index < batch->exec_count && batch->exec_bos[index] == bo)
      return index;

   /* May have been shared between multiple active batches */
   for (index = 0; index < batch->exec_count; index++) {
      if (batch->exec_bos[index] == bo)
         return index;
   }

   iris_bo_reference(bo);

   if (batch->exec_count == batch->exec_array_size) {
      batch->exec_array_size *= 2;
      batch->exec_bos =
         realloc(batch->exec_bos,
                 batch->exec_array_size * sizeof(batch->exec_bos[0]));
      batch->validation_list =
         realloc(batch->validation_list,
                 batch->exec_array_size * sizeof(batch->validation_list[0]));
   }

   batch->validation_list[batch->exec_count] =
      (struct drm_i915_gem_exec_object2) {
         .handle = bo->gem_handle,
         .offset = bo->gtt_offset,
         .flags = bo->kflags,
      };

   bo->index = batch->exec_count;
   batch->exec_bos[batch->exec_count] = bo;
   batch->aperture_space += bo->size;

   return batch->exec_count++;
}

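/**
 * Allocate and map a fresh command buffer BO, and add it to the
 * validation list.  EXEC_OBJECT_CAPTURE asks the kernel to include
 * this buffer in the error state if the GPU hangs.
 */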
static void
create_batch(struct iris_batch *batch)
{
   struct iris_screen *screen = batch->screen;
   struct iris_bufmgr *bufmgr = screen->bufmgr;

   batch->bo = iris_bo_alloc(bufmgr, "command buffer",
                             BATCH_SZ + BATCH_RESERVED, IRIS_MEMZONE_OTHER);
   batch->bo->kflags |= EXEC_OBJECT_CAPTURE;
   batch->map = iris_bo_map(NULL, batch->bo, MAP_READ | MAP_WRITE);
   batch->map_next = batch->map;
   batch->contains_draw = false;

   add_exec_bo(batch, batch->bo);
}

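/**
 * Reset the batch for a new round of command emission: drop the old
 * last_bo reference, retain the just-used command buffer as last_bo,
 * allocate a fresh command buffer, and reset the binder and the
 * render/depth cache tracking sets.
 */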
static void
iris_batch_reset(struct iris_batch *batch)
{
   if (batch->last_bo != NULL) {
      iris_bo_unreference(batch->last_bo);
      batch->last_bo = NULL;
   }
   batch->last_bo = batch->bo;
   batch->primary_batch_size = 0;

   create_batch(batch);
   assert(batch->bo->index == 0);

   iris_destroy_binder(&batch->binder);
   iris_init_binder(&batch->binder, batch->bo->bufmgr);

   if (batch->state_sizes)
      _mesa_hash_table_clear(batch->state_sizes, NULL);

   iris_cache_sets_clear(batch);
}

void
iris_batch_free(struct iris_batch *batch)
{
   for (int i = 0; i < batch->exec_count; i++) {
      iris_bo_unreference(batch->exec_bos[i]);
   }
   free(batch->exec_bos);
   free(batch->validation_list);
   iris_bo_unreference(batch->bo);
   batch->bo = NULL;
   batch->map = NULL;
   batch->map_next = NULL;

   iris_bo_unreference(batch->last_bo);

   _mesa_hash_table_destroy(batch->cache.render, NULL);
   _mesa_set_destroy(batch->cache.depth, NULL);

   iris_destroy_binder(&batch->binder);

   if (batch->state_sizes) {
      _mesa_hash_table_destroy(batch->state_sizes, NULL);
      gen_batch_decode_ctx_finish(&batch->decoder);
   }
}

static unsigned
batch_bytes_used(struct iris_batch *batch)
{
   return batch->map_next - batch->map;
}

/**
 * If we've chained to a secondary batch, or are getting near to the end,
 * then flush.  This should only be called between draws.
 */
void
iris_batch_maybe_flush(struct iris_batch *batch, unsigned estimate)
{
   if (batch->bo != batch->exec_bos[0] ||
       batch_bytes_used(batch) + estimate >= BATCH_SZ) {
      iris_batch_flush(batch);
   }
}

/**
 * Ensure the current command buffer has \param size bytes of space
 * remaining.  If not, this creates a secondary batch buffer and emits
 * a jump from the primary batch to the start of the secondary.
 *
 * Most callers want iris_get_command_space() instead.
 */
void
iris_require_command_space(struct iris_batch *batch, unsigned size)
{
   const unsigned required_bytes = batch_bytes_used(batch) + size;

   if (required_bytes >= BATCH_SZ) {
      /* We only support chaining a single time. */
      assert(batch->bo == batch->exec_bos[0]);

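      /* Carve out 12 bytes for a 3-DWord MI_BATCH_BUFFER_START (header
       * plus a 64-bit graphics address), and note where the optional
       * MI_NOOP pad would land; BATCH_RESERVED guarantees room for both.
       */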
      uint32_t *cmd = batch->map_next;
      uint64_t *addr = batch->map_next + 4;
      uint32_t *noop = batch->map_next + 12;
      batch->map_next += 12;

      /* No longer held by batch->bo, still held by validation list */
      iris_bo_unreference(batch->bo);
      batch->primary_batch_size = ALIGN(batch_bytes_used(batch), 8);
      create_batch(batch);

      /* Emit MI_BATCH_BUFFER_START to chain to another batch. */
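      /* (0x31 << 23) is the MI_BATCH_BUFFER_START opcode, (1 << 8)
       * selects PPGTT rather than GGTT addressing, and (3 - 2) is the
       * DWord length field, encoded as total DWords minus two.
       */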
      *cmd = (0x31 << 23) | (1 << 8) | (3 - 2);
      *addr = batch->bo->gtt_offset;
      *noop = 0;
   }
}

/**
 * Allocate space in the current command buffer, and return a pointer
 * to the mapped area so the caller can write commands there.
 *
 * This should be called whenever emitting commands.
 */
void *
iris_get_command_space(struct iris_batch *batch, unsigned bytes)
{
   iris_require_command_space(batch, bytes);
   void *map = batch->map_next;
   batch->map_next += bytes;
   return map;
}
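
/* An illustrative sketch of how a caller might use this (not code from
 * this file): emitting two MI_NOOP DWords by hand would look like
 *
 *    uint32_t *map = iris_get_command_space(batch, 2 * sizeof(uint32_t));
 *    map[0] = 0;   // MI_NOOP
 *    map[1] = 0;   // MI_NOOP
 *
 * Helpers like iris_batch_emit() below wrap this pattern.
 */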

/**
 * Helper to emit GPU commands - allocates space, copies them there.
 */
void
iris_batch_emit(struct iris_batch *batch, const void *data, unsigned size)
{
   void *map = iris_get_command_space(batch, size);
   memcpy(map, data, size);
}

/**
 * Terminate a batch with MI_BATCH_BUFFER_END.
 */
static void
iris_finish_batch(struct iris_batch *batch)
{
   // XXX: ISP DIS

   /* Emit MI_BATCH_BUFFER_END to finish our batch.  Note that execbuf2
    * requires our batch size to be QWord aligned, so we pad it out if
    * necessary by emitting an extra MI_NOOP after the end.
    */
   const bool qword_aligned = (batch_bytes_used(batch) % 8) == 0;
   uint32_t *map = batch->map_next;

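   /* (0xA << 23) is MI_BATCH_BUFFER_END; the zero DWord written after it
    * is the MI_NOOP used for QWord padding when needed.
    */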
   map[0] = (0xA << 23);
   map[1] = 0;

   batch->map_next += qword_aligned ? 8 : 4;

   if (batch->bo == batch->exec_bos[0])
      batch->primary_batch_size = batch_bytes_used(batch);
}

/**
 * Submit the batch to the GPU via execbuffer2.
 */
static int
submit_batch(struct iris_batch *batch, int in_fence_fd, int *out_fence_fd)
{
   iris_bo_unmap(batch->bo);

   /* The requirements for using I915_EXEC_NO_RELOC are:
    *
    *   The addresses written in the objects must match the corresponding
    *   reloc.gtt_offset which in turn must match the corresponding
    *   execobject.offset.
    *
    *   Any render targets written to in the batch must be flagged with
    *   EXEC_OBJECT_WRITE.
    *
    *   To avoid stalling, execobject.offset should match the current
    *   address of that object within the active context.
    */
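
   /* I915_EXEC_BATCH_FIRST tells the kernel the batch is element 0 of
    * the validation list rather than the last element, and
    * I915_EXEC_HANDLE_LUT says any relocation handles are indices into
    * that list instead of GEM handles.
    */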
   struct drm_i915_gem_execbuffer2 execbuf = {
      .buffers_ptr = (uintptr_t) batch->validation_list,
      .buffer_count = batch->exec_count,
      .batch_start_offset = 0,
      .batch_len = batch->primary_batch_size,
      .flags = batch->ring |
               I915_EXEC_NO_RELOC |
               I915_EXEC_BATCH_FIRST |
               I915_EXEC_HANDLE_LUT,
      .rsvd1 = batch->hw_ctx_id, /* rsvd1 is actually the context ID */
   };

   unsigned long cmd = DRM_IOCTL_I915_GEM_EXECBUFFER2;

   if (in_fence_fd != -1) {
      execbuf.rsvd2 = in_fence_fd;
      execbuf.flags |= I915_EXEC_FENCE_IN;
   }

   if (out_fence_fd != NULL) {
      cmd = DRM_IOCTL_I915_GEM_EXECBUFFER2_WR;
      *out_fence_fd = -1;
      execbuf.flags |= I915_EXEC_FENCE_OUT;
   }

   int ret = drm_ioctl(batch->screen->fd, cmd, &execbuf);
   if (ret != 0) {
      ret = -errno;
      DBG("execbuf FAILED: errno = %d\n", -ret);
   } else {
      DBG("execbuf succeeded\n");
   }

   for (int i = 0; i < batch->exec_count; i++) {
      struct iris_bo *bo = batch->exec_bos[i];

      bo->idle = false;
      bo->index = -1;
   }

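   /* With I915_EXEC_FENCE_OUT, the kernel returns the new fence fd in
    * the upper 32 bits of rsvd2 (the lower half carried our input fence).
    */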
   if (ret == 0 && out_fence_fd != NULL)
      *out_fence_fd = execbuf.rsvd2 >> 32;

   return ret;
}

/**
 * Flush the batch buffer, submitting it to the GPU and resetting it so
 * we're ready to emit the next batch.
 *
 * \param in_fence_fd is ignored if -1.  Otherwise, this function takes
 * ownership of the fd.
 *
 * \param out_fence_fd is ignored if NULL.  Otherwise, the caller must
 * take ownership of the returned fd.
 */
int
_iris_batch_flush_fence(struct iris_batch *batch,
                        int in_fence_fd, int *out_fence_fd,
                        const char *file, int line)
{
   if (batch_bytes_used(batch) == 0)
      return 0;

   iris_finish_batch(batch);

   if (unlikely(INTEL_DEBUG & (DEBUG_BATCH | DEBUG_SUBMIT))) {
      int bytes_for_commands = batch_bytes_used(batch);
      int bytes_for_binder = batch->binder.insert_point;
      int second_bytes = 0;
      if (batch->bo != batch->exec_bos[0]) {
         second_bytes = bytes_for_commands;
         bytes_for_commands += batch->primary_batch_size;
      }
      fprintf(stderr, "%19s:%-3d: Batchbuffer flush with %5d+%5db (%0.1f%%) "
              "(cmds), %5db (%0.1f%%) (binder), %4d BOs (%0.1fMB aperture)\n",
              file, line,
              batch->primary_batch_size, second_bytes,
              100.0f * bytes_for_commands / BATCH_SZ,
              bytes_for_binder, 100.0f * bytes_for_binder / IRIS_BINDER_SIZE,
              batch->exec_count,
              (float) batch->aperture_space / (1024 * 1024));
      dump_validation_list(batch);
   }

   if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {
      decode_batch(batch);
   }

   int ret = submit_batch(batch, in_fence_fd, out_fence_fd);

   //throttle(iris);

   if (ret >= 0) {
      //if (iris->ctx.Const.ResetStrategy == GL_LOSE_CONTEXT_ON_RESET_ARB)
         //iris_check_for_reset(ice);

      if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) {
         dbg_printf("waiting for idle\n");
         iris_bo_wait_rendering(batch->bo);
      }
   } else {
#ifdef DEBUG
      const bool color = INTEL_DEBUG & DEBUG_COLOR;
      fprintf(stderr, "%siris: Failed to submit batchbuffer: %-80s%s\n",
              color ? "\e[1;41m" : "", strerror(-ret), color ? "\e[0m" : "");
      abort();
#endif
   }

   /* Clean up after the batch we submitted and prepare for a new one. */
   for (int i = 0; i < batch->exec_count; i++) {
      iris_bo_unreference(batch->exec_bos[i]);
      batch->exec_bos[i] = NULL;
   }
   batch->exec_count = 0;
   batch->aperture_space = 0;

   /* Start a new batch buffer. */
   iris_batch_reset(batch);

   return 0;
}

/**
 * Does the current batch refer to the given BO?
 *
 * (In other words, is the BO in the current batch's validation list?)
 */
bool
iris_batch_references(struct iris_batch *batch, struct iris_bo *bo)
{
   unsigned index = READ_ONCE(bo->index);
   if (index < batch->exec_count && batch->exec_bos[index] == bo)
      return true;

   for (int i = 0; i < batch->exec_count; i++) {
      if (batch->exec_bos[i] == bo)
         return true;
   }
   return false;
}

/**
 * Add a buffer to the current batch's validation list.
 *
 * You must call this on any BO you wish to use in this batch, to ensure
 * that it's resident when the GPU commands execute.
 */
void
iris_use_pinned_bo(struct iris_batch *batch,
                   struct iris_bo *bo,
                   bool writable)
{
   assert(bo->kflags & EXEC_OBJECT_PINNED);
   unsigned index = add_exec_bo(batch, bo);
   if (writable)
      batch->validation_list[index].flags |= EXEC_OBJECT_WRITE;
}
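
/* An illustrative usage sketch (names here are hypothetical, not state
 * from this file): a caller preparing to render to a surface might do
 *
 *    iris_batch_maybe_flush(batch, estimated_bytes);
 *    iris_use_pinned_bo(batch, surf_bo, true);   // writable render target
 *    iris_batch_emit(batch, packed_cmds, cmd_size);
 *
 * so the surface BO lands in the validation list with EXEC_OBJECT_WRITE
 * before the commands referencing it are submitted.
 */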