panfrost: Get rid of pan_drm.c
[mesa.git] / src / gallium / drivers / panfrost / pan_job.c
1 /*
2 * Copyright (C) 2019 Alyssa Rosenzweig
3 * Copyright (C) 2014-2017 Broadcom
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 */
25
26 #include <assert.h>
27
28 #include "drm-uapi/panfrost_drm.h"
29
30 #include "pan_context.h"
31 #include "util/hash_table.h"
32 #include "util/ralloc.h"
33 #include "util/u_format.h"
34 #include "util/u_pack_color.h"
35 #include "pan_util.h"
36 #include "pandecode/decode.h"
37
38 static struct panfrost_batch *
39 panfrost_create_batch(struct panfrost_context *ctx,
40 const struct pipe_framebuffer_state *key)
41 {
42 struct panfrost_batch *batch = rzalloc(ctx, struct panfrost_batch);
43
44 batch->ctx = ctx;
45
46 batch->bos = _mesa_set_create(batch,
47 _mesa_hash_pointer,
48 _mesa_key_pointer_equal);
49
50 batch->minx = batch->miny = ~0;
51 batch->maxx = batch->maxy = 0;
52 batch->transient_offset = 0;
53
54 util_dynarray_init(&batch->headers, batch);
55 util_dynarray_init(&batch->gpu_headers, batch);
56 util_copy_framebuffer_state(&batch->key, key);
57
58 return batch;
59 }
60
61 static void
62 panfrost_free_batch(struct panfrost_batch *batch)
63 {
64 if (!batch)
65 return;
66
67 struct panfrost_context *ctx = batch->ctx;
68
69 set_foreach(batch->bos, entry) {
70 struct panfrost_bo *bo = (struct panfrost_bo *)entry->key;
71 panfrost_bo_unreference(ctx->base.screen, bo);
72 }
73
74 _mesa_hash_table_remove_key(ctx->batches, &batch->key);
75
76 if (ctx->batch == batch)
77 ctx->batch = NULL;
78
79 util_unreference_framebuffer_state(&batch->key);
80 ralloc_free(batch);
81 }
82
83 static struct panfrost_batch *
84 panfrost_get_batch(struct panfrost_context *ctx,
85 const struct pipe_framebuffer_state *key)
86 {
87 /* Lookup the job first */
88 struct hash_entry *entry = _mesa_hash_table_search(ctx->batches, key);
89
90 if (entry)
91 return entry->data;
92
93 /* Otherwise, let's create a job */
94
95 struct panfrost_batch *batch = panfrost_create_batch(ctx, key);
96
97 /* Save the created job */
98 _mesa_hash_table_insert(ctx->batches, &batch->key, batch);
99
100 return batch;
101 }
102
103 /* Get the job corresponding to the FBO we're currently rendering into */
104
105 struct panfrost_batch *
106 panfrost_get_batch_for_fbo(struct panfrost_context *ctx)
107 {
108 /* If we're wallpapering, we special case to workaround
109 * u_blitter abuse */
110
111 if (ctx->wallpaper_batch)
112 return ctx->wallpaper_batch;
113
114 /* If we already began rendering, use that */
115
116 if (ctx->batch) {
117 assert(util_framebuffer_state_equal(&ctx->batch->key,
118 &ctx->pipe_framebuffer));
119 return ctx->batch;
120 }
121
122 /* If not, look up the job */
123 struct panfrost_batch *batch = panfrost_get_batch(ctx,
124 &ctx->pipe_framebuffer);
125
126 /* Set this job as the current FBO job. Will be reset when updating the
127 * FB state and when submitting or releasing a job.
128 */
129 ctx->batch = batch;
130 return batch;
131 }
132
133 void
134 panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo)
135 {
136 if (!bo)
137 return;
138
139 if (_mesa_set_search(batch->bos, bo))
140 return;
141
142 panfrost_bo_reference(bo);
143 _mesa_set_add(batch->bos, bo);
144 }
145
146 /* Returns the polygon list's GPU address if available, or otherwise allocates
147 * the polygon list. It's perfectly fast to use allocate/free BO directly,
148 * since we'll hit the BO cache and this is one-per-batch anyway. */
149
150 mali_ptr
151 panfrost_batch_get_polygon_list(struct panfrost_batch *batch, unsigned size)
152 {
153 if (batch->polygon_list) {
154 assert(batch->polygon_list->size >= size);
155 } else {
156 struct panfrost_screen *screen = pan_screen(batch->ctx->base.screen);
157
158 /* Create the BO as invisible, as there's no reason to map */
159
160 batch->polygon_list = panfrost_bo_create(screen, size,
161 PAN_ALLOCATE_INVISIBLE);
162 panfrost_batch_add_bo(batch, batch->polygon_list);
163
164 /* A BO reference has been retained by panfrost_batch_add_bo(),
165 * so we need to unreference it here if we want the BO to be
166 * automatically released when the batch is destroyed.
167 */
168 panfrost_bo_unreference(&screen->base, batch->polygon_list);
169 }
170
171 return batch->polygon_list->gpu;
172 }
173
174 static void
175 panfrost_batch_draw_wallpaper(struct panfrost_batch *batch)
176 {
177 /* Nothing to reload? TODO: MRT wallpapers */
178 if (batch->key.cbufs[0] == NULL)
179 return;
180
181 /* Check if the buffer has any content on it worth preserving */
182
183 struct pipe_surface *surf = batch->key.cbufs[0];
184 struct panfrost_resource *rsrc = pan_resource(surf->texture);
185 unsigned level = surf->u.tex.level;
186
187 if (!rsrc->slices[level].initialized)
188 return;
189
190 batch->ctx->wallpaper_batch = batch;
191
192 /* Clamp the rendering area to the damage extent. The
193 * KHR_partial_update() spec states that trying to render outside of
194 * the damage region is "undefined behavior", so we should be safe.
195 */
196 unsigned damage_width = (rsrc->damage.extent.maxx - rsrc->damage.extent.minx);
197 unsigned damage_height = (rsrc->damage.extent.maxy - rsrc->damage.extent.miny);
198
199 if (damage_width && damage_height) {
200 panfrost_batch_intersection_scissor(batch,
201 rsrc->damage.extent.minx,
202 rsrc->damage.extent.miny,
203 rsrc->damage.extent.maxx,
204 rsrc->damage.extent.maxy);
205 }
206
207 /* FIXME: Looks like aligning on a tile is not enough, but
208 * aligning on twice the tile size seems to works. We don't
209 * know exactly what happens here but this deserves extra
210 * investigation to figure it out.
211 */
212 batch->minx = batch->minx & ~((MALI_TILE_LENGTH * 2) - 1);
213 batch->miny = batch->miny & ~((MALI_TILE_LENGTH * 2) - 1);
214 batch->maxx = MIN2(ALIGN_POT(batch->maxx, MALI_TILE_LENGTH * 2),
215 rsrc->base.width0);
216 batch->maxy = MIN2(ALIGN_POT(batch->maxy, MALI_TILE_LENGTH * 2),
217 rsrc->base.height0);
218
219 struct pipe_scissor_state damage;
220 struct pipe_box rects[4];
221
222 /* Clamp the damage box to the rendering area. */
223 damage.minx = MAX2(batch->minx, rsrc->damage.biggest_rect.x);
224 damage.miny = MAX2(batch->miny, rsrc->damage.biggest_rect.y);
225 damage.maxx = MIN2(batch->maxx,
226 rsrc->damage.biggest_rect.x +
227 rsrc->damage.biggest_rect.width);
228 damage.maxy = MIN2(batch->maxy,
229 rsrc->damage.biggest_rect.y +
230 rsrc->damage.biggest_rect.height);
231
232 /* One damage rectangle means we can end up with at most 4 reload
233 * regions:
234 * 1: left region, only exists if damage.x > 0
235 * 2: right region, only exists if damage.x + damage.width < fb->width
236 * 3: top region, only exists if damage.y > 0. The intersection with
237 * the left and right regions are dropped
238 * 4: bottom region, only exists if damage.y + damage.height < fb->height.
239 * The intersection with the left and right regions are dropped
240 *
241 * ____________________________
242 * | | 3 | |
243 * | |___________| |
244 * | | damage | |
245 * | 1 | rect | 2 |
246 * | |___________| |
247 * | | 4 | |
248 * |_______|___________|______|
249 */
250 u_box_2d(batch->minx, batch->miny, damage.minx - batch->minx,
251 batch->maxy - batch->miny, &rects[0]);
252 u_box_2d(damage.maxx, batch->miny, batch->maxx - damage.maxx,
253 batch->maxy - batch->miny, &rects[1]);
254 u_box_2d(damage.minx, batch->miny, damage.maxx - damage.minx,
255 damage.miny - batch->miny, &rects[2]);
256 u_box_2d(damage.minx, damage.maxy, damage.maxx - damage.minx,
257 batch->maxy - damage.maxy, &rects[3]);
258
259 for (unsigned i = 0; i < 4; i++) {
260 /* Width and height are always >= 0 even if width is declared as a
261 * signed integer: u_box_2d() helper takes unsigned args and
262 * panfrost_set_damage_region() is taking care of clamping
263 * negative values.
264 */
265 if (!rects[i].width || !rects[i].height)
266 continue;
267
268 /* Blit the wallpaper in */
269 panfrost_blit_wallpaper(batch->ctx, &rects[i]);
270 }
271 batch->ctx->wallpaper_batch = NULL;
272 }
273
274 static int
275 panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
276 mali_ptr first_job_desc,
277 uint32_t reqs)
278 {
279 struct panfrost_context *ctx = batch->ctx;
280 struct pipe_context *gallium = (struct pipe_context *) ctx;
281 struct panfrost_screen *screen = pan_screen(gallium->screen);
282 struct drm_panfrost_submit submit = {0,};
283 uint32_t *bo_handles;
284 int ret;
285
286 submit.in_syncs = (u64) (uintptr_t) &ctx->out_sync;
287 submit.in_sync_count = 1;
288
289 submit.out_sync = ctx->out_sync;
290
291 submit.jc = first_job_desc;
292 submit.requirements = reqs;
293
294 bo_handles = calloc(batch->bos->entries, sizeof(*bo_handles));
295 assert(bo_handles);
296
297 set_foreach(batch->bos, entry) {
298 struct panfrost_bo *bo = (struct panfrost_bo *)entry->key;
299 assert(bo->gem_handle > 0);
300 bo_handles[submit.bo_handle_count++] = bo->gem_handle;
301 }
302
303 submit.bo_handles = (u64) (uintptr_t) bo_handles;
304 ret = drmIoctl(screen->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
305 free(bo_handles);
306 if (ret) {
307 fprintf(stderr, "Error submitting: %m\n");
308 return errno;
309 }
310
311 /* Trace the job if we're doing that */
312 if (pan_debug & PAN_DBG_TRACE) {
313 /* Wait so we can get errors reported back */
314 drmSyncobjWait(screen->fd, &ctx->out_sync, 1, INT64_MAX, 0, NULL);
315 pandecode_jc(submit.jc, FALSE);
316 }
317
318 return 0;
319 }
320
321 static int
322 panfrost_batch_submit_jobs(struct panfrost_batch *batch)
323 {
324 struct panfrost_context *ctx = batch->ctx;
325 bool has_draws = batch->first_job.gpu;
326 int ret = 0;
327
328 panfrost_batch_add_bo(batch, ctx->scratchpad);
329 panfrost_batch_add_bo(batch, ctx->tiler_heap);
330
331 if (has_draws) {
332 ret = panfrost_batch_submit_ioctl(batch, batch->first_job.gpu, 0);
333 assert(!ret);
334 }
335
336 if (batch->first_tiler.gpu || batch->clear) {
337 mali_ptr fragjob = panfrost_fragment_job(batch, has_draws);
338
339 ret = panfrost_batch_submit_ioctl(batch, fragjob, PANFROST_JD_REQ_FS);
340 assert(!ret);
341 }
342
343 return ret;
344 }
345
346 void
347 panfrost_batch_submit(struct panfrost_batch *batch)
348 {
349 assert(batch);
350
351 struct panfrost_context *ctx = batch->ctx;
352 int ret;
353
354 /* Nothing to do! */
355 if (!batch->last_job.gpu && !batch->clear)
356 goto out;
357
358 if (!batch->clear && batch->last_tiler.gpu)
359 panfrost_batch_draw_wallpaper(batch);
360
361 panfrost_scoreboard_link_batch(batch);
362
363 ret = panfrost_batch_submit_jobs(batch);
364
365 if (ret)
366 fprintf(stderr, "panfrost_batch_submit failed: %d\n", ret);
367
368 out:
369 /* If this is the bound batch, the panfrost_context parameters are
370 * relevant so submitting it invalidates those paramaters, but if it's
371 * not bound, the context parameters are for some other batch so we
372 * can't invalidate them.
373 */
374 if (ctx->batch == batch)
375 panfrost_invalidate_frame(ctx);
376
377 /* The job has been submitted, let's invalidate the current FBO job
378 * cache.
379 */
380 assert(!ctx->batch || batch == ctx->batch);
381 ctx->batch = NULL;
382
383 /* We always stall the pipeline for correct results since pipelined
384 * rendering is quite broken right now (to be fixed by the panfrost_job
385 * refactor, just take the perf hit for correctness)
386 */
387 drmSyncobjWait(pan_screen(ctx->base.screen)->fd, &ctx->out_sync, 1,
388 INT64_MAX, 0, NULL);
389 panfrost_free_batch(batch);
390 }
391
392 void
393 panfrost_batch_set_requirements(struct panfrost_batch *batch)
394 {
395 struct panfrost_context *ctx = batch->ctx;
396
397 if (ctx->rasterizer && ctx->rasterizer->base.multisample)
398 batch->requirements |= PAN_REQ_MSAA;
399
400 if (ctx->depth_stencil && ctx->depth_stencil->depth.writemask)
401 batch->requirements |= PAN_REQ_DEPTH_WRITE;
402 }
403
/* Replicates the 32-bit value `v` into all four 32-bit words of the 128-bit
 * packed colour `packed` (which must have room for 4 words). */

static void
pan_pack_color_32(uint32_t *packed, uint32_t v)
{
        uint32_t *word = packed;
        uint32_t *end = packed + 4;

        while (word != end)
                *word++ = v;
}
412
/* Replicates the 64-bit value given as two 32-bit halves (`lo` first, then
 * `hi`) twice across the four 32-bit words of `packed`. */

static void
pan_pack_color_64(uint32_t *packed, uint32_t lo, uint32_t hi)
{
        /* First copy */
        packed[0] = lo;
        packed[1] = hi;

        /* Second copy */
        packed[2] = lo;
        packed[3] = hi;
}
421
422 static void
423 pan_pack_color(uint32_t *packed, const union pipe_color_union *color, enum pipe_format format)
424 {
425 /* Alpha magicked to 1.0 if there is no alpha */
426
427 bool has_alpha = util_format_has_alpha(format);
428 float clear_alpha = has_alpha ? color->f[3] : 1.0f;
429
430 /* Packed color depends on the framebuffer format */
431
432 const struct util_format_description *desc =
433 util_format_description(format);
434
435 if (util_format_is_rgba8_variant(desc)) {
436 pan_pack_color_32(packed,
437 (float_to_ubyte(clear_alpha) << 24) |
438 (float_to_ubyte(color->f[2]) << 16) |
439 (float_to_ubyte(color->f[1]) << 8) |
440 (float_to_ubyte(color->f[0]) << 0));
441 } else if (format == PIPE_FORMAT_B5G6R5_UNORM) {
442 /* First, we convert the components to R5, G6, B5 separately */
443 unsigned r5 = CLAMP(color->f[0], 0.0, 1.0) * 31.0;
444 unsigned g6 = CLAMP(color->f[1], 0.0, 1.0) * 63.0;
445 unsigned b5 = CLAMP(color->f[2], 0.0, 1.0) * 31.0;
446
447 /* Then we pack into a sparse u32. TODO: Why these shifts? */
448 pan_pack_color_32(packed, (b5 << 25) | (g6 << 14) | (r5 << 5));
449 } else if (format == PIPE_FORMAT_B4G4R4A4_UNORM) {
450 /* We scale the components against 0xF0 (=240.0), rather than 0xFF */
451 unsigned r4 = CLAMP(color->f[0], 0.0, 1.0) * 240.0;
452 unsigned g4 = CLAMP(color->f[1], 0.0, 1.0) * 240.0;
453 unsigned b4 = CLAMP(color->f[2], 0.0, 1.0) * 240.0;
454 unsigned a4 = CLAMP(clear_alpha, 0.0, 1.0) * 240.0;
455
456 /* Pack on *byte* intervals */
457 pan_pack_color_32(packed, (a4 << 24) | (b4 << 16) | (g4 << 8) | r4);
458 } else if (format == PIPE_FORMAT_B5G5R5A1_UNORM) {
459 /* Scale as expected but shift oddly */
460 unsigned r5 = round(CLAMP(color->f[0], 0.0, 1.0)) * 31.0;
461 unsigned g5 = round(CLAMP(color->f[1], 0.0, 1.0)) * 31.0;
462 unsigned b5 = round(CLAMP(color->f[2], 0.0, 1.0)) * 31.0;
463 unsigned a1 = round(CLAMP(clear_alpha, 0.0, 1.0)) * 1.0;
464
465 pan_pack_color_32(packed, (a1 << 31) | (b5 << 25) | (g5 << 15) | (r5 << 5));
466 } else {
467 /* Try Gallium's generic default path. Doesn't work for all
468 * formats but it's a good guess. */
469
470 union util_color out;
471
472 if (util_format_is_pure_integer(format)) {
473 memcpy(out.ui, color->ui, 16);
474 } else {
475 util_pack_color(color->f, format, &out);
476 }
477
478 unsigned size = util_format_get_blocksize(format);
479
480 if (size == 1) {
481 unsigned b = out.ui[0];
482 unsigned s = b | (b << 8);
483 pan_pack_color_32(packed, s | (s << 16));
484 } else if (size == 2)
485 pan_pack_color_32(packed, out.ui[0] | (out.ui[0] << 16));
486 else if (size == 4)
487 pan_pack_color_32(packed, out.ui[0]);
488 else if (size == 8)
489 pan_pack_color_64(packed, out.ui[0], out.ui[1]);
490 else if (size == 16)
491 memcpy(packed, out.ui, 16);
492 else
493 unreachable("Unknown generic format size packing clear colour");
494 }
495 }
496
497 void
498 panfrost_batch_clear(struct panfrost_batch *batch,
499 unsigned buffers,
500 const union pipe_color_union *color,
501 double depth, unsigned stencil)
502 {
503 struct panfrost_context *ctx = batch->ctx;
504
505 if (buffers & PIPE_CLEAR_COLOR) {
506 for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; ++i) {
507 if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
508 continue;
509
510 enum pipe_format format = ctx->pipe_framebuffer.cbufs[i]->format;
511 pan_pack_color(batch->clear_color[i], color, format);
512 }
513 }
514
515 if (buffers & PIPE_CLEAR_DEPTH) {
516 batch->clear_depth = depth;
517 }
518
519 if (buffers & PIPE_CLEAR_STENCIL) {
520 batch->clear_stencil = stencil;
521 }
522
523 batch->clear |= buffers;
524
525 /* Clearing affects the entire framebuffer (by definition -- this is
526 * the Gallium clear callback, which clears the whole framebuffer. If
527 * the scissor test were enabled from the GL side, the state tracker
528 * would emit a quad instead and we wouldn't go down this code path) */
529
530 panfrost_batch_union_scissor(batch, 0, 0,
531 ctx->pipe_framebuffer.width,
532 ctx->pipe_framebuffer.height);
533 }
534
/* Key-equality callback for the ctx->batches hash table: both keys are
 * framebuffer states, compared with util_framebuffer_state_equal(). */

static bool
panfrost_batch_compare(const void *a, const void *b)
{
        const struct pipe_framebuffer_state *lhs = a;
        const struct pipe_framebuffer_state *rhs = b;

        return util_framebuffer_state_equal(lhs, rhs);
}
540
541 static uint32_t
542 panfrost_batch_hash(const void *key)
543 {
544 return _mesa_hash_data(key, sizeof(struct pipe_framebuffer_state));
545 }
546
547 /* Given a new bounding rectangle (scissor), let the job cover the union of the
548 * new and old bounding rectangles */
549
550 void
551 panfrost_batch_union_scissor(struct panfrost_batch *batch,
552 unsigned minx, unsigned miny,
553 unsigned maxx, unsigned maxy)
554 {
555 batch->minx = MIN2(batch->minx, minx);
556 batch->miny = MIN2(batch->miny, miny);
557 batch->maxx = MAX2(batch->maxx, maxx);
558 batch->maxy = MAX2(batch->maxy, maxy);
559 }
560
561 void
562 panfrost_batch_intersection_scissor(struct panfrost_batch *batch,
563 unsigned minx, unsigned miny,
564 unsigned maxx, unsigned maxy)
565 {
566 batch->minx = MAX2(batch->minx, minx);
567 batch->miny = MAX2(batch->miny, miny);
568 batch->maxx = MIN2(batch->maxx, maxx);
569 batch->maxy = MIN2(batch->maxy, maxy);
570 }
571
572 /* Are we currently rendering to the screen (rather than an FBO)? */
573
574 bool
575 panfrost_batch_is_scanout(struct panfrost_batch *batch)
576 {
577 /* If there is no color buffer, it's an FBO */
578 if (batch->key.nr_cbufs != 1)
579 return false;
580
581 /* If we're too early that no framebuffer was sent, it's scanout */
582 if (!batch->key.cbufs[0])
583 return true;
584
585 return batch->key.cbufs[0]->texture->bind & PIPE_BIND_DISPLAY_TARGET ||
586 batch->key.cbufs[0]->texture->bind & PIPE_BIND_SCANOUT ||
587 batch->key.cbufs[0]->texture->bind & PIPE_BIND_SHARED;
588 }
589
590 void
591 panfrost_batch_init(struct panfrost_context *ctx)
592 {
593 ctx->batches = _mesa_hash_table_create(ctx,
594 panfrost_batch_hash,
595 panfrost_batch_compare);
596 }