vc4: Reuse list_for_each_entry_rev().
[mesa.git] / src / gallium / drivers / vc4 / vc4_job.c
1 /*
2 * Copyright © 2014-2015 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file vc4_job.c
25 *
26 * Functions for submitting VC4 render jobs to the kernel.
27 */
28
29 #include <xf86drm.h>
30 #include "vc4_cl_dump.h"
31 #include "vc4_context.h"
32 #include "util/hash_table.h"
33
/**
 * Looks up \p key in \p ht and removes the entry that the search returns.
 */
static void
remove_from_ht(struct hash_table *ht, void *key)
{
        _mesa_hash_table_remove(ht, _mesa_hash_table_search(ht, key));
}
40
41 static void
42 vc4_job_free(struct vc4_context *vc4, struct vc4_job *job)
43 {
44 struct vc4_bo **referenced_bos = job->bo_pointers.base;
45 for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) {
46 vc4_bo_unreference(&referenced_bos[i]);
47 }
48
49 remove_from_ht(vc4->jobs, &job->key);
50
51 if (job->color_write) {
52 remove_from_ht(vc4->write_jobs, job->color_write->texture);
53 pipe_surface_reference(&job->color_write, NULL);
54 }
55 if (job->msaa_color_write) {
56 remove_from_ht(vc4->write_jobs, job->msaa_color_write->texture);
57 pipe_surface_reference(&job->msaa_color_write, NULL);
58 }
59 if (job->zs_write) {
60 remove_from_ht(vc4->write_jobs, job->zs_write->texture);
61 pipe_surface_reference(&job->zs_write, NULL);
62 }
63 if (job->msaa_zs_write) {
64 remove_from_ht(vc4->write_jobs, job->msaa_zs_write->texture);
65 pipe_surface_reference(&job->msaa_zs_write, NULL);
66 }
67
68 pipe_surface_reference(&job->color_read, NULL);
69 pipe_surface_reference(&job->zs_read, NULL);
70
71 if (vc4->job == job)
72 vc4->job = NULL;
73
74 ralloc_free(job);
75 }
76
77 static struct vc4_job *
78 vc4_job_create(struct vc4_context *vc4)
79 {
80 struct vc4_job *job = rzalloc(vc4, struct vc4_job);
81
82 vc4_init_cl(job, &job->bcl);
83 vc4_init_cl(job, &job->shader_rec);
84 vc4_init_cl(job, &job->uniforms);
85 vc4_init_cl(job, &job->bo_handles);
86 vc4_init_cl(job, &job->bo_pointers);
87
88 job->draw_min_x = ~0;
89 job->draw_min_y = ~0;
90 job->draw_max_x = 0;
91 job->draw_max_y = 0;
92
93 job->last_gem_handle_hindex = ~0;
94
95 if (vc4->perfmon)
96 job->perfmon = vc4->perfmon;
97
98 return job;
99 }
100
101 void
102 vc4_flush_jobs_writing_resource(struct vc4_context *vc4,
103 struct pipe_resource *prsc)
104 {
105 struct hash_entry *entry = _mesa_hash_table_search(vc4->write_jobs,
106 prsc);
107 if (entry) {
108 struct vc4_job *job = entry->data;
109 vc4_job_submit(vc4, job);
110 }
111 }
112
113 void
114 vc4_flush_jobs_reading_resource(struct vc4_context *vc4,
115 struct pipe_resource *prsc)
116 {
117 struct vc4_resource *rsc = vc4_resource(prsc);
118
119 vc4_flush_jobs_writing_resource(vc4, prsc);
120
121 hash_table_foreach(vc4->jobs, entry) {
122 struct vc4_job *job = entry->data;
123
124 struct vc4_bo **referenced_bos = job->bo_pointers.base;
125 bool found = false;
126 for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) {
127 if (referenced_bos[i] == rsc->bo) {
128 found = true;
129 break;
130 }
131 }
132 if (found) {
133 vc4_job_submit(vc4, job);
134 continue;
135 }
136
137 /* Also check for the Z/color buffers, since the references to
138 * those are only added immediately before submit.
139 */
140 if (job->color_read && !(job->cleared & PIPE_CLEAR_COLOR)) {
141 struct vc4_resource *ctex =
142 vc4_resource(job->color_read->texture);
143 if (ctex->bo == rsc->bo) {
144 vc4_job_submit(vc4, job);
145 continue;
146 }
147 }
148
149 if (job->zs_read && !(job->cleared &
150 (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
151 struct vc4_resource *ztex =
152 vc4_resource(job->zs_read->texture);
153 if (ztex->bo == rsc->bo) {
154 vc4_job_submit(vc4, job);
155 continue;
156 }
157 }
158 }
159 }
160
161 /**
162 * Returns a vc4_job struture for tracking V3D rendering to a particular FBO.
163 *
164 * If we've already started rendering to this FBO, then return old same job,
165 * otherwise make a new one. If we're beginning rendering to an FBO, make
166 * sure that any previous reads of the FBO (or writes to its color/Z surfaces)
167 * have been flushed.
168 */
169 struct vc4_job *
170 vc4_get_job(struct vc4_context *vc4,
171 struct pipe_surface *cbuf, struct pipe_surface *zsbuf)
172 {
173 /* Return the existing job for this FBO if we have one */
174 struct vc4_job_key local_key = {.cbuf = cbuf, .zsbuf = zsbuf};
175 struct hash_entry *entry = _mesa_hash_table_search(vc4->jobs,
176 &local_key);
177 if (entry)
178 return entry->data;
179
180 /* Creating a new job. Make sure that any previous jobs reading or
181 * writing these buffers are flushed.
182 */
183 if (cbuf)
184 vc4_flush_jobs_reading_resource(vc4, cbuf->texture);
185 if (zsbuf)
186 vc4_flush_jobs_reading_resource(vc4, zsbuf->texture);
187
188 struct vc4_job *job = vc4_job_create(vc4);
189
190 if (cbuf) {
191 if (cbuf->texture->nr_samples > 1) {
192 job->msaa = true;
193 pipe_surface_reference(&job->msaa_color_write, cbuf);
194 } else {
195 pipe_surface_reference(&job->color_write, cbuf);
196 }
197 }
198
199 if (zsbuf) {
200 if (zsbuf->texture->nr_samples > 1) {
201 job->msaa = true;
202 pipe_surface_reference(&job->msaa_zs_write, zsbuf);
203 } else {
204 pipe_surface_reference(&job->zs_write, zsbuf);
205 }
206 }
207
208 if (job->msaa) {
209 job->tile_width = 32;
210 job->tile_height = 32;
211 } else {
212 job->tile_width = 64;
213 job->tile_height = 64;
214 }
215
216 if (cbuf)
217 _mesa_hash_table_insert(vc4->write_jobs, cbuf->texture, job);
218 if (zsbuf)
219 _mesa_hash_table_insert(vc4->write_jobs, zsbuf->texture, job);
220
221 job->key.cbuf = cbuf;
222 job->key.zsbuf = zsbuf;
223 _mesa_hash_table_insert(vc4->jobs, &job->key, job);
224
225 return job;
226 }
227
228 struct vc4_job *
229 vc4_get_job_for_fbo(struct vc4_context *vc4)
230 {
231 if (vc4->job)
232 return vc4->job;
233
234 struct pipe_surface *cbuf = vc4->framebuffer.cbufs[0];
235 struct pipe_surface *zsbuf = vc4->framebuffer.zsbuf;
236 struct vc4_job *job = vc4_get_job(vc4, cbuf, zsbuf);
237
238 /* The dirty flags are tracking what's been updated while vc4->job has
239 * been bound, so set them all to ~0 when switching between jobs. We
240 * also need to reset all state at the start of rendering.
241 */
242 vc4->dirty = ~0;
243
244 /* Set up the read surfaces in the job. If they aren't actually
245 * getting read (due to a clear starting the frame), job->cleared will
246 * mask out the read.
247 */
248 pipe_surface_reference(&job->color_read, cbuf);
249 pipe_surface_reference(&job->zs_read, zsbuf);
250
251 /* If we're binding to uninitialized buffers, no need to load their
252 * contents before drawing.
253 */
254 if (cbuf) {
255 struct vc4_resource *rsc = vc4_resource(cbuf->texture);
256 if (!rsc->writes)
257 job->cleared |= PIPE_CLEAR_COLOR0;
258 }
259
260 if (zsbuf) {
261 struct vc4_resource *rsc = vc4_resource(zsbuf->texture);
262 if (!rsc->writes)
263 job->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL;
264 }
265
266 job->draw_tiles_x = DIV_ROUND_UP(vc4->framebuffer.width,
267 job->tile_width);
268 job->draw_tiles_y = DIV_ROUND_UP(vc4->framebuffer.height,
269 job->tile_height);
270
271 /* Initialize the job with the raster order flags -- each draw will
272 * check that we haven't changed the flags, since that requires a
273 * flush.
274 */
275 if (vc4->rasterizer)
276 job->flags = vc4->rasterizer->tile_raster_order_flags;
277
278 vc4->job = job;
279
280 return job;
281 }
282
283 static void
284 vc4_submit_setup_rcl_surface(struct vc4_job *job,
285 struct drm_vc4_submit_rcl_surface *submit_surf,
286 struct pipe_surface *psurf,
287 bool is_depth, bool is_write)
288 {
289 struct vc4_surface *surf = vc4_surface(psurf);
290
291 if (!surf)
292 return;
293
294 struct vc4_resource *rsc = vc4_resource(psurf->texture);
295 submit_surf->hindex = vc4_gem_hindex(job, rsc->bo);
296 submit_surf->offset = surf->offset;
297
298 if (psurf->texture->nr_samples <= 1) {
299 if (is_depth) {
300 submit_surf->bits =
301 VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS,
302 VC4_LOADSTORE_TILE_BUFFER_BUFFER);
303
304 } else {
305 submit_surf->bits =
306 VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_COLOR,
307 VC4_LOADSTORE_TILE_BUFFER_BUFFER) |
308 VC4_SET_FIELD(vc4_rt_format_is_565(psurf->format) ?
309 VC4_LOADSTORE_TILE_BUFFER_BGR565 :
310 VC4_LOADSTORE_TILE_BUFFER_RGBA8888,
311 VC4_LOADSTORE_TILE_BUFFER_FORMAT);
312 }
313 submit_surf->bits |=
314 VC4_SET_FIELD(surf->tiling,
315 VC4_LOADSTORE_TILE_BUFFER_TILING);
316 } else {
317 assert(!is_write);
318 submit_surf->flags |= VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES;
319 }
320
321 if (is_write)
322 rsc->writes++;
323 }
324
325 static void
326 vc4_submit_setup_rcl_render_config_surface(struct vc4_job *job,
327 struct drm_vc4_submit_rcl_surface *submit_surf,
328 struct pipe_surface *psurf)
329 {
330 struct vc4_surface *surf = vc4_surface(psurf);
331
332 if (!surf)
333 return;
334
335 struct vc4_resource *rsc = vc4_resource(psurf->texture);
336 submit_surf->hindex = vc4_gem_hindex(job, rsc->bo);
337 submit_surf->offset = surf->offset;
338
339 if (psurf->texture->nr_samples <= 1) {
340 submit_surf->bits =
341 VC4_SET_FIELD(vc4_rt_format_is_565(surf->base.format) ?
342 VC4_RENDER_CONFIG_FORMAT_BGR565 :
343 VC4_RENDER_CONFIG_FORMAT_RGBA8888,
344 VC4_RENDER_CONFIG_FORMAT) |
345 VC4_SET_FIELD(surf->tiling,
346 VC4_RENDER_CONFIG_MEMORY_FORMAT);
347 }
348
349 rsc->writes++;
350 }
351
352 static void
353 vc4_submit_setup_rcl_msaa_surface(struct vc4_job *job,
354 struct drm_vc4_submit_rcl_surface *submit_surf,
355 struct pipe_surface *psurf)
356 {
357 struct vc4_surface *surf = vc4_surface(psurf);
358
359 if (!surf)
360 return;
361
362 struct vc4_resource *rsc = vc4_resource(psurf->texture);
363 submit_surf->hindex = vc4_gem_hindex(job, rsc->bo);
364 submit_surf->offset = surf->offset;
365 submit_surf->bits = 0;
366 rsc->writes++;
367 }
368
369 /**
370 * Submits the job to the kernel and then reinitializes it.
371 */
372 void
373 vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job)
374 {
375 if (!job->needs_flush)
376 goto done;
377
378 /* The RCL setup would choke if the draw bounds cause no drawing, so
379 * just drop the drawing if that's the case.
380 */
381 if (job->draw_max_x <= job->draw_min_x ||
382 job->draw_max_y <= job->draw_min_y) {
383 goto done;
384 }
385
386 if (vc4_debug & VC4_DEBUG_CL) {
387 fprintf(stderr, "BCL:\n");
388 vc4_dump_cl(job->bcl.base, cl_offset(&job->bcl), false);
389 }
390
391 if (cl_offset(&job->bcl) > 0) {
392 /* Increment the semaphore indicating that binning is done and
393 * unblocking the render thread. Note that this doesn't act
394 * until the FLUSH completes.
395 */
396 cl_ensure_space(&job->bcl, 8);
397 cl_emit(&job->bcl, INCREMENT_SEMAPHORE, incr);
398 /* The FLUSH caps all of our bin lists with a
399 * VC4_PACKET_RETURN.
400 */
401 cl_emit(&job->bcl, FLUSH, flush);
402 }
403 struct drm_vc4_submit_cl submit = {
404 .color_read.hindex = ~0,
405 .zs_read.hindex = ~0,
406 .color_write.hindex = ~0,
407 .msaa_color_write.hindex = ~0,
408 .zs_write.hindex = ~0,
409 .msaa_zs_write.hindex = ~0,
410 };
411
412 cl_ensure_space(&job->bo_handles, 6 * sizeof(uint32_t));
413 cl_ensure_space(&job->bo_pointers, 6 * sizeof(struct vc4_bo *));
414
415 if (job->resolve & PIPE_CLEAR_COLOR) {
416 if (!(job->cleared & PIPE_CLEAR_COLOR)) {
417 vc4_submit_setup_rcl_surface(job, &submit.color_read,
418 job->color_read,
419 false, false);
420 }
421 vc4_submit_setup_rcl_render_config_surface(job,
422 &submit.color_write,
423 job->color_write);
424 vc4_submit_setup_rcl_msaa_surface(job,
425 &submit.msaa_color_write,
426 job->msaa_color_write);
427 }
428 if (job->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
429 if (!(job->cleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
430 vc4_submit_setup_rcl_surface(job, &submit.zs_read,
431 job->zs_read, true, false);
432 }
433 vc4_submit_setup_rcl_surface(job, &submit.zs_write,
434 job->zs_write, true, true);
435 vc4_submit_setup_rcl_msaa_surface(job, &submit.msaa_zs_write,
436 job->msaa_zs_write);
437 }
438
439 if (job->msaa) {
440 /* This bit controls how many pixels the general
441 * (i.e. subsampled) loads/stores are iterating over
442 * (multisample loads replicate out to the other samples).
443 */
444 submit.color_write.bits |= VC4_RENDER_CONFIG_MS_MODE_4X;
445 /* Controls whether color_write's
446 * VC4_PACKET_STORE_MS_TILE_BUFFER does 4x decimation
447 */
448 submit.color_write.bits |= VC4_RENDER_CONFIG_DECIMATE_MODE_4X;
449 }
450
451 submit.bo_handles = (uintptr_t)job->bo_handles.base;
452 submit.bo_handle_count = cl_offset(&job->bo_handles) / 4;
453 submit.bin_cl = (uintptr_t)job->bcl.base;
454 submit.bin_cl_size = cl_offset(&job->bcl);
455 submit.shader_rec = (uintptr_t)job->shader_rec.base;
456 submit.shader_rec_size = cl_offset(&job->shader_rec);
457 submit.shader_rec_count = job->shader_rec_count;
458 submit.uniforms = (uintptr_t)job->uniforms.base;
459 submit.uniforms_size = cl_offset(&job->uniforms);
460 if (job->perfmon)
461 submit.perfmonid = job->perfmon->id;
462
463 assert(job->draw_min_x != ~0 && job->draw_min_y != ~0);
464 submit.min_x_tile = job->draw_min_x / job->tile_width;
465 submit.min_y_tile = job->draw_min_y / job->tile_height;
466 submit.max_x_tile = (job->draw_max_x - 1) / job->tile_width;
467 submit.max_y_tile = (job->draw_max_y - 1) / job->tile_height;
468 submit.width = job->draw_width;
469 submit.height = job->draw_height;
470 if (job->cleared) {
471 submit.flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR;
472 submit.clear_color[0] = job->clear_color[0];
473 submit.clear_color[1] = job->clear_color[1];
474 submit.clear_z = job->clear_depth;
475 submit.clear_s = job->clear_stencil;
476 }
477 submit.flags |= job->flags;
478
479 if (vc4->screen->has_syncobj) {
480 submit.out_sync = vc4->job_syncobj;
481
482 if (vc4->in_fence_fd >= 0) {
483 /* This replaces the fence in the syncobj. */
484 drmSyncobjImportSyncFile(vc4->fd, vc4->in_syncobj,
485 vc4->in_fence_fd);
486 submit.in_sync = vc4->in_syncobj;
487 close(vc4->in_fence_fd);
488 vc4->in_fence_fd = -1;
489 }
490 }
491
492 if (!(vc4_debug & VC4_DEBUG_NORAST)) {
493 int ret;
494
495 ret = vc4_ioctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);
496 static bool warned = false;
497 if (ret && !warned) {
498 fprintf(stderr, "Draw call returned %s. "
499 "Expect corruption.\n", strerror(errno));
500 warned = true;
501 } else if (!ret) {
502 vc4->last_emit_seqno = submit.seqno;
503 if (job->perfmon)
504 job->perfmon->last_seqno = submit.seqno;
505 }
506 }
507
508 if (vc4->last_emit_seqno - vc4->screen->finished_seqno > 5) {
509 if (!vc4_wait_seqno(vc4->screen,
510 vc4->last_emit_seqno - 5,
511 PIPE_TIMEOUT_INFINITE,
512 "job throttling")) {
513 fprintf(stderr, "Job throttling failed\n");
514 }
515 }
516
517 if (vc4_debug & VC4_DEBUG_ALWAYS_SYNC) {
518 if (!vc4_wait_seqno(vc4->screen, vc4->last_emit_seqno,
519 PIPE_TIMEOUT_INFINITE, "sync")) {
520 fprintf(stderr, "Wait failed.\n");
521 abort();
522 }
523 }
524
525 done:
526 vc4_job_free(vc4, job);
527 }
528
529 static bool
530 vc4_job_compare(const void *a, const void *b)
531 {
532 return memcmp(a, b, sizeof(struct vc4_job_key)) == 0;
533 }
534
535 static uint32_t
536 vc4_job_hash(const void *key)
537 {
538 return _mesa_hash_data(key, sizeof(struct vc4_job_key));
539 }
540
541 int
542 vc4_job_init(struct vc4_context *vc4)
543 {
544 vc4->jobs = _mesa_hash_table_create(vc4,
545 vc4_job_hash,
546 vc4_job_compare);
547 vc4->write_jobs = _mesa_hash_table_create(vc4,
548 _mesa_hash_pointer,
549 _mesa_key_pointer_equal);
550
551 if (vc4->screen->has_syncobj) {
552 /* Create the syncobj as signaled since with no job executed
553 * there is nothing to wait on.
554 */
555 int ret = drmSyncobjCreate(vc4->fd,
556 DRM_SYNCOBJ_CREATE_SIGNALED,
557 &vc4->job_syncobj);
558 if (ret) {
559 /* If the screen indicated syncobj support, we should
560 * be able to create a signaled syncobj.
561 * At this point it is too late to pretend the screen
562 * has no syncobj support.
563 */
564 return ret;
565 }
566 }
567
568 return 0;
569 }
570