2 * Copyright © 2014-2015 Broadcom
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26 * Functions for submitting VC4 render jobs to the kernel.
30 #include "vc4_cl_dump.h"
31 #include "vc4_context.h"
32 #include "util/hash_table.h"
/**
 * Removes the entry stored under a key from a hash table.
 *
 * @param ht   table to search and remove from
 * @param key  key handed unchanged to _mesa_hash_table_search()
 *
 * Nothing is removed when the search finds no entry for the key.
 */
static void
remove_from_ht(struct hash_table *ht, void *key)
{
        struct hash_entry *entry = _mesa_hash_table_search(ht, key);

        /* Only remove what the search actually found, instead of depending
         * on the callee to tolerate a NULL entry.
         */
        if (entry)
                _mesa_hash_table_remove(ht, entry);
}
42 vc4_job_free(struct vc4_context
*vc4
, struct vc4_job
*job
)
44 struct vc4_bo
**referenced_bos
= job
->bo_pointers
.base
;
45 for (int i
= 0; i
< cl_offset(&job
->bo_handles
) / 4; i
++) {
46 vc4_bo_unreference(&referenced_bos
[i
]);
49 remove_from_ht(vc4
->jobs
, &job
->key
);
51 if (job
->color_write
) {
52 remove_from_ht(vc4
->write_jobs
, job
->color_write
->texture
);
53 pipe_surface_reference(&job
->color_write
, NULL
);
55 if (job
->msaa_color_write
) {
56 remove_from_ht(vc4
->write_jobs
, job
->msaa_color_write
->texture
);
57 pipe_surface_reference(&job
->msaa_color_write
, NULL
);
60 remove_from_ht(vc4
->write_jobs
, job
->zs_write
->texture
);
61 pipe_surface_reference(&job
->zs_write
, NULL
);
63 if (job
->msaa_zs_write
) {
64 remove_from_ht(vc4
->write_jobs
, job
->msaa_zs_write
->texture
);
65 pipe_surface_reference(&job
->msaa_zs_write
, NULL
);
68 pipe_surface_reference(&job
->color_read
, NULL
);
69 pipe_surface_reference(&job
->zs_read
, NULL
);
77 static struct vc4_job
*
78 vc4_job_create(struct vc4_context
*vc4
)
80 struct vc4_job
*job
= rzalloc(vc4
, struct vc4_job
);
82 vc4_init_cl(job
, &job
->bcl
);
83 vc4_init_cl(job
, &job
->shader_rec
);
84 vc4_init_cl(job
, &job
->uniforms
);
85 vc4_init_cl(job
, &job
->bo_handles
);
86 vc4_init_cl(job
, &job
->bo_pointers
);
93 job
->last_gem_handle_hindex
= ~0;
96 job
->perfmon
= vc4
->perfmon
;
102 vc4_flush_jobs_writing_resource(struct vc4_context
*vc4
,
103 struct pipe_resource
*prsc
)
105 struct hash_entry
*entry
= _mesa_hash_table_search(vc4
->write_jobs
,
108 struct vc4_job
*job
= entry
->data
;
109 vc4_job_submit(vc4
, job
);
114 vc4_flush_jobs_reading_resource(struct vc4_context
*vc4
,
115 struct pipe_resource
*prsc
)
117 struct vc4_resource
*rsc
= vc4_resource(prsc
);
119 vc4_flush_jobs_writing_resource(vc4
, prsc
);
121 hash_table_foreach(vc4
->jobs
, entry
) {
122 struct vc4_job
*job
= entry
->data
;
124 struct vc4_bo
**referenced_bos
= job
->bo_pointers
.base
;
126 for (int i
= 0; i
< cl_offset(&job
->bo_handles
) / 4; i
++) {
127 if (referenced_bos
[i
] == rsc
->bo
) {
133 vc4_job_submit(vc4
, job
);
137 /* Also check for the Z/color buffers, since the references to
138 * those are only added immediately before submit.
140 if (job
->color_read
&& !(job
->cleared
& PIPE_CLEAR_COLOR
)) {
141 struct vc4_resource
*ctex
=
142 vc4_resource(job
->color_read
->texture
);
143 if (ctex
->bo
== rsc
->bo
) {
144 vc4_job_submit(vc4
, job
);
149 if (job
->zs_read
&& !(job
->cleared
&
150 (PIPE_CLEAR_DEPTH
| PIPE_CLEAR_STENCIL
))) {
151 struct vc4_resource
*ztex
=
152 vc4_resource(job
->zs_read
->texture
);
153 if (ztex
->bo
== rsc
->bo
) {
154 vc4_job_submit(vc4
, job
);
162 * Returns a vc4_job structure for tracking V3D rendering to a particular FBO.
164 * If we've already started rendering to this FBO, then return the same job,
165 * otherwise make a new one. If we're beginning rendering to an FBO, make
166 * sure that any previous reads of the FBO (or writes to its color/Z surfaces)
170 vc4_get_job(struct vc4_context
*vc4
,
171 struct pipe_surface
*cbuf
, struct pipe_surface
*zsbuf
)
173 /* Return the existing job for this FBO if we have one */
174 struct vc4_job_key local_key
= {.cbuf
= cbuf
, .zsbuf
= zsbuf
};
175 struct hash_entry
*entry
= _mesa_hash_table_search(vc4
->jobs
,
180 /* Creating a new job. Make sure that any previous jobs reading or
181 * writing these buffers are flushed.
184 vc4_flush_jobs_reading_resource(vc4
, cbuf
->texture
);
186 vc4_flush_jobs_reading_resource(vc4
, zsbuf
->texture
);
188 struct vc4_job
*job
= vc4_job_create(vc4
);
191 if (cbuf
->texture
->nr_samples
> 1) {
193 pipe_surface_reference(&job
->msaa_color_write
, cbuf
);
195 pipe_surface_reference(&job
->color_write
, cbuf
);
200 if (zsbuf
->texture
->nr_samples
> 1) {
202 pipe_surface_reference(&job
->msaa_zs_write
, zsbuf
);
204 pipe_surface_reference(&job
->zs_write
, zsbuf
);
209 job
->tile_width
= 32;
210 job
->tile_height
= 32;
212 job
->tile_width
= 64;
213 job
->tile_height
= 64;
217 _mesa_hash_table_insert(vc4
->write_jobs
, cbuf
->texture
, job
);
219 _mesa_hash_table_insert(vc4
->write_jobs
, zsbuf
->texture
, job
);
221 job
->key
.cbuf
= cbuf
;
222 job
->key
.zsbuf
= zsbuf
;
223 _mesa_hash_table_insert(vc4
->jobs
, &job
->key
, job
);
229 vc4_get_job_for_fbo(struct vc4_context
*vc4
)
234 struct pipe_surface
*cbuf
= vc4
->framebuffer
.cbufs
[0];
235 struct pipe_surface
*zsbuf
= vc4
->framebuffer
.zsbuf
;
236 struct vc4_job
*job
= vc4_get_job(vc4
, cbuf
, zsbuf
);
238 /* The dirty flags are tracking what's been updated while vc4->job has
239 * been bound, so set them all to ~0 when switching between jobs. We
240 * also need to reset all state at the start of rendering.
244 /* Set up the read surfaces in the job. If they aren't actually
245 * getting read (due to a clear starting the frame), job->cleared will
248 pipe_surface_reference(&job
->color_read
, cbuf
);
249 pipe_surface_reference(&job
->zs_read
, zsbuf
);
251 /* If we're binding to uninitialized buffers, no need to load their
252 * contents before drawing.
255 struct vc4_resource
*rsc
= vc4_resource(cbuf
->texture
);
257 job
->cleared
|= PIPE_CLEAR_COLOR0
;
261 struct vc4_resource
*rsc
= vc4_resource(zsbuf
->texture
);
263 job
->cleared
|= PIPE_CLEAR_DEPTH
| PIPE_CLEAR_STENCIL
;
266 job
->draw_tiles_x
= DIV_ROUND_UP(vc4
->framebuffer
.width
,
268 job
->draw_tiles_y
= DIV_ROUND_UP(vc4
->framebuffer
.height
,
271 /* Initialize the job with the raster order flags -- each draw will
272 * check that we haven't changed the flags, since that requires a
276 job
->flags
= vc4
->rasterizer
->tile_raster_order_flags
;
284 vc4_submit_setup_rcl_surface(struct vc4_job
*job
,
285 struct drm_vc4_submit_rcl_surface
*submit_surf
,
286 struct pipe_surface
*psurf
,
287 bool is_depth
, bool is_write
)
289 struct vc4_surface
*surf
= vc4_surface(psurf
);
294 struct vc4_resource
*rsc
= vc4_resource(psurf
->texture
);
295 submit_surf
->hindex
= vc4_gem_hindex(job
, rsc
->bo
);
296 submit_surf
->offset
= surf
->offset
;
298 if (psurf
->texture
->nr_samples
<= 1) {
301 VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS
,
302 VC4_LOADSTORE_TILE_BUFFER_BUFFER
);
306 VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_COLOR
,
307 VC4_LOADSTORE_TILE_BUFFER_BUFFER
) |
308 VC4_SET_FIELD(vc4_rt_format_is_565(psurf
->format
) ?
309 VC4_LOADSTORE_TILE_BUFFER_BGR565
:
310 VC4_LOADSTORE_TILE_BUFFER_RGBA8888
,
311 VC4_LOADSTORE_TILE_BUFFER_FORMAT
);
314 VC4_SET_FIELD(surf
->tiling
,
315 VC4_LOADSTORE_TILE_BUFFER_TILING
);
318 submit_surf
->flags
|= VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES
;
326 vc4_submit_setup_rcl_render_config_surface(struct vc4_job
*job
,
327 struct drm_vc4_submit_rcl_surface
*submit_surf
,
328 struct pipe_surface
*psurf
)
330 struct vc4_surface
*surf
= vc4_surface(psurf
);
335 struct vc4_resource
*rsc
= vc4_resource(psurf
->texture
);
336 submit_surf
->hindex
= vc4_gem_hindex(job
, rsc
->bo
);
337 submit_surf
->offset
= surf
->offset
;
339 if (psurf
->texture
->nr_samples
<= 1) {
341 VC4_SET_FIELD(vc4_rt_format_is_565(surf
->base
.format
) ?
342 VC4_RENDER_CONFIG_FORMAT_BGR565
:
343 VC4_RENDER_CONFIG_FORMAT_RGBA8888
,
344 VC4_RENDER_CONFIG_FORMAT
) |
345 VC4_SET_FIELD(surf
->tiling
,
346 VC4_RENDER_CONFIG_MEMORY_FORMAT
);
353 vc4_submit_setup_rcl_msaa_surface(struct vc4_job
*job
,
354 struct drm_vc4_submit_rcl_surface
*submit_surf
,
355 struct pipe_surface
*psurf
)
357 struct vc4_surface
*surf
= vc4_surface(psurf
);
362 struct vc4_resource
*rsc
= vc4_resource(psurf
->texture
);
363 submit_surf
->hindex
= vc4_gem_hindex(job
, rsc
->bo
);
364 submit_surf
->offset
= surf
->offset
;
365 submit_surf
->bits
= 0;
370 * Submits the job to the kernel and then reinitializes it.
373 vc4_job_submit(struct vc4_context
*vc4
, struct vc4_job
*job
)
375 if (!job
->needs_flush
)
378 /* The RCL setup would choke if the draw bounds cause no drawing, so
379 * just drop the drawing if that's the case.
381 if (job
->draw_max_x
<= job
->draw_min_x
||
382 job
->draw_max_y
<= job
->draw_min_y
) {
386 if (vc4_debug
& VC4_DEBUG_CL
) {
387 fprintf(stderr
, "BCL:\n");
388 vc4_dump_cl(job
->bcl
.base
, cl_offset(&job
->bcl
), false);
391 if (cl_offset(&job
->bcl
) > 0) {
392 /* Increment the semaphore indicating that binning is done and
393 * unblocking the render thread. Note that this doesn't act
394 * until the FLUSH completes.
396 cl_ensure_space(&job
->bcl
, 8);
397 cl_emit(&job
->bcl
, INCREMENT_SEMAPHORE
, incr
);
398 /* The FLUSH caps all of our bin lists with a
401 cl_emit(&job
->bcl
, FLUSH
, flush
);
403 struct drm_vc4_submit_cl submit
= {
404 .color_read
.hindex
= ~0,
405 .zs_read
.hindex
= ~0,
406 .color_write
.hindex
= ~0,
407 .msaa_color_write
.hindex
= ~0,
408 .zs_write
.hindex
= ~0,
409 .msaa_zs_write
.hindex
= ~0,
412 cl_ensure_space(&job
->bo_handles
, 6 * sizeof(uint32_t));
413 cl_ensure_space(&job
->bo_pointers
, 6 * sizeof(struct vc4_bo
*));
415 if (job
->resolve
& PIPE_CLEAR_COLOR
) {
416 if (!(job
->cleared
& PIPE_CLEAR_COLOR
)) {
417 vc4_submit_setup_rcl_surface(job
, &submit
.color_read
,
421 vc4_submit_setup_rcl_render_config_surface(job
,
424 vc4_submit_setup_rcl_msaa_surface(job
,
425 &submit
.msaa_color_write
,
426 job
->msaa_color_write
);
428 if (job
->resolve
& (PIPE_CLEAR_DEPTH
| PIPE_CLEAR_STENCIL
)) {
429 if (!(job
->cleared
& (PIPE_CLEAR_DEPTH
| PIPE_CLEAR_STENCIL
))) {
430 vc4_submit_setup_rcl_surface(job
, &submit
.zs_read
,
431 job
->zs_read
, true, false);
433 vc4_submit_setup_rcl_surface(job
, &submit
.zs_write
,
434 job
->zs_write
, true, true);
435 vc4_submit_setup_rcl_msaa_surface(job
, &submit
.msaa_zs_write
,
440 /* This bit controls how many pixels the general
441 * (i.e. subsampled) loads/stores are iterating over
442 * (multisample loads replicate out to the other samples).
444 submit
.color_write
.bits
|= VC4_RENDER_CONFIG_MS_MODE_4X
;
445 /* Controls whether color_write's
446 * VC4_PACKET_STORE_MS_TILE_BUFFER does 4x decimation
448 submit
.color_write
.bits
|= VC4_RENDER_CONFIG_DECIMATE_MODE_4X
;
451 submit
.bo_handles
= (uintptr_t)job
->bo_handles
.base
;
452 submit
.bo_handle_count
= cl_offset(&job
->bo_handles
) / 4;
453 submit
.bin_cl
= (uintptr_t)job
->bcl
.base
;
454 submit
.bin_cl_size
= cl_offset(&job
->bcl
);
455 submit
.shader_rec
= (uintptr_t)job
->shader_rec
.base
;
456 submit
.shader_rec_size
= cl_offset(&job
->shader_rec
);
457 submit
.shader_rec_count
= job
->shader_rec_count
;
458 submit
.uniforms
= (uintptr_t)job
->uniforms
.base
;
459 submit
.uniforms_size
= cl_offset(&job
->uniforms
);
461 submit
.perfmonid
= job
->perfmon
->id
;
463 assert(job
->draw_min_x
!= ~0 && job
->draw_min_y
!= ~0);
464 submit
.min_x_tile
= job
->draw_min_x
/ job
->tile_width
;
465 submit
.min_y_tile
= job
->draw_min_y
/ job
->tile_height
;
466 submit
.max_x_tile
= (job
->draw_max_x
- 1) / job
->tile_width
;
467 submit
.max_y_tile
= (job
->draw_max_y
- 1) / job
->tile_height
;
468 submit
.width
= job
->draw_width
;
469 submit
.height
= job
->draw_height
;
471 submit
.flags
|= VC4_SUBMIT_CL_USE_CLEAR_COLOR
;
472 submit
.clear_color
[0] = job
->clear_color
[0];
473 submit
.clear_color
[1] = job
->clear_color
[1];
474 submit
.clear_z
= job
->clear_depth
;
475 submit
.clear_s
= job
->clear_stencil
;
477 submit
.flags
|= job
->flags
;
479 if (vc4
->screen
->has_syncobj
) {
480 submit
.out_sync
= vc4
->job_syncobj
;
482 if (vc4
->in_fence_fd
>= 0) {
483 /* This replaces the fence in the syncobj. */
484 drmSyncobjImportSyncFile(vc4
->fd
, vc4
->in_syncobj
,
486 submit
.in_sync
= vc4
->in_syncobj
;
487 close(vc4
->in_fence_fd
);
488 vc4
->in_fence_fd
= -1;
492 if (!(vc4_debug
& VC4_DEBUG_NORAST
)) {
495 ret
= vc4_ioctl(vc4
->fd
, DRM_IOCTL_VC4_SUBMIT_CL
, &submit
);
496 static bool warned
= false;
497 if (ret
&& !warned
) {
498 fprintf(stderr
, "Draw call returned %s. "
499 "Expect corruption.\n", strerror(errno
));
502 vc4
->last_emit_seqno
= submit
.seqno
;
504 job
->perfmon
->last_seqno
= submit
.seqno
;
508 if (vc4
->last_emit_seqno
- vc4
->screen
->finished_seqno
> 5) {
509 if (!vc4_wait_seqno(vc4
->screen
,
510 vc4
->last_emit_seqno
- 5,
511 PIPE_TIMEOUT_INFINITE
,
513 fprintf(stderr
, "Job throttling failed\n");
517 if (vc4_debug
& VC4_DEBUG_ALWAYS_SYNC
) {
518 if (!vc4_wait_seqno(vc4
->screen
, vc4
->last_emit_seqno
,
519 PIPE_TIMEOUT_INFINITE
, "sync")) {
520 fprintf(stderr
, "Wait failed.\n");
526 vc4_job_free(vc4
, job
);
530 vc4_job_compare(const void *a
, const void *b
)
532 return memcmp(a
, b
, sizeof(struct vc4_job_key
)) == 0;
536 vc4_job_hash(const void *key
)
538 return _mesa_hash_data(key
, sizeof(struct vc4_job_key
));
542 vc4_job_init(struct vc4_context
*vc4
)
544 vc4
->jobs
= _mesa_hash_table_create(vc4
,
547 vc4
->write_jobs
= _mesa_hash_table_create(vc4
,
549 _mesa_key_pointer_equal
);
551 if (vc4
->screen
->has_syncobj
) {
552 /* Create the syncobj as signaled since with no job executed
553 * there is nothing to wait on.
555 int ret
= drmSyncobjCreate(vc4
->fd
,
556 DRM_SYNCOBJ_CREATE_SIGNALED
,
559 /* If the screen indicated syncobj support, we should
560 * be able to create a signaled syncobj.
561 * At this point it is too late to pretend the screen
562 * has no syncobj support.