2 * Copyright © 2014-2017 Broadcom
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26 * Functions for submitting VC5 render jobs to the kernel.
30 #include "v3d_context.h"
31 /* The OQ/semaphore packets are the same across V3D versions. */
32 #define V3D_VERSION 33
33 #include "broadcom/cle/v3dx_pack.h"
34 #include "broadcom/common/v3d_macros.h"
35 #include "util/hash_table.h"
36 #include "util/ralloc.h"
38 #include "broadcom/clif/clif_dump.h"
// Tears down a job: drops the reference held on every BO in job->bos,
// removes the job from v3d->jobs and from every v3d->write_jobs entry it
// was registered under, releases its surface references and destroys its
// command lists and tile alloc/state BOs.
//
// v3d: context that owns the jobs and write_jobs tables.
// job: the job being torn down.
41 v3d_job_free(struct v3d_context
*v3d
, struct v3d_job
*job
)
// Each BO in the set is unreferenced once here.
43 set_foreach(job
->bos
, entry
) {
44 struct v3d_bo
*bo
= (struct v3d_bo
*)entry
->key
;
45 v3d_bo_unreference(&bo
);
// Unregister the job itself; it is keyed by the address of its own key.
48 _mesa_hash_table_remove_key(v3d
->jobs
, &job
->key
);
// write_prscs may be NULL, so it is only walked when it exists.
50 if (job
->write_prscs
) {
51 set_foreach(job
->write_prscs
, entry
) {
52 const struct pipe_resource
*prsc
= entry
->key
;
54 _mesa_hash_table_remove_key(v3d
->write_jobs
, prsc
);
// Color buffers: drop the write_jobs entry of each texture, then the
// surface reference.
// NOTE(review): no NULL check on job->cbufs[i] is visible before the
// dereference below - confirm unbound slots are skipped.
58 for (int i
= 0; i
< V3D_MAX_DRAW_BUFFERS
; i
++) {
60 _mesa_hash_table_remove_key(v3d
->write_jobs
,
61 job
->cbufs
[i
]->texture
);
62 pipe_surface_reference(&job
->cbufs
[i
], NULL
);
// Depth/stencil: a separate stencil resource has its own write_jobs
// entry, removed in addition to the one for the depth texture.
// NOTE(review): the guard on job->zsbuf is not visible here - confirm.
66 struct v3d_resource
*rsc
= v3d_resource(job
->zsbuf
->texture
);
67 if (rsc
->separate_stencil
)
68 _mesa_hash_table_remove_key(v3d
->write_jobs
,
69 &rsc
->separate_stencil
->base
);
71 _mesa_hash_table_remove_key(v3d
->write_jobs
,
73 pipe_surface_reference(&job
->zsbuf
, NULL
);
// Release the three command lists and the tile alloc/state BOs.
79 v3d_destroy_cl(&job
->bcl
);
80 v3d_destroy_cl(&job
->rcl
);
81 v3d_destroy_cl(&job
->indirect
);
82 v3d_bo_unreference(&job
->tile_alloc
);
83 v3d_bo_unreference(&job
->tile_state
);
// Allocates a zero-initialised job with rzalloc(), using the context as
// the ralloc parent, initialises its three command lists (bcl, rcl,
// indirect) and creates the set that tracks the BOs the job references.
//
// v3d: context the new job is allocated under.
89 v3d_job_create(struct v3d_context
*v3d
)
91 struct v3d_job
*job
= rzalloc(v3d
, struct v3d_job
);
// The command lists are initialised with the job as their first argument.
95 v3d_init_cl(job
, &job
->bcl
);
96 v3d_init_cl(job
, &job
->rcl
);
97 v3d_init_cl(job
, &job
->indirect
);
// presumably ~0 is a "no draw yet" sentinel that the first draw lowers -
// TODO confirm against the code that updates the draw bounds.
100 job
->draw_min_y
= ~0;
// BO set: keys are compared by pointer identity and the set is
// allocated with the job as its ralloc parent.
104 job
->bos
= _mesa_set_create(job
,
106 _mesa_key_pointer_equal
);
// Makes the job track a BO: takes a reference on it, adds it to
// job->bos, accounts its size in job->referenced_size and appends its
// handle to the array referenced by job->submit.bo_handles.
//
// job: job that will reference the BO.
// bo:  buffer object to add.
111 v3d_job_add_bo(struct v3d_job
*job
, struct v3d_bo
*bo
)
// BO already tracked by this job.
// NOTE(review): the statement guarded by this check is not visible here;
// presumably an early return so the BO is not added twice.
116 if (_mesa_set_search(job
->bos
, bo
))
119 v3d_bo_reference(bo
);
120 _mesa_set_add(job
->bos
, bo
);
121 job
->referenced_size
+= bo
->size
;
// submit.bo_handles stores the array address as an integer, so it is
// cast back to a pointer before being written through.
123 uint32_t *bo_handles
= (void *)(uintptr_t)job
->submit
.bo_handles
;
// Array full: grow to at least 4 entries, otherwise double, and store
// the (possibly moved) address back into the submit struct.
125 if (job
->submit
.bo_handle_count
>= job
->bo_handles_size
) {
126 job
->bo_handles_size
= MAX2(4, job
->bo_handles_size
* 2);
127 bo_handles
= reralloc(job
, bo_handles
,
128 uint32_t, job
->bo_handles_size
);
129 job
->submit
.bo_handles
= (uintptr_t)(void *)bo_handles
;
// Append the handle and bump the count.
131 bo_handles
[job
->submit
.bo_handle_count
++] = bo
->handle
;
// Records that the job writes prsc: adds the resource to
// job->write_prscs and maps prsc to the job in the context's write_jobs
// table.
//
// job:  job that writes the resource.
// prsc: resource being written.
135 v3d_job_add_write_resource(struct v3d_job
*job
, struct pipe_resource
*prsc
)
137 struct v3d_context
*v3d
= job
->v3d
;
// The set is created on first use, with the job as its ralloc parent.
139 if (!job
->write_prscs
) {
140 job
->write_prscs
= _mesa_set_create(job
,
142 _mesa_key_pointer_equal
);
145 _mesa_set_add(job
->write_prscs
, prsc
);
146 _mesa_hash_table_insert(v3d
->write_jobs
, prsc
, job
);
// Submits every job in v3d->jobs whose BO set contains bo.
//
// v3d: context whose jobs are scanned.
// bo:  buffer object to look for.
150 v3d_flush_jobs_using_bo(struct v3d_context
*v3d
, struct v3d_bo
*bo
)
152 hash_table_foreach(v3d
->jobs
, entry
) {
153 struct v3d_job
*job
= entry
->data
;
// Only jobs that reference the BO are submitted.
155 if (_mesa_set_search(job
->bos
, bo
))
156 v3d_job_submit(v3d
, job
);
// Registers prsc as a resource written by the job through
// v3d_job_add_write_resource(), and additionally records it in
// job->tf_write_prscs.
//
// job:  job that writes the resource.
// prsc: resource being written.
161 v3d_job_add_tf_write_resource(struct v3d_job
*job
, struct pipe_resource
*prsc
)
163 v3d_job_add_write_resource(job
, prsc
);
// The TF set is created on first use, with the job as ralloc parent.
165 if (!job
->tf_write_prscs
)
166 job
->tf_write_prscs
= _mesa_pointer_set_create(job
);
168 _mesa_set_add(job
->tf_write_prscs
, prsc
);
// Reports whether prsc is present in job->tf_write_prscs.
//
// job:  job to query.
// prsc: resource to look up.
//
// NOTE(review): the statements guarded by the two checks below are not
// visible here; presumably both return false.
172 v3d_job_writes_resource_from_tf(struct v3d_job
*job
,
173 struct pipe_resource
*prsc
)
175 if (!job
->tf_enabled
)
178 if (!job
->tf_write_prscs
)
181 return _mesa_set_search(job
->tf_write_prscs
, prsc
) != NULL
;
// Searches v3d->write_jobs for the job that writes a resource and
// submits it when flush_cond asks for it. Before that, a graphics-side
// call on a resource flagged compute_written requests a sync on the
// last compute job and clears the flag.
//
// v3d:                 context owning the write_jobs table.
// prsc:                resource whose writer may need flushing.
// flush_cond:          selects how needs_flush is computed (see switch).
// is_compute_pipeline: true when the caller is the compute pipeline.
185 v3d_flush_jobs_writing_resource(struct v3d_context
*v3d
,
186 struct pipe_resource
*prsc
,
187 enum v3d_flush_cond flush_cond
,
188 bool is_compute_pipeline
)
// NOTE(review): the key argument of this search is not visible here;
// presumably prsc, which is what write_jobs is keyed by on insertion.
190 struct hash_entry
*entry
= _mesa_hash_table_search(v3d
->write_jobs
,
192 struct v3d_resource
*rsc
= v3d_resource(prsc
);
194 /* We need to sync if graphics pipeline reads a resource written
195 * by the compute pipeline. The same would be needed for the case of
196 * graphics-compute dependency but nowadays all compute jobs
197 * are serialized with the previous submitted job.
199 if (!is_compute_pipeline
&& rsc
->bo
!= NULL
&& rsc
->compute_written
) {
200 v3d
->sync_on_last_compute_job
= true;
201 rsc
->compute_written
= false;
// NOTE(review): a NULL check on entry is not visible before this
// dereference - confirm the no-writer case returns earlier.
207 struct v3d_job
*job
= entry
->data
;
// NOT_CURRENT_JOB flushes unless the writer is the job bound as
// v3d->job; DEFAULT flushes unless the write came from TF in that job.
// NOTE(review): the V3D_FLUSH_ALWAYS body is not visible here;
// presumably it sets needs_flush to true.
210 switch (flush_cond
) {
211 case V3D_FLUSH_ALWAYS
:
214 case V3D_FLUSH_NOT_CURRENT_JOB
:
215 needs_flush
= !v3d
->job
|| v3d
->job
!= job
;
217 case V3D_FLUSH_DEFAULT
:
219 /* For writes from TF in the same job we use the "Wait for TF"
220 * feature provided by the hardware so we don't want to flush.
221 * The exception to this is when the caller is about to map the
222 * resource since in that case we don't have a 'Wait for TF'
223 * command the in command stream. In this scenario the caller
224 * is expected to set 'always_flush' to True.
// NOTE(review): the comment above names 'always_flush', but this
// function takes flush_cond; presumably V3D_FLUSH_ALWAYS is meant.
226 needs_flush
= !v3d_job_writes_resource_from_tf(job
, prsc
);
// NOTE(review): the needs_flush test guarding this call is not visible.
230 v3d_job_submit(v3d
, job
);
// Flushes jobs that touch a resource: first the job writing it (through
// v3d_flush_jobs_writing_resource), then the jobs in v3d->jobs whose
// BO set is checked against the resource's BO, subject to flush_cond.
//
// v3d:                 context whose jobs are scanned.
// prsc:                resource about to be written by the caller.
// flush_cond:          selects how needs_flush is computed (see switch).
// is_compute_pipeline: forwarded to v3d_flush_jobs_writing_resource().
234 v3d_flush_jobs_reading_resource(struct v3d_context
*v3d
,
235 struct pipe_resource
*prsc
,
236 enum v3d_flush_cond flush_cond
,
237 bool is_compute_pipeline
)
239 struct v3d_resource
*rsc
= v3d_resource(prsc
);
241 /* We only need to force the flush on TF writes, which is the only
242 * case where we might skip the flush to use the 'Wait for TF'
243 * command. Here we are flushing for a read, which means that the
244 * caller intends to write to the resource, so we don't care if
245 * there was a previous TF write to it.
247 v3d_flush_jobs_writing_resource(v3d
, prsc
, flush_cond
,
248 is_compute_pipeline
);
250 hash_table_foreach(v3d
->jobs
, entry
) {
251 struct v3d_job
*job
= entry
->data
;
// Jobs whose BO set does not contain the resource's BO are skipped.
// NOTE(review): the guarded statement is not visible; presumably
// continue.
253 if (!_mesa_set_search(job
->bos
, rsc
->bo
))
// NOT_CURRENT_JOB spares only the job bound as v3d->job.
// NOTE(review): the value assigned for ALWAYS and DEFAULT is not
// visible here; presumably needs_flush is true for both.
257 switch (flush_cond
) {
258 case V3D_FLUSH_NOT_CURRENT_JOB
:
259 needs_flush
= !v3d
->job
|| v3d
->job
!= job
;
261 case V3D_FLUSH_ALWAYS
:
262 case V3D_FLUSH_DEFAULT
:
// NOTE(review): the needs_flush test guarding this call is not visible.
268 v3d_job_submit(v3d
, job
);
270 /* Reminder: v3d->jobs is safe to keep iterating even
271 * after deletion of an entry.
// Chooses job->tile_width and job->tile_height from a static table.
// The table index grows with which color buffer slots are bound and
// with the largest internal_bpp among the color surfaces; that largest
// value is also stored in job->internal_bpp.
//
// job: job whose cbufs are inspected and whose tile size is written.
278 v3d_job_set_tile_buffer_size(struct v3d_job
*job
)
// NOTE(review): the table contents are not visible here; the indexing
// at the end reads it as consecutive (width, height) pairs.
280 static const uint8_t tile_sizes
[] = {
287 int tile_size_index
= 0;
// NOTE(review): the condition guarding this increment is not visible.
289 tile_size_index
+= 2;
// Slots 2 or 3 in use add two steps; otherwise slot 1 in use takes the
// else branch below.
// NOTE(review): the statement of the else branch is not visible;
// presumably a single-step increment.
291 if (job
->cbufs
[3] || job
->cbufs
[2])
292 tile_size_index
+= 2;
293 else if (job
->cbufs
[1])
// Track the largest internal bpp over all color buffers.
// NOTE(review): a NULL check on job->cbufs[i] is not visible before
// surf is dereferenced - confirm unbound slots are skipped.
296 int max_bpp
= RENDER_TARGET_MAXIMUM_32BPP
;
297 for (int i
= 0; i
< V3D_MAX_DRAW_BUFFERS
; i
++) {
299 struct v3d_surface
*surf
= v3d_surface(job
->cbufs
[i
]);
300 max_bpp
= MAX2(max_bpp
, surf
->internal_bpp
);
// max_bpp is added directly to the index, which is why the enum's
// first value is statically asserted to be 0.
303 job
->internal_bpp
= max_bpp
;
304 STATIC_ASSERT(RENDER_TARGET_MAXIMUM_32BPP
== 0);
305 tile_size_index
+= max_bpp
;
// NOTE(review): the reads below use tile_size_index * 2 + 1, so this
// bound looks looser than the access; confirm whether
// tile_size_index * 2 + 1 < ARRAY_SIZE(tile_sizes) is the intended check.
307 assert(tile_size_index
< ARRAY_SIZE(tile_sizes
));
308 job
->tile_width
= tile_sizes
[tile_size_index
* 2 + 0];
309 job
->tile_height
= tile_sizes
[tile_size_index
* 2 + 1];
313 * Returns a v3d_job structure for tracking V3D rendering to a particular FBO.
315 * If we've already started rendering to this FBO, then return the same job,
316 * otherwise make a new one. If we're beginning rendering to an FBO, make
317 * sure that any previous reads of the FBO (or writes to its color/Z surfaces)
// Looks the surfaces up in v3d->jobs through a local key. For a new job
// it flushes jobs reading each surface's resource, takes references on
// the surfaces, registers the job in v3d->write_jobs for each resource
// (including a separate stencil resource) and inserts the job into
// v3d->jobs under a copy of the key.
//
// v3d:   context owning the jobs and write_jobs tables.
// cbufs: array of color surfaces, indexed up to V3D_MAX_DRAW_BUFFERS.
// zsbuf: depth/stencil surface.
//
// NOTE(review): NULL checks on cbufs[i] and zsbuf are not visible
// before the dereferences below - confirm unbound surfaces are skipped.
321 v3d_get_job(struct v3d_context
*v3d
,
322 struct pipe_surface
**cbufs
, struct pipe_surface
*zsbuf
)
324 /* Return the existing job for this FBO if we have one */
// NOTE(review): the key initialiser, the search key and the return on
// a hit are not visible here.
325 struct v3d_job_key local_key
= {
334 struct hash_entry
*entry
= _mesa_hash_table_search(v3d
->jobs
,
339 /* Creating a new job. Make sure that any previous jobs reading or
340 * writing these buffers are flushed.
342 struct v3d_job
*job
= v3d_job_create(v3d
);
// Color buffers: flush readers, then hold a reference on the surface.
// NOTE(review): the trailing arguments of the flush call and the
// statement guarded by the nr_samples test are not visible here.
344 for (int i
= 0; i
< V3D_MAX_DRAW_BUFFERS
; i
++) {
346 v3d_flush_jobs_reading_resource(v3d
, cbufs
[i
]->texture
,
349 pipe_surface_reference(&job
->cbufs
[i
], cbufs
[i
]);
351 if (cbufs
[i
]->texture
->nr_samples
> 1)
// Depth/stencil gets the same treatment as the color buffers.
356 v3d_flush_jobs_reading_resource(v3d
, zsbuf
->texture
,
359 pipe_surface_reference(&job
->zsbuf
, zsbuf
);
360 if (zsbuf
->texture
->nr_samples
> 1)
// Register the new job as the writer of every bound resource.
364 for (int i
= 0; i
< V3D_MAX_DRAW_BUFFERS
; i
++) {
366 _mesa_hash_table_insert(v3d
->write_jobs
,
367 cbufs
[i
]->texture
, job
);
370 _mesa_hash_table_insert(v3d
->write_jobs
, zsbuf
->texture
, job
);
// A separate stencil resource is flushed and registered on its own.
372 struct v3d_resource
*rsc
= v3d_resource(zsbuf
->texture
);
373 if (rsc
->separate_stencil
) {
374 v3d_flush_jobs_reading_resource(v3d
,
375 &rsc
->separate_stencil
->base
,
378 _mesa_hash_table_insert(v3d
->write_jobs
,
379 &rsc
->separate_stencil
->base
,
// The table key must outlive this call, so the local key is copied
// into the job and the job's copy is what gets inserted.
384 memcpy(&job
->key
, &local_key
, sizeof(local_key
));
385 _mesa_hash_table_insert(v3d
->jobs
, &job
->key
, job
);
// Obtains the job for the context's framebuffer surfaces through
// v3d_get_job(), sets its tile buffer size, ORs clear bits into
// job->clear for color, depth and stencil, and derives draw_tiles_x and
// draw_tiles_y from the framebuffer width and height.
//
// v3d: context whose framebuffer state is read.
391 v3d_get_job_for_fbo(struct v3d_context
*v3d
)
396 struct pipe_surface
**cbufs
= v3d
->framebuffer
.cbufs
;
397 struct pipe_surface
*zsbuf
= v3d
->framebuffer
.zsbuf
;
398 struct v3d_job
*job
= v3d_get_job(v3d
, cbufs
, zsbuf
);
// NOTE(review): the statement guarded by the samples test is not
// visible here.
400 if (v3d
->framebuffer
.samples
>= 1)
403 v3d_job_set_tile_buffer_size(job
);
405 /* The dirty flags are tracking what's been updated while v3d->job has
406 * been bound, so set them all to ~0 when switching between jobs. We
407 * also need to reset all state at the start of rendering.
411 /* If we're binding to uninitialized buffers, no need to load their
412 * contents before drawing.
// NOTE(review): other loops over cbufs in this file are bounded by
// V3D_MAX_DRAW_BUFFERS; confirm the literal 4 matches it. The condition
// under which each clear bit is set is not visible here.
414 for (int i
= 0; i
< 4; i
++) {
416 struct v3d_resource
*rsc
= v3d_resource(cbufs
[i
]->texture
);
418 job
->clear
|= PIPE_CLEAR_COLOR0
<< i
;
423 struct v3d_resource
*rsc
= v3d_resource(zsbuf
->texture
);
425 job
->clear
|= PIPE_CLEAR_DEPTH
;
// With a separate stencil resource, rsc is switched to it before the
// stencil clear bit is set.
427 if (rsc
->separate_stencil
)
428 rsc
= rsc
->separate_stencil
;
431 job
->clear
|= PIPE_CLEAR_STENCIL
;
// Tile counts are rounded up.
// NOTE(review): the divisors are not visible here; presumably the
// job's tile width and height.
434 job
->draw_tiles_x
= DIV_ROUND_UP(v3d
->framebuffer
.width
,
436 job
->draw_tiles_y
= DIV_ROUND_UP(v3d
->framebuffer
.height
,
// Debug dump of a job: creates a clif_dump for the screen's devinfo,
// registers every BO of the job under a label built from its name and
// offset, dumps job->submit and destroys the dump object.
//
// v3d: context providing the screen's devinfo.
// job: job whose BOs and submit struct are dumped.
445 v3d_clif_dump(struct v3d_context
*v3d
, struct v3d_job
*job
)
// Checks that neither V3D_DEBUG_CL nor V3D_DEBUG_CLIF is set.
// NOTE(review): the guarded statement is not visible; presumably an
// early return.
447 if (!(V3D_DEBUG
& (V3D_DEBUG_CL
| V3D_DEBUG_CLIF
)))
450 struct clif_dump
*clif
= clif_dump_init(&v3d
->screen
->devinfo
,
452 V3D_DEBUG
& V3D_DEBUG_CL
);
454 set_foreach(job
->bos
, entry
) {
455 struct v3d_bo
*bo
= (void *)entry
->key
;
// NOTE(review): name is allocated with a NULL ralloc parent and its
// release is not visible here - confirm who owns it.
456 char *name
= ralloc_asprintf(NULL
, "%s_0x%x",
457 bo
->name
, bo
->offset
);
460 clif_dump_add_bo(clif
, name
, bo
->offset
, bo
->size
, bo
->map
);
465 clif_dump(clif
, &job
->submit
);
467 clif_dump_destroy(clif
);
// Waits on the primitive-counts BO with PIPE_TIMEOUT_INFINITE and, when
// the wait succeeds, adds the counters read from the mapped BO at
// v3d->prim_counts_offset to the context's running totals.
//
// v3d: context holding prim_counts and the totals; prim_counts must be
//      set (asserted).
471 v3d_read_and_accumulate_primitive_counters(struct v3d_context
*v3d
)
473 assert(v3d
->prim_counts
);
475 perf_debug("stalling on TF counts readback\n");
476 struct v3d_resource
*rsc
= v3d_resource(v3d
->prim_counts
);
477 if (v3d_bo_wait(rsc
->bo
, PIPE_TIMEOUT_INFINITE
, "prim-counts")) {
// NOTE(review): the offset is added to the v3d_bo_map() result before
// the conversion to uint32_t pointer; presumably a byte offset - confirm.
478 uint32_t *map
= v3d_bo_map(rsc
->bo
) + v3d
->prim_counts_offset
;
479 v3d
->tf_prims_generated
+= map
[V3D_PRIM_COUNTS_TF_WRITTEN
];
480 /* When we only have a vertex shader we determine the primitive
481 * count in the CPU so don't update it here again.
// NOTE(review): the condition guarding this update is not visible here.
484 v3d
->prims_generated
+= map
[V3D_PRIM_COUNTS_WRITTEN
];
489 * Submits the job to the kernel and then frees it.
// Fills in job->submit (sync objects, command list end addresses,
// flags and, on V3D 4.1 and later, tile alloc/state), dumps the job
// through v3d_clif_dump(), issues DRM_IOCTL_V3D_SUBMIT_CL unless
// V3D_DEBUG_NORAST is set, accumulates the transform feedback primitive
// counters when required and finally calls v3d_job_free() on the job.
//
// v3d: context providing the screen, the fd and out_sync.
// job: job to submit; v3d_job_free() is called on it before returning.
492 v3d_job_submit(struct v3d_context
*v3d
, struct v3d_job
*job
)
494 struct v3d_screen
*screen
= v3d
->screen
;
// NOTE(review): the statement guarded by this check is not visible
// here; presumably it skips the submission for jobs with nothing to
// flush.
496 if (!job
->needs_flush
)
// NOTE(review): the statement guarded by this version check is not
// visible here.
499 if (screen
->devinfo
.ver
>= 41)
// A non-empty binner command list gets the version-specific epilogue.
504 if (cl_offset(&job
->bcl
) > 0) {
505 if (screen
->devinfo
.ver
>= 41)
506 v3d41_bcl_epilogue(v3d
, job
);
508 v3d33_bcl_epilogue(v3d
, job
);
511 /* While the RCL will implicitly depend on the last RCL to have
512 * finished, we also need to block on any previous TFU job we may have
515 job
->submit
.in_sync_rcl
= v3d
->out_sync
;
517 /* Update the sync object for the last rendering by our context. */
518 job
->submit
.out_sync
= v3d
->out_sync
;
// End of each command list: offset of its BO plus the amount written.
520 job
->submit
.bcl_end
= job
->bcl
.bo
->offset
+ cl_offset(&job
->bcl
);
521 job
->submit
.rcl_end
= job
->rcl
.bo
->offset
+ cl_offset(&job
->rcl
);
// The cache flush flag is only set when the job marked tmu_dirty_rcl
// and the screen reports has_cache_flush.
523 job
->submit
.flags
= 0;
524 if (job
->tmu_dirty_rcl
&& screen
->has_cache_flush
)
525 job
->submit
.flags
|= DRM_V3D_SUBMIT_CL_FLUSH_CACHE
;
527 /* On V3D 4.1, the tile alloc/state setup moved to register writes
528 * instead of binner packets.
530 if (screen
->devinfo
.ver
>= 41) {
531 v3d_job_add_bo(job
, job
->tile_alloc
);
532 job
->submit
.qma
= job
->tile_alloc
->offset
;
533 job
->submit
.qms
= job
->tile_alloc
->size
;
535 v3d_job_add_bo(job
, job
->tile_state
);
536 job
->submit
.qts
= job
->tile_state
->offset
;
539 v3d_clif_dump(v3d
, job
);
// The ioctl is skipped entirely when V3D_DEBUG_NORAST is set.
541 if (!(V3D_DEBUG
& V3D_DEBUG_NORAST
)) {
544 ret
= v3d_ioctl(v3d
->fd
, DRM_IOCTL_V3D_SUBMIT_CL
, &job
->submit
);
// The static flag gates the message below.
// NOTE(review): the assignment that sets warned is not visible here;
// presumably the failure is reported only once.
545 static bool warned
= false;
546 if (ret
&& !warned
) {
547 fprintf(stderr
, "Draw call returned %s. "
548 "Expect corruption.\n", strerror(errno
));
552 /* If we are submitting a job in the middle of transform
553 * feedback we need to read the primitive counts and accumulate
554 * them, otherwise they will be reset at the start of the next
555 * draw when we emit the Tile Binning Mode Configuration packet.
557 * If the job doesn't have any TF draw calls, then we know
558 * the primitive count must be zero and we can skip stalling
559 * for this. This also fixes a problem because it seems that
560 * in this scenario the counters are not reset with the Tile
561 * Binning Mode Configuration packet, which would translate
562 * to us reading an obsolete (possibly non-zero) value from
565 if (v3d
->streamout
.num_targets
&& job
->tf_draw_calls_queued
> 0)
566 v3d_read_and_accumulate_primitive_counters(v3d
);
// The job is torn down once it has been handed to the kernel.
570 v3d_job_free(v3d
, job
);
// Key equality test: true when the two keys are byte-for-byte
// identical over sizeof(struct v3d_job_key).
//
// a, b: pointers to the keys being compared.
//
// NOTE(review): a bytewise compare also covers any padding in the
// struct; confirm keys are fully initialised before use.
574 v3d_job_compare(const void *a
, const void *b
)
576 return memcmp(a
, b
, sizeof(struct v3d_job_key
)) == 0;
// Hashes the sizeof(struct v3d_job_key) bytes at key with
// _mesa_hash_data().
//
// key: pointer to the key to hash.
580 v3d_job_hash(const void *key
)
582 return _mesa_hash_data(key
, sizeof(struct v3d_job_key
));
// Creates the context's two hash tables, v3d->jobs and v3d->write_jobs,
// both allocated with the context as their first argument.
//
// v3d: context that receives the tables.
586 v3d_job_init(struct v3d_context
*v3d
)
// NOTE(review): the hash and compare callbacks passed for jobs are not
// visible here; presumably v3d_job_hash and v3d_job_compare.
588 v3d
->jobs
= _mesa_hash_table_create(v3d
,
// write_jobs compares its keys by pointer identity.
591 v3d
->write_jobs
= _mesa_hash_table_create(v3d
,
593 _mesa_key_pointer_equal
);