/*
 * Copyright 2013-2017 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <libsync.h>

#include "util/os_time.h"
#include "util/u_memory.h"
#include "util/u_queue.h"
#include "util/u_upload_mgr.h"

#include "si_pipe.h"
#include "radeon/r600_cs.h"
/* A fine-grained fence: a 32-bit value in a GPU-visible buffer that the GPU
 * writes (to a nonzero value) at a chosen point in the command stream, so the
 * CPU can poll completion without waiting for the whole IB. */
struct si_fine_fence {
	struct r600_resource *buf;	/* fence storage; NULL if no fine fence was set */
	unsigned offset;		/* byte offset of the 32-bit fence dword in buf */
};
40 struct si_multi_fence
{
41 struct pipe_reference reference
;
42 struct pipe_fence_handle
*gfx
;
43 struct pipe_fence_handle
*sdma
;
44 struct tc_unflushed_batch_token
*tc_token
;
45 struct util_queue_fence ready
;
47 /* If the context wasn't flushed at fence creation, this is non-NULL. */
49 struct r600_common_context
*ctx
;
53 struct si_fine_fence fine
;
56 static void si_add_fence_dependency(struct r600_common_context
*rctx
,
57 struct pipe_fence_handle
*fence
)
59 struct radeon_winsys
*ws
= rctx
->ws
;
62 ws
->cs_add_fence_dependency(rctx
->dma
.cs
, fence
);
63 ws
->cs_add_fence_dependency(rctx
->gfx
.cs
, fence
);
66 static void si_fence_reference(struct pipe_screen
*screen
,
67 struct pipe_fence_handle
**dst
,
68 struct pipe_fence_handle
*src
)
70 struct radeon_winsys
*ws
= ((struct r600_common_screen
*)screen
)->ws
;
71 struct si_multi_fence
**rdst
= (struct si_multi_fence
**)dst
;
72 struct si_multi_fence
*rsrc
= (struct si_multi_fence
*)src
;
74 if (pipe_reference(&(*rdst
)->reference
, &rsrc
->reference
)) {
75 ws
->fence_reference(&(*rdst
)->gfx
, NULL
);
76 ws
->fence_reference(&(*rdst
)->sdma
, NULL
);
77 tc_unflushed_batch_token_reference(&(*rdst
)->tc_token
, NULL
);
78 r600_resource_reference(&(*rdst
)->fine
.buf
, NULL
);
84 static struct si_multi_fence
*si_create_multi_fence()
86 struct si_multi_fence
*fence
= CALLOC_STRUCT(si_multi_fence
);
90 pipe_reference_init(&fence
->reference
, 1);
91 util_queue_fence_init(&fence
->ready
);
96 struct pipe_fence_handle
*si_create_fence(struct pipe_context
*ctx
,
97 struct tc_unflushed_batch_token
*tc_token
)
99 struct si_multi_fence
*fence
= si_create_multi_fence();
103 util_queue_fence_reset(&fence
->ready
);
104 tc_unflushed_batch_token_reference(&fence
->tc_token
, tc_token
);
106 return (struct pipe_fence_handle
*)fence
;
109 static void si_fence_server_sync(struct pipe_context
*ctx
,
110 struct pipe_fence_handle
*fence
)
112 struct r600_common_context
*rctx
= (struct r600_common_context
*)ctx
;
113 struct si_multi_fence
*rfence
= (struct si_multi_fence
*)fence
;
115 util_queue_fence_wait(&rfence
->ready
);
117 /* Unflushed fences from the same context are no-ops. */
118 if (rfence
->gfx_unflushed
.ctx
&&
119 rfence
->gfx_unflushed
.ctx
== rctx
)
122 /* All unflushed commands will not start execution before
123 * this fence dependency is signalled.
125 * Should we flush the context to allow more GPU parallelism?
128 si_add_fence_dependency(rctx
, rfence
->sdma
);
130 si_add_fence_dependency(rctx
, rfence
->gfx
);
133 static bool si_fine_fence_signaled(struct radeon_winsys
*rws
,
134 const struct si_fine_fence
*fine
)
136 char *map
= rws
->buffer_map(fine
->buf
->buf
, NULL
, PIPE_TRANSFER_READ
|
137 PIPE_TRANSFER_UNSYNCHRONIZED
);
141 uint32_t *fence
= (uint32_t*)(map
+ fine
->offset
);
145 static void si_fine_fence_set(struct si_context
*ctx
,
146 struct si_fine_fence
*fine
,
151 assert(util_bitcount(flags
& (PIPE_FLUSH_TOP_OF_PIPE
| PIPE_FLUSH_BOTTOM_OF_PIPE
)) == 1);
153 /* Use uncached system memory for the fence. */
154 u_upload_alloc(ctx
->b
.b
.stream_uploader
, 0, 4, 4,
155 &fine
->offset
, (struct pipe_resource
**)&fine
->buf
, (void **)&fence_ptr
);
161 uint64_t fence_va
= fine
->buf
->gpu_address
+ fine
->offset
;
163 radeon_add_to_buffer_list(&ctx
->b
, &ctx
->b
.gfx
, fine
->buf
,
164 RADEON_USAGE_WRITE
, RADEON_PRIO_QUERY
);
165 if (flags
& PIPE_FLUSH_TOP_OF_PIPE
) {
166 struct radeon_winsys_cs
*cs
= ctx
->b
.gfx
.cs
;
167 radeon_emit(cs
, PKT3(PKT3_WRITE_DATA
, 3, 0));
168 radeon_emit(cs
, S_370_DST_SEL(V_370_MEM_ASYNC
) |
169 S_370_WR_CONFIRM(1) |
170 S_370_ENGINE_SEL(V_370_PFP
));
171 radeon_emit(cs
, fence_va
);
172 radeon_emit(cs
, fence_va
>> 32);
173 radeon_emit(cs
, 0x80000000);
174 } else if (flags
& PIPE_FLUSH_BOTTOM_OF_PIPE
) {
175 si_gfx_write_event_eop(&ctx
->b
, V_028A90_BOTTOM_OF_PIPE_TS
, 0,
176 EOP_DATA_SEL_VALUE_32BIT
,
177 NULL
, fence_va
, 0x80000000,
178 PIPE_QUERY_GPU_FINISHED
);
184 static boolean
si_fence_finish(struct pipe_screen
*screen
,
185 struct pipe_context
*ctx
,
186 struct pipe_fence_handle
*fence
,
189 struct radeon_winsys
*rws
= ((struct r600_common_screen
*)screen
)->ws
;
190 struct si_multi_fence
*rfence
= (struct si_multi_fence
*)fence
;
191 int64_t abs_timeout
= os_time_get_absolute_timeout(timeout
);
193 if (!util_queue_fence_is_signalled(&rfence
->ready
)) {
194 if (rfence
->tc_token
) {
195 /* Ensure that si_flush_from_st will be called for
196 * this fence, but only if we're in the API thread
197 * where the context is current.
199 * Note that the batch containing the flush may already
200 * be in flight in the driver thread, so the fence
201 * may not be ready yet when this call returns.
203 threaded_context_flush(ctx
, rfence
->tc_token
,
210 if (timeout
== PIPE_TIMEOUT_INFINITE
) {
211 util_queue_fence_wait(&rfence
->ready
);
213 if (!util_queue_fence_wait_timeout(&rfence
->ready
, abs_timeout
))
217 if (timeout
&& timeout
!= PIPE_TIMEOUT_INFINITE
) {
218 int64_t time
= os_time_get_nano();
219 timeout
= abs_timeout
> time
? abs_timeout
- time
: 0;
224 if (!rws
->fence_wait(rws
, rfence
->sdma
, timeout
))
227 /* Recompute the timeout after waiting. */
228 if (timeout
&& timeout
!= PIPE_TIMEOUT_INFINITE
) {
229 int64_t time
= os_time_get_nano();
230 timeout
= abs_timeout
> time
? abs_timeout
- time
: 0;
237 if (rfence
->fine
.buf
&&
238 si_fine_fence_signaled(rws
, &rfence
->fine
)) {
239 rws
->fence_reference(&rfence
->gfx
, NULL
);
240 r600_resource_reference(&rfence
->fine
.buf
, NULL
);
244 /* Flush the gfx IB if it hasn't been flushed yet. */
245 if (ctx
&& rfence
->gfx_unflushed
.ctx
) {
246 struct si_context
*sctx
;
248 sctx
= (struct si_context
*)threaded_context_unwrap_unsync(ctx
);
249 if (rfence
->gfx_unflushed
.ctx
== &sctx
->b
&&
250 rfence
->gfx_unflushed
.ib_index
== sctx
->b
.num_gfx_cs_flushes
) {
251 /* Section 4.1.2 (Signaling) of the OpenGL 4.6 (Core profile)
254 * "If the sync object being blocked upon will not be
255 * signaled in finite time (for example, by an associated
256 * fence command issued previously, but not yet flushed to
257 * the graphics pipeline), then ClientWaitSync may hang
258 * forever. To help prevent this behavior, if
259 * ClientWaitSync is called and all of the following are
262 * * the SYNC_FLUSH_COMMANDS_BIT bit is set in flags,
263 * * sync is unsignaled when ClientWaitSync is called,
264 * * and the calls to ClientWaitSync and FenceSync were
265 * issued from the same context,
267 * then the GL will behave as if the equivalent of Flush
268 * were inserted immediately after the creation of sync."
270 * This means we need to flush for such fences even when we're
273 threaded_context_unwrap_sync(ctx
);
274 sctx
->b
.gfx
.flush(&sctx
->b
, timeout
? 0 : RADEON_FLUSH_ASYNC
, NULL
);
275 rfence
->gfx_unflushed
.ctx
= NULL
;
280 /* Recompute the timeout after all that. */
281 if (timeout
&& timeout
!= PIPE_TIMEOUT_INFINITE
) {
282 int64_t time
= os_time_get_nano();
283 timeout
= abs_timeout
> time
? abs_timeout
- time
: 0;
288 if (rws
->fence_wait(rws
, rfence
->gfx
, timeout
))
291 /* Re-check in case the GPU is slow or hangs, but the commands before
292 * the fine-grained fence have completed. */
293 if (rfence
->fine
.buf
&&
294 si_fine_fence_signaled(rws
, &rfence
->fine
))
300 static void si_create_fence_fd(struct pipe_context
*ctx
,
301 struct pipe_fence_handle
**pfence
, int fd
)
303 struct r600_common_screen
*rscreen
= (struct r600_common_screen
*)ctx
->screen
;
304 struct radeon_winsys
*ws
= rscreen
->ws
;
305 struct si_multi_fence
*rfence
;
309 if (!rscreen
->info
.has_sync_file
)
312 rfence
= si_create_multi_fence();
316 rfence
->gfx
= ws
->fence_import_sync_file(ws
, fd
);
322 *pfence
= (struct pipe_fence_handle
*)rfence
;
325 static int si_fence_get_fd(struct pipe_screen
*screen
,
326 struct pipe_fence_handle
*fence
)
328 struct r600_common_screen
*rscreen
= (struct r600_common_screen
*)screen
;
329 struct radeon_winsys
*ws
= rscreen
->ws
;
330 struct si_multi_fence
*rfence
= (struct si_multi_fence
*)fence
;
331 int gfx_fd
= -1, sdma_fd
= -1;
333 if (!rscreen
->info
.has_sync_file
)
336 util_queue_fence_wait(&rfence
->ready
);
338 /* Deferred fences aren't supported. */
339 assert(!rfence
->gfx_unflushed
.ctx
);
340 if (rfence
->gfx_unflushed
.ctx
)
344 sdma_fd
= ws
->fence_export_sync_file(ws
, rfence
->sdma
);
349 gfx_fd
= ws
->fence_export_sync_file(ws
, rfence
->gfx
);
357 /* If we don't have FDs at this point, it means we don't have fences
364 /* Get a fence that will be a combination of both fences. */
365 sync_accumulate("radeonsi", &gfx_fd
, sdma_fd
);
370 static void si_flush_from_st(struct pipe_context
*ctx
,
371 struct pipe_fence_handle
**fence
,
374 struct pipe_screen
*screen
= ctx
->screen
;
375 struct r600_common_context
*rctx
= (struct r600_common_context
*)ctx
;
376 struct radeon_winsys
*ws
= rctx
->ws
;
377 struct pipe_fence_handle
*gfx_fence
= NULL
;
378 struct pipe_fence_handle
*sdma_fence
= NULL
;
379 bool deferred_fence
= false;
380 struct si_fine_fence fine
= {};
381 unsigned rflags
= RADEON_FLUSH_ASYNC
;
383 if (flags
& PIPE_FLUSH_END_OF_FRAME
)
384 rflags
|= RADEON_FLUSH_END_OF_FRAME
;
386 if (flags
& (PIPE_FLUSH_TOP_OF_PIPE
| PIPE_FLUSH_BOTTOM_OF_PIPE
)) {
387 assert(flags
& PIPE_FLUSH_DEFERRED
);
390 si_fine_fence_set((struct si_context
*)rctx
, &fine
, flags
);
393 /* DMA IBs are preambles to gfx IBs, therefore must be flushed first. */
395 rctx
->dma
.flush(rctx
, rflags
, fence
? &sdma_fence
: NULL
);
397 if (!radeon_emitted(rctx
->gfx
.cs
, rctx
->initial_gfx_cs_size
)) {
399 ws
->fence_reference(&gfx_fence
, rctx
->last_gfx_fence
);
400 if (!(flags
& PIPE_FLUSH_DEFERRED
))
401 ws
->cs_sync_flush(rctx
->gfx
.cs
);
403 /* Instead of flushing, create a deferred fence. Constraints:
404 * - The state tracker must allow a deferred flush.
405 * - The state tracker must request a fence.
406 * - fence_get_fd is not allowed.
407 * Thread safety in fence_finish must be ensured by the state tracker.
409 if (flags
& PIPE_FLUSH_DEFERRED
&&
410 !(flags
& PIPE_FLUSH_FENCE_FD
) &&
412 gfx_fence
= rctx
->ws
->cs_get_next_fence(rctx
->gfx
.cs
);
413 deferred_fence
= true;
415 rctx
->gfx
.flush(rctx
, rflags
, fence
? &gfx_fence
: NULL
);
419 /* Both engines can signal out of order, so we need to keep both fences. */
421 struct si_multi_fence
*multi_fence
;
423 if (flags
& TC_FLUSH_ASYNC
) {
424 multi_fence
= (struct si_multi_fence
*)*fence
;
427 multi_fence
= si_create_multi_fence();
429 ws
->fence_reference(&sdma_fence
, NULL
);
430 ws
->fence_reference(&gfx_fence
, NULL
);
434 screen
->fence_reference(screen
, fence
, NULL
);
435 *fence
= (struct pipe_fence_handle
*)multi_fence
;
438 /* If both fences are NULL, fence_finish will always return true. */
439 multi_fence
->gfx
= gfx_fence
;
440 multi_fence
->sdma
= sdma_fence
;
442 if (deferred_fence
) {
443 multi_fence
->gfx_unflushed
.ctx
= rctx
;
444 multi_fence
->gfx_unflushed
.ib_index
= rctx
->num_gfx_cs_flushes
;
447 multi_fence
->fine
= fine
;
450 if (flags
& TC_FLUSH_ASYNC
) {
451 util_queue_fence_signal(&multi_fence
->ready
);
452 tc_unflushed_batch_token_reference(&multi_fence
->tc_token
, NULL
);
457 if (!(flags
& PIPE_FLUSH_DEFERRED
)) {
459 ws
->cs_sync_flush(rctx
->dma
.cs
);
460 ws
->cs_sync_flush(rctx
->gfx
.cs
);
464 void si_init_fence_functions(struct si_context
*ctx
)
466 ctx
->b
.b
.flush
= si_flush_from_st
;
467 ctx
->b
.b
.create_fence_fd
= si_create_fence_fd
;
468 ctx
->b
.b
.fence_server_sync
= si_fence_server_sync
;
471 void si_init_screen_fence_functions(struct si_screen
*screen
)
473 screen
->b
.b
.fence_finish
= si_fence_finish
;
474 screen
->b
.b
.fence_reference
= si_fence_reference
;
475 screen
->b
.b
.fence_get_fd
= si_fence_get_fd
;