/*
 * Copyright 2013-2017 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <libsync.h>

#include "util/os_time.h"
#include "util/u_memory.h"
#include "util/u_queue.h"
#include "util/u_upload_mgr.h"

#include "si_pipe.h"
#include "radeon/r600_cs.h"
/* A fine-grained fence: a single dword in a suballocated buffer that the GPU
 * writes at a chosen pipeline stage (top or bottom of pipe), allowing
 * fence_finish to observe completion before the whole IB fence signals.
 */
struct si_fine_fence {
	struct r600_resource *buf;	/* fence storage; NULL when no fine fence was requested */
	unsigned offset;		/* byte offset of the fence dword within buf */
};
40 struct si_multi_fence
{
41 struct pipe_reference reference
;
42 struct pipe_fence_handle
*gfx
;
43 struct pipe_fence_handle
*sdma
;
44 struct tc_unflushed_batch_token
*tc_token
;
45 struct util_queue_fence ready
;
47 /* If the context wasn't flushed at fence creation, this is non-NULL. */
49 struct r600_common_context
*ctx
;
53 struct si_fine_fence fine
;
56 static void si_add_fence_dependency(struct r600_common_context
*rctx
,
57 struct pipe_fence_handle
*fence
)
59 struct radeon_winsys
*ws
= rctx
->ws
;
62 ws
->cs_add_fence_dependency(rctx
->dma
.cs
, fence
);
63 ws
->cs_add_fence_dependency(rctx
->gfx
.cs
, fence
);
66 static void si_fence_reference(struct pipe_screen
*screen
,
67 struct pipe_fence_handle
**dst
,
68 struct pipe_fence_handle
*src
)
70 struct radeon_winsys
*ws
= ((struct r600_common_screen
*)screen
)->ws
;
71 struct si_multi_fence
**rdst
= (struct si_multi_fence
**)dst
;
72 struct si_multi_fence
*rsrc
= (struct si_multi_fence
*)src
;
74 if (pipe_reference(&(*rdst
)->reference
, &rsrc
->reference
)) {
75 ws
->fence_reference(&(*rdst
)->gfx
, NULL
);
76 ws
->fence_reference(&(*rdst
)->sdma
, NULL
);
77 tc_unflushed_batch_token_reference(&(*rdst
)->tc_token
, NULL
);
78 r600_resource_reference(&(*rdst
)->fine
.buf
, NULL
);
84 static struct si_multi_fence
*si_create_multi_fence()
86 struct si_multi_fence
*fence
= CALLOC_STRUCT(si_multi_fence
);
90 pipe_reference_init(&fence
->reference
, 1);
91 util_queue_fence_init(&fence
->ready
);
96 struct pipe_fence_handle
*si_create_fence(struct pipe_context
*ctx
,
97 struct tc_unflushed_batch_token
*tc_token
)
99 struct si_multi_fence
*fence
= si_create_multi_fence();
103 util_queue_fence_reset(&fence
->ready
);
104 tc_unflushed_batch_token_reference(&fence
->tc_token
, tc_token
);
106 return (struct pipe_fence_handle
*)fence
;
109 static void si_fence_server_sync(struct pipe_context
*ctx
,
110 struct pipe_fence_handle
*fence
)
112 struct r600_common_context
*rctx
= (struct r600_common_context
*)ctx
;
113 struct si_multi_fence
*rfence
= (struct si_multi_fence
*)fence
;
115 util_queue_fence_wait(&rfence
->ready
);
117 /* Unflushed fences from the same context are no-ops. */
118 if (rfence
->gfx_unflushed
.ctx
&&
119 rfence
->gfx_unflushed
.ctx
== rctx
)
122 /* All unflushed commands will not start execution before
123 * this fence dependency is signalled.
125 * Should we flush the context to allow more GPU parallelism?
128 si_add_fence_dependency(rctx
, rfence
->sdma
);
130 si_add_fence_dependency(rctx
, rfence
->gfx
);
133 static bool si_fine_fence_signaled(struct radeon_winsys
*rws
,
134 const struct si_fine_fence
*fine
)
136 char *map
= rws
->buffer_map(fine
->buf
->buf
, NULL
, PIPE_TRANSFER_READ
|
137 PIPE_TRANSFER_UNSYNCHRONIZED
);
141 uint32_t *fence
= (uint32_t*)(map
+ fine
->offset
);
145 static void si_fine_fence_set(struct si_context
*ctx
,
146 struct si_fine_fence
*fine
,
151 assert(util_bitcount(flags
& (PIPE_FLUSH_TOP_OF_PIPE
| PIPE_FLUSH_BOTTOM_OF_PIPE
)) == 1);
153 /* Use uncached system memory for the fence. */
154 u_upload_alloc(ctx
->b
.b
.stream_uploader
, 0, 4, 4,
155 &fine
->offset
, (struct pipe_resource
**)&fine
->buf
, (void **)&fence_ptr
);
161 uint64_t fence_va
= fine
->buf
->gpu_address
+ fine
->offset
;
163 radeon_add_to_buffer_list(&ctx
->b
, &ctx
->b
.gfx
, fine
->buf
,
164 RADEON_USAGE_WRITE
, RADEON_PRIO_QUERY
);
165 if (flags
& PIPE_FLUSH_TOP_OF_PIPE
) {
166 struct radeon_winsys_cs
*cs
= ctx
->b
.gfx
.cs
;
167 radeon_emit(cs
, PKT3(PKT3_WRITE_DATA
, 3, 0));
168 radeon_emit(cs
, S_370_DST_SEL(V_370_MEM_ASYNC
) |
169 S_370_WR_CONFIRM(1) |
170 S_370_ENGINE_SEL(V_370_PFP
));
171 radeon_emit(cs
, fence_va
);
172 radeon_emit(cs
, fence_va
>> 32);
173 radeon_emit(cs
, 0x80000000);
174 } else if (flags
& PIPE_FLUSH_BOTTOM_OF_PIPE
) {
175 si_gfx_write_event_eop(&ctx
->b
, V_028A90_BOTTOM_OF_PIPE_TS
, 0,
176 EOP_DATA_SEL_VALUE_32BIT
,
177 NULL
, fence_va
, 0x80000000,
178 PIPE_QUERY_GPU_FINISHED
);
184 static boolean
si_fence_finish(struct pipe_screen
*screen
,
185 struct pipe_context
*ctx
,
186 struct pipe_fence_handle
*fence
,
189 struct radeon_winsys
*rws
= ((struct r600_common_screen
*)screen
)->ws
;
190 struct si_multi_fence
*rfence
= (struct si_multi_fence
*)fence
;
191 int64_t abs_timeout
= os_time_get_absolute_timeout(timeout
);
193 if (!util_queue_fence_is_signalled(&rfence
->ready
)) {
197 if (rfence
->tc_token
) {
198 /* Ensure that si_flush_from_st will be called for
199 * this fence, but only if we're in the API thread
200 * where the context is current.
202 * Note that the batch containing the flush may already
203 * be in flight in the driver thread, so the fence
204 * may not be ready yet when this call returns.
206 threaded_context_flush(ctx
, rfence
->tc_token
);
209 if (timeout
== PIPE_TIMEOUT_INFINITE
) {
210 util_queue_fence_wait(&rfence
->ready
);
212 if (!util_queue_fence_wait_timeout(&rfence
->ready
, abs_timeout
))
216 if (timeout
&& timeout
!= PIPE_TIMEOUT_INFINITE
) {
217 int64_t time
= os_time_get_nano();
218 timeout
= abs_timeout
> time
? abs_timeout
- time
: 0;
223 if (!rws
->fence_wait(rws
, rfence
->sdma
, timeout
))
226 /* Recompute the timeout after waiting. */
227 if (timeout
&& timeout
!= PIPE_TIMEOUT_INFINITE
) {
228 int64_t time
= os_time_get_nano();
229 timeout
= abs_timeout
> time
? abs_timeout
- time
: 0;
236 if (rfence
->fine
.buf
&&
237 si_fine_fence_signaled(rws
, &rfence
->fine
)) {
238 rws
->fence_reference(&rfence
->gfx
, NULL
);
239 r600_resource_reference(&rfence
->fine
.buf
, NULL
);
243 /* Flush the gfx IB if it hasn't been flushed yet. */
244 if (ctx
&& rfence
->gfx_unflushed
.ctx
) {
245 struct si_context
*sctx
;
247 sctx
= (struct si_context
*)threaded_context_unwrap_unsync(ctx
);
248 if (rfence
->gfx_unflushed
.ctx
== &sctx
->b
&&
249 rfence
->gfx_unflushed
.ib_index
== sctx
->b
.num_gfx_cs_flushes
) {
250 /* Section 4.1.2 (Signaling) of the OpenGL 4.6 (Core profile)
253 * "If the sync object being blocked upon will not be
254 * signaled in finite time (for example, by an associated
255 * fence command issued previously, but not yet flushed to
256 * the graphics pipeline), then ClientWaitSync may hang
257 * forever. To help prevent this behavior, if
258 * ClientWaitSync is called and all of the following are
261 * * the SYNC_FLUSH_COMMANDS_BIT bit is set in flags,
262 * * sync is unsignaled when ClientWaitSync is called,
263 * * and the calls to ClientWaitSync and FenceSync were
264 * issued from the same context,
266 * then the GL will behave as if the equivalent of Flush
267 * were inserted immediately after the creation of sync."
269 * This means we need to flush for such fences even when we're
272 threaded_context_unwrap_sync(ctx
);
273 sctx
->b
.gfx
.flush(&sctx
->b
, timeout
? 0 : RADEON_FLUSH_ASYNC
, NULL
);
274 rfence
->gfx_unflushed
.ctx
= NULL
;
279 /* Recompute the timeout after all that. */
280 if (timeout
&& timeout
!= PIPE_TIMEOUT_INFINITE
) {
281 int64_t time
= os_time_get_nano();
282 timeout
= abs_timeout
> time
? abs_timeout
- time
: 0;
287 if (rws
->fence_wait(rws
, rfence
->gfx
, timeout
))
290 /* Re-check in case the GPU is slow or hangs, but the commands before
291 * the fine-grained fence have completed. */
292 if (rfence
->fine
.buf
&&
293 si_fine_fence_signaled(rws
, &rfence
->fine
))
299 static void si_create_fence_fd(struct pipe_context
*ctx
,
300 struct pipe_fence_handle
**pfence
, int fd
)
302 struct r600_common_screen
*rscreen
= (struct r600_common_screen
*)ctx
->screen
;
303 struct radeon_winsys
*ws
= rscreen
->ws
;
304 struct si_multi_fence
*rfence
;
308 if (!rscreen
->info
.has_sync_file
)
311 rfence
= si_create_multi_fence();
315 rfence
->gfx
= ws
->fence_import_sync_file(ws
, fd
);
321 *pfence
= (struct pipe_fence_handle
*)rfence
;
324 static int si_fence_get_fd(struct pipe_screen
*screen
,
325 struct pipe_fence_handle
*fence
)
327 struct r600_common_screen
*rscreen
= (struct r600_common_screen
*)screen
;
328 struct radeon_winsys
*ws
= rscreen
->ws
;
329 struct si_multi_fence
*rfence
= (struct si_multi_fence
*)fence
;
330 int gfx_fd
= -1, sdma_fd
= -1;
332 if (!rscreen
->info
.has_sync_file
)
335 util_queue_fence_wait(&rfence
->ready
);
337 /* Deferred fences aren't supported. */
338 assert(!rfence
->gfx_unflushed
.ctx
);
339 if (rfence
->gfx_unflushed
.ctx
)
343 sdma_fd
= ws
->fence_export_sync_file(ws
, rfence
->sdma
);
348 gfx_fd
= ws
->fence_export_sync_file(ws
, rfence
->gfx
);
356 /* If we don't have FDs at this point, it means we don't have fences
363 /* Get a fence that will be a combination of both fences. */
364 sync_accumulate("radeonsi", &gfx_fd
, sdma_fd
);
369 static void si_flush_from_st(struct pipe_context
*ctx
,
370 struct pipe_fence_handle
**fence
,
373 struct pipe_screen
*screen
= ctx
->screen
;
374 struct r600_common_context
*rctx
= (struct r600_common_context
*)ctx
;
375 struct radeon_winsys
*ws
= rctx
->ws
;
376 struct pipe_fence_handle
*gfx_fence
= NULL
;
377 struct pipe_fence_handle
*sdma_fence
= NULL
;
378 bool deferred_fence
= false;
379 struct si_fine_fence fine
= {};
380 unsigned rflags
= RADEON_FLUSH_ASYNC
;
382 if (flags
& PIPE_FLUSH_END_OF_FRAME
)
383 rflags
|= RADEON_FLUSH_END_OF_FRAME
;
385 if (flags
& (PIPE_FLUSH_TOP_OF_PIPE
| PIPE_FLUSH_BOTTOM_OF_PIPE
)) {
386 assert(flags
& PIPE_FLUSH_DEFERRED
);
389 si_fine_fence_set((struct si_context
*)rctx
, &fine
, flags
);
392 /* DMA IBs are preambles to gfx IBs, therefore must be flushed first. */
394 rctx
->dma
.flush(rctx
, rflags
, fence
? &sdma_fence
: NULL
);
396 if (!radeon_emitted(rctx
->gfx
.cs
, rctx
->initial_gfx_cs_size
)) {
398 ws
->fence_reference(&gfx_fence
, rctx
->last_gfx_fence
);
399 if (!(flags
& PIPE_FLUSH_DEFERRED
))
400 ws
->cs_sync_flush(rctx
->gfx
.cs
);
402 /* Instead of flushing, create a deferred fence. Constraints:
403 * - The state tracker must allow a deferred flush.
404 * - The state tracker must request a fence.
405 * - fence_get_fd is not allowed.
406 * Thread safety in fence_finish must be ensured by the state tracker.
408 if (flags
& PIPE_FLUSH_DEFERRED
&&
409 !(flags
& PIPE_FLUSH_FENCE_FD
) &&
411 gfx_fence
= rctx
->ws
->cs_get_next_fence(rctx
->gfx
.cs
);
412 deferred_fence
= true;
414 rctx
->gfx
.flush(rctx
, rflags
, fence
? &gfx_fence
: NULL
);
418 /* Both engines can signal out of order, so we need to keep both fences. */
420 struct si_multi_fence
*multi_fence
;
422 if (flags
& TC_FLUSH_ASYNC
) {
423 multi_fence
= (struct si_multi_fence
*)*fence
;
426 multi_fence
= si_create_multi_fence();
428 ws
->fence_reference(&sdma_fence
, NULL
);
429 ws
->fence_reference(&gfx_fence
, NULL
);
433 screen
->fence_reference(screen
, fence
, NULL
);
434 *fence
= (struct pipe_fence_handle
*)multi_fence
;
437 /* If both fences are NULL, fence_finish will always return true. */
438 multi_fence
->gfx
= gfx_fence
;
439 multi_fence
->sdma
= sdma_fence
;
441 if (deferred_fence
) {
442 multi_fence
->gfx_unflushed
.ctx
= rctx
;
443 multi_fence
->gfx_unflushed
.ib_index
= rctx
->num_gfx_cs_flushes
;
446 multi_fence
->fine
= fine
;
448 if (flags
& TC_FLUSH_ASYNC
) {
449 util_queue_fence_signal(&multi_fence
->ready
);
450 tc_unflushed_batch_token_reference(&multi_fence
->tc_token
, NULL
);
454 if (!(flags
& PIPE_FLUSH_DEFERRED
)) {
456 ws
->cs_sync_flush(rctx
->dma
.cs
);
457 ws
->cs_sync_flush(rctx
->gfx
.cs
);
461 void si_init_fence_functions(struct si_context
*ctx
)
463 ctx
->b
.b
.flush
= si_flush_from_st
;
464 ctx
->b
.b
.create_fence_fd
= si_create_fence_fd
;
465 ctx
->b
.b
.fence_server_sync
= si_fence_server_sync
;
468 void si_init_screen_fence_functions(struct si_screen
*screen
)
470 screen
->b
.b
.fence_finish
= si_fence_finish
;
471 screen
->b
.b
.fence_reference
= si_fence_reference
;
472 screen
->b
.b
.fence_get_fd
= si_fence_get_fd
;