/*
 * Copyright 2013-2017 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <libsync.h>

#include "util/os_time.h"
#include "util/u_memory.h"
#include "util/u_queue.h"
#include "util/u_upload_mgr.h"

#include "si_pipe.h"
#include "radeon/r600_cs.h"
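
/* A fine-grained fence is a single dword in GPU-accessible memory that the
 * gfx command stream sets to a nonzero value at a selected pipeline stage
 * (see si_fine_fence_set). Polling that dword lets fence_finish succeed
 * early, before the winsys fence of the whole IB has signaled. */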
struct si_fine_fence {
	struct r600_resource *buf;
	unsigned offset;
};
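
/* The pipe_fence_handle seen by the state tracker. It aggregates the winsys
 * fences of both engines (gfx and SDMA), the threaded-context token and an
 * optional fine-grained fence, since each of these can signal independently. */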
struct si_multi_fence {
	struct pipe_reference reference;
	struct pipe_fence_handle *gfx;
	struct pipe_fence_handle *sdma;
	struct tc_unflushed_batch_token *tc_token;
	struct util_queue_fence ready;

	/* If the context wasn't flushed at fence creation, this is non-NULL. */
	struct {
		struct r600_common_context *ctx;
		unsigned ib_index;
	} gfx_unflushed;

	struct si_fine_fence fine;
};
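
/* Make all commands submitted from now on wait for the given fence on the
 * GPU; applied to both the SDMA and gfx IBs. */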
static void si_add_fence_dependency(struct r600_common_context *rctx,
				    struct pipe_fence_handle *fence)
{
	struct radeon_winsys *ws = rctx->ws;

	if (rctx->dma.cs)
		ws->cs_add_fence_dependency(rctx->dma.cs, fence);
	ws->cs_add_fence_dependency(rctx->gfx.cs, fence);
}
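
/* pipe_screen::fence_reference. Standard pipe_reference pattern: when the
 * last reference is dropped, release the per-engine winsys fences, the TC
 * token and the fine-fence buffer before freeing the container. */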
static void si_fence_reference(struct pipe_screen *screen,
			       struct pipe_fence_handle **dst,
			       struct pipe_fence_handle *src)
{
	struct radeon_winsys *ws = ((struct r600_common_screen *)screen)->ws;
	struct si_multi_fence **rdst = (struct si_multi_fence **)dst;
	struct si_multi_fence *rsrc = (struct si_multi_fence *)src;

	if (pipe_reference(&(*rdst)->reference, &rsrc->reference)) {
		ws->fence_reference(&(*rdst)->gfx, NULL);
		ws->fence_reference(&(*rdst)->sdma, NULL);
		tc_unflushed_batch_token_reference(&(*rdst)->tc_token, NULL);
		r600_resource_reference(&(*rdst)->fine.buf, NULL);
		FREE(*rdst);
	}
	*rdst = rsrc;
}

static struct si_multi_fence *si_create_multi_fence()
{
	struct si_multi_fence *fence = CALLOC_STRUCT(si_multi_fence);
	if (!fence)
		return NULL;

	pipe_reference_init(&fence->reference, 1);
	util_queue_fence_init(&fence->ready);

	return fence;
}
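
/* Create an unsignaled fence for the threaded context (this appears to be
 * the tc create_fence hook): "ready" stays unsignaled until si_flush_from_st
 * fills in the actual winsys fences on the driver thread. */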
struct pipe_fence_handle *si_create_fence(struct pipe_context *ctx,
					  struct tc_unflushed_batch_token *tc_token)
{
	struct si_multi_fence *fence = si_create_multi_fence();
	if (!fence)
		return NULL;

	util_queue_fence_reset(&fence->ready);
	tc_unflushed_batch_token_reference(&fence->tc_token, tc_token);

	return (struct pipe_fence_handle *)fence;
}
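
/* pipe_context::fence_server_sync: make the GPU wait for a fence that may
 * come from another context or have been imported as a sync file. */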
static void si_fence_server_sync(struct pipe_context *ctx,
				 struct pipe_fence_handle *fence)
{
	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
	struct si_multi_fence *rfence = (struct si_multi_fence *)fence;

	util_queue_fence_wait(&rfence->ready);

	/* Unflushed fences from the same context are no-ops. */
	if (rfence->gfx_unflushed.ctx &&
	    rfence->gfx_unflushed.ctx == rctx)
		return;

	/* All unflushed commands will not start execution before
	 * this fence dependency is signalled.
	 *
	 * Should we flush the context to allow more GPU parallelism?
	 */
	if (rfence->sdma)
		si_add_fence_dependency(rctx, rfence->sdma);
	if (rfence->gfx)
		si_add_fence_dependency(rctx, rfence->gfx);
}
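
/* Poll the fine-fence dword without synchronization; nonzero means the
 * write emitted by si_fine_fence_set has already executed. */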
static bool si_fine_fence_signaled(struct radeon_winsys *rws,
				   const struct si_fine_fence *fine)
{
	char *map = rws->buffer_map(fine->buf->buf, NULL, PIPE_TRANSFER_READ |
					    PIPE_TRANSFER_UNSYNCHRONIZED);
	if (!map)
		return false;

	uint32_t *fence = (uint32_t*)(map + fine->offset);
	return *fence != 0;
}
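
/* Allocate the fine-fence dword and emit the packet that writes it:
 * - PIPE_FLUSH_TOP_OF_PIPE uses a WRITE_DATA packet on the PFP, which
 *   completes as soon as command processing reaches this point in the IB;
 * - PIPE_FLUSH_BOTTOM_OF_PIPE uses a bottom-of-pipe timestamp event, which
 *   completes only after all prior rendering has finished. */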
static void si_fine_fence_set(struct si_context *ctx,
			      struct si_fine_fence *fine,
			      unsigned flags)
{
	uint32_t *fence_ptr;

	assert(util_bitcount(flags & (PIPE_FLUSH_TOP_OF_PIPE | PIPE_FLUSH_BOTTOM_OF_PIPE)) == 1);

	/* Use uncached system memory for the fence. */
	u_upload_alloc(ctx->b.b.stream_uploader, 0, 4, 4,
		       &fine->offset, (struct pipe_resource **)&fine->buf, (void **)&fence_ptr);
	if (!fine->buf)
		return;

	*fence_ptr = 0;

	uint64_t fence_va = fine->buf->gpu_address + fine->offset;

	radeon_add_to_buffer_list(&ctx->b, &ctx->b.gfx, fine->buf,
				  RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
	if (flags & PIPE_FLUSH_TOP_OF_PIPE) {
		struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
		radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
		radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
				S_370_WR_CONFIRM(1) |
				S_370_ENGINE_SEL(V_370_PFP));
		radeon_emit(cs, fence_va);
		radeon_emit(cs, fence_va >> 32);
		radeon_emit(cs, 0x80000000);
	} else if (flags & PIPE_FLUSH_BOTTOM_OF_PIPE) {
		si_gfx_write_event_eop(&ctx->b, V_028A90_BOTTOM_OF_PIPE_TS, 0,
				       EOP_DATA_SEL_VALUE_32BIT,
				       NULL, fence_va, 0x80000000,
				       PIPE_QUERY_GPU_FINISHED);
	} else {
		assert(false);
	}
}
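
/* pipe_screen::fence_finish. The wait proceeds in stages, recomputing the
 * remaining timeout after each blocking step: wait until the fence is ready
 * (i.e. flushed by the driver thread), then for SDMA, then for gfx, checking
 * the fine-grained fence before and after the gfx wait as a fast path. */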
static boolean si_fence_finish(struct pipe_screen *screen,
			       struct pipe_context *ctx,
			       struct pipe_fence_handle *fence,
			       uint64_t timeout)
{
	struct radeon_winsys *rws = ((struct r600_common_screen *)screen)->ws;
	struct si_multi_fence *rfence = (struct si_multi_fence *)fence;
	struct r600_common_context *rctx;
	int64_t abs_timeout = os_time_get_absolute_timeout(timeout);

	ctx = threaded_context_unwrap_sync(ctx);
	rctx = ctx ? (struct r600_common_context *)ctx : NULL;

	if (!util_queue_fence_is_signalled(&rfence->ready)) {
		if (!timeout)
			return false;

		if (rfence->tc_token) {
			/* Ensure that si_flush_from_st will be called for
			 * this fence, but only if we're in the API thread
			 * where the context is current.
			 *
			 * Note that the batch containing the flush may already
			 * be in flight in the driver thread, so the fence
			 * may not be ready yet when this call returns.
			 */
			threaded_context_flush(ctx, rfence->tc_token);
		}

		if (timeout == PIPE_TIMEOUT_INFINITE) {
			util_queue_fence_wait(&rfence->ready);
		} else {
			if (!util_queue_fence_wait_timeout(&rfence->ready, abs_timeout))
				return false;
		}

		if (timeout && timeout != PIPE_TIMEOUT_INFINITE) {
			int64_t time = os_time_get_nano();
			timeout = abs_timeout > time ? abs_timeout - time : 0;
		}
	}

	if (rfence->sdma) {
		if (!rws->fence_wait(rws, rfence->sdma, timeout))
			return false;

		/* Recompute the timeout after waiting. */
		if (timeout && timeout != PIPE_TIMEOUT_INFINITE) {
			int64_t time = os_time_get_nano();
			timeout = abs_timeout > time ? abs_timeout - time : 0;
		}
	}

	if (!rfence->gfx)
		return true;

	if (rfence->fine.buf &&
	    si_fine_fence_signaled(rws, &rfence->fine)) {
		rws->fence_reference(&rfence->gfx, NULL);
		r600_resource_reference(&rfence->fine.buf, NULL);
		return true;
	}

	/* Flush the gfx IB if it hasn't been flushed yet. */
	if (rctx &&
	    rfence->gfx_unflushed.ctx == rctx &&
	    rfence->gfx_unflushed.ib_index == rctx->num_gfx_cs_flushes) {
		/* Section 4.1.2 (Signaling) of the OpenGL 4.6 (Core profile)
		 * spec says:
		 *
		 *    "If the sync object being blocked upon will not be
		 *     signaled in finite time (for example, by an associated
		 *     fence command issued previously, but not yet flushed to
		 *     the graphics pipeline), then ClientWaitSync may hang
		 *     forever. To help prevent this behavior, if
		 *     ClientWaitSync is called and all of the following are
		 *     true:
		 *
		 *     * the SYNC_FLUSH_COMMANDS_BIT bit is set in flags,
		 *     * sync is unsignaled when ClientWaitSync is called,
		 *     * and the calls to ClientWaitSync and FenceSync were
		 *       issued from the same context,
		 *
		 *     then the GL will behave as if the equivalent of Flush
		 *     were inserted immediately after the creation of sync."
		 *
		 * This means we need to flush for such fences even when we're
		 * not going to wait.
		 */
		rctx->gfx.flush(rctx, timeout ? 0 : RADEON_FLUSH_ASYNC, NULL);
		rfence->gfx_unflushed.ctx = NULL;

		if (!timeout)
			return false;

		/* Recompute the timeout after all that. */
		if (timeout && timeout != PIPE_TIMEOUT_INFINITE) {
			int64_t time = os_time_get_nano();
			timeout = abs_timeout > time ? abs_timeout - time : 0;
		}
	}

	if (rws->fence_wait(rws, rfence->gfx, timeout))
		return true;

	/* Re-check in case the GPU is slow or hangs, but the commands before
	 * the fine-grained fence have completed. */
	if (rfence->fine.buf &&
	    si_fine_fence_signaled(rws, &rfence->fine))
		return true;

	return false;
}
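
/* pipe_context::create_fence_fd: wrap an imported sync_file FD (e.g. from
 * EGL_ANDROID_native_fence_sync) as the gfx part of a new multi fence. */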
static void si_create_fence_fd(struct pipe_context *ctx,
			       struct pipe_fence_handle **pfence, int fd)
{
	struct r600_common_screen *rscreen = (struct r600_common_screen *)ctx->screen;
	struct radeon_winsys *ws = rscreen->ws;
	struct si_multi_fence *rfence;

	*pfence = NULL;

	if (!rscreen->info.has_sync_file)
		return;

	rfence = si_create_multi_fence();
	if (!rfence)
		return;

	rfence->gfx = ws->fence_import_sync_file(ws, fd);
	if (!rfence->gfx) {
		FREE(rfence);
		return;
	}

	*pfence = (struct pipe_fence_handle *)rfence;
}
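
/* pipe_screen::fence_get_fd: export the fence as a sync_file FD. When both
 * engines have fences, the two FDs are merged with sync_accumulate() so the
 * result signals only once both have signaled. */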
static int si_fence_get_fd(struct pipe_screen *screen,
			   struct pipe_fence_handle *fence)
{
	struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
	struct radeon_winsys *ws = rscreen->ws;
	struct si_multi_fence *rfence = (struct si_multi_fence *)fence;
	int gfx_fd = -1, sdma_fd = -1;

	if (!rscreen->info.has_sync_file)
		return -1;

	util_queue_fence_wait(&rfence->ready);

	/* Deferred fences aren't supported. */
	assert(!rfence->gfx_unflushed.ctx);
	if (rfence->gfx_unflushed.ctx)
		return -1;

	if (rfence->sdma) {
		sdma_fd = ws->fence_export_sync_file(ws, rfence->sdma);
		if (sdma_fd == -1)
			return -1;
	}
	if (rfence->gfx) {
		gfx_fd = ws->fence_export_sync_file(ws, rfence->gfx);
		if (gfx_fd == -1) {
			if (sdma_fd != -1)
				close(sdma_fd);
			return -1;
		}
	}

	/* If we don't have FDs at this point, it means we don't have fences
	 * either. */
	if (sdma_fd == -1)
		return gfx_fd;
	if (gfx_fd == -1)
		return sdma_fd;

	/* Get a fence that will be a combination of both fences. */
	sync_accumulate("radeonsi", &gfx_fd, sdma_fd);
	close(sdma_fd);
	return gfx_fd;
}
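
/* pipe_context::flush. The SDMA IB is flushed first because it can be a
 * preamble to the gfx IB; the gfx IB is then either flushed too or, for
 * PIPE_FLUSH_DEFERRED, recorded as an unflushed fence that fence_finish
 * will flush later. The resulting winsys fences are stored in the caller's
 * multi fence. */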
static void si_flush_from_st(struct pipe_context *ctx,
			     struct pipe_fence_handle **fence,
			     unsigned flags)
{
	struct pipe_screen *screen = ctx->screen;
	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
	struct radeon_winsys *ws = rctx->ws;
	struct pipe_fence_handle *gfx_fence = NULL;
	struct pipe_fence_handle *sdma_fence = NULL;
	bool deferred_fence = false;
	struct si_fine_fence fine = {};
	unsigned rflags = RADEON_FLUSH_ASYNC;

	if (flags & PIPE_FLUSH_END_OF_FRAME)
		rflags |= RADEON_FLUSH_END_OF_FRAME;

	if (flags & (PIPE_FLUSH_TOP_OF_PIPE | PIPE_FLUSH_BOTTOM_OF_PIPE)) {
		assert(flags & PIPE_FLUSH_DEFERRED);
		assert(fence);

		si_fine_fence_set((struct si_context *)rctx, &fine, flags);
	}

	/* DMA IBs are preambles to gfx IBs, therefore must be flushed first. */
	if (rctx->dma.cs)
		rctx->dma.flush(rctx, rflags, fence ? &sdma_fence : NULL);

	if (!radeon_emitted(rctx->gfx.cs, rctx->initial_gfx_cs_size)) {
		if (fence)
			ws->fence_reference(&gfx_fence, rctx->last_gfx_fence);
		if (!(flags & PIPE_FLUSH_DEFERRED))
			ws->cs_sync_flush(rctx->gfx.cs);
	} else {
		/* Instead of flushing, create a deferred fence. Constraints:
		 * - The state tracker must allow a deferred flush.
		 * - The state tracker must request a fence.
		 * - fence_get_fd is not allowed.
		 * Thread safety in fence_finish must be ensured by the state tracker.
		 */
		if (flags & PIPE_FLUSH_DEFERRED &&
		    !(flags & PIPE_FLUSH_FENCE_FD) &&
		    fence) {
			gfx_fence = rctx->ws->cs_get_next_fence(rctx->gfx.cs);
			deferred_fence = true;
		} else {
			rctx->gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL);
		}
	}

	/* Both engines can signal out of order, so we need to keep both fences. */
	if (fence) {
		struct si_multi_fence *multi_fence;

		if (flags & TC_FLUSH_ASYNC) {
			multi_fence = (struct si_multi_fence *)*fence;
			assert(multi_fence);
		} else {
			multi_fence = si_create_multi_fence();
			if (!multi_fence) {
				ws->fence_reference(&sdma_fence, NULL);
				ws->fence_reference(&gfx_fence, NULL);
				goto finish;
			}

			screen->fence_reference(screen, fence, NULL);
			*fence = (struct pipe_fence_handle *)multi_fence;
		}

		/* If both fences are NULL, fence_finish will always return true. */
		multi_fence->gfx = gfx_fence;
		multi_fence->sdma = sdma_fence;

		if (deferred_fence) {
			multi_fence->gfx_unflushed.ctx = rctx;
			multi_fence->gfx_unflushed.ib_index = rctx->num_gfx_cs_flushes;
		}

		multi_fence->fine = fine;
		fine.buf = NULL;

		if (flags & TC_FLUSH_ASYNC) {
			util_queue_fence_signal(&multi_fence->ready);
			tc_unflushed_batch_token_reference(&multi_fence->tc_token, NULL);
		}
	}
	assert(!fine.buf);
finish:
	if (!(flags & PIPE_FLUSH_DEFERRED)) {
		if (rctx->dma.cs)
			ws->cs_sync_flush(rctx->dma.cs);
		ws->cs_sync_flush(rctx->gfx.cs);
	}
}

void si_init_fence_functions(struct si_context *ctx)
{
	ctx->b.b.flush = si_flush_from_st;
	ctx->b.b.create_fence_fd = si_create_fence_fd;
	ctx->b.b.fence_server_sync = si_fence_server_sync;
}

void si_init_screen_fence_functions(struct si_screen *screen)
{
	screen->b.b.fence_finish = si_fence_finish;
	screen->b.b.fence_reference = si_fence_reference;
	screen->b.b.fence_get_fd = si_fence_get_fd;
}