radeonsi: document some subtle details of fence_finish & fence_server_sync
[mesa.git] src/gallium/drivers/radeonsi/si_fence.c
/*
 * Copyright 2013-2017 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <libsync.h>

#include "util/os_time.h"
#include "util/u_memory.h"
#include "util/u_queue.h"

#include "si_pipe.h"

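/* A fence that can cover both the gfx and SDMA rings. For deferred flushes,
 * gfx_unflushed records which context and IB still have to be flushed before
 * the gfx fence becomes valid.
 */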
struct si_multi_fence {
        struct pipe_reference reference;
        struct pipe_fence_handle *gfx;
        struct pipe_fence_handle *sdma;
        struct tc_unflushed_batch_token *tc_token;
        struct util_queue_fence ready;

        /* If the context wasn't flushed at fence creation, this is non-NULL. */
        struct {
                struct r600_common_context *ctx;
                unsigned ib_index;
        } gfx_unflushed;
};

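/* Make all work submitted after this call on the context's gfx IB (and SDMA
 * IB, if present) wait for the given fence to be signalled on the GPU.
 */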
static void si_add_fence_dependency(struct r600_common_context *rctx,
                                     struct pipe_fence_handle *fence)
{
        struct radeon_winsys *ws = rctx->ws;

        if (rctx->dma.cs)
                ws->cs_add_fence_dependency(rctx->dma.cs, fence);
        ws->cs_add_fence_dependency(rctx->gfx.cs, fence);
}

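/* pipe_screen::fence_reference: point *dst at src, releasing the gfx/sdma
 * fences and the TC token of the old fence when its last reference goes away.
 */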
static void si_fence_reference(struct pipe_screen *screen,
                               struct pipe_fence_handle **dst,
                               struct pipe_fence_handle *src)
{
        struct radeon_winsys *ws = ((struct r600_common_screen*)screen)->ws;
        struct si_multi_fence **rdst = (struct si_multi_fence **)dst;
        struct si_multi_fence *rsrc = (struct si_multi_fence *)src;

        if (pipe_reference(&(*rdst)->reference, &rsrc->reference)) {
                ws->fence_reference(&(*rdst)->gfx, NULL);
                ws->fence_reference(&(*rdst)->sdma, NULL);
                tc_unflushed_batch_token_reference(&(*rdst)->tc_token, NULL);
                FREE(*rdst);
        }
        *rdst = rsrc;
}

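/* Allocate an empty multi fence with a single reference; the gfx/sdma fences
 * are filled in later by the callers.
 */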
static struct si_multi_fence *si_create_multi_fence(void)
{
        struct si_multi_fence *fence = CALLOC_STRUCT(si_multi_fence);
        if (!fence)
                return NULL;

        pipe_reference_init(&fence->reference, 1);
        util_queue_fence_init(&fence->ready);

        return fence;
}

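/* Create a fence before the corresponding flush has been processed (threaded
 * context): "ready" starts out unsignalled and is only signalled by
 * si_flush_from_st, while the TC token allows fence_finish to force that flush.
 */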
struct pipe_fence_handle *si_create_fence(struct pipe_context *ctx,
                                          struct tc_unflushed_batch_token *tc_token)
{
        struct si_multi_fence *fence = si_create_multi_fence();
        if (!fence)
                return NULL;

        util_queue_fence_reset(&fence->ready);
        tc_unflushed_batch_token_reference(&fence->tc_token, tc_token);

        return (struct pipe_fence_handle *)fence;
}

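/* pipe_context::fence_server_sync: instead of waiting on the CPU, make GPU
 * commands recorded after this call depend on the given fence.
 */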
static void si_fence_server_sync(struct pipe_context *ctx,
                                 struct pipe_fence_handle *fence)
{
        struct r600_common_context *rctx = (struct r600_common_context *)ctx;
        struct si_multi_fence *rfence = (struct si_multi_fence *)fence;

        util_queue_fence_wait(&rfence->ready);

        /* Unflushed fences from the same context are no-ops. */
        if (rfence->gfx_unflushed.ctx &&
            rfence->gfx_unflushed.ctx == rctx)
                return;

        /* None of the unflushed commands will start execution before
         * this fence dependency is signalled.
         *
         * Should we flush the context to allow more GPU parallelism?
         */
        if (rfence->sdma)
                si_add_fence_dependency(rctx, rfence->sdma);
        if (rfence->gfx)
                si_add_fence_dependency(rctx, rfence->gfx);
}

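/* pipe_screen::fence_finish: wait on the CPU with a relative timeout in
 * nanoseconds (0 = poll, PIPE_TIMEOUT_INFINITE = wait forever). Unflushed
 * threaded-context and deferred fences may have to be flushed here first.
 */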
static boolean si_fence_finish(struct pipe_screen *screen,
                               struct pipe_context *ctx,
                               struct pipe_fence_handle *fence,
                               uint64_t timeout)
{
        struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws;
        struct si_multi_fence *rfence = (struct si_multi_fence *)fence;
        struct r600_common_context *rctx;
        int64_t abs_timeout = os_time_get_absolute_timeout(timeout);

        ctx = threaded_context_unwrap_sync(ctx);
        rctx = ctx ? (struct r600_common_context*)ctx : NULL;

        if (!util_queue_fence_is_signalled(&rfence->ready)) {
                if (!timeout)
                        return false;

                if (rfence->tc_token) {
                        /* Ensure that si_flush_from_st will be called for
                         * this fence, but only if we're in the API thread
                         * where the context is current.
                         *
                         * Note that the batch containing the flush may already
                         * be in flight in the driver thread, so the fence
                         * may not be ready yet when this call returns.
                         */
                        threaded_context_flush(ctx, rfence->tc_token);
                }

                if (timeout == PIPE_TIMEOUT_INFINITE) {
                        util_queue_fence_wait(&rfence->ready);
                } else {
                        if (!util_queue_fence_wait_timeout(&rfence->ready, abs_timeout))
                                return false;
                }
        }

        if (rfence->sdma) {
                if (!rws->fence_wait(rws, rfence->sdma, timeout))
                        return false;

                /* Recompute the timeout after waiting. */
                if (timeout && timeout != PIPE_TIMEOUT_INFINITE) {
                        int64_t time = os_time_get_nano();
                        timeout = abs_timeout > time ? abs_timeout - time : 0;
                }
        }

        if (!rfence->gfx)
                return true;

        /* Flush the gfx IB if it hasn't been flushed yet. */
        if (rctx &&
            rfence->gfx_unflushed.ctx == rctx &&
            rfence->gfx_unflushed.ib_index == rctx->num_gfx_cs_flushes) {
                /* Section 4.1.2 (Signaling) of the OpenGL 4.6 (Core profile)
                 * spec says:
                 *
                 *    "If the sync object being blocked upon will not be
                 *     signaled in finite time (for example, by an associated
                 *     fence command issued previously, but not yet flushed to
                 *     the graphics pipeline), then ClientWaitSync may hang
                 *     forever. To help prevent this behavior, if
                 *     ClientWaitSync is called and all of the following are
                 *     true:
                 *
                 *     * the SYNC_FLUSH_COMMANDS_BIT bit is set in flags,
                 *     * sync is unsignaled when ClientWaitSync is called,
                 *     * and the calls to ClientWaitSync and FenceSync were
                 *       issued from the same context,
                 *
                 *     then the GL will behave as if the equivalent of Flush
                 *     were inserted immediately after the creation of sync."
                 *
                 * This means we need to flush for such fences even when we're
                 * not going to wait.
                 */
                rctx->gfx.flush(rctx, timeout ? 0 : RADEON_FLUSH_ASYNC, NULL);
                rfence->gfx_unflushed.ctx = NULL;

                if (!timeout)
                        return false;

                /* Recompute the timeout after all that. */
                if (timeout && timeout != PIPE_TIMEOUT_INFINITE) {
                        int64_t time = os_time_get_nano();
                        timeout = abs_timeout > time ? abs_timeout - time : 0;
                }
        }

        return rws->fence_wait(rws, rfence->gfx, timeout);
}

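/* pipe_context::create_fence_fd: import a sync_file fd as the gfx fence.
 * *pfence is left NULL if sync_file isn't supported or the import fails.
 */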
static void si_create_fence_fd(struct pipe_context *ctx,
                               struct pipe_fence_handle **pfence, int fd)
{
        struct r600_common_screen *rscreen = (struct r600_common_screen*)ctx->screen;
        struct radeon_winsys *ws = rscreen->ws;
        struct si_multi_fence *rfence;

        *pfence = NULL;

        if (!rscreen->info.has_sync_file)
                return;

        rfence = si_create_multi_fence();
        if (!rfence)
                return;

        rfence->gfx = ws->fence_import_sync_file(ws, fd);
        if (!rfence->gfx) {
                FREE(rfence);
                return;
        }

        *pfence = (struct pipe_fence_handle*)rfence;
}

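/* pipe_screen::fence_get_fd: export the fence as a sync_file fd, merging the
 * gfx and SDMA fences into a single fd if both exist. Returns -1 on failure.
 */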
static int si_fence_get_fd(struct pipe_screen *screen,
                           struct pipe_fence_handle *fence)
{
        struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
        struct radeon_winsys *ws = rscreen->ws;
        struct si_multi_fence *rfence = (struct si_multi_fence *)fence;
        int gfx_fd = -1, sdma_fd = -1;

        if (!rscreen->info.has_sync_file)
                return -1;

        util_queue_fence_wait(&rfence->ready);

        /* Deferred fences aren't supported. */
        assert(!rfence->gfx_unflushed.ctx);
        if (rfence->gfx_unflushed.ctx)
                return -1;

        if (rfence->sdma) {
                sdma_fd = ws->fence_export_sync_file(ws, rfence->sdma);
                if (sdma_fd == -1)
                        return -1;
        }
        if (rfence->gfx) {
                gfx_fd = ws->fence_export_sync_file(ws, rfence->gfx);
                if (gfx_fd == -1) {
                        if (sdma_fd != -1)
                                close(sdma_fd);
                        return -1;
                }
        }

        /* If we don't have FDs at this point, it means we don't have fences
         * either. */
        if (sdma_fd == -1)
                return gfx_fd;
        if (gfx_fd == -1)
                return sdma_fd;

        /* Get a fence that will be a combination of both fences. */
        sync_accumulate("radeonsi", &gfx_fd, sdma_fd);
        close(sdma_fd);
        return gfx_fd;
}

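/* pipe_context::flush: flush the SDMA IB (if any) and the gfx IB and
 * optionally return a multi fence covering both. With PIPE_FLUSH_DEFERRED,
 * the gfx flush may be postponed and a deferred fence recorded instead.
 */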
static void si_flush_from_st(struct pipe_context *ctx,
                             struct pipe_fence_handle **fence,
                             unsigned flags)
{
        struct pipe_screen *screen = ctx->screen;
        struct r600_common_context *rctx = (struct r600_common_context *)ctx;
        struct radeon_winsys *ws = rctx->ws;
        struct pipe_fence_handle *gfx_fence = NULL;
        struct pipe_fence_handle *sdma_fence = NULL;
        bool deferred_fence = false;
        unsigned rflags = RADEON_FLUSH_ASYNC;

        if (flags & PIPE_FLUSH_END_OF_FRAME)
                rflags |= RADEON_FLUSH_END_OF_FRAME;

        /* DMA IBs are preambles to gfx IBs, therefore must be flushed first. */
        if (rctx->dma.cs)
                rctx->dma.flush(rctx, rflags, fence ? &sdma_fence : NULL);

        if (!radeon_emitted(rctx->gfx.cs, rctx->initial_gfx_cs_size)) {
                if (fence)
                        ws->fence_reference(&gfx_fence, rctx->last_gfx_fence);
                if (!(flags & PIPE_FLUSH_DEFERRED))
                        ws->cs_sync_flush(rctx->gfx.cs);
        } else {
                /* Instead of flushing, create a deferred fence. Constraints:
                 * - The state tracker must allow a deferred flush.
                 * - The state tracker must request a fence.
                 * - fence_get_fd is not allowed.
                 * Thread safety in fence_finish must be ensured by the state tracker.
                 */
                if (flags & PIPE_FLUSH_DEFERRED &&
                    !(flags & PIPE_FLUSH_FENCE_FD) &&
                    fence) {
                        gfx_fence = rctx->ws->cs_get_next_fence(rctx->gfx.cs);
                        deferred_fence = true;
                } else {
                        rctx->gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL);
                }
        }

        /* Both engines can signal out of order, so we need to keep both fences. */
        if (fence) {
                struct si_multi_fence *multi_fence;

                if (flags & TC_FLUSH_ASYNC) {
                        multi_fence = (struct si_multi_fence *)*fence;
                        assert(multi_fence);
                } else {
                        multi_fence = si_create_multi_fence();
                        if (!multi_fence) {
                                ws->fence_reference(&sdma_fence, NULL);
                                ws->fence_reference(&gfx_fence, NULL);
                                goto finish;
                        }

                        screen->fence_reference(screen, fence, NULL);
                        *fence = (struct pipe_fence_handle*)multi_fence;
                }

                /* If both fences are NULL, fence_finish will always return true. */
                multi_fence->gfx = gfx_fence;
                multi_fence->sdma = sdma_fence;

                if (deferred_fence) {
                        multi_fence->gfx_unflushed.ctx = rctx;
                        multi_fence->gfx_unflushed.ib_index = rctx->num_gfx_cs_flushes;
                }

                if (flags & TC_FLUSH_ASYNC) {
                        util_queue_fence_signal(&multi_fence->ready);
                        tc_unflushed_batch_token_reference(&multi_fence->tc_token, NULL);
                }
        }
finish:
        if (!(flags & PIPE_FLUSH_DEFERRED)) {
                if (rctx->dma.cs)
                        ws->cs_sync_flush(rctx->dma.cs);
                ws->cs_sync_flush(rctx->gfx.cs);
        }
}

void si_init_fence_functions(struct si_context *ctx)
{
        ctx->b.b.flush = si_flush_from_st;
        ctx->b.b.create_fence_fd = si_create_fence_fd;
        ctx->b.b.fence_server_sync = si_fence_server_sync;
}

void si_init_screen_fence_functions(struct si_screen *screen)
{
        screen->b.b.fence_finish = si_fence_finish;
        screen->b.b.fence_reference = si_fence_reference;
        screen->b.b.fence_get_fd = si_fence_get_fd;
}