gallium/radeon: create and return a fence in the flush function
[mesa.git] src/gallium/winsys/radeon/drm/radeon_drm_cs.c
/*
 * Copyright © 2008 Jérôme Glisse
 * Copyright © 2010 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
/*
 * Authors:
 *      Marek Olšák <maraeo@gmail.com>
 *
 * Based on work from libdrm_radeon by:
 *      Aapo Tahkola <aet@rasterburn.org>
 *      Nicolai Haehnle <prefect_@gmx.net>
 *      Jérôme Glisse <glisse@freedesktop.org>
 */

/*
    This file replaces libdrm's radeon_cs_gem with our own implementation.
    It's optimized specifically for Radeon DRM.
    Reloc writes and space checking are faster and simpler than their
    counterparts in libdrm (the time complexity of all the functions
    is O(1) in nearly all scenarios, thanks to hashing).

    It works like this:

    cs_add_reloc(cs, buf, read_domain, write_domain) adds a new relocation and
    also adds the size of 'buf' to the used_gart and used_vram winsys variables
    based on the domains, which are simply OR'd for accounting purposes.
    The adding is skipped if the reloc is already present in the list, but it
    still accounts for any newly-referenced domains.

    cs_validate is then called, which just checks:
        used_vram/gart < vram/gart_size * 0.8
    The 0.8 number allows for some memory fragmentation. If the validation
    fails, the pipe driver flushes the CS and tries to do the validation again,
    i.e. it validates only that one operation. If it fails again, it drops
    the operation on the floor and prints some nasty message to stderr.
    (done in the pipe driver)

    cs_write_reloc(cs, buf) just writes a reloc that has been added using
    cs_add_reloc. The read_domain and write_domain parameters have been removed,
    because we already specify them in cs_add_reloc.
*/
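
/*
    Illustrative sketch of the flow described above, as a pipe driver might
    drive it (not part of this file; 'ws', 'cs' and 'buf' are hypothetical
    placeholders, and the argument values are only examples):

        unsigned reloc = ws->cs_add_reloc(cs, buf, RADEON_USAGE_READWRITE,
                                          RADEON_DOMAIN_VRAM, RADEON_PRIO_MIN);
        if (!ws->cs_validate(cs)) {
            // Flush the current CS and retry the validation once.
            ws->cs_flush(cs, RADEON_FLUSH_ASYNC, NULL, 0);
            reloc = ws->cs_add_reloc(cs, buf, RADEON_USAGE_READWRITE,
                                     RADEON_DOMAIN_VRAM, RADEON_PRIO_MIN);
            if (!ws->cs_validate(cs)) {
                // Give up on this operation (handled in the pipe driver).
            }
        }
        ws->cs_write_reloc(cs, buf);
*/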

#include "radeon_drm_cs.h"

#include "util/u_memory.h"
#include "os/os_time.h"

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <xf86drm.h>


#define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))

static struct pipe_fence_handle *
radeon_cs_create_fence(struct radeon_winsys_cs *rcs);
static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src);

static boolean radeon_init_cs_context(struct radeon_cs_context *csc,
                                      struct radeon_drm_winsys *ws)
{
    csc->fd = ws->fd;
    csc->nrelocs = 512;
    csc->relocs_bo = (struct radeon_bo**)
                     CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*));
    if (!csc->relocs_bo) {
        return FALSE;
    }

    csc->relocs = (struct drm_radeon_cs_reloc*)
                  CALLOC(1, csc->nrelocs * sizeof(struct drm_radeon_cs_reloc));
    if (!csc->relocs) {
        FREE(csc->relocs_bo);
        return FALSE;
    }

    csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
    csc->chunks[0].length_dw = 0;
    csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf;
    csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
    csc->chunks[1].length_dw = 0;
    csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
    csc->chunks[2].length_dw = 2;
    csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)&csc->flags;

    csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0];
    csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1];
    csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2];

    csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array;
    return TRUE;
}

static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
{
    unsigned i;

    for (i = 0; i < csc->crelocs; i++) {
        p_atomic_dec(&csc->relocs_bo[i]->num_cs_references);
        radeon_bo_reference(&csc->relocs_bo[i], NULL);
    }

    csc->crelocs = 0;
    csc->validated_crelocs = 0;
    csc->chunks[0].length_dw = 0;
    csc->chunks[1].length_dw = 0;
    csc->used_gart = 0;
    csc->used_vram = 0;
    memset(csc->is_handle_added, 0, sizeof(csc->is_handle_added));
}

static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
{
    radeon_cs_context_cleanup(csc);
    FREE(csc->relocs_bo);
    FREE(csc->relocs);
}


static struct radeon_winsys_cs *
radeon_drm_cs_create(struct radeon_winsys *rws,
                     enum ring_type ring_type,
                     void (*flush)(void *ctx, unsigned flags,
                                   struct pipe_fence_handle **fence),
                     void *flush_ctx,
                     struct radeon_winsys_cs_handle *trace_buf)
{
    struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
    struct radeon_drm_cs *cs;

    cs = CALLOC_STRUCT(radeon_drm_cs);
    if (!cs) {
        return NULL;
    }
    pipe_semaphore_init(&cs->flush_completed, 1);

    cs->ws = ws;
    cs->flush_cs = flush;
    cs->flush_data = flush_ctx;
    cs->trace_buf = (struct radeon_bo*)trace_buf;

    if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
        FREE(cs);
        return NULL;
    }
    if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
        radeon_destroy_cs_context(&cs->csc1);
        FREE(cs);
        return NULL;
    }

    /* Set the first command buffer as current. */
    cs->csc = &cs->csc1;
    cs->cst = &cs->csc2;
    cs->base.buf = cs->csc->buf;
    cs->base.ring_type = ring_type;

    p_atomic_inc(&ws->num_cs);
    return &cs->base;
}

#define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value)

static INLINE void update_reloc(struct drm_radeon_cs_reloc *reloc,
                                enum radeon_bo_domain rd,
                                enum radeon_bo_domain wd,
                                unsigned priority,
                                enum radeon_bo_domain *added_domains)
{
    *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);

    reloc->read_domains |= rd;
    reloc->write_domain |= wd;
    reloc->flags = MAX2(reloc->flags, priority);
}

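/* Return the index of the reloc for the given buffer in the current CS,
 * or -1 if the buffer hasn't been added yet. Optionally return the reloc
 * itself through 'out_reloc'. The lookup is O(1) via a small hash of the
 * BO handle, with a linear search as the fallback on collisions. */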
int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo,
                     struct drm_radeon_cs_reloc **out_reloc)
{
    struct drm_radeon_cs_reloc *reloc = NULL;
    unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);
    int i = -1;

    if (csc->is_handle_added[hash]) {
        i = csc->reloc_indices_hashlist[hash];
        reloc = &csc->relocs[i];

        if (reloc->handle != bo->handle) {
            /* Hash collision, look for the BO in the list of relocs linearly. */
            for (i = csc->crelocs - 1; i >= 0; i--) {
                reloc = &csc->relocs[i];
                if (reloc->handle == bo->handle) {
                    /* Put this reloc in the hash list.
                     * This will prevent additional hash collisions if there are
                     * several consecutive get_reloc calls for the same buffer.
                     *
                     * Example: Assuming buffers A,B,C collide in the hash list,
                     * the following sequence of relocs:
                     *         AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
                     * will collide here: ^ and here:   ^,
                     * meaning that we should get very few collisions in the end. */
                    csc->reloc_indices_hashlist[hash] = i;
                    break;
                }
            }
        }
    }
    if (out_reloc)
        *out_reloc = reloc;
    return i;
}

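/* Add a relocation for 'bo' to the current CS and return its index.
 * If the buffer is already on the list, only the newly-referenced domains
 * and the priority are updated (except for async DMA without VM, which
 * needs one reloc per use). The reloc arrays are grown on demand, and the
 * set of newly-added domains is returned for memory accounting. */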
static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
                                 struct radeon_bo *bo,
                                 enum radeon_bo_usage usage,
                                 enum radeon_bo_domain domains,
                                 unsigned priority,
                                 enum radeon_bo_domain *added_domains)
{
    struct radeon_cs_context *csc = cs->csc;
    struct drm_radeon_cs_reloc *reloc;
    unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);
    enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
    enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
    int i = -1;

    priority = MIN2(priority, 15);
    *added_domains = 0;

    i = radeon_get_reloc(csc, bo, &reloc);

    if (i >= 0) {
        update_reloc(reloc, rd, wd, priority, added_domains);

        /* For async DMA, every add_reloc call must add a buffer to the list
         * no matter how many duplicates there are. This is due to the fact
         * the DMA CS checker doesn't use NOP packets for offset patching,
         * but always uses the i-th buffer from the list to patch the i-th
         * offset. If there are N offsets in a DMA CS, there must also be N
         * buffers in the relocation list.
         *
         * This doesn't have to be done if virtual memory is enabled,
         * because there is no offset patching with virtual memory.
         */
        if (cs->base.ring_type != RING_DMA || cs->ws->info.r600_virtual_address) {
            return i;
        }
    }

    /* New relocation, check if the backing array is large enough. */
    if (csc->crelocs >= csc->nrelocs) {
        uint32_t size;
        csc->nrelocs += 10;

        size = csc->nrelocs * sizeof(struct radeon_bo*);
        csc->relocs_bo = realloc(csc->relocs_bo, size);

        size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc);
        csc->relocs = realloc(csc->relocs, size);

        csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    }

    /* Initialize the new relocation. */
    csc->relocs_bo[csc->crelocs] = NULL;
    radeon_bo_reference(&csc->relocs_bo[csc->crelocs], bo);
    p_atomic_inc(&bo->num_cs_references);
    reloc = &csc->relocs[csc->crelocs];
    reloc->handle = bo->handle;
    reloc->read_domains = rd;
    reloc->write_domain = wd;
    reloc->flags = priority;

    csc->is_handle_added[hash] = TRUE;
    csc->reloc_indices_hashlist[hash] = csc->crelocs;

    csc->chunks[1].length_dw += RELOC_DWORDS;

    *added_domains = rd | wd;
    return csc->crelocs++;
}

static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
                                        struct radeon_winsys_cs_handle *buf,
                                        enum radeon_bo_usage usage,
                                        enum radeon_bo_domain domains,
                                        enum radeon_bo_priority priority)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)buf;
    enum radeon_bo_domain added_domains;
    unsigned index = radeon_add_reloc(cs, bo, usage, domains, priority, &added_domains);

    if (added_domains & RADEON_DOMAIN_GTT)
        cs->csc->used_gart += bo->base.size;
    if (added_domains & RADEON_DOMAIN_VRAM)
        cs->csc->used_vram += bo->base.size;

    return index;
}

static int radeon_drm_cs_get_reloc(struct radeon_winsys_cs *rcs,
                                   struct radeon_winsys_cs_handle *buf)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    return radeon_get_reloc(cs->csc, (struct radeon_bo*)buf, NULL);
}

static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    boolean status =
        cs->csc->used_gart < cs->ws->info.gart_size * 0.8 &&
        cs->csc->used_vram < cs->ws->info.vram_size * 0.8;

    if (status) {
        cs->csc->validated_crelocs = cs->csc->crelocs;
    } else {
        /* Remove recently-added relocations. The validation failed with them
         * and the CS is about to be flushed because of that. Keep only
         * the already-validated relocations. */
        unsigned i;

        for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) {
            p_atomic_dec(&cs->csc->relocs_bo[i]->num_cs_references);
            radeon_bo_reference(&cs->csc->relocs_bo[i], NULL);
        }
        cs->csc->crelocs = cs->csc->validated_crelocs;

        /* Flush if there are any relocs. Clean up otherwise. */
        if (cs->csc->crelocs) {
            cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
        } else {
            radeon_cs_context_cleanup(cs->csc);

            assert(cs->base.cdw == 0);
            if (cs->base.cdw != 0) {
                fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__);
            }
        }
    }
    return status;
}

static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    boolean status =
        (cs->csc->used_gart + gtt) < cs->ws->info.gart_size * 0.7 &&
        (cs->csc->used_vram + vram) < cs->ws->info.vram_size * 0.7;

    return status;
}

void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc)
{
    unsigned i;

    if (drmCommandWriteRead(csc->fd, DRM_RADEON_CS,
                            &csc->cs, sizeof(struct drm_radeon_cs))) {
        if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) {
            unsigned i;

            fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n");
            for (i = 0; i < csc->chunks[0].length_dw; i++) {
                fprintf(stderr, "0x%08X\n", csc->buf[i]);
            }
        } else {
            fprintf(stderr, "radeon: The kernel rejected CS, "
                    "see dmesg for more information.\n");
        }
    }

    if (cs->trace_buf) {
        radeon_dump_cs_on_lockup(cs, csc);
    }

    for (i = 0; i < csc->crelocs; i++)
        p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls);

    radeon_cs_context_cleanup(csc);
}

/*
 * Make sure previous submissions of this cs are completed.
 */
void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    /* Wait for any pending ioctl to complete. */
    if (cs->ws->thread) {
        pipe_semaphore_wait(&cs->flush_completed);
        pipe_semaphore_signal(&cs->flush_completed);
    }
}

DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", FALSE)

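/* Flush the current CS: pad the IB to the required alignment, optionally
 * create and return a fence, then swap the two CS contexts (csc/cst) so
 * that the driver can keep recording into one context while the other is
 * submitted to the kernel, either on the winsys flush thread or directly
 * with the CS ioctl. */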
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
                                unsigned flags,
                                struct pipe_fence_handle **fence,
                                uint32_t cs_trace_id)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    switch (cs->base.ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements;
         * r6xx requires at least 4 dw alignment to avoid a hw bug.
         */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
        }
        break;
    case RING_UVD:
        while (rcs->cdw & 15)
            OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        break;
    default:
        break;
    }

    if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) {
        fprintf(stderr, "radeon: command stream overflowed\n");
    }

    if (fence) {
        radeon_fence_reference(fence, NULL);
        *fence = radeon_cs_create_fence(rcs);
    }

    radeon_drm_cs_sync_flush(rcs);

    /* Swap command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    cs->cst->cs_trace_id = cs_trace_id;

    /* If the CS is not empty and not overflowed, emit it in a separate thread. */
    if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) {
        unsigned i, crelocs;

        crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
        }

        switch (cs->base.ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        case RING_VCE:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_VCE;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 2;
            if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
                cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_COMPUTE) {
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

        if (cs->ws->thread) {
            pipe_semaphore_wait(&cs->flush_completed);
            radeon_drm_ws_queue_cs(cs->ws, cs);
            if (!(flags & RADEON_FLUSH_ASYNC))
                radeon_drm_cs_sync_flush(rcs);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.buf = cs->csc->buf;
    cs->base.cdw = 0;

    cs->ws->num_cs_flushes++;
}

static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    radeon_drm_cs_sync_flush(rcs);
    pipe_semaphore_destroy(&cs->flush_completed);
    radeon_cs_context_cleanup(&cs->csc1);
    radeon_cs_context_cleanup(&cs->csc2);
    p_atomic_dec(&cs->ws->num_cs);
    radeon_destroy_cs_context(&cs->csc1);
    radeon_destroy_cs_context(&cs->csc2);
    FREE(cs);
}

static boolean radeon_bo_is_referenced(struct radeon_winsys_cs *rcs,
                                       struct radeon_winsys_cs_handle *_buf,
                                       enum radeon_bo_usage usage)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)_buf;
    int index;

    if (!bo->num_cs_references)
        return FALSE;

    index = radeon_get_reloc(cs->csc, bo, NULL);
    if (index == -1)
        return FALSE;

    if ((usage & RADEON_USAGE_WRITE) && cs->csc->relocs[index].write_domain)
        return TRUE;
    if ((usage & RADEON_USAGE_READ) && cs->csc->relocs[index].read_domains)
        return TRUE;

    return FALSE;
}

/* FENCES */

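/* A fence is implemented as a dummy one-byte GTT buffer added to the CS as
 * a relocation: once the kernel has completed the submission that references
 * it, the buffer goes idle, so waiting on the fence reduces to waiting for
 * the buffer to become idle. */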
static struct pipe_fence_handle *
radeon_cs_create_fence(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct pb_buffer *fence;

    /* Create a fence, which is a dummy BO. */
    fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1, TRUE,
                                       RADEON_DOMAIN_GTT);
    /* Add the fence as a dummy relocation. */
    cs->ws->base.cs_add_reloc(rcs, cs->ws->base.buffer_get_cs_handle(fence),
                              RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT,
                              RADEON_PRIO_MIN);
    return (struct pipe_fence_handle*)fence;
}

static bool radeon_fence_wait(struct radeon_winsys *ws,
                              struct pipe_fence_handle *fence,
                              uint64_t timeout)
{
    struct pb_buffer *rfence = (struct pb_buffer*)fence;

    if (timeout == 0)
        return !ws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE);

    if (timeout != PIPE_TIMEOUT_INFINITE) {
        int64_t start_time = os_time_get();

        /* Convert to microseconds. */
        timeout /= 1000;

        /* Wait in a loop. */
        while (ws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE)) {
            if (os_time_get() - start_time >= timeout) {
                return FALSE;
            }
            os_time_sleep(10);
        }
        return TRUE;
    }

    ws->buffer_wait(rfence, RADEON_USAGE_READWRITE);
    return TRUE;
}

static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src)
{
    pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src);
}

void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
{
    ws->base.cs_create = radeon_drm_cs_create;
    ws->base.cs_destroy = radeon_drm_cs_destroy;
    ws->base.cs_add_reloc = radeon_drm_cs_add_reloc;
    ws->base.cs_get_reloc = radeon_drm_cs_get_reloc;
    ws->base.cs_validate = radeon_drm_cs_validate;
    ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
    ws->base.cs_flush = radeon_drm_cs_flush;
    ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
    ws->base.cs_sync_flush = radeon_drm_cs_sync_flush;
    ws->base.fence_wait = radeon_fence_wait;
    ws->base.fence_reference = radeon_fence_reference;
}