winsys/radeon: remove is_handle_added array
mesa.git: src/gallium/winsys/radeon/drm/radeon_drm_cs.c
/*
 * Copyright © 2008 Jérôme Glisse
 * Copyright © 2010 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
/*
 * Authors:
 *      Marek Olšák <maraeo@gmail.com>
 *
 * Based on work from libdrm_radeon by:
 *      Aapo Tahkola <aet@rasterburn.org>
 *      Nicolai Haehnle <prefect_@gmx.net>
 *      Jérôme Glisse <glisse@freedesktop.org>
 */

/*
    This file replaces libdrm's radeon_cs_gem with our own implementation.
    It's optimized specifically for Radeon DRM.
    Reloc writes and space checking are faster and simpler than their
    counterparts in libdrm (the time complexity of all the functions
    is O(1) in nearly all scenarios, thanks to hashing).

    It works like this:

    cs_add_reloc(cs, buf, read_domain, write_domain) adds a new relocation and
    also adds the size of 'buf' to the used_gart and used_vram winsys variables
    based on the domains, which are simply or'd for the accounting purposes.
    The adding is skipped if the reloc is already present in the list, but it
    accounts for any newly-referenced domains.

    cs_validate is then called, which just checks:
        used_vram/gart < vram/gart_size * 0.8
    The 0.8 number allows for some memory fragmentation. If the validation
    fails, the pipe driver flushes the CS and tries to do the validation again,
    i.e. it validates only that one operation. If it fails again, it drops
    the operation on the floor and prints some nasty message to stderr.
    (done in the pipe driver)

    cs_write_reloc(cs, buf) just writes a reloc that has been added using
    cs_add_reloc. The read_domain and write_domain parameters have been removed,
    because we already specify them in cs_add_reloc.
*/
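
/*
    A rough sketch of the call pattern expected from a pipe driver
    (the variable names and error handling below are illustrative only,
    not part of this winsys):

        ws->cs_add_reloc(cs, buf, RADEON_USAGE_READWRITE,
                         RADEON_DOMAIN_VRAM, RADEON_PRIO_MIN);
        if (!ws->cs_validate(cs)) {
            // cs_validate has already flushed the validated part of the CS;
            // re-add the reloc and validate just this one operation
            ws->cs_add_reloc(cs, buf, RADEON_USAGE_READWRITE,
                             RADEON_DOMAIN_VRAM, RADEON_PRIO_MIN);
            if (!ws->cs_validate(cs)) {
                // drop the operation and report it
            }
        }
        // emit packets with OUT_CS() and write relocs with cs_write_reloc()
*/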

#include "radeon_drm_cs.h"

#include "util/u_memory.h"
#include "os/os_time.h"

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <xf86drm.h>


#define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))

static struct pipe_fence_handle *
radeon_cs_create_fence(struct radeon_winsys_cs *rcs);
static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src);

static boolean radeon_init_cs_context(struct radeon_cs_context *csc,
                                      struct radeon_drm_winsys *ws)
{
    int i;

    csc->fd = ws->fd;
    csc->nrelocs = 512;
    csc->relocs_bo = (struct radeon_bo**)
                     CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*));
    if (!csc->relocs_bo) {
        return FALSE;
    }

    csc->relocs = (struct drm_radeon_cs_reloc*)
                  CALLOC(1, csc->nrelocs * sizeof(struct drm_radeon_cs_reloc));
    if (!csc->relocs) {
        FREE(csc->relocs_bo);
        return FALSE;
    }

    csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
    csc->chunks[0].length_dw = 0;
    csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf;
    csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
    csc->chunks[1].length_dw = 0;
    csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
    csc->chunks[2].length_dw = 2;
    csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)&csc->flags;

    csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0];
    csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1];
    csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2];

    csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array;

    for (i = 0; i < Elements(csc->reloc_indices_hashlist); i++) {
        csc->reloc_indices_hashlist[i] = -1;
    }
    return TRUE;
}

static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
{
    unsigned i;

    for (i = 0; i < csc->crelocs; i++) {
        p_atomic_dec(&csc->relocs_bo[i]->num_cs_references);
        radeon_bo_reference(&csc->relocs_bo[i], NULL);
    }

    csc->crelocs = 0;
    csc->validated_crelocs = 0;
    csc->chunks[0].length_dw = 0;
    csc->chunks[1].length_dw = 0;
    csc->used_gart = 0;
    csc->used_vram = 0;

    for (i = 0; i < Elements(csc->reloc_indices_hashlist); i++) {
        csc->reloc_indices_hashlist[i] = -1;
    }
}

static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
{
    radeon_cs_context_cleanup(csc);
    FREE(csc->relocs_bo);
    FREE(csc->relocs);
}


static struct radeon_winsys_cs *
radeon_drm_cs_create(struct radeon_winsys *rws,
                     enum ring_type ring_type,
                     void (*flush)(void *ctx, unsigned flags,
                                   struct pipe_fence_handle **fence),
                     void *flush_ctx,
                     struct radeon_winsys_cs_handle *trace_buf)
{
    struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
    struct radeon_drm_cs *cs;

    cs = CALLOC_STRUCT(radeon_drm_cs);
    if (!cs) {
        return NULL;
    }
    pipe_semaphore_init(&cs->flush_completed, 1);

    cs->ws = ws;
    cs->flush_cs = flush;
    cs->flush_data = flush_ctx;
    cs->trace_buf = (struct radeon_bo*)trace_buf;

    if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
        FREE(cs);
        return NULL;
    }
    if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
        radeon_destroy_cs_context(&cs->csc1);
        FREE(cs);
        return NULL;
    }

    /* Set the first command buffer as current. */
    cs->csc = &cs->csc1;
    cs->cst = &cs->csc2;
    cs->base.buf = cs->csc->buf;
    cs->base.ring_type = ring_type;

    p_atomic_inc(&ws->num_cs);
    return &cs->base;
}

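/* Append one dword to the command buffer and advance the write pointer. */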
#define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value)

static INLINE void update_reloc(struct drm_radeon_cs_reloc *reloc,
                                enum radeon_bo_domain rd,
                                enum radeon_bo_domain wd,
                                unsigned priority,
                                enum radeon_bo_domain *added_domains)
{
    *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);

    reloc->read_domains |= rd;
    reloc->write_domain |= wd;
    reloc->flags = MAX2(reloc->flags, priority);
}

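/* Return the index of the reloc entry for 'bo' in 'csc', or -1 if the buffer
 * hasn't been added yet. A small hash table indexed by the buffer handle makes
 * the common case O(1); on a collision we fall back to a linear search and
 * update the hash slot to point at the match. */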
int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo)
{
    unsigned hash = bo->handle & (Elements(csc->reloc_indices_hashlist)-1);
    int i = csc->reloc_indices_hashlist[hash];

    /* Fast path: either not found (-1) or found directly at the hashed slot. */
    if (i == -1 || csc->relocs_bo[i] == bo)
        return i;

    /* Hash collision, look for the BO in the list of relocs linearly. */
    for (i = csc->crelocs - 1; i >= 0; i--) {
        if (csc->relocs_bo[i] == bo) {
            /* Put this reloc in the hash list.
             * This will prevent additional hash collisions if there are
             * several consecutive get_reloc calls for the same buffer.
             *
             * Example: Assuming buffers A,B,C collide in the hash list,
             * the following sequence of relocs:
             *         AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
             * will collide here: ^ and here:   ^,
             * meaning that we should get very few collisions in the end. */
            csc->reloc_indices_hashlist[hash] = i;
            return i;
        }
    }
    return -1;
}

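/* Add 'bo' to the relocation list of the current CS context and return its
 * index. If the buffer is already on the list, only its domains and priority
 * are updated (except on the DMA ring without virtual memory, which needs one
 * list entry per patched offset). The backing arrays grow by 10 entries when
 * they run out of space. *added_domains receives the domains this buffer was
 * not referenced with before, so the caller can update the memory usage
 * counters. */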
static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
                                 struct radeon_bo *bo,
                                 enum radeon_bo_usage usage,
                                 enum radeon_bo_domain domains,
                                 unsigned priority,
                                 enum radeon_bo_domain *added_domains)
{
    struct radeon_cs_context *csc = cs->csc;
    struct drm_radeon_cs_reloc *reloc;
    unsigned hash = bo->handle & (Elements(csc->reloc_indices_hashlist)-1);
    enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
    enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
    int i = -1;

    priority = MIN2(priority, 15);
    *added_domains = 0;

    i = radeon_get_reloc(csc, bo);

    if (i >= 0) {
        reloc = &csc->relocs[i];
        update_reloc(reloc, rd, wd, priority, added_domains);

        /* For async DMA, every add_reloc call must add a buffer to the list
         * no matter how many duplicates there are. This is due to the fact
         * the DMA CS checker doesn't use NOP packets for offset patching,
         * but always uses the i-th buffer from the list to patch the i-th
         * offset. If there are N offsets in a DMA CS, there must also be N
         * buffers in the relocation list.
         *
         * This doesn't have to be done if virtual memory is enabled,
         * because there is no offset patching with virtual memory.
         */
        if (cs->base.ring_type != RING_DMA || cs->ws->info.r600_virtual_address) {
            return i;
        }
    }

    /* New relocation, check if the backing array is large enough. */
    if (csc->crelocs >= csc->nrelocs) {
        uint32_t size;
        csc->nrelocs += 10;

        size = csc->nrelocs * sizeof(struct radeon_bo*);
        csc->relocs_bo = realloc(csc->relocs_bo, size);

        size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc);
        csc->relocs = realloc(csc->relocs, size);

        csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    }

    /* Initialize the new relocation. */
    csc->relocs_bo[csc->crelocs] = NULL;
    radeon_bo_reference(&csc->relocs_bo[csc->crelocs], bo);
    p_atomic_inc(&bo->num_cs_references);
    reloc = &csc->relocs[csc->crelocs];
    reloc->handle = bo->handle;
    reloc->read_domains = rd;
    reloc->write_domain = wd;
    reloc->flags = priority;

    csc->reloc_indices_hashlist[hash] = csc->crelocs;

    csc->chunks[1].length_dw += RELOC_DWORDS;

    *added_domains = rd | wd;
    return csc->crelocs++;
}

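/* winsys entry point: add a relocation for 'buf' and charge its size against
 * the GTT/VRAM usage of the current CS for any newly-referenced domains. */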
static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
                                        struct radeon_winsys_cs_handle *buf,
                                        enum radeon_bo_usage usage,
                                        enum radeon_bo_domain domains,
                                        enum radeon_bo_priority priority)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)buf;
    enum radeon_bo_domain added_domains;
    unsigned index = radeon_add_reloc(cs, bo, usage, domains, priority, &added_domains);

    if (added_domains & RADEON_DOMAIN_GTT)
        cs->csc->used_gart += bo->base.size;
    if (added_domains & RADEON_DOMAIN_VRAM)
        cs->csc->used_vram += bo->base.size;

    return index;
}

static int radeon_drm_cs_get_reloc(struct radeon_winsys_cs *rcs,
                                   struct radeon_winsys_cs_handle *buf)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    return radeon_get_reloc(cs->csc, (struct radeon_bo*)buf);
}

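/* Check that the memory referenced by the CS still fits into 80% of GART and
 * of VRAM. On failure, drop the relocations added since the last successful
 * validation and flush the already-validated part of the CS, so that the
 * caller can retry with only the failing operation. */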
static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    boolean status =
        cs->csc->used_gart < cs->ws->info.gart_size * 0.8 &&
        cs->csc->used_vram < cs->ws->info.vram_size * 0.8;

    if (status) {
        cs->csc->validated_crelocs = cs->csc->crelocs;
    } else {
        /* Remove the recently-added relocations. The validation failed with
         * them and the CS is about to be flushed because of that. Keep only
         * the already-validated relocations. */
        unsigned i;

        for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) {
            p_atomic_dec(&cs->csc->relocs_bo[i]->num_cs_references);
            radeon_bo_reference(&cs->csc->relocs_bo[i], NULL);
        }
        cs->csc->crelocs = cs->csc->validated_crelocs;

        /* Flush if there are any relocs. Clean up otherwise. */
        if (cs->csc->crelocs) {
            cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
        } else {
            radeon_cs_context_cleanup(cs->csc);

            assert(cs->base.cdw == 0);
            if (cs->base.cdw != 0) {
                fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__);
            }
        }
    }
    return status;
}

static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    boolean status =
        (cs->csc->used_gart + gtt) < cs->ws->info.gart_size * 0.7 &&
        (cs->csc->used_vram + vram) < cs->ws->info.vram_size * 0.7;

    return status;
}

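/* Submit the given CS context to the kernel with the DRM_RADEON_CS ioctl.
 * If the kernel rejects it, print a diagnostic (set RADEON_DUMP_CS=1 to dump
 * the IB contents), then release the per-buffer ioctl counters and reset the
 * context for reuse. */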
void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc)
{
    unsigned i;

    if (drmCommandWriteRead(csc->fd, DRM_RADEON_CS,
                            &csc->cs, sizeof(struct drm_radeon_cs))) {
        if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) {
            unsigned i;

            fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n");
            for (i = 0; i < csc->chunks[0].length_dw; i++) {
                fprintf(stderr, "0x%08X\n", csc->buf[i]);
            }
        } else {
            fprintf(stderr, "radeon: The kernel rejected CS, "
                            "see dmesg for more information.\n");
        }
    }

    if (cs->trace_buf) {
        radeon_dump_cs_on_lockup(cs, csc);
    }

    for (i = 0; i < csc->crelocs; i++)
        p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls);

    radeon_cs_context_cleanup(csc);
}

/*
 * Make sure that all previous submissions of this CS have completed.
 */
void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    /* Wait for any pending ioctl to complete. */
    if (cs->ws->thread) {
        pipe_semaphore_wait(&cs->flush_completed);
        pipe_semaphore_signal(&cs->flush_completed);
    }
}

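/* When RADEON_NOOP=1 is set in the environment, flushed command streams are
 * discarded instead of being submitted to the kernel. */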
DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", FALSE)

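/* Flush the current CS: pad the IB to the alignment required by the target
 * ring, swap the two CS contexts so that recording can continue immediately,
 * and submit the filled context, either through the winsys thread or directly
 * if no thread is used. */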
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
                                unsigned flags,
                                struct pipe_fence_handle **fence,
                                uint32_t cs_trace_id)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    switch (cs->base.ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements;
         * r6xx requires at least 4 DW alignment to avoid a hw bug.
         */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
        }
        break;
    case RING_UVD:
        while (rcs->cdw & 15)
            OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        break;
    default:
        break;
    }

    if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) {
        fprintf(stderr, "radeon: command stream overflowed\n");
    }

    if (fence) {
        radeon_fence_reference(fence, NULL);
        *fence = radeon_cs_create_fence(rcs);
    }

    radeon_drm_cs_sync_flush(rcs);

    /* Swap command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    cs->cst->cs_trace_id = cs_trace_id;

    /* If the CS is not empty and hasn't overflowed, submit it (in a separate
     * thread if there is one). */
    if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) {
        unsigned i, crelocs;

        crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
        }

        switch (cs->base.ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        case RING_VCE:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_VCE;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 2;
            if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
                cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_COMPUTE) {
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

        if (cs->ws->thread) {
            pipe_semaphore_wait(&cs->flush_completed);
            radeon_drm_ws_queue_cs(cs->ws, cs);
            if (!(flags & RADEON_FLUSH_ASYNC))
                radeon_drm_cs_sync_flush(rcs);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.buf = cs->csc->buf;
    cs->base.cdw = 0;

    cs->ws->num_cs_flushes++;
}

static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    radeon_drm_cs_sync_flush(rcs);
    pipe_semaphore_destroy(&cs->flush_completed);
    radeon_cs_context_cleanup(&cs->csc1);
    radeon_cs_context_cleanup(&cs->csc2);
    p_atomic_dec(&cs->ws->num_cs);
    radeon_destroy_cs_context(&cs->csc1);
    radeon_destroy_cs_context(&cs->csc2);
    FREE(cs);
}

static boolean radeon_bo_is_referenced(struct radeon_winsys_cs *rcs,
                                       struct radeon_winsys_cs_handle *_buf,
                                       enum radeon_bo_usage usage)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)_buf;
    int index;

    if (!bo->num_cs_references)
        return FALSE;

    index = radeon_get_reloc(cs->csc, bo);
    if (index == -1)
        return FALSE;

    if ((usage & RADEON_USAGE_WRITE) && cs->csc->relocs[index].write_domain)
        return TRUE;
    if ((usage & RADEON_USAGE_READ) && cs->csc->relocs[index].read_domains)
        return TRUE;

    return FALSE;
}

/* FENCES */

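/* A fence is implemented as a dummy 1-byte GTT buffer added to the CS as a
 * relocation: the buffer stays busy until the CS that references it has
 * completed, so fence waits are implemented as waits on that buffer. */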
static struct pipe_fence_handle *
radeon_cs_create_fence(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct pb_buffer *fence;

    /* Create a fence, which is a dummy BO. */
    fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1, TRUE,
                                       RADEON_DOMAIN_GTT);
    /* Add the fence as a dummy relocation. */
    cs->ws->base.cs_add_reloc(rcs, cs->ws->base.buffer_get_cs_handle(fence),
                              RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT,
                              RADEON_PRIO_MIN);
    return (struct pipe_fence_handle*)fence;
}

static bool radeon_fence_wait(struct radeon_winsys *ws,
                              struct pipe_fence_handle *fence,
                              uint64_t timeout)
{
    struct pb_buffer *rfence = (struct pb_buffer*)fence;

    if (timeout == 0)
        return !ws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE);

    if (timeout != PIPE_TIMEOUT_INFINITE) {
        int64_t start_time = os_time_get();

        /* Convert to microseconds. */
        timeout /= 1000;

        /* Wait in a loop. */
        while (ws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE)) {
            if (os_time_get() - start_time >= timeout) {
                return FALSE;
            }
            os_time_sleep(10);
        }
        return TRUE;
    }

    ws->buffer_wait(rfence, RADEON_USAGE_READWRITE);
    return TRUE;
}

static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src)
{
    pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src);
}

void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
{
    ws->base.cs_create = radeon_drm_cs_create;
    ws->base.cs_destroy = radeon_drm_cs_destroy;
    ws->base.cs_add_reloc = radeon_drm_cs_add_reloc;
    ws->base.cs_get_reloc = radeon_drm_cs_get_reloc;
    ws->base.cs_validate = radeon_drm_cs_validate;
    ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
    ws->base.cs_flush = radeon_drm_cs_flush;
    ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
    ws->base.cs_sync_flush = radeon_drm_cs_sync_flush;
    ws->base.fence_wait = radeon_fence_wait;
    ws->base.fence_reference = radeon_fence_reference;
}