gallium/radeon: unify buffer_wait and buffer_is_busy in the winsys interface
[mesa.git] src/gallium/winsys/radeon/drm/radeon_drm_cs.c
/*
 * Copyright © 2008 Jérôme Glisse
 * Copyright © 2010 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
/*
 * Authors:
 *      Marek Olšák <maraeo@gmail.com>
 *
 * Based on work from libdrm_radeon by:
 *      Aapo Tahkola <aet@rasterburn.org>
 *      Nicolai Haehnle <prefect_@gmx.net>
 *      Jérôme Glisse <glisse@freedesktop.org>
 */

/*
    This file replaces libdrm's radeon_cs_gem with our own implementation.
    It's optimized specifically for Radeon DRM.
    Reloc writes and space checking are faster and simpler than their
    counterparts in libdrm (the time complexity of all the functions
    is O(1) in nearly all scenarios, thanks to hashing).

    It works like this:

    cs_add_reloc(cs, buf, read_domain, write_domain) adds a new relocation and
    also adds the size of 'buf' to the used_gart and used_vram winsys variables
    based on the domains, which are simply OR'd for accounting purposes.
    The addition is skipped if the reloc is already present in the list, but it
    still accounts for any newly referenced domains.

    cs_validate is then called, which just checks:
        used_vram/gart < vram/gart_size * 0.8
    The 0.8 factor allows for some memory fragmentation. If the validation
    fails, the pipe driver flushes the CS and tries to do the validation again,
    i.e. it validates only that one operation. If it fails again, it drops
    the operation on the floor and prints a nasty message to stderr.
    (done in the pipe driver)

    cs_write_reloc(cs, buf) just writes a reloc that has been added using
    cs_add_reloc. The read_domain and write_domain parameters have been removed,
    because we already specify them in cs_add_reloc.
*/
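
/*
    A minimal, hypothetical pipe-driver sequence using the calls described
    above. The driver-side names (ctx, ws, buf) and the exact flags are
    illustrative assumptions, not part of this file:

        ws->cs_add_reloc(cs, buf, RADEON_USAGE_WRITE,
                         RADEON_DOMAIN_VRAM, RADEON_PRIO_MIN);
        if (!ws->cs_validate(cs)) {
            ctx->flush(ctx, RADEON_FLUSH_ASYNC, NULL);   // flush, then retry once
            ws->cs_add_reloc(cs, buf, RADEON_USAGE_WRITE,
                             RADEON_DOMAIN_VRAM, RADEON_PRIO_MIN);
            if (!ws->cs_validate(cs))
                fprintf(stderr, "radeon: dropping operation\n");   // give up
        }
        // ...emit packets with OUT_CS() and write relocations with
        // cs_write_reloc()...
*/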

#include "radeon_drm_cs.h"

#include "util/u_memory.h"
#include "os/os_time.h"

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <xf86drm.h>


#define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))

static struct pipe_fence_handle *
radeon_cs_create_fence(struct radeon_winsys_cs *rcs);
static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src);

static boolean radeon_init_cs_context(struct radeon_cs_context *csc,
                                      struct radeon_drm_winsys *ws)
{
    int i;

    csc->buf = MALLOC(ws->ib_max_size);
    if (!csc->buf)
        return FALSE;
    csc->fd = ws->fd;
    csc->nrelocs = 512;
    csc->relocs_bo = (struct radeon_bo**)
                     CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*));
    if (!csc->relocs_bo) {
        FREE(csc->buf);
        return FALSE;
    }

    csc->relocs = (struct drm_radeon_cs_reloc*)
                  CALLOC(1, csc->nrelocs * sizeof(struct drm_radeon_cs_reloc));
    if (!csc->relocs) {
        FREE(csc->buf);
        FREE(csc->relocs_bo);
        return FALSE;
    }

    csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
    csc->chunks[0].length_dw = 0;
    csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf;
    csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
    csc->chunks[1].length_dw = 0;
    csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
    csc->chunks[2].length_dw = 2;
    csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)&csc->flags;

    csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0];
    csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1];
    csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2];

    csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array;

    for (i = 0; i < Elements(csc->reloc_indices_hashlist); i++) {
        csc->reloc_indices_hashlist[i] = -1;
    }
    return TRUE;
}

static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
{
    unsigned i;

    for (i = 0; i < csc->crelocs; i++) {
        p_atomic_dec(&csc->relocs_bo[i]->num_cs_references);
        radeon_bo_reference(&csc->relocs_bo[i], NULL);
    }

    csc->crelocs = 0;
    csc->validated_crelocs = 0;
    csc->chunks[0].length_dw = 0;
    csc->chunks[1].length_dw = 0;
    csc->used_gart = 0;
    csc->used_vram = 0;

    for (i = 0; i < Elements(csc->reloc_indices_hashlist); i++) {
        csc->reloc_indices_hashlist[i] = -1;
    }
}

static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
{
    radeon_cs_context_cleanup(csc);
    FREE(csc->relocs_bo);
    FREE(csc->relocs);
    FREE(csc->buf);
}


static struct radeon_winsys_cs *
radeon_drm_cs_create(struct radeon_winsys *rws,
                     enum ring_type ring_type,
                     void (*flush)(void *ctx, unsigned flags,
                                   struct pipe_fence_handle **fence),
                     void *flush_ctx,
                     struct radeon_winsys_cs_handle *trace_buf)
{
    struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
    struct radeon_drm_cs *cs;

    cs = CALLOC_STRUCT(radeon_drm_cs);
    if (!cs) {
        return NULL;
    }
    pipe_semaphore_init(&cs->flush_completed, 1);

    cs->ws = ws;
    cs->flush_cs = flush;
    cs->flush_data = flush_ctx;
    cs->trace_buf = (struct radeon_bo*)trace_buf;

    if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
        FREE(cs);
        return NULL;
    }
    if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
        radeon_destroy_cs_context(&cs->csc1);
        FREE(cs);
        return NULL;
    }

    /* Set the first command buffer as current. */
    cs->csc = &cs->csc1;
    cs->cst = &cs->csc2;
    cs->base.buf = cs->csc->buf;
    cs->base.ring_type = ring_type;
    cs->base.max_dw = ws->ib_max_size / 4;

    p_atomic_inc(&ws->num_cs);
    return &cs->base;
}

#define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value)
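
/* Note: OUT_CS() appends one dword and advances cdw without any bounds
 * checking; an overflow is only detected later, in radeon_drm_cs_flush(),
 * where cdw is compared against max_dw. */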

static inline void update_reloc(struct drm_radeon_cs_reloc *reloc,
                                enum radeon_bo_domain rd,
                                enum radeon_bo_domain wd,
                                unsigned priority,
                                enum radeon_bo_domain *added_domains)
{
    *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);

    reloc->read_domains |= rd;
    reloc->write_domain |= wd;
    reloc->flags = MAX2(reloc->flags, priority);
}

int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo)
{
    unsigned hash = bo->handle & (Elements(csc->reloc_indices_hashlist)-1);
    int i = csc->reloc_indices_hashlist[hash];

    /* Fast path: the hash entry is either empty (-1) or already points
     * at this buffer. */
    if (i == -1 || csc->relocs_bo[i] == bo)
        return i;

    /* Hash collision, look for the BO in the list of relocs linearly. */
    for (i = csc->crelocs - 1; i >= 0; i--) {
        if (csc->relocs_bo[i] == bo) {
            /* Put this reloc in the hash list.
             * This will prevent additional hash collisions if there are
             * several consecutive get_reloc calls for the same buffer.
             *
             * Example: Assuming buffers A,B,C collide in the hash list,
             * the following sequence of relocs:
             *         AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
             * will collide here: ^ and here:   ^,
             * meaning that we should get very few collisions in the end. */
            csc->reloc_indices_hashlist[hash] = i;
            return i;
        }
    }
    return -1;
}

static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
                                 struct radeon_bo *bo,
                                 enum radeon_bo_usage usage,
                                 enum radeon_bo_domain domains,
                                 unsigned priority,
                                 enum radeon_bo_domain *added_domains)
{
    struct radeon_cs_context *csc = cs->csc;
    struct drm_radeon_cs_reloc *reloc;
    unsigned hash = bo->handle & (Elements(csc->reloc_indices_hashlist)-1);
    enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
    enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
    int i = -1;

    priority = MIN2(priority, 15);
    *added_domains = 0;

    i = radeon_get_reloc(csc, bo);

    if (i >= 0) {
        reloc = &csc->relocs[i];
        update_reloc(reloc, rd, wd, priority, added_domains);

        /* For async DMA, every add_reloc call must add a buffer to the list
         * no matter how many duplicates there are. This is due to the fact
         * that the DMA CS checker doesn't use NOP packets for offset patching,
         * but always uses the i-th buffer from the list to patch the i-th
         * offset. If there are N offsets in a DMA CS, there must also be N
         * buffers in the relocation list.
         *
         * This doesn't have to be done if virtual memory is enabled,
         * because there is no offset patching with virtual memory.
         */
        if (cs->base.ring_type != RING_DMA || cs->ws->info.r600_virtual_address) {
            return i;
        }
    }

    /* New relocation, check if the backing array is large enough. */
    if (csc->crelocs >= csc->nrelocs) {
        uint32_t size;
        csc->nrelocs += 10;

        size = csc->nrelocs * sizeof(struct radeon_bo*);
        csc->relocs_bo = realloc(csc->relocs_bo, size);

        size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc);
        csc->relocs = realloc(csc->relocs, size);

        csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    }

    /* Initialize the new relocation. */
    csc->relocs_bo[csc->crelocs] = NULL;
    radeon_bo_reference(&csc->relocs_bo[csc->crelocs], bo);
    p_atomic_inc(&bo->num_cs_references);
    reloc = &csc->relocs[csc->crelocs];
    reloc->handle = bo->handle;
    reloc->read_domains = rd;
    reloc->write_domain = wd;
    reloc->flags = priority;

    csc->reloc_indices_hashlist[hash] = csc->crelocs;

    csc->chunks[1].length_dw += RELOC_DWORDS;

    *added_domains = rd | wd;
    return csc->crelocs++;
}

static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
                                        struct radeon_winsys_cs_handle *buf,
                                        enum radeon_bo_usage usage,
                                        enum radeon_bo_domain domains,
                                        enum radeon_bo_priority priority)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)buf;
    enum radeon_bo_domain added_domains;
    unsigned index = radeon_add_reloc(cs, bo, usage, domains, priority, &added_domains);

    if (added_domains & RADEON_DOMAIN_GTT)
        cs->csc->used_gart += bo->base.size;
    if (added_domains & RADEON_DOMAIN_VRAM)
        cs->csc->used_vram += bo->base.size;

    return index;
}

static int radeon_drm_cs_get_reloc(struct radeon_winsys_cs *rcs,
                                   struct radeon_winsys_cs_handle *buf)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    return radeon_get_reloc(cs->csc, (struct radeon_bo*)buf);
}

static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    boolean status =
        cs->csc->used_gart < cs->ws->info.gart_size * 0.8 &&
        cs->csc->used_vram < cs->ws->info.vram_size * 0.8;

    if (status) {
        cs->csc->validated_crelocs = cs->csc->crelocs;
    } else {
        /* Remove the recently-added relocations. The validation failed with
         * them and the CS is about to be flushed because of that. Keep only
         * the already-validated relocations. */
        unsigned i;

        for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) {
            p_atomic_dec(&cs->csc->relocs_bo[i]->num_cs_references);
            radeon_bo_reference(&cs->csc->relocs_bo[i], NULL);
        }
        cs->csc->crelocs = cs->csc->validated_crelocs;

        /* Flush if there are any relocs. Clean up otherwise. */
        if (cs->csc->crelocs) {
            cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
        } else {
            radeon_cs_context_cleanup(cs->csc);

            assert(cs->base.cdw == 0);
            if (cs->base.cdw != 0) {
                fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__);
            }
        }
    }
    return status;
}

static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    vram += cs->csc->used_vram;
    gtt += cs->csc->used_gart;

    /* Anything that goes above the VRAM size should go to GTT. */
    if (vram > cs->ws->info.vram_size)
        gtt += vram - cs->ws->info.vram_size;
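
    /* Illustrative example with hypothetical sizes: with vram_size = 1024 MB,
     * a request of vram = 1200 MB and gtt = 100 MB is treated as
     * gtt = 100 + (1200 - 1024) = 276 MB for the check below. */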

    /* Now we just need to check if we have enough GTT. */
    return gtt < cs->ws->info.gart_size * 0.7;
}

void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc)
{
    unsigned i;

    if (drmCommandWriteRead(csc->fd, DRM_RADEON_CS,
                            &csc->cs, sizeof(struct drm_radeon_cs))) {
        if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) {
            unsigned i;

            fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n");
            for (i = 0; i < csc->chunks[0].length_dw; i++) {
                fprintf(stderr, "0x%08X\n", csc->buf[i]);
            }
        } else {
            fprintf(stderr, "radeon: The kernel rejected CS, "
                            "see dmesg for more information.\n");
        }
    }

    if (cs->trace_buf) {
        radeon_dump_cs_on_lockup(cs, csc);
    }

    for (i = 0; i < csc->crelocs; i++)
        p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls);

    radeon_cs_context_cleanup(csc);
}

/*
 * Make sure previous submissions of this CS have completed.
 */
void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    /* Wait for any pending ioctl to complete. */
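    /* flush_completed is signalled by the flush thread when it finishes a
     * queued submission. Waiting and then immediately signalling again
     * consumes that event while restoring the semaphore count to 1, so the
     * next flush can take it again (wait-then-repost on a binary semaphore
     * used as a "done" flag). */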
    if (cs->ws->thread) {
        pipe_semaphore_wait(&cs->flush_completed);
        pipe_semaphore_signal(&cs->flush_completed);
    }
}

DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", FALSE)

static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
                                unsigned flags,
                                struct pipe_fence_handle **fence,
                                uint32_t cs_trace_id)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    switch (cs->base.ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* Pad the GFX ring to 8 DWs to meet CP fetch alignment requirements;
         * r6xx requires at least 4 DW alignment to avoid a hw bug.
         * Hawaii with old firmware needs the type2 nop packet;
         * accel_working2 with value 3 indicates the new firmware.
         */
        if (cs->ws->info.chip_class <= SI ||
            (cs->ws->info.family == CHIP_HAWAII &&
             cs->ws->accel_working2 < 3)) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
        }
        break;
    case RING_UVD:
        while (rcs->cdw & 15)
            OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        break;
    default:
        break;
    }

    if (rcs->cdw > rcs->max_dw) {
        fprintf(stderr, "radeon: command stream overflowed\n");
    }

    if (fence) {
        radeon_fence_reference(fence, NULL);
        *fence = radeon_cs_create_fence(rcs);
    }

    radeon_drm_cs_sync_flush(rcs);

    /* Swap command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    cs->cst->cs_trace_id = cs_trace_id;

    /* If the CS is not empty and has not overflowed, submit it
     * (in a separate thread if the winsys has one). */
    if (cs->base.cdw && cs->base.cdw <= cs->base.max_dw && !debug_get_option_noop()) {
        unsigned i, crelocs;

        crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
        }

        switch (cs->base.ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        case RING_VCE:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_VCE;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 2;
            if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
                cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_COMPUTE) {
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

        if (cs->ws->thread) {
            pipe_semaphore_wait(&cs->flush_completed);
            radeon_drm_ws_queue_cs(cs->ws, cs);
            if (!(flags & RADEON_FLUSH_ASYNC))
                radeon_drm_cs_sync_flush(rcs);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.buf = cs->csc->buf;
    cs->base.cdw = 0;

    cs->ws->num_cs_flushes++;
}

static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    radeon_drm_cs_sync_flush(rcs);
    pipe_semaphore_destroy(&cs->flush_completed);
    radeon_cs_context_cleanup(&cs->csc1);
    radeon_cs_context_cleanup(&cs->csc2);
    p_atomic_dec(&cs->ws->num_cs);
    radeon_destroy_cs_context(&cs->csc1);
    radeon_destroy_cs_context(&cs->csc2);
    FREE(cs);
}

static boolean radeon_bo_is_referenced(struct radeon_winsys_cs *rcs,
                                       struct radeon_winsys_cs_handle *_buf,
                                       enum radeon_bo_usage usage)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)_buf;
    int index;

    if (!bo->num_cs_references)
        return FALSE;

    index = radeon_get_reloc(cs->csc, bo);
    if (index == -1)
        return FALSE;

    if ((usage & RADEON_USAGE_WRITE) && cs->csc->relocs[index].write_domain)
        return TRUE;
    if ((usage & RADEON_USAGE_READ) && cs->csc->relocs[index].read_domains)
        return TRUE;

    return FALSE;
}

/* FENCES */

static struct pipe_fence_handle *
radeon_cs_create_fence(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct pb_buffer *fence;

    /* Create a fence, which is a dummy BO. */
    fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1, TRUE,
                                       RADEON_DOMAIN_GTT, 0);
    /* Add the fence as a dummy relocation. */
    cs->ws->base.cs_add_reloc(rcs, cs->ws->base.buffer_get_cs_handle(fence),
                              RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT,
                              RADEON_PRIO_MIN);
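    /* The CS now references the dummy BO, so once this CS is submitted the
     * BO stays busy until the CS completes; radeon_fence_wait() below
     * therefore only needs to wait on the buffer. */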
    return (struct pipe_fence_handle*)fence;
}

static bool radeon_fence_wait(struct radeon_winsys *ws,
                              struct pipe_fence_handle *fence,
                              uint64_t timeout)
{
    struct pb_buffer *rfence = (struct pb_buffer*)fence;

    if (timeout == 0)
        return ws->buffer_wait(rfence, 0, RADEON_USAGE_READWRITE);

    if (timeout != PIPE_TIMEOUT_INFINITE) {
        int64_t start_time = os_time_get();

        /* Convert the timeout from nanoseconds to microseconds. */
        timeout /= 1000;

        /* Wait in a loop. */
        while (!ws->buffer_wait(rfence, 0, RADEON_USAGE_READWRITE)) {
            if (os_time_get() - start_time >= timeout) {
                return FALSE;
            }
            os_time_sleep(10);
        }
        return TRUE;
    }

    ws->buffer_wait(rfence, PIPE_TIMEOUT_INFINITE, RADEON_USAGE_READWRITE);
    return TRUE;
}

static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src)
{
    pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src);
}

void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
{
    ws->base.cs_create = radeon_drm_cs_create;
    ws->base.cs_destroy = radeon_drm_cs_destroy;
    ws->base.cs_add_reloc = radeon_drm_cs_add_reloc;
    ws->base.cs_get_reloc = radeon_drm_cs_get_reloc;
    ws->base.cs_validate = radeon_drm_cs_validate;
    ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
    ws->base.cs_flush = radeon_drm_cs_flush;
    ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
    ws->base.cs_sync_flush = radeon_drm_cs_sync_flush;
    ws->base.fence_wait = radeon_fence_wait;
    ws->base.fence_reference = radeon_fence_reference;
}
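
/*
 * Illustrative driver-side usage of the entry points installed above.
 * The variable names (ws, cs, fence) and the one-second timeout are
 * hypothetical; this is only a sketch of how the pieces fit together:
 *
 *    struct pipe_fence_handle *fence = NULL;
 *
 *    ws->cs_flush(cs, RADEON_FLUSH_ASYNC, &fence, 0);
 *    if (!ws->fence_wait(ws, fence, 1000000000))
 *       fprintf(stderr, "radeon: fence wait timed out\n");
 *    ws->fence_reference(&fence, NULL);
 */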