gallium/radeon: tell the winsys the exact resource binding types
[mesa.git] / src / gallium / winsys / radeon / drm / radeon_drm_cs.c
1 /*
2 * Copyright © 2008 Jérôme Glisse
3 * Copyright © 2010 Marek Olšák <maraeo@gmail.com>
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
16 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
18 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * The above copyright notice and this permission notice (including the
24 * next paragraph) shall be included in all copies or substantial portions
25 * of the Software.
26 */
27 /*
28 * Authors:
29 * Marek Olšák <maraeo@gmail.com>
30 *
31 * Based on work from libdrm_radeon by:
32 * Aapo Tahkola <aet@rasterburn.org>
33 * Nicolai Haehnle <prefect_@gmx.net>
34 * Jérôme Glisse <glisse@freedesktop.org>
35 */
36
37 /*
38 This file replaces libdrm's radeon_cs_gem with our own implementation.
39 It's optimized specifically for Radeon DRM.
40 Reloc writes and space checking are faster and simpler than their
41 counterparts in libdrm (the time complexity of all the functions
42 is O(1) in nearly all scenarios, thanks to hashing).
43
44 It works like this:
45
46 cs_add_reloc(cs, buf, read_domain, write_domain) adds a new relocation and
47 also adds the size of 'buf' to the used_gart and used_vram winsys variables
48 based on the domains, which are simply or'd for the accounting purposes.
49 The adding is skipped if the reloc is already present in the list, but it
50 accounts any newly-referenced domains.
51
52 cs_validate is then called, which just checks:
53 used_vram/gart < vram/gart_size * 0.8
54 The 0.8 number allows for some memory fragmentation. If the validation
55 fails, the pipe driver flushes CS and tries to do the validation again,
56 i.e. it validates only that one operation. If it fails again, it drops
57 the operation on the floor and prints some nasty message to stderr.
58 (done in the pipe driver)
59
60 cs_write_reloc(cs, buf) just writes a reloc that has been added using
61 cs_add_reloc. The read_domain and write_domain parameters have been removed,
62 because we already specify them in cs_add_reloc.
63 */
64
65 #include "radeon_drm_cs.h"
66
67 #include "util/u_memory.h"
68 #include "os/os_time.h"
69
70 #include <stdio.h>
71 #include <stdlib.h>
72 #include <stdint.h>
73 #include <xf86drm.h>
74
75
/* Size of one relocation entry, expressed in dwords. The relocs chunk
 * length (chunks[1].length_dw) grows by this amount per added relocation. */
#define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))

/* Fence helpers are defined further down in this file but are needed by
 * the flush path, hence the forward declarations. */
static struct pipe_fence_handle *radeon_cs_create_fence(
    struct radeon_winsys_cs *rcs);

static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src);
82
/**
 * Create a winsys "context".
 *
 * This winsys has no real context objects, so the winsys pointer itself is
 * handed back, reinterpreted as a context handle.
 */
static struct radeon_winsys_ctx *radeon_drm_ctx_create(struct radeon_winsys *ws)
{
    struct radeon_winsys_ctx *fake_ctx = (struct radeon_winsys_ctx *)ws;

    return fake_ctx;
}
89
/**
 * Destroy a winsys "context".
 *
 * There is nothing to release: contexts are not supported by this winsys.
 */
static void radeon_drm_ctx_destroy(struct radeon_winsys_ctx *ctx)
{
    (void)ctx;
}
94
95 static boolean radeon_init_cs_context(struct radeon_cs_context *csc,
96 struct radeon_drm_winsys *ws)
97 {
98 int i;
99
100 csc->fd = ws->fd;
101 csc->nrelocs = 512;
102 csc->relocs_bo = (struct radeon_bo**)
103 CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*));
104 if (!csc->relocs_bo) {
105 return FALSE;
106 }
107
108 csc->relocs = (struct drm_radeon_cs_reloc*)
109 CALLOC(1, csc->nrelocs * sizeof(struct drm_radeon_cs_reloc));
110 if (!csc->relocs) {
111 FREE(csc->relocs_bo);
112 return FALSE;
113 }
114
115 csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
116 csc->chunks[0].length_dw = 0;
117 csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf;
118 csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
119 csc->chunks[1].length_dw = 0;
120 csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
121 csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
122 csc->chunks[2].length_dw = 2;
123 csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)&csc->flags;
124
125 csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0];
126 csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1];
127 csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2];
128
129 csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array;
130
131 for (i = 0; i < Elements(csc->reloc_indices_hashlist); i++) {
132 csc->reloc_indices_hashlist[i] = -1;
133 }
134 return TRUE;
135 }
136
137 static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
138 {
139 unsigned i;
140
141 for (i = 0; i < csc->crelocs; i++) {
142 p_atomic_dec(&csc->relocs_bo[i]->num_cs_references);
143 radeon_bo_reference(&csc->relocs_bo[i], NULL);
144 }
145
146 csc->crelocs = 0;
147 csc->validated_crelocs = 0;
148 csc->chunks[0].length_dw = 0;
149 csc->chunks[1].length_dw = 0;
150 csc->used_gart = 0;
151 csc->used_vram = 0;
152
153 for (i = 0; i < Elements(csc->reloc_indices_hashlist); i++) {
154 csc->reloc_indices_hashlist[i] = -1;
155 }
156 }
157
158 static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
159 {
160 radeon_cs_context_cleanup(csc);
161 FREE(csc->relocs_bo);
162 FREE(csc->relocs);
163 }
164
165
166 static struct radeon_winsys_cs *
167 radeon_drm_cs_create(struct radeon_winsys_ctx *ctx,
168 enum ring_type ring_type,
169 void (*flush)(void *ctx, unsigned flags,
170 struct pipe_fence_handle **fence),
171 void *flush_ctx,
172 struct radeon_winsys_cs_handle *trace_buf)
173 {
174 struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)ctx;
175 struct radeon_drm_cs *cs;
176
177 cs = CALLOC_STRUCT(radeon_drm_cs);
178 if (!cs) {
179 return NULL;
180 }
181 pipe_semaphore_init(&cs->flush_completed, 1);
182
183 cs->ws = ws;
184 cs->flush_cs = flush;
185 cs->flush_data = flush_ctx;
186 cs->trace_buf = (struct radeon_bo*)trace_buf;
187
188 if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
189 FREE(cs);
190 return NULL;
191 }
192 if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
193 radeon_destroy_cs_context(&cs->csc1);
194 FREE(cs);
195 return NULL;
196 }
197
198 /* Set the first command buffer as current. */
199 cs->csc = &cs->csc1;
200 cs->cst = &cs->csc2;
201 cs->base.buf = cs->csc->buf;
202 cs->base.ring_type = ring_type;
203 cs->base.max_dw = ARRAY_SIZE(cs->csc->buf);
204
205 p_atomic_inc(&ws->num_cs);
206 return &cs->base;
207 }
208
/* Append one dword to the command stream and advance its write cursor.
 * The expansion is fully parenthesized so that it behaves as a single
 * expression wherever it is used. Note that 'cs' is evaluated twice, so it
 * must not have side effects, and that no bounds check is performed. */
#define OUT_CS(cs, value) ((cs)->buf[(cs)->cdw++] = (value))
210
211 static inline void update_reloc(struct drm_radeon_cs_reloc *reloc,
212 enum radeon_bo_domain rd,
213 enum radeon_bo_domain wd,
214 unsigned priority,
215 enum radeon_bo_domain *added_domains)
216 {
217 *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);
218
219 reloc->read_domains |= rd;
220 reloc->write_domain |= wd;
221 reloc->flags = MAX2(reloc->flags, priority);
222 }
223
224 int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo)
225 {
226 unsigned hash = bo->handle & (Elements(csc->reloc_indices_hashlist)-1);
227 int i = csc->reloc_indices_hashlist[hash];
228
229 /* not found or found */
230 if (i == -1 || csc->relocs_bo[i] == bo)
231 return i;
232
233 /* Hash collision, look for the BO in the list of relocs linearly. */
234 for (i = csc->crelocs - 1; i >= 0; i--) {
235 if (csc->relocs_bo[i] == bo) {
236 /* Put this reloc in the hash list.
237 * This will prevent additional hash collisions if there are
238 * several consecutive get_reloc calls for the same buffer.
239 *
240 * Example: Assuming buffers A,B,C collide in the hash list,
241 * the following sequence of relocs:
242 * AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
243 * will collide here: ^ and here: ^,
244 * meaning that we should get very few collisions in the end. */
245 csc->reloc_indices_hashlist[hash] = i;
246 return i;
247 }
248 }
249 return -1;
250 }
251
252 static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
253 struct radeon_bo *bo,
254 enum radeon_bo_usage usage,
255 enum radeon_bo_domain domains,
256 unsigned priority,
257 enum radeon_bo_domain *added_domains)
258 {
259 struct radeon_cs_context *csc = cs->csc;
260 struct drm_radeon_cs_reloc *reloc;
261 unsigned hash = bo->handle & (Elements(csc->reloc_indices_hashlist)-1);
262 enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
263 enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
264 int i = -1;
265
266 assert(priority < 64);
267 *added_domains = 0;
268
269 i = radeon_get_reloc(csc, bo);
270
271 if (i >= 0) {
272 reloc = &csc->relocs[i];
273 update_reloc(reloc, rd, wd, priority / 4, added_domains);
274
275 /* For async DMA, every add_reloc call must add a buffer to the list
276 * no matter how many duplicates there are. This is due to the fact
277 * the DMA CS checker doesn't use NOP packets for offset patching,
278 * but always uses the i-th buffer from the list to patch the i-th
279 * offset. If there are N offsets in a DMA CS, there must also be N
280 * buffers in the relocation list.
281 *
282 * This doesn't have to be done if virtual memory is enabled,
283 * because there is no offset patching with virtual memory.
284 */
285 if (cs->base.ring_type != RING_DMA || cs->ws->info.r600_virtual_address) {
286 return i;
287 }
288 }
289
290 /* New relocation, check if the backing array is large enough. */
291 if (csc->crelocs >= csc->nrelocs) {
292 uint32_t size;
293 csc->nrelocs += 10;
294
295 size = csc->nrelocs * sizeof(struct radeon_bo*);
296 csc->relocs_bo = realloc(csc->relocs_bo, size);
297
298 size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc);
299 csc->relocs = realloc(csc->relocs, size);
300
301 csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
302 }
303
304 /* Initialize the new relocation. */
305 csc->relocs_bo[csc->crelocs] = NULL;
306 radeon_bo_reference(&csc->relocs_bo[csc->crelocs], bo);
307 p_atomic_inc(&bo->num_cs_references);
308 reloc = &csc->relocs[csc->crelocs];
309 reloc->handle = bo->handle;
310 reloc->read_domains = rd;
311 reloc->write_domain = wd;
312 reloc->flags = priority / 4;
313
314 csc->reloc_indices_hashlist[hash] = csc->crelocs;
315
316 csc->chunks[1].length_dw += RELOC_DWORDS;
317
318 *added_domains = rd | wd;
319 return csc->crelocs++;
320 }
321
322 static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
323 struct radeon_winsys_cs_handle *buf,
324 enum radeon_bo_usage usage,
325 enum radeon_bo_domain domains,
326 enum radeon_bo_priority priority)
327 {
328 struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
329 struct radeon_bo *bo = (struct radeon_bo*)buf;
330 enum radeon_bo_domain added_domains;
331 unsigned index = radeon_add_reloc(cs, bo, usage, domains, priority,
332 &added_domains);
333
334 if (added_domains & RADEON_DOMAIN_GTT)
335 cs->csc->used_gart += bo->base.size;
336 if (added_domains & RADEON_DOMAIN_VRAM)
337 cs->csc->used_vram += bo->base.size;
338
339 return index;
340 }
341
342 static int radeon_drm_cs_get_reloc(struct radeon_winsys_cs *rcs,
343 struct radeon_winsys_cs_handle *buf)
344 {
345 struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
346
347 return radeon_get_reloc(cs->csc, (struct radeon_bo*)buf);
348 }
349
350 static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
351 {
352 struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
353 boolean status =
354 cs->csc->used_gart < cs->ws->info.gart_size * 0.8 &&
355 cs->csc->used_vram < cs->ws->info.vram_size * 0.8;
356
357 if (status) {
358 cs->csc->validated_crelocs = cs->csc->crelocs;
359 } else {
360 /* Remove lately-added relocations. The validation failed with them
361 * and the CS is about to be flushed because of that. Keep only
362 * the already-validated relocations. */
363 unsigned i;
364
365 for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) {
366 p_atomic_dec(&cs->csc->relocs_bo[i]->num_cs_references);
367 radeon_bo_reference(&cs->csc->relocs_bo[i], NULL);
368 }
369 cs->csc->crelocs = cs->csc->validated_crelocs;
370
371 /* Flush if there are any relocs. Clean up otherwise. */
372 if (cs->csc->crelocs) {
373 cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
374 } else {
375 radeon_cs_context_cleanup(cs->csc);
376
377 assert(cs->base.cdw == 0);
378 if (cs->base.cdw != 0) {
379 fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__);
380 }
381 }
382 }
383 return status;
384 }
385
386 static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt)
387 {
388 struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
389
390 vram += cs->csc->used_vram;
391 gtt += cs->csc->used_gart;
392
393 /* Anything that goes above the VRAM size should go to GTT. */
394 if (vram > cs->ws->info.vram_size)
395 gtt += vram - cs->ws->info.vram_size;
396
397 /* Now we just need to check if we have enough GTT. */
398 return gtt < cs->ws->info.gart_size * 0.7;
399 }
400
401 void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc)
402 {
403 unsigned i;
404 int r;
405
406 r = drmCommandWriteRead(csc->fd, DRM_RADEON_CS,
407 &csc->cs, sizeof(struct drm_radeon_cs));
408 if (r) {
409 if (r == -ENOMEM)
410 fprintf(stderr, "radeon: Not enough memory for command submission.\n");
411 else if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) {
412 unsigned i;
413
414 fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n");
415 for (i = 0; i < csc->chunks[0].length_dw; i++) {
416 fprintf(stderr, "0x%08X\n", csc->buf[i]);
417 }
418 } else {
419 fprintf(stderr, "radeon: The kernel rejected CS, "
420 "see dmesg for more information.\n");
421 }
422 }
423
424 if (cs->trace_buf) {
425 radeon_dump_cs_on_lockup(cs, csc);
426 }
427
428 for (i = 0; i < csc->crelocs; i++)
429 p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls);
430
431 radeon_cs_context_cleanup(csc);
432 }
433
434 /*
435 * Make sure previous submission of this cs are completed
436 */
437 void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs)
438 {
439 struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
440
441 /* Wait for any pending ioctl to complete. */
442 if (cs->ws->thread) {
443 pipe_semaphore_wait(&cs->flush_completed);
444 pipe_semaphore_signal(&cs->flush_completed);
445 }
446 }
447
448 DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", FALSE)
449
450 static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
451 unsigned flags,
452 struct pipe_fence_handle **fence,
453 uint32_t cs_trace_id)
454 {
455 struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
456 struct radeon_cs_context *tmp;
457
458 switch (cs->base.ring_type) {
459 case RING_DMA:
460 /* pad DMA ring to 8 DWs */
461 if (cs->ws->info.chip_class <= SI) {
462 while (rcs->cdw & 7)
463 OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
464 } else {
465 while (rcs->cdw & 7)
466 OUT_CS(&cs->base, 0x00000000); /* NOP packet */
467 }
468 break;
469 case RING_GFX:
470 /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements
471 * r6xx, requires at least 4 dw alignment to avoid a hw bug.
472 */
473 if (cs->ws->info.gfx_ib_pad_with_type2) {
474 while (rcs->cdw & 7)
475 OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
476 } else {
477 while (rcs->cdw & 7)
478 OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
479 }
480 break;
481 case RING_UVD:
482 while (rcs->cdw & 15)
483 OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
484 break;
485 default:
486 break;
487 }
488
489 if (rcs->cdw > rcs->max_dw) {
490 fprintf(stderr, "radeon: command stream overflowed\n");
491 }
492
493 if (fence) {
494 radeon_fence_reference(fence, NULL);
495 *fence = radeon_cs_create_fence(rcs);
496 }
497
498 radeon_drm_cs_sync_flush(rcs);
499
500 /* Swap command streams. */
501 tmp = cs->csc;
502 cs->csc = cs->cst;
503 cs->cst = tmp;
504
505 cs->cst->cs_trace_id = cs_trace_id;
506
507 /* If the CS is not empty or overflowed, emit it in a separate thread. */
508 if (cs->base.cdw && cs->base.cdw <= cs->base.max_dw && !debug_get_option_noop()) {
509 unsigned i, crelocs;
510
511 crelocs = cs->cst->crelocs;
512
513 cs->cst->chunks[0].length_dw = cs->base.cdw;
514
515 for (i = 0; i < crelocs; i++) {
516 /* Update the number of active asynchronous CS ioctls for the buffer. */
517 p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
518 }
519
520 switch (cs->base.ring_type) {
521 case RING_DMA:
522 cs->cst->flags[0] = 0;
523 cs->cst->flags[1] = RADEON_CS_RING_DMA;
524 cs->cst->cs.num_chunks = 3;
525 if (cs->ws->info.r600_virtual_address) {
526 cs->cst->flags[0] |= RADEON_CS_USE_VM;
527 }
528 break;
529
530 case RING_UVD:
531 cs->cst->flags[0] = 0;
532 cs->cst->flags[1] = RADEON_CS_RING_UVD;
533 cs->cst->cs.num_chunks = 3;
534 break;
535
536 case RING_VCE:
537 cs->cst->flags[0] = 0;
538 cs->cst->flags[1] = RADEON_CS_RING_VCE;
539 cs->cst->cs.num_chunks = 3;
540 break;
541
542 default:
543 case RING_GFX:
544 case RING_COMPUTE:
545 cs->cst->flags[0] = 0;
546 cs->cst->flags[1] = RADEON_CS_RING_GFX;
547 cs->cst->cs.num_chunks = 2;
548 if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
549 cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
550 cs->cst->cs.num_chunks = 3;
551 }
552 if (cs->ws->info.r600_virtual_address) {
553 cs->cst->flags[0] |= RADEON_CS_USE_VM;
554 cs->cst->cs.num_chunks = 3;
555 }
556 if (flags & RADEON_FLUSH_END_OF_FRAME) {
557 cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
558 cs->cst->cs.num_chunks = 3;
559 }
560 if (cs->base.ring_type == RING_COMPUTE) {
561 cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
562 cs->cst->cs.num_chunks = 3;
563 }
564 break;
565 }
566
567 if (cs->ws->thread) {
568 pipe_semaphore_wait(&cs->flush_completed);
569 radeon_drm_ws_queue_cs(cs->ws, cs);
570 if (!(flags & RADEON_FLUSH_ASYNC))
571 radeon_drm_cs_sync_flush(rcs);
572 } else {
573 radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
574 }
575 } else {
576 radeon_cs_context_cleanup(cs->cst);
577 }
578
579 /* Prepare a new CS. */
580 cs->base.buf = cs->csc->buf;
581 cs->base.cdw = 0;
582
583 cs->ws->num_cs_flushes++;
584 }
585
586 static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs)
587 {
588 struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
589
590 radeon_drm_cs_sync_flush(rcs);
591 pipe_semaphore_destroy(&cs->flush_completed);
592 radeon_cs_context_cleanup(&cs->csc1);
593 radeon_cs_context_cleanup(&cs->csc2);
594 p_atomic_dec(&cs->ws->num_cs);
595 radeon_destroy_cs_context(&cs->csc1);
596 radeon_destroy_cs_context(&cs->csc2);
597 FREE(cs);
598 }
599
600 static boolean radeon_bo_is_referenced(struct radeon_winsys_cs *rcs,
601 struct radeon_winsys_cs_handle *_buf,
602 enum radeon_bo_usage usage)
603 {
604 struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
605 struct radeon_bo *bo = (struct radeon_bo*)_buf;
606 int index;
607
608 if (!bo->num_cs_references)
609 return FALSE;
610
611 index = radeon_get_reloc(cs->csc, bo);
612 if (index == -1)
613 return FALSE;
614
615 if ((usage & RADEON_USAGE_WRITE) && cs->csc->relocs[index].write_domain)
616 return TRUE;
617 if ((usage & RADEON_USAGE_READ) && cs->csc->relocs[index].read_domains)
618 return TRUE;
619
620 return FALSE;
621 }
622
623 /* FENCES */
624
625 static struct pipe_fence_handle *
626 radeon_cs_create_fence(struct radeon_winsys_cs *rcs)
627 {
628 struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
629 struct pb_buffer *fence;
630
631 /* Create a fence, which is a dummy BO. */
632 fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1, TRUE,
633 RADEON_DOMAIN_GTT, 0);
634 /* Add the fence as a dummy relocation. */
635 cs->ws->base.cs_add_reloc(rcs, cs->ws->base.buffer_get_cs_handle(fence),
636 RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT,
637 RADEON_PRIO_FENCE);
638 return (struct pipe_fence_handle*)fence;
639 }
640
641 static bool radeon_fence_wait(struct radeon_winsys *ws,
642 struct pipe_fence_handle *fence,
643 uint64_t timeout)
644 {
645 return ws->buffer_wait((struct pb_buffer*)fence, timeout,
646 RADEON_USAGE_READWRITE);
647 }
648
/**
 * Make *dst refer to src. Fences are buffers, so the work is delegated to
 * pb_reference on the same pointers.
 */
static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src)
{
    struct pb_buffer **dst_buf = (struct pb_buffer**)dst;
    struct pb_buffer *src_buf = (struct pb_buffer*)src;

    pb_reference(dst_buf, src_buf);
}
654
655 void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
656 {
657 ws->base.ctx_create = radeon_drm_ctx_create;
658 ws->base.ctx_destroy = radeon_drm_ctx_destroy;
659 ws->base.cs_create = radeon_drm_cs_create;
660 ws->base.cs_destroy = radeon_drm_cs_destroy;
661 ws->base.cs_add_reloc = radeon_drm_cs_add_reloc;
662 ws->base.cs_get_reloc = radeon_drm_cs_get_reloc;
663 ws->base.cs_validate = radeon_drm_cs_validate;
664 ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
665 ws->base.cs_flush = radeon_drm_cs_flush;
666 ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
667 ws->base.cs_sync_flush = radeon_drm_cs_sync_flush;
668 ws->base.fence_wait = radeon_fence_wait;
669 ws->base.fence_reference = radeon_fence_reference;
670 }