winsys/radeon: rename nrelocs, crelocs to max_relocs, num_relocs
src/gallium/winsys/radeon/drm/radeon_drm_cs.c
/*
 * Copyright © 2008 Jérôme Glisse
 * Copyright © 2010 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
/*
 * Authors:
 *      Marek Olšák <maraeo@gmail.com>
 *
 * Based on work from libdrm_radeon by:
 *      Aapo Tahkola <aet@rasterburn.org>
 *      Nicolai Haehnle <prefect_@gmx.net>
 *      Jérôme Glisse <glisse@freedesktop.org>
 */

/*
    This file replaces libdrm's radeon_cs_gem with our own implementation.
    It's optimized specifically for Radeon DRM.
    Adding buffers and space checking are faster and simpler than their
    counterparts in libdrm (the time complexity of all the functions
    is O(1) in nearly all scenarios, thanks to hashing).

    It works like this:

    cs_add_buffer(cs, buf, read_domain, write_domain) adds a new relocation and
    also adds the size of 'buf' to the used_gart and used_vram winsys variables
    based on the domains, which are simply or'd for the accounting purposes.
    The adding is skipped if the reloc is already present in the list, but it
    still accounts for any newly-referenced domains.

    cs_validate is then called, which just checks:
        used_vram/gart < vram/gart_size * 0.8
    The 0.8 number allows for some memory fragmentation. If the validation
    fails, the pipe driver flushes the CS and tries to do the validation again,
    i.e. it validates only that one operation. If it fails again, it drops
    the operation on the floor and prints a nasty message to stderr.
    (done in the pipe driver)

    cs_write_reloc(cs, buf) just writes a reloc that has been added using
    cs_add_buffer. The read_domain and write_domain parameters have been removed,
    because we already specify them in cs_add_buffer.
*/
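
/*
    Illustrative sketch, not part of the original file: roughly how a pipe
    driver is expected to drive the interface described above. The helper
    names (try_draw, emit_draw_packets) and the ctx layout are hypothetical;
    only cs_add_buffer, cs_validate and cs_flush come from the winsys
    interface implemented in this file.

    static void try_draw(struct hypothetical_ctx *ctx, struct pb_buffer *vb)
    {
        struct radeon_winsys *ws = ctx->ws;
        struct radeon_winsys_cs *cs = ctx->cs;

        // Declare every buffer the packets will reference.
        ws->cs_add_buffer(cs, vb, RADEON_USAGE_READ, RADEON_DOMAIN_GTT,
                          RADEON_PRIO_VERTEX_BUFFER);

        // If the memory estimate no longer fits, flush and re-add the buffer,
        // validating just this one operation as explained above.
        if (!ws->cs_validate(cs)) {
            ws->cs_flush(cs, RADEON_FLUSH_ASYNC, NULL);
            ws->cs_add_buffer(cs, vb, RADEON_USAGE_READ, RADEON_DOMAIN_GTT,
                              RADEON_PRIO_VERTEX_BUFFER);
        }

        emit_draw_packets(ctx, cs);   // writes dwords into cs->current.buf
    }
*/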

#include "radeon_drm_cs.h"

#include "util/u_memory.h"
#include "os/os_time.h"

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <xf86drm.h>


#define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))

static struct pipe_fence_handle *
radeon_cs_create_fence(struct radeon_winsys_cs *rcs);
static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src);

static struct radeon_winsys_ctx *radeon_drm_ctx_create(struct radeon_winsys *ws)
{
    /* No context support here. Just return the winsys pointer
     * as the "context". */
    return (struct radeon_winsys_ctx*)ws;
}

static void radeon_drm_ctx_destroy(struct radeon_winsys_ctx *ctx)
{
    /* No context support here. */
}

static bool radeon_init_cs_context(struct radeon_cs_context *csc,
                                   struct radeon_drm_winsys *ws)
{
    int i;

    csc->fd = ws->fd;

    csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
    csc->chunks[0].length_dw = 0;
    csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf;
    csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
    csc->chunks[1].length_dw = 0;
    csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
    csc->chunks[2].length_dw = 2;
    csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)&csc->flags;

    csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0];
    csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1];
    csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2];

    csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array;

    for (i = 0; i < ARRAY_SIZE(csc->reloc_indices_hashlist); i++) {
        csc->reloc_indices_hashlist[i] = -1;
    }
    return true;
}

static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
{
    unsigned i;

    for (i = 0; i < csc->num_relocs; i++) {
        p_atomic_dec(&csc->relocs_bo[i].bo->num_cs_references);
        radeon_bo_reference(&csc->relocs_bo[i].bo, NULL);
    }

    csc->num_relocs = 0;
    csc->num_validated_relocs = 0;
    csc->chunks[0].length_dw = 0;
    csc->chunks[1].length_dw = 0;

    for (i = 0; i < ARRAY_SIZE(csc->reloc_indices_hashlist); i++) {
        csc->reloc_indices_hashlist[i] = -1;
    }
}

static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
{
    radeon_cs_context_cleanup(csc);
    FREE(csc->relocs_bo);
    FREE(csc->relocs);
}


static struct radeon_winsys_cs *
radeon_drm_cs_create(struct radeon_winsys_ctx *ctx,
                     enum ring_type ring_type,
                     void (*flush)(void *ctx, unsigned flags,
                                   struct pipe_fence_handle **fence),
                     void *flush_ctx)
{
    struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)ctx;
    struct radeon_drm_cs *cs;

    cs = CALLOC_STRUCT(radeon_drm_cs);
    if (!cs) {
        return NULL;
    }
    util_queue_fence_init(&cs->flush_completed);

    cs->ws = ws;
    cs->flush_cs = flush;
    cs->flush_data = flush_ctx;

    if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
        FREE(cs);
        return NULL;
    }
    if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
        radeon_destroy_cs_context(&cs->csc1);
        FREE(cs);
        return NULL;
    }

    /* Set the first command buffer as current. */
    cs->csc = &cs->csc1;
    cs->cst = &cs->csc2;
    cs->base.current.buf = cs->csc->buf;
    cs->base.current.max_dw = ARRAY_SIZE(cs->csc->buf);
    cs->ring_type = ring_type;

    p_atomic_inc(&ws->num_cs);
    return &cs->base;
}

static inline void update_reloc(struct drm_radeon_cs_reloc *reloc,
                                enum radeon_bo_domain rd,
                                enum radeon_bo_domain wd,
                                unsigned priority,
                                enum radeon_bo_domain *added_domains)
{
    *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);

    reloc->read_domains |= rd;
    reloc->write_domain |= wd;
    reloc->flags = MAX2(reloc->flags, priority);
}

int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo)
{
    unsigned hash = bo->handle & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
    int i = csc->reloc_indices_hashlist[hash];

    /* not found or found */
    if (i == -1 || csc->relocs_bo[i].bo == bo)
        return i;

    /* Hash collision, look for the BO in the list of relocs linearly. */
    for (i = csc->num_relocs - 1; i >= 0; i--) {
        if (csc->relocs_bo[i].bo == bo) {
            /* Put this reloc in the hash list.
             * This will prevent additional hash collisions if there are
             * several consecutive lookup_buffer calls for the same buffer.
             *
             * Example: Assuming buffers A,B,C collide in the hash list,
             * the following sequence of relocs:
             *         AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
             * will collide here: ^ and here:   ^,
             * meaning that we should get very few collisions in the end. */
            csc->reloc_indices_hashlist[hash] = i;
            return i;
        }
    }
    return -1;
}
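
/*
    Illustrative note, not part of the original file: the hash used by
    radeon_lookup_buffer relies on reloc_indices_hashlist having a
    power-of-two length, so that masking with (size - 1) behaves like a cheap
    modulo. For a hypothetical table of 256 entries:

        unsigned hash = bo->handle & (256 - 1);   // same bucket as handle % 256

    With a non-power-of-two size the mask would skip buckets entirely, so the
    array length defined in radeon_drm_cs.h must stay a power of two.
*/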

static unsigned radeon_add_buffer(struct radeon_drm_cs *cs,
                                  struct radeon_bo *bo,
                                  enum radeon_bo_usage usage,
                                  enum radeon_bo_domain domains,
                                  unsigned priority,
                                  enum radeon_bo_domain *added_domains)
{
    struct radeon_cs_context *csc = cs->csc;
    struct drm_radeon_cs_reloc *reloc;
    unsigned hash = bo->handle & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
    enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
    enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
    int i = -1;

    assert(priority < 64);
    *added_domains = 0;

    i = radeon_lookup_buffer(csc, bo);

    if (i >= 0) {
        reloc = &csc->relocs[i];
        update_reloc(reloc, rd, wd, priority / 4, added_domains);
        csc->relocs_bo[i].priority_usage |= 1llu << priority;

        /* For async DMA, every add_buffer call must add a buffer to the list
         * no matter how many duplicates there are. This is due to the fact
         * that the DMA CS checker doesn't use NOP packets for offset patching,
         * but always uses the i-th buffer from the list to patch the i-th
         * offset. If there are N offsets in a DMA CS, there must also be N
         * buffers in the relocation list.
         *
         * This doesn't have to be done if virtual memory is enabled,
         * because there is no offset patching with virtual memory.
         */
        if (cs->ring_type != RING_DMA || cs->ws->info.has_virtual_memory) {
            return i;
        }
    }

    /* New relocation, check if the backing array is large enough. */
    if (csc->num_relocs >= csc->max_relocs) {
        uint32_t size;
        csc->max_relocs = MAX2(csc->max_relocs + 16, (unsigned)(csc->max_relocs * 1.3));

        size = csc->max_relocs * sizeof(csc->relocs_bo[0]);
        csc->relocs_bo = realloc(csc->relocs_bo, size);

        size = csc->max_relocs * sizeof(struct drm_radeon_cs_reloc);
        csc->relocs = realloc(csc->relocs, size);

        csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    }

    /* Initialize the new relocation. */
    csc->relocs_bo[csc->num_relocs].bo = NULL;
    csc->relocs_bo[csc->num_relocs].priority_usage = 1llu << priority;
    radeon_bo_reference(&csc->relocs_bo[csc->num_relocs].bo, bo);
    p_atomic_inc(&bo->num_cs_references);
    reloc = &csc->relocs[csc->num_relocs];
    reloc->handle = bo->handle;
    reloc->read_domains = rd;
    reloc->write_domain = wd;
    reloc->flags = priority / 4;

    csc->reloc_indices_hashlist[hash] = csc->num_relocs;

    csc->chunks[1].length_dw += RELOC_DWORDS;

    *added_domains = rd | wd;
    return csc->num_relocs++;
}
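
/*
    Illustrative note, not part of the original file: the
    MAX2(n + 16, n * 1.3) growth rule above is additive for small reloc
    arrays and geometric for larger ones. Starting from an empty list,
    max_relocs grows roughly as

        0 -> 16 -> 32 -> 48 -> 64 -> 83 -> 107 -> ...

    The +16 term dominates up to about 53 entries; beyond that the 1.3x
    factor takes over, keeping the number of realloc calls logarithmic in
    the final buffer count.
*/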

static unsigned radeon_drm_cs_add_buffer(struct radeon_winsys_cs *rcs,
                                         struct pb_buffer *buf,
                                         enum radeon_bo_usage usage,
                                         enum radeon_bo_domain domains,
                                         enum radeon_bo_priority priority)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)buf;
    enum radeon_bo_domain added_domains;
    unsigned index = radeon_add_buffer(cs, bo, usage, domains, priority,
                                       &added_domains);

    if (added_domains & RADEON_DOMAIN_VRAM)
        cs->base.used_vram += bo->base.size;
    else if (added_domains & RADEON_DOMAIN_GTT)
        cs->base.used_gart += bo->base.size;

    return index;
}

static int radeon_drm_cs_lookup_buffer(struct radeon_winsys_cs *rcs,
                                       struct pb_buffer *buf)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    return radeon_lookup_buffer(cs->csc, (struct radeon_bo*)buf);
}

static bool radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    bool status =
        cs->base.used_gart < cs->ws->info.gart_size * 0.8 &&
        cs->base.used_vram < cs->ws->info.vram_size * 0.8;

    if (status) {
        cs->csc->num_validated_relocs = cs->csc->num_relocs;
    } else {
        /* Remove recently-added buffers. The validation failed with them
         * and the CS is about to be flushed because of that. Keep only
         * the already-validated buffers. */
        unsigned i;

        for (i = cs->csc->num_validated_relocs; i < cs->csc->num_relocs; i++) {
            p_atomic_dec(&cs->csc->relocs_bo[i].bo->num_cs_references);
            radeon_bo_reference(&cs->csc->relocs_bo[i].bo, NULL);
        }
        cs->csc->num_relocs = cs->csc->num_validated_relocs;

        /* Flush if there are any relocs. Clean up otherwise. */
        if (cs->csc->num_relocs) {
            cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
        } else {
            radeon_cs_context_cleanup(cs->csc);
            cs->base.used_vram = 0;
            cs->base.used_gart = 0;

            assert(cs->base.current.cdw == 0);
            if (cs->base.current.cdw != 0) {
                fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__);
            }
        }
    }
    return status;
}

static bool radeon_drm_cs_check_space(struct radeon_winsys_cs *rcs, unsigned dw)
{
    assert(rcs->current.cdw <= rcs->current.max_dw);
    return rcs->current.max_dw - rcs->current.cdw >= dw;
}

static unsigned radeon_drm_cs_get_buffer_list(struct radeon_winsys_cs *rcs,
                                              struct radeon_bo_list_item *list)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    int i;

    if (list) {
        for (i = 0; i < cs->csc->num_relocs; i++) {
            list[i].bo_size = cs->csc->relocs_bo[i].bo->base.size;
            list[i].vm_address = cs->csc->relocs_bo[i].bo->va;
            list[i].priority_usage = cs->csc->relocs_bo[i].priority_usage;
        }
    }
    return cs->csc->num_relocs;
}

void radeon_drm_cs_emit_ioctl_oneshot(void *job, int thread_index)
{
    struct radeon_cs_context *csc = ((struct radeon_drm_cs*)job)->cst;
    unsigned i;
    int r;

    r = drmCommandWriteRead(csc->fd, DRM_RADEON_CS,
                            &csc->cs, sizeof(struct drm_radeon_cs));
    if (r) {
        if (r == -ENOMEM)
            fprintf(stderr, "radeon: Not enough memory for command submission.\n");
        else if (debug_get_bool_option("RADEON_DUMP_CS", false)) {
            unsigned i;

            fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n");
            for (i = 0; i < csc->chunks[0].length_dw; i++) {
                fprintf(stderr, "0x%08X\n", csc->buf[i]);
            }
        } else {
            fprintf(stderr, "radeon: The kernel rejected CS, "
                            "see dmesg for more information (%i).\n", r);
        }
    }

    for (i = 0; i < csc->num_relocs; i++)
        p_atomic_dec(&csc->relocs_bo[i].bo->num_active_ioctls);

    radeon_cs_context_cleanup(csc);
}

/*
 * Make sure previous submissions of this CS have completed.
 */
void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    /* Wait for any pending ioctl of this CS to complete. */
    if (util_queue_is_initialized(&cs->ws->cs_queue))
        util_queue_job_wait(&cs->flush_completed);
}

DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", false)

static int radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
                               unsigned flags,
                               struct pipe_fence_handle **fence)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    switch (cs->ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->current.cdw & 7)
                radeon_emit(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->current.cdw & 7)
                radeon_emit(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements;
         * r6xx requires at least 4 DW alignment to avoid a hw bug.
         */
        if (cs->ws->info.gfx_ib_pad_with_type2) {
            while (rcs->current.cdw & 7)
                radeon_emit(&cs->base, 0x80000000); /* type2 nop packet */
        } else {
            while (rcs->current.cdw & 7)
                radeon_emit(&cs->base, 0xffff1000); /* type3 nop packet */
        }
        break;
    case RING_UVD:
        while (rcs->current.cdw & 15)
            radeon_emit(&cs->base, 0x80000000); /* type2 nop packet */
        break;
    default:
        break;
    }

    if (rcs->current.cdw > rcs->current.max_dw) {
        fprintf(stderr, "radeon: command stream overflowed\n");
    }

    if (fence) {
        if (cs->next_fence) {
            radeon_fence_reference(fence, cs->next_fence);
        } else {
            radeon_fence_reference(fence, NULL);
            *fence = radeon_cs_create_fence(rcs);
        }
    }
    radeon_fence_reference(&cs->next_fence, NULL);

    radeon_drm_cs_sync_flush(rcs);

    /* Swap command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    /* If the CS is not empty or overflowed, emit it in a separate thread. */
    if (cs->base.current.cdw && cs->base.current.cdw <= cs->base.current.max_dw && !debug_get_option_noop()) {
        unsigned i, num_relocs;

        num_relocs = cs->cst->num_relocs;

        cs->cst->chunks[0].length_dw = cs->base.current.cdw;

        for (i = 0; i < num_relocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i].bo->num_active_ioctls);
        }

        switch (cs->ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.has_virtual_memory) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        case RING_VCE:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_VCE;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
        case RING_COMPUTE:
            cs->cst->flags[0] = RADEON_CS_KEEP_TILING_FLAGS;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 3;

            if (cs->ws->info.has_virtual_memory) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ring_type == RING_COMPUTE) {
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

        if (util_queue_is_initialized(&cs->ws->cs_queue)) {
            util_queue_add_job(&cs->ws->cs_queue, cs, &cs->flush_completed,
                               radeon_drm_cs_emit_ioctl_oneshot, NULL);
            if (!(flags & RADEON_FLUSH_ASYNC))
                radeon_drm_cs_sync_flush(rcs);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs, 0);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.current.buf = cs->csc->buf;
    cs->base.current.cdw = 0;
    cs->base.used_vram = 0;
    cs->base.used_gart = 0;

    cs->ws->num_cs_flushes++;
    return 0;
}

static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    radeon_drm_cs_sync_flush(rcs);
    util_queue_fence_destroy(&cs->flush_completed);
    radeon_cs_context_cleanup(&cs->csc1);
    radeon_cs_context_cleanup(&cs->csc2);
    p_atomic_dec(&cs->ws->num_cs);
    radeon_destroy_cs_context(&cs->csc1);
    radeon_destroy_cs_context(&cs->csc2);
    radeon_fence_reference(&cs->next_fence, NULL);
    FREE(cs);
}

static bool radeon_bo_is_referenced(struct radeon_winsys_cs *rcs,
                                    struct pb_buffer *_buf,
                                    enum radeon_bo_usage usage)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)_buf;
    int index;

    if (!bo->num_cs_references)
        return false;

    index = radeon_lookup_buffer(cs->csc, bo);
    if (index == -1)
        return false;

    if ((usage & RADEON_USAGE_WRITE) && cs->csc->relocs[index].write_domain)
        return true;
    if ((usage & RADEON_USAGE_READ) && cs->csc->relocs[index].read_domains)
        return true;

    return false;
}

/* FENCES */

static struct pipe_fence_handle *
radeon_cs_create_fence(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct pb_buffer *fence;

    /* Create a fence, which is a dummy BO. */
    fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1,
                                       RADEON_DOMAIN_GTT, 0);
    /* Add the fence as a dummy relocation. */
    cs->ws->base.cs_add_buffer(rcs, fence,
                               RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT,
                               RADEON_PRIO_FENCE);
    return (struct pipe_fence_handle*)fence;
}

static bool radeon_fence_wait(struct radeon_winsys *ws,
                              struct pipe_fence_handle *fence,
                              uint64_t timeout)
{
    return ws->buffer_wait((struct pb_buffer*)fence, timeout,
                           RADEON_USAGE_READWRITE);
}

static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src)
{
    pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src);
}

static struct pipe_fence_handle *
radeon_drm_cs_get_next_fence(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct pipe_fence_handle *fence = NULL;

    if (cs->next_fence) {
        radeon_fence_reference(&fence, cs->next_fence);
        return fence;
    }

    fence = radeon_cs_create_fence(rcs);
    if (!fence)
        return NULL;

    radeon_fence_reference(&cs->next_fence, fence);
    return fence;
}

void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
{
    ws->base.ctx_create = radeon_drm_ctx_create;
    ws->base.ctx_destroy = radeon_drm_ctx_destroy;
    ws->base.cs_create = radeon_drm_cs_create;
    ws->base.cs_destroy = radeon_drm_cs_destroy;
    ws->base.cs_add_buffer = radeon_drm_cs_add_buffer;
    ws->base.cs_lookup_buffer = radeon_drm_cs_lookup_buffer;
    ws->base.cs_validate = radeon_drm_cs_validate;
    ws->base.cs_check_space = radeon_drm_cs_check_space;
    ws->base.cs_get_buffer_list = radeon_drm_cs_get_buffer_list;
    ws->base.cs_flush = radeon_drm_cs_flush;
    ws->base.cs_get_next_fence = radeon_drm_cs_get_next_fence;
    ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
    ws->base.cs_sync_flush = radeon_drm_cs_sync_flush;
    ws->base.fence_wait = radeon_fence_wait;
    ws->base.fence_reference = radeon_fence_reference;
}