winsys/radeon: remove definitions already present in radeon_drm.h
[mesa.git] / src / gallium / winsys / radeon / drm / radeon_drm_cs.c
/*
 * Copyright © 2008 Jérôme Glisse
 * Copyright © 2010 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
/*
 * Authors:
 *      Marek Olšák <maraeo@gmail.com>
 *
 * Based on work from libdrm_radeon by:
 *      Aapo Tahkola <aet@rasterburn.org>
 *      Nicolai Haehnle <prefect_@gmx.net>
 *      Jérôme Glisse <glisse@freedesktop.org>
 */

/*
    This file replaces libdrm's radeon_cs_gem with our own implementation.
    It's optimized specifically for Radeon DRM.
    Reloc writes and space checking are faster and simpler than their
    counterparts in libdrm (the time complexity of all the functions
    is O(1) in nearly all scenarios, thanks to hashing).

    It works like this:

    cs_add_reloc(cs, buf, read_domain, write_domain) adds a new relocation and
    also adds the size of 'buf' to the used_gart and used_vram winsys variables
    based on the domains, which are simply OR'd for accounting purposes.
    The addition is skipped if the reloc is already present in the list, but it
    still accounts for any newly-referenced domains.

    cs_validate is then called, which just checks:
        used_vram/gart < vram/gart_size * 0.8
    The 0.8 factor allows for some memory fragmentation. If the validation
    fails, the pipe driver flushes the CS and tries the validation again,
    i.e. it validates only that one operation. If it fails again, it drops
    the operation on the floor and prints a nasty message to stderr.
    (done in the pipe driver)

    cs_write_reloc(cs, buf) just writes a reloc that has been added using
    cs_add_reloc. The read_domain and write_domain parameters have been removed,
    because we already specify them in cs_add_reloc.
*/

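/*
    As an illustration only (the control flow below is an assumption about how
    a pipe driver typically uses this interface, not something defined in this
    file), a driver-side sequence might look roughly like:

        ws->cs_add_reloc(cs, buf, RADEON_USAGE_READWRITE,
                         RADEON_DOMAIN_VRAM, RADEON_PRIO_MIN);
        if (!ws->cs_validate(cs)) {
            // flush the current CS and validate just this one operation again
            flush_cs(flush_data, RADEON_FLUSH_ASYNC);
            ws->cs_add_reloc(cs, buf, RADEON_USAGE_READWRITE,
                             RADEON_DOMAIN_VRAM, RADEON_PRIO_MIN);
            ws->cs_validate(cs);
        }
        ... emit command packets referencing 'buf' ...
        ws->cs_write_reloc(cs, buf);   // emits the relocation NOP packet
*/
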
#include "radeon_drm_cs.h"

#include "util/u_memory.h"
#include "os/os_time.h"

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <xf86drm.h>


#define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))

static boolean radeon_init_cs_context(struct radeon_cs_context *csc,
                                      struct radeon_drm_winsys *ws)
{
    csc->fd = ws->fd;
    csc->nrelocs = 512;
    csc->relocs_bo = (struct radeon_bo**)
                     CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*));
    if (!csc->relocs_bo) {
        return FALSE;
    }

    csc->relocs = (struct drm_radeon_cs_reloc*)
                  CALLOC(1, csc->nrelocs * sizeof(struct drm_radeon_cs_reloc));
    if (!csc->relocs) {
        FREE(csc->relocs_bo);
        return FALSE;
    }

    csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
    csc->chunks[0].length_dw = 0;
    csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf;
    csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
    csc->chunks[1].length_dw = 0;
    csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
    csc->chunks[2].length_dw = 2;
    csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)&csc->flags;

    csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0];
    csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1];
    csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2];

    csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array;
    return TRUE;
}

static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
{
    unsigned i;

    for (i = 0; i < csc->crelocs; i++) {
        p_atomic_dec(&csc->relocs_bo[i]->num_cs_references);
        radeon_bo_reference(&csc->relocs_bo[i], NULL);
    }

    csc->crelocs = 0;
    csc->validated_crelocs = 0;
    csc->chunks[0].length_dw = 0;
    csc->chunks[1].length_dw = 0;
    csc->used_gart = 0;
    csc->used_vram = 0;
    memset(csc->is_handle_added, 0, sizeof(csc->is_handle_added));
}

static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
{
    radeon_cs_context_cleanup(csc);
    FREE(csc->relocs_bo);
    FREE(csc->relocs);
}


static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws,
                                                     enum ring_type ring_type,
                                                     struct radeon_winsys_cs_handle *trace_buf)
{
    struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
    struct radeon_drm_cs *cs;

    cs = CALLOC_STRUCT(radeon_drm_cs);
    if (!cs) {
        return NULL;
    }
    pipe_semaphore_init(&cs->flush_completed, 1);

    cs->ws = ws;
    cs->trace_buf = (struct radeon_bo*)trace_buf;

    if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
        FREE(cs);
        return NULL;
    }
    if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
        radeon_destroy_cs_context(&cs->csc1);
        FREE(cs);
        return NULL;
    }

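    /* The CS is double-buffered: 'csc' is the context currently being filled
     * by the driver, while 'cst' holds the last flushed context, which may
     * still be in flight in the flush thread. radeon_drm_cs_flush() swaps
     * the two. */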
    /* Set the first command buffer as current. */
    cs->csc = &cs->csc1;
    cs->cst = &cs->csc2;
    cs->base.buf = cs->csc->buf;
    cs->base.ring_type = ring_type;

    p_atomic_inc(&ws->num_cs);
    return &cs->base;
}

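/* Append one dword to the command buffer and advance the write pointer. */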
#define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value)

static INLINE void update_reloc(struct drm_radeon_cs_reloc *reloc,
                                enum radeon_bo_domain rd,
                                enum radeon_bo_domain wd,
                                unsigned priority,
                                enum radeon_bo_domain *added_domains)
{
    *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);

    reloc->read_domains |= rd;
    reloc->write_domain |= wd;
    reloc->flags = MAX2(reloc->flags, priority);
}

int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo)
{
    struct drm_radeon_cs_reloc *reloc;
    unsigned i;
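    /* The handle is hashed by masking with the table size minus one; this
     * assumes sizeof(csc->is_handle_added) is a power of two. */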
    unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);

    if (csc->is_handle_added[hash]) {
        i = csc->reloc_indices_hashlist[hash];
        reloc = &csc->relocs[i];
        if (reloc->handle == bo->handle) {
            return i;
        }

        /* Hash collision, look for the BO in the list of relocs linearly. */
        for (i = csc->crelocs; i != 0;) {
            --i;
            reloc = &csc->relocs[i];
            if (reloc->handle == bo->handle) {
                /* Put this reloc in the hash list.
                 * This will prevent additional hash collisions if there are
                 * several consecutive get_reloc calls for the same buffer.
                 *
                 * Example: Assuming buffers A,B,C collide in the hash list,
                 * the following sequence of relocs:
                 *         AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
                 * will collide here: ^ and here:   ^,
                 * meaning that we should get very few collisions in the end. */
                csc->reloc_indices_hashlist[hash] = i;
                /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
                return i;
            }
        }
    }

    return -1;
}

static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
                                 struct radeon_bo *bo,
                                 enum radeon_bo_usage usage,
                                 enum radeon_bo_domain domains,
                                 unsigned priority,
                                 enum radeon_bo_domain *added_domains)
{
    struct radeon_cs_context *csc = cs->csc;
    struct drm_radeon_cs_reloc *reloc;
    unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);
    enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
    enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
    bool update_hash = TRUE;
    int i;

    priority = MIN2(priority, 15);
    *added_domains = 0;

    if (csc->is_handle_added[hash]) {
        i = csc->reloc_indices_hashlist[hash];
        reloc = &csc->relocs[i];

        if (reloc->handle != bo->handle) {
            /* Hash collision, look for the BO in the list of relocs linearly. */
            for (i = csc->crelocs - 1; i >= 0; i--) {
                reloc = &csc->relocs[i];
                if (reloc->handle == bo->handle) {
                    /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
                    break;
                }
            }
        }

        if (i >= 0) {
            update_reloc(reloc, rd, wd, priority, added_domains);

            /* For async DMA, every add_reloc call must add a buffer to the list
             * no matter how many duplicates there are. This is due to the fact
             * the DMA CS checker doesn't use NOP packets for offset patching,
             * but always uses the i-th buffer from the list to patch the i-th
             * offset. If there are N offsets in a DMA CS, there must also be N
             * buffers in the relocation list.
             *
             * This doesn't have to be done if virtual memory is enabled,
             * because there is no offset patching with virtual memory.
             */
            if (cs->base.ring_type != RING_DMA || cs->ws->info.r600_virtual_address) {
                csc->reloc_indices_hashlist[hash] = i;
                return i;
            }
            update_hash = FALSE;
        }
    }

    /* New relocation, check if the backing array is large enough. */
    if (csc->crelocs >= csc->nrelocs) {
        uint32_t size;
        csc->nrelocs += 10;

        size = csc->nrelocs * sizeof(struct radeon_bo*);
        csc->relocs_bo = realloc(csc->relocs_bo, size);

        size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc);
        csc->relocs = realloc(csc->relocs, size);

        csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    }

    /* Initialize the new relocation. */
    csc->relocs_bo[csc->crelocs] = NULL;
    radeon_bo_reference(&csc->relocs_bo[csc->crelocs], bo);
    p_atomic_inc(&bo->num_cs_references);
    reloc = &csc->relocs[csc->crelocs];
    reloc->handle = bo->handle;
    reloc->read_domains = rd;
    reloc->write_domain = wd;
    reloc->flags = priority;

    csc->is_handle_added[hash] = TRUE;
    if (update_hash) {
        csc->reloc_indices_hashlist[hash] = csc->crelocs;
    }

    csc->chunks[1].length_dw += RELOC_DWORDS;

    *added_domains = rd | wd;
    return csc->crelocs++;
}

static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
                                        struct radeon_winsys_cs_handle *buf,
                                        enum radeon_bo_usage usage,
                                        enum radeon_bo_domain domains,
                                        enum radeon_bo_priority priority)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)buf;
    enum radeon_bo_domain added_domains;
    unsigned index = radeon_add_reloc(cs, bo, usage, domains, priority, &added_domains);

    if (added_domains & RADEON_DOMAIN_GTT)
        cs->csc->used_gart += bo->base.size;
    if (added_domains & RADEON_DOMAIN_VRAM)
        cs->csc->used_vram += bo->base.size;

    return index;
}

static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    boolean status =
        cs->csc->used_gart < cs->ws->info.gart_size * 0.8 &&
        cs->csc->used_vram < cs->ws->info.vram_size * 0.8;

    if (status) {
        cs->csc->validated_crelocs = cs->csc->crelocs;
    } else {
        /* Remove recently-added relocations. The validation failed with them
         * and the CS is about to be flushed because of that. Keep only
         * the already-validated relocations. */
        unsigned i;

        for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) {
            p_atomic_dec(&cs->csc->relocs_bo[i]->num_cs_references);
            radeon_bo_reference(&cs->csc->relocs_bo[i], NULL);
        }
        cs->csc->crelocs = cs->csc->validated_crelocs;

        /* Flush if there are any relocs. Clean up otherwise. */
        if (cs->csc->crelocs) {
            cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC);
        } else {
            radeon_cs_context_cleanup(cs->csc);

            assert(cs->base.cdw == 0);
            if (cs->base.cdw != 0) {
                fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__);
            }
        }
    }
    return status;
}

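/* Like cs_validate, but it also accounts for memory that is about to be
 * added (vram/gtt) and checks against a stricter 0.7 limit instead of 0.8. */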
static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    boolean status =
        (cs->csc->used_gart + gtt) < cs->ws->info.gart_size * 0.7 &&
        (cs->csc->used_vram + vram) < cs->ws->info.vram_size * 0.7;

    return status;
}

static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs,
                                      struct radeon_winsys_cs_handle *buf)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)buf;
    unsigned index = radeon_get_reloc(cs->csc, bo);

    if (index == -1) {
        fprintf(stderr, "radeon: Cannot get a relocation in %s.\n", __func__);
        return;
    }

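    /* Emit a PKT3 NOP packet (header 0xc0001000) whose single payload dword
     * is the dword offset of the relocation within the relocation chunk;
     * the kernel CS parser uses it to locate the reloc when patching buffer
     * addresses. */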
    OUT_CS(&cs->base, 0xc0001000);
    OUT_CS(&cs->base, index * RELOC_DWORDS);
}

void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc)
{
    unsigned i;

    if (drmCommandWriteRead(csc->fd, DRM_RADEON_CS,
                            &csc->cs, sizeof(struct drm_radeon_cs))) {
        if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) {
            unsigned i;

            fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n");
            for (i = 0; i < csc->chunks[0].length_dw; i++) {
                fprintf(stderr, "0x%08X\n", csc->buf[i]);
            }
        } else {
            fprintf(stderr, "radeon: The kernel rejected CS, "
                            "see dmesg for more information.\n");
        }
    }

    if (cs->trace_buf) {
        radeon_dump_cs_on_lockup(cs, csc);
    }

    for (i = 0; i < csc->crelocs; i++)
        p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls);

    radeon_cs_context_cleanup(csc);
}

/*
 * Make sure previous submissions of this CS have completed.
 */
void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    /* Wait for any pending ioctl to complete. */
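    /* The wait/signal pair leaves the semaphore count unchanged: it only
     * blocks until the flush thread has signalled completion of the last
     * queued submission, then immediately releases the semaphore again. */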
    if (cs->ws->thread) {
        pipe_semaphore_wait(&cs->flush_completed);
        pipe_semaphore_signal(&cs->flush_completed);
    }
}

DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", FALSE)

static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags, uint32_t cs_trace_id)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    switch (cs->base.ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements;
         * r6xx requires at least 4 DW alignment to avoid a hw bug.
         */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
        }
        break;
    case RING_UVD:
        while (rcs->cdw & 15)
            OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        break;
    default:
        break;
    }

    if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) {
        fprintf(stderr, "radeon: command stream overflowed\n");
    }

    radeon_drm_cs_sync_flush(rcs);

    /* Flip command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    cs->cst->cs_trace_id = cs_trace_id;

    /* If the CS is not empty and has not overflowed, emit it in a separate thread. */
    if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) {
        unsigned i, crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
        }

        switch (cs->base.ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        case RING_VCE:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_VCE;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 2;
            if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
                cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_COMPUTE) {
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

        if (cs->ws->thread) {
            pipe_semaphore_wait(&cs->flush_completed);
            radeon_drm_ws_queue_cs(cs->ws, cs);
            if (!(flags & RADEON_FLUSH_ASYNC))
                radeon_drm_cs_sync_flush(rcs);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.buf = cs->csc->buf;
    cs->base.cdw = 0;

    cs->ws->num_cs_flushes++;
}

static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    radeon_drm_cs_sync_flush(rcs);
    pipe_semaphore_destroy(&cs->flush_completed);
    radeon_cs_context_cleanup(&cs->csc1);
    radeon_cs_context_cleanup(&cs->csc2);
    p_atomic_dec(&cs->ws->num_cs);
    radeon_destroy_cs_context(&cs->csc1);
    radeon_destroy_cs_context(&cs->csc2);
    FREE(cs);
}

static void radeon_drm_cs_set_flush(struct radeon_winsys_cs *rcs,
                                    void (*flush)(void *ctx, unsigned flags),
                                    void *user)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    cs->flush_cs = flush;
    cs->flush_data = user;
}

static boolean radeon_bo_is_referenced(struct radeon_winsys_cs *rcs,
                                       struct radeon_winsys_cs_handle *_buf,
                                       enum radeon_bo_usage usage)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)_buf;
    int index;

    if (!bo->num_cs_references)
        return FALSE;

    index = radeon_get_reloc(cs->csc, bo);
    if (index == -1)
        return FALSE;

    if ((usage & RADEON_USAGE_WRITE) && cs->csc->relocs[index].write_domain)
        return TRUE;
    if ((usage & RADEON_USAGE_READ) && cs->csc->relocs[index].read_domains)
        return TRUE;

    return FALSE;
}

/* FENCES */

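/* The fence is a dummy 1-byte GTT buffer added to the CS as a relocation,
 * so it becomes busy when the CS is submitted and idle once the hardware
 * has finished executing the CS. Waiting on the fence therefore reduces to
 * waiting for that buffer to become idle. */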
static struct pipe_fence_handle *
radeon_cs_create_fence(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct pb_buffer *fence;

    /* Create a fence, which is a dummy BO. */
    fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1, TRUE,
                                       RADEON_DOMAIN_GTT);
    /* Add the fence as a dummy relocation. */
    cs->ws->base.cs_add_reloc(rcs, cs->ws->base.buffer_get_cs_handle(fence),
                              RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT,
                              RADEON_PRIO_MIN);
    return (struct pipe_fence_handle*)fence;
}

static bool radeon_fence_wait(struct radeon_winsys *ws,
                              struct pipe_fence_handle *fence,
                              uint64_t timeout)
{
    struct pb_buffer *rfence = (struct pb_buffer*)fence;

    if (timeout == 0)
        return !ws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE);

    if (timeout != PIPE_TIMEOUT_INFINITE) {
        int64_t start_time = os_time_get();

        /* Convert to microseconds. */
        timeout /= 1000;

        /* Wait in a loop. */
        while (ws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE)) {
            if (os_time_get() - start_time >= timeout) {
                return FALSE;
            }
            os_time_sleep(10);
        }
        return TRUE;
    }

    ws->buffer_wait(rfence, RADEON_USAGE_READWRITE);
    return TRUE;
}

static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src)
{
    pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src);
}

void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
{
    ws->base.cs_create = radeon_drm_cs_create;
    ws->base.cs_destroy = radeon_drm_cs_destroy;
    ws->base.cs_add_reloc = radeon_drm_cs_add_reloc;
    ws->base.cs_validate = radeon_drm_cs_validate;
    ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
    ws->base.cs_write_reloc = radeon_drm_cs_write_reloc;
    ws->base.cs_flush = radeon_drm_cs_flush;
    ws->base.cs_set_flush_callback = radeon_drm_cs_set_flush;
    ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
    ws->base.cs_sync_flush = radeon_drm_cs_sync_flush;
    ws->base.cs_create_fence = radeon_cs_create_fence;
    ws->base.fence_wait = radeon_fence_wait;
    ws->base.fence_reference = radeon_fence_reference;
}