winsys/radeon: consolidate hash table lookup
[mesa.git] src/gallium/winsys/radeon/drm/radeon_drm_cs.c
/*
 * Copyright © 2008 Jérôme Glisse
 * Copyright © 2010 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
/*
 * Authors:
 *      Marek Olšák <maraeo@gmail.com>
 *
 * Based on work from libdrm_radeon by:
 *      Aapo Tahkola <aet@rasterburn.org>
 *      Nicolai Haehnle <prefect_@gmx.net>
 *      Jérôme Glisse <glisse@freedesktop.org>
 */

/*
    This file replaces libdrm's radeon_cs_gem with our own implementation.
    It's optimized specifically for Radeon DRM.
    Reloc writes and space checking are faster and simpler than their
    counterparts in libdrm (the time complexity of all the functions
    is O(1) in nearly all scenarios, thanks to hashing).

    It works like this:

    cs_add_reloc(cs, buf, read_domain, write_domain) adds a new relocation and
    also adds the size of 'buf' to the used_gart and used_vram winsys variables
    based on the domains, which are simply OR'd for accounting purposes.
    The add is skipped if the reloc is already present in the list, but any
    newly-referenced domains are still accounted for.

    cs_validate is then called, which just checks:
        used_vram/gart < vram/gart_size * 0.8
    The 0.8 factor allows for some memory fragmentation. If the validation
    fails, the pipe driver flushes the CS and tries to do the validation again,
    i.e. it validates only that one operation. If it fails again, it drops
    the operation on the floor and prints some nasty message to stderr.
    (done in the pipe driver)

    cs_write_reloc(cs, buf) just writes a reloc that has been added using
    cs_add_reloc. The read_domain and write_domain parameters have been removed,
    because we already specify them in cs_add_reloc.
*/
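
/*
    For illustration only: a hypothetical pipe-driver call sequence matching
    the description above (the names rws, rcs and buf are placeholders and
    are not part of this file):

        rws->cs_add_reloc(rcs, buf, RADEON_USAGE_READWRITE,
                          RADEON_DOMAIN_VRAM, RADEON_PRIO_MIN);
        if (!rws->cs_validate(rcs)) {
            ... flush the CS and validate just this one operation again ...
        }
        ... emit command packets ...
        rws->cs_write_reloc(rcs, buf);   <-- emits the NOP + reloc-index pair

    cs_add_reloc returns the index of the relocation, and cs_write_reloc looks
    the buffer up again through the hash table in radeon_get_reloc below.
*/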

#include "radeon_drm_cs.h"

#include "util/u_memory.h"
#include "os/os_time.h"

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <xf86drm.h>


#define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))

static boolean radeon_init_cs_context(struct radeon_cs_context *csc,
                                      struct radeon_drm_winsys *ws)
{
    csc->fd = ws->fd;
    csc->nrelocs = 512;
    csc->relocs_bo = (struct radeon_bo**)
                     CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*));
    if (!csc->relocs_bo) {
        return FALSE;
    }

    csc->relocs = (struct drm_radeon_cs_reloc*)
                  CALLOC(1, csc->nrelocs * sizeof(struct drm_radeon_cs_reloc));
    if (!csc->relocs) {
        FREE(csc->relocs_bo);
        return FALSE;
    }

    csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
    csc->chunks[0].length_dw = 0;
    csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf;
    csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
    csc->chunks[1].length_dw = 0;
    csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
    csc->chunks[2].length_dw = 2;
    csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)&csc->flags;

    csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0];
    csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1];
    csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2];

    csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array;
    return TRUE;
}

static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
{
    unsigned i;

    for (i = 0; i < csc->crelocs; i++) {
        p_atomic_dec(&csc->relocs_bo[i]->num_cs_references);
        radeon_bo_reference(&csc->relocs_bo[i], NULL);
    }

    csc->crelocs = 0;
    csc->validated_crelocs = 0;
    csc->chunks[0].length_dw = 0;
    csc->chunks[1].length_dw = 0;
    csc->used_gart = 0;
    csc->used_vram = 0;
    memset(csc->is_handle_added, 0, sizeof(csc->is_handle_added));
}

static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
{
    radeon_cs_context_cleanup(csc);
    FREE(csc->relocs_bo);
    FREE(csc->relocs);
}


static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws,
                                                     enum ring_type ring_type,
                                                     struct radeon_winsys_cs_handle *trace_buf)
{
    struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
    struct radeon_drm_cs *cs;

    cs = CALLOC_STRUCT(radeon_drm_cs);
    if (!cs) {
        return NULL;
    }
    pipe_semaphore_init(&cs->flush_completed, 1);

    cs->ws = ws;
    cs->trace_buf = (struct radeon_bo*)trace_buf;

    if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
        FREE(cs);
        return NULL;
    }
    if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
        radeon_destroy_cs_context(&cs->csc1);
        FREE(cs);
        return NULL;
    }

    /* Set the first command buffer as current. */
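    /* The two contexts are double-buffered: 'csc' is the one currently being
     * filled by the driver, while 'cst' holds the previous IB while it is
     * being submitted (possibly from the winsys thread);
     * radeon_drm_cs_flush flips the two. */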
    cs->csc = &cs->csc1;
    cs->cst = &cs->csc2;
    cs->base.buf = cs->csc->buf;
    cs->base.ring_type = ring_type;

    p_atomic_inc(&ws->num_cs);
    return &cs->base;
}

#define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value)

static INLINE void update_reloc(struct drm_radeon_cs_reloc *reloc,
                                enum radeon_bo_domain rd,
                                enum radeon_bo_domain wd,
                                unsigned priority,
                                enum radeon_bo_domain *added_domains)
{
    *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);

    reloc->read_domains |= rd;
    reloc->write_domain |= wd;
    reloc->flags = MAX2(reloc->flags, priority);
}

int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo,
                     struct drm_radeon_cs_reloc **out_reloc)
{
    struct drm_radeon_cs_reloc *reloc = NULL;
    unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);
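    /* Note: this relies on the size of is_handle_added being a power of two,
     * so that masking with (size - 1) works as a cheap hash of the GEM
     * handle. */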
    int i = -1;

    if (csc->is_handle_added[hash]) {
        i = csc->reloc_indices_hashlist[hash];
        reloc = &csc->relocs[i];

        if (reloc->handle != bo->handle) {
            /* Hash collision, look for the BO in the list of relocs linearly. */
            for (i = csc->crelocs - 1; i >= 0; i--) {
                reloc = &csc->relocs[i];
                if (reloc->handle == bo->handle) {
                    /* Put this reloc in the hash list.
                     * This will prevent additional hash collisions if there are
                     * several consecutive get_reloc calls for the same buffer.
                     *
                     * Example: Assuming buffers A,B,C collide in the hash list,
                     * the following sequence of relocs:
                     *         AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
                     * will collide here: ^ and here:   ^,
                     * meaning that we should get very few collisions in the end. */
                    csc->reloc_indices_hashlist[hash] = i;
                    break;
                }
            }
        }
    }
    if (out_reloc)
        *out_reloc = reloc;
    return i;
}

static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
                                 struct radeon_bo *bo,
                                 enum radeon_bo_usage usage,
                                 enum radeon_bo_domain domains,
                                 unsigned priority,
                                 enum radeon_bo_domain *added_domains)
{
    struct radeon_cs_context *csc = cs->csc;
    struct drm_radeon_cs_reloc *reloc;
    unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);
    enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
    enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
    int i = -1;

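    /* The priority is stored in the reloc 'flags' field; clamp it to 15 and
     * let update_reloc keep the highest priority requested for a buffer. */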
    priority = MIN2(priority, 15);
    *added_domains = 0;

    i = radeon_get_reloc(csc, bo, &reloc);

    if (i >= 0) {
        update_reloc(reloc, rd, wd, priority, added_domains);

        /* For async DMA, every add_reloc call must add a buffer to the list
         * no matter how many duplicates there are. This is due to the fact
         * that the DMA CS checker doesn't use NOP packets for offset patching,
         * but always uses the i-th buffer from the list to patch the i-th
         * offset. If there are N offsets in a DMA CS, there must also be N
         * buffers in the relocation list.
         *
         * This doesn't have to be done if virtual memory is enabled,
         * because there is no offset patching with virtual memory.
         */
        if (cs->base.ring_type != RING_DMA || cs->ws->info.r600_virtual_address) {
            return i;
        }
    }

    /* New relocation, check if the backing array is large enough. */
    if (csc->crelocs >= csc->nrelocs) {
        uint32_t size;
        csc->nrelocs += 10;

        size = csc->nrelocs * sizeof(struct radeon_bo*);
        csc->relocs_bo = realloc(csc->relocs_bo, size);

        size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc);
        csc->relocs = realloc(csc->relocs, size);

        csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    }

    /* Initialize the new relocation. */
    csc->relocs_bo[csc->crelocs] = NULL;
    radeon_bo_reference(&csc->relocs_bo[csc->crelocs], bo);
    p_atomic_inc(&bo->num_cs_references);
    reloc = &csc->relocs[csc->crelocs];
    reloc->handle = bo->handle;
    reloc->read_domains = rd;
    reloc->write_domain = wd;
    reloc->flags = priority;

    csc->is_handle_added[hash] = TRUE;
    csc->reloc_indices_hashlist[hash] = csc->crelocs;

    csc->chunks[1].length_dw += RELOC_DWORDS;

    *added_domains = rd | wd;
    return csc->crelocs++;
}

static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
                                        struct radeon_winsys_cs_handle *buf,
                                        enum radeon_bo_usage usage,
                                        enum radeon_bo_domain domains,
                                        enum radeon_bo_priority priority)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)buf;
    enum radeon_bo_domain added_domains;
    unsigned index = radeon_add_reloc(cs, bo, usage, domains, priority, &added_domains);

    if (added_domains & RADEON_DOMAIN_GTT)
        cs->csc->used_gart += bo->base.size;
    if (added_domains & RADEON_DOMAIN_VRAM)
        cs->csc->used_vram += bo->base.size;

    return index;
}

static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    boolean status =
        cs->csc->used_gart < cs->ws->info.gart_size * 0.8 &&
        cs->csc->used_vram < cs->ws->info.vram_size * 0.8;

    if (status) {
        cs->csc->validated_crelocs = cs->csc->crelocs;
    } else {
        /* Remove the recently-added relocations. The validation failed with
         * them and the CS is about to be flushed because of that. Keep only
         * the already-validated relocations. */
        unsigned i;

        for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) {
            p_atomic_dec(&cs->csc->relocs_bo[i]->num_cs_references);
            radeon_bo_reference(&cs->csc->relocs_bo[i], NULL);
        }
        cs->csc->crelocs = cs->csc->validated_crelocs;

        /* Flush if there are any relocs. Clean up otherwise. */
        if (cs->csc->crelocs) {
            cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC);
        } else {
            radeon_cs_context_cleanup(cs->csc);

            assert(cs->base.cdw == 0);
            if (cs->base.cdw != 0) {
                fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__);
            }
        }
    }
    return status;
}

static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    boolean status =
        (cs->csc->used_gart + gtt) < cs->ws->info.gart_size * 0.7 &&
        (cs->csc->used_vram + vram) < cs->ws->info.vram_size * 0.7;

    return status;
}

static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs,
                                      struct radeon_winsys_cs_handle *buf)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)buf;
    unsigned index = radeon_get_reloc(cs->csc, bo, NULL);

    if (index == -1) {
        fprintf(stderr, "radeon: Cannot get a relocation in %s.\n", __func__);
        return;
    }

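    /* Emit a type-3 NOP packet whose single payload dword is the offset of
     * this relocation (in dwords) within the relocation chunk; the kernel
     * CS parser reads it to patch in the real buffer address. */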
    OUT_CS(&cs->base, 0xc0001000);
    OUT_CS(&cs->base, index * RELOC_DWORDS);
}

void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc)
{
    unsigned i;

    if (drmCommandWriteRead(csc->fd, DRM_RADEON_CS,
                            &csc->cs, sizeof(struct drm_radeon_cs))) {
        if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) {
            unsigned i;

            fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n");
            for (i = 0; i < csc->chunks[0].length_dw; i++) {
                fprintf(stderr, "0x%08X\n", csc->buf[i]);
            }
        } else {
            fprintf(stderr, "radeon: The kernel rejected CS, "
                            "see dmesg for more information.\n");
        }
    }

    if (cs->trace_buf) {
        radeon_dump_cs_on_lockup(cs, csc);
    }

    for (i = 0; i < csc->crelocs; i++)
        p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls);

    radeon_cs_context_cleanup(csc);
}

/*
 * Make sure any previous submission of this CS has completed.
 */
void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    /* Wait for any pending ioctl to complete. */
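    /* The semaphore starts at 1; the flush path takes it before queueing the
     * CS to the winsys thread, and the thread signals it again once the
     * ioctl has been submitted. Waiting and immediately re-signaling
     * therefore blocks until any in-flight flush is done without changing
     * the semaphore's state. */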
    if (cs->ws->thread) {
        pipe_semaphore_wait(&cs->flush_completed);
        pipe_semaphore_signal(&cs->flush_completed);
    }
}

DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", FALSE)

static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags, uint32_t cs_trace_id)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    switch (cs->base.ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements;
         * r6xx requires at least 4 DW alignment to avoid a hw bug.
         */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
        }
        break;
    case RING_UVD:
        while (rcs->cdw & 15)
            OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        break;
    default:
        break;
    }

    if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) {
        fprintf(stderr, "radeon: command stream overflowed\n");
    }
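    /* Note that an overflowed CS is never submitted; the size check below
     * skips the ioctl and only cleans up the context. */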

    radeon_drm_cs_sync_flush(rcs);

    /* Flip command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    cs->cst->cs_trace_id = cs_trace_id;

    /* If the CS is neither empty nor overflowed, emit it, possibly in a
     * separate thread. */
    if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) {
        unsigned i, crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
        }

        switch (cs->base.ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        case RING_VCE:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_VCE;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 2;
            if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
                cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_COMPUTE) {
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

        if (cs->ws->thread) {
            pipe_semaphore_wait(&cs->flush_completed);
            radeon_drm_ws_queue_cs(cs->ws, cs);
            if (!(flags & RADEON_FLUSH_ASYNC))
                radeon_drm_cs_sync_flush(rcs);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.buf = cs->csc->buf;
    cs->base.cdw = 0;

    cs->ws->num_cs_flushes++;
}

static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    radeon_drm_cs_sync_flush(rcs);
    pipe_semaphore_destroy(&cs->flush_completed);
    radeon_cs_context_cleanup(&cs->csc1);
    radeon_cs_context_cleanup(&cs->csc2);
    p_atomic_dec(&cs->ws->num_cs);
    radeon_destroy_cs_context(&cs->csc1);
    radeon_destroy_cs_context(&cs->csc2);
    FREE(cs);
}

static void radeon_drm_cs_set_flush(struct radeon_winsys_cs *rcs,
                                    void (*flush)(void *ctx, unsigned flags),
                                    void *user)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    cs->flush_cs = flush;
    cs->flush_data = user;
}

static boolean radeon_bo_is_referenced(struct radeon_winsys_cs *rcs,
                                       struct radeon_winsys_cs_handle *_buf,
                                       enum radeon_bo_usage usage)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)_buf;
    int index;

    if (!bo->num_cs_references)
        return FALSE;

    index = radeon_get_reloc(cs->csc, bo, NULL);
    if (index == -1)
        return FALSE;

    if ((usage & RADEON_USAGE_WRITE) && cs->csc->relocs[index].write_domain)
        return TRUE;
    if ((usage & RADEON_USAGE_READ) && cs->csc->relocs[index].read_domains)
        return TRUE;

    return FALSE;
}

/* FENCES */

static struct pipe_fence_handle *
radeon_cs_create_fence(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct pb_buffer *fence;

    /* Create a fence, which is a dummy BO. */
    fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1, TRUE,
                                       RADEON_DOMAIN_GTT);
    /* Add the fence as a dummy relocation. */
    cs->ws->base.cs_add_reloc(rcs, cs->ws->base.buffer_get_cs_handle(fence),
                              RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT,
                              RADEON_PRIO_MIN);
    return (struct pipe_fence_handle*)fence;
}

static bool radeon_fence_wait(struct radeon_winsys *ws,
                              struct pipe_fence_handle *fence,
                              uint64_t timeout)
{
    struct pb_buffer *rfence = (struct pb_buffer*)fence;

    if (timeout == 0)
        return !ws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE);

    if (timeout != PIPE_TIMEOUT_INFINITE) {
        int64_t start_time = os_time_get();

        /* Convert to microseconds. */
        timeout /= 1000;

        /* Wait in a loop. */
        while (ws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE)) {
            if (os_time_get() - start_time >= timeout) {
                return FALSE;
            }
            os_time_sleep(10);
        }
        return TRUE;
    }

    ws->buffer_wait(rfence, RADEON_USAGE_READWRITE);
    return TRUE;
}

static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src)
{
    pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src);
}

void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
{
    ws->base.cs_create = radeon_drm_cs_create;
    ws->base.cs_destroy = radeon_drm_cs_destroy;
    ws->base.cs_add_reloc = radeon_drm_cs_add_reloc;
    ws->base.cs_validate = radeon_drm_cs_validate;
    ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
    ws->base.cs_write_reloc = radeon_drm_cs_write_reloc;
    ws->base.cs_flush = radeon_drm_cs_flush;
    ws->base.cs_set_flush_callback = radeon_drm_cs_set_flush;
    ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
    ws->base.cs_sync_flush = radeon_drm_cs_sync_flush;
    ws->base.cs_create_fence = radeon_cs_create_fence;
    ws->base.fence_wait = radeon_fence_wait;
    ws->base.fence_reference = radeon_fence_reference;
}