radeon/winsys: add VCE support v4
[mesa.git] src/gallium/winsys/radeon/drm/radeon_drm_cs.c
/*
 * Copyright © 2008 Jérôme Glisse
 * Copyright © 2010 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
/*
 * Authors:
 *      Marek Olšák <maraeo@gmail.com>
 *
 * Based on work from libdrm_radeon by:
 *      Aapo Tahkola <aet@rasterburn.org>
 *      Nicolai Haehnle <prefect_@gmx.net>
 *      Jérôme Glisse <glisse@freedesktop.org>
 */

/*
    This file replaces libdrm's radeon_cs_gem with our own implementation.
    It's optimized specifically for Radeon DRM.
    Reloc writes and space checking are faster and simpler than their
    counterparts in libdrm (the time complexity of all the functions
    is O(1) in nearly all scenarios, thanks to hashing).

    It works like this:

    cs_add_reloc(cs, buf, read_domain, write_domain) adds a new relocation and
    also adds the size of 'buf' to the used_gart and used_vram winsys variables
    based on the domains, which are simply or'd for the accounting purposes.
    The adding is skipped if the reloc is already present in the list, but it
    still accounts for any newly-referenced domains.

    cs_validate is then called, which just checks:
        used_vram/gart < vram/gart_size * 0.8
    The 0.8 number allows for some memory fragmentation. If the validation
    fails, the pipe driver flushes the CS and tries to do the validation again,
    i.e. it validates only that one operation. If it fails again, it drops
    the operation on the floor and prints some nasty message to stderr.
    (done in the pipe driver)

    cs_write_reloc(cs, buf) just writes a reloc that has been added using
    cs_add_reloc. The read_domain and write_domain parameters have been removed,
    because we already specify them in cs_add_reloc.
*/

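/*
    Illustrative driver-side usage of the calls described above (a minimal
    sketch, not code from this file; 'rws', 'cs' and 'buf' are hypothetical
    handles obtained elsewhere, and the entry points are the ones registered
    in radeon_drm_cs_init_functions() at the end of this file):

        rws->cs_add_reloc(cs, buf, RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
        if (!rws->cs_validate(cs)) {
            // flush, then validate again as described above
        }
        // ... emit packets that reference 'buf', then:
        rws->cs_write_reloc(cs, buf);
        rws->cs_flush(cs, 0, 0);
*/
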
#include "radeon_drm_cs.h"

#include "util/u_memory.h"
#include "os/os_time.h"

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <xf86drm.h>

/*
 * These defines are copied from radeon_drm.h. Once an updated libdrm is
 * released, we should bump the configure.ac requirement for it and remove
 * them.
 */
#ifndef RADEON_CHUNK_ID_FLAGS
#define RADEON_CHUNK_ID_FLAGS       0x03

/* The first dword of RADEON_CHUNK_ID_FLAGS is a uint32 of these flags: */
#define RADEON_CS_KEEP_TILING_FLAGS 0x01
#endif

#ifndef RADEON_CS_USE_VM
#define RADEON_CS_USE_VM            0x02
/* The second dword of RADEON_CHUNK_ID_FLAGS is a uint32 that sets the ring type */
#define RADEON_CS_RING_GFX          0
#define RADEON_CS_RING_COMPUTE      1
#endif

#ifndef RADEON_CS_RING_DMA
#define RADEON_CS_RING_DMA          2
#endif

#ifndef RADEON_CS_RING_UVD
#define RADEON_CS_RING_UVD          3
#endif

#ifndef RADEON_CS_RING_VCE
#define RADEON_CS_RING_VCE          4
#endif

#ifndef RADEON_CS_END_OF_FRAME
#define RADEON_CS_END_OF_FRAME      0x04
#endif


#define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))

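/* Allocate the relocation arrays and set up the three chunks (IB, relocs,
 * flags) that are passed to the kernel with the CS ioctl. */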
static boolean radeon_init_cs_context(struct radeon_cs_context *csc,
                                      struct radeon_drm_winsys *ws)
{
    csc->fd = ws->fd;
    csc->nrelocs = 512;
    csc->relocs_bo = (struct radeon_bo**)
                     CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*));
    if (!csc->relocs_bo) {
        return FALSE;
    }

    csc->relocs = (struct drm_radeon_cs_reloc*)
                  CALLOC(1, csc->nrelocs * sizeof(struct drm_radeon_cs_reloc));
    if (!csc->relocs) {
        FREE(csc->relocs_bo);
        return FALSE;
    }

    csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
    csc->chunks[0].length_dw = 0;
    csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf;
    csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
    csc->chunks[1].length_dw = 0;
    csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
    csc->chunks[2].length_dw = 2;
    csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)&csc->flags;

    csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0];
    csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1];
    csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2];

    csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array;
    return TRUE;
}

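/* Drop all buffer references held by the context and reset its reloc counts
 * and memory accounting so it can be reused for the next CS. */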
static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
{
    unsigned i;

    for (i = 0; i < csc->crelocs; i++) {
        p_atomic_dec(&csc->relocs_bo[i]->num_cs_references);
        radeon_bo_reference(&csc->relocs_bo[i], NULL);
    }

    csc->crelocs = 0;
    csc->validated_crelocs = 0;
    csc->chunks[0].length_dw = 0;
    csc->chunks[1].length_dw = 0;
    csc->used_gart = 0;
    csc->used_vram = 0;
    memset(csc->is_handle_added, 0, sizeof(csc->is_handle_added));
}

static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
{
    radeon_cs_context_cleanup(csc);
    FREE(csc->relocs_bo);
    FREE(csc->relocs);
}


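/* Create a command stream. Two contexts (csc1/csc2) are allocated so that one
 * can be filled by the driver while the other is being submitted, possibly on
 * a different thread. */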
static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws,
                                                     enum ring_type ring_type,
                                                     struct radeon_winsys_cs_handle *trace_buf)
{
    struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
    struct radeon_drm_cs *cs;

    cs = CALLOC_STRUCT(radeon_drm_cs);
    if (!cs) {
        return NULL;
    }
    pipe_semaphore_init(&cs->flush_completed, 1);

    cs->ws = ws;
    cs->trace_buf = (struct radeon_bo*)trace_buf;

    if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
        FREE(cs);
        return NULL;
    }
    if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
        radeon_destroy_cs_context(&cs->csc1);
        FREE(cs);
        return NULL;
    }

    /* Set the first command buffer as current. */
    cs->csc = &cs->csc1;
    cs->cst = &cs->csc2;
    cs->base.buf = cs->csc->buf;
    cs->base.ring_type = ring_type;

    p_atomic_inc(&ws->num_cs);
    return &cs->base;
}

#define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value)

static INLINE void update_reloc_domains(struct drm_radeon_cs_reloc *reloc,
                                        enum radeon_bo_domain rd,
                                        enum radeon_bo_domain wd,
                                        enum radeon_bo_domain *added_domains)
{
    *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);

    reloc->read_domains |= rd;
    reloc->write_domain |= wd;
}

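/* Return the index of the relocation for the given buffer, or -1 if the buffer
 * hasn't been added to this CS. The handle hash resolves the common case in
 * O(1); collisions fall back to a linear search of the reloc list. */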
int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo)
{
    struct drm_radeon_cs_reloc *reloc;
    unsigned i;
    unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);

    if (csc->is_handle_added[hash]) {
        i = csc->reloc_indices_hashlist[hash];
        reloc = &csc->relocs[i];
        if (reloc->handle == bo->handle) {
            return i;
        }

        /* Hash collision, look for the BO in the list of relocs linearly. */
        for (i = csc->crelocs; i != 0;) {
            --i;
            reloc = &csc->relocs[i];
            if (reloc->handle == bo->handle) {
                /* Put this reloc in the hash list.
                 * This will prevent additional hash collisions if there are
                 * several consecutive get_reloc calls for the same buffer.
                 *
                 * Example: Assuming buffers A,B,C collide in the hash list,
                 * the following sequence of relocs:
                 *         AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
                 * will collide here: ^ and here:   ^,
                 * meaning that we should get very few collisions in the end. */
                csc->reloc_indices_hashlist[hash] = i;
                /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
                return i;
            }
        }
    }

    return -1;
}

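/* Add a relocation for 'bo', or update the domains of an existing one, and
 * return its index. 'added_domains' receives the domains newly referenced by
 * this call so the caller can update the GTT/VRAM accounting. */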
static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
                                 struct radeon_bo *bo,
                                 enum radeon_bo_usage usage,
                                 enum radeon_bo_domain domains,
                                 enum radeon_bo_domain *added_domains)
{
    struct radeon_cs_context *csc = cs->csc;
    struct drm_radeon_cs_reloc *reloc;
    unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);
    enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
    enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
    bool update_hash = TRUE;
    int i;

    *added_domains = 0;
    if (csc->is_handle_added[hash]) {
        i = csc->reloc_indices_hashlist[hash];
        reloc = &csc->relocs[i];
        if (reloc->handle != bo->handle) {
            /* Hash collision, look for the BO in the list of relocs linearly. */
            for (i = csc->crelocs - 1; i >= 0; i--) {
                reloc = &csc->relocs[i];
                if (reloc->handle == bo->handle) {
                    /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
                    break;
                }
            }
        }

        if (i >= 0) {
            /* On the DMA ring we need to emit one relocation for each use of
             * the BO, so each time this function is called we add the BO to
             * the relocation buffer again.
             *
             * Do not update the hash table on the DMA ring, so that the hash
             * always points to the first relocation of the BO, which is the
             * one the kernel uses for memory placement. The following
             * relocations are ignored for placement purposes (but the kernel
             * still uses them to patch the command stream with the proper
             * buffer offsets).
             */
            update_hash = FALSE;
            update_reloc_domains(reloc, rd, wd, added_domains);
            if (cs->base.ring_type != RING_DMA) {
                csc->reloc_indices_hashlist[hash] = i;
                return i;
            }
        }
    }

    /* New relocation, check if the backing array is large enough. */
    if (csc->crelocs >= csc->nrelocs) {
        uint32_t size;
        csc->nrelocs += 10;

        size = csc->nrelocs * sizeof(struct radeon_bo*);
        csc->relocs_bo = realloc(csc->relocs_bo, size);

        size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc);
        csc->relocs = realloc(csc->relocs, size);

        csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    }

    /* Initialize the new relocation. */
    csc->relocs_bo[csc->crelocs] = NULL;
    radeon_bo_reference(&csc->relocs_bo[csc->crelocs], bo);
    p_atomic_inc(&bo->num_cs_references);
    reloc = &csc->relocs[csc->crelocs];
    reloc->handle = bo->handle;
    reloc->read_domains = rd;
    reloc->write_domain = wd;
    reloc->flags = 0;

    csc->is_handle_added[hash] = TRUE;
    if (update_hash) {
        csc->reloc_indices_hashlist[hash] = csc->crelocs;
    }

    csc->chunks[1].length_dw += RELOC_DWORDS;

    *added_domains = rd | wd;
    return csc->crelocs++;
}

static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
                                        struct radeon_winsys_cs_handle *buf,
                                        enum radeon_bo_usage usage,
                                        enum radeon_bo_domain domains)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)buf;
    enum radeon_bo_domain added_domains;
    unsigned index = radeon_add_reloc(cs, bo, usage, domains, &added_domains);

    if (added_domains & RADEON_DOMAIN_GTT)
        cs->csc->used_gart += bo->base.size;
    if (added_domains & RADEON_DOMAIN_VRAM)
        cs->csc->used_vram += bo->base.size;

    return index;
}

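/* Check that the memory referenced by the CS still fits within 80% of GTT and
 * VRAM. On failure, drop the relocations added since the last successful
 * validation and either flush the CS or clean it up so the driver can retry. */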
static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    boolean status =
        cs->csc->used_gart < cs->ws->info.gart_size * 0.8 &&
        cs->csc->used_vram < cs->ws->info.vram_size * 0.8;

    if (status) {
        cs->csc->validated_crelocs = cs->csc->crelocs;
    } else {
        /* Remove the recently-added relocations. The validation failed with
         * them and the CS is about to be flushed because of that. Keep only
         * the already-validated relocations. */
        unsigned i;

        for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) {
            p_atomic_dec(&cs->csc->relocs_bo[i]->num_cs_references);
            radeon_bo_reference(&cs->csc->relocs_bo[i], NULL);
        }
        cs->csc->crelocs = cs->csc->validated_crelocs;

        /* Flush if there are any relocs. Clean up otherwise. */
        if (cs->csc->crelocs) {
            cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC);
        } else {
            radeon_cs_context_cleanup(cs->csc);

            assert(cs->base.cdw == 0);
            if (cs->base.cdw != 0) {
                fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__);
            }
        }
    }
    return status;
}

static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    boolean status =
        (cs->csc->used_gart + gtt) < cs->ws->info.gart_size * 0.7 &&
        (cs->csc->used_vram + vram) < cs->ws->info.vram_size * 0.7;

    return status;
}

static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs,
                                      struct radeon_winsys_cs_handle *buf)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)buf;
    unsigned index = radeon_get_reloc(cs->csc, bo);

    if (index == -1) {
        fprintf(stderr, "radeon: Cannot get a relocation in %s.\n", __func__);
        return;
    }

    OUT_CS(&cs->base, 0xc0001000);
    OUT_CS(&cs->base, index * RELOC_DWORDS);
}

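/* Submit the CS to the kernel with the DRM_RADEON_CS ioctl, report (and
 * optionally dump) a rejected CS, and then release the per-submission
 * buffer state. */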
void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc)
{
    unsigned i;

    if (drmCommandWriteRead(csc->fd, DRM_RADEON_CS,
                            &csc->cs, sizeof(struct drm_radeon_cs))) {
        if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) {
            unsigned i;

            fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n");
            for (i = 0; i < csc->chunks[0].length_dw; i++) {
                fprintf(stderr, "0x%08X\n", csc->buf[i]);
            }
        } else {
            fprintf(stderr, "radeon: The kernel rejected CS, "
                    "see dmesg for more information.\n");
        }
    }

    if (cs->trace_buf) {
        radeon_dump_cs_on_lockup(cs, csc);
    }

    for (i = 0; i < csc->crelocs; i++)
        p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls);

    radeon_cs_context_cleanup(csc);
}

/*
 * Make sure previous submissions of this CS have completed.
 */
void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    /* Wait for any pending ioctl to complete. */
    if (cs->ws->thread) {
        pipe_semaphore_wait(&cs->flush_completed);
        pipe_semaphore_signal(&cs->flush_completed);
    }
}

DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", FALSE)

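/* Flush the command stream: pad the IB to the alignment the target ring
 * requires, swap the current and the to-be-submitted context, fill in the
 * ring/flag chunks, and submit, either through the winsys flush thread or
 * directly with the CS ioctl. */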
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags, uint32_t cs_trace_id)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    switch (cs->base.ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements;
         * r6xx requires at least 4 dw alignment to avoid a hw bug.
         */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
        }
        break;
    case RING_UVD:
        while (rcs->cdw & 15)
            OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        break;
    default:
        break;
    }

    if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) {
        fprintf(stderr, "radeon: command stream overflowed\n");
    }

    radeon_drm_cs_sync_flush(rcs);

    /* Flip command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    cs->cst->cs_trace_id = cs_trace_id;

    /* If the CS is not empty or overflowed, emit it in a separate thread. */
    if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) {
        unsigned i, crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
        }

        switch (cs->base.ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        case RING_VCE:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_VCE;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 2;
            if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
                cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_COMPUTE) {
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

        if (cs->ws->thread) {
            pipe_semaphore_wait(&cs->flush_completed);
            radeon_drm_ws_queue_cs(cs->ws, cs);
            if (!(flags & RADEON_FLUSH_ASYNC))
                radeon_drm_cs_sync_flush(rcs);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.buf = cs->csc->buf;
    cs->base.cdw = 0;
}

static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    radeon_drm_cs_sync_flush(rcs);
    pipe_semaphore_destroy(&cs->flush_completed);
    radeon_cs_context_cleanup(&cs->csc1);
    radeon_cs_context_cleanup(&cs->csc2);
    p_atomic_dec(&cs->ws->num_cs);
    radeon_destroy_cs_context(&cs->csc1);
    radeon_destroy_cs_context(&cs->csc2);
    FREE(cs);
}

static void radeon_drm_cs_set_flush(struct radeon_winsys_cs *rcs,
                                    void (*flush)(void *ctx, unsigned flags),
                                    void *user)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    cs->flush_cs = flush;
    cs->flush_data = user;
}

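/* Return whether the given buffer is referenced by the current CS with the
 * requested usage (read and/or write). */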
static boolean radeon_bo_is_referenced(struct radeon_winsys_cs *rcs,
                                       struct radeon_winsys_cs_handle *_buf,
                                       enum radeon_bo_usage usage)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)_buf;
    int index;

    if (!bo->num_cs_references)
        return FALSE;

    index = radeon_get_reloc(cs->csc, bo);
    if (index == -1)
        return FALSE;

    if ((usage & RADEON_USAGE_WRITE) && cs->csc->relocs[index].write_domain)
        return TRUE;
    if ((usage & RADEON_USAGE_READ) && cs->csc->relocs[index].read_domains)
        return TRUE;

    return FALSE;
}

/* FENCES */

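/* A fence is just a 1-byte dummy BO in GTT added to the CS as a relocation:
 * once the CS that references it has been executed, the BO becomes idle, so
 * waiting for the fence reduces to waiting for the buffer to go idle. */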
static struct pipe_fence_handle *
radeon_cs_create_fence(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct pb_buffer *fence;

    /* Create a fence, which is a dummy BO. */
    fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1, TRUE,
                                       RADEON_DOMAIN_GTT);
    /* Add the fence as a dummy relocation. */
    cs->ws->base.cs_add_reloc(rcs, cs->ws->base.buffer_get_cs_handle(fence),
                              RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT);
    return (struct pipe_fence_handle*)fence;
}

static bool radeon_fence_wait(struct radeon_winsys *ws,
                              struct pipe_fence_handle *fence,
                              uint64_t timeout)
{
    struct pb_buffer *rfence = (struct pb_buffer*)fence;

    if (timeout == 0)
        return !ws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE);

    if (timeout != PIPE_TIMEOUT_INFINITE) {
        int64_t start_time = os_time_get();

        /* Convert to microseconds. */
        timeout /= 1000;

        /* Wait in a loop. */
        while (ws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE)) {
            if (os_time_get() - start_time >= timeout) {
                return FALSE;
            }
            os_time_sleep(10);
        }
        return TRUE;
    }

    ws->buffer_wait(rfence, RADEON_USAGE_READWRITE);
    return TRUE;
}

static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src)
{
    pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src);
}

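/* Plug the command-stream and fence entry points into the winsys vtable. */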
void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
{
    ws->base.cs_create = radeon_drm_cs_create;
    ws->base.cs_destroy = radeon_drm_cs_destroy;
    ws->base.cs_add_reloc = radeon_drm_cs_add_reloc;
    ws->base.cs_validate = radeon_drm_cs_validate;
    ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
    ws->base.cs_write_reloc = radeon_drm_cs_write_reloc;
    ws->base.cs_flush = radeon_drm_cs_flush;
    ws->base.cs_set_flush_callback = radeon_drm_cs_set_flush;
    ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
    ws->base.cs_sync_flush = radeon_drm_cs_sync_flush;
    ws->base.cs_create_fence = radeon_cs_create_fence;
    ws->base.fence_wait = radeon_fence_wait;
    ws->base.fence_reference = radeon_fence_reference;
}