/*
 * Copyright © 2008 Jérôme Glisse
 * Copyright © 2010 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
/*
 * Authors:
 *      Marek Olšák <maraeo@gmail.com>
 *
 * Based on work from libdrm_radeon by:
 *      Aapo Tahkola <aet@rasterburn.org>
 *      Nicolai Haehnle <prefect_@gmx.net>
 *      Jérôme Glisse <glisse@freedesktop.org>
 */

/*
    This file replaces libdrm's radeon_cs_gem with our own implementation.
    It's optimized specifically for Radeon DRM.
    Reloc writes and space checking are faster and simpler than their
    counterparts in libdrm (the time complexity of all the functions
    is O(1) in nearly all scenarios, thanks to hashing).

    It works like this:

    cs_add_reloc(cs, buf, read_domain, write_domain) adds a new relocation and
    also adds the size of 'buf' to the used_gart and used_vram winsys variables
    based on the domains, which are simply OR'd for accounting purposes.
    The adding is skipped if the reloc is already present in the list, but it
    still accounts for any newly-referenced domains.

    cs_validate is then called, which just checks:
        used_vram/gart < vram/gart_size * 0.8
    The 0.8 factor allows for some memory fragmentation. If the validation
    fails, the pipe driver flushes the CS and tries the validation again,
    i.e. it validates only that one operation. If it fails again, it drops
    the operation on the floor and prints some nasty message to stderr.
    (done in the pipe driver)

    cs_write_reloc(cs, buf) just writes a reloc that has been added using
    cs_add_reloc. The read_domain and write_domain parameters have been removed,
    because we already specify them in cs_add_reloc.
*/
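
/*
 * For illustration, a pipe driver's typical use of these hooks looks roughly
 * like the sketch below (hypothetical driver code, not part of this winsys;
 * the real call sites live in r600g/radeonsi):
 *
 *    // Reserve the buffer and account its size against GTT/VRAM.
 *    unsigned idx = ws->cs_add_reloc(cs, buf, RADEON_USAGE_READWRITE,
 *                                    RADEON_DOMAIN_VRAM, RADEON_PRIO_MIN);
 *    if (!ws->cs_validate(cs)) {
 *       // Too much memory referenced: flush and redo just this operation.
 *       ctx->flush(ctx, RADEON_FLUSH_ASYNC);
 *       idx = ws->cs_add_reloc(cs, buf, RADEON_USAGE_READWRITE,
 *                              RADEON_DOMAIN_VRAM, RADEON_PRIO_MIN);
 *    }
 *    ...
 *    ws->cs_write_reloc(cs, buf);   // emits the NOP packet + reloc index
 */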

#include "radeon_drm_cs.h"

#include "util/u_memory.h"
#include "os/os_time.h"

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <xf86drm.h>

/*
 * These are copied from radeon_drm.h. Once an updated libdrm is released,
 * we should bump the configure.ac requirement for it and remove the
 * following defines.
 */
#ifndef RADEON_CHUNK_ID_FLAGS
#define RADEON_CHUNK_ID_FLAGS 0x03

/* The first dword of RADEON_CHUNK_ID_FLAGS is a uint32 of these flags: */
#define RADEON_CS_KEEP_TILING_FLAGS 0x01
#endif

#ifndef RADEON_CS_USE_VM
#define RADEON_CS_USE_VM 0x02
/* The second dword of RADEON_CHUNK_ID_FLAGS is a uint32 that sets the ring type */
#define RADEON_CS_RING_GFX 0
#define RADEON_CS_RING_COMPUTE 1
#endif

#ifndef RADEON_CS_RING_DMA
#define RADEON_CS_RING_DMA 2
#endif

#ifndef RADEON_CS_RING_UVD
#define RADEON_CS_RING_UVD 3
#endif

#ifndef RADEON_CS_RING_VCE
#define RADEON_CS_RING_VCE 4
#endif

#ifndef RADEON_CS_END_OF_FRAME
#define RADEON_CS_END_OF_FRAME 0x04
#endif

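/* struct drm_radeon_cs_reloc consists of four uint32 fields (handle,
 * read_domains, write_domain, flags), so RELOC_DWORDS evaluates to 4. */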
#define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))

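/* Set up a CS context: chunk 0 carries the IB itself, chunk 1 the relocation
 * list, and chunk 2 the flags dwords. Only the number of chunks actually
 * submitted to the kernel varies; it is chosen later in cs_flush. */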
static boolean radeon_init_cs_context(struct radeon_cs_context *csc,
                                      struct radeon_drm_winsys *ws)
{
    csc->fd = ws->fd;
    csc->nrelocs = 512;
    csc->relocs_bo = (struct radeon_bo**)
                     CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*));
    if (!csc->relocs_bo) {
        return FALSE;
    }

    csc->relocs = (struct drm_radeon_cs_reloc*)
                  CALLOC(1, csc->nrelocs * sizeof(struct drm_radeon_cs_reloc));
    if (!csc->relocs) {
        FREE(csc->relocs_bo);
        return FALSE;
    }

    csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
    csc->chunks[0].length_dw = 0;
    csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf;
    csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
    csc->chunks[1].length_dw = 0;
    csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
    csc->chunks[2].length_dw = 2;
    csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)&csc->flags;

    csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0];
    csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1];
    csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2];

    csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array;
    return TRUE;
}

static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
{
    unsigned i;

    for (i = 0; i < csc->crelocs; i++) {
        p_atomic_dec(&csc->relocs_bo[i]->num_cs_references);
        radeon_bo_reference(&csc->relocs_bo[i], NULL);
    }

    csc->crelocs = 0;
    csc->validated_crelocs = 0;
    csc->chunks[0].length_dw = 0;
    csc->chunks[1].length_dw = 0;
    csc->used_gart = 0;
    csc->used_vram = 0;
    memset(csc->is_handle_added, 0, sizeof(csc->is_handle_added));
}

static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
{
    radeon_cs_context_cleanup(csc);
    FREE(csc->relocs_bo);
    FREE(csc->relocs);
}


static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws,
                                                     enum ring_type ring_type,
                                                     struct radeon_winsys_cs_handle *trace_buf)
{
    struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
    struct radeon_drm_cs *cs;

    cs = CALLOC_STRUCT(radeon_drm_cs);
    if (!cs) {
        return NULL;
    }
    pipe_semaphore_init(&cs->flush_completed, 1);

    cs->ws = ws;
    cs->trace_buf = (struct radeon_bo*)trace_buf;

    if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
        FREE(cs);
        return NULL;
    }
    if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
        radeon_destroy_cs_context(&cs->csc1);
        FREE(cs);
        return NULL;
    }

    /* Set the first command buffer as current. */
    cs->csc = &cs->csc1;
    cs->cst = &cs->csc2;
    cs->base.buf = cs->csc->buf;
    cs->base.ring_type = ring_type;

    p_atomic_inc(&ws->num_cs);
    return &cs->base;
}

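/* Append one dword to the current command buffer. There is no bounds check
 * here; overflow past RADEON_MAX_CMDBUF_DWORDS is detected later in cs_flush. */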
#define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value)

static INLINE void update_reloc(struct drm_radeon_cs_reloc *reloc,
                                enum radeon_bo_domain rd,
                                enum radeon_bo_domain wd,
                                unsigned priority,
                                enum radeon_bo_domain *added_domains)
{
    *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);

    reloc->read_domains |= rd;
    reloc->write_domain |= wd;
    reloc->flags = MAX2(reloc->flags, priority);
}
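
/* A buffer's priority travels in the reloc's flags field; the
 * MIN2(priority, 15) clamp in radeon_add_reloc below suggests the kernel
 * reads it as a 4-bit value. When a buffer is re-added, update_reloc keeps
 * the highest priority requested so far. */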

int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo)
{
    struct drm_radeon_cs_reloc *reloc;
    unsigned i;
    unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);

    if (csc->is_handle_added[hash]) {
        i = csc->reloc_indices_hashlist[hash];
        reloc = &csc->relocs[i];
        if (reloc->handle == bo->handle) {
            return i;
        }

        /* Hash collision, look for the BO in the list of relocs linearly. */
        for (i = csc->crelocs; i != 0;) {
            --i;
            reloc = &csc->relocs[i];
            if (reloc->handle == bo->handle) {
                /* Put this reloc in the hash list.
                 * This will prevent additional hash collisions if there are
                 * several consecutive get_reloc calls for the same buffer.
                 *
                 * Example: Assuming buffers A,B,C collide in the hash list,
                 * the following sequence of relocs:
                 *         AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
                 * will collide here: ^ and here:   ^,
                 * meaning that we should get very few collisions in the end. */
                csc->reloc_indices_hashlist[hash] = i;
                /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
                return i;
            }
        }
    }

    return -1;
}
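
/* Note that the hash above is just 'handle & (table_size - 1)', so the
 * is_handle_added table declared in radeon_drm_cs.h must have a power-of-two
 * size for the mask to behave as a modulo. */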

static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
                                 struct radeon_bo *bo,
                                 enum radeon_bo_usage usage,
                                 enum radeon_bo_domain domains,
                                 unsigned priority,
                                 enum radeon_bo_domain *added_domains)
{
    struct radeon_cs_context *csc = cs->csc;
    struct drm_radeon_cs_reloc *reloc;
    unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);
    enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
    enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
    bool update_hash = TRUE;
    int i;

    priority = MIN2(priority, 15);
    *added_domains = 0;

    if (csc->is_handle_added[hash]) {
        i = csc->reloc_indices_hashlist[hash];
        reloc = &csc->relocs[i];

        if (reloc->handle != bo->handle) {
            /* Hash collision, look for the BO in the list of relocs linearly. */
            for (i = csc->crelocs - 1; i >= 0; i--) {
                reloc = &csc->relocs[i];
                if (reloc->handle == bo->handle) {
                    /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
                    break;
                }
            }
        }

        if (i >= 0) {
            update_reloc(reloc, rd, wd, priority, added_domains);

            /* For async DMA, every add_reloc call must add a buffer to the list
             * no matter how many duplicates there are. This is due to the fact
             * the DMA CS checker doesn't use NOP packets for offset patching,
             * but always uses the i-th buffer from the list to patch the i-th
             * offset. If there are N offsets in a DMA CS, there must also be N
             * buffers in the relocation list.
             *
             * This doesn't have to be done if virtual memory is enabled,
             * because there is no offset patching with virtual memory.
             */
            if (cs->base.ring_type != RING_DMA || cs->ws->info.r600_virtual_address) {
                csc->reloc_indices_hashlist[hash] = i;
                return i;
            }
            update_hash = FALSE;
        }
    }

    /* New relocation, check if the backing array is large enough. */
    if (csc->crelocs >= csc->nrelocs) {
        uint32_t size;
        csc->nrelocs += 10;

        size = csc->nrelocs * sizeof(struct radeon_bo*);
        csc->relocs_bo = realloc(csc->relocs_bo, size);

        size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc);
        csc->relocs = realloc(csc->relocs, size);

        csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    }

    /* Initialize the new relocation. */
    csc->relocs_bo[csc->crelocs] = NULL;
    radeon_bo_reference(&csc->relocs_bo[csc->crelocs], bo);
    p_atomic_inc(&bo->num_cs_references);
    reloc = &csc->relocs[csc->crelocs];
    reloc->handle = bo->handle;
    reloc->read_domains = rd;
    reloc->write_domain = wd;
    reloc->flags = priority;

    csc->is_handle_added[hash] = TRUE;
    if (update_hash) {
        csc->reloc_indices_hashlist[hash] = csc->crelocs;
    }

    csc->chunks[1].length_dw += RELOC_DWORDS;

    *added_domains = rd | wd;
    return csc->crelocs++;
}

static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
                                        struct radeon_winsys_cs_handle *buf,
                                        enum radeon_bo_usage usage,
                                        enum radeon_bo_domain domains,
                                        enum radeon_bo_priority priority)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)buf;
    enum radeon_bo_domain added_domains;
    unsigned index = radeon_add_reloc(cs, bo, usage, domains, priority, &added_domains);

    if (added_domains & RADEON_DOMAIN_GTT)
        cs->csc->used_gart += bo->base.size;
    if (added_domains & RADEON_DOMAIN_VRAM)
        cs->csc->used_vram += bo->base.size;

    return index;
}

static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    boolean status =
        cs->csc->used_gart < cs->ws->info.gart_size * 0.8 &&
        cs->csc->used_vram < cs->ws->info.vram_size * 0.8;

    if (status) {
        cs->csc->validated_crelocs = cs->csc->crelocs;
    } else {
        /* Remove the recently-added relocations. The validation failed with
         * them and the CS is about to be flushed because of that. Keep only
         * the already-validated relocations. */
        unsigned i;

        for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) {
            p_atomic_dec(&cs->csc->relocs_bo[i]->num_cs_references);
            radeon_bo_reference(&cs->csc->relocs_bo[i], NULL);
        }
        cs->csc->crelocs = cs->csc->validated_crelocs;

        /* Flush if there are any relocs. Clean up otherwise. */
        if (cs->csc->crelocs) {
            cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC);
        } else {
            radeon_cs_context_cleanup(cs->csc);

            assert(cs->base.cdw == 0);
            if (cs->base.cdw != 0) {
                fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__);
            }
        }
    }
    return status;
}

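/* Note the stricter 0.7 factor here vs. the 0.8 used by cs_validate above;
 * presumably this leaves extra headroom when a driver asks up front whether
 * an additional vram/gtt allocation would still fit. */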
static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    boolean status =
        (cs->csc->used_gart + gtt) < cs->ws->info.gart_size * 0.7 &&
        (cs->csc->used_vram + vram) < cs->ws->info.vram_size * 0.7;

    return status;
}

static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs,
                                      struct radeon_winsys_cs_handle *buf)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)buf;
    int index = radeon_get_reloc(cs->csc, bo);

    if (index == -1) {
        fprintf(stderr, "radeon: Cannot get a relocation in %s.\n", __func__);
        return;
    }

    /* Emit a type-3 NOP packet (0xc0001000) whose single payload dword is
     * the dword offset of the reloc in the relocation chunk; the kernel CS
     * checker uses it to patch the preceding packet. */
    OUT_CS(&cs->base, 0xc0001000);
    OUT_CS(&cs->base, index * RELOC_DWORDS);
}

void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc)
{
    unsigned i;

    if (drmCommandWriteRead(csc->fd, DRM_RADEON_CS,
                            &csc->cs, sizeof(struct drm_radeon_cs))) {
        if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) {
            unsigned i;

            fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n");
            for (i = 0; i < csc->chunks[0].length_dw; i++) {
                fprintf(stderr, "0x%08X\n", csc->buf[i]);
            }
        } else {
            fprintf(stderr, "radeon: The kernel rejected CS, "
                    "see dmesg for more information.\n");
        }
    }

    if (cs->trace_buf) {
        radeon_dump_cs_on_lockup(cs, csc);
    }

    for (i = 0; i < csc->crelocs; i++)
        p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls);

    radeon_cs_context_cleanup(csc);
}

/*
 * Make sure previous submissions of this CS have completed.
 */
void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    /* Wait for any pending ioctl to complete. */
    if (cs->ws->thread) {
        pipe_semaphore_wait(&cs->flush_completed);
        pipe_semaphore_signal(&cs->flush_completed);
    }
}

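/* Setting RADEON_NOOP=1 in the environment makes flushed CSes be thrown
 * away instead of submitted to the kernel, which is handy for isolating
 * CPU-side driver overhead from GPU execution. */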
DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", FALSE)

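/* Flush the current command stream. Each CS owns two contexts: 'csc' is the
 * one being filled by the driver and 'cst' is the one being submitted.
 * Flushing swaps them, so recording can continue in the new 'csc' while the
 * winsys thread (if any) submits 'cst' asynchronously. */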
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags, uint32_t cs_trace_id)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    switch (cs->base.ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements;
         * r6xx requires at least 4 DW alignment to avoid a hw bug.
         */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
        }
        break;
    case RING_UVD:
        while (rcs->cdw & 15)
            OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        break;
    default:
        break;
    }

    if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) {
        fprintf(stderr, "radeon: command stream overflowed\n");
    }

    radeon_drm_cs_sync_flush(rcs);

    /* Flip command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    cs->cst->cs_trace_id = cs_trace_id;

    /* If the CS is neither empty nor overflowed, emit it in a separate thread. */
    if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) {
        unsigned i, crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
        }

        switch (cs->base.ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        case RING_VCE:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_VCE;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 2;
            if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
                cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_COMPUTE) {
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

        if (cs->ws->thread) {
            pipe_semaphore_wait(&cs->flush_completed);
            radeon_drm_ws_queue_cs(cs->ws, cs);
            if (!(flags & RADEON_FLUSH_ASYNC))
                radeon_drm_cs_sync_flush(rcs);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.buf = cs->csc->buf;
    cs->base.cdw = 0;

    cs->ws->num_cs_flushes++;
}

static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    radeon_drm_cs_sync_flush(rcs);
    pipe_semaphore_destroy(&cs->flush_completed);
    radeon_cs_context_cleanup(&cs->csc1);
    radeon_cs_context_cleanup(&cs->csc2);
    p_atomic_dec(&cs->ws->num_cs);
    radeon_destroy_cs_context(&cs->csc1);
    radeon_destroy_cs_context(&cs->csc2);
    FREE(cs);
}

static void radeon_drm_cs_set_flush(struct radeon_winsys_cs *rcs,
                                    void (*flush)(void *ctx, unsigned flags),
                                    void *user)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    cs->flush_cs = flush;
    cs->flush_data = user;
}

static boolean radeon_bo_is_referenced(struct radeon_winsys_cs *rcs,
                                       struct radeon_winsys_cs_handle *_buf,
                                       enum radeon_bo_usage usage)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)_buf;
    int index;

    if (!bo->num_cs_references)
        return FALSE;

    index = radeon_get_reloc(cs->csc, bo);
    if (index == -1)
        return FALSE;

    if ((usage & RADEON_USAGE_WRITE) && cs->csc->relocs[index].write_domain)
        return TRUE;
    if ((usage & RADEON_USAGE_READ) && cs->csc->relocs[index].read_domains)
        return TRUE;

    return FALSE;
}

/* FENCES */

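/* A fence here is just a 1-byte GTT buffer added to the CS as a dummy
 * relocation: the kernel keeps every referenced BO busy until the CS
 * completes, so waiting for this buffer to go idle is equivalent to
 * waiting for the CS itself. */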
static struct pipe_fence_handle *
radeon_cs_create_fence(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct pb_buffer *fence;

    /* Create a fence, which is a dummy BO. */
    fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1, TRUE,
                                       RADEON_DOMAIN_GTT);
    /* Add the fence as a dummy relocation. */
    cs->ws->base.cs_add_reloc(rcs, cs->ws->base.buffer_get_cs_handle(fence),
                              RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT,
                              RADEON_PRIO_MIN);
    return (struct pipe_fence_handle*)fence;
}

static bool radeon_fence_wait(struct radeon_winsys *ws,
                              struct pipe_fence_handle *fence,
                              uint64_t timeout)
{
    struct pb_buffer *rfence = (struct pb_buffer*)fence;

    if (timeout == 0)
        return !ws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE);

    if (timeout != PIPE_TIMEOUT_INFINITE) {
        int64_t start_time = os_time_get();

        /* Convert to microseconds. */
        timeout /= 1000;

        /* Wait in a loop. */
        while (ws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE)) {
            if (os_time_get() - start_time >= timeout) {
                return FALSE;
            }
            os_time_sleep(10);
        }
        return TRUE;
    }

    ws->buffer_wait(rfence, RADEON_USAGE_READWRITE);
    return TRUE;
}

static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src)
{
    pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src);
}

void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
{
    ws->base.cs_create = radeon_drm_cs_create;
    ws->base.cs_destroy = radeon_drm_cs_destroy;
    ws->base.cs_add_reloc = radeon_drm_cs_add_reloc;
    ws->base.cs_validate = radeon_drm_cs_validate;
    ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
    ws->base.cs_write_reloc = radeon_drm_cs_write_reloc;
    ws->base.cs_flush = radeon_drm_cs_flush;
    ws->base.cs_set_flush_callback = radeon_drm_cs_set_flush;
    ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
    ws->base.cs_sync_flush = radeon_drm_cs_sync_flush;
    ws->base.cs_create_fence = radeon_cs_create_fence;
    ws->base.fence_wait = radeon_fence_wait;
    ws->base.fence_reference = radeon_fence_reference;
}