[mesa.git] src/gallium/winsys/radeon/drm/radeon_drm_cs.c
/*
 * Copyright © 2008 Jérôme Glisse
 * Copyright © 2010 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
/*
 * Authors:
 *      Marek Olšák <maraeo@gmail.com>
 *
 * Based on work from libdrm_radeon by:
 *      Aapo Tahkola <aet@rasterburn.org>
 *      Nicolai Haehnle <prefect_@gmx.net>
 *      Jérôme Glisse <glisse@freedesktop.org>
 */

/*
    This file replaces libdrm's radeon_cs_gem with our own implementation.
    It's optimized specifically for Radeon DRM.
    Adding buffers and space checking are faster and simpler than their
    counterparts in libdrm (the time complexity of all the functions
    is O(1) in nearly all scenarios, thanks to hashing).

    It works like this:

    cs_add_buffer(cs, buf, read_domain, write_domain) adds a new relocation and
    also adds the size of 'buf' to the used_gart and used_vram winsys variables
    based on the domains, which are simply or'd for the accounting purposes.
    The adding is skipped if the reloc is already present in the list, but it
    accounts for any newly-referenced domains.

    cs_validate is then called, which just checks:
        used_vram/gart < vram/gart_size * 0.8
    The 0.8 number allows for some memory fragmentation. If the validation
    fails, the pipe driver flushes the CS and tries to do the validation again,
    i.e. it validates only that one operation. If it fails again, it drops
    the operation on the floor and prints some nasty message to stderr.
    (done in the pipe driver)

    cs_write_reloc(cs, buf) just writes a reloc that has been added using
    cs_add_buffer. The read_domain and write_domain parameters have been
    removed, because we already specify them in cs_add_buffer.
*/

#include "radeon_drm_cs.h"

#include "util/u_memory.h"
#include "os/os_time.h"

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <xf86drm.h>


#define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))

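/* The following helper is not part of the original file; it is a minimal,
 * illustrative sketch (never called) of how a pipe driver is expected to use
 * the winsys entry points implemented below, following the add_buffer ->
 * validate sequence described in the comment at the top of this file.
 * The function name and parameters are hypothetical placeholders.
 */
static inline void example_add_buffer_and_validate(struct radeon_winsys *ws,
                                                   struct radeon_winsys_cs *cs,
                                                   struct pb_buffer *buf,
                                                   enum radeon_bo_domain domain,
                                                   enum radeon_bo_priority priority)
{
    /* Add the buffer; its size is accounted in used_gart/used_vram. */
    ws->cs_add_buffer(cs, buf, RADEON_USAGE_READWRITE, domain, priority);

    /* cs_validate checks the 80% VRAM/GTT limit. On failure it drops the
     * newly-added buffers and flushes the CS itself, so the driver only
     * re-adds the buffer and validates once more. */
    if (!ws->cs_validate(cs)) {
        ws->cs_add_buffer(cs, buf, RADEON_USAGE_READWRITE, domain, priority);
        if (!ws->cs_validate(cs)) {
            /* Still doesn't fit; the pipe driver drops the operation. */
            fprintf(stderr, "radeon: dropping operation, buffer does not fit\n");
        }
    }
}
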
static struct pipe_fence_handle *
radeon_cs_create_fence(struct radeon_winsys_cs *rcs);
static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src);

static struct radeon_winsys_ctx *radeon_drm_ctx_create(struct radeon_winsys *ws)
{
    /* No context support here. Just return the winsys pointer
     * as the "context". */
    return (struct radeon_winsys_ctx*)ws;
}

static void radeon_drm_ctx_destroy(struct radeon_winsys_ctx *ctx)
{
    /* No context support here. */
}

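/* Allocate the relocation arrays and set up the three CS chunks
 * (IB, relocs, flags) that the CS ioctl expects. */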
static boolean radeon_init_cs_context(struct radeon_cs_context *csc,
                                      struct radeon_drm_winsys *ws)
{
    int i;

    csc->fd = ws->fd;
    csc->nrelocs = 512;
    csc->relocs_bo = (struct radeon_bo_item*)
                     CALLOC(1, csc->nrelocs * sizeof(csc->relocs_bo[0]));
    if (!csc->relocs_bo) {
        return FALSE;
    }

    csc->relocs = (struct drm_radeon_cs_reloc*)
                  CALLOC(1, csc->nrelocs * sizeof(struct drm_radeon_cs_reloc));
    if (!csc->relocs) {
        FREE(csc->relocs_bo);
        return FALSE;
    }

    csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
    csc->chunks[0].length_dw = 0;
    csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf;
    csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
    csc->chunks[1].length_dw = 0;
    csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
    csc->chunks[2].length_dw = 2;
    csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)&csc->flags;

    csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0];
    csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1];
    csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2];

    csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array;

    for (i = 0; i < Elements(csc->reloc_indices_hashlist); i++) {
        csc->reloc_indices_hashlist[i] = -1;
    }
    return TRUE;
}

static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
{
    unsigned i;

    for (i = 0; i < csc->crelocs; i++) {
        p_atomic_dec(&csc->relocs_bo[i].bo->num_cs_references);
        radeon_bo_reference(&csc->relocs_bo[i].bo, NULL);
    }

    csc->crelocs = 0;
    csc->validated_crelocs = 0;
    csc->chunks[0].length_dw = 0;
    csc->chunks[1].length_dw = 0;
    csc->used_gart = 0;
    csc->used_vram = 0;

    for (i = 0; i < Elements(csc->reloc_indices_hashlist); i++) {
        csc->reloc_indices_hashlist[i] = -1;
    }
}

static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
{
    radeon_cs_context_cleanup(csc);
    FREE(csc->relocs_bo);
    FREE(csc->relocs);
}


static struct radeon_winsys_cs *
radeon_drm_cs_create(struct radeon_winsys_ctx *ctx,
                     enum ring_type ring_type,
                     void (*flush)(void *ctx, unsigned flags,
                                   struct pipe_fence_handle **fence),
                     void *flush_ctx,
                     struct pb_buffer *trace_buf)
{
    struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)ctx;
    struct radeon_drm_cs *cs;

    cs = CALLOC_STRUCT(radeon_drm_cs);
    if (!cs) {
        return NULL;
    }
    pipe_semaphore_init(&cs->flush_completed, 1);

    cs->ws = ws;
    cs->flush_cs = flush;
    cs->flush_data = flush_ctx;
    cs->trace_buf = (struct radeon_bo*)trace_buf;

    if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
        FREE(cs);
        return NULL;
    }
    if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
        radeon_destroy_cs_context(&cs->csc1);
        FREE(cs);
        return NULL;
    }

    /* Set the first command buffer as current. */
    cs->csc = &cs->csc1;
    cs->cst = &cs->csc2;
    cs->base.buf = cs->csc->buf;
    cs->base.ring_type = ring_type;
    cs->base.max_dw = ARRAY_SIZE(cs->csc->buf);

    p_atomic_inc(&ws->num_cs);
    return &cs->base;
}

#define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value)

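/* Merge new read/write domains and priority into an existing relocation
 * and return the domains that were not referenced before. */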
static inline void update_reloc(struct drm_radeon_cs_reloc *reloc,
                                enum radeon_bo_domain rd,
                                enum radeon_bo_domain wd,
                                unsigned priority,
                                enum radeon_bo_domain *added_domains)
{
    *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);

    reloc->read_domains |= rd;
    reloc->write_domain |= wd;
    reloc->flags = MAX2(reloc->flags, priority);
}

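/* Return the index of the relocation for 'bo' in 'csc', or -1 if the buffer
 * hasn't been added yet. The hash list gives O(1) lookups in the common case;
 * collisions fall back to a linear search. */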
int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo)
{
    unsigned hash = bo->handle & (Elements(csc->reloc_indices_hashlist)-1);
    int i = csc->reloc_indices_hashlist[hash];

    /* not found or found */
    if (i == -1 || csc->relocs_bo[i].bo == bo)
        return i;

    /* Hash collision, look for the BO in the list of relocs linearly. */
    for (i = csc->crelocs - 1; i >= 0; i--) {
        if (csc->relocs_bo[i].bo == bo) {
            /* Put this reloc in the hash list.
             * This will prevent additional hash collisions if there are
             * several consecutive lookup_buffer calls for the same buffer.
             *
             * Example: Assuming buffers A,B,C collide in the hash list,
             * the following sequence of relocs:
             *            AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
             * will collide here: ^ and here:       ^,
             * meaning that we should get very few collisions in the end. */
            csc->reloc_indices_hashlist[hash] = i;
            return i;
        }
    }
    return -1;
}

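/* Add 'bo' to the relocation list, or update the existing entry, and return
 * its index in the list. */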
static unsigned radeon_add_buffer(struct radeon_drm_cs *cs,
                                  struct radeon_bo *bo,
                                  enum radeon_bo_usage usage,
                                  enum radeon_bo_domain domains,
                                  unsigned priority,
                                  enum radeon_bo_domain *added_domains)
{
    struct radeon_cs_context *csc = cs->csc;
    struct drm_radeon_cs_reloc *reloc;
    unsigned hash = bo->handle & (Elements(csc->reloc_indices_hashlist)-1);
    enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
    enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
    int i = -1;

    assert(priority < 64);
    *added_domains = 0;

    i = radeon_lookup_buffer(csc, bo);

    if (i >= 0) {
        reloc = &csc->relocs[i];
        update_reloc(reloc, rd, wd, priority / 4, added_domains);
        csc->relocs_bo[i].priority_usage |= 1llu << priority;

        /* For async DMA, every add_buffer call must add a buffer to the list
         * no matter how many duplicates there are. This is due to the fact
         * that the DMA CS checker doesn't use NOP packets for offset patching,
         * but always uses the i-th buffer from the list to patch the i-th
         * offset. If there are N offsets in a DMA CS, there must also be N
         * buffers in the relocation list.
         *
         * This doesn't have to be done if virtual memory is enabled,
         * because there is no offset patching with virtual memory.
         */
        if (cs->base.ring_type != RING_DMA || cs->ws->info.r600_virtual_address) {
            return i;
        }
    }

    /* New relocation, check if the backing array is large enough. */
    if (csc->crelocs >= csc->nrelocs) {
        uint32_t size;
        csc->nrelocs += 10;

        size = csc->nrelocs * sizeof(csc->relocs_bo[0]);
        csc->relocs_bo = realloc(csc->relocs_bo, size);

        size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc);
        csc->relocs = realloc(csc->relocs, size);

        csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    }

    /* Initialize the new relocation. */
    csc->relocs_bo[csc->crelocs].bo = NULL;
    csc->relocs_bo[csc->crelocs].priority_usage = 1llu << priority;
    radeon_bo_reference(&csc->relocs_bo[csc->crelocs].bo, bo);
    p_atomic_inc(&bo->num_cs_references);
    reloc = &csc->relocs[csc->crelocs];
    reloc->handle = bo->handle;
    reloc->read_domains = rd;
    reloc->write_domain = wd;
    reloc->flags = priority / 4;

    csc->reloc_indices_hashlist[hash] = csc->crelocs;

    csc->chunks[1].length_dw += RELOC_DWORDS;

    *added_domains = rd | wd;
    return csc->crelocs++;
}

static unsigned radeon_drm_cs_add_buffer(struct radeon_winsys_cs *rcs,
                                         struct pb_buffer *buf,
                                         enum radeon_bo_usage usage,
                                         enum radeon_bo_domain domains,
                                         enum radeon_bo_priority priority)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)buf;
    enum radeon_bo_domain added_domains;
    unsigned index = radeon_add_buffer(cs, bo, usage, domains, priority,
                                       &added_domains);

    if (added_domains & RADEON_DOMAIN_GTT)
        cs->csc->used_gart += bo->base.size;
    if (added_domains & RADEON_DOMAIN_VRAM)
        cs->csc->used_vram += bo->base.size;

    return index;
}

static int radeon_drm_cs_lookup_buffer(struct radeon_winsys_cs *rcs,
                                       struct pb_buffer *buf)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    return radeon_lookup_buffer(cs->csc, (struct radeon_bo*)buf);
}

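/* Check that the buffers added so far fit into 80% of VRAM/GTT (see the
 * comment at the top of this file). On failure, drop the buffers added
 * since the last successful validation and flush the CS. */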
static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    boolean status =
        cs->csc->used_gart < cs->ws->info.gart_size * 0.8 &&
        cs->csc->used_vram < cs->ws->info.vram_size * 0.8;

    if (status) {
        cs->csc->validated_crelocs = cs->csc->crelocs;
    } else {
        /* Remove the recently-added buffers. The validation failed with them
         * and the CS is about to be flushed because of that. Keep only
         * the already-validated buffers. */
        unsigned i;

        for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) {
            p_atomic_dec(&cs->csc->relocs_bo[i].bo->num_cs_references);
            radeon_bo_reference(&cs->csc->relocs_bo[i].bo, NULL);
        }
        cs->csc->crelocs = cs->csc->validated_crelocs;

        /* Flush if there are any relocs. Clean up otherwise. */
        if (cs->csc->crelocs) {
            cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
        } else {
            radeon_cs_context_cleanup(cs->csc);

            assert(cs->base.cdw == 0);
            if (cs->base.cdw != 0) {
                fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__);
            }
        }
    }
    return status;
}

static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    vram += cs->csc->used_vram;
    gtt += cs->csc->used_gart;

    /* Anything that goes above the VRAM size should go to GTT. */
    if (vram > cs->ws->info.vram_size)
        gtt += vram - cs->ws->info.vram_size;

    /* Now we just need to check if we have enough GTT. */
    return gtt < cs->ws->info.gart_size * 0.7;
}

static unsigned radeon_drm_cs_get_buffer_list(struct radeon_winsys_cs *rcs,
                                              struct radeon_bo_list_item *list)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    int i;

    if (list) {
        for (i = 0; i < cs->csc->crelocs; i++) {
            pb_reference(&list[i].buf, &cs->csc->relocs_bo[i].bo->base);
            list[i].vm_address = cs->csc->relocs_bo[i].bo->va;
            list[i].priority_usage = cs->csc->relocs_bo[i].priority_usage;
        }
    }
    return cs->csc->crelocs;
}

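/* Submit the CS to the kernel with the DRM_RADEON_CS ioctl, then drop the
 * per-submission references held on the buffers. */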
void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc)
{
    unsigned i;
    int r;

    r = drmCommandWriteRead(csc->fd, DRM_RADEON_CS,
                            &csc->cs, sizeof(struct drm_radeon_cs));
    if (r) {
        if (r == -ENOMEM)
            fprintf(stderr, "radeon: Not enough memory for command submission.\n");
        else if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) {
            unsigned i;

            fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n");
            for (i = 0; i < csc->chunks[0].length_dw; i++) {
                fprintf(stderr, "0x%08X\n", csc->buf[i]);
            }
        } else {
            fprintf(stderr, "radeon: The kernel rejected CS, "
                            "see dmesg for more information.\n");
        }
    }

    if (cs->trace_buf) {
        radeon_dump_cs_on_lockup(cs, csc);
    }

    for (i = 0; i < csc->crelocs; i++)
        p_atomic_dec(&csc->relocs_bo[i].bo->num_active_ioctls);

    radeon_cs_context_cleanup(csc);
}

/*
 * Make sure any previous submission of this CS has completed.
 */
void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    /* Wait for any pending ioctl to complete. */
    if (cs->ws->thread) {
        pipe_semaphore_wait(&cs->flush_completed);
        pipe_semaphore_signal(&cs->flush_completed);
    }
}

DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", FALSE)

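/* Pad the IB to the required alignment, swap the two CS contexts and submit
 * the filled one, either directly or through the winsys flush thread. */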
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
                                unsigned flags,
                                struct pipe_fence_handle **fence,
                                uint32_t cs_trace_id)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    switch (cs->base.ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements;
         * r6xx requires at least 4 dw alignment to avoid a hw bug.
         */
        if (cs->ws->info.gfx_ib_pad_with_type2) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
        }
        break;
    case RING_UVD:
        while (rcs->cdw & 15)
            OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        break;
    default:
        break;
    }

    if (rcs->cdw > rcs->max_dw) {
        fprintf(stderr, "radeon: command stream overflowed\n");
    }

    if (fence) {
        radeon_fence_reference(fence, NULL);
        *fence = radeon_cs_create_fence(rcs);
    }

    radeon_drm_cs_sync_flush(rcs);

    /* Swap command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    cs->cst->cs_trace_id = cs_trace_id;

    /* If the CS is not empty and has not overflowed, emit it in a separate thread. */
    if (cs->base.cdw && cs->base.cdw <= cs->base.max_dw && !debug_get_option_noop()) {
        unsigned i, crelocs;

        crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i].bo->num_active_ioctls);
        }

        switch (cs->base.ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        case RING_VCE:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_VCE;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
        case RING_COMPUTE:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 2;
            if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
                cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->base.ring_type == RING_COMPUTE) {
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

        if (cs->ws->thread) {
            pipe_semaphore_wait(&cs->flush_completed);
            radeon_drm_ws_queue_cs(cs->ws, cs);
            if (!(flags & RADEON_FLUSH_ASYNC))
                radeon_drm_cs_sync_flush(rcs);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.buf = cs->csc->buf;
    cs->base.cdw = 0;

    cs->ws->num_cs_flushes++;
}

static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    radeon_drm_cs_sync_flush(rcs);
    pipe_semaphore_destroy(&cs->flush_completed);
    radeon_cs_context_cleanup(&cs->csc1);
    radeon_cs_context_cleanup(&cs->csc2);
    p_atomic_dec(&cs->ws->num_cs);
    radeon_destroy_cs_context(&cs->csc1);
    radeon_destroy_cs_context(&cs->csc2);
    FREE(cs);
}

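/* Return whether 'buf' is referenced by the current CS with the given usage. */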
static boolean radeon_bo_is_referenced(struct radeon_winsys_cs *rcs,
                                       struct pb_buffer *_buf,
                                       enum radeon_bo_usage usage)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)_buf;
    int index;

    if (!bo->num_cs_references)
        return FALSE;

    index = radeon_lookup_buffer(cs->csc, bo);
    if (index == -1)
        return FALSE;

    if ((usage & RADEON_USAGE_WRITE) && cs->csc->relocs[index].write_domain)
        return TRUE;
    if ((usage & RADEON_USAGE_READ) && cs->csc->relocs[index].read_domains)
        return TRUE;

    return FALSE;
}

/* FENCES */

static struct pipe_fence_handle *
radeon_cs_create_fence(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct pb_buffer *fence;

    /* Create a fence, which is a dummy BO. */
    fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1, TRUE,
                                       RADEON_DOMAIN_GTT, 0);
    /* Add the fence as a dummy relocation. */
    cs->ws->base.cs_add_buffer(rcs, fence,
                               RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT,
                               RADEON_PRIO_FENCE);
    return (struct pipe_fence_handle*)fence;
}

static bool radeon_fence_wait(struct radeon_winsys *ws,
                              struct pipe_fence_handle *fence,
                              uint64_t timeout)
{
    return ws->buffer_wait((struct pb_buffer*)fence, timeout,
                           RADEON_USAGE_READWRITE);
}

static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src)
{
    pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src);
}

void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
{
    ws->base.ctx_create = radeon_drm_ctx_create;
    ws->base.ctx_destroy = radeon_drm_ctx_destroy;
    ws->base.cs_create = radeon_drm_cs_create;
    ws->base.cs_destroy = radeon_drm_cs_destroy;
    ws->base.cs_add_buffer = radeon_drm_cs_add_buffer;
    ws->base.cs_lookup_buffer = radeon_drm_cs_lookup_buffer;
    ws->base.cs_validate = radeon_drm_cs_validate;
    ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
    ws->base.cs_get_buffer_list = radeon_drm_cs_get_buffer_list;
    ws->base.cs_flush = radeon_drm_cs_flush;
    ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
    ws->base.cs_sync_flush = radeon_drm_cs_sync_flush;
    ws->base.fence_wait = radeon_fence_wait;
    ws->base.fence_reference = radeon_fence_reference;
}