radeon/winsys: introduce radeon_winsys_cs_chunk
src/gallium/winsys/radeon/drm/radeon_drm_cs.c
/*
 * Copyright © 2008 Jérôme Glisse
 * Copyright © 2010 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
/*
 * Authors:
 *      Marek Olšák <maraeo@gmail.com>
 *
 * Based on work from libdrm_radeon by:
 *      Aapo Tahkola <aet@rasterburn.org>
 *      Nicolai Haehnle <prefect_@gmx.net>
 *      Jérôme Glisse <glisse@freedesktop.org>
 */

/*
    This file replaces libdrm's radeon_cs_gem with our own implementation.
    It's optimized specifically for Radeon DRM.
    Adding buffers and space checking are faster and simpler than their
    counterparts in libdrm (the time complexity of all the functions
    is O(1) in nearly all scenarios, thanks to hashing).

    It works like this:

    cs_add_buffer(cs, buf, read_domain, write_domain) adds a new relocation and
    also adds the size of 'buf' to the used_gart and used_vram winsys variables
    based on the domains, which are simply or'd for the accounting purposes.
    The adding is skipped if the reloc is already present in the list, but it
    accounts for any newly-referenced domains.

    cs_validate is then called, which just checks:
        used_vram/gart < vram/gart_size * 0.8
    The 0.8 number allows for some memory fragmentation. If the validation
    fails, the pipe driver flushes the CS and tries the validation again,
    i.e. it validates only that one operation. If it fails again, it drops
    the operation on the floor and prints some nasty message to stderr.
    (done in the pipe driver)

    cs_write_reloc(cs, buf) just writes a reloc that has been added using
    cs_add_buffer. The read_domain and write_domain parameters have been removed,
    because we already specify them in cs_add_buffer.
*/

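/* A minimal usage sketch of the driver-side pattern described above, kept
 * under "#if 0" so it is never compiled: reference a buffer, validate the
 * memory usage, and flush + retry once if validation fails. The function
 * name and the domain/priority values are illustrative assumptions;
 * cs_add_buffer, cs_validate and cs_flush are the winsys hooks installed
 * at the bottom of this file.
 */
#if 0
static void example_emit_with_buffer(struct radeon_winsys *ws,
                                     struct radeon_winsys_cs *cs,
                                     struct pb_buffer *buf)
{
    /* Adds a reloc and accounts the buffer size in used_vram/used_gart. */
    ws->cs_add_buffer(cs, buf, RADEON_USAGE_READ, RADEON_DOMAIN_VRAM,
                      RADEON_PRIO_SAMPLER_TEXTURE);

    /* Checks the 80% watermarks described above. */
    if (!ws->cs_validate(cs)) {
        /* Flush and validate just this one operation. */
        ws->cs_flush(cs, RADEON_FLUSH_ASYNC, NULL);
        ws->cs_add_buffer(cs, buf, RADEON_USAGE_READ, RADEON_DOMAIN_VRAM,
                          RADEON_PRIO_SAMPLER_TEXTURE);
        if (!ws->cs_validate(cs))
            fprintf(stderr, "radeon: dropping operation, buffers don't fit\n");
    }

    /* ... emit packets referencing the buffer here ... */
}
#endif
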
#include "radeon_drm_cs.h"

#include "util/u_memory.h"
#include "os/os_time.h"

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <xf86drm.h>


#define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))

static struct pipe_fence_handle *
radeon_cs_create_fence(struct radeon_winsys_cs *rcs);
static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src);

static struct radeon_winsys_ctx *radeon_drm_ctx_create(struct radeon_winsys *ws)
{
    /* No context support here. Just return the winsys pointer
     * as the "context". */
    return (struct radeon_winsys_ctx*)ws;
}

static void radeon_drm_ctx_destroy(struct radeon_winsys_ctx *ctx)
{
    /* No context support here. */
}

static boolean radeon_init_cs_context(struct radeon_cs_context *csc,
                                      struct radeon_drm_winsys *ws)
{
    int i;

    csc->fd = ws->fd;
    csc->nrelocs = 512;
    csc->relocs_bo = (struct radeon_bo_item*)
                     CALLOC(1, csc->nrelocs * sizeof(csc->relocs_bo[0]));
    if (!csc->relocs_bo) {
        return FALSE;
    }

    csc->relocs = (struct drm_radeon_cs_reloc*)
                  CALLOC(1, csc->nrelocs * sizeof(struct drm_radeon_cs_reloc));
    if (!csc->relocs) {
        FREE(csc->relocs_bo);
        return FALSE;
    }

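    /* Describe the three chunks passed to the CS ioctl: the command buffer
     * (IB), the relocation list and the flags dwords. */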
    csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
    csc->chunks[0].length_dw = 0;
    csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf;
    csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
    csc->chunks[1].length_dw = 0;
    csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
    csc->chunks[2].length_dw = 2;
    csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)&csc->flags;

    csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0];
    csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1];
    csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2];

    csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array;

    for (i = 0; i < ARRAY_SIZE(csc->reloc_indices_hashlist); i++) {
        csc->reloc_indices_hashlist[i] = -1;
    }
    return TRUE;
}

static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
{
    unsigned i;

    for (i = 0; i < csc->crelocs; i++) {
        p_atomic_dec(&csc->relocs_bo[i].bo->num_cs_references);
        radeon_bo_reference(&csc->relocs_bo[i].bo, NULL);
    }

    csc->crelocs = 0;
    csc->validated_crelocs = 0;
    csc->chunks[0].length_dw = 0;
    csc->chunks[1].length_dw = 0;
    csc->used_gart = 0;
    csc->used_vram = 0;

    for (i = 0; i < ARRAY_SIZE(csc->reloc_indices_hashlist); i++) {
        csc->reloc_indices_hashlist[i] = -1;
    }
}

static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
{
    radeon_cs_context_cleanup(csc);
    FREE(csc->relocs_bo);
    FREE(csc->relocs);
}


static struct radeon_winsys_cs *
radeon_drm_cs_create(struct radeon_winsys_ctx *ctx,
                     enum ring_type ring_type,
                     void (*flush)(void *ctx, unsigned flags,
                                   struct pipe_fence_handle **fence),
                     void *flush_ctx)
{
    struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)ctx;
    struct radeon_drm_cs *cs;

    cs = CALLOC_STRUCT(radeon_drm_cs);
    if (!cs) {
        return NULL;
    }
    pipe_semaphore_init(&cs->flush_completed, 1);

    cs->ws = ws;
    cs->flush_cs = flush;
    cs->flush_data = flush_ctx;

    if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
        FREE(cs);
        return NULL;
    }
    if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
        radeon_destroy_cs_context(&cs->csc1);
        FREE(cs);
        return NULL;
    }

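    /* Two command stream contexts are kept: "csc" is the one being filled
     * by the driver, "cst" is the one being submitted. They are swapped at
     * flush time so a new IB can be built while the previous one is
     * submitted by the winsys thread. */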
    /* Set the first command buffer as current. */
    cs->csc = &cs->csc1;
    cs->cst = &cs->csc2;
    cs->base.current.buf = cs->csc->buf;
    cs->base.current.max_dw = ARRAY_SIZE(cs->csc->buf);
    cs->ring_type = ring_type;

    p_atomic_inc(&ws->num_cs);
    return &cs->base;
}

#define OUT_CS(cs, value) (cs)->current.buf[(cs)->current.cdw++] = (value)

static inline void update_reloc(struct drm_radeon_cs_reloc *reloc,
                                enum radeon_bo_domain rd,
                                enum radeon_bo_domain wd,
                                unsigned priority,
                                enum radeon_bo_domain *added_domains)
{
    *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);

    reloc->read_domains |= rd;
    reloc->write_domain |= wd;
    reloc->flags = MAX2(reloc->flags, priority);
}

int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo)
{
    unsigned hash = bo->handle & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
    int i = csc->reloc_indices_hashlist[hash];

    /* not found or found */
    if (i == -1 || csc->relocs_bo[i].bo == bo)
        return i;

    /* Hash collision, look for the BO in the list of relocs linearly. */
    for (i = csc->crelocs - 1; i >= 0; i--) {
        if (csc->relocs_bo[i].bo == bo) {
            /* Put this reloc in the hash list.
             * This will prevent additional hash collisions if there are
             * several consecutive lookup_buffer calls for the same buffer.
             *
             * Example: Assuming buffers A,B,C collide in the hash list,
             * the following sequence of relocs:
             *         AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
             * will collide here: ^ and here:   ^,
             * meaning that we should get very few collisions in the end. */
            csc->reloc_indices_hashlist[hash] = i;
            return i;
        }
    }
    return -1;
}

static unsigned radeon_add_buffer(struct radeon_drm_cs *cs,
                                  struct radeon_bo *bo,
                                  enum radeon_bo_usage usage,
                                  enum radeon_bo_domain domains,
                                  unsigned priority,
                                  enum radeon_bo_domain *added_domains)
{
    struct radeon_cs_context *csc = cs->csc;
    struct drm_radeon_cs_reloc *reloc;
    unsigned hash = bo->handle & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
    enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
    enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
    int i = -1;

    assert(priority < 64);
    *added_domains = 0;

    i = radeon_lookup_buffer(csc, bo);

    if (i >= 0) {
        reloc = &csc->relocs[i];
        update_reloc(reloc, rd, wd, priority / 4, added_domains);
        csc->relocs_bo[i].priority_usage |= 1llu << priority;

        /* For async DMA, every add_buffer call must add a buffer to the list
         * no matter how many duplicates there are. This is because the DMA
         * CS checker doesn't use NOP packets for offset patching, but always
         * uses the i-th buffer from the list to patch the i-th offset. If
         * there are N offsets in a DMA CS, there must also be N buffers in
         * the relocation list.
         *
         * This doesn't have to be done if virtual memory is enabled,
         * because there is no offset patching with virtual memory.
         */
        if (cs->ring_type != RING_DMA || cs->ws->info.has_virtual_memory) {
            return i;
        }
    }

    /* New relocation, check if the backing array is large enough. */
    if (csc->crelocs >= csc->nrelocs) {
        uint32_t size;
        csc->nrelocs += 10;

        size = csc->nrelocs * sizeof(csc->relocs_bo[0]);
        csc->relocs_bo = realloc(csc->relocs_bo, size);

        size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc);
        csc->relocs = realloc(csc->relocs, size);

        csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    }

    /* Initialize the new relocation. */
    csc->relocs_bo[csc->crelocs].bo = NULL;
    csc->relocs_bo[csc->crelocs].priority_usage = 1llu << priority;
    radeon_bo_reference(&csc->relocs_bo[csc->crelocs].bo, bo);
    p_atomic_inc(&bo->num_cs_references);
    reloc = &csc->relocs[csc->crelocs];
    reloc->handle = bo->handle;
    reloc->read_domains = rd;
    reloc->write_domain = wd;
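    /* Scale the 0-63 winsys priority down to the smaller range kept in the
     * kernel reloc flags (same scaling as in update_reloc() above). */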
    reloc->flags = priority / 4;

    csc->reloc_indices_hashlist[hash] = csc->crelocs;

    csc->chunks[1].length_dw += RELOC_DWORDS;

    *added_domains = rd | wd;
    return csc->crelocs++;
}

static unsigned radeon_drm_cs_add_buffer(struct radeon_winsys_cs *rcs,
                                         struct pb_buffer *buf,
                                         enum radeon_bo_usage usage,
                                         enum radeon_bo_domain domains,
                                         enum radeon_bo_priority priority)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)buf;
    enum radeon_bo_domain added_domains;
    unsigned index = radeon_add_buffer(cs, bo, usage, domains, priority,
                                       &added_domains);

    if (added_domains & RADEON_DOMAIN_VRAM)
        cs->csc->used_vram += bo->base.size;
    else if (added_domains & RADEON_DOMAIN_GTT)
        cs->csc->used_gart += bo->base.size;

    return index;
}

static int radeon_drm_cs_lookup_buffer(struct radeon_winsys_cs *rcs,
                                       struct pb_buffer *buf)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    return radeon_lookup_buffer(cs->csc, (struct radeon_bo*)buf);
}

static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    boolean status =
        cs->csc->used_gart < cs->ws->info.gart_size * 0.8 &&
        cs->csc->used_vram < cs->ws->info.vram_size * 0.8;

    if (status) {
        cs->csc->validated_crelocs = cs->csc->crelocs;
    } else {
        /* Remove recently-added buffers. The validation failed with them
         * and the CS is about to be flushed because of that. Keep only
         * the already-validated buffers. */
        unsigned i;

        for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) {
            p_atomic_dec(&cs->csc->relocs_bo[i].bo->num_cs_references);
            radeon_bo_reference(&cs->csc->relocs_bo[i].bo, NULL);
        }
        cs->csc->crelocs = cs->csc->validated_crelocs;

        /* Flush if there are any relocs. Clean up otherwise. */
        if (cs->csc->crelocs) {
            cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
        } else {
            radeon_cs_context_cleanup(cs->csc);

            assert(cs->base.current.cdw == 0);
            if (cs->base.current.cdw != 0) {
                fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__);
            }
        }
    }
    return status;
}

static bool radeon_drm_cs_check_space(struct radeon_winsys_cs *rcs, unsigned dw)
{
    assert(rcs->current.cdw <= rcs->current.max_dw);
    return rcs->current.max_dw - rcs->current.cdw >= dw;
}

static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    vram += cs->csc->used_vram;
    gtt += cs->csc->used_gart;

    /* Anything that goes above the VRAM size should go to GTT. */
    if (vram > cs->ws->info.vram_size)
        gtt += vram - cs->ws->info.vram_size;
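    /* Example (illustrative numbers): with a 1024 MB vram_size, a request
     * totalling 1100 MB of VRAM adds the 76 MB overflow to the GTT total
     * checked against the 70% limit below. */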

    /* Now we just need to check if we have enough GTT. */
    return gtt < cs->ws->info.gart_size * 0.7;
}

static uint64_t radeon_drm_cs_query_memory_usage(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    return cs->csc->used_vram + cs->csc->used_gart;
}

static unsigned radeon_drm_cs_get_buffer_list(struct radeon_winsys_cs *rcs,
                                              struct radeon_bo_list_item *list)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    int i;

    if (list) {
        for (i = 0; i < cs->csc->crelocs; i++) {
            pb_reference(&list[i].buf, &cs->csc->relocs_bo[i].bo->base);
            list[i].vm_address = cs->csc->relocs_bo[i].bo->va;
            list[i].priority_usage = cs->csc->relocs_bo[i].priority_usage;
        }
    }
    return cs->csc->crelocs;
}

void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc)
{
    unsigned i;
    int r;

    r = drmCommandWriteRead(csc->fd, DRM_RADEON_CS,
                            &csc->cs, sizeof(struct drm_radeon_cs));
    if (r) {
        if (r == -ENOMEM)
            fprintf(stderr, "radeon: Not enough memory for command submission.\n");
        else if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) {
            unsigned i;

            fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n");
            for (i = 0; i < csc->chunks[0].length_dw; i++) {
                fprintf(stderr, "0x%08X\n", csc->buf[i]);
            }
        } else {
            fprintf(stderr, "radeon: The kernel rejected CS, "
                    "see dmesg for more information.\n");
        }
    }

    for (i = 0; i < csc->crelocs; i++)
        p_atomic_dec(&csc->relocs_bo[i].bo->num_active_ioctls);

    radeon_cs_context_cleanup(csc);
}

/*
 * Make sure previous submissions of this cs have completed.
 */
void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    /* Wait for any pending ioctl to complete. */
    if (cs->ws->thread) {
        pipe_semaphore_wait(&cs->flush_completed);
        pipe_semaphore_signal(&cs->flush_completed);
    }
}

DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", FALSE)

static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
                                unsigned flags,
                                struct pipe_fence_handle **fence)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    switch (cs->ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->current.cdw & 7)
                OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->current.cdw & 7)
                OUT_CS(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements;
         * on r6xx, at least 4 dw alignment is required to avoid a hw bug.
         */
        if (cs->ws->info.gfx_ib_pad_with_type2) {
            while (rcs->current.cdw & 7)
                OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        } else {
            while (rcs->current.cdw & 7)
                OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
        }
        break;
    case RING_UVD:
        while (rcs->current.cdw & 15)
            OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        break;
    default:
        break;
    }
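    /* After padding, cdw is a multiple of 8 (16 for UVD); e.g. an IB of
     * 13 dwords receives 3 NOP dwords to reach 16. */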

    if (rcs->current.cdw > rcs->current.max_dw) {
        fprintf(stderr, "radeon: command stream overflowed\n");
    }

    if (fence) {
        radeon_fence_reference(fence, NULL);
        *fence = radeon_cs_create_fence(rcs);
    }

    radeon_drm_cs_sync_flush(rcs);

    /* Swap command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    /* If the CS is not empty and not overflowed, emit it in a separate thread. */
    if (cs->base.current.cdw && cs->base.current.cdw <= cs->base.current.max_dw && !debug_get_option_noop()) {
        unsigned i, crelocs;

        crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.current.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i].bo->num_active_ioctls);
        }

        switch (cs->ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.has_virtual_memory) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        case RING_VCE:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_VCE;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
        case RING_COMPUTE:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 2;
            if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
                cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ws->info.has_virtual_memory) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ring_type == RING_COMPUTE) {
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

        if (cs->ws->thread) {
            pipe_semaphore_wait(&cs->flush_completed);
            radeon_drm_ws_queue_cs(cs->ws, cs);
            if (!(flags & RADEON_FLUSH_ASYNC))
                radeon_drm_cs_sync_flush(rcs);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.current.buf = cs->csc->buf;
    cs->base.current.cdw = 0;

    cs->ws->num_cs_flushes++;
}

static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    radeon_drm_cs_sync_flush(rcs);
    pipe_semaphore_destroy(&cs->flush_completed);
    radeon_cs_context_cleanup(&cs->csc1);
    radeon_cs_context_cleanup(&cs->csc2);
    p_atomic_dec(&cs->ws->num_cs);
    radeon_destroy_cs_context(&cs->csc1);
    radeon_destroy_cs_context(&cs->csc2);
    FREE(cs);
}

static boolean radeon_bo_is_referenced(struct radeon_winsys_cs *rcs,
                                       struct pb_buffer *_buf,
                                       enum radeon_bo_usage usage)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)_buf;
    int index;

    if (!bo->num_cs_references)
        return FALSE;

    index = radeon_lookup_buffer(cs->csc, bo);
    if (index == -1)
        return FALSE;

    if ((usage & RADEON_USAGE_WRITE) && cs->csc->relocs[index].write_domain)
        return TRUE;
    if ((usage & RADEON_USAGE_READ) && cs->csc->relocs[index].read_domains)
        return TRUE;

    return FALSE;
}

/* FENCES */

static struct pipe_fence_handle *
radeon_cs_create_fence(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct pb_buffer *fence;

    /* Create a fence, which is a dummy BO. */
    fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1,
                                       RADEON_DOMAIN_GTT, 0);
    /* Add the fence as a dummy relocation. */
    cs->ws->base.cs_add_buffer(rcs, fence,
                               RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT,
                               RADEON_PRIO_FENCE);
    return (struct pipe_fence_handle*)fence;
}

static bool radeon_fence_wait(struct radeon_winsys *ws,
                              struct pipe_fence_handle *fence,
                              uint64_t timeout)
{
    return ws->buffer_wait((struct pb_buffer*)fence, timeout,
                           RADEON_USAGE_READWRITE);
}

static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src)
{
    pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src);
}

void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
{
    ws->base.ctx_create = radeon_drm_ctx_create;
    ws->base.ctx_destroy = radeon_drm_ctx_destroy;
    ws->base.cs_create = radeon_drm_cs_create;
    ws->base.cs_destroy = radeon_drm_cs_destroy;
    ws->base.cs_add_buffer = radeon_drm_cs_add_buffer;
    ws->base.cs_lookup_buffer = radeon_drm_cs_lookup_buffer;
    ws->base.cs_validate = radeon_drm_cs_validate;
    ws->base.cs_check_space = radeon_drm_cs_check_space;
    ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
    ws->base.cs_query_memory_usage = radeon_drm_cs_query_memory_usage;
    ws->base.cs_get_buffer_list = radeon_drm_cs_get_buffer_list;
    ws->base.cs_flush = radeon_drm_cs_flush;
    ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
    ws->base.cs_sync_flush = radeon_drm_cs_sync_flush;
    ws->base.fence_wait = radeon_fence_wait;
    ws->base.fence_reference = radeon_fence_reference;
}