/*
 * Copyright © 2008 Jérôme Glisse
 * Copyright © 2010 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
/*
 * Authors:
 *      Marek Olšák <maraeo@gmail.com>
 *
 * Based on work from libdrm_radeon by:
 *      Aapo Tahkola <aet@rasterburn.org>
 *      Nicolai Haehnle <prefect_@gmx.net>
 *      Jérôme Glisse <glisse@freedesktop.org>
 */

/*
    This file replaces libdrm's radeon_cs_gem with our own implementation.
    It's optimized specifically for Radeon DRM.
    Adding buffers and space checking are faster and simpler than their
    counterparts in libdrm (the time complexity of all the functions
    is O(1) in nearly all scenarios, thanks to hashing).

    It works like this:

    cs_add_buffer(cs, buf, read_domain, write_domain) adds a new relocation and
    also adds the size of 'buf' to the used_gart and used_vram winsys variables
    based on the domains, which are simply OR'd for accounting purposes.
    The adding is skipped if the reloc is already present in the list, but it
    still accounts for any newly-referenced domains.

    cs_validate is then called, which just checks:
        used_vram/gart < vram/gart_size * 0.8
    The 0.8 factor allows for some memory fragmentation. If the validation
    fails, the pipe driver flushes the CS and tries to do the validation again,
    i.e. it validates only that one operation. If it fails again, it drops
    the operation on the floor and prints a nasty message to stderr.
    (This is done in the pipe driver.)

    cs_write_reloc(cs, buf) just writes a reloc that has been added using
    cs_add_buffer. The read_domain and write_domain parameters have been removed,
    because we already specify them in cs_add_buffer.
*/
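
/*
    Roughly, a pipe driver is expected to drive this interface as follows.
    This is an illustrative sketch only (not taken from a real caller); the
    function pointers are the ones installed by radeon_drm_cs_init_functions:

        ws->cs_add_buffer(cs, buf, RADEON_USAGE_READWRITE, domains, priority);
        if (!ws->cs_validate(cs)) {
            ws->cs_flush(cs, RADEON_FLUSH_ASYNC, NULL);
            ws->cs_add_buffer(cs, buf, RADEON_USAGE_READWRITE, domains, priority);
            (void) ws->cs_validate(cs);   // if this fails too, drop the operation
        }
        ... emit packets referencing the buffer ...
        ws->cs_flush(cs, 0, &fence);
*/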

#include "radeon_drm_cs.h"

#include "util/u_memory.h"
#include "os/os_time.h"

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <xf86drm.h>


#define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))

static struct pipe_fence_handle *
radeon_cs_create_fence(struct radeon_winsys_cs *rcs);
static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src);

static struct radeon_winsys_ctx *radeon_drm_ctx_create(struct radeon_winsys *ws)
{
    /* No context support here. Just return the winsys pointer
     * as the "context". */
    return (struct radeon_winsys_ctx*)ws;
}

static void radeon_drm_ctx_destroy(struct radeon_winsys_ctx *ctx)
{
    /* No context support here. */
}

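/* Initialize one radeon_cs_context: allocate its reloc arrays, set up the
 * three CS chunks (IB, relocs, flags) that are passed to the CS ioctl, and
 * clear the reloc hash table. */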
static bool radeon_init_cs_context(struct radeon_cs_context *csc,
                                   struct radeon_drm_winsys *ws)
{
    int i;

    csc->fd = ws->fd;
    csc->nrelocs = 512;
    csc->relocs_bo = (struct radeon_bo_item*)
                     CALLOC(1, csc->nrelocs * sizeof(csc->relocs_bo[0]));
    if (!csc->relocs_bo) {
        return false;
    }

    csc->relocs = (struct drm_radeon_cs_reloc*)
                  CALLOC(1, csc->nrelocs * sizeof(struct drm_radeon_cs_reloc));
    if (!csc->relocs) {
        FREE(csc->relocs_bo);
        return false;
    }

    csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
    csc->chunks[0].length_dw = 0;
    csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf;
    csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
    csc->chunks[1].length_dw = 0;
    csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
    csc->chunks[2].length_dw = 2;
    csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)&csc->flags;

    csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0];
    csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1];
    csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2];

    csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array;

    for (i = 0; i < ARRAY_SIZE(csc->reloc_indices_hashlist); i++) {
        csc->reloc_indices_hashlist[i] = -1;
    }
    return true;
}

static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
{
    unsigned i;

    for (i = 0; i < csc->crelocs; i++) {
        p_atomic_dec(&csc->relocs_bo[i].bo->num_cs_references);
        radeon_bo_reference(&csc->relocs_bo[i].bo, NULL);
    }

    csc->crelocs = 0;
    csc->validated_crelocs = 0;
    csc->chunks[0].length_dw = 0;
    csc->chunks[1].length_dw = 0;

    for (i = 0; i < ARRAY_SIZE(csc->reloc_indices_hashlist); i++) {
        csc->reloc_indices_hashlist[i] = -1;
    }
}

static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
{
    radeon_cs_context_cleanup(csc);
    FREE(csc->relocs_bo);
    FREE(csc->relocs);
}


static struct radeon_winsys_cs *
radeon_drm_cs_create(struct radeon_winsys_ctx *ctx,
                     enum ring_type ring_type,
                     void (*flush)(void *ctx, unsigned flags,
                                   struct pipe_fence_handle **fence),
                     void *flush_ctx)
{
    struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)ctx;
    struct radeon_drm_cs *cs;

    cs = CALLOC_STRUCT(radeon_drm_cs);
    if (!cs) {
        return NULL;
    }
    util_queue_fence_init(&cs->flush_completed);

    cs->ws = ws;
    cs->flush_cs = flush;
    cs->flush_data = flush_ctx;

    if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
        FREE(cs);
        return NULL;
    }
    if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
        radeon_destroy_cs_context(&cs->csc1);
        FREE(cs);
        return NULL;
    }

    /* Set the first command buffer as current. */
    cs->csc = &cs->csc1;
    cs->cst = &cs->csc2;
    cs->base.current.buf = cs->csc->buf;
    cs->base.current.max_dw = ARRAY_SIZE(cs->csc->buf);
    cs->ring_type = ring_type;

    p_atomic_inc(&ws->num_cs);
    return &cs->base;
}

#define OUT_CS(cs, value) (cs)->current.buf[(cs)->current.cdw++] = (value)

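/* Merge the newly-requested read/write domains and priority into an existing
 * reloc and report which domains were not referenced before, so the caller
 * can account the buffer size only once per domain. */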
static inline void update_reloc(struct drm_radeon_cs_reloc *reloc,
                                enum radeon_bo_domain rd,
                                enum radeon_bo_domain wd,
                                unsigned priority,
                                enum radeon_bo_domain *added_domains)
{
    *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);

    reloc->read_domains |= rd;
    reloc->write_domain |= wd;
    reloc->flags = MAX2(reloc->flags, priority);
}

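/* Return the index of 'bo' in the reloc list, or -1 if it isn't there.
 * The lookup is O(1) through the hash table in the common case and falls
 * back to a linear search only on hash collisions. */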
int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo)
{
    unsigned hash = bo->handle & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
    int i = csc->reloc_indices_hashlist[hash];

    /* The hash entry is either empty (not found) or already points at this BO (found). */
    if (i == -1 || csc->relocs_bo[i].bo == bo)
        return i;

    /* Hash collision, look for the BO in the list of relocs linearly. */
    for (i = csc->crelocs - 1; i >= 0; i--) {
        if (csc->relocs_bo[i].bo == bo) {
            /* Put this reloc in the hash list.
             * This will prevent additional hash collisions if there are
             * several consecutive lookup_buffer calls for the same buffer.
             *
             * Example: Assuming buffers A,B,C collide in the hash list,
             * the following sequence of relocs:
             *         AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
             * will collide here: ^ and here:   ^,
             * meaning that we should get very few collisions in the end. */
            csc->reloc_indices_hashlist[hash] = i;
            return i;
        }
    }
    return -1;
}

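/* Add 'bo' to the reloc list of the current CS context, or just merge the new
 * domains/priority into its existing reloc. The backing arrays grow on demand.
 * Returns the reloc index and reports the newly-added domains so the caller
 * can update the used_gart/used_vram accounting. */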
static unsigned radeon_add_buffer(struct radeon_drm_cs *cs,
                                  struct radeon_bo *bo,
                                  enum radeon_bo_usage usage,
                                  enum radeon_bo_domain domains,
                                  unsigned priority,
                                  enum radeon_bo_domain *added_domains)
{
    struct radeon_cs_context *csc = cs->csc;
    struct drm_radeon_cs_reloc *reloc;
    unsigned hash = bo->handle & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
    enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
    enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
    int i = -1;

    assert(priority < 64);
    *added_domains = 0;

    i = radeon_lookup_buffer(csc, bo);

    if (i >= 0) {
        reloc = &csc->relocs[i];
        update_reloc(reloc, rd, wd, priority / 4, added_domains);
        csc->relocs_bo[i].priority_usage |= 1llu << priority;

        /* For async DMA, every add_buffer call must add a buffer to the list
         * no matter how many duplicates there are, because the DMA CS checker
         * doesn't use NOP packets for offset patching, but always uses the
         * i-th buffer from the list to patch the i-th offset. If there are
         * N offsets in a DMA CS, there must also be N buffers in the
         * relocation list.
         *
         * This doesn't have to be done if virtual memory is enabled,
         * because there is no offset patching with virtual memory.
         */
        if (cs->ring_type != RING_DMA || cs->ws->info.has_virtual_memory) {
            return i;
        }
    }

    /* New relocation, check if the backing array is large enough. */
    if (csc->crelocs >= csc->nrelocs) {
        uint32_t size;
        csc->nrelocs += 10;

        size = csc->nrelocs * sizeof(csc->relocs_bo[0]);
        csc->relocs_bo = realloc(csc->relocs_bo, size);

        size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc);
        csc->relocs = realloc(csc->relocs, size);

        csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    }

    /* Initialize the new relocation. */
    csc->relocs_bo[csc->crelocs].bo = NULL;
    csc->relocs_bo[csc->crelocs].priority_usage = 1llu << priority;
    radeon_bo_reference(&csc->relocs_bo[csc->crelocs].bo, bo);
    p_atomic_inc(&bo->num_cs_references);
    reloc = &csc->relocs[csc->crelocs];
    reloc->handle = bo->handle;
    reloc->read_domains = rd;
    reloc->write_domain = wd;
    reloc->flags = priority / 4;

    csc->reloc_indices_hashlist[hash] = csc->crelocs;

    csc->chunks[1].length_dw += RELOC_DWORDS;

    *added_domains = rd | wd;
    return csc->crelocs++;
}

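/* Winsys entry point: add a buffer to the CS and charge its size against the
 * used_vram/used_gart totals the first time each domain is referenced. */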
static unsigned radeon_drm_cs_add_buffer(struct radeon_winsys_cs *rcs,
                                         struct pb_buffer *buf,
                                         enum radeon_bo_usage usage,
                                         enum radeon_bo_domain domains,
                                         enum radeon_bo_priority priority)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)buf;
    enum radeon_bo_domain added_domains;
    unsigned index = radeon_add_buffer(cs, bo, usage, domains, priority,
                                       &added_domains);

    if (added_domains & RADEON_DOMAIN_VRAM)
        cs->base.used_vram += bo->base.size;
    else if (added_domains & RADEON_DOMAIN_GTT)
        cs->base.used_gart += bo->base.size;

    return index;
}

static int radeon_drm_cs_lookup_buffer(struct radeon_winsys_cs *rcs,
                                       struct pb_buffer *buf)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    return radeon_lookup_buffer(cs->csc, (struct radeon_bo*)buf);
}

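/* Check that the buffers referenced by the CS still fit into 80% of VRAM and
 * GTT. On failure, drop the relocs added since the last successful check and
 * either flush the CS asynchronously (if it already has validated relocs) or
 * just reset the accounting. */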
static bool radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    bool status =
        cs->base.used_gart < cs->ws->info.gart_size * 0.8 &&
        cs->base.used_vram < cs->ws->info.vram_size * 0.8;

    if (status) {
        cs->csc->validated_crelocs = cs->csc->crelocs;
    } else {
        /* Remove the recently-added buffers. The validation failed with them
         * and the CS is about to be flushed because of that. Keep only
         * the already-validated buffers. */
        unsigned i;

        for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) {
            p_atomic_dec(&cs->csc->relocs_bo[i].bo->num_cs_references);
            radeon_bo_reference(&cs->csc->relocs_bo[i].bo, NULL);
        }
        cs->csc->crelocs = cs->csc->validated_crelocs;

        /* Flush if there are any relocs. Clean up otherwise. */
        if (cs->csc->crelocs) {
            cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
        } else {
            radeon_cs_context_cleanup(cs->csc);
            cs->base.used_vram = 0;
            cs->base.used_gart = 0;

            assert(cs->base.current.cdw == 0);
            if (cs->base.current.cdw != 0) {
                fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__);
            }
        }
    }
    return status;
}

static bool radeon_drm_cs_check_space(struct radeon_winsys_cs *rcs, unsigned dw)
{
    assert(rcs->current.cdw <= rcs->current.max_dw);
    return rcs->current.max_dw - rcs->current.cdw >= dw;
}

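/* Return whether the CS could still be submitted if 'vram' and 'gtt' bytes
 * were added on top of its current memory usage. VRAM overcommit is counted
 * against GTT, and GTT usage is capped at 70% of the GTT size. */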
static bool radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    vram += cs->base.used_vram;
    gtt += cs->base.used_gart;

    /* Anything that goes above the VRAM size should go to GTT. */
    if (vram > cs->ws->info.vram_size)
        gtt += vram - cs->ws->info.vram_size;

    /* Now we just need to check if we have enough GTT. */
    return gtt < cs->ws->info.gart_size * 0.7;
}

static uint64_t radeon_drm_cs_query_memory_usage(struct radeon_winsys_cs *rcs)
{
    return rcs->used_vram + rcs->used_gart;
}

static unsigned radeon_drm_cs_get_buffer_list(struct radeon_winsys_cs *rcs,
                                              struct radeon_bo_list_item *list)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    int i;

    if (list) {
        for (i = 0; i < cs->csc->crelocs; i++) {
            list[i].bo_size = cs->csc->relocs_bo[i].bo->base.size;
            list[i].vm_address = cs->csc->relocs_bo[i].bo->va;
            list[i].priority_usage = cs->csc->relocs_bo[i].priority_usage;
        }
    }
    return cs->csc->crelocs;
}

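/* Submit the command stream to the kernel with the DRM_RADEON_CS ioctl.
 * This runs either on the winsys queue thread or synchronously from
 * radeon_drm_cs_flush. If the kernel rejects the CS, the IB is dumped to
 * stderr when the RADEON_DUMP_CS environment variable is set. */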
void radeon_drm_cs_emit_ioctl_oneshot(void *job, int thread_index)
{
    struct radeon_cs_context *csc = ((struct radeon_drm_cs*)job)->cst;
    unsigned i;
    int r;

    r = drmCommandWriteRead(csc->fd, DRM_RADEON_CS,
                            &csc->cs, sizeof(struct drm_radeon_cs));
    if (r) {
        if (r == -ENOMEM)
            fprintf(stderr, "radeon: Not enough memory for command submission.\n");
        else if (debug_get_bool_option("RADEON_DUMP_CS", false)) {
            unsigned i;

            fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n");
            for (i = 0; i < csc->chunks[0].length_dw; i++) {
                fprintf(stderr, "0x%08X\n", csc->buf[i]);
            }
        } else {
            fprintf(stderr, "radeon: The kernel rejected CS, "
                    "see dmesg for more information (%i).\n", r);
        }
    }

    for (i = 0; i < csc->crelocs; i++)
        p_atomic_dec(&csc->relocs_bo[i].bo->num_active_ioctls);

    radeon_cs_context_cleanup(csc);
}

/*
 * Make sure any previous submission of this CS has completed.
 */
void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    /* Wait for any pending ioctl of this CS to complete. */
    if (util_queue_is_initialized(&cs->ws->cs_queue))
        util_queue_job_wait(&cs->flush_completed);
}

DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", false)

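/* Flush the current IB: pad it to the ring's required alignment with NOPs,
 * swap the two CS contexts, and submit the filled one, asynchronously via
 * the winsys queue when it is available. The new (empty) IB is ready for
 * recording as soon as this returns. */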
static int radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
                               unsigned flags,
                               struct pipe_fence_handle **fence)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    switch (cs->ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->current.cdw & 7)
                OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->current.cdw & 7)
                OUT_CS(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements;
         * r6xx requires at least 4 DW alignment to avoid a hw bug.
         */
        if (cs->ws->info.gfx_ib_pad_with_type2) {
            while (rcs->current.cdw & 7)
                OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        } else {
            while (rcs->current.cdw & 7)
                OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
        }
        break;
    case RING_UVD:
        while (rcs->current.cdw & 15)
            OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        break;
    default:
        break;
    }

    if (rcs->current.cdw > rcs->current.max_dw) {
        fprintf(stderr, "radeon: command stream overflowed\n");
    }

    if (fence) {
        radeon_fence_reference(fence, NULL);
        *fence = radeon_cs_create_fence(rcs);
    }

    radeon_drm_cs_sync_flush(rcs);

    /* Swap command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    /* If the CS is not empty and hasn't overflowed, emit it in a separate thread. */
    if (cs->base.current.cdw && cs->base.current.cdw <= cs->base.current.max_dw && !debug_get_option_noop()) {
        unsigned i, crelocs;

        crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.current.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i].bo->num_active_ioctls);
        }

        switch (cs->ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.has_virtual_memory) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        case RING_VCE:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_VCE;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
        case RING_COMPUTE:
            cs->cst->flags[0] = RADEON_CS_KEEP_TILING_FLAGS;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 3;

            if (cs->ws->info.has_virtual_memory) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ring_type == RING_COMPUTE) {
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

        if (util_queue_is_initialized(&cs->ws->cs_queue)) {
            util_queue_add_job(&cs->ws->cs_queue, cs, &cs->flush_completed,
                               radeon_drm_cs_emit_ioctl_oneshot, NULL);
            if (!(flags & RADEON_FLUSH_ASYNC))
                radeon_drm_cs_sync_flush(rcs);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs, 0);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.current.buf = cs->csc->buf;
    cs->base.current.cdw = 0;
    cs->base.used_vram = 0;
    cs->base.used_gart = 0;

    cs->ws->num_cs_flushes++;
    return 0;
}

static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    radeon_drm_cs_sync_flush(rcs);
    util_queue_fence_destroy(&cs->flush_completed);
    radeon_cs_context_cleanup(&cs->csc1);
    radeon_cs_context_cleanup(&cs->csc2);
    p_atomic_dec(&cs->ws->num_cs);
    radeon_destroy_cs_context(&cs->csc1);
    radeon_destroy_cs_context(&cs->csc2);
    FREE(cs);
}

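/* Return whether the current CS references '_buf' for the given usage
 * (read and/or write). This is a purely local check over the reloc list;
 * nothing is flushed or submitted. */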
static bool radeon_bo_is_referenced(struct radeon_winsys_cs *rcs,
                                    struct pb_buffer *_buf,
                                    enum radeon_bo_usage usage)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)_buf;
    int index;

    if (!bo->num_cs_references)
        return false;

    index = radeon_lookup_buffer(cs->csc, bo);
    if (index == -1)
        return false;

    if ((usage & RADEON_USAGE_WRITE) && cs->csc->relocs[index].write_domain)
        return true;
    if ((usage & RADEON_USAGE_READ) && cs->csc->relocs[index].read_domains)
        return true;

    return false;
}

/* FENCES */

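/* A fence on this path is just a dummy 1-byte GTT buffer added to the CS as a
 * relocation; waiting on the fence (radeon_fence_wait below) amounts to
 * waiting until that buffer becomes idle. */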
static struct pipe_fence_handle *
radeon_cs_create_fence(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct pb_buffer *fence;

    /* Create a fence, which is a dummy BO. */
    fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1,
                                       RADEON_DOMAIN_GTT, 0);
    /* Add the fence as a dummy relocation. */
    cs->ws->base.cs_add_buffer(rcs, fence,
                               RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT,
                               RADEON_PRIO_FENCE);
    return (struct pipe_fence_handle*)fence;
}

static bool radeon_fence_wait(struct radeon_winsys *ws,
                              struct pipe_fence_handle *fence,
                              uint64_t timeout)
{
    return ws->buffer_wait((struct pb_buffer*)fence, timeout,
                           RADEON_USAGE_READWRITE);
}

static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src)
{
    pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src);
}

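/* Hook the CS and fence entry points into the winsys vtable. */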
void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
{
    ws->base.ctx_create = radeon_drm_ctx_create;
    ws->base.ctx_destroy = radeon_drm_ctx_destroy;
    ws->base.cs_create = radeon_drm_cs_create;
    ws->base.cs_destroy = radeon_drm_cs_destroy;
    ws->base.cs_add_buffer = radeon_drm_cs_add_buffer;
    ws->base.cs_lookup_buffer = radeon_drm_cs_lookup_buffer;
    ws->base.cs_validate = radeon_drm_cs_validate;
    ws->base.cs_check_space = radeon_drm_cs_check_space;
    ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
    ws->base.cs_query_memory_usage = radeon_drm_cs_query_memory_usage;
    ws->base.cs_get_buffer_list = radeon_drm_cs_get_buffer_list;
    ws->base.cs_flush = radeon_drm_cs_flush;
    ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
    ws->base.cs_sync_flush = radeon_drm_cs_sync_flush;
    ws->base.fence_wait = radeon_fence_wait;
    ws->base.fence_reference = radeon_fence_reference;
}