/*
 * Copyright © 2008 Jérôme Glisse
 * Copyright © 2010 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
/*
 * Authors:
 *      Marek Olšák <maraeo@gmail.com>
 *
 * Based on work from libdrm_radeon by:
 *      Aapo Tahkola <aet@rasterburn.org>
 *      Nicolai Haehnle <prefect_@gmx.net>
 *      Jérôme Glisse <glisse@freedesktop.org>
 */
/*
    This file replaces libdrm's radeon_cs_gem with our own implementation.
    It's optimized specifically for Radeon DRM.
    Reloc writes and space checking are faster and simpler than their
    counterparts in libdrm (the time complexity of all the functions
    is O(1) in nearly all scenarios, thanks to hashing).

    It works like this:

    cs_add_reloc(cs, buf, read_domain, write_domain) adds a new relocation and
    also adds the size of 'buf' to the used_gart and used_vram winsys variables
    based on the domains, which are simply OR'd for the accounting purposes.
    The adding is skipped if the reloc is already present in the list, but it
    accounts any newly-referenced domains.

    cs_validate is then called, which just checks:
        used_vram/gart < vram/gart_size * 0.8
    The 0.8 factor allows for some memory fragmentation. If the validation
    fails, the pipe driver flushes the CS and tries the validation again,
    i.e. it validates only that one operation. If it fails again, it drops
    the operation on the floor and prints some nasty message to stderr.
    (done in the pipe driver)

    cs_write_reloc(cs, buf) just writes a reloc that has been added using
    cs_add_reloc. The read_domain and write_domain parameters have been removed,
    because we already specify them in cs_add_reloc.
*/
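/*
 * Illustrative sketch (not part of this file): how a pipe driver is
 * expected to drive the three hooks above for one operation. The helpers
 * emit_op_relocs()/emit_op_packets() and the 'op' argument are
 * hypothetical; cs_add_reloc, cs_validate, cs_write_reloc, OUT_CS and
 * RADEON_FLUSH_ASYNC are the real interface used below.
 *
 *    emit_op_relocs(rws, cs, op);        // cs_add_reloc() for each BO used
 *    if (!rws->cs_validate(cs)) {
 *       rws->cs_flush(cs, RADEON_FLUSH_ASYNC, 0);  // flush, then retry once
 *       emit_op_relocs(rws, cs, op);
 *       if (!rws->cs_validate(cs)) {
 *          fprintf(stderr, "radeon: dropping operation, can't fit it in memory\n");
 *          return;
 *       }
 *    }
 *    emit_op_packets(cs, op);            // OUT_CS() + cs_write_reloc()
 */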
#include "radeon_drm_cs.h"

#include "util/u_memory.h"
#include "os/os_time.h"

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <xf86drm.h>
/*
 * These are copied from radeon_drm.h. Once an updated libdrm is released,
 * we should bump the configure.ac requirement for it and remove the
 * following fallback definitions.
 */
#ifndef RADEON_CHUNK_ID_FLAGS
#define RADEON_CHUNK_ID_FLAGS       0x03
/* The first dword of RADEON_CHUNK_ID_FLAGS is a uint32 of these flags: */
#define RADEON_CS_KEEP_TILING_FLAGS 0x01
#endif
#ifndef RADEON_CS_USE_VM
#define RADEON_CS_USE_VM            0x02
/* The second dword of RADEON_CHUNK_ID_FLAGS is a uint32 that sets the ring type */
#define RADEON_CS_RING_GFX          0
#define RADEON_CS_RING_COMPUTE      1
#endif
#ifndef RADEON_CS_RING_DMA
#define RADEON_CS_RING_DMA          2
#endif

#ifndef RADEON_CS_RING_UVD
#define RADEON_CS_RING_UVD          3
#endif

#ifndef RADEON_CS_RING_VCE
#define RADEON_CS_RING_VCE          4
#endif

#ifndef RADEON_CS_END_OF_FRAME
#define RADEON_CS_END_OF_FRAME      0x04
#endif

#define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))
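/* drm_radeon_cs_reloc packs four uint32 fields (handle, read_domains,
 * write_domain, flags), so RELOC_DWORDS evaluates to 4: each relocation
 * costs four dwords in the relocation chunk sent to the kernel. */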
static boolean radeon_init_cs_context(struct radeon_cs_context *csc,
                                      struct radeon_drm_winsys *ws)
{
    csc->fd = ws->fd;
    csc->nrelocs = 512;
    csc->relocs_bo = (struct radeon_bo**)
                     CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*));
    if (!csc->relocs_bo) {
        return FALSE;
    }

    csc->relocs = (struct drm_radeon_cs_reloc*)
                  CALLOC(1, csc->nrelocs * sizeof(struct drm_radeon_cs_reloc));
    if (!csc->relocs) {
        FREE(csc->relocs_bo);
        return FALSE;
    }

    csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
    csc->chunks[0].length_dw = 0;
    csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf;
    csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
    csc->chunks[1].length_dw = 0;
    csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
    csc->chunks[2].length_dw = 2;
    csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)&csc->flags;

    csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0];
    csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1];
    csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2];

    csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array;
    return TRUE;
}
static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
{
    unsigned i;

    for (i = 0; i < csc->crelocs; i++) {
        p_atomic_dec(&csc->relocs_bo[i]->num_cs_references);
        radeon_bo_reference(&csc->relocs_bo[i], NULL);
    }

    csc->crelocs = 0;
    csc->validated_crelocs = 0;
    csc->chunks[0].length_dw = 0;
    csc->chunks[1].length_dw = 0;
    csc->used_gart = 0;
    csc->used_vram = 0;
    memset(csc->is_handle_added, 0, sizeof(csc->is_handle_added));
}
static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
{
    radeon_cs_context_cleanup(csc);
    FREE(csc->relocs_bo);
    FREE(csc->relocs);
}
static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws,
                                                     enum ring_type ring_type,
                                                     struct radeon_winsys_cs_handle *trace_buf)
{
    struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
    struct radeon_drm_cs *cs;

    cs = CALLOC_STRUCT(radeon_drm_cs);
    if (!cs) {
        return NULL;
    }
    pipe_semaphore_init(&cs->flush_completed, 1);

    cs->ws = ws;
    cs->trace_buf = (struct radeon_bo*)trace_buf;

    if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
        FREE(cs);
        return NULL;
    }
    if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
        radeon_destroy_cs_context(&cs->csc1);
        FREE(cs);
        return NULL;
    }

    /* Set the first command buffer as current. */
    cs->csc = &cs->csc1;
    cs->cst = &cs->csc2;
    cs->base.buf = cs->csc->buf;
    cs->base.ring_type = ring_type;

    p_atomic_inc(&ws->num_cs);
    return &cs->base;
}
#define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value)
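/* Usage note (illustrative): OUT_CS appends one dword to the IB and bumps
 * the write pointer, e.g. the pair emitted by radeon_drm_cs_write_reloc
 * below:
 *
 *    OUT_CS(&cs->base, 0xc0001000);            // NOP packet carrying a reloc
 *    OUT_CS(&cs->base, index * RELOC_DWORDS);  // dword offset into the reloc chunk
 *
 * No bounds checking is done here; callers must keep cdw within
 * RADEON_MAX_CMDBUF_DWORDS (checked at flush time). */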
static INLINE void update_reloc(struct drm_radeon_cs_reloc *reloc,
                                enum radeon_bo_domain rd,
                                enum radeon_bo_domain wd,
                                unsigned priority,
                                enum radeon_bo_domain *added_domains)
{
    *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);

    reloc->read_domains |= rd;
    reloc->write_domain |= wd;
    reloc->flags = MAX2(reloc->flags, priority);
}
int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo)
{
    struct drm_radeon_cs_reloc *reloc;
    unsigned i;
    unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);

    if (csc->is_handle_added[hash]) {
        i = csc->reloc_indices_hashlist[hash];
        reloc = &csc->relocs[i];
        if (reloc->handle == bo->handle) {
            return i;
        }

        /* Hash collision, look for the BO in the list of relocs linearly. */
        for (i = csc->crelocs; i != 0;) {
            --i;
            reloc = &csc->relocs[i];
            if (reloc->handle == bo->handle) {
                /* Put this reloc in the hash list.
                 * This will prevent additional hash collisions if there are
                 * several consecutive get_reloc calls for the same buffer.
                 *
                 * Example: Assuming buffers A,B,C collide in the hash list,
                 * the following sequence of relocs:
                 *         AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
                 * will collide here: ^ and here:   ^,
                 * meaning that we should get very few collisions in the end. */
                csc->reloc_indices_hashlist[hash] = i;
                /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
                return i;
            }
        }
    }

    return -1;
}
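/* Worked example of the hashing above: the slot index is the low bits of
 * the GEM handle, hash = handle & (sizeof(is_handle_added)-1). Assuming a
 * 256-entry table, handle 0x1234 maps to slot 0x34, and two handles that
 * differ only above the low byte collide and fall back to the linear
 * search. Since GEM handles are typically allocated sequentially within a
 * process, consecutive handles spread evenly across slots, which is what
 * makes the O(1) claim in the file header hold in practice. */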
static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
                                 struct radeon_bo *bo,
                                 enum radeon_bo_usage usage,
                                 enum radeon_bo_domain domains,
                                 unsigned priority,
                                 enum radeon_bo_domain *added_domains)
{
    struct radeon_cs_context *csc = cs->csc;
    struct drm_radeon_cs_reloc *reloc;
    unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);
    enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
    enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
    bool update_hash = TRUE;
    int i;

    priority = MIN2(priority, 15);
    *added_domains = 0;

    if (csc->is_handle_added[hash]) {
        i = csc->reloc_indices_hashlist[hash];
        reloc = &csc->relocs[i];

        if (reloc->handle != bo->handle) {
            /* Hash collision, look for the BO in the list of relocs linearly. */
            for (i = csc->crelocs - 1; i >= 0; i--) {
                reloc = &csc->relocs[i];
                if (reloc->handle == bo->handle) {
                    /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
                    break;
                }
            }
        }

        if (i >= 0) {
            update_reloc(reloc, rd, wd, priority, added_domains);

            /* For async DMA, every add_reloc call must add a buffer to the list
             * no matter how many duplicates there are. This is due to the fact
             * the DMA CS checker doesn't use NOP packets for offset patching,
             * but always uses the i-th buffer from the list to patch the i-th
             * offset. If there are N offsets in a DMA CS, there must also be N
             * buffers in the relocation list.
             *
             * This doesn't have to be done if virtual memory is enabled,
             * because there is no offset patching with virtual memory.
             */
            if (cs->base.ring_type != RING_DMA || cs->ws->info.r600_virtual_address) {
                csc->reloc_indices_hashlist[hash] = i;
                return i;
            }
            update_hash = FALSE;
        }
    }

    /* New relocation, check if the backing array is large enough. */
    if (csc->crelocs >= csc->nrelocs) {
        uint32_t size;
        csc->nrelocs += 10;

        size = csc->nrelocs * sizeof(struct radeon_bo*);
        csc->relocs_bo = realloc(csc->relocs_bo, size);

        size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc);
        csc->relocs = realloc(csc->relocs, size);

        csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    }

    /* Initialize the new relocation. */
    csc->relocs_bo[csc->crelocs] = NULL;
    radeon_bo_reference(&csc->relocs_bo[csc->crelocs], bo);
    p_atomic_inc(&bo->num_cs_references);
    reloc = &csc->relocs[csc->crelocs];
    reloc->handle = bo->handle;
    reloc->read_domains = rd;
    reloc->write_domain = wd;
    reloc->flags = priority;

    csc->is_handle_added[hash] = TRUE;
    if (update_hash) {
        csc->reloc_indices_hashlist[hash] = csc->crelocs;
    }

    csc->chunks[1].length_dw += RELOC_DWORDS;

    *added_domains = rd | wd;
    return csc->crelocs++;
}
static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
                                        struct radeon_winsys_cs_handle *buf,
                                        enum radeon_bo_usage usage,
                                        enum radeon_bo_domain domains,
                                        enum radeon_bo_priority priority)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)buf;
    enum radeon_bo_domain added_domains;
    unsigned index = radeon_add_reloc(cs, bo, usage, domains, priority, &added_domains);

    if (added_domains & RADEON_DOMAIN_GTT)
        cs->csc->used_gart += bo->base.size;
    if (added_domains & RADEON_DOMAIN_VRAM)
        cs->csc->used_vram += bo->base.size;

    return index;
}
static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    boolean status =
        cs->csc->used_gart < cs->ws->info.gart_size * 0.8 &&
        cs->csc->used_vram < cs->ws->info.vram_size * 0.8;

    if (status) {
        cs->csc->validated_crelocs = cs->csc->crelocs;
    } else {
        /* Remove lately-added relocations. The validation failed with them
         * and the CS is about to be flushed because of that. Keep only
         * the already-validated relocations. */
        unsigned i;

        for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) {
            p_atomic_dec(&cs->csc->relocs_bo[i]->num_cs_references);
            radeon_bo_reference(&cs->csc->relocs_bo[i], NULL);
        }
        cs->csc->crelocs = cs->csc->validated_crelocs;

        /* Flush if there are any relocs. Clean up otherwise. */
        if (cs->csc->crelocs) {
            cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC);
        } else {
            radeon_cs_context_cleanup(cs->csc);

            assert(cs->base.cdw == 0);
            if (cs->base.cdw != 0) {
                fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__);
            }
        }
    }
    return status;
}
static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    return (cs->csc->used_gart + gtt) < cs->ws->info.gart_size * 0.7 &&
           (cs->csc->used_vram + vram) < cs->ws->info.vram_size * 0.7;
}
static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs,
                                      struct radeon_winsys_cs_handle *buf)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)buf;
    unsigned index = radeon_get_reloc(cs->csc, bo);

    if (index == -1) {
        fprintf(stderr, "radeon: Cannot get a relocation in %s.\n", __func__);
        return;
    }

    OUT_CS(&cs->base, 0xc0001000);
    OUT_CS(&cs->base, index * RELOC_DWORDS);
}
void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc)
{
    unsigned i;

    if (drmCommandWriteRead(csc->fd, DRM_RADEON_CS,
                            &csc->cs, sizeof(struct drm_radeon_cs))) {
        if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) {
            fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n");
            for (i = 0; i < csc->chunks[0].length_dw; i++) {
                fprintf(stderr, "0x%08X\n", csc->buf[i]);
            }
        } else {
            fprintf(stderr, "radeon: The kernel rejected CS, "
                    "see dmesg for more information.\n");
        }
    }

    if (cs->trace_buf) {
        radeon_dump_cs_on_lockup(cs, csc);
    }

    for (i = 0; i < csc->crelocs; i++)
        p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls);

    radeon_cs_context_cleanup(csc);
}
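/* Illustrative debugging knobs for this path (set in the environment):
 *
 *    RADEON_DUMP_CS=1 ./app   # dump the rejected IB to stderr (see above)
 *    RADEON_NOOP=1 ./app      # defined below: build CSes but never submit them
 */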
/*
 * Make sure previous submissions of this cs are completed.
 */
void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    /* Wait for any pending ioctl to complete. */
    if (cs->ws->thread) {
        pipe_semaphore_wait(&cs->flush_completed);
        pipe_semaphore_signal(&cs->flush_completed);
    }
}
DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", FALSE)
static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags, uint32_t cs_trace_id)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    switch (cs->base.ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements;
         * r6xx requires at least 4 DW alignment to avoid a hw bug.
         */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
        }
        break;
    case RING_UVD:
        while (rcs->cdw & 15)
            OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        break;
    default:
        break;
    }

    if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) {
        fprintf(stderr, "radeon: command stream overflowed\n");
    }

    radeon_drm_cs_sync_flush(rcs);

    /* Flip command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    cs->cst->cs_trace_id = cs_trace_id;

    /* If the CS is not empty or overflowed, emit it in a separate thread. */
    if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) {
        unsigned i, crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
        }

        switch (cs->base.ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        case RING_VCE:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_VCE;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 2;
            if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
                cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_COMPUTE) {
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

        if (cs->ws->thread) {
            pipe_semaphore_wait(&cs->flush_completed);
            radeon_drm_ws_queue_cs(cs->ws, cs);
            if (!(flags & RADEON_FLUSH_ASYNC))
                radeon_drm_cs_sync_flush(rcs);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.buf = cs->csc->buf;
    cs->base.cdw = 0;

    cs->ws->num_cs_flushes++;
}
static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    radeon_drm_cs_sync_flush(rcs);
    pipe_semaphore_destroy(&cs->flush_completed);
    radeon_cs_context_cleanup(&cs->csc1);
    radeon_cs_context_cleanup(&cs->csc2);
    p_atomic_dec(&cs->ws->num_cs);
    radeon_destroy_cs_context(&cs->csc1);
    radeon_destroy_cs_context(&cs->csc2);
    FREE(cs);
}
static void radeon_drm_cs_set_flush(struct radeon_winsys_cs *rcs,
                                    void (*flush)(void *ctx, unsigned flags),
                                    void *user)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    cs->flush_cs = flush;
    cs->flush_data = user;
}
static boolean radeon_bo_is_referenced(struct radeon_winsys_cs *rcs,
                                       struct radeon_winsys_cs_handle *_buf,
                                       enum radeon_bo_usage usage)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)_buf;
    int index;

    if (!bo->num_cs_references)
        return FALSE;

    index = radeon_get_reloc(cs->csc, bo);
    if (index == -1)
        return FALSE;

    if ((usage & RADEON_USAGE_WRITE) && cs->csc->relocs[index].write_domain)
        return TRUE;
    if ((usage & RADEON_USAGE_READ) && cs->csc->relocs[index].read_domains)
        return TRUE;

    return FALSE;
}
static struct pipe_fence_handle *
radeon_cs_create_fence(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct pb_buffer *fence;

    /* Create a fence, which is a dummy BO. */
    fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1, TRUE,
                                       RADEON_DOMAIN_GTT);
    /* Add the fence as a dummy relocation. */
    cs->ws->base.cs_add_reloc(rcs, cs->ws->base.buffer_get_cs_handle(fence),
                              RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT,
                              RADEON_PRIO_MIN);
    return (struct pipe_fence_handle*)fence;
}
static bool radeon_fence_wait(struct radeon_winsys *ws,
                              struct pipe_fence_handle *fence,
                              uint64_t timeout)
{
    struct pb_buffer *rfence = (struct pb_buffer*)fence;

    if (timeout == 0)
        return !ws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE);

    if (timeout != PIPE_TIMEOUT_INFINITE) {
        int64_t start_time = os_time_get();

        /* Convert to microseconds. */
        timeout /= 1000;

        /* Wait in a loop. */
        while (ws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE)) {
            if (os_time_get() - start_time >= timeout) {
                return FALSE;
            }
        }
        return TRUE;
    }

    ws->buffer_wait(rfence, RADEON_USAGE_READWRITE);
    return TRUE;
}
static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src)
{
    pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src);
}
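/* Illustrative only: how a driver is expected to use the fence hooks above.
 * The fence is just a tiny GTT BO riding along as a relocation, so it
 * becomes idle exactly when the CS that references it completes. The
 * variable names below are hypothetical; the timeout is in nanoseconds
 * (converted to microseconds in radeon_fence_wait above).
 *
 *    struct pipe_fence_handle *f = ws->base.cs_create_fence(rcs);
 *    ws->base.cs_flush(rcs, RADEON_FLUSH_ASYNC, 0);
 *    if (!ws->base.fence_wait(&ws->base, f, 1000000000))  // 1 s in ns
 *       fprintf(stderr, "radeon: fence wait timed out\n");
 *    ws->base.fence_reference(&f, NULL);                  // drop the reference
 */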
void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
{
    ws->base.cs_create = radeon_drm_cs_create;
    ws->base.cs_destroy = radeon_drm_cs_destroy;
    ws->base.cs_add_reloc = radeon_drm_cs_add_reloc;
    ws->base.cs_validate = radeon_drm_cs_validate;
    ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
    ws->base.cs_write_reloc = radeon_drm_cs_write_reloc;
    ws->base.cs_flush = radeon_drm_cs_flush;
    ws->base.cs_set_flush_callback = radeon_drm_cs_set_flush;
    ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
    ws->base.cs_sync_flush = radeon_drm_cs_sync_flush;
    ws->base.cs_create_fence = radeon_cs_create_fence;
    ws->base.fence_wait = radeon_fence_wait;
    ws->base.fence_reference = radeon_fence_reference;
}