/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

#include "radeon_common.h"
#include "main/simple_list.h"

#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
			        "D" ((long)dst),			\
			        "S" ((long)src) );			\
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )		\
do {						\
	int j;					\
	for ( j = 0 ; j < nr ; j++ )		\
		dst[j] = ((int *)src)[j];	\
	dst += nr;				\
} while (0)
#endif

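/* Both variants copy 'nr' 32-bit words from 'src' to 'dst' and leave
 * 'dst' advanced past the copied data: "rep ; movsl" increments
 * %edi/%esi as it copies and the final %edi is written back into dst,
 * while the C fallback does an explicit dst += nr.
 */
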
void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 4)
		COPY_DWORDS(out, data, count);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out++;
			data += stride;
		}
}

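/* The Vec8/Vec12/Vec16 emitters below follow the same pattern for 2, 3
 * and 4 dwords per element: a straight COPY_DWORDS when the input is
 * packed (stride equals the element size), otherwise a strided gather
 * loop.
 */
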
void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 8)
		COPY_DWORDS(out, data, count * 2);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out += 2;
			data += stride;
		}
}

void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 12)
		COPY_DWORDS(out, data, count * 3);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out[2] = *(int *)(data + 8);
			out += 3;
			data += stride;
		}
}

void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 16)
		COPY_DWORDS(out, data, count * 4);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out[2] = *(int *)(data + 8);
			out[3] = *(int *)(data + 12);
			out += 4;
			data += stride;
		}
}

void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
			 const GLvoid * data, int size, int stride, int count)
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	uint32_t *out;

	if (stride == 0) {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
		count = 1;
		aos->stride = 0;
	} else {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
		aos->stride = size;
	}

	aos->components = size;
	aos->count = count;

	out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
	switch (size) {
	case 1: radeonEmitVec4(out, data, stride, count); break;
	case 2: radeonEmitVec8(out, data, stride, count); break;
	case 3: radeonEmitVec12(out, data, stride, count); break;
	case 4: radeonEmitVec16(out, data, stride, count); break;
	default:
		assert(0);
		break;
	}
}

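/* A hypothetical call site, to illustrate the contract (the names here
 * are illustrative, not from this file): emitting a packed vec3
 * position array for 'count' vertices would look like
 *
 *	rcommon_emit_vector(ctx, &rmesa->tcl.aos[0],
 *			    position_data, 3, 12, count);
 *
 * i.e. 'size' is in dwords per element and 'stride' in bytes, while
 * stride == 0 uploads a single element used for every vertex.
 */
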
void radeon_init_dma(radeonContextPtr rmesa)
{
	make_empty_list(&rmesa->dma.free);
	make_empty_list(&rmesa->dma.wait);
	make_empty_list(&rmesa->dma.reserved);
	rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
}

void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
	struct radeon_dma_bo *dma_bo = NULL;

	/* Raise the minimum size to at least the requested size,
	   aligned to the next 16 bytes (e.g. 100 -> 112). */
	if (size > rmesa->dma.minimum_size)
		rmesa->dma.minimum_size = (size + 15) & (~15);

	radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %d\n",
		     __FUNCTION__, size, rmesa->dma.minimum_size);

	/* unmap old reserved bo */
	if (!is_empty_list(&rmesa->dma.reserved))
		radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);

	if (is_empty_list(&rmesa->dma.free)
	    || last_elem(&rmesa->dma.free)->bo->size < size) {
		dma_bo = CALLOC_STRUCT(radeon_dma_bo);
		assert(dma_bo);

again_alloc:
		dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
					    0, rmesa->dma.minimum_size, 4,
					    RADEON_GEM_DOMAIN_GTT, 0);

		if (!dma_bo->bo) {
			/* allocation failed: flush pending commands and retry */
			rcommonFlushCmdBuf(rmesa, __FUNCTION__);
			goto again_alloc;
		}
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	} else {
		/* We push and pop buffers from end of list so we can keep
		   counter on unused buffers for later freeing them from
		   begin of list */
		dma_bo = last_elem(&rmesa->dma.free);
		remove_from_list(dma_bo);
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	}

	rmesa->dma.current_used = 0;
	rmesa->dma.current_vertexptr = 0;

	if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
					  first_elem(&rmesa->dma.reserved)->bo,
					  RADEON_GEM_DOMAIN_GTT, 0))
		fprintf(stderr, "failure to revalidate BOs - badness\n");

	if (is_empty_list(&rmesa->dma.reserved)) {
		/* Cmd buf has been flushed in radeon_revalidate_bos */
		goto again_alloc;
	}

	radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
}

/* Allocates a region from rmesa->dma.current.  If there isn't enough
 * space in current, grab a new buffer (and discard what was left of current)
 */
void radeonAllocDmaRegion(radeonContextPtr rmesa,
			  struct radeon_bo **pbo, int *poffset,
			  int bytes, int alignment)
{
	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);

	if (rmesa->dma.flush)
		rmesa->dma.flush(rmesa->glCtx);

	assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);

	/* round current_used up to the requested (power of two) alignment */
	alignment--;
	rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;

	if (is_empty_list(&rmesa->dma.reserved)
	    || rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
		radeonRefillCurrentDmaRegion(rmesa, bytes);

	*poffset = rmesa->dma.current_used;
	*pbo = first_elem(&rmesa->dma.reserved)->bo;
	radeon_bo_ref(*pbo);

	/* Always align to at least 16 bytes */
	rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;

	assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
}

void radeonFreeDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;

	if (RADEON_DEBUG & RADEON_DMA)
		fprintf(stderr, "%s\n", __FUNCTION__);

	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		remove_from_list(dma_bo);
		radeon_bo_unmap(dma_bo->bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}
}

void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
{
	if (is_empty_list(&rmesa->dma.reserved))
		return;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes);
	rmesa->dma.current_used -= return_bytes;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;
}

static int radeon_bo_is_idle(struct radeon_bo *bo)
{
	uint32_t domain;
	int ret = radeon_bo_is_busy(bo, &domain);
	if (ret == -EINVAL) {
		WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
			  "This may cause a small performance drop for you.\n");
	}
	return ret != -EBUSY;
}

void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;
	const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
	const int time = rmesa->dma.free.expire_counter;

	if (RADEON_DEBUG & RADEON_DMA) {
		size_t free = 0, wait = 0, reserved = 0;

		foreach(dma_bo, &rmesa->dma.free)
			++free;

		foreach(dma_bo, &rmesa->dma.wait)
			++wait;

		foreach(dma_bo, &rmesa->dma.reserved)
			++reserved;

		fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n",
			__FUNCTION__, free, wait, reserved, rmesa->dma.minimum_size);
	}

	if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
		/* request updated cs processing information from kernel */
		legacy_track_pending(rmesa->radeonScreen->bom, 0);
	}

	/* move waiting bos to free list.
	   wait list provides gpu time to handle data before reuse */
	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		if (dma_bo->expire_counter == time) {
			WARN_ONCE("Leaking dma buffer object!\n");
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		/* free objects that are too small to be used because of large request */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}

		if (!radeon_bo_is_idle(dma_bo->bo))
			continue;

		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.free, dma_bo);
	}

	/* unmap the last dma region */
	if (!is_empty_list(&rmesa->dma.reserved))
		radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);

	/* move reserved to wait list */
	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		/* free objects that are too small to be used because of large request */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}

		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.wait, dma_bo);
	}

	/* free bos that have been unused for some time */
	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		if (dma_bo->expire_counter != time)
			break;
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}
}

/* Flush vertices in the current dma region.
 */
void rcommon_flush_last_swtcl_prim(GLcontext *ctx)
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	struct radeon_dma *dma = &rmesa->dma;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);
	dma->flush = NULL;

	if (!is_empty_list(&dma->reserved)) {
		GLuint current_offset = dma->current_used;

		assert(dma->current_used +
		       rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
		       dma->current_vertexptr);

		if (dma->current_used != dma->current_vertexptr) {
			dma->current_used = dma->current_vertexptr;

			rmesa->vtbl.swtcl_flush(ctx, current_offset);
		}
		rmesa->swtcl.numverts = 0;
	}
}

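/* dma.flush is the deferred-flush hook: rcommonAllocDmaLowVerts installs
 * rcommon_flush_last_swtcl_prim here, and radeonAllocDmaRegion /
 * radeonReleaseArrays invoke it before touching the region, so software
 * TCL vertices buffered in the current bo are emitted first.
 */
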
/* Alloc space in the current dma region.
 */
void *
rcommonAllocDmaLowVerts(radeonContextPtr rmesa, int nverts, int vsize)
{
	GLuint bytes = vsize * nverts;
	void *head;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (is_empty_list(&rmesa->dma.reserved)
	    || rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
		if (rmesa->dma.flush) {
			rmesa->dma.flush(rmesa->glCtx);
		}

		radeonRefillCurrentDmaRegion(rmesa, bytes);

		return NULL;
	}

	if (!rmesa->dma.flush) {
		/* if cmdbuf flushed DMA restart */
		rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
		rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
	}

	ASSERT(vsize == rmesa->swtcl.vertex_size * 4);
	ASSERT(rmesa->dma.flush == rcommon_flush_last_swtcl_prim);
	ASSERT(rmesa->dma.current_used +
	       rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
	       rmesa->dma.current_vertexptr);

	head = (first_elem(&rmesa->dma.reserved)->bo->ptr + rmesa->dma.current_vertexptr);
	rmesa->dma.current_vertexptr += bytes;
	rmesa->swtcl.numverts += nverts;
	return head;
}

void radeonReleaseArrays(GLcontext *ctx, GLuint newinputs)
{
	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
	int i;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (radeon->dma.flush) {
		radeon->dma.flush(radeon->glCtx);
	}
	for (i = 0; i < radeon->tcl.aos_count; i++) {
		if (radeon->tcl.aos[i].bo) {
			radeon_bo_unref(radeon->tcl.aos[i].bo);
			radeon->tcl.aos[i].bo = NULL;
		}
	}
}