1 /**************************************************************************
3 Copyright (C) 2004 Nicolai Haehnle.
4 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
6 The Weather Channel (TM) funded Tungsten Graphics to develop the
7 initial release of the Radeon 8500 driver under the XFree86 license.
8 This notice must be preserved.
12 Permission is hereby granted, free of charge, to any person obtaining a
13 copy of this software and associated documentation files (the "Software"),
14 to deal in the Software without restriction, including without limitation
15 on the rights to use, copy, modify, merge, publish, distribute, sub
16 license, and/or sell copies of the Software, and to permit persons to whom
17 the Software is furnished to do so, subject to the following conditions:
19 The above copyright notice and this permission notice (including the next
20 paragraph) shall be included in all copies or substantial portions of the Software.
23 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
26 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
27 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
28 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
29 USE OR OTHER DEALINGS IN THE SOFTWARE.
31 **************************************************************************/
34 #include "radeon_common.h"
35 #include "main/simple_list.h"
/* Bulk dword copy into the DMA buffer.  On x86 use a "rep ; movsl"
 * string move; elsewhere fall back to a plain dword loop.  Both
 * variants advance `dst` past the copied region as a side effect,
 * which the radeonEmitVec* fast paths rely on.
 */
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
				"D" ((long)dst),			\
				"S" ((long)src) );			\
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )		\
do {						\
	int j;					\
	for ( j = 0 ; j < nr ; j++ )		\
		dst[j] = ((int *)src)[j];	\
	dst += nr;				\
} while (0)
#endif
57 void radeonEmitVec4(uint32_t *out
, const GLvoid
* data
, int stride
, int count
)
61 if (RADEON_DEBUG
& RADEON_VERTS
)
62 fprintf(stderr
, "%s count %d stride %d out %p data %p\n",
63 __FUNCTION__
, count
, stride
, (void *)out
, (void *)data
);
66 COPY_DWORDS(out
, data
, count
);
68 for (i
= 0; i
< count
; i
++) {
69 out
[0] = *(int *)data
;
75 void radeonEmitVec8(uint32_t *out
, const GLvoid
* data
, int stride
, int count
)
79 if (RADEON_DEBUG
& RADEON_VERTS
)
80 fprintf(stderr
, "%s count %d stride %d out %p data %p\n",
81 __FUNCTION__
, count
, stride
, (void *)out
, (void *)data
);
84 COPY_DWORDS(out
, data
, count
* 2);
86 for (i
= 0; i
< count
; i
++) {
87 out
[0] = *(int *)data
;
88 out
[1] = *(int *)(data
+ 4);
94 void radeonEmitVec12(uint32_t *out
, const GLvoid
* data
, int stride
, int count
)
98 if (RADEON_DEBUG
& RADEON_VERTS
)
99 fprintf(stderr
, "%s count %d stride %d out %p data %p\n",
100 __FUNCTION__
, count
, stride
, (void *)out
, (void *)data
);
103 COPY_DWORDS(out
, data
, count
* 3);
106 for (i
= 0; i
< count
; i
++) {
107 out
[0] = *(int *)data
;
108 out
[1] = *(int *)(data
+ 4);
109 out
[2] = *(int *)(data
+ 8);
115 void radeonEmitVec16(uint32_t *out
, const GLvoid
* data
, int stride
, int count
)
119 if (RADEON_DEBUG
& RADEON_VERTS
)
120 fprintf(stderr
, "%s count %d stride %d out %p data %p\n",
121 __FUNCTION__
, count
, stride
, (void *)out
, (void *)data
);
124 COPY_DWORDS(out
, data
, count
* 4);
126 for (i
= 0; i
< count
; i
++) {
127 out
[0] = *(int *)data
;
128 out
[1] = *(int *)(data
+ 4);
129 out
[2] = *(int *)(data
+ 8);
130 out
[3] = *(int *)(data
+ 12);
136 void rcommon_emit_vector(GLcontext
* ctx
, struct radeon_aos
*aos
,
137 const GLvoid
* data
, int size
, int stride
, int count
)
139 radeonContextPtr rmesa
= RADEON_CONTEXT(ctx
);
143 radeonAllocDmaRegion(rmesa
, &aos
->bo
, &aos
->offset
, size
* 4, 32);
147 radeonAllocDmaRegion(rmesa
, &aos
->bo
, &aos
->offset
, size
* count
* 4, 32);
151 aos
->components
= size
;
154 radeon_bo_map(aos
->bo
, 1);
155 out
= (uint32_t*)((char*)aos
->bo
->ptr
+ aos
->offset
);
157 case 1: radeonEmitVec4(out
, data
, stride
, count
); break;
158 case 2: radeonEmitVec8(out
, data
, stride
, count
); break;
159 case 3: radeonEmitVec12(out
, data
, stride
, count
); break;
160 case 4: radeonEmitVec16(out
, data
, stride
, count
); break;
165 radeon_bo_unmap(aos
->bo
);
168 void radeon_init_dma(radeonContextPtr rmesa
)
170 make_empty_list(&rmesa
->dma
.free
);
171 make_empty_list(&rmesa
->dma
.wait
);
172 make_empty_list(&rmesa
->dma
.reserved
);
173 rmesa
->dma
.minimum_size
= MAX_DMA_BUF_SZ
;
176 void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa
, int size
)
178 struct radeon_dma_bo
*dma_bo
= NULL
;
179 /* we set minimum sizes to at least requested size
180 aligned to next 16 bytes. */
181 if (size
> rmesa
->dma
.minimum_size
)
182 rmesa
->dma
.minimum_size
= (size
+ 15) & (~15);
184 radeon_print(RADEON_DMA
, RADEON_NORMAL
, "%s size %d minimum_size %d\n",
185 __FUNCTION__
, size
, rmesa
->dma
.minimum_size
);
188 if (is_empty_list(&rmesa
->dma
.free
)
189 || last_elem(&rmesa
->dma
.free
)->bo
->size
< size
) {
190 dma_bo
= CALLOC_STRUCT(radeon_dma_bo
);
194 dma_bo
->bo
= radeon_bo_open(rmesa
->radeonScreen
->bom
,
195 0, rmesa
->dma
.minimum_size
, 4,
196 RADEON_GEM_DOMAIN_GTT
, 0);
199 rcommonFlushCmdBuf(rmesa
, __FUNCTION__
);
202 insert_at_head(&rmesa
->dma
.reserved
, dma_bo
);
204 /* We push and pop buffers from end of list so we can keep
205 counter on unused buffers for later freeing them from
207 dma_bo
= last_elem(&rmesa
->dma
.free
);
208 remove_from_list(dma_bo
);
209 insert_at_head(&rmesa
->dma
.reserved
, dma_bo
);
212 rmesa
->dma
.current_used
= 0;
213 rmesa
->dma
.current_vertexptr
= 0;
215 if (radeon_cs_space_check_with_bo(rmesa
->cmdbuf
.cs
,
216 first_elem(&rmesa
->dma
.reserved
)->bo
,
217 RADEON_GEM_DOMAIN_GTT
, 0))
218 fprintf(stderr
,"failure to revalidate BOs - badness\n");
220 if (is_empty_list(&rmesa
->dma
.reserved
)) {
221 /* Cmd buff have been flushed in radeon_revalidate_bos */
226 /* Allocates a region from rmesa->dma.current. If there isn't enough
227 * space in current, grab a new buffer (and discard what was left of current)
229 void radeonAllocDmaRegion(radeonContextPtr rmesa
,
230 struct radeon_bo
**pbo
, int *poffset
,
231 int bytes
, int alignment
)
233 if (RADEON_DEBUG
& RADEON_IOCTL
)
234 fprintf(stderr
, "%s %d\n", __FUNCTION__
, bytes
);
236 if (rmesa
->dma
.flush
)
237 rmesa
->dma
.flush(rmesa
->glCtx
);
239 assert(rmesa
->dma
.current_used
== rmesa
->dma
.current_vertexptr
);
242 rmesa
->dma
.current_used
= (rmesa
->dma
.current_used
+ alignment
) & ~alignment
;
244 if (is_empty_list(&rmesa
->dma
.reserved
)
245 || rmesa
->dma
.current_used
+ bytes
> first_elem(&rmesa
->dma
.reserved
)->bo
->size
)
246 radeonRefillCurrentDmaRegion(rmesa
, bytes
);
248 *poffset
= rmesa
->dma
.current_used
;
249 *pbo
= first_elem(&rmesa
->dma
.reserved
)->bo
;
252 /* Always align to at least 16 bytes */
253 rmesa
->dma
.current_used
= (rmesa
->dma
.current_used
+ bytes
+ 15) & ~15;
254 rmesa
->dma
.current_vertexptr
= rmesa
->dma
.current_used
;
256 assert(rmesa
->dma
.current_used
<= first_elem(&rmesa
->dma
.reserved
)->bo
->size
);
259 void radeonFreeDmaRegions(radeonContextPtr rmesa
)
261 struct radeon_dma_bo
*dma_bo
;
262 struct radeon_dma_bo
*temp
;
263 if (RADEON_DEBUG
& RADEON_DMA
)
264 fprintf(stderr
, "%s\n", __FUNCTION__
);
266 foreach_s(dma_bo
, temp
, &rmesa
->dma
.free
) {
267 remove_from_list(dma_bo
);
268 radeon_bo_unref(dma_bo
->bo
);
272 foreach_s(dma_bo
, temp
, &rmesa
->dma
.wait
) {
273 remove_from_list(dma_bo
);
274 radeon_bo_unref(dma_bo
->bo
);
278 foreach_s(dma_bo
, temp
, &rmesa
->dma
.reserved
) {
279 remove_from_list(dma_bo
);
280 radeon_bo_unref(dma_bo
->bo
);
285 void radeonReturnDmaRegion(radeonContextPtr rmesa
, int return_bytes
)
287 if (is_empty_list(&rmesa
->dma
.reserved
))
290 if (RADEON_DEBUG
& RADEON_IOCTL
)
291 fprintf(stderr
, "%s %d\n", __FUNCTION__
, return_bytes
);
292 rmesa
->dma
.current_used
-= return_bytes
;
293 rmesa
->dma
.current_vertexptr
= rmesa
->dma
.current_used
;
/** Return non-zero when the GPU is no longer using `bo`.  Treats a
 *  kernel/libdrm without busy-query support (-EINVAL) as idle, after
 *  warning once. */
static int radeon_bo_is_idle(struct radeon_bo* bo)
{
	uint32_t domain;
	int ret = radeon_bo_is_busy(bo, &domain);
	if (ret == -EINVAL) {
		WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
			"This may cause small performance drop for you.\n");
	}
	/* Anything other than -EBUSY (including the -EINVAL case above)
	 * counts as idle. */
	return ret != -EBUSY;
}
307 void radeonReleaseDmaRegions(radeonContextPtr rmesa
)
309 struct radeon_dma_bo
*dma_bo
;
310 struct radeon_dma_bo
*temp
;
311 const int expire_at
= ++rmesa
->dma
.free
.expire_counter
+ DMA_BO_FREE_TIME
;
312 const int time
= rmesa
->dma
.free
.expire_counter
;
314 if (RADEON_DEBUG
& RADEON_DMA
) {
318 foreach(dma_bo
, &rmesa
->dma
.free
)
321 foreach(dma_bo
, &rmesa
->dma
.wait
)
324 foreach(dma_bo
, &rmesa
->dma
.reserved
)
327 fprintf(stderr
, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n",
328 __FUNCTION__
, free
, wait
, reserved
, rmesa
->dma
.minimum_size
);
331 if (!rmesa
->radeonScreen
->driScreen
->dri2
.enabled
) {
332 /* request updated cs processing information from kernel */
333 legacy_track_pending(rmesa
->radeonScreen
->bom
, 0);
335 /* move waiting bos to free list.
336 wait list provides gpu time to handle data before reuse */
337 foreach_s(dma_bo
, temp
, &rmesa
->dma
.wait
) {
338 if (dma_bo
->expire_counter
== time
) {
339 WARN_ONCE("Leaking dma buffer object!\n");
340 radeon_bo_unref(dma_bo
->bo
);
341 remove_from_list(dma_bo
);
345 /* free objects that are too small to be used because of large request */
346 if (dma_bo
->bo
->size
< rmesa
->dma
.minimum_size
) {
347 radeon_bo_unref(dma_bo
->bo
);
348 remove_from_list(dma_bo
);
352 if (!radeon_bo_is_idle(dma_bo
->bo
))
354 remove_from_list(dma_bo
);
355 dma_bo
->expire_counter
= expire_at
;
356 insert_at_tail(&rmesa
->dma
.free
, dma_bo
);
359 /* move reserved to wait list */
360 foreach_s(dma_bo
, temp
, &rmesa
->dma
.reserved
) {
361 /* free objects that are too small to be used because of large request */
362 if (dma_bo
->bo
->size
< rmesa
->dma
.minimum_size
) {
363 radeon_bo_unref(dma_bo
->bo
);
364 remove_from_list(dma_bo
);
368 remove_from_list(dma_bo
);
369 dma_bo
->expire_counter
= expire_at
;
370 insert_at_tail(&rmesa
->dma
.wait
, dma_bo
);
373 /* free bos that have been unused for some time */
374 foreach_s(dma_bo
, temp
, &rmesa
->dma
.free
) {
375 if (dma_bo
->expire_counter
!= time
)
377 remove_from_list(dma_bo
);
378 radeon_bo_unref(dma_bo
->bo
);
385 /* Flush vertices in the current dma region.
387 void rcommon_flush_last_swtcl_prim( GLcontext
*ctx
)
389 radeonContextPtr rmesa
= RADEON_CONTEXT(ctx
);
390 struct radeon_dma
*dma
= &rmesa
->dma
;
392 if (RADEON_DEBUG
& RADEON_IOCTL
)
393 fprintf(stderr
, "%s\n", __FUNCTION__
);
396 radeon_bo_unmap(rmesa
->swtcl
.bo
);
398 if (!is_empty_list(&dma
->reserved
)) {
399 GLuint current_offset
= dma
->current_used
;
401 assert (dma
->current_used
+
402 rmesa
->swtcl
.numverts
* rmesa
->swtcl
.vertex_size
* 4 ==
403 dma
->current_vertexptr
);
405 if (dma
->current_used
!= dma
->current_vertexptr
) {
406 dma
->current_used
= dma
->current_vertexptr
;
408 rmesa
->vtbl
.swtcl_flush(ctx
, current_offset
);
410 rmesa
->swtcl
.numverts
= 0;
412 radeon_bo_unref(rmesa
->swtcl
.bo
);
413 rmesa
->swtcl
.bo
= NULL
;
415 /* Alloc space in the current dma region.
418 rcommonAllocDmaLowVerts( radeonContextPtr rmesa
, int nverts
, int vsize
)
420 GLuint bytes
= vsize
* nverts
;
422 if (RADEON_DEBUG
& RADEON_IOCTL
)
423 fprintf(stderr
, "%s\n", __FUNCTION__
);
425 if(is_empty_list(&rmesa
->dma
.reserved
)
426 ||rmesa
->dma
.current_vertexptr
+ bytes
> first_elem(&rmesa
->dma
.reserved
)->bo
->size
) {
427 if (rmesa
->dma
.flush
) {
428 rmesa
->dma
.flush(rmesa
->glCtx
);
431 radeonRefillCurrentDmaRegion(rmesa
, bytes
);
436 if (!rmesa
->dma
.flush
) {
437 /* if cmdbuf flushed DMA restart */
438 rmesa
->glCtx
->Driver
.NeedFlush
|= FLUSH_STORED_VERTICES
;
439 rmesa
->dma
.flush
= rcommon_flush_last_swtcl_prim
;
442 ASSERT( vsize
== rmesa
->swtcl
.vertex_size
* 4 );
443 ASSERT( rmesa
->dma
.flush
== rcommon_flush_last_swtcl_prim
);
444 ASSERT( rmesa
->dma
.current_used
+
445 rmesa
->swtcl
.numverts
* rmesa
->swtcl
.vertex_size
* 4 ==
446 rmesa
->dma
.current_vertexptr
);
448 if (!rmesa
->swtcl
.bo
) {
449 rmesa
->swtcl
.bo
= first_elem(&rmesa
->dma
.reserved
)->bo
;
450 radeon_bo_ref(rmesa
->swtcl
.bo
);
451 radeon_bo_map(rmesa
->swtcl
.bo
, 1);
454 head
= (rmesa
->swtcl
.bo
->ptr
+ rmesa
->dma
.current_vertexptr
);
455 rmesa
->dma
.current_vertexptr
+= bytes
;
456 rmesa
->swtcl
.numverts
+= nverts
;
460 void radeonReleaseArrays( GLcontext
*ctx
, GLuint newinputs
)
462 radeonContextPtr radeon
= RADEON_CONTEXT( ctx
);
464 if (RADEON_DEBUG
& RADEON_IOCTL
)
465 fprintf(stderr
, "%s\n", __FUNCTION__
);
467 if (radeon
->dma
.flush
) {
468 radeon
->dma
.flush(radeon
->glCtx
);
470 for (i
= 0; i
< radeon
->tcl
.aos_count
; i
++) {
471 if (radeon
->tcl
.aos
[i
].bo
) {
472 radeon_bo_unref(radeon
->tcl
.aos
[i
].bo
);
473 radeon
->tcl
.aos
[i
].bo
= NULL
;