/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.
Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/
#include "radeon_common.h"
#include "radeon_fog.h"
#include "util/simple_list.h"
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )                                     \
do {                                                                    \
    int __tmp;                                                          \
    __asm__ __volatile__( "rep ; movsl"                                 \
                          : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)     \
                          : "0" (nr),                                   \
                            "D" ((long)dst),                            \
                            "S" ((long)src) );                          \
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )     \
do {                                    \
    int j;                              \
    for ( j = 0 ; j < nr ; j++ )        \
        dst[j] = ((int *)src)[j];       \
    dst += nr;                          \
} while (0)
#endif
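/* The radeonEmitVec* helpers below gather one vertex attribute from a
 * (possibly strided) user array into a tightly packed DMA buffer,
 * copying 1, 2, 3 or 4 dwords per vertex.  When the source stride
 * equals the element size the data is already contiguous and
 * COPY_DWORDS bulk-copies it; otherwise the slow path walks the array
 * element by element.
 *
 * A minimal usage sketch (the 'dst' and 'texcoords' buffers are
 * hypothetical, not from this file): packing 'count' contiguous
 * 2-dword texcoords:
 *
 *     radeonEmitVec8(dst, texcoords, 8, count);
 */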
void radeonEmitVec4(uint32_t *out, const GLvoid *data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __func__, count, stride, (void *)out, (void *)data);

    if (stride == 4)
        COPY_DWORDS(out, data, count);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out++;
            data += stride;
        }
}
void radeonEmitVec8(uint32_t *out, const GLvoid *data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __func__, count, stride, (void *)out, (void *)data);

    if (stride == 8)
        COPY_DWORDS(out, data, count * 2);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out[1] = *(int *)(data + 4);
            out += 2;
            data += stride;
        }
}
void radeonEmitVec12(uint32_t *out, const GLvoid *data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __func__, count, stride, (void *)out, (void *)data);

    if (stride == 12)
        COPY_DWORDS(out, data, count * 3);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out[1] = *(int *)(data + 4);
            out[2] = *(int *)(data + 8);
            out += 3;
            data += stride;
        }
}
void radeonEmitVec16(uint32_t *out, const GLvoid *data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __func__, count, stride, (void *)out, (void *)data);

    if (stride == 16)
        COPY_DWORDS(out, data, count * 4);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out[1] = *(int *)(data + 4);
            out[2] = *(int *)(data + 8);
            out[3] = *(int *)(data + 12);
            out += 4;
            data += stride;
        }
}
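/* Upload one vertex attribute into a freshly allocated DMA region and
 * describe its layout in *aos for the TCL setup code.  A stride of 0
 * denotes a constant attribute: a single element is uploaded and
 * aos->stride is left at 0 so the same value is used for every vertex.
 */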
void rcommon_emit_vector(struct gl_context *ctx, struct radeon_aos *aos,
                         const GLvoid *data, int size, int stride, int count)
{
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    uint32_t *out;

    if (stride == 0) {
        radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
        count = 1;
        aos->stride = 0;
    } else {
        radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
        aos->stride = size;
    }

    aos->components = size;
    aos->count = count;

    radeon_bo_map(aos->bo, 1);
    out = (uint32_t *)((char *)aos->bo->ptr + aos->offset);
    switch (size) {
    case 1: radeonEmitVec4(out, data, stride, count); break;
    case 2: radeonEmitVec8(out, data, stride, count); break;
    case 3: radeonEmitVec12(out, data, stride, count); break;
    case 4: radeonEmitVec16(out, data, stride, count); break;
    default:
        assert(0);
        break;
    }
    radeon_bo_unmap(aos->bo);
}
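/* Fog is emitted as a single float per vertex.  Unlike
 * rcommon_emit_vector this does not copy raw data: each source fog
 * coordinate is first folded through radeonComputeFogBlendFactor for
 * the current fog mode.
 */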
void rcommon_emit_vecfog(struct gl_context *ctx, struct radeon_aos *aos,
                         GLvoid *data, int stride, int count)
{
    int i;
    float *out;
    int size = 1;
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d\n",
                __func__, count, stride);

    if (stride == 0) {
        radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
        count = 1;
        aos->stride = 0;
    } else {
        radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
        aos->stride = size;
    }

    aos->components = size;
    aos->count = count;

    /* Emit the data */
    radeon_bo_map(aos->bo, 1);
    out = (float *)((char *)aos->bo->ptr + aos->offset);
    for (i = 0; i < count; i++) {
        out[0] = radeonComputeFogBlendFactor(ctx, *(GLfloat *)data);
        out++;
        data += stride;
    }
    radeon_bo_unmap(aos->bo);
}
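/* The DMA buffer pool is managed as three lists:
 *
 *   reserved - buffers currently being filled by the CPU
 *   wait     - buffers handed to the GPU, cooling off before reuse
 *   free     - idle buffers, ready to be recycled or eventually freed
 *
 * radeonReleaseDmaRegions below rotates buffers through
 * reserved -> wait -> free as rendering completes.
 */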
void radeon_init_dma(radeonContextPtr rmesa)
{
    make_empty_list(&rmesa->dma.free);
    make_empty_list(&rmesa->dma.wait);
    make_empty_list(&rmesa->dma.reserved);
    rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
}
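/* Make a buffer current: recycle the last buffer on the free list if
 * it is large enough, otherwise open a new BO in the GTT domain.  If
 * the kernel cannot satisfy the allocation, the command buffer is
 * flushed and the allocation retried.
 */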
void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
    struct radeon_dma_bo *dma_bo = NULL;

    /* Raise the minimum size to at least the requested size,
       aligned to the next 16 bytes. */
    if (size > rmesa->dma.minimum_size)
        rmesa->dma.minimum_size = (size + 15) & (~15);

    radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %Zi\n",
                 __func__, size, rmesa->dma.minimum_size);

    if (is_empty_list(&rmesa->dma.free)
          || last_elem(&rmesa->dma.free)->bo->size < size) {
        dma_bo = CALLOC_STRUCT(radeon_dma_bo);
        assert(dma_bo);

again_alloc:
        dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
                                    0, rmesa->dma.minimum_size, 4,
                                    RADEON_GEM_DOMAIN_GTT, 0);
        if (!dma_bo->bo) {
            /* allocation failed: flush pending commands and retry */
            rcommonFlushCmdBuf(rmesa, __func__);
            goto again_alloc;
        }
        insert_at_head(&rmesa->dma.reserved, dma_bo);
    } else {
        /* We push and pop buffers from the end of the list so we can
           keep a counter of unused buffers for later freeing them
           from the beginning of the list. */
        dma_bo = last_elem(&rmesa->dma.free);
        remove_from_list(dma_bo);
        insert_at_head(&rmesa->dma.reserved, dma_bo);
    }

    rmesa->dma.current_used = 0;
    rmesa->dma.current_vertexptr = 0;

    if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
                                      first_elem(&rmesa->dma.reserved)->bo,
                                      RADEON_GEM_DOMAIN_GTT, 0))
        fprintf(stderr, "failure to revalidate BOs - badness\n");

    if (is_empty_list(&rmesa->dma.reserved)) {
        /* the command buffer was flushed in radeon_revalidate_bos */
        goto again_alloc;
    }
    radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
}
/* Allocates a region from rmesa->dma.current.  If there isn't enough
 * space in current, grab a new buffer (and discard what was left of
 * current).
 */
void radeonAllocDmaRegion(radeonContextPtr rmesa,
                          struct radeon_bo **pbo, int *poffset,
                          int bytes, int alignment)
{
    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s %d\n", __func__, bytes);

    if (rmesa->dma.flush)
        rmesa->dma.flush(&rmesa->glCtx);

    assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);

    /* round current_used up to the requested (power-of-two) alignment */
    alignment--;
    rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;

    if (is_empty_list(&rmesa->dma.reserved)
          || rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
        radeonRefillCurrentDmaRegion(rmesa, bytes);

    *poffset = rmesa->dma.current_used;
    *pbo = first_elem(&rmesa->dma.reserved)->bo;
    radeon_bo_ref(*pbo);

    /* Always align to at least 16 bytes */
    rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
    rmesa->dma.current_vertexptr = rmesa->dma.current_used;

    assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
}
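/* A minimal usage sketch for radeonAllocDmaRegion (the 'normals' array
 * and 'count' are hypothetical, not from this file); rcommon_emit_vector
 * above is the real in-tree user of this pattern.  The BO comes back
 * referenced, so the caller owns one reference:
 *
 *     struct radeon_bo *bo;
 *     int offset;
 *
 *     radeonAllocDmaRegion(rmesa, &bo, &offset, count * 12, 32);
 *     radeon_bo_map(bo, 1);
 *     memcpy((char *)bo->ptr + offset, normals, count * 12);
 *     radeon_bo_unmap(bo);
 */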
void radeonFreeDmaRegions(radeonContextPtr rmesa)
{
    struct radeon_dma_bo *dma_bo;
    struct radeon_dma_bo *temp;

    if (RADEON_DEBUG & RADEON_DMA)
        fprintf(stderr, "%s\n", __func__);

    foreach_s(dma_bo, temp, &rmesa->dma.free) {
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        free(dma_bo);
    }

    foreach_s(dma_bo, temp, &rmesa->dma.wait) {
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        free(dma_bo);
    }

    foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        free(dma_bo);
    }
}
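/* Give back the unused tail of the current region, e.g. when fewer
 * bytes were emitted than the caller reserved space for.
 */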
void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
{
    if (is_empty_list(&rmesa->dma.reserved))
        return;

    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s %d\n", __func__, return_bytes);

    rmesa->dma.current_used -= return_bytes;
    rmesa->dma.current_vertexptr = rmesa->dma.current_used;
}
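/* radeon_bo_is_busy() reports -EBUSY while the GPU still references
 * the BO.  -EINVAL means the kernel/libdrm combination lacks the busy
 * query entirely; in that case buffers are treated as idle, which is
 * safe but may recycle them earlier than ideal.
 */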
static int radeon_bo_is_idle(struct radeon_bo *bo)
{
    uint32_t domain;
    int ret = radeon_bo_is_busy(bo, &domain);

    if (ret == -EINVAL) {
        WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
                  "This may cause a small performance drop for you.\n");
    }
    return ret != -EBUSY;
}
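/* Buffer aging: dma.free.expire_counter acts as a coarse clock that
 * advances once per call.  Buffers leaving the reserved list are
 * stamped with (clock + DMA_BO_FREE_TIME) and migrate
 * reserved -> wait -> free; once the stamp comes due without the
 * buffer having been recycled, it is destroyed.
 */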
void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
    struct radeon_dma_bo *dma_bo;
    struct radeon_dma_bo *temp;
    const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
    const int time = rmesa->dma.free.expire_counter;

    if (RADEON_DEBUG & RADEON_DMA) {
        size_t free = 0,
               wait = 0,
               reserved = 0;

        foreach(dma_bo, &rmesa->dma.free)
            ++free;

        foreach(dma_bo, &rmesa->dma.wait)
            ++wait;

        foreach(dma_bo, &rmesa->dma.reserved)
            ++reserved;

        fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n",
                __func__, free, wait, reserved, rmesa->dma.minimum_size);
    }

    /* Move waiting bos to the free list.  The wait list gives the gpu
       time to finish with the data before the buffer is reused. */
    foreach_s(dma_bo, temp, &rmesa->dma.wait) {
        if (dma_bo->expire_counter == time) {
            WARN_ONCE("Leaking dma buffer object!\n");
            radeon_bo_unref(dma_bo->bo);
            remove_from_list(dma_bo);
            free(dma_bo);
            continue;
        }
        /* free objects that are too small to be used because of large request */
        if (dma_bo->bo->size < rmesa->dma.minimum_size) {
            radeon_bo_unref(dma_bo->bo);
            remove_from_list(dma_bo);
            free(dma_bo);
            continue;
        }

        if (!radeon_bo_is_idle(dma_bo->bo))
            break;

        remove_from_list(dma_bo);
        dma_bo->expire_counter = expire_at;
        insert_at_tail(&rmesa->dma.free, dma_bo);
    }

    /* move reserved to wait list */
    foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
        radeon_bo_unmap(dma_bo->bo);
        /* free objects that are too small to be used because of large request */
        if (dma_bo->bo->size < rmesa->dma.minimum_size) {
            radeon_bo_unref(dma_bo->bo);
            remove_from_list(dma_bo);
            free(dma_bo);
            continue;
        }

        remove_from_list(dma_bo);
        dma_bo->expire_counter = expire_at;
        insert_at_tail(&rmesa->dma.wait, dma_bo);
    }

    /* free bos that have been unused for some time */
    foreach_s(dma_bo, temp, &rmesa->dma.free) {
        if (dma_bo->expire_counter != time)
            break;
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        free(dma_bo);
    }
}
/* Flush vertices in the current dma region.
 */
void rcommon_flush_last_swtcl_prim(struct gl_context *ctx)
{
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    struct radeon_dma *dma = &rmesa->dma;

    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s\n", __func__);
    dma->flush = NULL;

    radeon_bo_unmap(rmesa->swtcl.bo);

    if (!is_empty_list(&dma->reserved)) {
        GLuint current_offset = dma->current_used;

        assert(dma->current_used +
               rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
               dma->current_vertexptr);

        if (dma->current_used != dma->current_vertexptr) {
            dma->current_used = dma->current_vertexptr;

            rmesa->vtbl.swtcl_flush(ctx, current_offset);
        }
        rmesa->swtcl.numverts = 0;
    }
    radeon_bo_unref(rmesa->swtcl.bo);
    rmesa->swtcl.bo = NULL;
}
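/* The dma.flush hook installed below is the contract between the
 * software-TCL vertex path and the DMA allocator: anyone who needs to
 * reuse or replace the current region (e.g. radeonAllocDmaRegion)
 * invokes dma.flush first, so buffered vertices reach the hardware
 * before their storage is recycled.
 */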
/* Alloc space in the current dma region.
 */
void *
rcommonAllocDmaLowVerts(radeonContextPtr rmesa, int nverts, int vsize)
{
    GLuint bytes = vsize * nverts;
    void *head;

    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s\n", __func__);

    if (is_empty_list(&rmesa->dma.reserved)
          || rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
        if (rmesa->dma.flush) {
            rmesa->dma.flush(&rmesa->glCtx);
        }

        radeonRefillCurrentDmaRegion(rmesa, bytes);

        /* signal the caller to retry with the freshly refilled region */
        return NULL;
    }

    if (!rmesa->dma.flush) {
        /* the cmdbuf was flushed, so restart DMA: mark stored vertices
           for flushing and reinstall the flush hook */
        rmesa->glCtx.Driver.NeedFlush |= FLUSH_STORED_VERTICES;
        rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
    }

    assert(vsize == rmesa->swtcl.vertex_size * 4);
    assert(rmesa->dma.flush == rcommon_flush_last_swtcl_prim);
    assert(rmesa->dma.current_used +
           rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
           rmesa->dma.current_vertexptr);

    if (!rmesa->swtcl.bo) {
        rmesa->swtcl.bo = first_elem(&rmesa->dma.reserved)->bo;
        radeon_bo_ref(rmesa->swtcl.bo);
        radeon_bo_map(rmesa->swtcl.bo, 1);
    }

    head = (rmesa->swtcl.bo->ptr + rmesa->dma.current_vertexptr);
    rmesa->dma.current_vertexptr += bytes;
    rmesa->swtcl.numverts += nverts;
    return head;
}
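/* Release the references that rcommon_emit_vector took on the AoS BOs
 * once the draw call is finished.  Pending software-TCL vertices are
 * flushed first.  (The newinputs mask is part of the driver interface
 * but is not consulted here.)
 */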
void radeonReleaseArrays(struct gl_context *ctx, GLuint newinputs)
{
    radeonContextPtr radeon = RADEON_CONTEXT(ctx);
    int i;

    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s\n", __func__);

    if (radeon->dma.flush) {
        radeon->dma.flush(&radeon->glCtx);
    }

    for (i = 0; i < radeon->tcl.aos_count; i++) {
        if (radeon->tcl.aos[i].bo) {
            radeon_bo_unref(radeon->tcl.aos[i].bo);
            radeon->tcl.aos[i].bo = NULL;
        }
    }
}