1 /**************************************************************************
3 Copyright (C) 2004 Nicolai Haehnle.
4 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
6 The Weather Channel (TM) funded Tungsten Graphics to develop the
7 initial release of the Radeon 8500 driver under the XFree86 license.
8 This notice must be preserved.
12 Permission is hereby granted, free of charge, to any person obtaining a
13 copy of this software and associated documentation files (the "Software"),
14 to deal in the Software without restriction, including without limitation
15 on the rights to use, copy, modify, merge, publish, distribute, sub
16 license, and/or sell copies of the Software, and to permit persons to whom
17 the Software is furnished to do so, subject to the following conditions:
19 The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the Software.
23 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
26 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
27 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
28 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
29 USE OR OTHER DEALINGS IN THE SOFTWARE.
31 **************************************************************************/
33 #include "radeon_common.h"
#if defined(USE_X86_ASM)
/* Copy "nr" dwords from src to dst with "rep movsl"; advances dst past
 * the copied region as a side effect (ESI/EDI are auto-incremented).
 * Wrapped in do/while(0) so it behaves as a single statement.
 */
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
				"D" ((long)dst),			\
				"S" ((long)src) );			\
} while (0)
#else
/* Portable fallback: dword-wise copy loop.  Mirrors the asm version's
 * side effect of advancing dst by "nr" dwords.
 */
#define COPY_DWORDS( dst, src, nr )		\
do {						\
	int j;					\
	for ( j = 0 ; j < nr ; j++ )		\
		dst[j] = ((int *)src)[j];	\
	dst += nr;				\
} while (0)
#endif
55 static void radeonEmitVec4(uint32_t *out
, GLvoid
* data
, int stride
, int count
)
59 if (RADEON_DEBUG
& DEBUG_VERTS
)
60 fprintf(stderr
, "%s count %d stride %d out %p data %p\n",
61 __FUNCTION__
, count
, stride
, (void *)out
, (void *)data
);
64 COPY_DWORDS(out
, data
, count
);
66 for (i
= 0; i
< count
; i
++) {
67 out
[0] = *(int *)data
;
73 void radeonEmitVec8(uint32_t *out
, GLvoid
* data
, int stride
, int count
)
77 if (RADEON_DEBUG
& DEBUG_VERTS
)
78 fprintf(stderr
, "%s count %d stride %d out %p data %p\n",
79 __FUNCTION__
, count
, stride
, (void *)out
, (void *)data
);
82 COPY_DWORDS(out
, data
, count
* 2);
84 for (i
= 0; i
< count
; i
++) {
85 out
[0] = *(int *)data
;
86 out
[1] = *(int *)(data
+ 4);
92 void radeonEmitVec12(uint32_t *out
, GLvoid
* data
, int stride
, int count
)
96 if (RADEON_DEBUG
& DEBUG_VERTS
)
97 fprintf(stderr
, "%s count %d stride %d out %p data %p\n",
98 __FUNCTION__
, count
, stride
, (void *)out
, (void *)data
);
101 COPY_DWORDS(out
, data
, count
* 3);
104 for (i
= 0; i
< count
; i
++) {
105 out
[0] = *(int *)data
;
106 out
[1] = *(int *)(data
+ 4);
107 out
[2] = *(int *)(data
+ 8);
113 static void radeonEmitVec16(uint32_t *out
, GLvoid
* data
, int stride
, int count
)
117 if (RADEON_DEBUG
& DEBUG_VERTS
)
118 fprintf(stderr
, "%s count %d stride %d out %p data %p\n",
119 __FUNCTION__
, count
, stride
, (void *)out
, (void *)data
);
122 COPY_DWORDS(out
, data
, count
* 4);
124 for (i
= 0; i
< count
; i
++) {
125 out
[0] = *(int *)data
;
126 out
[1] = *(int *)(data
+ 4);
127 out
[2] = *(int *)(data
+ 8);
128 out
[3] = *(int *)(data
+ 12);
134 void rcommon_emit_vector(GLcontext
* ctx
, struct radeon_aos
*aos
,
135 GLvoid
* data
, int size
, int stride
, int count
)
137 radeonContextPtr rmesa
= RADEON_CONTEXT(ctx
);
141 radeonAllocDmaRegion(rmesa
, &aos
->bo
, &aos
->offset
, size
* 4, 32);
145 radeonAllocDmaRegion(rmesa
, &aos
->bo
, &aos
->offset
, size
* count
* 4, 32);
149 aos
->components
= size
;
152 out
= (uint32_t*)((char*)aos
->bo
->ptr
+ aos
->offset
);
154 case 1: radeonEmitVec4(out
, data
, stride
, count
); break;
155 case 2: radeonEmitVec8(out
, data
, stride
, count
); break;
156 case 3: radeonEmitVec12(out
, data
, stride
, count
); break;
157 case 4: radeonEmitVec16(out
, data
, stride
, count
); break;
164 void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa
, int size
)
166 struct radeon_cs_space_check bos
[1];
167 int flushed
= 0, ret
;
169 size
= MAX2(size
, MAX_DMA_BUF_SZ
* 16);
171 if (RADEON_DEBUG
& (DEBUG_IOCTL
| DEBUG_DMA
))
172 fprintf(stderr
, "%s\n", __FUNCTION__
);
174 if (rmesa
->dma
.flush
) {
175 rmesa
->dma
.flush(rmesa
->glCtx
);
178 if (rmesa
->dma
.nr_released_bufs
> 4) {
179 rcommonFlushCmdBuf(rmesa
, __FUNCTION__
);
180 rmesa
->dma
.nr_released_bufs
= 0;
183 if (rmesa
->dma
.current
) {
184 radeon_bo_unmap(rmesa
->dma
.current
);
185 radeon_bo_unref(rmesa
->dma
.current
);
186 rmesa
->dma
.current
= 0;
190 rmesa
->dma
.current
= radeon_bo_open(rmesa
->radeonScreen
->bom
,
191 0, size
, 4, RADEON_GEM_DOMAIN_GTT
,
194 if (!rmesa
->dma
.current
) {
195 rcommonFlushCmdBuf(rmesa
, __FUNCTION__
);
196 rmesa
->dma
.nr_released_bufs
= 0;
200 rmesa
->dma
.current_used
= 0;
201 rmesa
->dma
.current_vertexptr
= 0;
203 bos
[0].bo
= rmesa
->dma
.current
;
204 bos
[0].read_domains
= RADEON_GEM_DOMAIN_GTT
;
205 bos
[0].write_domain
=0 ;
206 bos
[0].new_accounted
= 0;
208 ret
= radeon_cs_space_check(rmesa
->cmdbuf
.cs
, bos
, 1);
209 if (ret
== RADEON_CS_SPACE_OP_TO_BIG
) {
210 fprintf(stderr
,"Got OPEARTION TO BIG ILLEGAL - this cannot happen");
212 } else if (ret
== RADEON_CS_SPACE_FLUSH
) {
213 rcommonFlushCmdBuf(rmesa
, __FUNCTION__
);
215 fprintf(stderr
,"flushed but still no space\n");
221 radeon_bo_map(rmesa
->dma
.current
, 1);
224 /* Allocates a region from rmesa->dma.current. If there isn't enough
225 * space in current, grab a new buffer (and discard what was left of current)
227 void radeonAllocDmaRegion(radeonContextPtr rmesa
,
228 struct radeon_bo
**pbo
, int *poffset
,
229 int bytes
, int alignment
)
231 if (RADEON_DEBUG
& DEBUG_IOCTL
)
232 fprintf(stderr
, "%s %d\n", __FUNCTION__
, bytes
);
234 if (rmesa
->dma
.flush
)
235 rmesa
->dma
.flush(rmesa
->glCtx
);
237 assert(rmesa
->dma
.current_used
== rmesa
->dma
.current_vertexptr
);
240 rmesa
->dma
.current_used
= (rmesa
->dma
.current_used
+ alignment
) & ~alignment
;
242 if (!rmesa
->dma
.current
|| rmesa
->dma
.current_used
+ bytes
> rmesa
->dma
.current
->size
)
243 radeonRefillCurrentDmaRegion(rmesa
, (bytes
+ 15) & ~15);
245 *poffset
= rmesa
->dma
.current_used
;
246 *pbo
= rmesa
->dma
.current
;
249 /* Always align to at least 16 bytes */
250 rmesa
->dma
.current_used
= (rmesa
->dma
.current_used
+ bytes
+ 15) & ~15;
251 rmesa
->dma
.current_vertexptr
= rmesa
->dma
.current_used
;
253 assert(rmesa
->dma
.current_used
<= rmesa
->dma
.current
->size
);
256 void radeonReleaseDmaRegion(radeonContextPtr rmesa
)
258 if (RADEON_DEBUG
& DEBUG_IOCTL
)
259 fprintf(stderr
, "%s %p\n", __FUNCTION__
, rmesa
->dma
.current
);
260 if (rmesa
->dma
.current
) {
261 rmesa
->dma
.nr_released_bufs
++;
262 radeon_bo_unmap(rmesa
->dma
.current
);
263 radeon_bo_unref(rmesa
->dma
.current
);
265 rmesa
->dma
.current
= NULL
;
269 /* Flush vertices in the current dma region.
271 void rcommon_flush_last_swtcl_prim( GLcontext
*ctx
)
273 radeonContextPtr rmesa
= RADEON_CONTEXT(ctx
);
274 struct radeon_dma
*dma
= &rmesa
->dma
;
277 if (RADEON_DEBUG
& DEBUG_IOCTL
)
278 fprintf(stderr
, "%s\n", __FUNCTION__
);
282 GLuint current_offset
= dma
->current_used
;
284 assert (dma
->current_used
+
285 rmesa
->swtcl
.numverts
* rmesa
->swtcl
.vertex_size
* 4 ==
286 dma
->current_vertexptr
);
288 if (dma
->current_used
!= dma
->current_vertexptr
) {
289 dma
->current_used
= dma
->current_vertexptr
;
291 rmesa
->vtbl
.swtcl_flush(ctx
, current_offset
);
293 rmesa
->swtcl
.numverts
= 0;
/* Alloc space in the current dma region. */
299 rcommonAllocDmaLowVerts( radeonContextPtr rmesa
, int nverts
, int vsize
)
301 GLuint bytes
= vsize
* nverts
;
304 if (!rmesa
->dma
.current
|| rmesa
->dma
.current_vertexptr
+ bytes
> rmesa
->dma
.current
->size
) {
305 radeonRefillCurrentDmaRegion(rmesa
, bytes
);
308 if (!rmesa
->dma
.flush
) {
309 rmesa
->glCtx
->Driver
.NeedFlush
|= FLUSH_STORED_VERTICES
;
310 rmesa
->dma
.flush
= rcommon_flush_last_swtcl_prim
;
313 ASSERT( vsize
== rmesa
->swtcl
.vertex_size
* 4 );
314 ASSERT( rmesa
->dma
.flush
== rcommon_flush_last_swtcl_prim
);
315 ASSERT( rmesa
->dma
.current_used
+
316 rmesa
->swtcl
.numverts
* rmesa
->swtcl
.vertex_size
* 4 ==
317 rmesa
->dma
.current_vertexptr
);
319 head
= (rmesa
->dma
.current
->ptr
+ rmesa
->dma
.current_vertexptr
);
320 rmesa
->dma
.current_vertexptr
+= bytes
;
321 rmesa
->swtcl
.numverts
+= nverts
;