src/mesa/drivers/dri/radeon/radeon_dma.c
/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

#include "radeon_common.h"

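/* COPY_DWORDS copies 'nr' 32-bit words from 'src' to 'dst' and advances
 * 'dst' past the copied data.  On x86 builds it uses an inline
 * "rep ; movsl" string copy; everywhere else it falls back to a plain
 * C loop.
 */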
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )                                     \
do {                                                                    \
        int __tmp;                                                      \
        __asm__ __volatile__( "rep ; movsl"                             \
                              : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
                              : "0" (nr),                               \
                                "D" ((long)dst),                        \
                                "S" ((long)src) );                      \
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )             \
do {                                            \
        int j;                                  \
        for ( j = 0 ; j < nr ; j++ )            \
                dst[j] = ((int *)src)[j];       \
        dst += nr;                              \
} while (0)
#endif

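/* radeonEmitVec4/8/12/16 copy 'count' vertex attributes of 4, 8, 12 or
 * 16 bytes each from a possibly strided user array into the DMA buffer
 * at 'out'.  When the source stride matches the element size the data is
 * contiguous and a single COPY_DWORDS does the whole copy; otherwise the
 * elements are gathered one at a time.
 */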
static void radeonEmitVec4(uint32_t *out, GLvoid * data, int stride, int count)
{
        int i;

        if (RADEON_DEBUG & DEBUG_VERTS)
                fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                        __FUNCTION__, count, stride, (void *)out, (void *)data);

        if (stride == 4)
                COPY_DWORDS(out, data, count);
        else
                for (i = 0; i < count; i++) {
                        out[0] = *(int *)data;
                        out++;
                        data += stride;
                }
}

void radeonEmitVec8(uint32_t *out, GLvoid * data, int stride, int count)
{
        int i;

        if (RADEON_DEBUG & DEBUG_VERTS)
                fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                        __FUNCTION__, count, stride, (void *)out, (void *)data);

        if (stride == 8)
                COPY_DWORDS(out, data, count * 2);
        else
                for (i = 0; i < count; i++) {
                        out[0] = *(int *)data;
                        out[1] = *(int *)(data + 4);
                        out += 2;
                        data += stride;
                }
}

void radeonEmitVec12(uint32_t *out, GLvoid * data, int stride, int count)
{
        int i;

        if (RADEON_DEBUG & DEBUG_VERTS)
                fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                        __FUNCTION__, count, stride, (void *)out, (void *)data);

        if (stride == 12) {
                COPY_DWORDS(out, data, count * 3);
        }
        else
                for (i = 0; i < count; i++) {
                        out[0] = *(int *)data;
                        out[1] = *(int *)(data + 4);
                        out[2] = *(int *)(data + 8);
                        out += 3;
                        data += stride;
                }
}

static void radeonEmitVec16(uint32_t *out, GLvoid * data, int stride, int count)
{
        int i;

        if (RADEON_DEBUG & DEBUG_VERTS)
                fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                        __FUNCTION__, count, stride, (void *)out, (void *)data);

        if (stride == 16)
                COPY_DWORDS(out, data, count * 4);
        else
                for (i = 0; i < count; i++) {
                        out[0] = *(int *)data;
                        out[1] = *(int *)(data + 4);
                        out[2] = *(int *)(data + 8);
                        out[3] = *(int *)(data + 12);
                        out += 4;
                        data += stride;
                }
}

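/* rcommon_emit_vector builds one array-of-structures (AOS) stream for
 * the hardware: it allocates space in the current DMA region, records
 * the offset, stride and component count in 'aos', and copies 'count'
 * elements of 'size' dwords each from 'data'.  A stride of 0 means the
 * attribute is constant, so only a single element is emitted.
 */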
void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
                         GLvoid * data, int size, int stride, int count)
{
        radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
        uint32_t *out;

        if (stride == 0) {
                radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
                count = 1;
                aos->stride = 0;
        } else {
                radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
                aos->stride = size;
        }

        aos->components = size;
        aos->count = count;

        out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
        switch (size) {
        case 1: radeonEmitVec4(out, data, stride, count); break;
        case 2: radeonEmitVec8(out, data, stride, count); break;
        case 3: radeonEmitVec12(out, data, stride, count); break;
        case 4: radeonEmitVec16(out, data, stride, count); break;
        default:
                assert(0);
                break;
        }
}

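/* Throw away whatever is left of the current DMA buffer and start a new
 * one: flush any pending vertices, occasionally flush the command buffer
 * so released buffers can be reclaimed, then open and map a fresh GTT
 * buffer object for subsequent allocations.
 */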
void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
        size = MAX2(size, MAX_DMA_BUF_SZ * 16);

        if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
                fprintf(stderr, "%s\n", __FUNCTION__);

        if (rmesa->dma.flush) {
                rmesa->dma.flush(rmesa->glCtx);
        }

        if (rmesa->dma.nr_released_bufs > 4) {
                rcommonFlushCmdBuf(rmesa, __FUNCTION__);
                rmesa->dma.nr_released_bufs = 0;
        }

        if (rmesa->dma.current) {
                radeon_bo_unmap(rmesa->dma.current);
                radeon_bo_unref(rmesa->dma.current);
                rmesa->dma.current = 0;
        }

again_alloc:
        rmesa->dma.current = radeon_bo_open(rmesa->radeonScreen->bom,
                                            0, size, 4, RADEON_GEM_DOMAIN_GTT,
                                            0);

        if (!rmesa->dma.current) {
                rcommonFlushCmdBuf(rmesa, __FUNCTION__);
                rmesa->dma.nr_released_bufs = 0;
                goto again_alloc;
        }

        rmesa->dma.current_used = 0;
        rmesa->dma.current_vertexptr = 0;

        radeon_validate_bo(rmesa, rmesa->dma.current, RADEON_GEM_DOMAIN_GTT, 0);

        if (radeon_revalidate_bos(rmesa->glCtx) == GL_FALSE)
                fprintf(stderr, "failure to revalidate BOs - badness\n");

        radeon_bo_map(rmesa->dma.current, 1);
}

/* Allocates a region from rmesa->dma.current.  If there isn't enough
 * space in current, grab a new buffer (and discard what was left of current)
 */
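/* A sketch of typical usage, modeled on rcommon_emit_vector() above
 * (variable names here are only illustrative):
 *
 *     struct radeon_bo *bo;
 *     int offset;
 *
 *     radeonAllocDmaRegion(rmesa, &bo, &offset, nbytes, 32);
 *     memcpy((char *)bo->ptr + offset, vertex_data, nbytes);
 *     ...point the hardware at bo + offset, then radeon_bo_unref(bo)
 *        once the reference is no longer needed...
 *
 * The region comes out of an already-mapped buffer, and the caller
 * receives its own reference on the buffer object.
 */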
void radeonAllocDmaRegion(radeonContextPtr rmesa,
                          struct radeon_bo **pbo, int *poffset,
                          int bytes, int alignment)
{
        if (RADEON_DEBUG & DEBUG_IOCTL)
                fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);

        if (rmesa->dma.flush)
                rmesa->dma.flush(rmesa->glCtx);

        assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);

        alignment--;
        rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;

        if (!rmesa->dma.current || rmesa->dma.current_used + bytes > rmesa->dma.current->size)
                radeonRefillCurrentDmaRegion(rmesa, (bytes + 15) & ~15);

        *poffset = rmesa->dma.current_used;
        *pbo = rmesa->dma.current;
        radeon_bo_ref(*pbo);

        /* Always align to at least 16 bytes */
        rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
        rmesa->dma.current_vertexptr = rmesa->dma.current_used;

        assert(rmesa->dma.current_used <= rmesa->dma.current->size);
}

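/* Drop the driver's reference to the current DMA buffer.  Regions handed
 * out by radeonAllocDmaRegion() keep their own references, so the memory
 * stays around until those users are done with it.
 */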
void radeonReleaseDmaRegion(radeonContextPtr rmesa)
{
        if (RADEON_DEBUG & DEBUG_IOCTL)
                fprintf(stderr, "%s %p\n", __FUNCTION__, rmesa->dma.current);
        if (rmesa->dma.current) {
                rmesa->dma.nr_released_bufs++;
                radeon_bo_unmap(rmesa->dma.current);
                radeon_bo_unref(rmesa->dma.current);
        }
        rmesa->dma.current = NULL;
}


/* Flush vertices in the current dma region.
 */
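/* This function is installed as rmesa->dma.flush by
 * rcommonAllocDmaLowVerts() below; it runs whenever buffered
 * software-TCL vertices have to be emitted before the DMA region is
 * reused or released.
 */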
void rcommon_flush_last_swtcl_prim( GLcontext *ctx )
{
        radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
        struct radeon_dma *dma = &rmesa->dma;

        if (RADEON_DEBUG & DEBUG_IOCTL)
                fprintf(stderr, "%s %p\n", __FUNCTION__, dma->current);
        dma->flush = NULL;

        if (dma->current) {
                GLuint current_offset = dma->current_used;

                assert (dma->current_used +
                        rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
                        dma->current_vertexptr);

                if (dma->current_used != dma->current_vertexptr) {
                        dma->current_used = dma->current_vertexptr;

                        rmesa->vtbl.swtcl_flush(ctx, current_offset);
                }
                rmesa->swtcl.numverts = 0;
        }
}

/* Alloc space in the current dma region.
 */
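/* Returns a pointer to space for 'nverts' software-TCL vertices of
 * 'vsize' bytes each, refilling the DMA region first if it cannot hold
 * them.  The first allocation after a flush also reserves command-buffer
 * space for the state emit and registers rcommon_flush_last_swtcl_prim()
 * as the flush callback.
 */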
void *
rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
{
        GLuint bytes = vsize * nverts;
        void *head;
restart:
        if (!rmesa->dma.current || rmesa->dma.current_vertexptr + bytes > rmesa->dma.current->size) {
                radeonRefillCurrentDmaRegion(rmesa, bytes);
        }

        if (!rmesa->dma.flush) {
                /* make sure we have enough space to use this in cmdbuf */
                rcommonEnsureCmdBufSpace(rmesa,
                                         rmesa->hw.max_state_size + (12*sizeof(int)),
                                         __FUNCTION__);
                /* if cmdbuf flushed DMA restart */
                if (!rmesa->dma.current)
                        goto restart;
                rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
                rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
        }

        ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
        ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
        ASSERT( rmesa->dma.current_used +
                rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
                rmesa->dma.current_vertexptr );

        head = (rmesa->dma.current->ptr + rmesa->dma.current_vertexptr);
        rmesa->dma.current_vertexptr += bytes;
        rmesa->swtcl.numverts += nverts;
        return head;
}

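/* Release the buffer objects backing the element (index) buffer and the
 * vertex AOS streams set up by rcommon_emit_vector().  The 'newinputs'
 * argument is not used here.
 */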
void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
{
        radeonContextPtr radeon = RADEON_CONTEXT( ctx );
        int i;

        if (radeon->tcl.elt_dma_bo) {
                radeon_bo_unref(radeon->tcl.elt_dma_bo);
                radeon->tcl.elt_dma_bo = NULL;
        }
        for (i = 0; i < radeon->tcl.aos_count; i++) {
                if (radeon->tcl.aos[i].bo) {
                        radeon_bo_unref(radeon->tcl.aos[i].bo);
                        radeon->tcl.aos[i].bo = NULL;
                }
        }
}