radeon: add support for stencil buffers + misc debug changes
[mesa.git] / src / mesa / drivers / dri / radeon / radeon_maos_arrays.c
1 /**************************************************************************
2
3 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
4 Tungsten Graphics Inc., Cedar Park, Texas.
5
6 All Rights Reserved.
7
8 Permission is hereby granted, free of charge, to any person obtaining
9 a copy of this software and associated documentation files (the
10 "Software"), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
15
16 The above copyright notice and this permission notice (including the
17 next paragraph) shall be included in all copies or substantial
18 portions of the Software.
19
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
24 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
28 **************************************************************************/
29
30 /*
31 * Authors:
32 * Keith Whitwell <keith@tungstengraphics.com>
33 */
34
35 #include "main/glheader.h"
36 #include "main/imports.h"
37 #include "main/mtypes.h"
38 #include "main/macros.h"
39
40 #include "swrast_setup/swrast_setup.h"
41 #include "math/m_translate.h"
42 #include "tnl/tnl.h"
43 #include "tnl/tcontext.h"
44
45 #include "radeon_context.h"
46 #include "radeon_ioctl.h"
47 #include "radeon_state.h"
48 #include "radeon_swtcl.h"
49 #include "radeon_maos.h"
50 #include "radeon_tcl.h"
51
/*
 * COPY_DWORDS( dst, src, nr )
 *
 * Copy `nr` 32-bit words from `src` to `dst`, then leave `dst` pointing just
 * past the last word written.
 *
 *   dst - modifiable pointer lvalue to int-sized elements; it is updated.
 *   src - pointer to the source words; any pointer type or pointer
 *         expression is accepted.
 *   nr  - number of words to copy.
 *
 * With USE_X86_ASM the copy is done by a single "rep ; movsl"; otherwise a
 * plain C loop is used.  In the C version every argument is parenthesized
 * and `src` / `nr` are evaluated exactly once, so expression arguments such
 * as `base + 1` or `cond ? a : b` behave as the caller expects.
 *
 * NOTE(review): the asm statement declares no "memory" clobber even though
 * it writes through `dst` -- confirm whether one should be added.
 */
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr ) \
do { \
   int __tmp; \
   __asm__ __volatile__( "rep ; movsl" \
                         : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
                         : "0" (nr), \
                           "D" ((long)dst), \
                           "S" ((long)(src)) ); \
} while (0)
#else
#define COPY_DWORDS( dst, src, nr ) \
do { \
   const int *copy_src_ = (const int *)(src); \
   const int copy_nr_ = (int)(nr); \
   int copy_i_; \
   for ( copy_i_ = 0 ; copy_i_ < copy_nr_ ; copy_i_++ ) \
      (dst)[copy_i_] = copy_src_[copy_i_]; \
   (dst) += copy_nr_; \
} while (0)
#endif
71
72 static void emit_vecfog( GLcontext *ctx,
73 struct radeon_dma_region *rvb,
74 char *data,
75 int stride,
76 int count )
77 {
78 int i;
79 GLfloat *out;
80
81 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
82
83 if (RADEON_DEBUG & DEBUG_VERTS)
84 fprintf(stderr, "%s count %d stride %d\n",
85 __FUNCTION__, count, stride);
86
87 assert (!rvb->buf);
88
89 if (stride == 0) {
90 radeonAllocDmaRegion( rmesa, rvb, 4, 4 );
91 count = 1;
92 rvb->aos_start = GET_START(rvb);
93 rvb->aos_stride = 0;
94 rvb->aos_size = 1;
95 }
96 else {
97 radeonAllocDmaRegion( rmesa, rvb, count * 4, 4 ); /* alignment? */
98 rvb->aos_start = GET_START(rvb);
99 rvb->aos_stride = 1;
100 rvb->aos_size = 1;
101 }
102
103 /* Emit the data
104 */
105 out = (GLfloat *)(rvb->address + rvb->start);
106 for (i = 0; i < count; i++) {
107 out[0] = radeonComputeFogBlendFactor( ctx, *(GLfloat *)data );
108 out++;
109 data += stride;
110 }
111 }
112
113 static void emit_vec4( GLcontext *ctx,
114 struct radeon_dma_region *rvb,
115 char *data,
116 int stride,
117 int count )
118 {
119 int i;
120 int *out = (int *)(rvb->address + rvb->start);
121
122 if (RADEON_DEBUG & DEBUG_VERTS)
123 fprintf(stderr, "%s count %d stride %d\n",
124 __FUNCTION__, count, stride);
125
126 if (stride == 4)
127 COPY_DWORDS( out, data, count );
128 else
129 for (i = 0; i < count; i++) {
130 out[0] = *(int *)data;
131 out++;
132 data += stride;
133 }
134 }
135
136
137 static void emit_vec8( GLcontext *ctx,
138 struct radeon_dma_region *rvb,
139 char *data,
140 int stride,
141 int count )
142 {
143 int i;
144 int *out = (int *)(rvb->address + rvb->start);
145
146 if (RADEON_DEBUG & DEBUG_VERTS)
147 fprintf(stderr, "%s count %d stride %d\n",
148 __FUNCTION__, count, stride);
149
150 if (stride == 8)
151 COPY_DWORDS( out, data, count*2 );
152 else
153 for (i = 0; i < count; i++) {
154 out[0] = *(int *)data;
155 out[1] = *(int *)(data+4);
156 out += 2;
157 data += stride;
158 }
159 }
160
161 static void emit_vec12( GLcontext *ctx,
162 struct radeon_dma_region *rvb,
163 char *data,
164 int stride,
165 int count )
166 {
167 int i;
168 int *out = (int *)(rvb->address + rvb->start);
169
170 if (RADEON_DEBUG & DEBUG_VERTS)
171 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
172 __FUNCTION__, count, stride, (void *)out, (void *)data);
173
174 if (stride == 12)
175 COPY_DWORDS( out, data, count*3 );
176 else
177 for (i = 0; i < count; i++) {
178 out[0] = *(int *)data;
179 out[1] = *(int *)(data+4);
180 out[2] = *(int *)(data+8);
181 out += 3;
182 data += stride;
183 }
184 }
185
186 static void emit_vec16( GLcontext *ctx,
187 struct radeon_dma_region *rvb,
188 char *data,
189 int stride,
190 int count )
191 {
192 int i;
193 int *out = (int *)(rvb->address + rvb->start);
194
195 if (RADEON_DEBUG & DEBUG_VERTS)
196 fprintf(stderr, "%s count %d stride %d\n",
197 __FUNCTION__, count, stride);
198
199 if (stride == 16)
200 COPY_DWORDS( out, data, count*4 );
201 else
202 for (i = 0; i < count; i++) {
203 out[0] = *(int *)data;
204 out[1] = *(int *)(data+4);
205 out[2] = *(int *)(data+8);
206 out[3] = *(int *)(data+12);
207 out += 4;
208 data += stride;
209 }
210 }
211
212
213 static void emit_vector( GLcontext *ctx,
214 struct radeon_dma_region *rvb,
215 char *data,
216 int size,
217 int stride,
218 int count )
219 {
220 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
221
222 if (RADEON_DEBUG & DEBUG_VERTS)
223 fprintf(stderr, "%s count %d size %d stride %d\n",
224 __FUNCTION__, count, size, stride);
225
226 assert (!rvb->buf);
227
228 if (stride == 0) {
229 radeonAllocDmaRegion( rmesa, rvb, size * 4, 4 );
230 count = 1;
231 rvb->aos_start = GET_START(rvb);
232 rvb->aos_stride = 0;
233 rvb->aos_size = size;
234 }
235 else {
236 radeonAllocDmaRegion( rmesa, rvb, size * count * 4, 4 ); /* alignment? */
237 rvb->aos_start = GET_START(rvb);
238 rvb->aos_stride = size;
239 rvb->aos_size = size;
240 }
241
242 /* Emit the data
243 */
244 switch (size) {
245 case 1:
246 emit_vec4( ctx, rvb, data, stride, count );
247 break;
248 case 2:
249 emit_vec8( ctx, rvb, data, stride, count );
250 break;
251 case 3:
252 emit_vec12( ctx, rvb, data, stride, count );
253 break;
254 case 4:
255 emit_vec16( ctx, rvb, data, stride, count );
256 break;
257 default:
258 assert(0);
259 exit(1);
260 break;
261 }
262
263 }
264
265
266
267 static void emit_s0_vec( GLcontext *ctx,
268 struct radeon_dma_region *rvb,
269 char *data,
270 int stride,
271 int count )
272 {
273 int i;
274 int *out = (int *)(rvb->address + rvb->start);
275
276 if (RADEON_DEBUG & DEBUG_VERTS)
277 fprintf(stderr, "%s count %d stride %d\n",
278 __FUNCTION__, count, stride);
279
280 for (i = 0; i < count; i++) {
281 out[0] = *(int *)data;
282 out[1] = 0;
283 out += 2;
284 data += stride;
285 }
286 }
287
288 static void emit_stq_vec( GLcontext *ctx,
289 struct radeon_dma_region *rvb,
290 char *data,
291 int stride,
292 int count )
293 {
294 int i;
295 int *out = (int *)(rvb->address + rvb->start);
296
297 if (RADEON_DEBUG & DEBUG_VERTS)
298 fprintf(stderr, "%s count %d stride %d\n",
299 __FUNCTION__, count, stride);
300
301 for (i = 0; i < count; i++) {
302 out[0] = *(int *)data;
303 out[1] = *(int *)(data+4);
304 out[2] = *(int *)(data+12);
305 out += 3;
306 data += stride;
307 }
308 }
309
310
311
312
313 static void emit_tex_vector( GLcontext *ctx,
314 struct radeon_dma_region *rvb,
315 char *data,
316 int size,
317 int stride,
318 int count )
319 {
320 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
321 int emitsize;
322
323 if (RADEON_DEBUG & DEBUG_VERTS)
324 fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
325
326 assert (!rvb->buf);
327
328 switch (size) {
329 case 4: emitsize = 3; break;
330 case 3: emitsize = 3; break;
331 default: emitsize = 2; break;
332 }
333
334
335 if (stride == 0) {
336 radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize, 4 );
337 count = 1;
338 rvb->aos_start = GET_START(rvb);
339 rvb->aos_stride = 0;
340 rvb->aos_size = emitsize;
341 }
342 else {
343 radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize * count, 4 );
344 rvb->aos_start = GET_START(rvb);
345 rvb->aos_stride = emitsize;
346 rvb->aos_size = emitsize;
347 }
348
349
350 /* Emit the data
351 */
352 switch (size) {
353 case 1:
354 emit_s0_vec( ctx, rvb, data, stride, count );
355 break;
356 case 2:
357 emit_vec8( ctx, rvb, data, stride, count );
358 break;
359 case 3:
360 emit_vec12( ctx, rvb, data, stride, count );
361 break;
362 case 4:
363 emit_stq_vec( ctx, rvb, data, stride, count );
364 break;
365 default:
366 assert(0);
367 exit(1);
368 break;
369 }
370 }
371
372
373
374
375 /* Emit any changed arrays to new GART memory, re-emit a packet to
376 * update the arrays.
377 */
378 void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
379 {
380 radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
381 struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb;
382 struct radeon_dma_region **component = rmesa->tcl.aos_components;
383 GLuint nr = 0;
384 GLuint vfmt = 0;
385 GLuint count = VB->Count;
386 GLuint vtx, unit;
387
388 #if 0
389 if (RADEON_DEBUG & DEBUG_VERTS)
390 _tnl_print_vert_flags( __FUNCTION__, inputs );
391 #endif
392
393 if (1) {
394 if (!rmesa->tcl.obj.buf)
395 emit_vector( ctx,
396 &rmesa->tcl.obj,
397 (char *)VB->ObjPtr->data,
398 VB->ObjPtr->size,
399 VB->ObjPtr->stride,
400 count);
401
402 switch( VB->ObjPtr->size ) {
403 case 4: vfmt |= RADEON_CP_VC_FRMT_W0;
404 case 3: vfmt |= RADEON_CP_VC_FRMT_Z;
405 case 2: vfmt |= RADEON_CP_VC_FRMT_XY;
406 default:
407 break;
408 }
409 component[nr++] = &rmesa->tcl.obj;
410 }
411
412
413 if (inputs & VERT_BIT_NORMAL) {
414 if (!rmesa->tcl.norm.buf)
415 emit_vector( ctx,
416 &(rmesa->tcl.norm),
417 (char *)VB->NormalPtr->data,
418 3,
419 VB->NormalPtr->stride,
420 count);
421
422 vfmt |= RADEON_CP_VC_FRMT_N0;
423 component[nr++] = &rmesa->tcl.norm;
424 }
425
426 if (inputs & VERT_BIT_COLOR0) {
427 int emitsize;
428 if (VB->ColorPtr[0]->size == 4 &&
429 (VB->ColorPtr[0]->stride != 0 ||
430 VB->ColorPtr[0]->data[0][3] != 1.0)) {
431 vfmt |= RADEON_CP_VC_FRMT_FPCOLOR | RADEON_CP_VC_FRMT_FPALPHA;
432 emitsize = 4;
433 }
434
435 else {
436 vfmt |= RADEON_CP_VC_FRMT_FPCOLOR;
437 emitsize = 3;
438 }
439
440 if (!rmesa->tcl.rgba.buf)
441 emit_vector( ctx,
442 &(rmesa->tcl.rgba),
443 (char *)VB->ColorPtr[0]->data,
444 emitsize,
445 VB->ColorPtr[0]->stride,
446 count);
447
448
449 component[nr++] = &rmesa->tcl.rgba;
450 }
451
452
453 if (inputs & VERT_BIT_COLOR1) {
454 if (!rmesa->tcl.spec.buf) {
455
456 emit_vector( ctx,
457 &rmesa->tcl.spec,
458 (char *)VB->SecondaryColorPtr[0]->data,
459 3,
460 VB->SecondaryColorPtr[0]->stride,
461 count);
462 }
463
464 vfmt |= RADEON_CP_VC_FRMT_FPSPEC;
465 component[nr++] = &rmesa->tcl.spec;
466 }
467
468 /* FIXME: not sure if this is correct. May need to stitch this together with
469 secondary color. It seems odd that for primary color color and alpha values
470 are emitted together but for secondary color not. */
471 if (inputs & VERT_BIT_FOG) {
472 if (!rmesa->tcl.fog.buf)
473 emit_vecfog( ctx,
474 &(rmesa->tcl.fog),
475 (char *)VB->FogCoordPtr->data,
476 VB->FogCoordPtr->stride,
477 count);
478
479 vfmt |= RADEON_CP_VC_FRMT_FPFOG;
480 component[nr++] = &rmesa->tcl.fog;
481 }
482
483
484 vtx = (rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &
485 ~(RADEON_TCL_VTX_Q0|RADEON_TCL_VTX_Q1|RADEON_TCL_VTX_Q2));
486
487 for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
488 if (inputs & VERT_BIT_TEX(unit)) {
489 if (!rmesa->tcl.tex[unit].buf)
490 emit_tex_vector( ctx,
491 &(rmesa->tcl.tex[unit]),
492 (char *)VB->TexCoordPtr[unit]->data,
493 VB->TexCoordPtr[unit]->size,
494 VB->TexCoordPtr[unit]->stride,
495 count );
496
497 vfmt |= RADEON_ST_BIT(unit);
498 /* assume we need the 3rd coord if texgen is active for r/q OR at least
499 3 coords are submitted. This may not be 100% correct */
500 if (VB->TexCoordPtr[unit]->size >= 3) {
501 vtx |= RADEON_Q_BIT(unit);
502 vfmt |= RADEON_Q_BIT(unit);
503 }
504 if ( (ctx->Texture.Unit[unit].TexGenEnabled & (R_BIT | Q_BIT)) )
505 vtx |= RADEON_Q_BIT(unit);
506 else if ((VB->TexCoordPtr[unit]->size >= 3) &&
507 ((ctx->Texture.Unit[unit]._ReallyEnabled & (TEXTURE_CUBE_BIT)) == 0)) {
508 GLuint swaptexmatcol = (VB->TexCoordPtr[unit]->size - 3);
509 if (((rmesa->NeedTexMatrix >> unit) & 1) &&
510 (swaptexmatcol != ((rmesa->TexMatColSwap >> unit) & 1)))
511 radeonUploadTexMatrix( rmesa, unit, swaptexmatcol ) ;
512 }
513 component[nr++] = &rmesa->tcl.tex[unit];
514 }
515 }
516
517 if (vtx != rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT]) {
518 RADEON_STATECHANGE( rmesa, tcl );
519 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] = vtx;
520 }
521
522 rmesa->tcl.nr_aos_components = nr;
523 rmesa->tcl.vertex_format = vfmt;
524 }
525
526
527 void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
528 {
529 radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
530 GLuint unit;
531
532 #if 0
533 if (RADEON_DEBUG & DEBUG_VERTS)
534 _tnl_print_vert_flags( __FUNCTION__, newinputs );
535 #endif
536
537 if (newinputs & VERT_BIT_POS)
538 radeonReleaseDmaRegion( rmesa, &rmesa->tcl.obj, __FUNCTION__ );
539
540 if (newinputs & VERT_BIT_NORMAL)
541 radeonReleaseDmaRegion( rmesa, &rmesa->tcl.norm, __FUNCTION__ );
542
543 if (newinputs & VERT_BIT_COLOR0)
544 radeonReleaseDmaRegion( rmesa, &rmesa->tcl.rgba, __FUNCTION__ );
545
546 if (newinputs & VERT_BIT_COLOR1)
547 radeonReleaseDmaRegion( rmesa, &rmesa->tcl.spec, __FUNCTION__ );
548
549 if (newinputs & VERT_BIT_FOG)
550 radeonReleaseDmaRegion( rmesa, &rmesa->tcl.fog, __FUNCTION__ );
551
552 for (unit = 0 ; unit < ctx->Const.MaxTextureUnits; unit++) {
553 if (newinputs & VERT_BIT_TEX(unit))
554 radeonReleaseDmaRegion( rmesa, &rmesa->tcl.tex[unit], __FUNCTION__ );
555 }
556 }