Merge commit 'origin/gallium-0.1'
[mesa.git] / src / mesa / drivers / dri / radeon / radeon_maos_arrays.c
1 /**************************************************************************
2
3 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
4 Tungsten Graphics Inc., Cedar Park, Texas.
5
6 All Rights Reserved.
7
8 Permission is hereby granted, free of charge, to any person obtaining
9 a copy of this software and associated documentation files (the
10 "Software"), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
15
16 The above copyright notice and this permission notice (including the
17 next paragraph) shall be included in all copies or substantial
18 portions of the Software.
19
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
24 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
28 **************************************************************************/
29
30 /*
31 * Authors:
32 * Keith Whitwell <keith@tungstengraphics.com>
33 */
34
35 #include "main/glheader.h"
36 #include "main/imports.h"
37 #include "main/mtypes.h"
38 #include "main/macros.h"
39
40 #include "swrast_setup/swrast_setup.h"
41 #include "math/m_translate.h"
42 #include "tnl/tnl.h"
43
44 #include "radeon_context.h"
45 #include "radeon_ioctl.h"
46 #include "radeon_state.h"
47 #include "radeon_swtcl.h"
48 #include "radeon_maos.h"
49 #include "radeon_tcl.h"
50
51 #if 0
52 /* Usage:
53 * - from radeon_tcl_render
54 * - call radeonEmitArrays to ensure uptodate arrays in dma
55 * - emit primitives (new type?) which reference the data
56 * -- need to use elts for lineloop, quads, quadstrip/flat
57 * -- other primitives are all well-formed (need tristrip-1,fake-poly)
58 *
59 */
60 static void emit_ubyte_rgba3( GLcontext *ctx,
61 struct radeon_dma_region *rvb,
62 char *data,
63 int stride,
64 int count )
65 {
66 int i;
67 radeon_color_t *out = (radeon_color_t *)(rvb->start + rvb->address);
68
69 if (RADEON_DEBUG & DEBUG_VERTS)
70 fprintf(stderr, "%s count %d stride %d out %p\n",
71 __FUNCTION__, count, stride, (void *)out);
72
73 for (i = 0; i < count; i++) {
74 out->red = *data;
75 out->green = *(data+1);
76 out->blue = *(data+2);
77 out->alpha = 0xFF;
78 out++;
79 data += stride;
80 }
81 }
82
83 static void emit_ubyte_rgba4( GLcontext *ctx,
84 struct radeon_dma_region *rvb,
85 char *data,
86 int stride,
87 int count )
88 {
89 int i;
90 int *out = (int *)(rvb->address + rvb->start);
91
92 if (RADEON_DEBUG & DEBUG_VERTS)
93 fprintf(stderr, "%s count %d stride %d\n",
94 __FUNCTION__, count, stride);
95
96 if (stride == 4)
97 COPY_DWORDS( out, data, count );
98 else
99 for (i = 0; i < count; i++) {
100 *out++ = LE32_TO_CPU(*(int *)data);
101 data += stride;
102 }
103 }
104
105
106 static void emit_ubyte_rgba( GLcontext *ctx,
107 struct radeon_dma_region *rvb,
108 char *data,
109 int size,
110 int stride,
111 int count )
112 {
113 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
114
115 if (RADEON_DEBUG & DEBUG_VERTS)
116 fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
117
118 assert (!rvb->buf);
119
120 if (stride == 0) {
121 radeonAllocDmaRegion( rmesa, rvb, 4, 4 );
122 count = 1;
123 rvb->aos_start = GET_START(rvb);
124 rvb->aos_stride = 0;
125 rvb->aos_size = 1;
126 }
127 else {
128 radeonAllocDmaRegion( rmesa, rvb, 4 * count, 4 ); /* alignment? */
129 rvb->aos_start = GET_START(rvb);
130 rvb->aos_stride = 1;
131 rvb->aos_size = 1;
132 }
133
134 /* Emit the data
135 */
136 switch (size) {
137 case 3:
138 emit_ubyte_rgba3( ctx, rvb, data, stride, count );
139 break;
140 case 4:
141 emit_ubyte_rgba4( ctx, rvb, data, stride, count );
142 break;
143 default:
144 assert(0);
145 exit(1);
146 break;
147 }
148 }
149 #endif
150
/*
 * COPY_DWORDS( dst, src, nr )
 *
 * Copy `nr` 32-bit words from `src` to `dst`, then leave `dst`
 * pointing just past the last word written.
 *
 * dst - modifiable pointer lvalue (it is updated by the macro)
 * src - pointer expression of any type; it is converted as a whole,
 *       so arguments such as `data + 4` address the expected bytes
 * nr  - number of 32-bit words to copy
 */
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )                                     \
do {                                                                    \
   int __tmp;                                                           \
   /* "memory" clobber: the instruction writes memory that is not      \
    * named by any operand, so the compiler must not cache across it.  \
    */                                                                  \
   __asm__ __volatile__( "rep ; movsl"                                  \
                         : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)      \
                         : "0" (nr),                                    \
                           "D" ((long)(dst)),                           \
                           "S" ((long)(src))                            \
                         : "memory" );                                  \
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )                                     \
do {                                                                    \
   /* Evaluate src and nr exactly once; the trailing-underscore names  \
    * keep the temporaries from capturing caller identifiers.          \
    */                                                                  \
   const int *copy_src_ = (const int *)(src);                           \
   const int copy_nr_ = (nr);                                           \
   int copy_j_;                                                         \
   for ( copy_j_ = 0 ; copy_j_ < copy_nr_ ; copy_j_++ )                 \
      (dst)[copy_j_] = copy_src_[copy_j_];                              \
   (dst) += copy_nr_;                                                   \
} while (0)
#endif
170
171 static void emit_vecfog( GLcontext *ctx,
172 struct radeon_dma_region *rvb,
173 char *data,
174 int stride,
175 int count )
176 {
177 int i;
178 GLfloat *out;
179
180 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
181
182 if (RADEON_DEBUG & DEBUG_VERTS)
183 fprintf(stderr, "%s count %d stride %d\n",
184 __FUNCTION__, count, stride);
185
186 assert (!rvb->buf);
187
188 if (stride == 0) {
189 radeonAllocDmaRegion( rmesa, rvb, 4, 4 );
190 count = 1;
191 rvb->aos_start = GET_START(rvb);
192 rvb->aos_stride = 0;
193 rvb->aos_size = 1;
194 }
195 else {
196 radeonAllocDmaRegion( rmesa, rvb, count * 4, 4 ); /* alignment? */
197 rvb->aos_start = GET_START(rvb);
198 rvb->aos_stride = 1;
199 rvb->aos_size = 1;
200 }
201
202 /* Emit the data
203 */
204 out = (GLfloat *)(rvb->address + rvb->start);
205 for (i = 0; i < count; i++) {
206 out[0] = radeonComputeFogBlendFactor( ctx, *(GLfloat *)data );
207 out++;
208 data += stride;
209 }
210 }
211
212 static void emit_vec4( GLcontext *ctx,
213 struct radeon_dma_region *rvb,
214 char *data,
215 int stride,
216 int count )
217 {
218 int i;
219 int *out = (int *)(rvb->address + rvb->start);
220
221 if (RADEON_DEBUG & DEBUG_VERTS)
222 fprintf(stderr, "%s count %d stride %d\n",
223 __FUNCTION__, count, stride);
224
225 if (stride == 4)
226 COPY_DWORDS( out, data, count );
227 else
228 for (i = 0; i < count; i++) {
229 out[0] = *(int *)data;
230 out++;
231 data += stride;
232 }
233 }
234
235
236 static void emit_vec8( GLcontext *ctx,
237 struct radeon_dma_region *rvb,
238 char *data,
239 int stride,
240 int count )
241 {
242 int i;
243 int *out = (int *)(rvb->address + rvb->start);
244
245 if (RADEON_DEBUG & DEBUG_VERTS)
246 fprintf(stderr, "%s count %d stride %d\n",
247 __FUNCTION__, count, stride);
248
249 if (stride == 8)
250 COPY_DWORDS( out, data, count*2 );
251 else
252 for (i = 0; i < count; i++) {
253 out[0] = *(int *)data;
254 out[1] = *(int *)(data+4);
255 out += 2;
256 data += stride;
257 }
258 }
259
260 static void emit_vec12( GLcontext *ctx,
261 struct radeon_dma_region *rvb,
262 char *data,
263 int stride,
264 int count )
265 {
266 int i;
267 int *out = (int *)(rvb->address + rvb->start);
268
269 if (RADEON_DEBUG & DEBUG_VERTS)
270 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
271 __FUNCTION__, count, stride, (void *)out, (void *)data);
272
273 if (stride == 12)
274 COPY_DWORDS( out, data, count*3 );
275 else
276 for (i = 0; i < count; i++) {
277 out[0] = *(int *)data;
278 out[1] = *(int *)(data+4);
279 out[2] = *(int *)(data+8);
280 out += 3;
281 data += stride;
282 }
283 }
284
285 static void emit_vec16( GLcontext *ctx,
286 struct radeon_dma_region *rvb,
287 char *data,
288 int stride,
289 int count )
290 {
291 int i;
292 int *out = (int *)(rvb->address + rvb->start);
293
294 if (RADEON_DEBUG & DEBUG_VERTS)
295 fprintf(stderr, "%s count %d stride %d\n",
296 __FUNCTION__, count, stride);
297
298 if (stride == 16)
299 COPY_DWORDS( out, data, count*4 );
300 else
301 for (i = 0; i < count; i++) {
302 out[0] = *(int *)data;
303 out[1] = *(int *)(data+4);
304 out[2] = *(int *)(data+8);
305 out[3] = *(int *)(data+12);
306 out += 4;
307 data += stride;
308 }
309 }
310
311
312 static void emit_vector( GLcontext *ctx,
313 struct radeon_dma_region *rvb,
314 char *data,
315 int size,
316 int stride,
317 int count )
318 {
319 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
320
321 if (RADEON_DEBUG & DEBUG_VERTS)
322 fprintf(stderr, "%s count %d size %d stride %d\n",
323 __FUNCTION__, count, size, stride);
324
325 assert (!rvb->buf);
326
327 if (stride == 0) {
328 radeonAllocDmaRegion( rmesa, rvb, size * 4, 4 );
329 count = 1;
330 rvb->aos_start = GET_START(rvb);
331 rvb->aos_stride = 0;
332 rvb->aos_size = size;
333 }
334 else {
335 radeonAllocDmaRegion( rmesa, rvb, size * count * 4, 4 ); /* alignment? */
336 rvb->aos_start = GET_START(rvb);
337 rvb->aos_stride = size;
338 rvb->aos_size = size;
339 }
340
341 /* Emit the data
342 */
343 switch (size) {
344 case 1:
345 emit_vec4( ctx, rvb, data, stride, count );
346 break;
347 case 2:
348 emit_vec8( ctx, rvb, data, stride, count );
349 break;
350 case 3:
351 emit_vec12( ctx, rvb, data, stride, count );
352 break;
353 case 4:
354 emit_vec16( ctx, rvb, data, stride, count );
355 break;
356 default:
357 assert(0);
358 exit(1);
359 break;
360 }
361
362 }
363
364
365
366 static void emit_s0_vec( GLcontext *ctx,
367 struct radeon_dma_region *rvb,
368 char *data,
369 int stride,
370 int count )
371 {
372 int i;
373 int *out = (int *)(rvb->address + rvb->start);
374
375 if (RADEON_DEBUG & DEBUG_VERTS)
376 fprintf(stderr, "%s count %d stride %d\n",
377 __FUNCTION__, count, stride);
378
379 for (i = 0; i < count; i++) {
380 out[0] = *(int *)data;
381 out[1] = 0;
382 out += 2;
383 data += stride;
384 }
385 }
386
387 static void emit_stq_vec( GLcontext *ctx,
388 struct radeon_dma_region *rvb,
389 char *data,
390 int stride,
391 int count )
392 {
393 int i;
394 int *out = (int *)(rvb->address + rvb->start);
395
396 if (RADEON_DEBUG & DEBUG_VERTS)
397 fprintf(stderr, "%s count %d stride %d\n",
398 __FUNCTION__, count, stride);
399
400 for (i = 0; i < count; i++) {
401 out[0] = *(int *)data;
402 out[1] = *(int *)(data+4);
403 out[2] = *(int *)(data+12);
404 out += 3;
405 data += stride;
406 }
407 }
408
409
410
411
412 static void emit_tex_vector( GLcontext *ctx,
413 struct radeon_dma_region *rvb,
414 char *data,
415 int size,
416 int stride,
417 int count )
418 {
419 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
420 int emitsize;
421
422 if (RADEON_DEBUG & DEBUG_VERTS)
423 fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
424
425 assert (!rvb->buf);
426
427 switch (size) {
428 case 4: emitsize = 3; break;
429 case 3: emitsize = 3; break;
430 default: emitsize = 2; break;
431 }
432
433
434 if (stride == 0) {
435 radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize, 4 );
436 count = 1;
437 rvb->aos_start = GET_START(rvb);
438 rvb->aos_stride = 0;
439 rvb->aos_size = emitsize;
440 }
441 else {
442 radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize * count, 4 );
443 rvb->aos_start = GET_START(rvb);
444 rvb->aos_stride = emitsize;
445 rvb->aos_size = emitsize;
446 }
447
448
449 /* Emit the data
450 */
451 switch (size) {
452 case 1:
453 emit_s0_vec( ctx, rvb, data, stride, count );
454 break;
455 case 2:
456 emit_vec8( ctx, rvb, data, stride, count );
457 break;
458 case 3:
459 emit_vec12( ctx, rvb, data, stride, count );
460 break;
461 case 4:
462 emit_stq_vec( ctx, rvb, data, stride, count );
463 break;
464 default:
465 assert(0);
466 exit(1);
467 break;
468 }
469 }
470
471
472
473
474 /* Emit any changed arrays to new GART memory, re-emit a packet to
475 * update the arrays.
476 */
477 void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
478 {
479 radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
480 struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb;
481 struct radeon_dma_region **component = rmesa->tcl.aos_components;
482 GLuint nr = 0;
483 GLuint vfmt = 0;
484 GLuint count = VB->Count;
485 GLuint vtx, unit;
486
487 #if 0
488 if (RADEON_DEBUG & DEBUG_VERTS)
489 _tnl_print_vert_flags( __FUNCTION__, inputs );
490 #endif
491
492 if (1) {
493 if (!rmesa->tcl.obj.buf)
494 emit_vector( ctx,
495 &rmesa->tcl.obj,
496 (char *)VB->ObjPtr->data,
497 VB->ObjPtr->size,
498 VB->ObjPtr->stride,
499 count);
500
501 switch( VB->ObjPtr->size ) {
502 case 4: vfmt |= RADEON_CP_VC_FRMT_W0;
503 case 3: vfmt |= RADEON_CP_VC_FRMT_Z;
504 case 2: vfmt |= RADEON_CP_VC_FRMT_XY;
505 default:
506 break;
507 }
508 component[nr++] = &rmesa->tcl.obj;
509 }
510
511
512 if (inputs & VERT_BIT_NORMAL) {
513 if (!rmesa->tcl.norm.buf)
514 emit_vector( ctx,
515 &(rmesa->tcl.norm),
516 (char *)VB->NormalPtr->data,
517 3,
518 VB->NormalPtr->stride,
519 count);
520
521 vfmt |= RADEON_CP_VC_FRMT_N0;
522 component[nr++] = &rmesa->tcl.norm;
523 }
524
525 if (inputs & VERT_BIT_COLOR0) {
526 int emitsize;
527 if (VB->ColorPtr[0]->size == 4 &&
528 (VB->ColorPtr[0]->stride != 0 ||
529 VB->ColorPtr[0]->data[0][3] != 1.0)) {
530 vfmt |= RADEON_CP_VC_FRMT_FPCOLOR | RADEON_CP_VC_FRMT_FPALPHA;
531 emitsize = 4;
532 }
533
534 else {
535 vfmt |= RADEON_CP_VC_FRMT_FPCOLOR;
536 emitsize = 3;
537 }
538
539 if (!rmesa->tcl.rgba.buf)
540 emit_vector( ctx,
541 &(rmesa->tcl.rgba),
542 (char *)VB->ColorPtr[0]->data,
543 emitsize,
544 VB->ColorPtr[0]->stride,
545 count);
546
547
548 component[nr++] = &rmesa->tcl.rgba;
549 }
550
551
552 if (inputs & VERT_BIT_COLOR1) {
553 if (!rmesa->tcl.spec.buf) {
554
555 emit_vector( ctx,
556 &rmesa->tcl.spec,
557 (char *)VB->SecondaryColorPtr[0]->data,
558 3,
559 VB->SecondaryColorPtr[0]->stride,
560 count);
561 }
562
563 vfmt |= RADEON_CP_VC_FRMT_FPSPEC;
564 component[nr++] = &rmesa->tcl.spec;
565 }
566
567 /* FIXME: not sure if this is correct. May need to stitch this together with
568 secondary color. It seems odd that for primary color color and alpha values
569 are emitted together but for secondary color not. */
570 if (inputs & VERT_BIT_FOG) {
571 if (!rmesa->tcl.fog.buf)
572 emit_vecfog( ctx,
573 &(rmesa->tcl.fog),
574 (char *)VB->FogCoordPtr->data,
575 VB->FogCoordPtr->stride,
576 count);
577
578 vfmt |= RADEON_CP_VC_FRMT_FPFOG;
579 component[nr++] = &rmesa->tcl.fog;
580 }
581
582
583 vtx = (rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &
584 ~(RADEON_TCL_VTX_Q0|RADEON_TCL_VTX_Q1|RADEON_TCL_VTX_Q2));
585
586 for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
587 if (inputs & VERT_BIT_TEX(unit)) {
588 if (!rmesa->tcl.tex[unit].buf)
589 emit_tex_vector( ctx,
590 &(rmesa->tcl.tex[unit]),
591 (char *)VB->TexCoordPtr[unit]->data,
592 VB->TexCoordPtr[unit]->size,
593 VB->TexCoordPtr[unit]->stride,
594 count );
595
596 vfmt |= RADEON_ST_BIT(unit);
597 /* assume we need the 3rd coord if texgen is active for r/q OR at least
598 3 coords are submitted. This may not be 100% correct */
599 if (VB->TexCoordPtr[unit]->size >= 3) {
600 vtx |= RADEON_Q_BIT(unit);
601 vfmt |= RADEON_Q_BIT(unit);
602 }
603 if ( (ctx->Texture.Unit[unit].TexGenEnabled & (R_BIT | Q_BIT)) )
604 vtx |= RADEON_Q_BIT(unit);
605 else if ((VB->TexCoordPtr[unit]->size >= 3) &&
606 ((ctx->Texture.Unit[unit]._ReallyEnabled & (TEXTURE_CUBE_BIT)) == 0)) {
607 GLuint swaptexmatcol = (VB->TexCoordPtr[unit]->size - 3);
608 if (((rmesa->NeedTexMatrix >> unit) & 1) &&
609 (swaptexmatcol != ((rmesa->TexMatColSwap >> unit) & 1)))
610 radeonUploadTexMatrix( rmesa, unit, swaptexmatcol ) ;
611 }
612 component[nr++] = &rmesa->tcl.tex[unit];
613 }
614 }
615
616 if (vtx != rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT]) {
617 RADEON_STATECHANGE( rmesa, tcl );
618 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] = vtx;
619 }
620
621 rmesa->tcl.nr_aos_components = nr;
622 rmesa->tcl.vertex_format = vfmt;
623 }
624
625
626 void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
627 {
628 radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
629 GLuint unit;
630
631 #if 0
632 if (RADEON_DEBUG & DEBUG_VERTS)
633 _tnl_print_vert_flags( __FUNCTION__, newinputs );
634 #endif
635
636 if (newinputs & VERT_BIT_POS)
637 radeonReleaseDmaRegion( rmesa, &rmesa->tcl.obj, __FUNCTION__ );
638
639 if (newinputs & VERT_BIT_NORMAL)
640 radeonReleaseDmaRegion( rmesa, &rmesa->tcl.norm, __FUNCTION__ );
641
642 if (newinputs & VERT_BIT_COLOR0)
643 radeonReleaseDmaRegion( rmesa, &rmesa->tcl.rgba, __FUNCTION__ );
644
645 if (newinputs & VERT_BIT_COLOR1)
646 radeonReleaseDmaRegion( rmesa, &rmesa->tcl.spec, __FUNCTION__ );
647
648 if (newinputs & VERT_BIT_FOG)
649 radeonReleaseDmaRegion( rmesa, &rmesa->tcl.fog, __FUNCTION__ );
650
651 for (unit = 0 ; unit < ctx->Const.MaxTextureUnits; unit++) {
652 if (newinputs & VERT_BIT_TEX(unit))
653 radeonReleaseDmaRegion( rmesa, &rmesa->tcl.tex[unit], __FUNCTION__ );
654 }
655 }