49118b5e37bd1b29f2965fdca8be123ad06b62f2
[mesa.git] / src / mesa / drivers / dri / radeon / radeon_maos_arrays.c
1 /* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_maos_arrays.c,v 1.1 2002/10/30 12:51:55 alanh Exp $ */
2 /**************************************************************************
3
4 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
5 Tungsten Graphics Inc., Cedar Park, Texas.
6
7 All Rights Reserved.
8
9 Permission is hereby granted, free of charge, to any person obtaining
10 a copy of this software and associated documentation files (the
11 "Software"), to deal in the Software without restriction, including
12 without limitation the rights to use, copy, modify, merge, publish,
13 distribute, sublicense, and/or sell copies of the Software, and to
14 permit persons to whom the Software is furnished to do so, subject to
15 the following conditions:
16
17 The above copyright notice and this permission notice (including the
18 next paragraph) shall be included in all copies or substantial
19 portions of the Software.
20
21 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28
29 **************************************************************************/
30
31 /*
32 * Authors:
33 * Keith Whitwell <keith@tungstengraphics.com>
34 */
35
36 #include "glheader.h"
37 #include "imports.h"
38 #include "mtypes.h"
39 #include "macros.h"
40
41 #include "swrast_setup/swrast_setup.h"
42 #include "math/m_translate.h"
43 #include "tnl/tnl.h"
44 #include "tnl/t_context.h"
45
46 #include "radeon_context.h"
47 #include "radeon_ioctl.h"
48 #include "radeon_state.h"
49 #include "radeon_swtcl.h"
50 #include "radeon_maos.h"
51 #include "radeon_tcl.h"
52
53 #if 0
54 /* Usage:
55 * - from radeon_tcl_render
56 * - call radeonEmitArrays to ensure uptodate arrays in dma
57 * - emit primitives (new type?) which reference the data
58 * -- need to use elts for lineloop, quads, quadstrip/flat
59 * -- other primitives are all well-formed (need tristrip-1,fake-poly)
60 *
61 */
62 static void emit_ubyte_rgba3( GLcontext *ctx,
63 struct radeon_dma_region *rvb,
64 char *data,
65 int stride,
66 int count )
67 {
68 int i;
69 radeon_color_t *out = (radeon_color_t *)(rvb->start + rvb->address);
70
71 if (RADEON_DEBUG & DEBUG_VERTS)
72 fprintf(stderr, "%s count %d stride %d out %p\n",
73 __FUNCTION__, count, stride, (void *)out);
74
75 for (i = 0; i < count; i++) {
76 out->red = *data;
77 out->green = *(data+1);
78 out->blue = *(data+2);
79 out->alpha = 0xFF;
80 out++;
81 data += stride;
82 }
83 }
84
85 static void emit_ubyte_rgba4( GLcontext *ctx,
86 struct radeon_dma_region *rvb,
87 char *data,
88 int stride,
89 int count )
90 {
91 int i;
92 int *out = (int *)(rvb->address + rvb->start);
93
94 if (RADEON_DEBUG & DEBUG_VERTS)
95 fprintf(stderr, "%s count %d stride %d\n",
96 __FUNCTION__, count, stride);
97
98 if (stride == 4)
99 COPY_DWORDS( out, data, count );
100 else
101 for (i = 0; i < count; i++) {
102 *out++ = LE32_TO_CPU(*(int *)data);
103 data += stride;
104 }
105 }
106
107
108 static void emit_ubyte_rgba( GLcontext *ctx,
109 struct radeon_dma_region *rvb,
110 char *data,
111 int size,
112 int stride,
113 int count )
114 {
115 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
116
117 if (RADEON_DEBUG & DEBUG_VERTS)
118 fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
119
120 assert (!rvb->buf);
121
122 if (stride == 0) {
123 radeonAllocDmaRegion( rmesa, rvb, 4, 4 );
124 count = 1;
125 rvb->aos_start = GET_START(rvb);
126 rvb->aos_stride = 0;
127 rvb->aos_size = 1;
128 }
129 else {
130 radeonAllocDmaRegion( rmesa, rvb, 4 * count, 4 ); /* alignment? */
131 rvb->aos_start = GET_START(rvb);
132 rvb->aos_stride = 1;
133 rvb->aos_size = 1;
134 }
135
136 /* Emit the data
137 */
138 switch (size) {
139 case 3:
140 emit_ubyte_rgba3( ctx, rvb, data, stride, count );
141 break;
142 case 4:
143 emit_ubyte_rgba4( ctx, rvb, data, stride, count );
144 break;
145 default:
146 assert(0);
147 exit(1);
148 break;
149 }
150 }
151 #endif
152
/* COPY_DWORDS( dst, src, nr )
 *
 * Copy `nr` ints (dwords) from `src` to `dst`.
 *
 *   dst - a modifiable pointer lvalue; the portable variant leaves it
 *         advanced by `nr` elements, and the x86 variant writes the final
 *         destination register back into it.
 *   src - any pointer expression; it is reinterpreted as int data.
 *   nr  - number of dwords to copy.
 *
 * Every argument is parenthesized inside the expansion so that expression
 * arguments such as `ptr + 4` or `a + b` are cast and compared as a whole.
 * The portable variant evaluates `src` and `nr` exactly once and uses
 * uniquely named locals so it cannot capture a caller's variable.
 */
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )                                     \
do {                                                                    \
   int __tmp;                                                           \
   __asm__ __volatile__( "rep ; movsl"                                  \
                         : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)      \
                         : "0" (nr),                                    \
                           "D" ((long)(dst)),                           \
                           "S" ((long)(src)) );                         \
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )                                     \
do {                                                                    \
   const int *copy_src_ = (const int *)(src);                           \
   const int copy_nr_ = (nr);                                           \
   int copy_i_;                                                         \
   for ( copy_i_ = 0 ; copy_i_ < copy_nr_ ; copy_i_++ )                 \
      (dst)[copy_i_] = copy_src_[copy_i_];                              \
   (dst) += copy_nr_;                                                   \
} while (0)
#endif
172
173 static void emit_vecfog( GLcontext *ctx,
174 struct radeon_dma_region *rvb,
175 char *data,
176 int stride,
177 int count )
178 {
179 int i;
180 GLfloat *out;
181
182 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
183
184 if (RADEON_DEBUG & DEBUG_VERTS)
185 fprintf(stderr, "%s count %d stride %d\n",
186 __FUNCTION__, count, stride);
187
188 assert (!rvb->buf);
189
190 if (stride == 0) {
191 radeonAllocDmaRegion( rmesa, rvb, 4, 4 );
192 count = 1;
193 rvb->aos_start = GET_START(rvb);
194 rvb->aos_stride = 0;
195 rvb->aos_size = 1;
196 }
197 else {
198 radeonAllocDmaRegion( rmesa, rvb, count * 4, 4 ); /* alignment? */
199 rvb->aos_start = GET_START(rvb);
200 rvb->aos_stride = 1;
201 rvb->aos_size = 1;
202 }
203
204 /* Emit the data
205 */
206 out = (GLfloat *)(rvb->address + rvb->start);
207 for (i = 0; i < count; i++) {
208 out[0] = radeonComputeFogBlendFactor( ctx, *(GLfloat *)data );
209 out++;
210 data += stride;
211 }
212 }
213
214 static void emit_vec4( GLcontext *ctx,
215 struct radeon_dma_region *rvb,
216 char *data,
217 int stride,
218 int count )
219 {
220 int i;
221 int *out = (int *)(rvb->address + rvb->start);
222
223 if (RADEON_DEBUG & DEBUG_VERTS)
224 fprintf(stderr, "%s count %d stride %d\n",
225 __FUNCTION__, count, stride);
226
227 if (stride == 4)
228 COPY_DWORDS( out, data, count );
229 else
230 for (i = 0; i < count; i++) {
231 out[0] = *(int *)data;
232 out++;
233 data += stride;
234 }
235 }
236
237
238 static void emit_vec8( GLcontext *ctx,
239 struct radeon_dma_region *rvb,
240 char *data,
241 int stride,
242 int count )
243 {
244 int i;
245 int *out = (int *)(rvb->address + rvb->start);
246
247 if (RADEON_DEBUG & DEBUG_VERTS)
248 fprintf(stderr, "%s count %d stride %d\n",
249 __FUNCTION__, count, stride);
250
251 if (stride == 8)
252 COPY_DWORDS( out, data, count*2 );
253 else
254 for (i = 0; i < count; i++) {
255 out[0] = *(int *)data;
256 out[1] = *(int *)(data+4);
257 out += 2;
258 data += stride;
259 }
260 }
261
262 static void emit_vec12( GLcontext *ctx,
263 struct radeon_dma_region *rvb,
264 char *data,
265 int stride,
266 int count )
267 {
268 int i;
269 int *out = (int *)(rvb->address + rvb->start);
270
271 if (RADEON_DEBUG & DEBUG_VERTS)
272 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
273 __FUNCTION__, count, stride, (void *)out, (void *)data);
274
275 if (stride == 12)
276 COPY_DWORDS( out, data, count*3 );
277 else
278 for (i = 0; i < count; i++) {
279 out[0] = *(int *)data;
280 out[1] = *(int *)(data+4);
281 out[2] = *(int *)(data+8);
282 out += 3;
283 data += stride;
284 }
285 }
286
287 static void emit_vec16( GLcontext *ctx,
288 struct radeon_dma_region *rvb,
289 char *data,
290 int stride,
291 int count )
292 {
293 int i;
294 int *out = (int *)(rvb->address + rvb->start);
295
296 if (RADEON_DEBUG & DEBUG_VERTS)
297 fprintf(stderr, "%s count %d stride %d\n",
298 __FUNCTION__, count, stride);
299
300 if (stride == 16)
301 COPY_DWORDS( out, data, count*4 );
302 else
303 for (i = 0; i < count; i++) {
304 out[0] = *(int *)data;
305 out[1] = *(int *)(data+4);
306 out[2] = *(int *)(data+8);
307 out[3] = *(int *)(data+12);
308 out += 4;
309 data += stride;
310 }
311 }
312
313
314 static void emit_vector( GLcontext *ctx,
315 struct radeon_dma_region *rvb,
316 char *data,
317 int size,
318 int stride,
319 int count )
320 {
321 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
322
323 if (RADEON_DEBUG & DEBUG_VERTS)
324 fprintf(stderr, "%s count %d size %d stride %d\n",
325 __FUNCTION__, count, size, stride);
326
327 assert (!rvb->buf);
328
329 if (stride == 0) {
330 radeonAllocDmaRegion( rmesa, rvb, size * 4, 4 );
331 count = 1;
332 rvb->aos_start = GET_START(rvb);
333 rvb->aos_stride = 0;
334 rvb->aos_size = size;
335 }
336 else {
337 radeonAllocDmaRegion( rmesa, rvb, size * count * 4, 4 ); /* alignment? */
338 rvb->aos_start = GET_START(rvb);
339 rvb->aos_stride = size;
340 rvb->aos_size = size;
341 }
342
343 /* Emit the data
344 */
345 switch (size) {
346 case 1:
347 emit_vec4( ctx, rvb, data, stride, count );
348 break;
349 case 2:
350 emit_vec8( ctx, rvb, data, stride, count );
351 break;
352 case 3:
353 emit_vec12( ctx, rvb, data, stride, count );
354 break;
355 case 4:
356 emit_vec16( ctx, rvb, data, stride, count );
357 break;
358 default:
359 assert(0);
360 exit(1);
361 break;
362 }
363
364 }
365
366
367
368 static void emit_s0_vec( GLcontext *ctx,
369 struct radeon_dma_region *rvb,
370 char *data,
371 int stride,
372 int count )
373 {
374 int i;
375 int *out = (int *)(rvb->address + rvb->start);
376
377 if (RADEON_DEBUG & DEBUG_VERTS)
378 fprintf(stderr, "%s count %d stride %d\n",
379 __FUNCTION__, count, stride);
380
381 for (i = 0; i < count; i++) {
382 out[0] = *(int *)data;
383 out[1] = 0;
384 out += 2;
385 data += stride;
386 }
387 }
388
389 static void emit_stq_vec( GLcontext *ctx,
390 struct radeon_dma_region *rvb,
391 char *data,
392 int stride,
393 int count )
394 {
395 int i;
396 int *out = (int *)(rvb->address + rvb->start);
397
398 if (RADEON_DEBUG & DEBUG_VERTS)
399 fprintf(stderr, "%s count %d stride %d\n",
400 __FUNCTION__, count, stride);
401
402 for (i = 0; i < count; i++) {
403 out[0] = *(int *)data;
404 out[1] = *(int *)(data+4);
405 out[2] = *(int *)(data+12);
406 out += 3;
407 data += stride;
408 }
409 }
410
411
412
413
414 static void emit_tex_vector( GLcontext *ctx,
415 struct radeon_dma_region *rvb,
416 char *data,
417 int size,
418 int stride,
419 int count )
420 {
421 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
422 int emitsize;
423
424 if (RADEON_DEBUG & DEBUG_VERTS)
425 fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
426
427 assert (!rvb->buf);
428
429 switch (size) {
430 case 4: emitsize = 3; break;
431 case 3: emitsize = 3; break;
432 default: emitsize = 2; break;
433 }
434
435
436 if (stride == 0) {
437 radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize, 4 );
438 count = 1;
439 rvb->aos_start = GET_START(rvb);
440 rvb->aos_stride = 0;
441 rvb->aos_size = emitsize;
442 }
443 else {
444 radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize * count, 4 );
445 rvb->aos_start = GET_START(rvb);
446 rvb->aos_stride = emitsize;
447 rvb->aos_size = emitsize;
448 }
449
450
451 /* Emit the data
452 */
453 switch (size) {
454 case 1:
455 emit_s0_vec( ctx, rvb, data, stride, count );
456 break;
457 case 2:
458 emit_vec8( ctx, rvb, data, stride, count );
459 break;
460 case 3:
461 emit_vec12( ctx, rvb, data, stride, count );
462 break;
463 case 4:
464 emit_stq_vec( ctx, rvb, data, stride, count );
465 break;
466 default:
467 assert(0);
468 exit(1);
469 break;
470 }
471 }
472
473
474
475
476 /* Emit any changed arrays to new GART memory, re-emit a packet to
477 * update the arrays.
478 */
479 void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
480 {
481 radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
482 struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb;
483 struct radeon_dma_region **component = rmesa->tcl.aos_components;
484 GLuint nr = 0;
485 GLuint vfmt = 0;
486 GLuint count = VB->Count;
487 GLuint vtx, unit;
488
489 #if 0
490 if (RADEON_DEBUG & DEBUG_VERTS)
491 _tnl_print_vert_flags( __FUNCTION__, inputs );
492 #endif
493
494 if (1) {
495 if (!rmesa->tcl.obj.buf)
496 emit_vector( ctx,
497 &rmesa->tcl.obj,
498 (char *)VB->ObjPtr->data,
499 VB->ObjPtr->size,
500 VB->ObjPtr->stride,
501 count);
502
503 switch( VB->ObjPtr->size ) {
504 case 4: vfmt |= RADEON_CP_VC_FRMT_W0;
505 case 3: vfmt |= RADEON_CP_VC_FRMT_Z;
506 case 2: vfmt |= RADEON_CP_VC_FRMT_XY;
507 default:
508 break;
509 }
510 component[nr++] = &rmesa->tcl.obj;
511 }
512
513
514 if (inputs & VERT_BIT_NORMAL) {
515 if (!rmesa->tcl.norm.buf)
516 emit_vector( ctx,
517 &(rmesa->tcl.norm),
518 (char *)VB->NormalPtr->data,
519 3,
520 VB->NormalPtr->stride,
521 count);
522
523 vfmt |= RADEON_CP_VC_FRMT_N0;
524 component[nr++] = &rmesa->tcl.norm;
525 }
526
527 if (inputs & VERT_BIT_COLOR0) {
528 int emitsize;
529 if (VB->ColorPtr[0]->size == 4 &&
530 (VB->ColorPtr[0]->stride != 0 ||
531 VB->ColorPtr[0]->data[0][3] != 1.0)) {
532 vfmt |= RADEON_CP_VC_FRMT_FPCOLOR | RADEON_CP_VC_FRMT_FPALPHA;
533 emitsize = 4;
534 }
535
536 else {
537 vfmt |= RADEON_CP_VC_FRMT_FPCOLOR;
538 emitsize = 3;
539 }
540
541 if (!rmesa->tcl.rgba.buf)
542 emit_vector( ctx,
543 &(rmesa->tcl.rgba),
544 (char *)VB->ColorPtr[0]->data,
545 emitsize,
546 VB->ColorPtr[0]->stride,
547 count);
548
549
550 component[nr++] = &rmesa->tcl.rgba;
551 }
552
553
554 if (inputs & VERT_BIT_COLOR1) {
555 if (!rmesa->tcl.spec.buf) {
556
557 emit_vector( ctx,
558 &rmesa->tcl.spec,
559 (char *)VB->SecondaryColorPtr[0]->data,
560 3,
561 VB->SecondaryColorPtr[0]->stride,
562 count);
563 }
564
565 vfmt |= RADEON_CP_VC_FRMT_FPSPEC;
566 component[nr++] = &rmesa->tcl.spec;
567 }
568
569 /* FIXME: not sure if this is correct. May need to stitch this together with
570 secondary color. It seems odd that for primary color color and alpha values
571 are emitted together but for secondary color not. */
572 if (inputs & VERT_BIT_FOG) {
573 if (!rmesa->tcl.fog.buf)
574 emit_vecfog( ctx,
575 &(rmesa->tcl.fog),
576 (char *)VB->FogCoordPtr->data,
577 VB->FogCoordPtr->stride,
578 count);
579
580 vfmt |= RADEON_CP_VC_FRMT_FPFOG;
581 component[nr++] = &rmesa->tcl.fog;
582 }
583
584
585 vtx = (rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &
586 ~(RADEON_TCL_VTX_Q0|RADEON_TCL_VTX_Q1|RADEON_TCL_VTX_Q2));
587
588 for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
589 if (inputs & VERT_BIT_TEX(unit)) {
590 if (!rmesa->tcl.tex[unit].buf)
591 emit_tex_vector( ctx,
592 &(rmesa->tcl.tex[unit]),
593 (char *)VB->TexCoordPtr[unit]->data,
594 VB->TexCoordPtr[unit]->size,
595 VB->TexCoordPtr[unit]->stride,
596 count );
597
598 vfmt |= RADEON_ST_BIT(unit);
599 /* assume we need the 3rd coord if texgen is active for r/q OR at least
600 3 coords are submitted. This may not be 100% correct */
601 if (VB->TexCoordPtr[unit]->size >= 3) {
602 vtx |= RADEON_Q_BIT(unit);
603 vfmt |= RADEON_Q_BIT(unit);
604 }
605 if ( (ctx->Texture.Unit[unit].TexGenEnabled & (R_BIT | Q_BIT)) )
606 vtx |= RADEON_Q_BIT(unit);
607 else if ((VB->TexCoordPtr[unit]->size >= 3) &&
608 ((ctx->Texture.Unit[unit]._ReallyEnabled & (TEXTURE_CUBE_BIT)) == 0)) {
609 GLuint swaptexmatcol = (VB->TexCoordPtr[unit]->size - 3);
610 if (((rmesa->NeedTexMatrix >> unit) & 1) &&
611 (swaptexmatcol != ((rmesa->TexMatColSwap >> unit) & 1)))
612 radeonUploadTexMatrix( rmesa, unit, swaptexmatcol ) ;
613 }
614 component[nr++] = &rmesa->tcl.tex[unit];
615 }
616 }
617
618 if (vtx != rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT]) {
619 RADEON_STATECHANGE( rmesa, tcl );
620 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] = vtx;
621 }
622
623 rmesa->tcl.nr_aos_components = nr;
624 rmesa->tcl.vertex_format = vfmt;
625 }
626
627
628 void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
629 {
630 radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
631 GLuint unit;
632
633 #if 0
634 if (RADEON_DEBUG & DEBUG_VERTS)
635 _tnl_print_vert_flags( __FUNCTION__, newinputs );
636 #endif
637
638 if (newinputs & VERT_BIT_POS)
639 radeonReleaseDmaRegion( rmesa, &rmesa->tcl.obj, __FUNCTION__ );
640
641 if (newinputs & VERT_BIT_NORMAL)
642 radeonReleaseDmaRegion( rmesa, &rmesa->tcl.norm, __FUNCTION__ );
643
644 if (newinputs & VERT_BIT_COLOR0)
645 radeonReleaseDmaRegion( rmesa, &rmesa->tcl.rgba, __FUNCTION__ );
646
647 if (newinputs & VERT_BIT_COLOR1)
648 radeonReleaseDmaRegion( rmesa, &rmesa->tcl.spec, __FUNCTION__ );
649
650 if (newinputs & VERT_BIT_FOG)
651 radeonReleaseDmaRegion( rmesa, &rmesa->tcl.fog, __FUNCTION__ );
652
653 for (unit = 0 ; unit < ctx->Const.MaxTextureUnits; unit++) {
654 if (newinputs & VERT_BIT_TEX(unit))
655 radeonReleaseDmaRegion( rmesa, &rmesa->tcl.tex[unit], __FUNCTION__ );
656 }
657 }