1 /* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_maos_arrays.c,v 1.1 2002/10/30 12:51:55 alanh Exp $ */
2 /**************************************************************************
4 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
5 Tungsten Graphics Inc., Cedar Park, Texas.
9 Permission is hereby granted, free of charge, to any person obtaining
10 a copy of this software and associated documentation files (the
11 "Software"), to deal in the Software without restriction, including
12 without limitation the rights to use, copy, modify, merge, publish,
13 distribute, sublicense, and/or sell copies of the Software, and to
14 permit persons to whom the Software is furnished to do so, subject to
15 the following conditions:
17 The above copyright notice and this permission notice (including the
18 next paragraph) shall be included in all copies or substantial
19 portions of the Software.
21 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 **************************************************************************/
33 * Keith Whitwell <keith@tungstengraphics.com>
41 #include "swrast_setup/swrast_setup.h"
42 #include "math/m_translate.h"
44 #include "tnl/t_context.h"
46 #include "radeon_context.h"
47 #include "radeon_ioctl.h"
48 #include "radeon_state.h"
49 #include "radeon_swtcl.h"
50 #include "radeon_maos.h"
51 #include "radeon_tcl.h"
55 * - from radeon_tcl_render
56 * - call radeonEmitArrays to ensure up-to-date arrays in DMA
57 * - emit primitives (new type?) which reference the data
58 * -- need to use elts for lineloop, quads, quadstrip/flat
59 * -- other primitives are all well-formed (need tristrip-1,fake-poly)
/* Write `count` colours into the DMA region `rvb` as radeon_color_t
 * entries, filling the colour channels from individual bytes of `data`
 * (green from data[1], blue from data[2]).
 *
 * NOTE(review): the parameter list and part of the loop body are not
 * visible in this excerpt; presumably red comes from data[0] and `data`
 * advances by `stride` each iteration -- confirm against the full file.
 */
62 static void emit_ubyte_rgba3( GLcontext
*ctx
,
63 struct radeon_dma_region
*rvb
,
69 radeon_color_t
*out
= (radeon_color_t
*)(rvb
->start
+ rvb
->address
);
71 if (RADEON_DEBUG
& DEBUG_VERTS
)
72 fprintf(stderr
, "%s count %d stride %d out %p\n",
73 __FUNCTION__
, count
, stride
, (void *)out
);
75 for (i
= 0; i
< count
; i
++) {
77 out
->green
= *(data
+1);
78 out
->blue
= *(data
+2);
/* Copy `count` 32-bit colour words from `data` into the DMA region `rvb`.
 *
 * Two copy paths are visible: a bulk COPY_DWORDS of `count` dwords, and a
 * per-element loop that stores LE32_TO_CPU() of each source word.
 *
 * NOTE(review): the condition choosing between the two paths is not
 * visible here; presumably the bulk copy is used when `stride` is 4 --
 * confirm against the full file.
 */
85 static void emit_ubyte_rgba4( GLcontext
*ctx
,
86 struct radeon_dma_region
*rvb
,
92 int *out
= (int *)(rvb
->address
+ rvb
->start
);
94 if (RADEON_DEBUG
& DEBUG_VERTS
)
95 fprintf(stderr
, "%s count %d stride %d\n",
96 __FUNCTION__
, count
, stride
);
99 COPY_DWORDS( out
, data
, count
);
101 for (i
= 0; i
< count
; i
++) {
102 *out
++ = LE32_TO_CPU(*(int *)data
);
/* Allocate DMA space in `rvb` for a ubyte colour array and fill it.
 *
 * Two allocations are visible: one of 4 bytes and one of 4 * count bytes;
 * after either, rvb->aos_start is set from GET_START(rvb).  The data is
 * then written by emit_ubyte_rgba3() or emit_ubyte_rgba4().
 *
 * NOTE(review): the conditions selecting the allocation size and the emit
 * helper are not visible in this excerpt; presumably they depend on
 * `stride` and `size` respectively -- confirm against the full file.
 */
108 static void emit_ubyte_rgba( GLcontext
*ctx
,
109 struct radeon_dma_region
*rvb
,
115 radeonContextPtr rmesa
= RADEON_CONTEXT(ctx
);
117 if (RADEON_DEBUG
& DEBUG_VERTS
)
118 fprintf(stderr
, "%s %d/%d\n", __FUNCTION__
, count
, size
);
123 radeonAllocDmaRegion( rmesa
, rvb
, 4, 4 );
125 rvb
->aos_start
= GET_START(rvb
);
130 radeonAllocDmaRegion( rmesa
, rvb
, 4 * count
, 4 ); /* alignment? */
131 rvb
->aos_start
= GET_START(rvb
);
140 emit_ubyte_rgba3( ctx
, rvb
, data
, stride
, count
);
143 emit_ubyte_rgba4( ctx
, rvb
, data
, stride
, count
);
/* COPY_DWORDS( dst, src, nr ): copy `nr` 32-bit words from `src` to `dst`.
 *
 * When USE_X86_ASM is defined the copy is done with a "rep ; movsl"
 * inline-asm sequence; the other definition is a plain loop that assigns
 * dst[j] = ((int *)src)[j] for j in [0, nr).
 *
 * NOTE(review): the remainder of both macro bodies and the #else/#endif
 * lines are not visible in this excerpt.
 */
153 #if defined(USE_X86_ASM)
154 #define COPY_DWORDS( dst, src, nr ) \
157 __asm__ __volatile__( "rep ; movsl" \
158 : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
164 #define COPY_DWORDS( dst, src, nr ) \
167 for ( j = 0 ; j < nr ; j++ ) \
168 dst[j] = ((int *)src)[j]; \
/* Allocate DMA space in `rvb` for a fog array and fill it with blend
 * factors.
 *
 * Two allocations are visible: one of 4 bytes and one of count * 4 bytes;
 * after either, rvb->aos_start is set from GET_START(rvb).  Each source
 * float read from `data` is passed through radeonComputeFogBlendFactor()
 * and the result stored to the output.
 *
 * NOTE(review): the condition selecting the allocation size and the
 * pointer-advance statements of the loop are not visible here; presumably
 * the small allocation is the stride == 0 (constant value) case -- confirm.
 */
173 static void emit_vecfog( GLcontext
*ctx
,
174 struct radeon_dma_region
*rvb
,
182 radeonContextPtr rmesa
= RADEON_CONTEXT(ctx
);
184 if (RADEON_DEBUG
& DEBUG_VERTS
)
185 fprintf(stderr
, "%s count %d stride %d\n",
186 __FUNCTION__
, count
, stride
);
191 radeonAllocDmaRegion( rmesa
, rvb
, 4, 4 );
193 rvb
->aos_start
= GET_START(rvb
);
198 radeonAllocDmaRegion( rmesa
, rvb
, count
* 4, 4 ); /* alignment? */
199 rvb
->aos_start
= GET_START(rvb
);
206 out
= (GLfloat
*)(rvb
->address
+ rvb
->start
);
207 for (i
= 0; i
< count
; i
++) {
208 out
[0] = radeonComputeFogBlendFactor( ctx
, *(GLfloat
*)data
);
/* Copy `count` elements of one dword (4 bytes) each from `data` into the
 * DMA region `rvb`.
 *
 * Two copy paths are visible: a bulk COPY_DWORDS of `count` dwords, and a
 * per-element loop storing one int per iteration.
 *
 * NOTE(review): the condition choosing between the paths and the pointer
 * advances inside the loop are not visible; presumably the bulk copy is
 * used when `stride` equals the element size -- confirm.
 */
214 static void emit_vec4( GLcontext
*ctx
,
215 struct radeon_dma_region
*rvb
,
221 int *out
= (int *)(rvb
->address
+ rvb
->start
);
223 if (RADEON_DEBUG
& DEBUG_VERTS
)
224 fprintf(stderr
, "%s count %d stride %d\n",
225 __FUNCTION__
, count
, stride
);
228 COPY_DWORDS( out
, data
, count
);
230 for (i
= 0; i
< count
; i
++) {
231 out
[0] = *(int *)data
;
/* Copy `count` elements of two dwords (8 bytes) each from `data` into the
 * DMA region `rvb`.
 *
 * Two copy paths are visible: a bulk COPY_DWORDS of count*2 dwords, and a
 * per-element loop storing the ints at byte offsets 0 and 4.
 *
 * NOTE(review): the condition choosing between the paths and the pointer
 * advances inside the loop are not visible; presumably the bulk copy is
 * used when `stride` equals the element size -- confirm.
 */
238 static void emit_vec8( GLcontext
*ctx
,
239 struct radeon_dma_region
*rvb
,
245 int *out
= (int *)(rvb
->address
+ rvb
->start
);
247 if (RADEON_DEBUG
& DEBUG_VERTS
)
248 fprintf(stderr
, "%s count %d stride %d\n",
249 __FUNCTION__
, count
, stride
);
252 COPY_DWORDS( out
, data
, count
*2 );
254 for (i
= 0; i
< count
; i
++) {
255 out
[0] = *(int *)data
;
256 out
[1] = *(int *)(data
+4);
/* Copy `count` elements of three dwords (12 bytes) each from `data` into
 * the DMA region `rvb`.
 *
 * Two copy paths are visible: a bulk COPY_DWORDS of count*3 dwords, and a
 * per-element loop storing the ints at byte offsets 0, 4 and 8.
 *
 * NOTE(review): the condition choosing between the paths and the pointer
 * advances inside the loop are not visible; presumably the bulk copy is
 * used when `stride` equals the element size -- confirm.
 */
262 static void emit_vec12( GLcontext
*ctx
,
263 struct radeon_dma_region
*rvb
,
269 int *out
= (int *)(rvb
->address
+ rvb
->start
);
271 if (RADEON_DEBUG
& DEBUG_VERTS
)
272 fprintf(stderr
, "%s count %d stride %d out %p data %p\n",
273 __FUNCTION__
, count
, stride
, (void *)out
, (void *)data
);
276 COPY_DWORDS( out
, data
, count
*3 );
278 for (i
= 0; i
< count
; i
++) {
279 out
[0] = *(int *)data
;
280 out
[1] = *(int *)(data
+4);
281 out
[2] = *(int *)(data
+8);
/* Copy `count` elements of four dwords (16 bytes) each from `data` into
 * the DMA region `rvb`.
 *
 * Two copy paths are visible: a bulk COPY_DWORDS of count*4 dwords, and a
 * per-element loop storing the ints at byte offsets 0, 4, 8 and 12.
 *
 * NOTE(review): the condition choosing between the paths and the pointer
 * advances inside the loop are not visible; presumably the bulk copy is
 * used when `stride` equals the element size -- confirm.
 */
287 static void emit_vec16( GLcontext
*ctx
,
288 struct radeon_dma_region
*rvb
,
294 int *out
= (int *)(rvb
->address
+ rvb
->start
);
296 if (RADEON_DEBUG
& DEBUG_VERTS
)
297 fprintf(stderr
, "%s count %d stride %d\n",
298 __FUNCTION__
, count
, stride
);
301 COPY_DWORDS( out
, data
, count
*4 );
303 for (i
= 0; i
< count
; i
++) {
304 out
[0] = *(int *)data
;
305 out
[1] = *(int *)(data
+4);
306 out
[2] = *(int *)(data
+8);
307 out
[3] = *(int *)(data
+12);
/* Allocate DMA space in `rvb` for an array of `size`-dword elements and
 * fill it from `data`.
 *
 * Two allocations are visible: one of size * 4 bytes (a single element)
 * and one of size * count * 4 bytes.  Both record aos_start from
 * GET_START(rvb) and aos_size = size; the larger allocation also sets
 * aos_stride = size.  The copy itself is delegated to emit_vec4(),
 * emit_vec8(), emit_vec12() or emit_vec16().
 *
 * NOTE(review): the condition selecting the allocation, the aos_stride
 * value on the single-element path, and the switch selecting the emit
 * helper are not visible in this excerpt; presumably they key on
 * `stride` and `size` -- confirm against the full file.
 */
314 static void emit_vector( GLcontext
*ctx
,
315 struct radeon_dma_region
*rvb
,
321 radeonContextPtr rmesa
= RADEON_CONTEXT(ctx
);
323 if (RADEON_DEBUG
& DEBUG_VERTS
)
324 fprintf(stderr
, "%s count %d size %d stride %d\n",
325 __FUNCTION__
, count
, size
, stride
);
330 radeonAllocDmaRegion( rmesa
, rvb
, size
* 4, 4 );
332 rvb
->aos_start
= GET_START(rvb
);
334 rvb
->aos_size
= size
;
337 radeonAllocDmaRegion( rmesa
, rvb
, size
* count
* 4, 4 ); /* alignment? */
338 rvb
->aos_start
= GET_START(rvb
);
339 rvb
->aos_stride
= size
;
340 rvb
->aos_size
= size
;
347 emit_vec4( ctx
, rvb
, data
, stride
, count
);
350 emit_vec8( ctx
, rvb
, data
, stride
, count
);
353 emit_vec12( ctx
, rvb
, data
, stride
, count
);
356 emit_vec16( ctx
, rvb
, data
, stride
, count
);
/* Emit `count` texture coordinates into the DMA region `rvb`, copying the
 * first dword of each source element from `data`.
 *
 * NOTE(review): the rest of the loop body is not visible in this excerpt;
 * the name suggests the second output component is written as zero --
 * confirm against the full file.
 */
368 static void emit_s0_vec( GLcontext
*ctx
,
369 struct radeon_dma_region
*rvb
,
375 int *out
= (int *)(rvb
->address
+ rvb
->start
);
377 if (RADEON_DEBUG
& DEBUG_VERTS
)
378 fprintf(stderr
, "%s count %d stride %d\n",
379 __FUNCTION__
, count
, stride
);
381 for (i
= 0; i
< count
; i
++) {
382 out
[0] = *(int *)data
;
/* Emit `count` three-dword texture coordinates into the DMA region `rvb`
 * from four-component source data: the dwords at byte offsets 0, 4 and 12
 * of each source element are copied, and the dword at offset 8 is skipped.
 *
 * NOTE(review): the pointer advances inside the loop are not visible in
 * this excerpt.
 */
389 static void emit_stq_vec( GLcontext
*ctx
,
390 struct radeon_dma_region
*rvb
,
396 int *out
= (int *)(rvb
->address
+ rvb
->start
);
398 if (RADEON_DEBUG
& DEBUG_VERTS
)
399 fprintf(stderr
, "%s count %d stride %d\n",
400 __FUNCTION__
, count
, stride
);
402 for (i
= 0; i
< count
; i
++) {
403 out
[0] = *(int *)data
;
404 out
[1] = *(int *)(data
+4);
405 out
[2] = *(int *)(data
+12);
/* Allocate DMA space in `rvb` for a texture-coordinate array and fill it.
 *
 * The number of dwords emitted per element (`emitsize`) is 3 when the
 * selector is 4 or 3, and 2 otherwise.  Two allocations are visible: one
 * of 4 * emitsize bytes (a single element) and one of
 * 4 * emitsize * count bytes.  Both record aos_start from GET_START(rvb)
 * and aos_size = emitsize; the larger allocation also sets
 * aos_stride = emitsize.  The copy is delegated to emit_s0_vec(),
 * emit_vec8(), emit_vec12() or emit_stq_vec().
 *
 * NOTE(review): the switch expressions and the allocation condition are
 * not visible in this excerpt; presumably they key on `size` and `stride`
 * -- confirm against the full file.
 */
414 static void emit_tex_vector( GLcontext
*ctx
,
415 struct radeon_dma_region
*rvb
,
421 radeonContextPtr rmesa
= RADEON_CONTEXT(ctx
);
424 if (RADEON_DEBUG
& DEBUG_VERTS
)
425 fprintf(stderr
, "%s %d/%d\n", __FUNCTION__
, count
, size
);
430 case 4: emitsize
= 3; break;
431 case 3: emitsize
= 3; break;
432 default: emitsize
= 2; break;
437 radeonAllocDmaRegion( rmesa
, rvb
, 4 * emitsize
, 4 );
439 rvb
->aos_start
= GET_START(rvb
);
441 rvb
->aos_size
= emitsize
;
444 radeonAllocDmaRegion( rmesa
, rvb
, 4 * emitsize
* count
, 4 );
445 rvb
->aos_start
= GET_START(rvb
);
446 rvb
->aos_stride
= emitsize
;
447 rvb
->aos_size
= emitsize
;
455 emit_s0_vec( ctx
, rvb
, data
, stride
, count
);
458 emit_vec8( ctx
, rvb
, data
, stride
, count
);
461 emit_vec12( ctx
, rvb
, data
, stride
, count
);
464 emit_stq_vec( ctx
, rvb
, data
, stride
, count
);
476 /* Emit any changed arrays to new GART memory, re-emit a packet to update the arrays. */
/* Build the list of array-of-structure components and the vertex format
 * for the vertex attributes selected by `inputs`.
 *
 * For position, normal, primary colour, secondary colour, fog and each
 * texture unit, the visible code:
 *   - checks whether the matching rmesa->tcl region already has a buffer
 *     (the `!...buf` tests) before emitting data from the TNL vertex
 *     buffer VB,
 *   - ORs the corresponding RADEON_CP_VC_FRMT_* / ST / Q bits into `vfmt`,
 *   - appends the region to `component`.
 * It also recomputes the Q bits of TCL_OUTPUT_VTXFMT in `vtx`, flags a tcl
 * state change when that value differs from the stored one, and finally
 * stores the component count and vertex format in rmesa->tcl.
 *
 * NOTE(review): most of the emit calls, the local declarations and all
 * closing braces are missing from this excerpt, so the exact nesting of
 * the conditionals cannot be confirmed from here.
 */
479 void radeonEmitArrays( GLcontext
*ctx
, GLuint inputs
)
481 radeonContextPtr rmesa
= RADEON_CONTEXT( ctx
);
482 struct vertex_buffer
*VB
= &TNL_CONTEXT( ctx
)->vb
;
483 struct radeon_dma_region
**component
= rmesa
->tcl
.aos_components
;
486 GLuint count
= VB
->Count
;
490 if (RADEON_DEBUG
& DEBUG_VERTS
)
491 _tnl_print_vert_flags( __FUNCTION__
, inputs
);
495 if (!rmesa
->tcl
.obj
.buf
)
498 (char *)VB
->ObjPtr
->data
,
/* The cases below have no break between them: a larger position size
 * also picks up the format bits of the smaller sizes. */
503 switch( VB
->ObjPtr
->size
) {
504 case 4: vfmt
|= RADEON_CP_VC_FRMT_W0
;
505 case 3: vfmt
|= RADEON_CP_VC_FRMT_Z
;
506 case 2: vfmt
|= RADEON_CP_VC_FRMT_XY
;
510 component
[nr
++] = &rmesa
->tcl
.obj
;
514 if (inputs
& VERT_BIT_NORMAL
) {
515 if (!rmesa
->tcl
.norm
.buf
)
518 (char *)VB
->NormalPtr
->data
,
520 VB
->NormalPtr
->stride
,
523 vfmt
|= RADEON_CP_VC_FRMT_N0
;
524 component
[nr
++] = &rmesa
->tcl
.norm
;
527 if (inputs
& VERT_BIT_COLOR0
) {
529 if (VB
->ColorPtr
[0]->size
== 4 &&
530 (VB
->ColorPtr
[0]->stride
!= 0 ||
531 VB
->ColorPtr
[0]->data
[0][3] != 1.0)) {
532 vfmt
|= RADEON_CP_VC_FRMT_FPCOLOR
| RADEON_CP_VC_FRMT_FPALPHA
;
537 vfmt
|= RADEON_CP_VC_FRMT_FPCOLOR
;
541 if (!rmesa
->tcl
.rgba
.buf
)
544 (char *)VB
->ColorPtr
[0]->data
,
546 VB
->ColorPtr
[0]->stride
,
550 component
[nr
++] = &rmesa
->tcl
.rgba
;
554 if (inputs
& VERT_BIT_COLOR1
) {
555 if (!rmesa
->tcl
.spec
.buf
) {
559 (char *)VB
->SecondaryColorPtr
[0]->data
,
561 VB
->SecondaryColorPtr
[0]->stride
,
565 vfmt
|= RADEON_CP_VC_FRMT_FPSPEC
;
566 component
[nr
++] = &rmesa
->tcl
.spec
;
569 /* FIXME: not sure if this is correct. May need to stitch this together with
570 secondary color. It seems odd that for primary color color and alpha values
571 are emitted together but for secondary color not. */
572 if (inputs
& VERT_BIT_FOG
) {
573 if (!rmesa
->tcl
.fog
.buf
)
576 (char *)VB
->FogCoordPtr
->data
,
577 VB
->FogCoordPtr
->stride
,
580 vfmt
|= RADEON_CP_VC_FRMT_FPFOG
;
581 component
[nr
++] = &rmesa
->tcl
.fog
;
585 vtx
= (rmesa
->hw
.tcl
.cmd
[TCL_OUTPUT_VTXFMT
] &
586 ~(RADEON_TCL_VTX_Q0
|RADEON_TCL_VTX_Q1
|RADEON_TCL_VTX_Q2
));
588 for (unit
= 0; unit
< ctx
->Const
.MaxTextureUnits
; unit
++) {
589 if (inputs
& VERT_BIT_TEX(unit
)) {
590 if (!rmesa
->tcl
.tex
[unit
].buf
)
591 emit_tex_vector( ctx
,
592 &(rmesa
->tcl
.tex
[unit
]),
593 (char *)VB
->TexCoordPtr
[unit
]->data
,
594 VB
->TexCoordPtr
[unit
]->size
,
595 VB
->TexCoordPtr
[unit
]->stride
,
598 vfmt
|= RADEON_ST_BIT(unit
);
599 /* assume we need the 3rd coord if texgen is active for r/q OR at least
600 3 coords are submitted. This may not be 100% correct */
601 if (VB
->TexCoordPtr
[unit
]->size
>= 3) {
602 vtx
|= RADEON_Q_BIT(unit
);
603 vfmt
|= RADEON_Q_BIT(unit
);
605 if ( (ctx
->Texture
.Unit
[unit
].TexGenEnabled
& (R_BIT
| Q_BIT
)) )
606 vtx
|= RADEON_Q_BIT(unit
);
607 else if ((VB
->TexCoordPtr
[unit
]->size
>= 3) &&
608 ((ctx
->Texture
.Unit
[unit
]._ReallyEnabled
& (TEXTURE_CUBE_BIT
)) == 0)) {
609 GLuint swaptexmatcol
= (VB
->TexCoordPtr
[unit
]->size
- 3);
610 if (((rmesa
->NeedTexMatrix
>> unit
) & 1) &&
611 (swaptexmatcol
!= ((rmesa
->TexMatColSwap
>> unit
) & 1)))
612 radeonUploadTexMatrix( rmesa
, unit
, swaptexmatcol
) ;
614 component
[nr
++] = &rmesa
->tcl
.tex
[unit
];
618 if (vtx
!= rmesa
->hw
.tcl
.cmd
[TCL_OUTPUT_VTXFMT
]) {
619 RADEON_STATECHANGE( rmesa
, tcl
);
620 rmesa
->hw
.tcl
.cmd
[TCL_OUTPUT_VTXFMT
] = vtx
;
623 rmesa
->tcl
.nr_aos_components
= nr
;
624 rmesa
->tcl
.vertex_format
= vfmt
;
/* Release the DMA regions backing the vertex attributes named in
 * `newinputs`.
 *
 * For each of VERT_BIT_POS, VERT_BIT_NORMAL, VERT_BIT_COLOR0,
 * VERT_BIT_COLOR1 and VERT_BIT_FOG that is set, the matching
 * rmesa->tcl region (obj, norm, rgba, spec, fog) is passed to
 * radeonReleaseDmaRegion(); the same is done for tcl.tex[unit] for every
 * texture unit below ctx->Const.MaxTextureUnits whose VERT_BIT_TEX(unit)
 * is set.
 */
628 void radeonReleaseArrays( GLcontext
*ctx
, GLuint newinputs
)
630 radeonContextPtr rmesa
= RADEON_CONTEXT( ctx
);
634 if (RADEON_DEBUG
& DEBUG_VERTS
)
635 _tnl_print_vert_flags( __FUNCTION__
, newinputs
);
638 if (newinputs
& VERT_BIT_POS
)
639 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.obj
, __FUNCTION__
);
641 if (newinputs
& VERT_BIT_NORMAL
)
642 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.norm
, __FUNCTION__
);
644 if (newinputs
& VERT_BIT_COLOR0
)
645 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.rgba
, __FUNCTION__
);
647 if (newinputs
& VERT_BIT_COLOR1
)
648 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.spec
, __FUNCTION__
);
650 if (newinputs
& VERT_BIT_FOG
)
651 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.fog
, __FUNCTION__
);
653 for (unit
= 0 ; unit
< ctx
->Const
.MaxTextureUnits
; unit
++) {
654 if (newinputs
& VERT_BIT_TEX(unit
))
655 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.tex
[unit
], __FUNCTION__
);