1 /* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_maos_arrays.c,v 1.1 2002/10/30 12:51:55 alanh Exp $ */
2 /**************************************************************************
4 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
5 Tungsten Graphics Inc., Cedar Park, Texas.
9 Permission is hereby granted, free of charge, to any person obtaining
10 a copy of this software and associated documentation files (the
11 "Software"), to deal in the Software without restriction, including
12 without limitation the rights to use, copy, modify, merge, publish,
13 distribute, sublicense, and/or sell copies of the Software, and to
14 permit persons to whom the Software is furnished to do so, subject to
15 the following conditions:
17 The above copyright notice and this permission notice (including the
18 next paragraph) shall be included in all copies or substantial
19 portions of the Software.
21 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 **************************************************************************/
33 * Keith Whitwell <keith@tungstengraphics.com>
41 #include "swrast_setup/swrast_setup.h"
42 #include "math/m_translate.h"
44 #include "tnl/t_context.h"
46 #include "radeon_context.h"
47 #include "radeon_ioctl.h"
48 #include "radeon_state.h"
49 #include "radeon_swtcl.h"
50 #include "radeon_maos.h"
54 * - from radeon_tcl_render
55 * - call radeonEmitArrays to ensure up-to-date arrays in dma
56 * - emit primitives (new type?) which reference the data
57 * -- need to use elts for lineloop, quads, quadstrip/flat
58 * -- other primitives are all well-formed (need tristrip-1,fake-poly)
/*
 * emit_ubyte_rgba3: store colour data into the DMA region `rvb`.
 *
 * The destination is treated as an array of radeon_color_t located at
 * rvb->start + rvb->address.  When DEBUG_VERTS is set in RADEON_DEBUG the
 * function name, count, stride and destination pointer are printed to stderr.
 * Each of the `count` loop iterations visibly copies data[1] into the green
 * field and data[2] into the blue field.
 *
 * NOTE(review): this listing is missing lines of the original function (the
 * remaining parameters, any other component stores and the per-iteration
 * pointer advances are not visible) - confirm against the complete source.
 */
61 static void emit_ubyte_rgba3( GLcontext
*ctx
,
62 struct radeon_dma_region
*rvb
,
68 radeon_color_t
*out
= (radeon_color_t
*)(rvb
->start
+ rvb
->address
);
70 if (RADEON_DEBUG
& DEBUG_VERTS
)
71 fprintf(stderr
, "%s count %d stride %d out %p\n",
72 __FUNCTION__
, count
, stride
, (void *)out
);
74 for (i
= 0; i
< count
; i
++) {
76 out
->green
= *(data
+1);
77 out
->blue
= *(data
+2);
/*
 * emit_ubyte_rgba4: copy packed 4-byte colours into the DMA region `rvb`.
 *
 * The destination is addressed as an int array at rvb->address + rvb->start.
 * Two copy paths are visible: a bulk COPY_DWORDS of `count` dwords, and a
 * loop that stores one LE32_TO_CPU-converted dword per element.
 *
 * NOTE(review): the condition selecting between the two paths (presumably a
 * stride test) and the source pointer advance are not visible in this
 * listing - confirm against the complete source.
 */
84 static void emit_ubyte_rgba4( GLcontext
*ctx
,
85 struct radeon_dma_region
*rvb
,
91 int *out
= (int *)(rvb
->address
+ rvb
->start
);
93 if (RADEON_DEBUG
& DEBUG_VERTS
)
94 fprintf(stderr
, "%s count %d stride %d\n",
95 __FUNCTION__
, count
, stride
);
98 COPY_DWORDS( out
, data
, count
);
100 for (i
= 0; i
< count
; i
++) {
101 *out
++ = LE32_TO_CPU(*(int *)data
);
/*
 * emit_ubyte_rgba: allocate DMA space for a ubyte colour array and fill it.
 *
 * Two allocations are visible: one of 4 bytes and one of 4 * count bytes,
 * both with an alignment argument of 4; after either one rvb->aos_start is
 * set from GET_START(rvb).  The data is then written by emit_ubyte_rgba3 or
 * emit_ubyte_rgba4.
 *
 * NOTE(review): the conditions choosing between the two allocations and
 * between the two emit helpers are missing from this listing (presumably
 * they depend on `stride` and `size`) - confirm against the complete source.
 */
107 static void emit_ubyte_rgba( GLcontext
*ctx
,
108 struct radeon_dma_region
*rvb
,
114 radeonContextPtr rmesa
= RADEON_CONTEXT(ctx
);
116 if (RADEON_DEBUG
& DEBUG_VERTS
)
117 fprintf(stderr
, "%s %d/%d\n", __FUNCTION__
, count
, size
);
122 radeonAllocDmaRegion( rmesa
, rvb
, 4, 4 );
124 rvb
->aos_start
= GET_START(rvb
);
129 radeonAllocDmaRegion( rmesa
, rvb
, 4 * count
, 4 ); /* alignment? */
130 rvb
->aos_start
= GET_START(rvb
);
139 emit_ubyte_rgba3( ctx
, rvb
, data
, stride
, count
);
142 emit_ubyte_rgba4( ctx
, rvb
, data
, stride
, count
);
/*
 * COPY_DWORDS( dst, src, nr ): copy `nr` 32-bit words from src to dst.
 *
 * Two definitions are visible.  Under USE_X86_ASM the copy is done with an
 * inline "rep ; movsl"; the other definition is a plain loop that assigns
 * dst[j] = ((int *)src)[j] for j in [0, nr).
 *
 * NOTE(review): the #else / #endif lines and the remainder of both macro
 * bodies are missing from this listing, so whether `dst` is advanced past the
 * copied data cannot be determined here - confirm against the complete source.
 */
152 #if defined(USE_X86_ASM)
153 #define COPY_DWORDS( dst, src, nr ) \
156 __asm__ __volatile__( "rep ; movsl" \
157 : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
163 #define COPY_DWORDS( dst, src, nr ) \
166 for ( j = 0 ; j < nr ; j++ ) \
167 dst[j] = ((int *)src)[j]; \
/*
 * emit_vec4: write one dword per element into the DMA region `rvb`.
 *
 * The destination is an int array at rvb->address + rvb->start.  Two paths
 * are visible: a bulk COPY_DWORDS of `count` dwords, and a loop that stores
 * the dword at `data` into out[0] on each of `count` iterations.
 *
 * NOTE(review): the test selecting the path and the pointer advances inside
 * the loop are not visible in this listing - confirm against the complete
 * source.
 */
173 static void emit_vec4( GLcontext
*ctx
,
174 struct radeon_dma_region
*rvb
,
180 int *out
= (int *)(rvb
->address
+ rvb
->start
);
182 if (RADEON_DEBUG
& DEBUG_VERTS
)
183 fprintf(stderr
, "%s count %d stride %d\n",
184 __FUNCTION__
, count
, stride
);
187 COPY_DWORDS( out
, data
, count
);
189 for (i
= 0; i
< count
; i
++) {
190 out
[0] = *(int *)data
;
/*
 * emit_vec8: write two dwords per element into the DMA region `rvb`.
 *
 * The destination is an int array at rvb->address + rvb->start.  Two paths
 * are visible: a bulk COPY_DWORDS of count*2 dwords, and a loop that stores
 * the dwords at byte offsets 0 and 4 of `data` into out[0] and out[1].
 *
 * NOTE(review): the test selecting the path and the pointer advances inside
 * the loop are not visible in this listing - confirm against the complete
 * source.
 */
197 static void emit_vec8( GLcontext
*ctx
,
198 struct radeon_dma_region
*rvb
,
204 int *out
= (int *)(rvb
->address
+ rvb
->start
);
206 if (RADEON_DEBUG
& DEBUG_VERTS
)
207 fprintf(stderr
, "%s count %d stride %d\n",
208 __FUNCTION__
, count
, stride
);
211 COPY_DWORDS( out
, data
, count
*2 );
213 for (i
= 0; i
< count
; i
++) {
214 out
[0] = *(int *)data
;
215 out
[1] = *(int *)(data
+4);
/*
 * emit_vec12: write three dwords per element into the DMA region `rvb`.
 *
 * The destination is an int array at rvb->address + rvb->start.  The debug
 * trace additionally prints the destination and source pointers.  Two paths
 * are visible: a bulk COPY_DWORDS of count*3 dwords, and a loop that stores
 * the dwords at byte offsets 0, 4 and 8 of `data` into out[0..2].
 *
 * NOTE(review): the test selecting the path and the pointer advances inside
 * the loop are not visible in this listing - confirm against the complete
 * source.
 */
221 static void emit_vec12( GLcontext
*ctx
,
222 struct radeon_dma_region
*rvb
,
228 int *out
= (int *)(rvb
->address
+ rvb
->start
);
230 if (RADEON_DEBUG
& DEBUG_VERTS
)
231 fprintf(stderr
, "%s count %d stride %d out %p data %p\n",
232 __FUNCTION__
, count
, stride
, (void *)out
, (void *)data
);
235 COPY_DWORDS( out
, data
, count
*3 );
237 for (i
= 0; i
< count
; i
++) {
238 out
[0] = *(int *)data
;
239 out
[1] = *(int *)(data
+4);
240 out
[2] = *(int *)(data
+8);
/*
 * emit_vec16: write four dwords per element into the DMA region `rvb`.
 *
 * The destination is an int array at rvb->address + rvb->start.  Two paths
 * are visible: a bulk COPY_DWORDS of count*4 dwords, and a loop that stores
 * the dwords at byte offsets 0, 4, 8 and 12 of `data` into out[0..3].
 *
 * NOTE(review): the test selecting the path and the pointer advances inside
 * the loop are not visible in this listing - confirm against the complete
 * source.
 */
246 static void emit_vec16( GLcontext
*ctx
,
247 struct radeon_dma_region
*rvb
,
253 int *out
= (int *)(rvb
->address
+ rvb
->start
);
255 if (RADEON_DEBUG
& DEBUG_VERTS
)
256 fprintf(stderr
, "%s count %d stride %d\n",
257 __FUNCTION__
, count
, stride
);
260 COPY_DWORDS( out
, data
, count
*4 );
262 for (i
= 0; i
< count
; i
++) {
263 out
[0] = *(int *)data
;
264 out
[1] = *(int *)(data
+4);
265 out
[2] = *(int *)(data
+8);
266 out
[3] = *(int *)(data
+12);
/*
 * emit_vector: allocate DMA space for a vector array of `size` dwords per
 * element and fill it using the matching fixed-width helper.
 *
 * Two allocations are visible:
 *   - size * 4 bytes, after which aos_start and aos_size are set;
 *   - size * count * 4 bytes, after which aos_start, aos_stride (= size) and
 *     aos_size (= size) are set.
 * The data is then written by one of emit_vec4, emit_vec8, emit_vec12 or
 * emit_vec16.
 *
 * NOTE(review): the condition choosing between the two allocations
 * (presumably stride == 0), the aos_stride value for the first case and the
 * switch selecting the helper are missing from this listing - confirm
 * against the complete source.
 */
273 static void emit_vector( GLcontext
*ctx
,
274 struct radeon_dma_region
*rvb
,
280 radeonContextPtr rmesa
= RADEON_CONTEXT(ctx
);
282 if (RADEON_DEBUG
& DEBUG_VERTS
)
283 fprintf(stderr
, "%s count %d size %d stride %d\n",
284 __FUNCTION__
, count
, size
, stride
);
289 radeonAllocDmaRegion( rmesa
, rvb
, size
* 4, 4 );
291 rvb
->aos_start
= GET_START(rvb
);
293 rvb
->aos_size
= size
;
296 radeonAllocDmaRegion( rmesa
, rvb
, size
* count
* 4, 4 ); /* alignment? */
297 rvb
->aos_start
= GET_START(rvb
);
298 rvb
->aos_stride
= size
;
299 rvb
->aos_size
= size
;
306 emit_vec4( ctx
, rvb
, data
, stride
, count
);
309 emit_vec8( ctx
, rvb
, data
, stride
, count
);
312 emit_vec12( ctx
, rvb
, data
, stride
, count
);
315 emit_vec16( ctx
, rvb
, data
, stride
, count
);
/*
 * emit_s0_vec: emit texture coordinates from a source that supplies only
 * the first component.
 *
 * The destination is an int array at rvb->address + rvb->start.  On each of
 * `count` iterations the dword at `data` is stored into out[0].
 *
 * NOTE(review): the store to the second output dword (the name suggests a
 * constant 0) and the pointer advances are not visible in this listing -
 * confirm against the complete source.
 */
327 static void emit_s0_vec( GLcontext
*ctx
,
328 struct radeon_dma_region
*rvb
,
334 int *out
= (int *)(rvb
->address
+ rvb
->start
);
336 if (RADEON_DEBUG
& DEBUG_VERTS
)
337 fprintf(stderr
, "%s count %d stride %d\n",
338 __FUNCTION__
, count
, stride
);
340 for (i
= 0; i
< count
; i
++) {
341 out
[0] = *(int *)data
;
/*
 * emit_stq_vec: emit three dwords per element from a four-component source.
 *
 * The destination is an int array at rvb->address + rvb->start.  Each
 * iteration copies the dwords at byte offsets 0, 4 and 12 of `data` into
 * out[0..2]; the dword at offset 8 is skipped.
 *
 * NOTE(review): the pointer advances inside the loop are not visible in this
 * listing - confirm against the complete source.
 */
348 static void emit_stq_vec( GLcontext
*ctx
,
349 struct radeon_dma_region
*rvb
,
355 int *out
= (int *)(rvb
->address
+ rvb
->start
);
357 if (RADEON_DEBUG
& DEBUG_VERTS
)
358 fprintf(stderr
, "%s count %d stride %d\n",
359 __FUNCTION__
, count
, stride
);
361 for (i
= 0; i
< count
; i
++) {
362 out
[0] = *(int *)data
;
363 out
[1] = *(int *)(data
+4);
364 out
[2] = *(int *)(data
+12);
/*
 * emit_tex_vector: allocate DMA space for a texture-coordinate array and
 * fill it.
 *
 * `emitsize` is the number of dwords emitted per element: 3 for switch cases
 * 4 and 3, 2 otherwise.  Two allocations are visible: 4 * emitsize bytes
 * (followed by aos_start / aos_size updates) and 4 * emitsize * count bytes
 * (followed by aos_start / aos_stride / aos_size updates, both set to
 * emitsize).  The data is then written by one of emit_s0_vec, emit_vec8,
 * emit_vec12 or emit_stq_vec.
 *
 * NOTE(review): the switch expressions (presumably `size`), the condition
 * choosing between the two allocations and the aos_stride value for the
 * first case are missing from this listing - confirm against the complete
 * source.
 */
373 static void emit_tex_vector( GLcontext
*ctx
,
374 struct radeon_dma_region
*rvb
,
380 radeonContextPtr rmesa
= RADEON_CONTEXT(ctx
);
383 if (RADEON_DEBUG
& DEBUG_VERTS
)
384 fprintf(stderr
, "%s %d/%d\n", __FUNCTION__
, count
, size
);
389 case 4: emitsize
= 3; break;
390 case 3: emitsize
= 3; break;
391 default: emitsize
= 2; break;
396 radeonAllocDmaRegion( rmesa
, rvb
, 4 * emitsize
, 4 );
398 rvb
->aos_start
= GET_START(rvb
);
400 rvb
->aos_size
= emitsize
;
403 radeonAllocDmaRegion( rmesa
, rvb
, 4 * emitsize
* count
, 4 );
404 rvb
->aos_start
= GET_START(rvb
);
405 rvb
->aos_stride
= emitsize
;
406 rvb
->aos_size
= emitsize
;
414 emit_s0_vec( ctx
, rvb
, data
, stride
, count
);
417 emit_vec8( ctx
, rvb
, data
, stride
, count
);
420 emit_vec12( ctx
, rvb
, data
, stride
, count
);
423 emit_stq_vec( ctx
, rvb
, data
, stride
, count
);
/*
 * radeonEmitArrays( ctx, inputs ): build the list of array components and
 * the vertex-format words for the current TNL vertex buffer.
 *
 * Visible behaviour, in order:
 *   - Position: when rmesa->tcl.obj.buf is NULL the object-coordinate array
 *     is emitted; vfmt gains W0 / Z / XY bits from VB->ObjPtr->size, with
 *     case 4 falling through to 3 and 2.
 *   - Normals (VERT_BIT_NORMAL): emitted if tcl.norm.buf is NULL; vfmt gains
 *     RADEON_CP_VC_FRMT_N0.
 *   - Primary colour (VERT_BIT_COLOR0): FPCOLOR | FPALPHA is used when the
 *     colour has 4 components and either a non-zero stride or a first alpha
 *     value different from 1.0; a plain FPCOLOR assignment is also visible.
 *   - Secondary colour (VERT_BIT_COLOR1): emitted if tcl.spec.buf is NULL;
 *     vfmt gains RADEON_CP_VC_FRMT_FPSPEC.
 *   - Texture units 0 .. ctx->Const.MaxTextureUnits-1 with VERT_BIT_TEX(unit)
 *     set: emitted through emit_tex_vector if tcl.tex[unit].buf is NULL;
 *     vfmt gains RADEON_ST_BIT(unit); when at least 3 coordinates are
 *     supplied both vtx and vfmt gain RADEON_Q_BIT(unit).  Active R/Q texgen
 *     also sets the Q bit in vtx; otherwise, for non-cube textures with
 *     >= 3 coordinates, the texture matrix is re-uploaded if the required
 *     column swap differs from rmesa->TexMatColSwap.
 *   - vtx starts as hw.tcl.cmd[TCL_OUTPUT_VTXFMT] with the Q0..Q2 bits
 *     cleared; if the final value differs from the stored one, a tcl state
 *     change is flagged and the new value written back.
 *   - nr and vfmt are stored in tcl.nr_aos_components / tcl.vertex_format.
 *
 * NOTE(review): this listing is missing lines of the original function (the
 * names of the emit calls for position/normal/colour, the else branches and
 * most braces, and any trailing call that emits the array packet) - confirm
 * against the complete source.
 */
435 /* Emit any changed arrays to new GART memory, re-emit a packet to
438 void radeonEmitArrays( GLcontext
*ctx
, GLuint inputs
)
440 radeonContextPtr rmesa
= RADEON_CONTEXT( ctx
);
441 struct vertex_buffer
*VB
= &TNL_CONTEXT( ctx
)->vb
;
442 struct radeon_dma_region
**component
= rmesa
->tcl
.aos_components
;
445 GLuint count
= VB
->Count
;
449 if (RADEON_DEBUG
& DEBUG_VERTS
)
450 _tnl_print_vert_flags( __FUNCTION__
, inputs
);
454 if (!rmesa
->tcl
.obj
.buf
)
457 (char *)VB
->ObjPtr
->data
,
462 switch( VB
->ObjPtr
->size
) {
463 case 4: vfmt
|= RADEON_CP_VC_FRMT_W0
;
464 case 3: vfmt
|= RADEON_CP_VC_FRMT_Z
;
465 case 2: vfmt
|= RADEON_CP_VC_FRMT_XY
;
469 component
[nr
++] = &rmesa
->tcl
.obj
;
473 if (inputs
& VERT_BIT_NORMAL
) {
474 if (!rmesa
->tcl
.norm
.buf
)
477 (char *)VB
->NormalPtr
->data
,
479 VB
->NormalPtr
->stride
,
482 vfmt
|= RADEON_CP_VC_FRMT_N0
;
483 component
[nr
++] = &rmesa
->tcl
.norm
;
486 if (inputs
& VERT_BIT_COLOR0
) {
488 if (VB
->ColorPtr
[0]->size
== 4 &&
489 (VB
->ColorPtr
[0]->stride
!= 0 ||
490 VB
->ColorPtr
[0]->data
[0][3] != 1.0)) {
491 vfmt
|= RADEON_CP_VC_FRMT_FPCOLOR
| RADEON_CP_VC_FRMT_FPALPHA
;
496 vfmt
|= RADEON_CP_VC_FRMT_FPCOLOR
;
500 if (!rmesa
->tcl
.rgba
.buf
)
503 (char *)VB
->ColorPtr
[0]->data
,
505 VB
->ColorPtr
[0]->stride
,
509 component
[nr
++] = &rmesa
->tcl
.rgba
;
513 if (inputs
& VERT_BIT_COLOR1
) {
514 if (!rmesa
->tcl
.spec
.buf
) {
518 (char *)VB
->SecondaryColorPtr
[0]->data
,
520 VB
->SecondaryColorPtr
[0]->stride
,
524 vfmt
|= RADEON_CP_VC_FRMT_FPSPEC
;
525 component
[nr
++] = &rmesa
->tcl
.spec
;
528 vtx
= (rmesa
->hw
.tcl
.cmd
[TCL_OUTPUT_VTXFMT
] &
529 ~(RADEON_TCL_VTX_Q0
|RADEON_TCL_VTX_Q1
|RADEON_TCL_VTX_Q2
));
531 for (unit
= 0; unit
< ctx
->Const
.MaxTextureUnits
; unit
++) {
532 if (inputs
& VERT_BIT_TEX(unit
)) {
533 if (!rmesa
->tcl
.tex
[unit
].buf
)
534 emit_tex_vector( ctx
,
535 &(rmesa
->tcl
.tex
[unit
]),
536 (char *)VB
->TexCoordPtr
[unit
]->data
,
537 VB
->TexCoordPtr
[unit
]->size
,
538 VB
->TexCoordPtr
[unit
]->stride
,
541 vfmt
|= RADEON_ST_BIT(unit
);
542 /* assume we need the 3rd coord if texgen is active for r/q OR at least
543 3 coords are submitted. This may not be 100% correct */
544 if (VB
->TexCoordPtr
[unit
]->size
>= 3) {
545 vtx
|= RADEON_Q_BIT(unit
);
546 vfmt
|= RADEON_Q_BIT(unit
);
548 if ( (ctx
->Texture
.Unit
[unit
].TexGenEnabled
& (R_BIT
| Q_BIT
)) )
549 vtx
|= RADEON_Q_BIT(unit
);
550 else if ((VB
->TexCoordPtr
[unit
]->size
>= 3) &&
551 ((ctx
->Texture
.Unit
[unit
]._ReallyEnabled
& (TEXTURE_CUBE_BIT
)) == 0)) {
552 GLuint swaptexmatcol
= (VB
->TexCoordPtr
[unit
]->size
- 3);
553 if (((rmesa
->NeedTexMatrix
>> unit
) & 1) &&
554 (swaptexmatcol
!= ((rmesa
->TexMatColSwap
>> unit
) & 1)))
555 radeonUploadTexMatrix( rmesa
, unit
, swaptexmatcol
) ;
557 component
[nr
++] = &rmesa
->tcl
.tex
[unit
];
561 if (vtx
!= rmesa
->hw
.tcl
.cmd
[TCL_OUTPUT_VTXFMT
]) {
562 RADEON_STATECHANGE( rmesa
, tcl
);
563 rmesa
->hw
.tcl
.cmd
[TCL_OUTPUT_VTXFMT
] = vtx
;
566 rmesa
->tcl
.nr_aos_components
= nr
;
567 rmesa
->tcl
.vertex_format
= vfmt
;
/*
 * radeonReleaseArrays( ctx, newinputs ): release the DMA regions of every
 * array whose bit is set in `newinputs`.
 *
 * VERT_BIT_POS, VERT_BIT_NORMAL, VERT_BIT_COLOR0 and VERT_BIT_COLOR1 map to
 * rmesa->tcl.obj, .norm, .rgba and .spec respectively; VERT_BIT_TEX(unit)
 * maps to rmesa->tcl.tex[unit] for each unit below
 * ctx->Const.MaxTextureUnits.  Each region is handed to
 * radeonReleaseDmaRegion together with this function's name.  With
 * DEBUG_VERTS set, the flags are printed first.
 *
 * NOTE(review): local declarations and the closing braces are missing from
 * this listing - confirm against the complete source.
 */
571 void radeonReleaseArrays( GLcontext
*ctx
, GLuint newinputs
)
573 radeonContextPtr rmesa
= RADEON_CONTEXT( ctx
);
577 if (RADEON_DEBUG
& DEBUG_VERTS
)
578 _tnl_print_vert_flags( __FUNCTION__
, newinputs
);
581 if (newinputs
& VERT_BIT_POS
)
582 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.obj
, __FUNCTION__
);
584 if (newinputs
& VERT_BIT_NORMAL
)
585 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.norm
, __FUNCTION__
);
587 if (newinputs
& VERT_BIT_COLOR0
)
588 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.rgba
, __FUNCTION__
);
590 if (newinputs
& VERT_BIT_COLOR1
)
591 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.spec
, __FUNCTION__
);
593 for (unit
= 0 ; unit
< ctx
->Const
.MaxTextureUnits
; unit
++) {
594 if (newinputs
& VERT_BIT_TEX(unit
))
595 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.tex
[unit
], __FUNCTION__
);