1 /**************************************************************************
3 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
4 Tungsten Graphics Inc., Cedar Park, Texas.
8 Permission is hereby granted, free of charge, to any person obtaining
9 a copy of this software and associated documentation files (the
10 "Software"), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
16 The above copyright notice and this permission notice (including the
17 next paragraph) shall be included in all copies or substantial
18 portions of the Software.
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
24 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 **************************************************************************/
32 * Keith Whitwell <keith@tungstengraphics.com>
35 #include "main/glheader.h"
36 #include "main/imports.h"
37 #include "main/mtypes.h"
38 #include "main/macros.h"
40 #include "swrast_setup/swrast_setup.h"
41 #include "math/m_translate.h"
44 #include "radeon_context.h"
45 #include "radeon_ioctl.h"
46 #include "radeon_state.h"
47 #include "radeon_swtcl.h"
48 #include "radeon_maos.h"
49 #include "radeon_tcl.h"
53 * - from radeon_tcl_render
54 * - call radeonEmitArrays to ensure uptodate arrays in dma
55 * - emit primitives (new type?) which reference the data
56 * -- need to use elts for lineloop, quads, quadstrip/flat
57 * -- other primitives are all well-formed (need tristrip-1,fake-poly)
/*
 * emit_ubyte_rgba3: write colour data into a DMA region as radeon_color_t
 * records.
 *
 * ctx - GL context (no use of it is visible in this copy).
 * rvb - DMA region; output starts at rvb->start + rvb->address.
 *
 * NOTE(review): this copy of the file has dropped lines (the embedded
 * numbering jumps 61 -> 67 and 73 -> 75), so the remaining parameters
 * (data, stride and count are referenced below), the other channel stores
 * and the per-iteration pointer advance are not visible and are not
 * described here.
 */
60 static void emit_ubyte_rgba3( GLcontext
*ctx
,
61 struct radeon_dma_region
*rvb
,
/* Destination cursor inside the DMA region. */
67 radeon_color_t
*out
= (radeon_color_t
*)(rvb
->start
+ rvb
->address
);
/* Optional trace, enabled by the DEBUG_VERTS bit of RADEON_DEBUG. */
69 if (RADEON_DEBUG
& DEBUG_VERTS
)
70 fprintf(stderr
, "%s count %d stride %d out %p\n",
71 __FUNCTION__
, count
, stride
, (void *)out
);
/* One output record per input element. */
73 for (i
= 0; i
< count
; i
++) {
/* Green and blue come from the elements at data+1 and data+2. */
75 out
->green
= *(data
+1);
76 out
->blue
= *(data
+2);
/*
 * emit_ubyte_rgba4: copy packed colours, one 32-bit word per element, into
 * the DMA region.
 *
 * ctx - GL context (no use of it is visible in this copy).
 * rvb - DMA region; output starts at rvb->address + rvb->start.
 *
 * Two copy paths are visible: a bulk COPY_DWORDS of `count` words, and a
 * per-element loop that passes each word through LE32_TO_CPU.
 * NOTE(review): the condition selecting between the two paths is missing
 * from this copy -- presumably a stride test; TODO confirm against the
 * full file.
 */
83 static void emit_ubyte_rgba4( GLcontext
*ctx
,
84 struct radeon_dma_region
*rvb
,
/* Destination cursor inside the DMA region, addressed as 32-bit words. */
90 int *out
= (int *)(rvb
->address
+ rvb
->start
);
/* Optional trace, enabled by the DEBUG_VERTS bit of RADEON_DEBUG. */
92 if (RADEON_DEBUG
& DEBUG_VERTS
)
93 fprintf(stderr
, "%s count %d stride %d\n",
94 __FUNCTION__
, count
, stride
);
/* Bulk path: copy `count` dwords in one go. */
97 COPY_DWORDS( out
, data
, count
);
/* Per-element path: one converted dword per iteration. */
99 for (i
= 0; i
< count
; i
++) {
100 *out
++ = LE32_TO_CPU(*(int *)data
);
/*
 * emit_ubyte_rgba: allocate DMA space for a colour array and fill it via
 * emit_ubyte_rgba3 or emit_ubyte_rgba4.
 *
 * ctx - GL context; the driver context is derived with RADEON_CONTEXT().
 * rvb - DMA region that is allocated here and then filled.
 *
 * Two allocation sizes are visible: 4 bytes, or 4 * count bytes. In both
 * cases rvb->aos_start is refreshed from GET_START(rvb) afterwards.
 * NOTE(review): the conditions choosing the allocation size and the emit
 * helper are missing from this copy (embedded numbering has gaps), as are
 * the remaining parameters (data, size, stride, count are referenced).
 */
106 static void emit_ubyte_rgba( GLcontext
*ctx
,
107 struct radeon_dma_region
*rvb
,
113 radeonContextPtr rmesa
= RADEON_CONTEXT(ctx
);
/* Optional trace, enabled by the DEBUG_VERTS bit of RADEON_DEBUG. */
115 if (RADEON_DEBUG
& DEBUG_VERTS
)
116 fprintf(stderr
, "%s %d/%d\n", __FUNCTION__
, count
, size
);
/* Small allocation: room for a single 4-byte element. */
121 radeonAllocDmaRegion( rmesa
, rvb
, 4, 4 );
123 rvb
->aos_start
= GET_START(rvb
);
/* Full allocation: 4 bytes for each of the `count` elements. */
128 radeonAllocDmaRegion( rmesa
, rvb
, 4 * count
, 4 ); /* alignment? */
129 rvb
->aos_start
= GET_START(rvb
);
/* Fill the region; which helper runs depends on a test not visible here. */
138 emit_ubyte_rgba3( ctx
, rvb
, data
, stride
, count
);
141 emit_ubyte_rgba4( ctx
, rvb
, data
, stride
, count
);
/*
 * COPY_DWORDS(dst, src, nr): copy `nr` 32-bit words from src to dst.
 *
 * Two definitions are visible: an inline-assembly version ("rep ; movsl")
 * used when USE_X86_ASM is defined, and a plain loop that assigns
 * dst[j] = ((int *)src)[j] for j in [0, nr).
 * NOTE(review): both macro bodies are truncated in this copy (the embedded
 * numbering skips lines, and the #else/#endif are not visible). The macro
 * is also used above this point in the file -- confirm whether those
 * earlier users are compiled out.
 */
151 #if defined(USE_X86_ASM)
152 #define COPY_DWORDS( dst, src, nr ) \
155 __asm__ __volatile__( "rep ; movsl" \
156 : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
162 #define COPY_DWORDS( dst, src, nr ) \
165 for ( j = 0 ; j < nr ; j++ ) \
166 dst[j] = ((int *)src)[j]; \
/*
 * emit_vecfog: allocate DMA space for a fog array and fill it with fog
 * blend factors.
 *
 * ctx - GL context; also passed to radeonComputeFogBlendFactor().
 * rvb - DMA region that is allocated here and then filled.
 *
 * Each output float is radeonComputeFogBlendFactor(ctx, v), where v is the
 * GLfloat read from the current `data` position.
 * NOTE(review): the condition choosing between the 4-byte and the
 * count * 4 byte allocation, the remaining parameters and the
 * per-iteration pointer advances are missing from this copy.
 */
171 static void emit_vecfog( GLcontext
*ctx
,
172 struct radeon_dma_region
*rvb
,
180 radeonContextPtr rmesa
= RADEON_CONTEXT(ctx
);
/* Optional trace, enabled by the DEBUG_VERTS bit of RADEON_DEBUG. */
182 if (RADEON_DEBUG
& DEBUG_VERTS
)
183 fprintf(stderr
, "%s count %d stride %d\n",
184 __FUNCTION__
, count
, stride
);
/* Small allocation: room for a single 4-byte value. */
189 radeonAllocDmaRegion( rmesa
, rvb
, 4, 4 );
191 rvb
->aos_start
= GET_START(rvb
);
/* Full allocation: one 4-byte value per element. */
196 radeonAllocDmaRegion( rmesa
, rvb
, count
* 4, 4 ); /* alignment? */
197 rvb
->aos_start
= GET_START(rvb
);
/* Destination cursor inside the freshly allocated region. */
204 out
= (GLfloat
*)(rvb
->address
+ rvb
->start
);
205 for (i
= 0; i
< count
; i
++) {
/* Convert the input fog value into a blend factor. */
206 out
[0] = radeonComputeFogBlendFactor( ctx
, *(GLfloat
*)data
);
/*
 * emit_vec4: copy one 32-bit word per element into the DMA region.
 *
 * ctx - GL context (no use of it is visible in this copy).
 * rvb - DMA region; output starts at rvb->address + rvb->start.
 *
 * Two copy paths are visible: a bulk COPY_DWORDS of `count` words, and a
 * loop that copies the word at `data` into out[0].
 * NOTE(review): the test choosing between the paths and the pointer
 * advances inside the loop are missing from this copy -- presumably the
 * bulk path is for tightly packed input; TODO confirm.
 */
212 static void emit_vec4( GLcontext
*ctx
,
213 struct radeon_dma_region
*rvb
,
/* Destination cursor, addressed as 32-bit words. */
219 int *out
= (int *)(rvb
->address
+ rvb
->start
);
/* Optional trace, enabled by the DEBUG_VERTS bit of RADEON_DEBUG. */
221 if (RADEON_DEBUG
& DEBUG_VERTS
)
222 fprintf(stderr
, "%s count %d stride %d\n",
223 __FUNCTION__
, count
, stride
);
/* Bulk path. */
226 COPY_DWORDS( out
, data
, count
);
/* Per-element path. */
228 for (i
= 0; i
< count
; i
++) {
229 out
[0] = *(int *)data
;
/*
 * emit_vec8: copy two 32-bit words per element into the DMA region.
 *
 * ctx - GL context (no use of it is visible in this copy).
 * rvb - DMA region; output starts at rvb->address + rvb->start.
 *
 * Two copy paths are visible: a bulk COPY_DWORDS of count * 2 words, and a
 * loop that copies the words at byte offsets 0 and 4 of `data`.
 * NOTE(review): the test choosing between the paths and the pointer
 * advances inside the loop are missing from this copy.
 */
236 static void emit_vec8( GLcontext
*ctx
,
237 struct radeon_dma_region
*rvb
,
/* Destination cursor, addressed as 32-bit words. */
243 int *out
= (int *)(rvb
->address
+ rvb
->start
);
/* Optional trace, enabled by the DEBUG_VERTS bit of RADEON_DEBUG. */
245 if (RADEON_DEBUG
& DEBUG_VERTS
)
246 fprintf(stderr
, "%s count %d stride %d\n",
247 __FUNCTION__
, count
, stride
);
/* Bulk path: two dwords per element. */
250 COPY_DWORDS( out
, data
, count
*2 );
/* Per-element path. */
252 for (i
= 0; i
< count
; i
++) {
253 out
[0] = *(int *)data
;
254 out
[1] = *(int *)(data
+4);
/*
 * emit_vec12: copy three 32-bit words per element into the DMA region.
 *
 * ctx - GL context (no use of it is visible in this copy).
 * rvb - DMA region; output starts at rvb->address + rvb->start.
 *
 * Two copy paths are visible: a bulk COPY_DWORDS of count * 3 words, and a
 * loop that copies the words at byte offsets 0, 4 and 8 of `data`.
 * NOTE(review): the test choosing between the paths and the pointer
 * advances inside the loop are missing from this copy.
 */
260 static void emit_vec12( GLcontext
*ctx
,
261 struct radeon_dma_region
*rvb
,
/* Destination cursor, addressed as 32-bit words. */
267 int *out
= (int *)(rvb
->address
+ rvb
->start
);
/* Optional trace; this variant also prints the out and data pointers. */
269 if (RADEON_DEBUG
& DEBUG_VERTS
)
270 fprintf(stderr
, "%s count %d stride %d out %p data %p\n",
271 __FUNCTION__
, count
, stride
, (void *)out
, (void *)data
);
/* Bulk path: three dwords per element. */
274 COPY_DWORDS( out
, data
, count
*3 );
/* Per-element path. */
276 for (i
= 0; i
< count
; i
++) {
277 out
[0] = *(int *)data
;
278 out
[1] = *(int *)(data
+4);
279 out
[2] = *(int *)(data
+8);
/*
 * emit_vec16: copy four 32-bit words per element into the DMA region.
 *
 * ctx - GL context (no use of it is visible in this copy).
 * rvb - DMA region; output starts at rvb->address + rvb->start.
 *
 * Two copy paths are visible: a bulk COPY_DWORDS of count * 4 words, and a
 * loop that copies the words at byte offsets 0, 4, 8 and 12 of `data`.
 * NOTE(review): the test choosing between the paths and the pointer
 * advances inside the loop are missing from this copy.
 */
285 static void emit_vec16( GLcontext
*ctx
,
286 struct radeon_dma_region
*rvb
,
/* Destination cursor, addressed as 32-bit words. */
292 int *out
= (int *)(rvb
->address
+ rvb
->start
);
/* Optional trace, enabled by the DEBUG_VERTS bit of RADEON_DEBUG. */
294 if (RADEON_DEBUG
& DEBUG_VERTS
)
295 fprintf(stderr
, "%s count %d stride %d\n",
296 __FUNCTION__
, count
, stride
);
/* Bulk path: four dwords per element. */
299 COPY_DWORDS( out
, data
, count
*4 );
/* Per-element path. */
301 for (i
= 0; i
< count
; i
++) {
302 out
[0] = *(int *)data
;
303 out
[1] = *(int *)(data
+4);
304 out
[2] = *(int *)(data
+8);
305 out
[3] = *(int *)(data
+12);
/*
 * emit_vector: allocate DMA space for an array of `size`-dword elements and
 * fill it through emit_vec4 / emit_vec8 / emit_vec12 / emit_vec16.
 *
 * ctx - GL context; the driver context is derived with RADEON_CONTEXT().
 * rvb - DMA region that is allocated here; its aos_start, aos_size and (in
 *       the full-array case) aos_stride fields are set.
 *
 * Two allocations are visible: size * 4 bytes (one element) and
 * size * count * 4 bytes (the whole array). Only the latter is followed by
 * a visible write of aos_stride.
 * NOTE(review): the condition choosing the allocation, the switch that
 * picks the emit helper, and the remaining parameters are missing from
 * this copy (embedded numbering has gaps).
 */
312 static void emit_vector( GLcontext
*ctx
,
313 struct radeon_dma_region
*rvb
,
319 radeonContextPtr rmesa
= RADEON_CONTEXT(ctx
);
/* Optional trace, enabled by the DEBUG_VERTS bit of RADEON_DEBUG. */
321 if (RADEON_DEBUG
& DEBUG_VERTS
)
322 fprintf(stderr
, "%s count %d size %d stride %d\n",
323 __FUNCTION__
, count
, size
, stride
);
/* Single-element allocation. */
328 radeonAllocDmaRegion( rmesa
, rvb
, size
* 4, 4 );
330 rvb
->aos_start
= GET_START(rvb
);
332 rvb
->aos_size
= size
;
/* Whole-array allocation; stride and size are both `size` dwords. */
335 radeonAllocDmaRegion( rmesa
, rvb
, size
* count
* 4, 4 ); /* alignment? */
336 rvb
->aos_start
= GET_START(rvb
);
337 rvb
->aos_stride
= size
;
338 rvb
->aos_size
= size
;
/* Fill the region with the helper matching the element width. */
345 emit_vec4( ctx
, rvb
, data
, stride
, count
);
348 emit_vec8( ctx
, rvb
, data
, stride
, count
);
351 emit_vec12( ctx
, rvb
, data
, stride
, count
);
354 emit_vec16( ctx
, rvb
, data
, stride
, count
);
/*
 * emit_s0_vec: emit texture coordinates starting from a single input word
 * per element.
 *
 * ctx - GL context (no use of it is visible in this copy).
 * rvb - DMA region; output starts at rvb->address + rvb->start.
 *
 * The visible loop body copies the word at `data` into out[0].
 * NOTE(review): the name suggests a second output word is also written per
 * element, but that store and the pointer advances are missing from this
 * copy -- TODO confirm against the full file.
 */
366 static void emit_s0_vec( GLcontext
*ctx
,
367 struct radeon_dma_region
*rvb
,
/* Destination cursor, addressed as 32-bit words. */
373 int *out
= (int *)(rvb
->address
+ rvb
->start
);
/* Optional trace, enabled by the DEBUG_VERTS bit of RADEON_DEBUG. */
375 if (RADEON_DEBUG
& DEBUG_VERTS
)
376 fprintf(stderr
, "%s count %d stride %d\n",
377 __FUNCTION__
, count
, stride
);
379 for (i
= 0; i
< count
; i
++) {
380 out
[0] = *(int *)data
;
/*
 * emit_stq_vec: emit three words per element taken from a four-word input,
 * skipping the third input word.
 *
 * ctx - GL context (no use of it is visible in this copy).
 * rvb - DMA region; output starts at rvb->address + rvb->start.
 *
 * For each element the words at byte offsets 0, 4 and 12 of `data` are
 * written to out[0..2]; the word at offset 8 is not read.
 * NOTE(review): the pointer advances at the end of the loop body and the
 * remaining parameters are missing from this copy.
 */
387 static void emit_stq_vec( GLcontext
*ctx
,
388 struct radeon_dma_region
*rvb
,
/* Destination cursor, addressed as 32-bit words. */
394 int *out
= (int *)(rvb
->address
+ rvb
->start
);
/* Optional trace, enabled by the DEBUG_VERTS bit of RADEON_DEBUG. */
396 if (RADEON_DEBUG
& DEBUG_VERTS
)
397 fprintf(stderr
, "%s count %d stride %d\n",
398 __FUNCTION__
, count
, stride
);
400 for (i
= 0; i
< count
; i
++) {
401 out
[0] = *(int *)data
;
402 out
[1] = *(int *)(data
+4);
/* Offset 12, not 8: the third input word is deliberately skipped. */
403 out
[2] = *(int *)(data
+12);
/*
 * emit_tex_vector: allocate DMA space for a texture-coordinate array and
 * fill it with 2 or 3 words per element.
 *
 * ctx - GL context; the driver context is derived with RADEON_CONTEXT().
 * rvb - DMA region that is allocated here; its aos_start, aos_size and (in
 *       the full-array case) aos_stride fields are set.
 *
 * The emitted element width (`emitsize`, in dwords) is 3 for the cases
 * labelled 4 and 3, and 2 otherwise. Filling is delegated to emit_s0_vec,
 * emit_vec8, emit_vec12 or emit_stq_vec.
 * NOTE(review): the switch heads, the condition choosing between the
 * single-element and whole-array allocation, and the remaining parameters
 * are missing from this copy (embedded numbering has gaps).
 */
412 static void emit_tex_vector( GLcontext
*ctx
,
413 struct radeon_dma_region
*rvb
,
419 radeonContextPtr rmesa
= RADEON_CONTEXT(ctx
);
/* Optional trace, enabled by the DEBUG_VERTS bit of RADEON_DEBUG. */
422 if (RADEON_DEBUG
& DEBUG_VERTS
)
423 fprintf(stderr
, "%s %d/%d\n", __FUNCTION__
, count
, size
);
/* Case 4 emits 3 dwords, the same as case 3. */
428 case 4: emitsize
= 3; break;
429 case 3: emitsize
= 3; break;
430 default: emitsize
= 2; break;
/* Single-element allocation. */
435 radeonAllocDmaRegion( rmesa
, rvb
, 4 * emitsize
, 4 );
437 rvb
->aos_start
= GET_START(rvb
);
439 rvb
->aos_size
= emitsize
;
/* Whole-array allocation; stride and size are both `emitsize` dwords. */
442 radeonAllocDmaRegion( rmesa
, rvb
, 4 * emitsize
* count
, 4 );
443 rvb
->aos_start
= GET_START(rvb
);
444 rvb
->aos_stride
= emitsize
;
445 rvb
->aos_size
= emitsize
;
/* Fill the region; the selecting switch is not visible in this copy. */
453 emit_s0_vec( ctx
, rvb
, data
, stride
, count
);
456 emit_vec8( ctx
, rvb
, data
, stride
, count
);
459 emit_vec12( ctx
, rvb
, data
, stride
, count
);
462 emit_stq_vec( ctx
, rvb
, data
, stride
, count
);
474 /* Emit any changed arrays to new GART memory, re-emit a packet to
/*
 * radeonEmitArrays: build the list of array-of-structure components and the
 * vertex format word for the vertex attributes selected by `inputs`.
 *
 * ctx    - GL context; the driver context and the TNL vertex buffer (VB)
 *          are derived from it.
 * inputs - bitmask of VERT_BIT_* flags naming the attributes to emit.
 *
 * For each attribute handled, data is uploaded only when the corresponding
 * rmesa->tcl.<attr>.buf is still NULL; the matching RADEON_CP_VC_FRMT_* bits
 * are OR-ed into `vfmt`, and the region is appended to
 * rmesa->tcl.aos_components. At the end the component count and `vfmt` are
 * stored in rmesa->tcl, and the TCL_OUTPUT_VTXFMT state word is updated if
 * the Q-coordinate bits changed.
 *
 * NOTE(review): several lines are missing from this copy (the embedded
 * numbering has gaps), including local declarations and the names of the
 * upload helpers called under each "!buf" guard for position, normal,
 * colour and fog; only their arguments are visible.
 */
477 void radeonEmitArrays( GLcontext
*ctx
, GLuint inputs
)
479 radeonContextPtr rmesa
= RADEON_CONTEXT( ctx
);
480 struct vertex_buffer
*VB
= &TNL_CONTEXT( ctx
)->vb
;
481 struct radeon_dma_region
**component
= rmesa
->tcl
.aos_components
;
484 GLuint count
= VB
->Count
;
/* Optional trace of the requested inputs. */
488 if (RADEON_DEBUG
& DEBUG_VERTS
)
489 _tnl_print_vert_flags( __FUNCTION__
, inputs
);
/* Positions: upload from VB->ObjPtr if no buffer is attached yet. */
493 if (!rmesa
->tcl
.obj
.buf
)
496 (char *)VB
->ObjPtr
->data
,
/*
 * No break is visible between these cases (embedded lines 502-504 are
 * consecutive), so a size of 4 sets W0, Z and XY; 3 sets Z and XY.
 */
501 switch( VB
->ObjPtr
->size
) {
502 case 4: vfmt
|= RADEON_CP_VC_FRMT_W0
;
503 case 3: vfmt
|= RADEON_CP_VC_FRMT_Z
;
504 case 2: vfmt
|= RADEON_CP_VC_FRMT_XY
;
508 component
[nr
++] = &rmesa
->tcl
.obj
;
/* Normals. */
512 if (inputs
& VERT_BIT_NORMAL
) {
513 if (!rmesa
->tcl
.norm
.buf
)
516 (char *)VB
->NormalPtr
->data
,
518 VB
->NormalPtr
->stride
,
521 vfmt
|= RADEON_CP_VC_FRMT_N0
;
522 component
[nr
++] = &rmesa
->tcl
.norm
;
/*
 * Primary colour. Alpha is flagged only for 4-component colours whose
 * stride is non-zero or whose first element has alpha != 1.0; the other
 * visible assignment flags colour without alpha.
 */
525 if (inputs
& VERT_BIT_COLOR0
) {
527 if (VB
->ColorPtr
[0]->size
== 4 &&
528 (VB
->ColorPtr
[0]->stride
!= 0 ||
529 VB
->ColorPtr
[0]->data
[0][3] != 1.0)) {
530 vfmt
|= RADEON_CP_VC_FRMT_FPCOLOR
| RADEON_CP_VC_FRMT_FPALPHA
;
535 vfmt
|= RADEON_CP_VC_FRMT_FPCOLOR
;
539 if (!rmesa
->tcl
.rgba
.buf
)
542 (char *)VB
->ColorPtr
[0]->data
,
544 VB
->ColorPtr
[0]->stride
,
548 component
[nr
++] = &rmesa
->tcl
.rgba
;
/* Secondary (specular) colour. */
552 if (inputs
& VERT_BIT_COLOR1
) {
553 if (!rmesa
->tcl
.spec
.buf
) {
557 (char *)VB
->SecondaryColorPtr
[0]->data
,
559 VB
->SecondaryColorPtr
[0]->stride
,
563 vfmt
|= RADEON_CP_VC_FRMT_FPSPEC
;
564 component
[nr
++] = &rmesa
->tcl
.spec
;
567 /* FIXME: not sure if this is correct. May need to stitch this together with
568 secondary color. It seems odd that for the primary color, color and alpha
569 values are emitted together, but not for the secondary color. */
570 if (inputs
& VERT_BIT_FOG
) {
571 if (!rmesa
->tcl
.fog
.buf
)
574 (char *)VB
->FogCoordPtr
->data
,
575 VB
->FogCoordPtr
->stride
,
578 vfmt
|= RADEON_CP_VC_FRMT_FPFOG
;
579 component
[nr
++] = &rmesa
->tcl
.fog
;
/*
 * Start from the current TCL output vertex format with the Q0..Q2 bits
 * cleared; they are re-derived per texture unit below.
 */
583 vtx
= (rmesa
->hw
.tcl
.cmd
[TCL_OUTPUT_VTXFMT
] &
584 ~(RADEON_TCL_VTX_Q0
|RADEON_TCL_VTX_Q1
|RADEON_TCL_VTX_Q2
));
/* Texture coordinates, one pass per texture unit. */
586 for (unit
= 0; unit
< ctx
->Const
.MaxTextureUnits
; unit
++) {
587 if (inputs
& VERT_BIT_TEX(unit
)) {
588 if (!rmesa
->tcl
.tex
[unit
].buf
)
589 emit_tex_vector( ctx
,
590 &(rmesa
->tcl
.tex
[unit
]),
591 (char *)VB
->TexCoordPtr
[unit
]->data
,
592 VB
->TexCoordPtr
[unit
]->size
,
593 VB
->TexCoordPtr
[unit
]->stride
,
596 vfmt
|= RADEON_ST_BIT(unit
);
597 /* assume we need the 3rd coord if texgen is active for r/q OR at least
598 3 coords are submitted. This may not be 100% correct */
599 if (VB
->TexCoordPtr
[unit
]->size
>= 3) {
600 vtx
|= RADEON_Q_BIT(unit
);
601 vfmt
|= RADEON_Q_BIT(unit
);
603 if ( (ctx
->Texture
.Unit
[unit
].TexGenEnabled
& (R_BIT
| Q_BIT
)) )
604 vtx
|= RADEON_Q_BIT(unit
);
/*
 * Without R/Q texgen, for non-cube textures with >= 3 coords: re-upload the
 * texture matrix when this unit needs one and the required column swap
 * (size - 3, i.e. 0 or 1) differs from the currently recorded swap bit.
 */
605 else if ((VB
->TexCoordPtr
[unit
]->size
>= 3) &&
606 ((ctx
->Texture
.Unit
[unit
]._ReallyEnabled
& (TEXTURE_CUBE_BIT
)) == 0)) {
607 GLuint swaptexmatcol
= (VB
->TexCoordPtr
[unit
]->size
- 3);
608 if (((rmesa
->NeedTexMatrix
>> unit
) & 1) &&
609 (swaptexmatcol
!= ((rmesa
->TexMatColSwap
>> unit
) & 1)))
610 radeonUploadTexMatrix( rmesa
, unit
, swaptexmatcol
) ;
612 component
[nr
++] = &rmesa
->tcl
.tex
[unit
];
/* Only flag a TCL state change when the output format really differs. */
616 if (vtx
!= rmesa
->hw
.tcl
.cmd
[TCL_OUTPUT_VTXFMT
]) {
617 RADEON_STATECHANGE( rmesa
, tcl
);
618 rmesa
->hw
.tcl
.cmd
[TCL_OUTPUT_VTXFMT
] = vtx
;
/* Publish the component count and the accumulated vertex format. */
621 rmesa
->tcl
.nr_aos_components
= nr
;
622 rmesa
->tcl
.vertex_format
= vfmt
;
/*
 * radeonReleaseArrays: release the DMA regions of the vertex attributes
 * named in `newinputs`.
 *
 * ctx       - GL context; the driver context is derived with
 *             RADEON_CONTEXT().
 * newinputs - bitmask of VERT_BIT_* flags; for each bit that is set
 *             (position, normal, colour 0, colour 1, fog, and every
 *             texture unit below ctx->Const.MaxTextureUnits) the matching
 *             rmesa->tcl region is passed to radeonReleaseDmaRegion().
 *
 * NOTE(review): some lines are missing from this copy (the embedded
 * numbering has gaps), including the declaration of `unit`.
 */
626 void radeonReleaseArrays( GLcontext
*ctx
, GLuint newinputs
)
628 radeonContextPtr rmesa
= RADEON_CONTEXT( ctx
);
/* Optional trace of the inputs being released. */
632 if (RADEON_DEBUG
& DEBUG_VERTS
)
633 _tnl_print_vert_flags( __FUNCTION__
, newinputs
);
636 if (newinputs
& VERT_BIT_POS
)
637 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.obj
, __FUNCTION__
);
639 if (newinputs
& VERT_BIT_NORMAL
)
640 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.norm
, __FUNCTION__
);
642 if (newinputs
& VERT_BIT_COLOR0
)
643 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.rgba
, __FUNCTION__
);
645 if (newinputs
& VERT_BIT_COLOR1
)
646 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.spec
, __FUNCTION__
);
648 if (newinputs
& VERT_BIT_FOG
)
649 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.fog
, __FUNCTION__
);
/* Texture coordinate regions, one per texture unit. */
651 for (unit
= 0 ; unit
< ctx
->Const
.MaxTextureUnits
; unit
++) {
652 if (newinputs
& VERT_BIT_TEX(unit
))
653 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.tex
[unit
], __FUNCTION__
);