/**************************************************************************

Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
Tungsten Graphics Inc., Cedar Park, Texas.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keith@tungstengraphics.com>
 */
35 #include "main/glheader.h"
36 #include "main/imports.h"
37 #include "main/mtypes.h"
38 #include "main/macros.h"
40 #include "swrast_setup/swrast_setup.h"
41 #include "math/m_translate.h"
43 #include "tnl/tcontext.h"
45 #include "radeon_context.h"
46 #include "radeon_ioctl.h"
47 #include "radeon_state.h"
48 #include "radeon_swtcl.h"
49 #include "radeon_maos.h"
50 #include "radeon_tcl.h"
/*
 * - from radeon_tcl_render
 * - call radeonEmitArrays to ensure uptodate arrays in dma
 * - emit primitives (new type?) which reference the data
 * -- need to use elts for lineloop, quads, quadstrip/flat
 * -- other primitives are all well-formed (need tristrip-1,fake-poly)
 */
61 static void emit_ubyte_rgba3( GLcontext
*ctx
,
62 struct radeon_dma_region
*rvb
,
68 radeon_color_t
*out
= (radeon_color_t
*)(rvb
->start
+ rvb
->address
);
70 if (RADEON_DEBUG
& DEBUG_VERTS
)
71 fprintf(stderr
, "%s count %d stride %d out %p\n",
72 __FUNCTION__
, count
, stride
, (void *)out
);
74 for (i
= 0; i
< count
; i
++) {
76 out
->green
= *(data
+1);
77 out
->blue
= *(data
+2);
84 static void emit_ubyte_rgba4( GLcontext
*ctx
,
85 struct radeon_dma_region
*rvb
,
91 int *out
= (int *)(rvb
->address
+ rvb
->start
);
93 if (RADEON_DEBUG
& DEBUG_VERTS
)
94 fprintf(stderr
, "%s count %d stride %d\n",
95 __FUNCTION__
, count
, stride
);
98 COPY_DWORDS( out
, data
, count
);
100 for (i
= 0; i
< count
; i
++) {
101 *out
++ = LE32_TO_CPU(*(int *)data
);
107 static void emit_ubyte_rgba( GLcontext
*ctx
,
108 struct radeon_dma_region
*rvb
,
114 radeonContextPtr rmesa
= RADEON_CONTEXT(ctx
);
116 if (RADEON_DEBUG
& DEBUG_VERTS
)
117 fprintf(stderr
, "%s %d/%d\n", __FUNCTION__
, count
, size
);
122 radeonAllocDmaRegion( rmesa
, rvb
, 4, 4 );
124 rvb
->aos_start
= GET_START(rvb
);
129 radeonAllocDmaRegion( rmesa
, rvb
, 4 * count
, 4 ); /* alignment? */
130 rvb
->aos_start
= GET_START(rvb
);
139 emit_ubyte_rgba3( ctx
, rvb
, data
, stride
, count
);
142 emit_ubyte_rgba4( ctx
, rvb
, data
, stride
, count
);
152 #if defined(USE_X86_ASM)
153 #define COPY_DWORDS( dst, src, nr ) \
156 __asm__ __volatile__( "rep ; movsl" \
157 : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
163 #define COPY_DWORDS( dst, src, nr ) \
166 for ( j = 0 ; j < nr ; j++ ) \
167 dst[j] = ((int *)src)[j]; \
172 static void emit_vecfog( GLcontext
*ctx
,
173 struct radeon_dma_region
*rvb
,
181 radeonContextPtr rmesa
= RADEON_CONTEXT(ctx
);
183 if (RADEON_DEBUG
& DEBUG_VERTS
)
184 fprintf(stderr
, "%s count %d stride %d\n",
185 __FUNCTION__
, count
, stride
);
190 radeonAllocDmaRegion( rmesa
, rvb
, 4, 4 );
192 rvb
->aos_start
= GET_START(rvb
);
197 radeonAllocDmaRegion( rmesa
, rvb
, count
* 4, 4 ); /* alignment? */
198 rvb
->aos_start
= GET_START(rvb
);
205 out
= (GLfloat
*)(rvb
->address
+ rvb
->start
);
206 for (i
= 0; i
< count
; i
++) {
207 out
[0] = radeonComputeFogBlendFactor( ctx
, *(GLfloat
*)data
);
213 static void emit_vec4( GLcontext
*ctx
,
214 struct radeon_dma_region
*rvb
,
220 int *out
= (int *)(rvb
->address
+ rvb
->start
);
222 if (RADEON_DEBUG
& DEBUG_VERTS
)
223 fprintf(stderr
, "%s count %d stride %d\n",
224 __FUNCTION__
, count
, stride
);
227 COPY_DWORDS( out
, data
, count
);
229 for (i
= 0; i
< count
; i
++) {
230 out
[0] = *(int *)data
;
237 static void emit_vec8( GLcontext
*ctx
,
238 struct radeon_dma_region
*rvb
,
244 int *out
= (int *)(rvb
->address
+ rvb
->start
);
246 if (RADEON_DEBUG
& DEBUG_VERTS
)
247 fprintf(stderr
, "%s count %d stride %d\n",
248 __FUNCTION__
, count
, stride
);
251 COPY_DWORDS( out
, data
, count
*2 );
253 for (i
= 0; i
< count
; i
++) {
254 out
[0] = *(int *)data
;
255 out
[1] = *(int *)(data
+4);
261 static void emit_vec12( GLcontext
*ctx
,
262 struct radeon_dma_region
*rvb
,
268 int *out
= (int *)(rvb
->address
+ rvb
->start
);
270 if (RADEON_DEBUG
& DEBUG_VERTS
)
271 fprintf(stderr
, "%s count %d stride %d out %p data %p\n",
272 __FUNCTION__
, count
, stride
, (void *)out
, (void *)data
);
275 COPY_DWORDS( out
, data
, count
*3 );
277 for (i
= 0; i
< count
; i
++) {
278 out
[0] = *(int *)data
;
279 out
[1] = *(int *)(data
+4);
280 out
[2] = *(int *)(data
+8);
286 static void emit_vec16( GLcontext
*ctx
,
287 struct radeon_dma_region
*rvb
,
293 int *out
= (int *)(rvb
->address
+ rvb
->start
);
295 if (RADEON_DEBUG
& DEBUG_VERTS
)
296 fprintf(stderr
, "%s count %d stride %d\n",
297 __FUNCTION__
, count
, stride
);
300 COPY_DWORDS( out
, data
, count
*4 );
302 for (i
= 0; i
< count
; i
++) {
303 out
[0] = *(int *)data
;
304 out
[1] = *(int *)(data
+4);
305 out
[2] = *(int *)(data
+8);
306 out
[3] = *(int *)(data
+12);
313 static void emit_vector( GLcontext
*ctx
,
314 struct radeon_dma_region
*rvb
,
320 radeonContextPtr rmesa
= RADEON_CONTEXT(ctx
);
322 if (RADEON_DEBUG
& DEBUG_VERTS
)
323 fprintf(stderr
, "%s count %d size %d stride %d\n",
324 __FUNCTION__
, count
, size
, stride
);
329 radeonAllocDmaRegion( rmesa
, rvb
, size
* 4, 4 );
331 rvb
->aos_start
= GET_START(rvb
);
333 rvb
->aos_size
= size
;
336 radeonAllocDmaRegion( rmesa
, rvb
, size
* count
* 4, 4 ); /* alignment? */
337 rvb
->aos_start
= GET_START(rvb
);
338 rvb
->aos_stride
= size
;
339 rvb
->aos_size
= size
;
346 emit_vec4( ctx
, rvb
, data
, stride
, count
);
349 emit_vec8( ctx
, rvb
, data
, stride
, count
);
352 emit_vec12( ctx
, rvb
, data
, stride
, count
);
355 emit_vec16( ctx
, rvb
, data
, stride
, count
);
367 static void emit_s0_vec( GLcontext
*ctx
,
368 struct radeon_dma_region
*rvb
,
374 int *out
= (int *)(rvb
->address
+ rvb
->start
);
376 if (RADEON_DEBUG
& DEBUG_VERTS
)
377 fprintf(stderr
, "%s count %d stride %d\n",
378 __FUNCTION__
, count
, stride
);
380 for (i
= 0; i
< count
; i
++) {
381 out
[0] = *(int *)data
;
388 static void emit_stq_vec( GLcontext
*ctx
,
389 struct radeon_dma_region
*rvb
,
395 int *out
= (int *)(rvb
->address
+ rvb
->start
);
397 if (RADEON_DEBUG
& DEBUG_VERTS
)
398 fprintf(stderr
, "%s count %d stride %d\n",
399 __FUNCTION__
, count
, stride
);
401 for (i
= 0; i
< count
; i
++) {
402 out
[0] = *(int *)data
;
403 out
[1] = *(int *)(data
+4);
404 out
[2] = *(int *)(data
+12);
413 static void emit_tex_vector( GLcontext
*ctx
,
414 struct radeon_dma_region
*rvb
,
420 radeonContextPtr rmesa
= RADEON_CONTEXT(ctx
);
423 if (RADEON_DEBUG
& DEBUG_VERTS
)
424 fprintf(stderr
, "%s %d/%d\n", __FUNCTION__
, count
, size
);
429 case 4: emitsize
= 3; break;
430 case 3: emitsize
= 3; break;
431 default: emitsize
= 2; break;
436 radeonAllocDmaRegion( rmesa
, rvb
, 4 * emitsize
, 4 );
438 rvb
->aos_start
= GET_START(rvb
);
440 rvb
->aos_size
= emitsize
;
443 radeonAllocDmaRegion( rmesa
, rvb
, 4 * emitsize
* count
, 4 );
444 rvb
->aos_start
= GET_START(rvb
);
445 rvb
->aos_stride
= emitsize
;
446 rvb
->aos_size
= emitsize
;
454 emit_s0_vec( ctx
, rvb
, data
, stride
, count
);
457 emit_vec8( ctx
, rvb
, data
, stride
, count
);
460 emit_vec12( ctx
, rvb
, data
, stride
, count
);
463 emit_stq_vec( ctx
, rvb
, data
, stride
, count
);
475 /* Emit any changed arrays to new GART memory, re-emit a packet to
478 void radeonEmitArrays( GLcontext
*ctx
, GLuint inputs
)
480 radeonContextPtr rmesa
= RADEON_CONTEXT( ctx
);
481 struct vertex_buffer
*VB
= &TNL_CONTEXT( ctx
)->vb
;
482 struct radeon_dma_region
**component
= rmesa
->tcl
.aos_components
;
485 GLuint count
= VB
->Count
;
489 if (RADEON_DEBUG
& DEBUG_VERTS
)
490 _tnl_print_vert_flags( __FUNCTION__
, inputs
);
494 if (!rmesa
->tcl
.obj
.buf
)
497 (char *)VB
->ObjPtr
->data
,
502 switch( VB
->ObjPtr
->size
) {
503 case 4: vfmt
|= RADEON_CP_VC_FRMT_W0
;
504 case 3: vfmt
|= RADEON_CP_VC_FRMT_Z
;
505 case 2: vfmt
|= RADEON_CP_VC_FRMT_XY
;
509 component
[nr
++] = &rmesa
->tcl
.obj
;
513 if (inputs
& VERT_BIT_NORMAL
) {
514 if (!rmesa
->tcl
.norm
.buf
)
517 (char *)VB
->NormalPtr
->data
,
519 VB
->NormalPtr
->stride
,
522 vfmt
|= RADEON_CP_VC_FRMT_N0
;
523 component
[nr
++] = &rmesa
->tcl
.norm
;
526 if (inputs
& VERT_BIT_COLOR0
) {
528 if (VB
->ColorPtr
[0]->size
== 4 &&
529 (VB
->ColorPtr
[0]->stride
!= 0 ||
530 VB
->ColorPtr
[0]->data
[0][3] != 1.0)) {
531 vfmt
|= RADEON_CP_VC_FRMT_FPCOLOR
| RADEON_CP_VC_FRMT_FPALPHA
;
536 vfmt
|= RADEON_CP_VC_FRMT_FPCOLOR
;
540 if (!rmesa
->tcl
.rgba
.buf
)
543 (char *)VB
->ColorPtr
[0]->data
,
545 VB
->ColorPtr
[0]->stride
,
549 component
[nr
++] = &rmesa
->tcl
.rgba
;
553 if (inputs
& VERT_BIT_COLOR1
) {
554 if (!rmesa
->tcl
.spec
.buf
) {
558 (char *)VB
->SecondaryColorPtr
[0]->data
,
560 VB
->SecondaryColorPtr
[0]->stride
,
564 vfmt
|= RADEON_CP_VC_FRMT_FPSPEC
;
565 component
[nr
++] = &rmesa
->tcl
.spec
;
568 /* FIXME: not sure if this is correct. May need to stitch this together with
569 secondary color. It seems odd that for primary color color and alpha values
570 are emitted together but for secondary color not. */
571 if (inputs
& VERT_BIT_FOG
) {
572 if (!rmesa
->tcl
.fog
.buf
)
575 (char *)VB
->FogCoordPtr
->data
,
576 VB
->FogCoordPtr
->stride
,
579 vfmt
|= RADEON_CP_VC_FRMT_FPFOG
;
580 component
[nr
++] = &rmesa
->tcl
.fog
;
584 vtx
= (rmesa
->hw
.tcl
.cmd
[TCL_OUTPUT_VTXFMT
] &
585 ~(RADEON_TCL_VTX_Q0
|RADEON_TCL_VTX_Q1
|RADEON_TCL_VTX_Q2
));
587 for (unit
= 0; unit
< ctx
->Const
.MaxTextureUnits
; unit
++) {
588 if (inputs
& VERT_BIT_TEX(unit
)) {
589 if (!rmesa
->tcl
.tex
[unit
].buf
)
590 emit_tex_vector( ctx
,
591 &(rmesa
->tcl
.tex
[unit
]),
592 (char *)VB
->TexCoordPtr
[unit
]->data
,
593 VB
->TexCoordPtr
[unit
]->size
,
594 VB
->TexCoordPtr
[unit
]->stride
,
597 vfmt
|= RADEON_ST_BIT(unit
);
598 /* assume we need the 3rd coord if texgen is active for r/q OR at least
599 3 coords are submitted. This may not be 100% correct */
600 if (VB
->TexCoordPtr
[unit
]->size
>= 3) {
601 vtx
|= RADEON_Q_BIT(unit
);
602 vfmt
|= RADEON_Q_BIT(unit
);
604 if ( (ctx
->Texture
.Unit
[unit
].TexGenEnabled
& (R_BIT
| Q_BIT
)) )
605 vtx
|= RADEON_Q_BIT(unit
);
606 else if ((VB
->TexCoordPtr
[unit
]->size
>= 3) &&
607 ((ctx
->Texture
.Unit
[unit
]._ReallyEnabled
& (TEXTURE_CUBE_BIT
)) == 0)) {
608 GLuint swaptexmatcol
= (VB
->TexCoordPtr
[unit
]->size
- 3);
609 if (((rmesa
->NeedTexMatrix
>> unit
) & 1) &&
610 (swaptexmatcol
!= ((rmesa
->TexMatColSwap
>> unit
) & 1)))
611 radeonUploadTexMatrix( rmesa
, unit
, swaptexmatcol
) ;
613 component
[nr
++] = &rmesa
->tcl
.tex
[unit
];
617 if (vtx
!= rmesa
->hw
.tcl
.cmd
[TCL_OUTPUT_VTXFMT
]) {
618 RADEON_STATECHANGE( rmesa
, tcl
);
619 rmesa
->hw
.tcl
.cmd
[TCL_OUTPUT_VTXFMT
] = vtx
;
622 rmesa
->tcl
.nr_aos_components
= nr
;
623 rmesa
->tcl
.vertex_format
= vfmt
;
627 void radeonReleaseArrays( GLcontext
*ctx
, GLuint newinputs
)
629 radeonContextPtr rmesa
= RADEON_CONTEXT( ctx
);
633 if (RADEON_DEBUG
& DEBUG_VERTS
)
634 _tnl_print_vert_flags( __FUNCTION__
, newinputs
);
637 if (newinputs
& VERT_BIT_POS
)
638 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.obj
, __FUNCTION__
);
640 if (newinputs
& VERT_BIT_NORMAL
)
641 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.norm
, __FUNCTION__
);
643 if (newinputs
& VERT_BIT_COLOR0
)
644 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.rgba
, __FUNCTION__
);
646 if (newinputs
& VERT_BIT_COLOR1
)
647 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.spec
, __FUNCTION__
);
649 if (newinputs
& VERT_BIT_FOG
)
650 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.fog
, __FUNCTION__
);
652 for (unit
= 0 ; unit
< ctx
->Const
.MaxTextureUnits
; unit
++) {
653 if (newinputs
& VERT_BIT_TEX(unit
))
654 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.tex
[unit
], __FUNCTION__
);