1 /**************************************************************************
3 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
4 Tungsten Graphics Inc., Cedar Park, Texas.
8 Permission is hereby granted, free of charge, to any person obtaining
9 a copy of this software and associated documentation files (the
10 "Software"), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
16 The above copyright notice and this permission notice (including the
17 next paragraph) shall be included in all copies or substantial
18 portions of the Software.
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
24 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 **************************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keith@tungstengraphics.com>
 */
#include <assert.h>
#include <stdlib.h>

#include "main/glheader.h"
#include "main/imports.h"
#include "main/mtypes.h"
#include "main/macros.h"

#include "swrast_setup/swrast_setup.h"
#include "math/m_translate.h"

#include "tnl/tcontext.h"

#include "radeon_context.h"
#include "radeon_ioctl.h"
#include "radeon_state.h"
#include "radeon_swtcl.h"
#include "radeon_maos.h"
#include "radeon_tcl.h"
/*
 * COPY_DWORDS( dst, src, nr )
 *
 * Copy nr 32-bit words from src to dst.  dst must be a modifiable pointer
 * lvalue; on return it points just past the last word written (the x86
 * variant gets this for free because EDI is an output operand, and the
 * portable variant advances dst explicitly to match).
 *
 * Every argument is parenthesized in the expansion so that expressions such
 * as "count * 2" or "wide ? 4 : 2" can be passed safely.
 */
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )                                     \
do {                                                                    \
   int __tmp;                                                           \
   __asm__ __volatile__( "rep ; movsl"                                  \
                         : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)      \
                         : "0" (nr),                                    \
                           "D" ((long)(dst)),                           \
                           "S" ((long)(src)) );                         \
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )                                     \
do {                                                                    \
   const int copy_dwords_n_ = (nr);   /* evaluate nr exactly once */    \
   int copy_dwords_j_;                                                  \
   for ( copy_dwords_j_ = 0 ;                                           \
         copy_dwords_j_ < copy_dwords_n_ ;                              \
         copy_dwords_j_++ )                                             \
      (dst)[copy_dwords_j_] = ((int *)(src))[copy_dwords_j_];           \
   (dst) += copy_dwords_n_;                                             \
} while (0)
#endif
72 static void emit_vecfog( GLcontext
*ctx
,
73 struct radeon_dma_region
*rvb
,
81 radeonContextPtr rmesa
= RADEON_CONTEXT(ctx
);
83 if (RADEON_DEBUG
& DEBUG_VERTS
)
84 fprintf(stderr
, "%s count %d stride %d\n",
85 __FUNCTION__
, count
, stride
);
90 radeonAllocDmaRegion( rmesa
, rvb
, 4, 4 );
92 rvb
->aos_start
= GET_START(rvb
);
97 radeonAllocDmaRegion( rmesa
, rvb
, count
* 4, 4 ); /* alignment? */
98 rvb
->aos_start
= GET_START(rvb
);
105 out
= (GLfloat
*)(rvb
->address
+ rvb
->start
);
106 for (i
= 0; i
< count
; i
++) {
107 out
[0] = radeonComputeFogBlendFactor( ctx
, *(GLfloat
*)data
);
113 static void emit_vec4( GLcontext
*ctx
,
114 struct radeon_dma_region
*rvb
,
120 int *out
= (int *)(rvb
->address
+ rvb
->start
);
122 if (RADEON_DEBUG
& DEBUG_VERTS
)
123 fprintf(stderr
, "%s count %d stride %d\n",
124 __FUNCTION__
, count
, stride
);
127 COPY_DWORDS( out
, data
, count
);
129 for (i
= 0; i
< count
; i
++) {
130 out
[0] = *(int *)data
;
137 static void emit_vec8( GLcontext
*ctx
,
138 struct radeon_dma_region
*rvb
,
144 int *out
= (int *)(rvb
->address
+ rvb
->start
);
146 if (RADEON_DEBUG
& DEBUG_VERTS
)
147 fprintf(stderr
, "%s count %d stride %d\n",
148 __FUNCTION__
, count
, stride
);
151 COPY_DWORDS( out
, data
, count
*2 );
153 for (i
= 0; i
< count
; i
++) {
154 out
[0] = *(int *)data
;
155 out
[1] = *(int *)(data
+4);
161 static void emit_vec12( GLcontext
*ctx
,
162 struct radeon_dma_region
*rvb
,
168 int *out
= (int *)(rvb
->address
+ rvb
->start
);
170 if (RADEON_DEBUG
& DEBUG_VERTS
)
171 fprintf(stderr
, "%s count %d stride %d out %p data %p\n",
172 __FUNCTION__
, count
, stride
, (void *)out
, (void *)data
);
175 COPY_DWORDS( out
, data
, count
*3 );
177 for (i
= 0; i
< count
; i
++) {
178 out
[0] = *(int *)data
;
179 out
[1] = *(int *)(data
+4);
180 out
[2] = *(int *)(data
+8);
186 static void emit_vec16( GLcontext
*ctx
,
187 struct radeon_dma_region
*rvb
,
193 int *out
= (int *)(rvb
->address
+ rvb
->start
);
195 if (RADEON_DEBUG
& DEBUG_VERTS
)
196 fprintf(stderr
, "%s count %d stride %d\n",
197 __FUNCTION__
, count
, stride
);
200 COPY_DWORDS( out
, data
, count
*4 );
202 for (i
= 0; i
< count
; i
++) {
203 out
[0] = *(int *)data
;
204 out
[1] = *(int *)(data
+4);
205 out
[2] = *(int *)(data
+8);
206 out
[3] = *(int *)(data
+12);
213 static void emit_vector( GLcontext
*ctx
,
214 struct radeon_dma_region
*rvb
,
220 radeonContextPtr rmesa
= RADEON_CONTEXT(ctx
);
222 if (RADEON_DEBUG
& DEBUG_VERTS
)
223 fprintf(stderr
, "%s count %d size %d stride %d\n",
224 __FUNCTION__
, count
, size
, stride
);
229 radeonAllocDmaRegion( rmesa
, rvb
, size
* 4, 4 );
231 rvb
->aos_start
= GET_START(rvb
);
233 rvb
->aos_size
= size
;
236 radeonAllocDmaRegion( rmesa
, rvb
, size
* count
* 4, 4 ); /* alignment? */
237 rvb
->aos_start
= GET_START(rvb
);
238 rvb
->aos_stride
= size
;
239 rvb
->aos_size
= size
;
246 emit_vec4( ctx
, rvb
, data
, stride
, count
);
249 emit_vec8( ctx
, rvb
, data
, stride
, count
);
252 emit_vec12( ctx
, rvb
, data
, stride
, count
);
255 emit_vec16( ctx
, rvb
, data
, stride
, count
);
267 static void emit_s0_vec( GLcontext
*ctx
,
268 struct radeon_dma_region
*rvb
,
274 int *out
= (int *)(rvb
->address
+ rvb
->start
);
276 if (RADEON_DEBUG
& DEBUG_VERTS
)
277 fprintf(stderr
, "%s count %d stride %d\n",
278 __FUNCTION__
, count
, stride
);
280 for (i
= 0; i
< count
; i
++) {
281 out
[0] = *(int *)data
;
288 static void emit_stq_vec( GLcontext
*ctx
,
289 struct radeon_dma_region
*rvb
,
295 int *out
= (int *)(rvb
->address
+ rvb
->start
);
297 if (RADEON_DEBUG
& DEBUG_VERTS
)
298 fprintf(stderr
, "%s count %d stride %d\n",
299 __FUNCTION__
, count
, stride
);
301 for (i
= 0; i
< count
; i
++) {
302 out
[0] = *(int *)data
;
303 out
[1] = *(int *)(data
+4);
304 out
[2] = *(int *)(data
+12);
313 static void emit_tex_vector( GLcontext
*ctx
,
314 struct radeon_dma_region
*rvb
,
320 radeonContextPtr rmesa
= RADEON_CONTEXT(ctx
);
323 if (RADEON_DEBUG
& DEBUG_VERTS
)
324 fprintf(stderr
, "%s %d/%d\n", __FUNCTION__
, count
, size
);
329 case 4: emitsize
= 3; break;
330 case 3: emitsize
= 3; break;
331 default: emitsize
= 2; break;
336 radeonAllocDmaRegion( rmesa
, rvb
, 4 * emitsize
, 4 );
338 rvb
->aos_start
= GET_START(rvb
);
340 rvb
->aos_size
= emitsize
;
343 radeonAllocDmaRegion( rmesa
, rvb
, 4 * emitsize
* count
, 4 );
344 rvb
->aos_start
= GET_START(rvb
);
345 rvb
->aos_stride
= emitsize
;
346 rvb
->aos_size
= emitsize
;
354 emit_s0_vec( ctx
, rvb
, data
, stride
, count
);
357 emit_vec8( ctx
, rvb
, data
, stride
, count
);
360 emit_vec12( ctx
, rvb
, data
, stride
, count
);
363 emit_stq_vec( ctx
, rvb
, data
, stride
, count
);
375 /* Emit any changed arrays to new GART memory, re-emit a packet to
378 void radeonEmitArrays( GLcontext
*ctx
, GLuint inputs
)
380 radeonContextPtr rmesa
= RADEON_CONTEXT( ctx
);
381 struct vertex_buffer
*VB
= &TNL_CONTEXT( ctx
)->vb
;
382 struct radeon_dma_region
**component
= rmesa
->tcl
.aos_components
;
385 GLuint count
= VB
->Count
;
389 if (RADEON_DEBUG
& DEBUG_VERTS
)
390 _tnl_print_vert_flags( __FUNCTION__
, inputs
);
394 if (!rmesa
->tcl
.obj
.buf
)
397 (char *)VB
->ObjPtr
->data
,
402 switch( VB
->ObjPtr
->size
) {
403 case 4: vfmt
|= RADEON_CP_VC_FRMT_W0
;
404 case 3: vfmt
|= RADEON_CP_VC_FRMT_Z
;
405 case 2: vfmt
|= RADEON_CP_VC_FRMT_XY
;
409 component
[nr
++] = &rmesa
->tcl
.obj
;
413 if (inputs
& VERT_BIT_NORMAL
) {
414 if (!rmesa
->tcl
.norm
.buf
)
417 (char *)VB
->NormalPtr
->data
,
419 VB
->NormalPtr
->stride
,
422 vfmt
|= RADEON_CP_VC_FRMT_N0
;
423 component
[nr
++] = &rmesa
->tcl
.norm
;
426 if (inputs
& VERT_BIT_COLOR0
) {
428 if (VB
->ColorPtr
[0]->size
== 4 &&
429 (VB
->ColorPtr
[0]->stride
!= 0 ||
430 VB
->ColorPtr
[0]->data
[0][3] != 1.0)) {
431 vfmt
|= RADEON_CP_VC_FRMT_FPCOLOR
| RADEON_CP_VC_FRMT_FPALPHA
;
436 vfmt
|= RADEON_CP_VC_FRMT_FPCOLOR
;
440 if (!rmesa
->tcl
.rgba
.buf
)
443 (char *)VB
->ColorPtr
[0]->data
,
445 VB
->ColorPtr
[0]->stride
,
449 component
[nr
++] = &rmesa
->tcl
.rgba
;
453 if (inputs
& VERT_BIT_COLOR1
) {
454 if (!rmesa
->tcl
.spec
.buf
) {
458 (char *)VB
->SecondaryColorPtr
[0]->data
,
460 VB
->SecondaryColorPtr
[0]->stride
,
464 vfmt
|= RADEON_CP_VC_FRMT_FPSPEC
;
465 component
[nr
++] = &rmesa
->tcl
.spec
;
468 /* FIXME: not sure if this is correct. May need to stitch this together with
469 secondary color. It seems odd that for primary color color and alpha values
470 are emitted together but for secondary color not. */
471 if (inputs
& VERT_BIT_FOG
) {
472 if (!rmesa
->tcl
.fog
.buf
)
475 (char *)VB
->FogCoordPtr
->data
,
476 VB
->FogCoordPtr
->stride
,
479 vfmt
|= RADEON_CP_VC_FRMT_FPFOG
;
480 component
[nr
++] = &rmesa
->tcl
.fog
;
484 vtx
= (rmesa
->hw
.tcl
.cmd
[TCL_OUTPUT_VTXFMT
] &
485 ~(RADEON_TCL_VTX_Q0
|RADEON_TCL_VTX_Q1
|RADEON_TCL_VTX_Q2
));
487 for (unit
= 0; unit
< ctx
->Const
.MaxTextureUnits
; unit
++) {
488 if (inputs
& VERT_BIT_TEX(unit
)) {
489 if (!rmesa
->tcl
.tex
[unit
].buf
)
490 emit_tex_vector( ctx
,
491 &(rmesa
->tcl
.tex
[unit
]),
492 (char *)VB
->TexCoordPtr
[unit
]->data
,
493 VB
->TexCoordPtr
[unit
]->size
,
494 VB
->TexCoordPtr
[unit
]->stride
,
497 vfmt
|= RADEON_ST_BIT(unit
);
498 /* assume we need the 3rd coord if texgen is active for r/q OR at least
499 3 coords are submitted. This may not be 100% correct */
500 if (VB
->TexCoordPtr
[unit
]->size
>= 3) {
501 vtx
|= RADEON_Q_BIT(unit
);
502 vfmt
|= RADEON_Q_BIT(unit
);
504 if ( (ctx
->Texture
.Unit
[unit
].TexGenEnabled
& (R_BIT
| Q_BIT
)) )
505 vtx
|= RADEON_Q_BIT(unit
);
506 else if ((VB
->TexCoordPtr
[unit
]->size
>= 3) &&
507 ((ctx
->Texture
.Unit
[unit
]._ReallyEnabled
& (TEXTURE_CUBE_BIT
)) == 0)) {
508 GLuint swaptexmatcol
= (VB
->TexCoordPtr
[unit
]->size
- 3);
509 if (((rmesa
->NeedTexMatrix
>> unit
) & 1) &&
510 (swaptexmatcol
!= ((rmesa
->TexMatColSwap
>> unit
) & 1)))
511 radeonUploadTexMatrix( rmesa
, unit
, swaptexmatcol
) ;
513 component
[nr
++] = &rmesa
->tcl
.tex
[unit
];
517 if (vtx
!= rmesa
->hw
.tcl
.cmd
[TCL_OUTPUT_VTXFMT
]) {
518 RADEON_STATECHANGE( rmesa
, tcl
);
519 rmesa
->hw
.tcl
.cmd
[TCL_OUTPUT_VTXFMT
] = vtx
;
522 rmesa
->tcl
.nr_aos_components
= nr
;
523 rmesa
->tcl
.vertex_format
= vfmt
;
527 void radeonReleaseArrays( GLcontext
*ctx
, GLuint newinputs
)
529 radeonContextPtr rmesa
= RADEON_CONTEXT( ctx
);
533 if (RADEON_DEBUG
& DEBUG_VERTS
)
534 _tnl_print_vert_flags( __FUNCTION__
, newinputs
);
537 if (newinputs
& VERT_BIT_POS
)
538 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.obj
, __FUNCTION__
);
540 if (newinputs
& VERT_BIT_NORMAL
)
541 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.norm
, __FUNCTION__
);
543 if (newinputs
& VERT_BIT_COLOR0
)
544 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.rgba
, __FUNCTION__
);
546 if (newinputs
& VERT_BIT_COLOR1
)
547 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.spec
, __FUNCTION__
);
549 if (newinputs
& VERT_BIT_FOG
)
550 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.fog
, __FUNCTION__
);
552 for (unit
= 0 ; unit
< ctx
->Const
.MaxTextureUnits
; unit
++) {
553 if (newinputs
& VERT_BIT_TEX(unit
))
554 radeonReleaseDmaRegion( rmesa
, &rmesa
->tcl
.tex
[unit
], __FUNCTION__
);