2 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
4 The Weather Channel (TM) funded Tungsten Graphics to develop the
5 initial release of the Radeon 8500 driver under the XFree86 license.
6 This notice must be preserved.
8 Permission is hereby granted, free of charge, to any person obtaining
9 a copy of this software and associated documentation files (the
10 "Software"), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
16 The above copyright notice and this permission notice (including the
17 next paragraph) shall be included in all copies or substantial
18 portions of the Software.
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
24 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 **************************************************************************/
32 * Keith Whitwell <keith@tungstengraphics.com>
35 #include "main/glheader.h"
36 #include "main/imports.h"
37 #include "main/mtypes.h"
38 #include "main/enums.h"
39 #include "main/colormac.h"
40 #include "main/light.h"
44 #include "tnl/t_pipeline.h"
46 #include "r200_context.h"
47 #include "r200_state.h"
48 #include "r200_ioctl.h"
50 #include "r200_swtcl.h"
51 #include "r200_maos.h"
53 #include "radeon_common_context.h"
/* Primitive capability flags (HAVE_xxx) and hardware primitive codes
 * (HW_xxx).  Line loops and the second triangle-strip variant are marked
 * unsupported (flag 0, code 0); the remaining primitives map to
 * R200_VF_PRIM_xxx values.  HW_POINTS picks the point-sprite code unless
 * DD_POINT_SMOOTH is set in ctx->_TriangleCaps, in which case plain points
 * are used.
 * NOTE(review): presumably consumed by the tnl_dd render template included
 * further down in this file -- confirm.
 */
59 #define HAVE_LINE_LOOP 0
60 #define HAVE_LINE_STRIPS 1
61 #define HAVE_TRIANGLES 1
62 #define HAVE_TRI_STRIPS 1
63 #define HAVE_TRI_STRIP_1 0
64 #define HAVE_TRI_FANS 1
66 #define HAVE_QUAD_STRIPS 1
67 #define HAVE_POLYGONS 1
71 #define HW_POINTS ((!(ctx->_TriangleCaps & DD_POINT_SMOOTH)) ? \
72 R200_VF_PRIM_POINT_SPRITES : R200_VF_PRIM_POINTS)
73 #define HW_LINES R200_VF_PRIM_LINES
74 #define HW_LINE_LOOP 0
75 #define HW_LINE_STRIP R200_VF_PRIM_LINE_STRIP
76 #define HW_TRIANGLES R200_VF_PRIM_TRIANGLES
77 #define HW_TRIANGLE_STRIP_0 R200_VF_PRIM_TRIANGLE_STRIP
78 #define HW_TRIANGLE_STRIP_1 0
79 #define HW_TRIANGLE_FAN R200_VF_PRIM_TRIANGLE_FAN
80 #define HW_QUADS R200_VF_PRIM_QUADS
81 #define HW_QUAD_STRIP R200_VF_PRIM_QUAD_STRIP
82 #define HW_POLYGON R200_VF_PRIM_POLYGON
/* Per-primitive flag table with 0x10 entries, indexed by the low four bits
 * of a hardware primitive code (see the hw_prim & 0xf lookup in
 * r200TclPrimitive).  A zero entry makes r200TclPrimitive take its
 * new-primitive path even when the primitive code is unchanged.
 * NOTE(review): presumably 1 means consecutive batches of that primitive may
 * be merged -- confirm.
 */
85 static GLboolean discrete_prim
[0x10] = {
93 0, /* 7 tri_w_flags */
94 1, /* 8 rect list (unused) */
95 1, /* 9 3vert point */
97 0, /* b point sprite */
100 0, /* e quad strip */
/* Driver hooks for the element (indexed) render path:
 *  - LOCAL_VARS fetches the r200 context as rmesa.
 *  - ELT_TYPE: element indices are GLushort.
 *  - ELT_INIT starts a primitive through r200TclPrimitive() with
 *    R200_VF_PRIM_WALK_IND OR-ed into the hardware primitive code.
 *  - GET_MESA_ELTS reads the element array from the TNL vertex buffer.
 *  - GET_MAX_HW_ELTS is fixed at 300.
 *  - RESET_STIPPLE and AUTO_STIPPLE mark the lin state atom changed and call
 *    radeonEmitState(); AUTO_STIPPLE also sets or clears
 *    R200_LINE_PATTERN_AUTO_RESET in LIN_RE_LINE_PATTERN (presumably
 *    depending on mode -- confirm).
 *  - ALLOC_ELTS forwards to r200AllocElts().
 */
105 #define LOCAL_VARS r200ContextPtr rmesa = R200_CONTEXT(ctx)
106 #define ELT_TYPE GLushort
108 #define ELT_INIT(prim, hw_prim) \
109 r200TclPrimitive( ctx, prim, hw_prim | R200_VF_PRIM_WALK_IND )
111 #define GET_MESA_ELTS() TNL_CONTEXT(ctx)->vb.Elts
114 /* Don't really know how many elts will fit in what's left of cmdbuf,
115 * as there is state to emit, etc:
118 /* Testing on isosurf shows a maximum around here. Don't know if it's
119 * the card or driver or kernel module that is causing the behaviour.
121 #define GET_MAX_HW_ELTS() 300
123 #define RESET_STIPPLE() do { \
124 R200_STATECHANGE( rmesa, lin ); \
125 radeonEmitState(&rmesa->radeon); \
128 #define AUTO_STIPPLE( mode ) do { \
129 R200_STATECHANGE( rmesa, lin ); \
131 rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] |= \
132 R200_LINE_PATTERN_AUTO_RESET; \
134 rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &= \
135 ~R200_LINE_PATTERN_AUTO_RESET; \
136 radeonEmitState(&rmesa->radeon); \
140 #define ALLOC_ELTS(nr) r200AllocElts( rmesa, nr )
/* Reserve room for nr GLushort element indices.
 *
 * When the installed DMA flush callback is r200FlushElts and
 * elt_used + nr*2 stays below R200_ELT_BUF_SZ, the indices are placed at
 * elt_dma_bo->ptr + elt_dma_offset + elt_used and elt_used grows by nr*2
 * (two per GLushort index).  The remaining statements run the pending flush
 * callback, emit the maximum vertex index and return whatever
 * r200AllocEltsOpenEnded() provides for the current hardware primitive.
 * NOTE(review): presumably those statements form the fallback branch taken
 * when the fast-path test fails -- confirm.
 */
142 static GLushort
*r200AllocElts( r200ContextPtr rmesa
, GLuint nr
)
/* Fast-path test: element buffer already open and large enough. */
144 if (rmesa
->radeon
.dma
.flush
== r200FlushElts
&&
145 rmesa
->tcl
.elt_used
+ nr
*2 < R200_ELT_BUF_SZ
) {
147 GLushort
*dest
= (GLushort
*)(rmesa
->radeon
.tcl
.elt_dma_bo
->ptr
+
148 rmesa
->radeon
.tcl
.elt_dma_offset
+ rmesa
->tcl
.elt_used
);
150 rmesa
->tcl
.elt_used
+= nr
*2;
/* Run the pending DMA flush callback, if one is installed. */
155 if (rmesa
->radeon
.dma
.flush
)
156 rmesa
->radeon
.dma
.flush( &rmesa
->radeon
.glCtx
);
159 rmesa
->radeon
.tcl
.aos_count
, 0 );
161 r200EmitMaxVtxIndex(rmesa
, rmesa
->radeon
.tcl
.aos
[0].count
);
/* Hand the request to r200AllocEltsOpenEnded() for the current primitive. */
162 return r200AllocEltsOpenEnded( rmesa
, rmesa
->tcl
.hw_primitive
, nr
);
/* CLOSE_ELTS: the R200_NEWPRIM call below is guarded by if (0), so it is
 * compiled but never executed.
 */
167 #define CLOSE_ELTS() \
169 if (0) R200_NEWPRIM( rmesa ); \
174 /* TODO: Try to extend existing primitive if both are identical,
175 * discrete and there are no intervening state changes. (Somewhat
176 * duplicates changes to DrawArrays code)
/* Emit one non-indexed primitive: select the hardware primitive through
 * r200TclPrimitive(), then issue it with r200EmitVbufPrim() using the
 * primitive code stored in rmesa->tcl.hw_primitive.
 */
178 static void r200EmitPrim( struct gl_context
*ctx
,
184 r200ContextPtr rmesa
= R200_CONTEXT( ctx
);
185 r200TclPrimitive( ctx
, prim
, hwprim
);
187 // fprintf(stderr,"Emit prim %d\n", rmesa->radeon.tcl.aos_count);
190 rmesa
->radeon
.tcl
.aos_count
,
193 /* Why couldn't this packet have taken an offset param?
195 r200EmitVbufPrim( rmesa
,
196 rmesa
->tcl
.hw_primitive
,
/* Emission hooks and template instantiation:
 *  - EMIT_PRIM forwards to r200EmitPrim().
 *  - MAX_CONVERSION_SIZE is 40.
 *  - PREFER_DISCRETE_ELT_PRIM has two definitions: a constant 0 and one that
 *    compares rmesa->tcl.hw_primitive against PRIM with the TCL-output and
 *    indexed-walk bits set.
 *  - EMIT_ELT stores one GLushort index.  The MESA_BIG_ENDIAN variant rounds
 *    dest down to a 4-byte boundary and writes to the neighbouring slot of
 *    each index pair (even offsets go to off+1, odd offsets to off-1).
 *  - EMIT_TWO_ELTS writes two indices with one GLuint store, x in the low
 *    16 bits and y in the high 16 bits.
 *  - TAG prefixes template-generated names with tcl_ before the template
 *    header is included.
 */
200 #define EMIT_PRIM(ctx, prim, hwprim, start, count) do { \
201 r200EmitPrim( ctx, prim, hwprim, start, count ); \
202 (void) rmesa; } while (0)
204 #define MAX_CONVERSION_SIZE 40
205 /* Try & join small primitives
208 #define PREFER_DISCRETE_ELT_PRIM( NR, PRIM ) 0
210 #define PREFER_DISCRETE_ELT_PRIM( NR, PRIM ) \
213 rmesa->tcl.hw_primitive == (PRIM| \
214 R200_VF_TCL_OUTPUT_VTX_ENABLE| \
215 R200_VF_PRIM_WALK_IND)))
218 #ifdef MESA_BIG_ENDIAN
219 /* We could do without (most of) this ugliness if dest was always 32 bit word aligned... */
220 #define EMIT_ELT(dest, offset, x) do { \
221 int off = offset + ( ( (uintptr_t)dest & 0x2 ) >> 1 ); \
222 GLushort *des = (GLushort *)( (uintptr_t)dest & ~0x2 ); \
223 (des)[ off + 1 - 2 * ( off & 1 ) ] = (GLushort)(x); \
224 (void)rmesa; } while (0)
226 #define EMIT_ELT(dest, offset, x) do { \
227 (dest)[offset] = (GLushort) (x); \
228 (void)rmesa; } while (0)
231 #define EMIT_TWO_ELTS(dest, offset, x, y) *(GLuint *)((dest)+offset) = ((y)<<16)|(x);
235 #define TAG(x) tcl_##x
236 #include "tnl_dd/t_dd_dmatmp2.h"
238 /**********************************************************************/
239 /* External entrypoints */
240 /**********************************************************************/
/* Render the vertex range [first, last) without element indices by
 * dispatching through tcl_render_tab_verts, indexed by the primitive mode
 * bits of flags (flags & PRIM_MODE_MASK).
 * NOTE(review): the half-open range is assumed from the start, start+length
 * call site in r200_run_tcl_render -- confirm.
 */
242 void r200EmitPrimitive( struct gl_context
*ctx
,
247 tcl_render_tab_verts
[flags
&PRIM_MODE_MASK
]( ctx
, first
, last
, flags
);
/* Indexed counterpart of r200EmitPrimitive: dispatches through
 * tcl_render_tab_elts, indexed by the primitive mode bits of flags
 * (flags & PRIM_MODE_MASK), passing first, last and flags through unchanged.
 */
250 void r200EmitEltPrimitive( struct gl_context
*ctx
,
255 tcl_render_tab_elts
[flags
&PRIM_MODE_MASK
]( ctx
, first
, last
, flags
);
/* Select the hardware primitive for subsequent TCL rendering.
 *
 * The requested code is hw_prim with R200_VF_TCL_OUTPUT_VTX_ENABLE OR-ed in.
 * When it differs from rmesa->tcl.hw_primitive, or the primitive is not
 * flagged in discrete_prim[], the perspective-correction bit in SET_RE_CNTL
 * is adjusted (off for GL_POINTS with point sprites enabled, on otherwise),
 * R200_NEWPRIM is issued and the new code is stored.
 */
258 void r200TclPrimitive( struct gl_context
*ctx
,
262 r200ContextPtr rmesa
= R200_CONTEXT(ctx
);
263 GLuint newprim
= hw_prim
| R200_VF_TCL_OUTPUT_VTX_ENABLE
;
/* Prepare for rendering and revalidate if Mesa flagged state changes. */
265 radeon_prepare_render(&rmesa
->radeon
);
266 if (rmesa
->radeon
.NewGLState
)
267 r200ValidateState( ctx
);
/* Primitive code changed, or this primitive is not marked discrete. */
269 if (newprim
!= rmesa
->tcl
.hw_primitive
||
270 !discrete_prim
[hw_prim
&0xf]) {
271 /* need to disable perspective-correct texturing for point sprites */
272 if ((prim
& PRIM_MODE_MASK
) == GL_POINTS
&& ctx
->Point
.PointSprite
) {
273 if (rmesa
->hw
.set
.cmd
[SET_RE_CNTL
] & R200_PERSPECTIVE_ENABLE
) {
274 R200_STATECHANGE( rmesa
, set
);
275 rmesa
->hw
.set
.cmd
[SET_RE_CNTL
] &= ~R200_PERSPECTIVE_ENABLE
;
/* Not a point sprite: switch perspective correction back on if it is off. */
278 else if (!(rmesa
->hw
.set
.cmd
[SET_RE_CNTL
] & R200_PERSPECTIVE_ENABLE
)) {
279 R200_STATECHANGE( rmesa
, set
);
280 rmesa
->hw
.set
.cmd
[SET_RE_CNTL
] |= R200_PERSPECTIVE_ENABLE
;
/* Signal the primitive change and remember the new hardware primitive. */
282 R200_NEWPRIM( rmesa
);
283 rmesa
->tcl
.hw_primitive
= newprim
;
288 * Predict the total emit size for the next rendering operation so that no flush happens in the middle of rendering.
289 * The prediction should be as tight as possible while never being smaller than the actual worst case.
/* Estimate the command-buffer space the next draw needs and make sure it
 * is available.
 *
 * The estimate combines: the state emit size (plus the vtx atom check size
 * when vtx is not already dirty), a per-primitive term (either VBUF_BUFSZ
 * or INDEX_BUFSZ + ELTS_BUFSZ per batch of GET_MAX_HW_ELTS() elements), and
 * AOS_BUFSZ(nr_aos).  vimap_rev entries different from 255 are scanned over
 * the first 15 slots to predict the number of arrays.
 *
 * Returns space_required plus the state size; the state size is recounted
 * when rcommonEnsureCmdBufSpace() returns non-zero.
 */
291 static GLuint
r200EnsureEmitSize( struct gl_context
* ctx
, GLubyte
* vimap_rev
)
293 r200ContextPtr rmesa
= R200_CONTEXT(ctx
);
294 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
295 struct vertex_buffer
*VB
= &tnl
->vb
;
296 GLuint space_required
;
300 /* predict number of aos to emit */
301 for (i
= 0; i
< 15; ++i
)
303 if (vimap_rev
[i
] != 255)
310 /* count the prediction for state size */
312 state_size
= radeonCountStateEmitSize( &rmesa
->radeon
);
313 /* vtx may be changed in r200EmitArrays so account for it if not dirty */
314 if (!rmesa
->hw
.vtx
.dirty
)
315 state_size
+= rmesa
->hw
.vtx
.check(&rmesa
->radeon
.glCtx
, &rmesa
->hw
.vtx
);
316 /* predict size for elements */
317 for (i
= 0; i
< VB
->PrimitiveCount
; ++i
)
319 if (!VB
->Primitive
[i
].count
)
321 /* If primitive.count is less than MAX_CONVERSION_SIZE
322 rendering code may decide convert to elts.
323 In that case we have to make pessimistic prediction.
324 and use larger of 2 paths. */
/* Number of element batches: count / max batch size, rounded up by one. */
325 const GLuint elt_count
=(VB
->Primitive
[i
].count
/GET_MAX_HW_ELTS() + 1);
326 const GLuint elts
= ELTS_BUFSZ(nr_aos
) * elt_count
;
327 const GLuint index
= INDEX_BUFSZ
* elt_count
;
328 const GLuint vbuf
= VBUF_BUFSZ
;
/* Use the vbuf cost for large non-indexed primitives or when it is the
 * larger of the two estimates. */
329 if ( (!VB
->Elts
&& VB
->Primitive
[i
].count
>= MAX_CONVERSION_SIZE
)
330 || vbuf
> index
+ elts
)
331 space_required
+= vbuf
;
333 space_required
+= index
+ elts
;
334 space_required
+= AOS_BUFSZ(nr_aos
);
338 radeon_print(RADEON_RENDER
,RADEON_VERBOSE
,
339 "%s space %u, aos %d\n",
340 __func__
, space_required
, AOS_BUFSZ(nr_aos
) );
341 /* flush the buffer in case we need more than is left. */
342 if (rcommonEnsureCmdBufSpace(&rmesa
->radeon
, space_required
+ state_size
, __FUNCTION__
))
/* Non-zero result: recount the state size instead of reusing state_size
 * (presumably because a flush dirtied the state -- confirm). */
343 return space_required
+ radeonCountStateEmitSize( &rmesa
->radeon
);
345 return space_required
+ state_size
;
349 /**********************************************************************/
350 /* Render pipeline stage */
351 /**********************************************************************/
/* Run callback of the hardware TCL pipeline stage.
 *
 * Returns GL_TRUE (continue to software T&L) when a TCL fallback is active
 * or r200ValidateState() fails.  Otherwise it selects the reverse vertex
 * input map -- the fixed-function layout built in map_rev_fixed, or the
 * current hardware vertex program's inputmap_rev together with a matching
 * VTX_TCL_OUTPUT_COMPSEL mask -- reserves command-buffer space, emits the
 * arrays and every primitive of the vertex buffer, and returns GL_FALSE to
 * end the pipeline.
 */
356 static GLboolean
r200_run_tcl_render( struct gl_context
*ctx
,
357 struct tnl_pipeline_stage
*stage
)
359 r200ContextPtr rmesa
= R200_CONTEXT(ctx
);
360 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
361 struct vertex_buffer
*VB
= &tnl
->vb
;
364 /* use hw fixed order for simplicity, pos 0, weight 1, normal 2, fog 3,
365 color0 - color3 4-7, texcoord0 - texcoord5 8-13, pos 1 14. Must not use
366 more than 12 of those at the same time. */
/* 255 marks a hardware slot as unused (see the != 255 test in
 * r200EnsureEmitSize). */
367 GLubyte map_rev_fixed
[15] = {255, 255, 255, 255, 255, 255, 255, 255,
368 255, 255, 255, 255, 255, 255, 255};
371 /* TODO: separate this from the swtnl pipeline
373 if (rmesa
->radeon
.TclFallback
)
374 return GL_TRUE
; /* fallback to software t&l */
376 radeon_print(RADEON_RENDER
, RADEON_NORMAL
, "%s\n", __FUNCTION__
);
383 if (rmesa
->radeon
.NewGLState
)
384 if (!r200ValidateState( ctx
))
385 return GL_TRUE
; /* fallback to sw t&l */
387 if (!ctx
->VertexProgram
._Enabled
) {
388 /* NOTE: inputs != tnl->render_inputs - these are the untransformed
391 map_rev_fixed
[0] = VERT_ATTRIB_POS
;
392 /* technically there is no reason we always need VA_COLOR0. In theory
393 could disable it depending on lighting, color materials, texturing... */
394 map_rev_fixed
[4] = VERT_ATTRIB_COLOR0
;
396 if (ctx
->Light
.Enabled
) {
397 map_rev_fixed
[2] = VERT_ATTRIB_NORMAL
;
400 /* this also enables VA_COLOR1 when using separate specular
401 lighting model, which is unnecessary.
402 FIXME: OTOH, we're missing the case where a ATI_fragment_shader accesses
403 the secondary color (if lighting is disabled). The chip seems
404 misconfigured for that though elsewhere (tcl output, might lock up) */
405 if (ctx
->_TriangleCaps
& DD_SEPARATE_SPECULAR
) {
406 map_rev_fixed
[5] = VERT_ATTRIB_COLOR1
;
409 if ( (ctx
->Fog
.FogCoordinateSource
== GL_FOG_COORD
) && ctx
->Fog
.Enabled
) {
410 map_rev_fixed
[3] = VERT_ATTRIB_FOG
;
413 for (i
= 0 ; i
< ctx
->Const
.MaxTextureUnits
; i
++) {
414 if (ctx
->Texture
.Unit
[i
]._ReallyEnabled
) {
415 if (rmesa
->TexGenNeedNormals
[i
]) {
416 map_rev_fixed
[2] = VERT_ATTRIB_NORMAL
;
418 map_rev_fixed
[8 + i
] = VERT_ATTRIB_TEX0
+ i
;
421 vimap_rev
= &map_rev_fixed
[0];
424 /* vtx_tcl_output_vtxfmt_0/1 need to match configuration of "fragment
425 part", since using some vertex interpolator later which is not in
426 out_vtxfmt0/1 will lock up. It seems to be ok to write in vertex
427 prog to a not enabled output however, so just don't mess with it.
428 We only need to change compsel. */
/* Build the TCL output component mask from the outputs written by the
 * current hardware vertex program (curr_vp_hw). */
429 GLuint out_compsel
= 0;
430 const GLbitfield64 vp_out
=
431 rmesa
->curr_vp_hw
->mesa_program
.Base
.OutputsWritten
;
433 vimap_rev
= &rmesa
->curr_vp_hw
->inputmap_rev
[0];
434 assert(vp_out
& BITFIELD64_BIT(VARYING_SLOT_POS
));
435 out_compsel
= R200_OUTPUT_XYZW
;
436 if (vp_out
& BITFIELD64_BIT(VARYING_SLOT_COL0
)) {
437 out_compsel
|= R200_OUTPUT_COLOR_0
;
439 if (vp_out
& BITFIELD64_BIT(VARYING_SLOT_COL1
)) {
440 out_compsel
|= R200_OUTPUT_COLOR_1
;
442 if (vp_out
& BITFIELD64_BIT(VARYING_SLOT_FOGC
)) {
443 out_compsel
|= R200_OUTPUT_DISCRETE_FOG
;
445 if (vp_out
& BITFIELD64_BIT(VARYING_SLOT_PSIZ
)) {
446 out_compsel
|= R200_OUTPUT_PT_SIZE
;
448 for (i
= VARYING_SLOT_TEX0
; i
< VARYING_SLOT_TEX6
; i
++) {
449 if (vp_out
& BITFIELD64_BIT(i
)) {
450 out_compsel
|= R200_OUTPUT_TEX_0
<< (i
- VARYING_SLOT_TEX0
);
453 if (rmesa
->hw
.vtx
.cmd
[VTX_TCL_OUTPUT_COMPSEL
] != out_compsel
) {
454 R200_STATECHANGE( rmesa
, vtx
);
455 rmesa
->hw
.vtx
.cmd
[VTX_TCL_OUTPUT_COMPSEL
] = out_compsel
;
459 /* Do the actual work:
461 radeonReleaseArrays( ctx
, ~0 /* stage->changed_inputs */ );
/* Predicted end of this draw in the command stream: the size estimate plus
 * the current cdw position; checked against the real position below. */
462 GLuint emit_end
= r200EnsureEmitSize( ctx
, vimap_rev
)
463 + rmesa
->radeon
.cmdbuf
.cs
->cdw
;
464 r200EmitArrays( ctx
, vimap_rev
);
466 for (i
= 0 ; i
< VB
->PrimitiveCount
; i
++)
468 GLuint prim
= _tnl_translate_prim(&VB
->Primitive
[i
]);
469 GLuint start
= VB
->Primitive
[i
].start
;
470 GLuint length
= VB
->Primitive
[i
].count
;
476 r200EmitEltPrimitive( ctx
, start
, start
+length
, prim
);
478 r200EmitPrimitive( ctx
, start
, start
+length
, prim
);
/* Warn once if more was emitted than r200EnsureEmitSize() predicted. */
480 if ( emit_end
< rmesa
->radeon
.cmdbuf
.cs
->cdw
)
481 WARN_ONCE("Rendering was %d commands larger than predicted size."
482 " We might overflow command buffer.\n", rmesa
->radeon
.cmdbuf
.cs
->cdw
- emit_end
);
484 return GL_FALSE
; /* finished the pipe */
489 /* Initial state for tcl stage.
/* TNL pipeline stage descriptor for the hardware TCL path; its run callback
 * is r200_run_tcl_render.
 */
491 const struct tnl_pipeline_stage _r200_tcl_stage
=
498 r200_run_tcl_render
/* run */
503 /**********************************************************************/
504 /* Validate state at pipeline start */
505 /**********************************************************************/
508 /*-----------------------------------------------------------------------
509 * Manage TCL fallbacks
/* Switch the driver from hardware TCL to software TNL.
 *
 * Issues R200_NEWPRIM, re-chooses the vertex and render state, validates the
 * shine tables and installs _tnl_validate_shine_tables as the
 * material-change callback, releases all arrays, and clears
 * R200_VAP_TCL_ENABLE and R200_VAP_PROG_VTX_SHADER_ENABLE in SE_VAP_CNTL.
 */
513 static void transition_to_swtnl( struct gl_context
*ctx
)
515 r200ContextPtr rmesa
= R200_CONTEXT(ctx
);
516 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
518 R200_NEWPRIM( rmesa
);
520 r200ChooseVertexState( ctx
);
521 r200ChooseRenderState( ctx
);
523 _tnl_validate_shine_tables( ctx
);
/* From here on, material changes are routed to the software TNL helper. */
525 tnl
->Driver
.NotifyMaterialChange
=
526 _tnl_validate_shine_tables
;
528 radeonReleaseArrays( ctx
, ~0 );
530 /* Still using the D3D based hardware-rasterizer from the radeon;
531 * need to put the card into D3D mode to make it work:
533 R200_STATECHANGE( rmesa
, vap
);
534 rmesa
->hw
.vap
.cmd
[VAP_SE_VAP_CNTL
] &= ~(R200_VAP_TCL_ENABLE
|R200_VAP_PROG_VTX_SHADER_ENABLE
);
/* Switch the driver from software TNL back to hardware TCL.
 *
 * Turns off the projected-coordinates request, routes material changes to
 * r200UpdateMaterial, runs and then clears the DMA flush callback, and
 * reprograms the hardware: TCL enabled and FORCE_W_TO_ONE cleared in
 * SE_VAP_CNTL (plus the programmable vertex shader bit when a vertex
 * program is enabled), fog source switched from specular alpha to vertex
 * fog when the fog coordinate source is GL_FOG_COORD, and the VTE control
 * word set to W0 format with the XY and Z format bits cleared.
 */
537 static void transition_to_hwtnl( struct gl_context
*ctx
)
539 r200ContextPtr rmesa
= R200_CONTEXT(ctx
);
540 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
542 _tnl_need_projected_coords( ctx
, GL_FALSE
);
544 r200UpdateMaterial( ctx
);
546 tnl
->Driver
.NotifyMaterialChange
= r200UpdateMaterial
;
/* Run any pending flush, then drop the callback. */
548 if ( rmesa
->radeon
.dma
.flush
)
549 rmesa
->radeon
.dma
.flush( &rmesa
->radeon
.glCtx
);
551 rmesa
->radeon
.dma
.flush
= NULL
;
553 R200_STATECHANGE( rmesa
, vap
);
554 rmesa
->hw
.vap
.cmd
[VAP_SE_VAP_CNTL
] |= R200_VAP_TCL_ENABLE
;
555 rmesa
->hw
.vap
.cmd
[VAP_SE_VAP_CNTL
] &= ~R200_VAP_FORCE_W_TO_ONE
;
557 if (ctx
->VertexProgram
._Enabled
) {
558 rmesa
->hw
.vap
.cmd
[VAP_SE_VAP_CNTL
] |= R200_VAP_PROG_VTX_SHADER_ENABLE
;
/* Fog currently taken from specular alpha while GL asks for fog coords:
 * switch the fog source to vertex fog. */
561 if ( ((rmesa
->hw
.ctx
.cmd
[CTX_PP_FOG_COLOR
] & R200_FOG_USE_MASK
)
562 == R200_FOG_USE_SPEC_ALPHA
) &&
563 (ctx
->Fog
.FogCoordinateSource
== GL_FOG_COORD
)) {
564 R200_STATECHANGE( rmesa
, ctx
);
565 rmesa
->hw
.ctx
.cmd
[CTX_PP_FOG_COLOR
] &= ~R200_FOG_USE_MASK
;
566 rmesa
->hw
.ctx
.cmd
[CTX_PP_FOG_COLOR
] |= R200_FOG_USE_VTX_FOG
;
569 R200_STATECHANGE( rmesa
, vte
);
570 rmesa
->hw
.vte
.cmd
[VTE_SE_VTE_CNTL
] &= ~(R200_VTX_XY_FMT
|R200_VTX_Z_FMT
);
571 rmesa
->hw
.vte
.cmd
[VTE_SE_VTE_CNTL
] |= R200_VTX_W0_FMT
;
573 if (R200_DEBUG
& RADEON_FALLBACKS
)
574 fprintf(stderr
, "R200 end tcl fallback\n");
/* Human-readable names for the TCL fallback reasons, returned by
 * getFallbackString() for the debug messages in r200TclFallback().
 * NOTE(review): presumably ordered by fallback bit position -- confirm.
 */
578 static char *fallbackStrings
[] = {
579 "Rasterization fallback",
580 "Unfilled triangles",
581 "Twosided lighting, differing materials",
582 "Materials in VB (maybe between begin/end)",
/* Return the fallbackStrings[] entry for a fallback bit.
 * NOTE(review): presumably i is the bit position of 'bit'; verify that the
 * index cannot run past the end of fallbackStrings[].
 */
595 static char *getFallbackString(GLuint bit
)
602 return fallbackStrings
[i
];
/* Set or clear one TCL fallback reason in rmesa->radeon.TclFallback.
 *
 * When the previous mask was 0, setting a bit flushes pending DMA, logs the
 * reason if RADEON_FALLBACKS debugging is on, and switches to software TNL.
 * When the previous mask equals bit, clearing it flushes, logs and switches
 * back to hardware TNL.  In the other cases only the mask is updated.
 * NOTE(review): presumably mode selects between the set and clear halves --
 * confirm.
 */
607 void r200TclFallback( struct gl_context
*ctx
, GLuint bit
, GLboolean mode
)
609 r200ContextPtr rmesa
= R200_CONTEXT(ctx
);
610 GLuint oldfallback
= rmesa
->radeon
.TclFallback
;
/* No fallback was active before: this bit starts the software path. */
613 if (oldfallback
== 0) {
614 /* We have to flush before transition */
615 if ( rmesa
->radeon
.dma
.flush
)
616 rmesa
->radeon
.dma
.flush( &rmesa
->radeon
.glCtx
);
618 if (R200_DEBUG
& RADEON_FALLBACKS
)
619 fprintf(stderr
, "R200 begin tcl fallback %s\n",
620 getFallbackString( bit
));
621 rmesa
->radeon
.TclFallback
|= bit
;
622 transition_to_swtnl( ctx
);
624 rmesa
->radeon
.TclFallback
|= bit
;
/* This bit was the only active fallback: clearing it re-enables hw TCL. */
626 if (oldfallback
== bit
) {
627 /* We have to flush before transition */
628 if ( rmesa
->radeon
.dma
.flush
)
629 rmesa
->radeon
.dma
.flush( &rmesa
->radeon
.glCtx
);
631 if (R200_DEBUG
& RADEON_FALLBACKS
)
632 fprintf(stderr
, "R200 end tcl fallback %s\n",
633 getFallbackString( bit
));
634 rmesa
->radeon
.TclFallback
&= ~bit
;
635 transition_to_hwtnl( ctx
);
637 rmesa
->radeon
.TclFallback
&= ~bit
;