2 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
4 The Weather Channel (TM) funded Tungsten Graphics to develop the
5 initial release of the Radeon 8500 driver under the XFree86 license.
6 This notice must be preserved.
8 Permission is hereby granted, free of charge, to any person obtaining
9 a copy of this software and associated documentation files (the
10 "Software"), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
16 The above copyright notice and this permission notice (including the
17 next paragraph) shall be included in all copies or substantial
18 portions of the Software.
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
24 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 **************************************************************************/
32 * Keith Whitwell <keithw@vmware.com>
35 #include "main/glheader.h"
36 #include "main/imports.h"
37 #include "main/mtypes.h"
38 #include "main/enums.h"
39 #include "main/light.h"
40 #include "main/state.h"
44 #include "tnl/t_pipeline.h"
46 #include "r200_context.h"
47 #include "r200_state.h"
48 #include "r200_ioctl.h"
50 #include "r200_swtcl.h"
51 #include "r200_maos.h"
53 #include "radeon_common_context.h"
59 #define HAVE_LINE_LOOP 0
60 #define HAVE_LINE_STRIPS 1
61 #define HAVE_TRIANGLES 1
62 #define HAVE_TRI_STRIPS 1
63 #define HAVE_TRI_FANS 1
65 #define HAVE_QUAD_STRIPS 1
66 #define HAVE_POLYGONS 1
70 #define HW_POINTS ((!ctx->Point.SmoothFlag) ? \
71 R200_VF_PRIM_POINT_SPRITES : R200_VF_PRIM_POINTS)
72 #define HW_LINES R200_VF_PRIM_LINES
73 #define HW_LINE_LOOP 0
74 #define HW_LINE_STRIP R200_VF_PRIM_LINE_STRIP
75 #define HW_TRIANGLES R200_VF_PRIM_TRIANGLES
76 #define HW_TRIANGLE_STRIP_0 R200_VF_PRIM_TRIANGLE_STRIP
77 #define HW_TRIANGLE_STRIP_1 0
78 #define HW_TRIANGLE_FAN R200_VF_PRIM_TRIANGLE_FAN
79 #define HW_QUADS R200_VF_PRIM_QUADS
80 #define HW_QUAD_STRIP R200_VF_PRIM_QUAD_STRIP
81 #define HW_POLYGON R200_VF_PRIM_POLYGON
84 static GLboolean discrete_prim
[0x10] = {
92 0, /* 7 tri_w_flags */
93 1, /* 8 rect list (unused) */
94 1, /* 9 3vert point */
96 0, /* b point sprite */
104 #define LOCAL_VARS r200ContextPtr rmesa = R200_CONTEXT(ctx)
105 #define ELT_TYPE GLushort
107 #define ELT_INIT(prim, hw_prim) \
108 r200TclPrimitive( ctx, prim, hw_prim | R200_VF_PRIM_WALK_IND )
110 #define GET_MESA_ELTS() TNL_CONTEXT(ctx)->vb.Elts
113 /* Don't really know how many elts will fit in what's left of cmdbuf,
114 * as there is state to emit, etc:
117 /* Testing on isosurf shows a maximum around here. Don't know if it's
118 * the card or driver or kernel module that is causing the behaviour.
120 #define GET_MAX_HW_ELTS() 300
122 #define RESET_STIPPLE() do { \
123 R200_STATECHANGE( rmesa, lin ); \
124 radeonEmitState(&rmesa->radeon); \
127 #define AUTO_STIPPLE( mode ) do { \
128 R200_STATECHANGE( rmesa, lin ); \
130 rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] |= \
131 R200_LINE_PATTERN_AUTO_RESET; \
133 rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &= \
134 ~R200_LINE_PATTERN_AUTO_RESET; \
135 radeonEmitState(&rmesa->radeon); \
139 #define ALLOC_ELTS(nr) r200AllocElts( rmesa, nr )
141 static GLushort
*r200AllocElts( r200ContextPtr rmesa
, GLuint nr
)
143 if (rmesa
->radeon
.dma
.flush
== r200FlushElts
&&
144 rmesa
->tcl
.elt_used
+ nr
*2 < R200_ELT_BUF_SZ
) {
146 GLushort
*dest
= (GLushort
*)(rmesa
->radeon
.tcl
.elt_dma_bo
->ptr
+
147 rmesa
->radeon
.tcl
.elt_dma_offset
+ rmesa
->tcl
.elt_used
);
149 rmesa
->tcl
.elt_used
+= nr
*2;
154 if (rmesa
->radeon
.dma
.flush
)
155 rmesa
->radeon
.dma
.flush( &rmesa
->radeon
.glCtx
);
158 rmesa
->radeon
.tcl
.aos_count
, 0 );
160 r200EmitMaxVtxIndex(rmesa
, rmesa
->radeon
.tcl
.aos
[0].count
);
161 return r200AllocEltsOpenEnded( rmesa
, rmesa
->tcl
.hw_primitive
, nr
);
166 #define CLOSE_ELTS() \
168 if (0) R200_NEWPRIM( rmesa ); \
173 /* TODO: Try to extend existing primitive if both are identical,
174 * discrete and there are no intervening state changes. (Somewhat
175 * duplicates changes to DrawArrays code)
177 static void r200EmitPrim( struct gl_context
*ctx
,
183 r200ContextPtr rmesa
= R200_CONTEXT( ctx
);
184 r200TclPrimitive( ctx
, prim
, hwprim
);
186 // fprintf(stderr,"Emit prim %d\n", rmesa->radeon.tcl.aos_count);
189 rmesa
->radeon
.tcl
.aos_count
,
192 /* Why couldn't this packet have taken an offset param?
194 r200EmitVbufPrim( rmesa
,
195 rmesa
->tcl
.hw_primitive
,
199 #define EMIT_PRIM(ctx, prim, hwprim, start, count) do { \
200 r200EmitPrim( ctx, prim, hwprim, start, count ); \
201 (void) rmesa; } while (0)
203 #define MAX_CONVERSION_SIZE 40
204 /* Try & join small primitives
207 #define PREFER_DISCRETE_ELT_PRIM( NR, PRIM ) 0
209 #define PREFER_DISCRETE_ELT_PRIM( NR, PRIM ) \
212 rmesa->tcl.hw_primitive == (PRIM| \
213 R200_VF_TCL_OUTPUT_VTX_ENABLE| \
214 R200_VF_PRIM_WALK_IND)))
217 #ifdef MESA_BIG_ENDIAN
218 /* We could do without (most of) this ugliness if dest was always 32 bit word aligned... */
219 #define EMIT_ELT(dest, offset, x) do { \
220 int off = offset + ( ( (uintptr_t)dest & 0x2 ) >> 1 ); \
221 GLushort *des = (GLushort *)( (uintptr_t)dest & ~0x2 ); \
222 (des)[ off + 1 - 2 * ( off & 1 ) ] = (GLushort)(x); \
223 (void)rmesa; } while (0)
225 #define EMIT_ELT(dest, offset, x) do { \
226 (dest)[offset] = (GLushort) (x); \
227 (void)rmesa; } while (0)
230 #define EMIT_TWO_ELTS(dest, offset, x, y) *(GLuint *)((dest)+offset) = ((y)<<16)|(x);
234 #define TAG(x) tcl_##x
235 #include "tnl_dd/t_dd_dmatmp2.h"
237 /**********************************************************************/
238 /* External entrypoints */
239 /**********************************************************************/
241 void r200EmitPrimitive( struct gl_context
*ctx
,
246 tcl_render_tab_verts
[flags
&PRIM_MODE_MASK
]( ctx
, first
, last
, flags
);
249 void r200EmitEltPrimitive( struct gl_context
*ctx
,
254 tcl_render_tab_elts
[flags
&PRIM_MODE_MASK
]( ctx
, first
, last
, flags
);
257 void r200TclPrimitive( struct gl_context
*ctx
,
261 r200ContextPtr rmesa
= R200_CONTEXT(ctx
);
262 GLuint newprim
= hw_prim
| R200_VF_TCL_OUTPUT_VTX_ENABLE
;
264 radeon_prepare_render(&rmesa
->radeon
);
265 if (rmesa
->radeon
.NewGLState
)
266 r200ValidateState( ctx
);
268 if (newprim
!= rmesa
->tcl
.hw_primitive
||
269 !discrete_prim
[hw_prim
&0xf]) {
270 /* need to disable perspective-correct texturing for point sprites */
271 if ((prim
& PRIM_MODE_MASK
) == GL_POINTS
&& ctx
->Point
.PointSprite
) {
272 if (rmesa
->hw
.set
.cmd
[SET_RE_CNTL
] & R200_PERSPECTIVE_ENABLE
) {
273 R200_STATECHANGE( rmesa
, set
);
274 rmesa
->hw
.set
.cmd
[SET_RE_CNTL
] &= ~R200_PERSPECTIVE_ENABLE
;
277 else if (!(rmesa
->hw
.set
.cmd
[SET_RE_CNTL
] & R200_PERSPECTIVE_ENABLE
)) {
278 R200_STATECHANGE( rmesa
, set
);
279 rmesa
->hw
.set
.cmd
[SET_RE_CNTL
] |= R200_PERSPECTIVE_ENABLE
;
281 R200_NEWPRIM( rmesa
);
282 rmesa
->tcl
.hw_primitive
= newprim
;
287 * Predict total emit size for next rendering operation so there is no flush in middle of rendering
288 * Prediction has to aim towards the best possible value that is worse than worst case scenario
290 static GLuint
r200EnsureEmitSize( struct gl_context
* ctx
, GLubyte
* vimap_rev
)
292 r200ContextPtr rmesa
= R200_CONTEXT(ctx
);
293 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
294 struct vertex_buffer
*VB
= &tnl
->vb
;
295 GLuint space_required
;
299 /* predict number of aos to emit */
300 for (i
= 0; i
< 15; ++i
)
302 if (vimap_rev
[i
] != 255)
309 /* count the prediction for state size */
311 state_size
= radeonCountStateEmitSize( &rmesa
->radeon
);
312 /* vtx may be changed in r200EmitArrays so account for it if not dirty */
313 if (!rmesa
->hw
.vtx
.dirty
)
314 state_size
+= rmesa
->hw
.vtx
.check(&rmesa
->radeon
.glCtx
, &rmesa
->hw
.vtx
);
315 /* predict size for elements */
316 for (i
= 0; i
< VB
->PrimitiveCount
; ++i
)
318 if (!VB
->Primitive
[i
].count
)
320 /* If primitive.count is less than MAX_CONVERSION_SIZE
321 rendering code may decide convert to elts.
322 In that case we have to make pessimistic prediction.
323 and use larger of 2 paths. */
324 const GLuint elt_count
=(VB
->Primitive
[i
].count
/GET_MAX_HW_ELTS() + 1);
325 const GLuint elts
= ELTS_BUFSZ(nr_aos
) * elt_count
;
326 const GLuint index
= INDEX_BUFSZ
* elt_count
;
327 const GLuint vbuf
= VBUF_BUFSZ
;
328 if ( (!VB
->Elts
&& VB
->Primitive
[i
].count
>= MAX_CONVERSION_SIZE
)
329 || vbuf
> index
+ elts
)
330 space_required
+= vbuf
;
332 space_required
+= index
+ elts
;
333 space_required
+= AOS_BUFSZ(nr_aos
);
337 radeon_print(RADEON_RENDER
,RADEON_VERBOSE
,
338 "%s space %u, aos %d\n",
339 __func__
, space_required
, AOS_BUFSZ(nr_aos
) );
340 /* flush the buffer in case we need more than is left. */
341 if (rcommonEnsureCmdBufSpace(&rmesa
->radeon
, space_required
+ state_size
, __func__
))
342 return space_required
+ radeonCountStateEmitSize( &rmesa
->radeon
);
344 return space_required
+ state_size
;
348 /**********************************************************************/
349 /* Render pipeline stage */
350 /**********************************************************************/
355 static GLboolean
r200_run_tcl_render( struct gl_context
*ctx
,
356 struct tnl_pipeline_stage
*stage
)
358 r200ContextPtr rmesa
= R200_CONTEXT(ctx
);
359 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
360 struct vertex_buffer
*VB
= &tnl
->vb
;
363 /* use hw fixed order for simplicity, pos 0, weight 1, normal 2, fog 3,
364 color0 - color3 4-7, texcoord0 - texcoord5 8-13, pos 1 14. Must not use
365 more than 12 of those at the same time. */
366 GLubyte map_rev_fixed
[15] = {255, 255, 255, 255, 255, 255, 255, 255,
367 255, 255, 255, 255, 255, 255, 255};
370 /* TODO: separate this from the swtnl pipeline
372 if (rmesa
->radeon
.TclFallback
)
373 return GL_TRUE
; /* fallback to software t&l */
375 radeon_print(RADEON_RENDER
, RADEON_NORMAL
, "%s\n", __func__
);
382 if (rmesa
->radeon
.NewGLState
)
383 if (!r200ValidateState( ctx
))
384 return GL_TRUE
; /* fallback to sw t&l */
386 if (!_mesa_arb_vertex_program_enabled(ctx
)) {
387 /* NOTE: inputs != tnl->render_inputs - these are the untransformed
390 map_rev_fixed
[0] = VERT_ATTRIB_POS
;
391 /* technically there is no reason we always need VA_COLOR0. In theory
392 could disable it depending on lighting, color materials, texturing... */
393 map_rev_fixed
[4] = VERT_ATTRIB_COLOR0
;
395 if (ctx
->Light
.Enabled
) {
396 map_rev_fixed
[2] = VERT_ATTRIB_NORMAL
;
399 /* this also enables VA_COLOR1 when using separate specular
400 lighting model, which is unnecessary.
401 FIXME: OTOH, we're missing the case where a ATI_fragment_shader accesses
402 the secondary color (if lighting is disabled). The chip seems
403 misconfigured for that though elsewhere (tcl output, might lock up) */
404 if (_mesa_need_secondary_color(ctx
)) {
405 map_rev_fixed
[5] = VERT_ATTRIB_COLOR1
;
408 if ( (ctx
->Fog
.FogCoordinateSource
== GL_FOG_COORD
) && ctx
->Fog
.Enabled
) {
409 map_rev_fixed
[3] = VERT_ATTRIB_FOG
;
412 for (i
= 0 ; i
< ctx
->Const
.MaxTextureUnits
; i
++) {
413 if (ctx
->Texture
.Unit
[i
]._Current
) {
414 if (rmesa
->TexGenNeedNormals
[i
]) {
415 map_rev_fixed
[2] = VERT_ATTRIB_NORMAL
;
417 map_rev_fixed
[8 + i
] = VERT_ATTRIB_TEX0
+ i
;
420 vimap_rev
= &map_rev_fixed
[0];
423 /* vtx_tcl_output_vtxfmt_0/1 need to match configuration of "fragment
424 part", since using some vertex interpolator later which is not in
425 out_vtxfmt0/1 will lock up. It seems to be ok to write in vertex
426 prog to a not enabled output however, so just don't mess with it.
427 We only need to change compsel. */
428 GLuint out_compsel
= 0;
429 const GLbitfield64 vp_out
=
430 rmesa
->curr_vp_hw
->mesa_program
.info
.outputs_written
;
432 vimap_rev
= &rmesa
->curr_vp_hw
->inputmap_rev
[0];
433 assert(vp_out
& BITFIELD64_BIT(VARYING_SLOT_POS
));
434 out_compsel
= R200_OUTPUT_XYZW
;
435 if (vp_out
& BITFIELD64_BIT(VARYING_SLOT_COL0
)) {
436 out_compsel
|= R200_OUTPUT_COLOR_0
;
438 if (vp_out
& BITFIELD64_BIT(VARYING_SLOT_COL1
)) {
439 out_compsel
|= R200_OUTPUT_COLOR_1
;
441 if (vp_out
& BITFIELD64_BIT(VARYING_SLOT_FOGC
)) {
442 out_compsel
|= R200_OUTPUT_DISCRETE_FOG
;
444 if (vp_out
& BITFIELD64_BIT(VARYING_SLOT_PSIZ
)) {
445 out_compsel
|= R200_OUTPUT_PT_SIZE
;
447 for (i
= VARYING_SLOT_TEX0
; i
< VARYING_SLOT_TEX6
; i
++) {
448 if (vp_out
& BITFIELD64_BIT(i
)) {
449 out_compsel
|= R200_OUTPUT_TEX_0
<< (i
- VARYING_SLOT_TEX0
);
452 if (rmesa
->hw
.vtx
.cmd
[VTX_TCL_OUTPUT_COMPSEL
] != out_compsel
) {
453 R200_STATECHANGE( rmesa
, vtx
);
454 rmesa
->hw
.vtx
.cmd
[VTX_TCL_OUTPUT_COMPSEL
] = out_compsel
;
458 /* Do the actual work:
460 radeonReleaseArrays( ctx
, ~0 /* stage->changed_inputs */ );
461 GLuint emit_end
= r200EnsureEmitSize( ctx
, vimap_rev
)
462 + rmesa
->radeon
.cmdbuf
.cs
->cdw
;
463 r200EmitArrays( ctx
, vimap_rev
);
465 for (i
= 0 ; i
< VB
->PrimitiveCount
; i
++)
467 GLuint prim
= _tnl_translate_prim(&VB
->Primitive
[i
]);
468 GLuint start
= VB
->Primitive
[i
].start
;
469 GLuint length
= VB
->Primitive
[i
].count
;
475 r200EmitEltPrimitive( ctx
, start
, start
+length
, prim
);
477 r200EmitPrimitive( ctx
, start
, start
+length
, prim
);
479 if ( emit_end
< rmesa
->radeon
.cmdbuf
.cs
->cdw
)
480 WARN_ONCE("Rendering was %d commands larger than predicted size."
481 " We might overflow command buffer.\n", rmesa
->radeon
.cmdbuf
.cs
->cdw
- emit_end
);
483 return GL_FALSE
; /* finished the pipe */
488 /* Initial state for tcl stage.
490 const struct tnl_pipeline_stage _r200_tcl_stage
=
497 r200_run_tcl_render
/* run */
502 /**********************************************************************/
503 /* Validate state at pipeline start */
504 /**********************************************************************/
507 /*-----------------------------------------------------------------------
508 * Manage TCL fallbacks
512 static void transition_to_swtnl( struct gl_context
*ctx
)
514 r200ContextPtr rmesa
= R200_CONTEXT(ctx
);
515 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
517 R200_NEWPRIM( rmesa
);
519 r200ChooseVertexState( ctx
);
520 r200ChooseRenderState( ctx
);
522 _tnl_validate_shine_tables( ctx
);
524 tnl
->Driver
.NotifyMaterialChange
=
525 _tnl_validate_shine_tables
;
527 radeonReleaseArrays( ctx
, ~0 );
529 /* Still using the D3D based hardware-rasterizer from the radeon;
530 * need to put the card into D3D mode to make it work:
532 R200_STATECHANGE( rmesa
, vap
);
533 rmesa
->hw
.vap
.cmd
[VAP_SE_VAP_CNTL
] &= ~(R200_VAP_TCL_ENABLE
|R200_VAP_PROG_VTX_SHADER_ENABLE
);
536 static void transition_to_hwtnl( struct gl_context
*ctx
)
538 r200ContextPtr rmesa
= R200_CONTEXT(ctx
);
539 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
541 _tnl_need_projected_coords( ctx
, GL_FALSE
);
543 r200UpdateMaterial( ctx
);
545 tnl
->Driver
.NotifyMaterialChange
= r200UpdateMaterial
;
547 if ( rmesa
->radeon
.dma
.flush
)
548 rmesa
->radeon
.dma
.flush( &rmesa
->radeon
.glCtx
);
550 rmesa
->radeon
.dma
.flush
= NULL
;
552 R200_STATECHANGE( rmesa
, vap
);
553 rmesa
->hw
.vap
.cmd
[VAP_SE_VAP_CNTL
] |= R200_VAP_TCL_ENABLE
;
554 rmesa
->hw
.vap
.cmd
[VAP_SE_VAP_CNTL
] &= ~R200_VAP_FORCE_W_TO_ONE
;
556 if (_mesa_arb_vertex_program_enabled(ctx
)) {
557 rmesa
->hw
.vap
.cmd
[VAP_SE_VAP_CNTL
] |= R200_VAP_PROG_VTX_SHADER_ENABLE
;
560 if ( ((rmesa
->hw
.ctx
.cmd
[CTX_PP_FOG_COLOR
] & R200_FOG_USE_MASK
)
561 == R200_FOG_USE_SPEC_ALPHA
) &&
562 (ctx
->Fog
.FogCoordinateSource
== GL_FOG_COORD
)) {
563 R200_STATECHANGE( rmesa
, ctx
);
564 rmesa
->hw
.ctx
.cmd
[CTX_PP_FOG_COLOR
] &= ~R200_FOG_USE_MASK
;
565 rmesa
->hw
.ctx
.cmd
[CTX_PP_FOG_COLOR
] |= R200_FOG_USE_VTX_FOG
;
568 R200_STATECHANGE( rmesa
, vte
);
569 rmesa
->hw
.vte
.cmd
[VTE_SE_VTE_CNTL
] &= ~(R200_VTX_XY_FMT
|R200_VTX_Z_FMT
);
570 rmesa
->hw
.vte
.cmd
[VTE_SE_VTE_CNTL
] |= R200_VTX_W0_FMT
;
572 if (R200_DEBUG
& RADEON_FALLBACKS
)
573 fprintf(stderr
, "R200 end tcl fallback\n");
577 static char *fallbackStrings
[] = {
578 "Rasterization fallback",
579 "Unfilled triangles",
580 "Twosided lighting, differing materials",
581 "Materials in VB (maybe between begin/end)",
594 static char *getFallbackString(GLuint bit
)
601 return fallbackStrings
[i
];
606 void r200TclFallback( struct gl_context
*ctx
, GLuint bit
, GLboolean mode
)
608 r200ContextPtr rmesa
= R200_CONTEXT(ctx
);
609 GLuint oldfallback
= rmesa
->radeon
.TclFallback
;
612 if (oldfallback
== 0) {
613 /* We have to flush before transition */
614 if ( rmesa
->radeon
.dma
.flush
)
615 rmesa
->radeon
.dma
.flush( &rmesa
->radeon
.glCtx
);
617 if (R200_DEBUG
& RADEON_FALLBACKS
)
618 fprintf(stderr
, "R200 begin tcl fallback %s\n",
619 getFallbackString( bit
));
620 rmesa
->radeon
.TclFallback
|= bit
;
621 transition_to_swtnl( ctx
);
623 rmesa
->radeon
.TclFallback
|= bit
;
625 if (oldfallback
== bit
) {
626 /* We have to flush before transition */
627 if ( rmesa
->radeon
.dma
.flush
)
628 rmesa
->radeon
.dma
.flush( &rmesa
->radeon
.glCtx
);
630 if (R200_DEBUG
& RADEON_FALLBACKS
)
631 fprintf(stderr
, "R200 end tcl fallback %s\n",
632 getFallbackString( bit
));
633 rmesa
->radeon
.TclFallback
&= ~bit
;
634 transition_to_hwtnl( ctx
);
636 rmesa
->radeon
.TclFallback
&= ~bit
;