1 /**************************************************************************
3 Copyright (C) 2004 Nicolai Haehnle.
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Nicolai Haehnle <prefect_@gmx.net>
40 #include "simple_list.h"
42 #include "api_arrayelt.h"
43 #include "swrast/swrast.h"
44 #include "swrast_setup/swrast_setup.h"
45 #include "array_cache/acache.h"
47 #include "tnl/t_vp_build.h"
49 #include "radeon_reg.h"
50 #include "radeon_macros.h"
51 #include "radeon_ioctl.h"
52 #include "radeon_state.h"
53 #include "r300_context.h"
54 #include "r300_ioctl.h"
55 #include "r300_state.h"
57 #include "r300_program.h"
59 #include "r300_maos.h"
60 #include "r300_emit.h"
62 extern int future_hw_tcl_on
;
64 /**********************************************************************
65 * Hardware rasterization
67 * When we fall back to software TCL, we still try to use the
68 * rasterization hardware for rendering.
69 **********************************************************************/
/*
 * r300_get_primitive_type: translate a primitive mode into the
 * R300_VAP_VF_CNTL__PRIM_* value used when building a draw packet.
 *
 * Parameters:
 *   rmesa - r300 context (not referenced by the statements visible here).
 *   ctx   - GL context (not referenced by the statements visible here).
 *   prim  - primitive word; only the bits selected by PRIM_MODE_MASK are
 *           examined.
 *
 * The switch stores one of the point, line, line-strip, line-loop, triangle,
 * triangle-strip, triangle-fan, quad, quad-strip or polygon codes in `type`.
 *
 * NOTE(review): r300_render_vb_primitive() skips drawing when the result is
 * negative, so the unhandled-mode path presumably yields a negative value.
 * The return statements and most case labels are not visible in this
 * excerpt - confirm against the full file.
 */
71 static int r300_get_primitive_type(r300ContextPtr rmesa
, GLcontext
*ctx
, int prim
)
75 switch (prim
& PRIM_MODE_MASK
) {
77 type
=R300_VAP_VF_CNTL__PRIM_POINTS
;
80 type
=R300_VAP_VF_CNTL__PRIM_LINES
;
83 type
=R300_VAP_VF_CNTL__PRIM_LINE_STRIP
;
86 type
=R300_VAP_VF_CNTL__PRIM_LINE_LOOP
;
89 type
=R300_VAP_VF_CNTL__PRIM_TRIANGLES
;
91 case GL_TRIANGLE_STRIP
:
92 type
=R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP
;
95 type
=R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN
;
98 type
=R300_VAP_VF_CNTL__PRIM_QUADS
;
101 type
=R300_VAP_VF_CNTL__PRIM_QUAD_STRIP
;
104 type
=R300_VAP_VF_CNTL__PRIM_POLYGON
;
/* Reports file, function and the masked mode on stderr; judging by the
 * message text this is the path taken for a mode with no mapping above. */
107 fprintf(stderr
, "%s:%s Do not know how to handle primitive %02x - help me !\n",
108 __FILE__
, __FUNCTION__
,
109 prim
& PRIM_MODE_MASK
);
/*
 * r300_get_num_verts: compute how many of the supplied vertices are kept
 * for drawing a primitive of the given mode.
 *
 * In this file it is called as r300_get_num_verts(rmesa, ctx, end-start,
 * prim); only the first parameter of the signature is visible in this
 * excerpt.
 *
 * The switch on (prim & PRIM_MODE_MASK) sets verts_off, the number of
 * vertices to drop: either a remainder (num_verts % 2, % 3 or % 4) or the
 * whole of num_verts. The result is num_verts - verts_off.
 *
 * `name` starts as "UNKNOWN" and is only used in the warning messages.
 *
 * NOTE(review): the case labels and the conditions that choose between
 * "drop everything" and "drop the remainder" are not visible here; the
 * warning texts suggest they are minimum-vertex-count checks - confirm.
 */
116 static int r300_get_num_verts(r300ContextPtr rmesa
,
122 char *name
="UNKNOWN";
124 switch (prim
& PRIM_MODE_MASK
) {
131 verts_off
= num_verts
% 2;
136 verts_off
= num_verts
;
141 verts_off
= num_verts
;
145 verts_off
= num_verts
% 3;
147 case GL_TRIANGLE_STRIP
:
150 verts_off
= num_verts
;
152 case GL_TRIANGLE_FAN
:
155 verts_off
= num_verts
;
159 verts_off
= num_verts
% 4;
164 verts_off
= num_verts
;
166 verts_off
= num_verts
% 2;
171 verts_off
= num_verts
;
/* Diagnostic naming file, function and the masked mode. */
174 fprintf(stderr
, "%s:%s Do not know how to handle primitive %02x - help me !\n",
175 __FILE__
, __FUNCTION__
,
176 prim
& PRIM_MODE_MASK
);
/* The vertex-count warnings are only considered when DEBUG_VERTS is set in
 * RADEON_DEBUG. */
181 if (RADEON_DEBUG
& DEBUG_VERTS
) {
182 if (num_verts
- verts_off
== 0) {
183 WARN_ONCE("user error: Need more than %d vertices to draw primitive %s !\n", num_verts
, name
);
188 WARN_ONCE("user error: %d is not a valid number of vertices for primitive %s !\n", num_verts
, name
);
/* Result: the supplied count minus the verts_off chosen above. */
192 return num_verts
- verts_off
;
195 /* Immediate implementation has been removed from CVS. */
197 /* vertex buffer implementation */
/*
 * fire_EB: emit the command-stream packets for an indexed draw whose index
 * data is referenced through a RADEON_CP_PACKET3_INDX_BUFFER packet.
 *
 * Parameters (after the PREFIX macro arguments):
 *   addr         - address of the index data; when it is not a multiple of
 *                  elt_size, "Badly aligned buffer" is reported.
 *   vertex_count - number of indices; shifted into bits 16 and up of the
 *                  VAP_VF_CNTL word.
 *   type         - R300_VAP_VF_CNTL__PRIM_* code OR-ed into that word.
 *   elt_size     - bytes per index; asserted to be 2 or 4.
 *
 * Each visible sequence emits a RADEON_CP_PACKET3_3D_DRAW_INDX_2 packet
 * carrying the VAP_VF_CNTL word, followed by an INDX_BUFFER packet carrying
 * an R300_EB_UNK1/R300_EB_UNK2 word, an address and a count.
 *
 * NOTE(review): two near-identical packet sequences appear below, one using
 * magic_1/magic_2 and one without. They are presumably alternatives chosen
 * by preprocessor conditionals, and the two forms of the VAP_VF_CNTL word
 * (with and without INDEX_SIZE_32bit) presumably depend on elt_size. Neither
 * the directives nor the if/else lines are visible in this excerpt -
 * confirm against the full file.
 *
 * NOTE(review): magic_1, t_addr and magic_2 are unsigned long, but the debug
 * fprintf calls format them with %d and %x. %lu and %lx are the matching
 * conversions if that output is ever compiled in.
 */
199 static void inline fire_EB(PREFIX
unsigned long addr
, int vertex_count
, int type
, int elt_size
)
202 unsigned long addr_a
;
203 unsigned long t_addr
;
204 unsigned long magic_1
, magic_2
;
206 ctx
= rmesa
->radeon
.glCtx
;
208 assert(elt_size
== 2 || elt_size
== 4);
/* elt_size is 2 or 4, so (elt_size-1) masks the low address bits that must
 * be zero for an aligned index buffer. */
210 if(addr
& (elt_size
-1)){
211 WARN_ONCE("Badly aligned buffer\n");
/* magic_1: dword offset of addr within its 32-byte block.
 * t_addr:  addr with the bits of mask 0x1d cleared (bit 1 is kept).
 * magic_2: count later emitted as the last dword of the INDX_BUFFER packet. */
217 magic_1
= (addr
% 32) / 4;
218 t_addr
= addr
& (~0x1d);
219 magic_2
= (vertex_count
+ 1 + (t_addr
& 0x2)) / 2 + magic_1
;
223 start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2
, 0);
225 e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES
| (vertex_count
<<16) | type
| R300_VAP_VF_CNTL__INDEX_SIZE_32bit
);
227 e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES
| (vertex_count
<<16) | type
);
230 start_packet3(RADEON_CP_PACKET3_INDX_BUFFER
, 2);
232 e32(R300_EB_UNK1
| (0 << 16) | R300_EB_UNK2
);
233 e32(addr
/*& 0xffffffe3*/);
235 e32(R300_EB_UNK1
| (magic_1
<< 16) | R300_EB_UNK2
);
240 e32(vertex_count
/*+ addr_a/4*/); /* Total number of dwords needed? */
242 e32(magic_2
); /* Total number of dwords needed? */
244 //cp_delay(PASS_PREFIX 1);
246 fprintf(stderr
, "magic_1 %d\n", magic_1
);
247 fprintf(stderr
, "t_addr %x\n", t_addr
);
248 fprintf(stderr
, "magic_2 %d\n", magic_2
);
/* Void casts: reference the locals without using their values. */
252 (void)magic_2
, (void)magic_1
, (void)t_addr
;
/* Second visible packet sequence: same packets without the magic_1/magic_2
 * adjustments. */
258 start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2
, 0);
260 e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES
| (vertex_count
<<16) | type
| R300_VAP_VF_CNTL__INDEX_SIZE_32bit
);
262 e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES
| (vertex_count
<<16) | type
);
265 start_packet3(RADEON_CP_PACKET3_INDX_BUFFER
, 2);
266 e32(R300_EB_UNK1
| (0 << 16) | R300_EB_UNK2
);
267 e32(addr
/*& 0xffffffe3*/);
270 e32(vertex_count
/*+ addr_a/4*/); /* Total number of dwords needed? */
272 e32((vertex_count
+1)/2 /*+ addr_a/4*/); /* Total number of dwords needed? */
274 //cp_delay(PASS_PREFIX 1);
/*
 * r300_render_vb_primitive: draw one primitive range from the vertex buffer.
 *
 * In this file it is called as
 * r300_render_vb_primitive(rmesa, ctx, start, start + length, prim); only
 * the first parameter of the signature is visible in this excerpt.
 *
 * Steps visible below:
 *   1. Map prim to a hardware type and compute the drawable vertex count
 *      from end - start.
 *   2. Return without drawing when type is negative or num_verts <= 0.
 *   3. When rmesa->state.VB.Elts is set, emit the arrays with r300EmitAOS(),
 *      upload the indices with r300EmitElts() and draw with fire_EB().
 *      "Too many elts" is reported when num_verts > 65535.
 *   4. Otherwise emit the arrays and draw with fire_AOS().
 *
 * NOTE(review): the start_index32_packet loop reads Elts through an
 * unsigned long pointer, whose width is platform dependent. Whether that
 * loop is compiled in cannot be seen here (preprocessor lines are missing) -
 * confirm it matches the real element size if it is ever enabled.
 */
278 static void r300_render_vb_primitive(r300ContextPtr rmesa
,
286 type
=r300_get_primitive_type(rmesa
, ctx
, prim
);
287 num_verts
=r300_get_num_verts(rmesa
, ctx
, end
-start
, prim
);
/* Nothing to draw: unmapped primitive type or no usable vertices. */
289 if(type
<0 || num_verts
<= 0)return;
/* Indexed path when an element array is present. */
291 if(rmesa
->state
.VB
.Elts
){
292 r300EmitAOS(rmesa
, rmesa
->state
.aos_count
, /*0*/start
);
296 start_index32_packet(num_verts
, type
);
297 for(i
=0; i
< num_verts
; i
++)
298 e32(((unsigned long *)rmesa
->state
.VB
.Elts
)[i
]/*rmesa->state.Elts[start+i]*/); /* start ? */
301 //start_index32_packet(num_verts, type);
302 //e32(rmesa->state.Elts[start]);
306 if(num_verts
> 65535){ /* not implemented yet */
307 WARN_ONCE("Too many elts\n");
311 r300EmitElts(ctx
, rmesa
->state
.VB
.Elts
, num_verts
, rmesa
->state
.VB
.elt_size
);
312 fire_EB(PASS_PREFIX rmesa
->state
.elt_dma
.aos_offset
, num_verts
, type
, rmesa
->state
.VB
.elt_size
);
/* Non-indexed path: arrays are emitted starting at `start`. */
315 r300EmitAOS(rmesa
, rmesa
->state
.aos_count
, start
);
316 fire_AOS(PASS_PREFIX num_verts
, type
);
/*
 * dump_array: debugging helper that prints the contents of one DMA array
 * region to stderr.
 *
 * Parameters:
 *   rvb   - region to dump; reading starts at rvb->address + rvb->start.
 *   count - number of elements to print.
 *
 * Output: "stride N:" followed by one "{...}" group per element and a
 * final newline. Each group holds rvb->aos_size components, printed as
 * floats when aos_format is AOS_FORMAT_FLOAT; the second loop prints
 * unsigned bytes (presumably the else branch - the keyword is not visible
 * in this excerpt).
 *
 * `out` is an int pointer, so adding aos_stride advances it by that many
 * ints per element.
 */
321 void dump_array(struct r300_dma_region
*rvb
, int count
)
323 int *out
= (int *)(rvb
->address
+ rvb
->start
);
326 fprintf(stderr
, "stride %d:", rvb
->aos_stride
);
327 for (i
=0; i
< count
; i
++) {
328 fprintf(stderr
, "{");
329 if (rvb
->aos_format
== AOS_FORMAT_FLOAT
)
330 for (ci
=0; ci
< rvb
->aos_size
; ci
++)
331 fprintf(stderr
, "%f ", ((float *)out
)[ci
]);
333 for (ci
=0; ci
< rvb
->aos_size
; ci
++)
334 fprintf(stderr
, "%d ", ((unsigned char *)out
)[ci
]);
335 fprintf(stderr
, "}");
/* Step to the next element. */
337 out
+= rvb
->aos_stride
;
340 fprintf(stderr
, "\n");
/*
 * dump_dt: debugging helper that prints the contents of one vertex
 * attribute descriptor to stderr.
 *
 * Parameters:
 *   dt    - attribute descriptor; its type, size and stride fields are read.
 *   count - number of elements to print.
 *
 * Output: "stride N" followed by one "{...}" group per element and a final
 * newline. Each group holds dt->size components, printed as floats when
 * dt->type is GL_FLOAT; the second loop prints unsigned bytes (presumably
 * the else branch - the keyword is not visible in this excerpt).
 *
 * Unlike dump_array(), the cursor is advanced through a char pointer, so
 * dt->stride is applied in bytes.
 *
 * NOTE(review): the declaration and initial value of `out` are not visible
 * here - confirm where it starts.
 */
343 void dump_dt(struct dt
*dt
, int count
)
348 fprintf(stderr
, "stride %d", dt
->stride
);
350 for (i
=0; i
< count
; i
++){
351 fprintf(stderr
, "{");
352 if (dt
->type
== GL_FLOAT
)
353 for (ci
=0; ci
< dt
->size
; ci
++)
354 fprintf(stderr
, "%f ", ((float *)out
)[ci
]);
356 for (ci
=0; ci
< dt
->size
; ci
++)
357 fprintf(stderr
, "%d ", ((unsigned char *)out
)[ci
]);
358 fprintf(stderr
, "}");
/* Step to the next element (byte stride). */
360 out
= (int *)((char *)out
+ dt
->stride
);
363 fprintf(stderr
, "\n");
/*
 * r300_run_vb_render: render every primitive of the current vertex buffer
 * through the hardware.
 *
 * Parameters:
 *   ctx   - GL context; the r300 context is obtained with R300_CONTEXT().
 *   stage - pipeline stage (not referenced by the statements visible here).
 *
 * Sequence visible below:
 *   - optional trace of the function name when DEBUG_PRIMS is set;
 *   - radeon_vb_to_rvb() fills rmesa->state.VB from the TNL vertex buffer;
 *   - shaders are updated, arrays emitted, shader state updated;
 *   - the vir[0], vir[1], vic and vof state atoms are marked changed;
 *   - register writes to R300_RB3D_DSTCACHE_CTLSTAT and
 *     R300_VAP_PVS_WAITIDLE are started, then r300EmitState() is called;
 *   - each entry of VB->Primitive is drawn with r300_render_vb_primitive();
 *   - 0x0000000a is written after a further DSTCACHE_CTLSTAT reg_start,
 *     followed by 0x00000003, and the arrays are released.
 *
 * NOTE(review): the return statement and any preprocessor guards (for
 * example around the TNL conversion and the debug dumps) are not visible in
 * this excerpt - confirm against the full file.
 */
367 GLboolean
r300_run_vb_render(GLcontext
*ctx
,
368 struct tnl_pipeline_stage
*stage
)
370 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
371 struct radeon_vertex_buffer
*VB
= &rmesa
->state
.VB
;
375 if (RADEON_DEBUG
& DEBUG_PRIMS
)
376 fprintf(stderr
, "%s\n", __FUNCTION__
);
379 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
380 radeon_vb_to_rvb(rmesa
, VB
, &tnl
->vb
);
383 r300UpdateShaders(rmesa
);
/* The trailing `|| 1` makes this condition always true, so the arrays are
 * emitted regardless of LockCount. */
384 if (rmesa
->state
.VB
.LockCount
== 0 || 1) {
385 r300EmitArrays(ctx
, GL_FALSE
);
387 r300UpdateShaderStates(rmesa
);
389 /* TODO: Figure out why do we need these. */
390 R300_STATECHANGE(rmesa
, vir
[0]);
391 R300_STATECHANGE(rmesa
, vir
[1]);
392 R300_STATECHANGE(rmesa
, vic
);
393 R300_STATECHANGE(rmesa
, vof
);
/* Debug dump: attribute data, then the AOS regions before and after the
 * arrays are released and re-emitted.
 * NOTE(review): presumably compiled out; the guarding directive is not
 * visible here. */
396 fprintf(stderr
, "dt:\n");
397 for(i
=0; i
< VERT_ATTRIB_MAX
; i
++){
398 fprintf(stderr
, "dt %d:", i
);
399 dump_dt(&rmesa
->state
.VB
.AttribPtr
[i
], VB
->Count
);
402 fprintf(stderr
, "before:\n");
403 for(i
=0; i
< rmesa
->state
.aos_count
; i
++){
404 fprintf(stderr
, "aos %d:", i
);
405 dump_array(&rmesa
->state
.aos
[i
], VB
->Count
);
409 r300ReleaseArrays(ctx
);
410 r300EmitArrays(ctx
, GL_FALSE
);
412 fprintf(stderr
, "after:\n");
413 for(i
=0; i
< rmesa
->state
.aos_count
; i
++){
414 fprintf(stderr
, "aos %d:", i
);
415 dump_array(&rmesa
->state
.aos
[i
], VB
->Count
);
420 reg_start(R300_RB3D_DSTCACHE_CTLSTAT
,0);
426 reg_start(R300_VAP_PVS_WAITIDLE
,0);
429 r300EmitState(rmesa
);
/* Draw each primitive range recorded in the vertex buffer. */
431 for(i
=0; i
< VB
->PrimitiveCount
; i
++){
432 GLuint prim
= VB
->Primitive
[i
].mode
;
433 GLuint start
= VB
->Primitive
[i
].start
;
434 GLuint length
= VB
->Primitive
[i
].count
;
436 r300_render_vb_primitive(rmesa
, ctx
, start
, start
+ length
, prim
);
439 reg_start(R300_RB3D_DSTCACHE_CTLSTAT
,0);
440 e32(0x0000000a/*0x2*/);
443 e32(0x00000003/*0x1*/);
448 r300ReleaseArrays(ctx
);
/*
 * FALLBACK_IF(expr): helper used by r300Fallback(). The visible body warns
 * once with "Software fallback:" followed by the stringified expression and
 * returns R300_FALLBACK_RAST from the enclosing function.
 *
 * The leading `1 ||` makes the RADEON_DEBUG & DEBUG_FALLBACKS test
 * irrelevant, so the warning is not limited to debug runs.
 *
 * NOTE(review): the lines that test `expr` and wrap the body are not visible
 * in this excerpt; presumably the warning and return only happen when expr
 * is true - confirm.
 */
452 #define FALLBACK_IF(expr) \
455 if (1 || RADEON_DEBUG & DEBUG_FALLBACKS) \
456 WARN_ONCE("Software fallback:%s\n", #expr); \
457 return R300_FALLBACK_RAST; \
/*
 * r300Fallback: decide whether the current GL state needs a software
 * fallback.
 *
 * Parameter:
 *   ctx - GL context whose Color, Polygon, Multisample, Line, Point and
 *         Texture state is inspected.
 *
 * Returns:
 *   R300_FALLBACK_RAST - through FALLBACK_IF, for the state checks listed
 *                        below that are compiled in;
 *   R300_FALLBACK_TCL  - when any texture unit below Const.MaxTextureUnits
 *                        has TEXTURE_RECT_BIT set in _ReallyEnabled;
 *   R300_FALLBACK_NONE - otherwise.
 *
 * The point-sprite check is only made when the NV_point_sprite or
 * ARB_point_sprite extension flag is set.
 *
 * NOTE(review): the end of the `#if 0` region is not visible in this
 * excerpt, so which of the checks are actually active cannot be told from
 * here - confirm against the full file.
 */
461 int r300Fallback(GLcontext
*ctx
)
465 //FALLBACK_IF(ctx->RenderMode != GL_RENDER); // We do not do SELECT or FEEDBACK (yet ?)
467 #if 0 /* These should work now.. */
468 FALLBACK_IF(ctx
->Color
.DitherFlag
);
469 FALLBACK_IF(ctx
->Color
.AlphaEnabled
); // GL_ALPHA_TEST
470 FALLBACK_IF(ctx
->Color
.BlendEnabled
); // GL_BLEND
471 FALLBACK_IF(ctx
->Polygon
.OffsetFill
); // GL_POLYGON_OFFSET_FILL
473 FALLBACK_IF(ctx
->Polygon
.OffsetPoint
); // GL_POLYGON_OFFSET_POINT
474 FALLBACK_IF(ctx
->Polygon
.OffsetLine
); // GL_POLYGON_OFFSET_LINE
475 //FALLBACK_IF(ctx->Stencil.Enabled); // GL_STENCIL_TEST
477 //FALLBACK_IF(ctx->Fog.Enabled); // GL_FOG disable as swtcl does not seem to support this
478 //FALLBACK_IF(ctx->Polygon.SmoothFlag); // GL_POLYGON_SMOOTH disabling to get blender going
479 FALLBACK_IF(ctx
->Polygon
.StippleFlag
); // GL_POLYGON_STIPPLE
480 FALLBACK_IF(ctx
->Multisample
.Enabled
); // GL_MULTISAMPLE_ARB
483 FALLBACK_IF(ctx
->Line
.StippleFlag
);
485 /* HW does not appear to directly support these */
486 FALLBACK_IF(ctx
->Line
.SmoothFlag
); // GL_LINE_SMOOTH
487 FALLBACK_IF(ctx
->Point
.SmoothFlag
); // GL_POINT_SMOOTH
488 /* Rest could be done with vertex fragments */
489 if (ctx
->Extensions
.NV_point_sprite
|| ctx
->Extensions
.ARB_point_sprite
)
490 FALLBACK_IF(ctx
->Point
.PointSprite
); // GL_POINT_SPRITE_NV
/* Rectangle textures force the TCL-level fallback result. */
492 for (i
= 0; i
< ctx
->Const
.MaxTextureUnits
; i
++)
493 if (ctx
->Texture
.Unit
[i
]._ReallyEnabled
& TEXTURE_RECT_BIT
)
494 return R300_FALLBACK_TCL
;
496 return R300_FALLBACK_NONE
;
500 * Called by the pipeline manager to render a batch of primitives.
501 * We can return true to pass on to the next stage (i.e. software
502 * rasterization) or false to indicate that the pipeline has finished
503 * after we render something.
/*
 * r300_run_render: `run` callback of _r300_render_stage.
 *
 * Traces the function name when DEBUG_PRIMS is set, then consults
 * r300Fallback(). When no rasterization fallback is required the batch is
 * handed to r300_run_vb_render() and its result is returned.
 *
 * NOTE(review): the statement executed when r300Fallback(ctx) is at least
 * R300_FALLBACK_RAST is not visible in this excerpt; presumably it returns
 * a value that passes the batch on to the next stage - confirm.
 */
505 static GLboolean
r300_run_render(GLcontext
*ctx
,
506 struct tnl_pipeline_stage
*stage
)
509 if (RADEON_DEBUG
& DEBUG_PRIMS
)
510 fprintf(stderr
, "%s\n", __FUNCTION__
);
512 if (r300Fallback(ctx
) >= R300_FALLBACK_RAST
)
515 return r300_run_vb_render(ctx
, stage
);
/*
 * _r300_render_stage: pipeline stage descriptor whose run callback is
 * r300_run_render().
 * NOTE(review): the other initializer fields are not visible in this
 * excerpt.
 */
518 const struct tnl_pipeline_stage _r300_render_stage
= {
524 r300_run_render
/* run */
/*
 * r300_run_tcl_render: `run` callback of _r300_tcl_stage.
 *
 * Sequence visible below:
 *   - hw_tcl_on is loaded from future_hw_tcl_on;
 *   - the function name is traced when DEBUG_PRIMS is set;
 *   - hw_tcl_on is tested against GL_FALSE;
 *   - when r300Fallback() reports at least R300_FALLBACK_TCL, hw_tcl_on is
 *     cleared;
 *   - shaders are updated and the current vertex program is fetched;
 *   - when the vertex program is not native, hw_tcl_on is cleared;
 *   - otherwise the batch is handed to r300_run_vb_render().
 *
 * NOTE(review): the statements that follow the hw_tcl_on test and the two
 * places where hw_tcl_on is cleared are not visible in this excerpt;
 * presumably they return early so a later stage handles the batch. The ends
 * of the `#if 0` regions are also missing, so it cannot be told from here
 * whether the unconditional `vp->native = GL_FALSE` is compiled in -
 * confirm against the full file.
 */
527 static GLboolean
r300_run_tcl_render(GLcontext
*ctx
,
528 struct tnl_pipeline_stage
*stage
)
530 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
531 struct r300_vertex_program
*vp
;
533 hw_tcl_on
=future_hw_tcl_on
;
535 if (RADEON_DEBUG
& DEBUG_PRIMS
)
536 fprintf(stderr
, "%s\n", __FUNCTION__
);
537 if(hw_tcl_on
== GL_FALSE
)
540 if (r300Fallback(ctx
) >= R300_FALLBACK_TCL
) {
541 hw_tcl_on
= GL_FALSE
;
545 r300UpdateShaders(rmesa
);
547 vp
= (struct r300_vertex_program
*)CURRENT_VERTEX_SHADER(ctx
);
548 #if 0 /* Draw every second request with software arb vp */
551 //vp->native = GL_FALSE;
554 #if 0 /* You do not want to know what this does... */
555 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
556 struct tnl_cache
*cache
;
557 struct tnl_cache_item
*c
;
559 cache
= tnl
->vp_cache
;
560 c
= cache
->items
[0xc000cc0e % cache
->size
];
562 if(c
&& c
->data
== vp
)
563 vp
->native
= GL_FALSE
;
567 vp
->native
= GL_FALSE
;
/* A non-native vertex program turns hardware TCL off for this batch. */
569 if (vp
->native
== GL_FALSE
) {
570 hw_tcl_on
= GL_FALSE
;
573 //r300UpdateShaderStates(rmesa);
575 return r300_run_vb_render(ctx
, stage
);
/*
 * _r300_tcl_stage: pipeline stage descriptor whose run callback is
 * r300_run_tcl_render().
 * NOTE(review): the other initializer fields are not visible in this
 * excerpt.
 */
578 const struct tnl_pipeline_stage _r300_tcl_stage
= {
584 r300_run_tcl_render
/* run */
587 /* R300 texture rectangle expects coords in 0..1 range, not 0..dimension
588 * as in the extension spec. Need to translate here.
590 * Note that swrast expects 0..dimension, so if a fallback is active,
591 * don't do anything. (Maybe need to configure swrast to match hw)
/*
 * Private data of the texrect pipeline stage: one GLvector4f of texture
 * coordinates per texture unit, sized for MAX_TEXTURE_UNITS units.
 */
593 struct texrect_stage_data
{
594 GLvector4f texcoord
[MAX_TEXTURE_UNITS
];
/*
 * TEXRECT_STAGE_DATA(stage): view the privatePtr member of a pipeline stage
 * as a struct texrect_stage_data pointer.
 *
 * `stage` may be any expression that yields a pointer to an object with a
 * privatePtr member. The argument is parenthesized so that the member access
 * applies to the whole expression (for example `stages + 1`), not just to
 * its last operand.
 */
#define TEXRECT_STAGE_DATA(stage) ((struct texrect_stage_data *)(stage)->privatePtr)
/*
 * run_texrect_stage: rescale the texture coordinates of rectangle textures.
 *
 * Parameters:
 *   ctx   - GL context; texture units below Const.MaxTextureUnits are
 *           scanned.
 *   stage - pipeline stage whose private data holds the output vectors.
 *
 * For every unit with TEXTURE_RECT_BIT set in _ReallyEnabled, the base-level
 * image of the unit's CurrentRect texture supplies Width and Height. Each of
 * the VB->Count input coordinates is read with the input stride, and its
 * first two components are multiplied by 1/Width and 1/Height into
 * store->texcoord[i]. VB->TexCoordPtr[i] and
 * VB->AttribPtr[VERT_ATTRIB_TEX0+i] are then pointed at the scaled copy.
 *
 * NOTE(review): the statement taken when rmesa->radeon.Fallback is set, the
 * case labels of the switch on the coordinate size (and any handling of the
 * third and fourth components), and the return value are not visible in
 * this excerpt - confirm against the full file.
 */
600 static GLboolean
run_texrect_stage( GLcontext
*ctx
,
601 struct tnl_pipeline_stage
*stage
)
603 struct texrect_stage_data
*store
= TEXRECT_STAGE_DATA(stage
);
604 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
605 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
606 struct vertex_buffer
*VB
= &tnl
->vb
;
609 if (rmesa
->radeon
.Fallback
)
612 for (i
= 0 ; i
< ctx
->Const
.MaxTextureUnits
; i
++) {
613 if (ctx
->Texture
.Unit
[i
]._ReallyEnabled
& TEXTURE_RECT_BIT
) {
614 struct gl_texture_object
*texObj
= ctx
->Texture
.Unit
[i
].CurrentRect
;
615 struct gl_texture_image
*texImage
= texObj
->Image
[0][texObj
->BaseLevel
];
/* Reciprocal image dimensions used as scale factors. */
616 const GLfloat iw
= 1.0/texImage
->Width
;
617 const GLfloat ih
= 1.0/texImage
->Height
;
618 GLfloat
*in
= (GLfloat
*)VB
->TexCoordPtr
[i
]->data
;
619 GLint instride
= VB
->TexCoordPtr
[i
]->stride
;
620 GLfloat (*out
)[4] = store
->texcoord
[i
].data
;
/* The output vector keeps the component count of the input. */
623 store
->texcoord
[i
].size
= VB
->TexCoordPtr
[i
]->size
;
624 for (j
= 0 ; j
< VB
->Count
; j
++) {
625 switch (VB
->TexCoordPtr
[i
]->size
) {
633 out
[j
][0] = in
[0] * iw
;
634 out
[j
][1] = in
[1] * ih
;
/* Advance the input cursor by the stride, which is applied in bytes. */
636 in
= (GLfloat
*)((GLubyte
*)in
+ instride
);
/* Later consumers of the vertex buffer see the scaled coordinates. */
639 VB
->AttribPtr
[VERT_ATTRIB_TEX0
+i
] = VB
->TexCoordPtr
[i
] = &store
->texcoord
[i
];
647 /* Called the first time stage->run() is invoked.
/*
 * alloc_texrect_data: allocate the private data of the texrect stage.
 *
 * Parameters:
 *   ctx   - GL context; supplies Const.MaxTextureUnits and the TNL vertex
 *           buffer whose Size is used for each vector.
 *   stage - pipeline stage; its privatePtr receives the new
 *           struct texrect_stage_data.
 *
 * The structure is obtained with CALLOC, then one vector per texture unit
 * below Const.MaxTextureUnits is allocated with
 * _mesa_vector4f_alloc(..., 0, VB->Size, 32).
 *
 * NOTE(review): a NULL check of the CALLOC result, any further set-up of the
 * stage and the return value are not visible in this excerpt - confirm that
 * store is checked before the loop dereferences it.
 */
649 static GLboolean
alloc_texrect_data( GLcontext
*ctx
,
650 struct tnl_pipeline_stage
*stage
)
652 struct vertex_buffer
*VB
= &TNL_CONTEXT(ctx
)->vb
;
653 struct texrect_stage_data
*store
;
656 stage
->privatePtr
= CALLOC(sizeof(*store
));
657 store
= TEXRECT_STAGE_DATA(stage
);
661 for (i
= 0 ; i
< ctx
->Const
.MaxTextureUnits
; i
++)
662 _mesa_vector4f_alloc( &store
->texcoord
[i
], 0, VB
->Size
, 32 );
/*
 * free_texrect_data: release the private data of the texrect stage.
 *
 * Parameter:
 *   stage - pipeline stage whose privatePtr holds a
 *           struct texrect_stage_data.
 *
 * Every one of the MAX_TEXTURE_UNITS vectors that has a non-NULL data
 * pointer is freed with _mesa_vector4f_free(), then privatePtr is cleared.
 * The loop covers all MAX_TEXTURE_UNITS slots, while alloc_texrect_data()
 * only fills the first Const.MaxTextureUnits; the data check skips the
 * unused ones.
 *
 * NOTE(review): a guard for a NULL store and the release of the structure
 * itself are not visible in this excerpt - confirm both exist in the full
 * file.
 */
667 static void free_texrect_data( struct tnl_pipeline_stage
*stage
)
669 struct texrect_stage_data
*store
= TEXRECT_STAGE_DATA(stage
);
673 for (i
= 0 ; i
< MAX_TEXTURE_UNITS
; i
++)
674 if (store
->texcoord
[i
].data
)
675 _mesa_vector4f_free( &store
->texcoord
[i
] );
677 stage
->privatePtr
= NULL
;
681 const struct tnl_pipeline_stage _r300_texrect_stage
=
683 "r300 texrect stage", /* name */