1 /**************************************************************************
3 Copyright (C) 2004 Nicolai Haehnle.
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
31 * \brief R300 Render (Vertex Buffer Implementation)
33 * The immediate implementation has been removed from CVS in favor of the vertex
34 * buffer implementation.
36 * The render functions are called by the pipeline manager to render a batch of
37 * primitives. They return TRUE to pass on to the next stage (i.e. software
38 * rasterization) or FALSE to indicate that the pipeline has finished after
39 * rendering something.
41 * When falling back to software TCL still attempt to use hardware
44 * I am not sure that the cache related registers are setup correctly, but
45 * obviously this does work... Further investigation is needed.
47 * \author Nicolai Haehnle <prefect_@gmx.net>
49 * \todo Add immediate implementation back? Perhaps this is useful if there are
53 #include "main/glheader.h"
54 #include "main/state.h"
55 #include "main/imports.h"
56 #include "main/enums.h"
57 #include "main/macros.h"
58 #include "main/context.h"
60 #include "main/simple_list.h"
61 #include "main/api_arrayelt.h"
62 #include "swrast/swrast.h"
63 #include "swrast_setup/swrast_setup.h"
66 #include "tnl/t_vp_build.h"
67 #include "radeon_reg.h"
68 #include "radeon_macros.h"
69 #include "r300_context.h"
70 #include "r300_ioctl.h"
71 #include "r300_state.h"
74 #include "r300_emit.h"
75 #include "r300_fragprog.h"
76 extern int future_hw_tcl_on
;
79 * \brief Convert a OpenGL primitive type into a R300 primitive type.
81 int r300PrimitiveType(r300ContextPtr rmesa
, int prim
)
83 switch (prim
& PRIM_MODE_MASK
) {
85 return R300_VAP_VF_CNTL__PRIM_POINTS
;
88 return R300_VAP_VF_CNTL__PRIM_LINES
;
91 return R300_VAP_VF_CNTL__PRIM_LINE_STRIP
;
94 return R300_VAP_VF_CNTL__PRIM_LINE_LOOP
;
97 return R300_VAP_VF_CNTL__PRIM_TRIANGLES
;
99 case GL_TRIANGLE_STRIP
:
100 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP
;
102 case GL_TRIANGLE_FAN
:
103 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN
;
106 return R300_VAP_VF_CNTL__PRIM_QUADS
;
109 return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP
;
112 return R300_VAP_VF_CNTL__PRIM_POLYGON
;
121 int r300NumVerts(r300ContextPtr rmesa
, int num_verts
, int prim
)
125 switch (prim
& PRIM_MODE_MASK
) {
130 verts_off
= num_verts
% 2;
134 verts_off
= num_verts
;
138 verts_off
= num_verts
;
141 verts_off
= num_verts
% 3;
143 case GL_TRIANGLE_STRIP
:
145 verts_off
= num_verts
;
147 case GL_TRIANGLE_FAN
:
149 verts_off
= num_verts
;
152 verts_off
= num_verts
% 4;
156 verts_off
= num_verts
;
158 verts_off
= num_verts
% 2;
162 verts_off
= num_verts
;
170 return num_verts
- verts_off
;
173 static void r300EmitElts(GLcontext
* ctx
, void *elts
, unsigned long n_elts
)
175 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
178 radeonAllocDmaRegion(&rmesa
->radeon
, &rmesa
->state
.elt_dma_bo
,
179 &rmesa
->state
.elt_dma_offset
, n_elts
* 4, 4);
180 radeon_bo_map(rmesa
->state
.elt_dma_bo
, 1);
181 out
= rmesa
->state
.elt_dma_bo
->ptr
+ rmesa
->state
.elt_dma_offset
;
182 memcpy(out
, elts
, n_elts
* 4);
183 radeon_bo_unmap(rmesa
->state
.elt_dma_bo
);
186 static void r300FireEB(r300ContextPtr rmesa
, int vertex_count
, int type
)
188 BATCH_LOCALS(&rmesa
->radeon
);
190 if (vertex_count
> 0) {
192 OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2
, 0);
193 OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES
|
194 ((vertex_count
+ 0) << 16) |
196 R300_VAP_VF_CNTL__INDEX_SIZE_32bit
);
198 if (!rmesa
->radeon
.radeonScreen
->kernel_mm
) {
199 OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER
, 2);
200 OUT_BATCH(R300_EB_UNK1
| (0 << 16) | R300_EB_UNK2
);
201 OUT_BATCH_RELOC(rmesa
->state
.elt_dma_offset
,
202 rmesa
->state
.elt_dma_bo
,
203 rmesa
->state
.elt_dma_offset
,
204 RADEON_GEM_DOMAIN_GTT
, 0, 0);
205 OUT_BATCH(vertex_count
);
207 OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER
, 2);
208 OUT_BATCH(R300_EB_UNK1
| (0 << 16) | R300_EB_UNK2
);
209 OUT_BATCH(rmesa
->state
.elt_dma_offset
);
210 OUT_BATCH(vertex_count
);
211 radeon_cs_write_reloc(rmesa
->radeon
.cmdbuf
.cs
,
212 rmesa
->state
.elt_dma_bo
,
213 RADEON_GEM_DOMAIN_GTT
, 0, 0);
219 static void r300EmitAOS(r300ContextPtr rmesa
, GLuint nr
, GLuint offset
)
221 BATCH_LOCALS(&rmesa
->radeon
);
223 int sz
= 1 + (nr
>> 1) * 3 + (nr
& 1) * 2;
226 if (RADEON_DEBUG
& DEBUG_VERTS
)
227 fprintf(stderr
, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__
, nr
,
231 if (!rmesa
->radeon
.radeonScreen
->kernel_mm
) {
232 BEGIN_BATCH(sz
+2+(nr
* 2));
233 OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR
, sz
- 1);
236 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
237 OUT_BATCH((rmesa
->state
.aos
[i
].components
<< 0) |
238 (rmesa
->state
.aos
[i
].stride
<< 8) |
239 (rmesa
->state
.aos
[i
+ 1].components
<< 16) |
240 (rmesa
->state
.aos
[i
+ 1].stride
<< 24));
242 voffset
= rmesa
->state
.aos
[i
+ 0].offset
+
243 offset
* 4 * rmesa
->state
.aos
[i
+ 0].stride
;
244 OUT_BATCH_RELOC(voffset
,
245 rmesa
->state
.aos
[i
].bo
,
247 RADEON_GEM_DOMAIN_GTT
,
249 voffset
= rmesa
->state
.aos
[i
+ 1].offset
+
250 offset
* 4 * rmesa
->state
.aos
[i
+ 1].stride
;
251 OUT_BATCH_RELOC(voffset
,
252 rmesa
->state
.aos
[i
+1].bo
,
254 RADEON_GEM_DOMAIN_GTT
,
259 OUT_BATCH((rmesa
->state
.aos
[nr
- 1].components
<< 0) |
260 (rmesa
->state
.aos
[nr
- 1].stride
<< 8));
261 voffset
= rmesa
->state
.aos
[nr
- 1].offset
+
262 offset
* 4 * rmesa
->state
.aos
[nr
- 1].stride
;
263 OUT_BATCH_RELOC(voffset
,
264 rmesa
->state
.aos
[nr
- 1].bo
,
266 RADEON_GEM_DOMAIN_GTT
,
272 BEGIN_BATCH(sz
+2+(nr
* 2));
273 OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR
, sz
- 1);
276 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
277 OUT_BATCH((rmesa
->state
.aos
[i
].components
<< 0) |
278 (rmesa
->state
.aos
[i
].stride
<< 8) |
279 (rmesa
->state
.aos
[i
+ 1].components
<< 16) |
280 (rmesa
->state
.aos
[i
+ 1].stride
<< 24));
282 voffset
= rmesa
->state
.aos
[i
+ 0].offset
+
283 offset
* 4 * rmesa
->state
.aos
[i
+ 0].stride
;
285 voffset
= rmesa
->state
.aos
[i
+ 1].offset
+
286 offset
* 4 * rmesa
->state
.aos
[i
+ 1].stride
;
291 OUT_BATCH((rmesa
->state
.aos
[nr
- 1].components
<< 0) |
292 (rmesa
->state
.aos
[nr
- 1].stride
<< 8));
293 voffset
= rmesa
->state
.aos
[nr
- 1].offset
+
294 offset
* 4 * rmesa
->state
.aos
[nr
- 1].stride
;
297 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
298 voffset
= rmesa
->state
.aos
[i
+ 0].offset
+
299 offset
* 4 * rmesa
->state
.aos
[i
+ 0].stride
;
300 radeon_cs_write_reloc(rmesa
->radeon
.cmdbuf
.cs
,
301 rmesa
->state
.aos
[i
+0].bo
,
302 RADEON_GEM_DOMAIN_GTT
,
304 voffset
= rmesa
->state
.aos
[i
+ 1].offset
+
305 offset
* 4 * rmesa
->state
.aos
[i
+ 1].stride
;
306 radeon_cs_write_reloc(rmesa
->radeon
.cmdbuf
.cs
,
307 rmesa
->state
.aos
[i
+1].bo
,
308 RADEON_GEM_DOMAIN_GTT
,
312 voffset
= rmesa
->state
.aos
[nr
- 1].offset
+
313 offset
* 4 * rmesa
->state
.aos
[nr
- 1].stride
;
314 radeon_cs_write_reloc(rmesa
->radeon
.cmdbuf
.cs
,
315 rmesa
->state
.aos
[nr
-1].bo
,
316 RADEON_GEM_DOMAIN_GTT
,
324 static void r300FireAOS(r300ContextPtr rmesa
, int vertex_count
, int type
)
326 BATCH_LOCALS(&rmesa
->radeon
);
329 OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2
, 0);
330 OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST
| (vertex_count
<< 16) | type
);
334 static void r300RunRenderPrimitive(r300ContextPtr rmesa
, GLcontext
* ctx
,
335 int start
, int end
, int prim
)
337 BATCH_LOCALS(&rmesa
->radeon
);
339 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
340 struct vertex_buffer
*vb
= &tnl
->vb
;
342 type
= r300PrimitiveType(rmesa
, prim
);
343 num_verts
= r300NumVerts(rmesa
, end
- start
, prim
);
345 if (type
< 0 || num_verts
<= 0)
348 /* Make space for at least 64 dwords.
349 * This is supposed to ensure that we can get all rendering
350 * commands into a single command buffer.
352 rcommonEnsureCmdBufSpace(&rmesa
->radeon
, 64, __FUNCTION__
);
355 if (num_verts
> 65535) {
356 /* not implemented yet */
357 WARN_ONCE("Too many elts\n");
360 /* Note: The following is incorrect, but it's the best I can do
361 * without a major refactoring of how DMA memory is handled.
362 * The problem: Ensuring that both vertex arrays *and* index
363 * arrays are at the right position, and then ensuring that
364 * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are emitted
367 * So why is the following incorrect? Well, it seems like
368 * allocating the index array might actually evict the vertex
371 r300EmitElts(ctx
, vb
->Elts
, num_verts
);
372 r300EmitAOS(rmesa
, rmesa
->state
.aos_count
, start
);
373 r300FireEB(rmesa
, num_verts
, type
);
375 r300EmitAOS(rmesa
, rmesa
->state
.aos_count
, start
);
376 r300FireAOS(rmesa
, num_verts
, type
);
381 static GLboolean
r300RunRender(GLcontext
* ctx
,
382 struct tnl_pipeline_stage
*stage
)
384 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
386 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
387 struct vertex_buffer
*vb
= &tnl
->vb
;
389 if (RADEON_DEBUG
& DEBUG_PRIMS
)
390 fprintf(stderr
, "%s\n", __FUNCTION__
);
392 r300UpdateShaders(rmesa
);
393 if (r300EmitArrays(ctx
))
396 r300UpdateShaderStates(rmesa
);
398 r300EmitCacheFlush(rmesa
);
399 r300EmitState(rmesa
);
401 for (i
= 0; i
< vb
->PrimitiveCount
; i
++) {
402 GLuint prim
= _tnl_translate_prim(&vb
->Primitive
[i
]);
403 GLuint start
= vb
->Primitive
[i
].start
;
404 GLuint end
= vb
->Primitive
[i
].start
+ vb
->Primitive
[i
].count
;
405 r300RunRenderPrimitive(rmesa
, ctx
, start
, end
, prim
);
408 r300EmitCacheFlush(rmesa
);
410 r300ReleaseArrays(ctx
);
415 #define FALLBACK_IF(expr) \
418 if (1 || RADEON_DEBUG & DEBUG_FALLBACKS) \
419 WARN_ONCE("Software fallback:%s\n", \
421 return R300_FALLBACK_RAST; \
425 static int r300Fallback(GLcontext
* ctx
)
427 r300ContextPtr r300
= R300_CONTEXT(ctx
);
428 /* Do we need to use new-style shaders?
429 * Also is there a better way to do this? */
430 if (r300
->radeon
.radeonScreen
->chip_family
>= CHIP_FAMILY_RV515
) {
431 struct r500_fragment_program
*fp
= (struct r500_fragment_program
*)
432 (char *)ctx
->FragmentProgram
._Current
;
434 if (!fp
->translated
) {
435 r500TranslateFragmentShader(r300
, fp
);
436 FALLBACK_IF(!fp
->translated
);
440 struct r300_fragment_program
*fp
= (struct r300_fragment_program
*)
441 (char *)ctx
->FragmentProgram
._Current
;
443 if (!fp
->translated
) {
444 r300TranslateFragmentShader(r300
, fp
);
445 FALLBACK_IF(!fp
->translated
);
450 FALLBACK_IF(ctx
->RenderMode
!= GL_RENDER
);
452 FALLBACK_IF(ctx
->Stencil
._TestTwoSide
453 && (ctx
->Stencil
.Ref
[0] != ctx
->Stencil
.Ref
[1]
454 || ctx
->Stencil
.ValueMask
[0] !=
455 ctx
->Stencil
.ValueMask
[1]
456 || ctx
->Stencil
.WriteMask
[0] !=
457 ctx
->Stencil
.WriteMask
[1]));
459 if (ctx
->Extensions
.NV_point_sprite
|| ctx
->Extensions
.ARB_point_sprite
)
460 FALLBACK_IF(ctx
->Point
.PointSprite
);
462 if (!r300
->disable_lowimpact_fallback
) {
463 FALLBACK_IF(ctx
->Polygon
.StippleFlag
);
464 FALLBACK_IF(ctx
->Multisample
._Enabled
);
465 FALLBACK_IF(ctx
->Line
.StippleFlag
);
466 FALLBACK_IF(ctx
->Line
.SmoothFlag
);
467 FALLBACK_IF(ctx
->Point
.SmoothFlag
);
470 return R300_FALLBACK_NONE
;
473 static GLboolean
r300RunNonTCLRender(GLcontext
* ctx
,
474 struct tnl_pipeline_stage
*stage
)
476 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
478 if (RADEON_DEBUG
& DEBUG_PRIMS
)
479 fprintf(stderr
, "%s\n", __FUNCTION__
);
481 if (r300Fallback(ctx
) >= R300_FALLBACK_RAST
)
484 if (!(rmesa
->radeon
.radeonScreen
->chip_flags
& RADEON_CHIPSET_TCL
))
487 return r300RunRender(ctx
, stage
);
490 static GLboolean
r300RunTCLRender(GLcontext
* ctx
,
491 struct tnl_pipeline_stage
*stage
)
493 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
494 struct r300_vertex_program
*vp
;
496 hw_tcl_on
= future_hw_tcl_on
;
498 if (RADEON_DEBUG
& DEBUG_PRIMS
)
499 fprintf(stderr
, "%s\n", __FUNCTION__
);
501 if (hw_tcl_on
== GL_FALSE
)
504 if (r300Fallback(ctx
) >= R300_FALLBACK_TCL
) {
505 hw_tcl_on
= GL_FALSE
;
509 if (!r300ValidateBuffers(ctx
))
512 r300UpdateShaders(rmesa
);
514 vp
= (struct r300_vertex_program
*)CURRENT_VERTEX_SHADER(ctx
);
515 if (vp
->native
== GL_FALSE
) {
516 hw_tcl_on
= GL_FALSE
;
520 return r300RunRender(ctx
, stage
);
523 const struct tnl_pipeline_stage _r300_render_stage
= {
524 "r300 Hardware Rasterization",
532 const struct tnl_pipeline_stage _r300_tcl_stage
= {
533 "r300 Hardware Transform, Clipping and Lighting",