1 /**************************************************************************
3 Copyright (C) 2004 Nicolai Haehnle.
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
14 The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
/**
 * \file
 *
 * \brief R300 Render (Vertex Buffer Implementation)
 *
 * The immediate implementation has been removed from CVS in favor of the vertex
 * buffer implementation.
 *
 * The render functions are called by the pipeline manager to render a batch of
 * primitives. They return TRUE to pass on to the next stage (i.e. software
 * rasterization) or FALSE to indicate that the pipeline has finished after
 * rendering something.
 *
 * When falling back to software TCL still attempt to use hardware
 * rasterization.
 *
 * I am not sure that the cache related registers are setup correctly, but
 * obviously this does work... Further investigation is needed.
 *
 * \author Nicolai Haehnle <prefect_@gmx.net>
 *
 * \todo Add immediate implementation back? Perhaps this is useful if there are
 * no bugs...
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#include "main/glheader.h"
#include "main/state.h"
#include "main/imports.h"
#include "main/enums.h"
#include "main/macros.h"
#include "main/context.h"
#include "main/simple_list.h"
#include "main/api_arrayelt.h"

#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
#include "tnl/t_vp_build.h"

#include "radeon_reg.h"
#include "radeon_macros.h"
#include "r300_context.h"
#include "r300_ioctl.h"
#include "r300_state.h"
#include "r300_emit.h"
#include "r300_fragprog.h"
76 extern int future_hw_tcl_on
;
79 * \brief Convert a OpenGL primitive type into a R300 primitive type.
81 int r300PrimitiveType(r300ContextPtr rmesa
, int prim
)
83 switch (prim
& PRIM_MODE_MASK
) {
85 return R300_VAP_VF_CNTL__PRIM_POINTS
;
88 return R300_VAP_VF_CNTL__PRIM_LINES
;
91 return R300_VAP_VF_CNTL__PRIM_LINE_STRIP
;
94 return R300_VAP_VF_CNTL__PRIM_LINE_LOOP
;
97 return R300_VAP_VF_CNTL__PRIM_TRIANGLES
;
99 case GL_TRIANGLE_STRIP
:
100 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP
;
102 case GL_TRIANGLE_FAN
:
103 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN
;
106 return R300_VAP_VF_CNTL__PRIM_QUADS
;
109 return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP
;
112 return R300_VAP_VF_CNTL__PRIM_POLYGON
;
121 int r300NumVerts(r300ContextPtr rmesa
, int num_verts
, int prim
)
125 switch (prim
& PRIM_MODE_MASK
) {
130 verts_off
= num_verts
% 2;
134 verts_off
= num_verts
;
138 verts_off
= num_verts
;
141 verts_off
= num_verts
% 3;
143 case GL_TRIANGLE_STRIP
:
145 verts_off
= num_verts
;
147 case GL_TRIANGLE_FAN
:
149 verts_off
= num_verts
;
152 verts_off
= num_verts
% 4;
156 verts_off
= num_verts
;
158 verts_off
= num_verts
% 2;
162 verts_off
= num_verts
;
170 return num_verts
- verts_off
;
173 static void r300EmitElts(GLcontext
* ctx
, void *elts
, unsigned long n_elts
)
175 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
178 radeonAllocDmaRegion(&rmesa
->radeon
, &rmesa
->radeon
.tcl
.elt_dma_bo
,
179 &rmesa
->radeon
.tcl
.elt_dma_offset
, n_elts
* 4, 4);
180 radeon_bo_map(rmesa
->radeon
.tcl
.elt_dma_bo
, 1);
181 out
= rmesa
->radeon
.tcl
.elt_dma_bo
->ptr
+ rmesa
->radeon
.tcl
.elt_dma_offset
;
182 memcpy(out
, elts
, n_elts
* 4);
183 radeon_bo_unmap(rmesa
->radeon
.tcl
.elt_dma_bo
);
186 static void r300FireEB(r300ContextPtr rmesa
, int vertex_count
, int type
)
188 BATCH_LOCALS(&rmesa
->radeon
);
190 if (vertex_count
> 0) {
192 OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2
, 0);
193 OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES
|
194 ((vertex_count
+ 0) << 16) |
196 R300_VAP_VF_CNTL__INDEX_SIZE_32bit
);
198 if (!rmesa
->radeon
.radeonScreen
->kernel_mm
) {
199 OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER
, 2);
200 OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR
| (0 << R300_INDX_BUFFER_SKIP_SHIFT
) |
201 (R300_VAP_PORT_IDX0
>> 2));
202 OUT_BATCH_RELOC(rmesa
->radeon
.tcl
.elt_dma_offset
,
203 rmesa
->radeon
.tcl
.elt_dma_bo
,
204 rmesa
->radeon
.tcl
.elt_dma_offset
,
205 RADEON_GEM_DOMAIN_GTT
, 0, 0);
206 OUT_BATCH(vertex_count
);
208 OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER
, 2);
209 OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR
| (0 << R300_INDX_BUFFER_SKIP_SHIFT
) |
210 (R300_VAP_PORT_IDX0
>> 2));
211 OUT_BATCH(rmesa
->radeon
.tcl
.elt_dma_offset
);
212 OUT_BATCH(vertex_count
);
213 radeon_cs_write_reloc(rmesa
->radeon
.cmdbuf
.cs
,
214 rmesa
->radeon
.tcl
.elt_dma_bo
,
215 RADEON_GEM_DOMAIN_GTT
, 0, 0);
221 static void r300EmitAOS(r300ContextPtr rmesa
, GLuint nr
, GLuint offset
)
223 BATCH_LOCALS(&rmesa
->radeon
);
225 int sz
= 1 + (nr
>> 1) * 3 + (nr
& 1) * 2;
228 if (RADEON_DEBUG
& DEBUG_VERTS
)
229 fprintf(stderr
, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__
, nr
,
233 if (!rmesa
->radeon
.radeonScreen
->kernel_mm
) {
234 BEGIN_BATCH(sz
+2+(nr
* 2));
235 OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR
, sz
- 1);
238 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
239 OUT_BATCH((rmesa
->radeon
.tcl
.aos
[i
].components
<< 0) |
240 (rmesa
->radeon
.tcl
.aos
[i
].stride
<< 8) |
241 (rmesa
->radeon
.tcl
.aos
[i
+ 1].components
<< 16) |
242 (rmesa
->radeon
.tcl
.aos
[i
+ 1].stride
<< 24));
244 voffset
= rmesa
->radeon
.tcl
.aos
[i
+ 0].offset
+
245 offset
* 4 * rmesa
->radeon
.tcl
.aos
[i
+ 0].stride
;
246 OUT_BATCH_RELOC(voffset
,
247 rmesa
->radeon
.tcl
.aos
[i
].bo
,
249 RADEON_GEM_DOMAIN_GTT
,
251 voffset
= rmesa
->radeon
.tcl
.aos
[i
+ 1].offset
+
252 offset
* 4 * rmesa
->radeon
.tcl
.aos
[i
+ 1].stride
;
253 OUT_BATCH_RELOC(voffset
,
254 rmesa
->radeon
.tcl
.aos
[i
+1].bo
,
256 RADEON_GEM_DOMAIN_GTT
,
261 OUT_BATCH((rmesa
->radeon
.tcl
.aos
[nr
- 1].components
<< 0) |
262 (rmesa
->radeon
.tcl
.aos
[nr
- 1].stride
<< 8));
263 voffset
= rmesa
->radeon
.tcl
.aos
[nr
- 1].offset
+
264 offset
* 4 * rmesa
->radeon
.tcl
.aos
[nr
- 1].stride
;
265 OUT_BATCH_RELOC(voffset
,
266 rmesa
->radeon
.tcl
.aos
[nr
- 1].bo
,
268 RADEON_GEM_DOMAIN_GTT
,
274 BEGIN_BATCH(sz
+2+(nr
* 2));
275 OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR
, sz
- 1);
278 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
279 OUT_BATCH((rmesa
->radeon
.tcl
.aos
[i
].components
<< 0) |
280 (rmesa
->radeon
.tcl
.aos
[i
].stride
<< 8) |
281 (rmesa
->radeon
.tcl
.aos
[i
+ 1].components
<< 16) |
282 (rmesa
->radeon
.tcl
.aos
[i
+ 1].stride
<< 24));
284 voffset
= rmesa
->radeon
.tcl
.aos
[i
+ 0].offset
+
285 offset
* 4 * rmesa
->radeon
.tcl
.aos
[i
+ 0].stride
;
287 voffset
= rmesa
->radeon
.tcl
.aos
[i
+ 1].offset
+
288 offset
* 4 * rmesa
->radeon
.tcl
.aos
[i
+ 1].stride
;
293 OUT_BATCH((rmesa
->radeon
.tcl
.aos
[nr
- 1].components
<< 0) |
294 (rmesa
->radeon
.tcl
.aos
[nr
- 1].stride
<< 8));
295 voffset
= rmesa
->radeon
.tcl
.aos
[nr
- 1].offset
+
296 offset
* 4 * rmesa
->radeon
.tcl
.aos
[nr
- 1].stride
;
299 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
300 voffset
= rmesa
->radeon
.tcl
.aos
[i
+ 0].offset
+
301 offset
* 4 * rmesa
->radeon
.tcl
.aos
[i
+ 0].stride
;
302 radeon_cs_write_reloc(rmesa
->radeon
.cmdbuf
.cs
,
303 rmesa
->radeon
.tcl
.aos
[i
+0].bo
,
304 RADEON_GEM_DOMAIN_GTT
,
306 voffset
= rmesa
->radeon
.tcl
.aos
[i
+ 1].offset
+
307 offset
* 4 * rmesa
->radeon
.tcl
.aos
[i
+ 1].stride
;
308 radeon_cs_write_reloc(rmesa
->radeon
.cmdbuf
.cs
,
309 rmesa
->radeon
.tcl
.aos
[i
+1].bo
,
310 RADEON_GEM_DOMAIN_GTT
,
314 voffset
= rmesa
->radeon
.tcl
.aos
[nr
- 1].offset
+
315 offset
* 4 * rmesa
->radeon
.tcl
.aos
[nr
- 1].stride
;
316 radeon_cs_write_reloc(rmesa
->radeon
.cmdbuf
.cs
,
317 rmesa
->radeon
.tcl
.aos
[nr
-1].bo
,
318 RADEON_GEM_DOMAIN_GTT
,
326 static void r300FireAOS(r300ContextPtr rmesa
, int vertex_count
, int type
)
328 BATCH_LOCALS(&rmesa
->radeon
);
331 OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2
, 0);
332 OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST
| (vertex_count
<< 16) | type
);
336 static void r300RunRenderPrimitive(r300ContextPtr rmesa
, GLcontext
* ctx
,
337 int start
, int end
, int prim
)
340 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
341 struct vertex_buffer
*vb
= &tnl
->vb
;
343 type
= r300PrimitiveType(rmesa
, prim
);
344 num_verts
= r300NumVerts(rmesa
, end
- start
, prim
);
346 if (type
< 0 || num_verts
<= 0)
349 /* Make space for at least 64 dwords.
350 * This is supposed to ensure that we can get all rendering
351 * commands into a single command buffer.
353 rcommonEnsureCmdBufSpace(&rmesa
->radeon
, 64, __FUNCTION__
);
356 if (num_verts
> 65535) {
357 /* not implemented yet */
358 WARN_ONCE("Too many elts\n");
361 /* Note: The following is incorrect, but it's the best I can do
362 * without a major refactoring of how DMA memory is handled.
363 * The problem: Ensuring that both vertex arrays *and* index
364 * arrays are at the right position, and then ensuring that
365 * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are emitted
368 * So why is the following incorrect? Well, it seems like
369 * allocating the index array might actually evict the vertex
372 r300EmitElts(ctx
, vb
->Elts
, num_verts
);
373 r300EmitAOS(rmesa
, rmesa
->radeon
.tcl
.aos_count
, start
);
374 r300FireEB(rmesa
, num_verts
, type
);
376 r300EmitAOS(rmesa
, rmesa
->radeon
.tcl
.aos_count
, start
);
377 r300FireAOS(rmesa
, num_verts
, type
);
382 static GLboolean
r300RunRender(GLcontext
* ctx
,
383 struct tnl_pipeline_stage
*stage
)
385 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
387 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
388 struct vertex_buffer
*vb
= &tnl
->vb
;
390 if (RADEON_DEBUG
& DEBUG_PRIMS
)
391 fprintf(stderr
, "%s\n", __FUNCTION__
);
393 r300UpdateShaders(rmesa
);
394 if (r300EmitArrays(ctx
))
397 r300UpdateShaderStates(rmesa
);
399 r300EmitCacheFlush(rmesa
);
400 radeonEmitState(&rmesa
->radeon
);
402 for (i
= 0; i
< vb
->PrimitiveCount
; i
++) {
403 GLuint prim
= _tnl_translate_prim(&vb
->Primitive
[i
]);
404 GLuint start
= vb
->Primitive
[i
].start
;
405 GLuint end
= vb
->Primitive
[i
].start
+ vb
->Primitive
[i
].count
;
406 r300RunRenderPrimitive(rmesa
, ctx
, start
, end
, prim
);
409 r300EmitCacheFlush(rmesa
);
411 radeonReleaseArrays(ctx
, ~0);
416 #define FALLBACK_IF(expr) \
419 if (1 || RADEON_DEBUG & DEBUG_FALLBACKS) \
420 WARN_ONCE("Software fallback:%s\n", \
422 return R300_FALLBACK_RAST; \
426 static int r300Fallback(GLcontext
* ctx
)
428 r300ContextPtr r300
= R300_CONTEXT(ctx
);
429 const unsigned back
= ctx
->Stencil
._BackFace
;
431 FALLBACK_IF(r300
->radeon
.Fallback
);
432 /* Do we need to use new-style shaders?
433 * Also is there a better way to do this? */
434 if (r300
->radeon
.radeonScreen
->chip_family
>= CHIP_FAMILY_RV515
) {
435 struct r500_fragment_program
*fp
= (struct r500_fragment_program
*)
436 (char *)ctx
->FragmentProgram
._Current
;
439 r500TranslateFragmentShader(r300
, fp
);
441 FALLBACK_IF(fp
->error
);
444 struct r300_fragment_program
*fp
= (struct r300_fragment_program
*)
445 (char *)ctx
->FragmentProgram
._Current
;
448 r300TranslateFragmentShader(r300
, fp
);
450 FALLBACK_IF(fp
->error
);
454 FALLBACK_IF(ctx
->RenderMode
!= GL_RENDER
);
456 /* If GL_EXT_stencil_two_side is disabled, this fallback check can
459 FALLBACK_IF(ctx
->Stencil
.Ref
[0] != ctx
->Stencil
.Ref
[back
]
460 || ctx
->Stencil
.ValueMask
[0] !=
461 ctx
->Stencil
.ValueMask
[back
]
462 || ctx
->Stencil
.WriteMask
[0] !=
463 ctx
->Stencil
.WriteMask
[back
]);
465 if (ctx
->Extensions
.NV_point_sprite
|| ctx
->Extensions
.ARB_point_sprite
)
466 FALLBACK_IF(ctx
->Point
.PointSprite
);
468 if (!r300
->disable_lowimpact_fallback
) {
469 FALLBACK_IF(ctx
->Polygon
.StippleFlag
);
470 FALLBACK_IF(ctx
->Multisample
._Enabled
);
471 FALLBACK_IF(ctx
->Line
.StippleFlag
);
472 FALLBACK_IF(ctx
->Line
.SmoothFlag
);
473 FALLBACK_IF(ctx
->Point
.SmoothFlag
);
476 return R300_FALLBACK_NONE
;
479 static GLboolean
r300RunNonTCLRender(GLcontext
* ctx
,
480 struct tnl_pipeline_stage
*stage
)
482 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
484 if (RADEON_DEBUG
& DEBUG_PRIMS
)
485 fprintf(stderr
, "%s\n", __FUNCTION__
);
487 if (r300Fallback(ctx
) >= R300_FALLBACK_RAST
)
490 if (!(rmesa
->radeon
.radeonScreen
->chip_flags
& RADEON_CHIPSET_TCL
))
493 if (!r300ValidateBuffers(ctx
))
496 return r300RunRender(ctx
, stage
);
499 static GLboolean
r300RunTCLRender(GLcontext
* ctx
,
500 struct tnl_pipeline_stage
*stage
)
502 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
503 struct r300_vertex_program
*vp
;
505 hw_tcl_on
= future_hw_tcl_on
;
507 if (RADEON_DEBUG
& DEBUG_PRIMS
)
508 fprintf(stderr
, "%s\n", __FUNCTION__
);
510 if (hw_tcl_on
== GL_FALSE
)
513 if (r300Fallback(ctx
) >= R300_FALLBACK_TCL
) {
514 hw_tcl_on
= GL_FALSE
;
518 if (!r300ValidateBuffers(ctx
))
521 r300UpdateShaders(rmesa
);
523 vp
= (struct r300_vertex_program
*)CURRENT_VERTEX_SHADER(ctx
);
524 if (vp
->native
== GL_FALSE
) {
525 hw_tcl_on
= GL_FALSE
;
529 return r300RunRender(ctx
, stage
);
532 const struct tnl_pipeline_stage _r300_render_stage
= {
533 "r300 Hardware Rasterization",
541 const struct tnl_pipeline_stage _r300_tcl_stage
= {
542 "r300 Hardware Transform, Clipping and Lighting",