1 /**************************************************************************
3 Copyright (C) 2004 Nicolai Haehnle.
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
/**
 * \file
 *
 * \brief R300 Render (Vertex Buffer Implementation)
 *
 * The immediate implementation has been removed from CVS in favor of the vertex
 * buffer implementation.
 *
 * The render functions are called by the pipeline manager to render a batch of
 * primitives. They return TRUE to pass on to the next stage (i.e. software
 * rasterization) or FALSE to indicate that the pipeline has finished after
 * rendering something.
 *
 * When falling back to software TCL still attempt to use hardware
 * rasterization.
 *
 * I am not sure that the cache related registers are setup correctly, but
 * obviously this does work... Further investigation is needed.
 *
 * \author Nicolai Haehnle <prefect_@gmx.net>
 *
 * \todo Add immediate implementation back? Perhaps this is useful if there are
 * no bugs...
 */
53 #include "main/glheader.h"
54 #include "main/state.h"
55 #include "main/imports.h"
56 #include "main/enums.h"
57 #include "main/macros.h"
58 #include "main/context.h"
60 #include "main/simple_list.h"
61 #include "main/api_arrayelt.h"
62 #include "swrast/swrast.h"
63 #include "swrast_setup/swrast_setup.h"
66 #include "tnl/t_vp_build.h"
67 #include "radeon_reg.h"
68 #include "radeon_macros.h"
69 #include "radeon_ioctl.h"
70 #include "radeon_state.h"
71 #include "r300_context.h"
72 #include "r300_ioctl.h"
73 #include "r300_state.h"
76 #include "r300_emit.h"
77 #include "r300_fragprog.h"
78 extern int future_hw_tcl_on
;
81 * \brief Convert a OpenGL primitive type into a R300 primitive type.
83 int r300PrimitiveType(r300ContextPtr rmesa
, int prim
)
85 switch (prim
& PRIM_MODE_MASK
) {
87 return R300_VAP_VF_CNTL__PRIM_POINTS
;
90 return R300_VAP_VF_CNTL__PRIM_LINES
;
93 return R300_VAP_VF_CNTL__PRIM_LINE_STRIP
;
96 return R300_VAP_VF_CNTL__PRIM_LINE_LOOP
;
99 return R300_VAP_VF_CNTL__PRIM_TRIANGLES
;
101 case GL_TRIANGLE_STRIP
:
102 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP
;
104 case GL_TRIANGLE_FAN
:
105 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN
;
108 return R300_VAP_VF_CNTL__PRIM_QUADS
;
111 return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP
;
114 return R300_VAP_VF_CNTL__PRIM_POLYGON
;
123 int r300NumVerts(r300ContextPtr rmesa
, int num_verts
, int prim
)
127 switch (prim
& PRIM_MODE_MASK
) {
132 verts_off
= num_verts
% 2;
136 verts_off
= num_verts
;
140 verts_off
= num_verts
;
143 verts_off
= num_verts
% 3;
145 case GL_TRIANGLE_STRIP
:
147 verts_off
= num_verts
;
149 case GL_TRIANGLE_FAN
:
151 verts_off
= num_verts
;
154 verts_off
= num_verts
% 4;
158 verts_off
= num_verts
;
160 verts_off
= num_verts
% 2;
164 verts_off
= num_verts
;
172 return num_verts
- verts_off
;
175 static void r300EmitElts(GLcontext
* ctx
, void *elts
, unsigned long n_elts
)
177 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
180 rmesa
->state
.elt_dma_bo
= radeon_bo_open(rmesa
->radeon
.radeonScreen
->bom
,
182 RADEON_GEM_DOMAIN_GTT
, 0);
183 rmesa
->state
.elt_dma_offset
= 0;
184 radeon_bo_map(rmesa
->state
.elt_dma_bo
, 1);
185 out
= rmesa
->state
.elt_dma_bo
->ptr
+ rmesa
->state
.elt_dma_offset
;
186 memcpy(out
, elts
, n_elts
* 4);
187 radeon_bo_unmap(rmesa
->state
.elt_dma_bo
);
190 static void r300FireEB(r300ContextPtr rmesa
, int vertex_count
, int type
)
192 BATCH_LOCALS(&rmesa
->radeon
);
194 if (vertex_count
> 0) {
196 OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2
, 0);
197 OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES
|
198 ((vertex_count
+ 0) << 16) |
200 R300_VAP_VF_CNTL__INDEX_SIZE_32bit
);
202 if (!rmesa
->radeon
.radeonScreen
->kernel_mm
) {
203 OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER
, 2);
204 OUT_BATCH(R300_EB_UNK1
| (0 << 16) | R300_EB_UNK2
);
205 OUT_BATCH_RELOC(rmesa
->state
.elt_dma_offset
,
206 rmesa
->state
.elt_dma_bo
,
207 rmesa
->state
.elt_dma_offset
,
208 RADEON_GEM_DOMAIN_GTT
, 0, 0);
209 OUT_BATCH(vertex_count
);
211 OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER
, 2);
212 OUT_BATCH(R300_EB_UNK1
| (0 << 16) | R300_EB_UNK2
);
213 OUT_BATCH(rmesa
->state
.elt_dma_offset
);
214 OUT_BATCH(vertex_count
);
215 radeon_cs_write_reloc(rmesa
->radeon
.cmdbuf
.cs
,
216 rmesa
->state
.elt_dma_bo
,
217 RADEON_GEM_DOMAIN_GTT
, 0, 0);
223 static void r300EmitAOS(r300ContextPtr rmesa
, GLuint nr
, GLuint offset
)
225 BATCH_LOCALS(&rmesa
->radeon
);
227 int sz
= 1 + (nr
>> 1) * 3 + (nr
& 1) * 2;
230 if (RADEON_DEBUG
& DEBUG_VERTS
)
231 fprintf(stderr
, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__
, nr
,
235 OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR
, sz
- 1);
239 if (!rmesa
->radeon
.radeonScreen
->kernel_mm
) {
240 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
241 OUT_BATCH((rmesa
->state
.aos
[i
].components
<< 0) |
242 (rmesa
->state
.aos
[i
].stride
<< 8) |
243 (rmesa
->state
.aos
[i
+ 1].components
<< 16) |
244 (rmesa
->state
.aos
[i
+ 1].stride
<< 24));
246 voffset
= rmesa
->state
.aos
[i
+ 0].offset
+
247 offset
* 4 * rmesa
->state
.aos
[i
+ 0].stride
;
248 OUT_BATCH_RELOC(voffset
,
249 rmesa
->state
.aos
[i
].bo
,
251 RADEON_GEM_DOMAIN_GTT
,
253 voffset
= rmesa
->state
.aos
[i
+ 1].offset
+
254 offset
* 4 * rmesa
->state
.aos
[i
+ 1].stride
;
255 OUT_BATCH_RELOC(voffset
,
256 rmesa
->state
.aos
[i
+1].bo
,
258 RADEON_GEM_DOMAIN_GTT
,
263 OUT_BATCH((rmesa
->state
.aos
[nr
- 1].components
<< 0) |
264 (rmesa
->state
.aos
[nr
- 1].stride
<< 8));
265 voffset
= rmesa
->state
.aos
[nr
- 1].offset
+
266 offset
* 4 * rmesa
->state
.aos
[nr
- 1].stride
;
267 OUT_BATCH_RELOC(voffset
,
268 rmesa
->state
.aos
[nr
- 1].bo
,
270 RADEON_GEM_DOMAIN_GTT
,
274 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
275 OUT_BATCH((rmesa
->state
.aos
[i
].components
<< 0) |
276 (rmesa
->state
.aos
[i
].stride
<< 8) |
277 (rmesa
->state
.aos
[i
+ 1].components
<< 16) |
278 (rmesa
->state
.aos
[i
+ 1].stride
<< 24));
280 voffset
= rmesa
->state
.aos
[i
+ 0].offset
+
281 offset
* 4 * rmesa
->state
.aos
[i
+ 0].stride
;
283 voffset
= rmesa
->state
.aos
[i
+ 1].offset
+
284 offset
* 4 * rmesa
->state
.aos
[i
+ 1].stride
;
289 OUT_BATCH((rmesa
->state
.aos
[nr
- 1].components
<< 0) |
290 (rmesa
->state
.aos
[nr
- 1].stride
<< 8));
291 voffset
= rmesa
->state
.aos
[nr
- 1].offset
+
292 offset
* 4 * rmesa
->state
.aos
[nr
- 1].stride
;
295 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
296 voffset
= rmesa
->state
.aos
[i
+ 0].offset
+
297 offset
* 4 * rmesa
->state
.aos
[i
+ 0].stride
;
298 radeon_cs_write_reloc(rmesa
->radeon
.cmdbuf
.cs
,
299 rmesa
->state
.aos
[i
+0].bo
,
300 RADEON_GEM_DOMAIN_GTT
,
302 voffset
= rmesa
->state
.aos
[i
+ 1].offset
+
303 offset
* 4 * rmesa
->state
.aos
[i
+ 1].stride
;
304 radeon_cs_write_reloc(rmesa
->radeon
.cmdbuf
.cs
,
305 rmesa
->state
.aos
[i
+1].bo
,
306 RADEON_GEM_DOMAIN_GTT
,
310 voffset
= rmesa
->state
.aos
[nr
- 1].offset
+
311 offset
* 4 * rmesa
->state
.aos
[nr
- 1].stride
;
312 radeon_cs_write_reloc(rmesa
->radeon
.cmdbuf
.cs
,
313 rmesa
->state
.aos
[nr
-1].bo
,
314 RADEON_GEM_DOMAIN_GTT
,
321 static void r300FireAOS(r300ContextPtr rmesa
, int vertex_count
, int type
)
323 BATCH_LOCALS(&rmesa
->radeon
);
326 OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2
, 0);
327 OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST
| (vertex_count
<< 16) | type
);
331 static void r300RunRenderPrimitive(r300ContextPtr rmesa
, GLcontext
* ctx
,
332 int start
, int end
, int prim
)
334 BATCH_LOCALS(&rmesa
->radeon
);
336 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
337 struct vertex_buffer
*vb
= &tnl
->vb
;
339 type
= r300PrimitiveType(rmesa
, prim
);
340 num_verts
= r300NumVerts(rmesa
, end
- start
, prim
);
342 if (type
< 0 || num_verts
<= 0)
345 /* Make space for at least 64 dwords.
346 * This is supposed to ensure that we can get all rendering
347 * commands into a single command buffer.
349 rcommonEnsureCmdBufSpace(&rmesa
->radeon
, 64, __FUNCTION__
);
352 if (num_verts
> 65535) {
353 /* not implemented yet */
354 WARN_ONCE("Too many elts\n");
357 /* Note: The following is incorrect, but it's the best I can do
358 * without a major refactoring of how DMA memory is handled.
359 * The problem: Ensuring that both vertex arrays *and* index
360 * arrays are at the right position, and then ensuring that
361 * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are emitted
364 * So why is the following incorrect? Well, it seems like
365 * allocating the index array might actually evict the vertex
368 r300EmitElts(ctx
, vb
->Elts
, num_verts
);
369 r300EmitAOS(rmesa
, rmesa
->state
.aos_count
, start
);
370 r300FireEB(rmesa
, num_verts
, type
);
372 r300EmitAOS(rmesa
, rmesa
->state
.aos_count
, start
);
373 r300FireAOS(rmesa
, num_verts
, type
);
378 static GLboolean
r300RunRender(GLcontext
* ctx
,
379 struct tnl_pipeline_stage
*stage
)
381 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
383 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
384 struct vertex_buffer
*vb
= &tnl
->vb
;
386 if (RADEON_DEBUG
& DEBUG_PRIMS
)
387 fprintf(stderr
, "%s\n", __FUNCTION__
);
389 r300UpdateShaders(rmesa
);
390 if (r300EmitArrays(ctx
))
393 r300UpdateShaderStates(rmesa
);
395 r300EmitCacheFlush(rmesa
);
396 r300EmitState(rmesa
);
398 for (i
= 0; i
< vb
->PrimitiveCount
; i
++) {
399 GLuint prim
= _tnl_translate_prim(&vb
->Primitive
[i
]);
400 GLuint start
= vb
->Primitive
[i
].start
;
401 GLuint end
= vb
->Primitive
[i
].start
+ vb
->Primitive
[i
].count
;
402 r300RunRenderPrimitive(rmesa
, ctx
, start
, end
, prim
);
405 r300EmitCacheFlush(rmesa
);
407 r300ReleaseArrays(ctx
);
/* Bail out of r300Fallback() with a rasterization fallback when the
 * condition holds, optionally logging the failing expression once. */
#define FALLBACK_IF(expr)						\
	do {								\
		if (expr) {						\
			if (1 || RADEON_DEBUG & DEBUG_FALLBACKS)	\
				WARN_ONCE("Software fallback:%s\n",	\
					  #expr);			\
			return R300_FALLBACK_RAST;			\
		}							\
	} while (0)
422 static int r300Fallback(GLcontext
* ctx
)
424 r300ContextPtr r300
= R300_CONTEXT(ctx
);
425 /* Do we need to use new-style shaders?
426 * Also is there a better way to do this? */
427 if (r300
->radeon
.radeonScreen
->chip_family
>= CHIP_FAMILY_RV515
) {
428 struct r500_fragment_program
*fp
= (struct r500_fragment_program
*)
429 (char *)ctx
->FragmentProgram
._Current
;
431 if (!fp
->translated
) {
432 r500TranslateFragmentShader(r300
, fp
);
433 FALLBACK_IF(!fp
->translated
);
437 struct r300_fragment_program
*fp
= (struct r300_fragment_program
*)
438 (char *)ctx
->FragmentProgram
._Current
;
440 if (!fp
->translated
) {
441 r300TranslateFragmentShader(r300
, fp
);
442 FALLBACK_IF(!fp
->translated
);
447 FALLBACK_IF(ctx
->RenderMode
!= GL_RENDER
);
449 FALLBACK_IF(ctx
->Stencil
._TestTwoSide
450 && (ctx
->Stencil
.Ref
[0] != ctx
->Stencil
.Ref
[1]
451 || ctx
->Stencil
.ValueMask
[0] !=
452 ctx
->Stencil
.ValueMask
[1]
453 || ctx
->Stencil
.WriteMask
[0] !=
454 ctx
->Stencil
.WriteMask
[1]));
456 if (ctx
->Extensions
.NV_point_sprite
|| ctx
->Extensions
.ARB_point_sprite
)
457 FALLBACK_IF(ctx
->Point
.PointSprite
);
459 if (!r300
->disable_lowimpact_fallback
) {
460 FALLBACK_IF(ctx
->Polygon
.StippleFlag
);
461 FALLBACK_IF(ctx
->Multisample
._Enabled
);
462 FALLBACK_IF(ctx
->Line
.StippleFlag
);
463 FALLBACK_IF(ctx
->Line
.SmoothFlag
);
464 FALLBACK_IF(ctx
->Point
.SmoothFlag
);
467 return R300_FALLBACK_NONE
;
470 static GLboolean
r300RunNonTCLRender(GLcontext
* ctx
,
471 struct tnl_pipeline_stage
*stage
)
473 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
475 if (RADEON_DEBUG
& DEBUG_PRIMS
)
476 fprintf(stderr
, "%s\n", __FUNCTION__
);
478 if (r300Fallback(ctx
) >= R300_FALLBACK_RAST
)
481 if (!(rmesa
->radeon
.radeonScreen
->chip_flags
& RADEON_CHIPSET_TCL
))
484 return r300RunRender(ctx
, stage
);
487 static GLboolean
r300RunTCLRender(GLcontext
* ctx
,
488 struct tnl_pipeline_stage
*stage
)
490 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
491 struct r300_vertex_program
*vp
;
493 hw_tcl_on
= future_hw_tcl_on
;
495 if (RADEON_DEBUG
& DEBUG_PRIMS
)
496 fprintf(stderr
, "%s\n", __FUNCTION__
);
498 if (hw_tcl_on
== GL_FALSE
)
501 if (r300Fallback(ctx
) >= R300_FALLBACK_TCL
) {
502 hw_tcl_on
= GL_FALSE
;
506 r300UpdateShaders(rmesa
);
508 vp
= (struct r300_vertex_program
*)CURRENT_VERTEX_SHADER(ctx
);
509 if (vp
->native
== GL_FALSE
) {
510 hw_tcl_on
= GL_FALSE
;
514 return r300RunRender(ctx
, stage
);
517 const struct tnl_pipeline_stage _r300_render_stage
= {
518 "r300 Hardware Rasterization",
526 const struct tnl_pipeline_stage _r300_tcl_stage
= {
527 "r300 Hardware Transform, Clipping and Lighting",