1 /**************************************************************************
3 Copyright (C) 2004 Nicolai Haehnle.
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
31 * \brief R300 Render (Vertex Buffer Implementation)
33 * The immediate implementation has been removed from CVS in favor of the vertex
34 * buffer implementation.
36 * The render functions are called by the pipeline manager to render a batch of
37 * primitives. They return TRUE to pass on to the next stage (i.e. software
38 * rasterization) or FALSE to indicate that the pipeline has finished after
39 * rendering something.
 * When falling back to software TCL, the driver still attempts to use the
 * hardware rasterizer.
44 * I am not sure that the cache related registers are setup correctly, but
45 * obviously this does work... Further investigation is needed.
47 * \author Nicolai Haehnle <prefect_@gmx.net>
49 * \todo Add immediate implementation back? Perhaps this is useful if there are
53 #include "main/glheader.h"
54 #include "main/state.h"
55 #include "main/imports.h"
56 #include "main/enums.h"
57 #include "main/macros.h"
58 #include "main/context.h"
60 #include "main/simple_list.h"
61 #include "main/api_arrayelt.h"
62 #include "swrast/swrast.h"
63 #include "swrast_setup/swrast_setup.h"
66 #include "tnl/t_vp_build.h"
67 #include "radeon_reg.h"
68 #include "radeon_macros.h"
69 #include "radeon_ioctl.h"
70 #include "radeon_state.h"
71 #include "r300_context.h"
72 #include "r300_ioctl.h"
73 #include "r300_state.h"
76 #include "r300_emit.h"
77 #include "r300_fragprog.h"
78 extern int future_hw_tcl_on
;
81 * \brief Convert a OpenGL primitive type into a R300 primitive type.
83 int r300PrimitiveType(r300ContextPtr rmesa
, int prim
)
85 switch (prim
& PRIM_MODE_MASK
) {
87 return R300_VAP_VF_CNTL__PRIM_POINTS
;
90 return R300_VAP_VF_CNTL__PRIM_LINES
;
93 return R300_VAP_VF_CNTL__PRIM_LINE_STRIP
;
96 return R300_VAP_VF_CNTL__PRIM_LINE_LOOP
;
99 return R300_VAP_VF_CNTL__PRIM_TRIANGLES
;
101 case GL_TRIANGLE_STRIP
:
102 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP
;
104 case GL_TRIANGLE_FAN
:
105 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN
;
108 return R300_VAP_VF_CNTL__PRIM_QUADS
;
111 return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP
;
114 return R300_VAP_VF_CNTL__PRIM_POLYGON
;
123 int r300NumVerts(r300ContextPtr rmesa
, int num_verts
, int prim
)
127 switch (prim
& PRIM_MODE_MASK
) {
132 verts_off
= num_verts
% 2;
136 verts_off
= num_verts
;
140 verts_off
= num_verts
;
143 verts_off
= num_verts
% 3;
145 case GL_TRIANGLE_STRIP
:
147 verts_off
= num_verts
;
149 case GL_TRIANGLE_FAN
:
151 verts_off
= num_verts
;
154 verts_off
= num_verts
% 4;
158 verts_off
= num_verts
;
160 verts_off
= num_verts
% 2;
164 verts_off
= num_verts
;
172 return num_verts
- verts_off
;
175 static void r300EmitElts(GLcontext
* ctx
, void *elts
, unsigned long n_elts
)
177 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
180 radeonAllocDmaRegion(&rmesa
->radeon
, &rmesa
->state
.elt_dma_bo
,
181 &rmesa
->state
.elt_dma_offset
, n_elts
* 4, 4);
182 radeon_bo_map(rmesa
->state
.elt_dma_bo
, 1);
183 out
= rmesa
->state
.elt_dma_bo
->ptr
+ rmesa
->state
.elt_dma_offset
;
184 memcpy(out
, elts
, n_elts
* 4);
185 radeon_bo_unmap(rmesa
->state
.elt_dma_bo
);
188 static void r300FireEB(r300ContextPtr rmesa
, int vertex_count
, int type
)
190 BATCH_LOCALS(&rmesa
->radeon
);
192 if (vertex_count
> 0) {
194 OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2
, 0);
195 OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES
|
196 ((vertex_count
+ 0) << 16) |
198 R300_VAP_VF_CNTL__INDEX_SIZE_32bit
);
200 if (!rmesa
->radeon
.radeonScreen
->kernel_mm
) {
201 OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER
, 2);
202 OUT_BATCH(R300_EB_UNK1
| (0 << 16) | R300_EB_UNK2
);
203 OUT_BATCH_RELOC(rmesa
->state
.elt_dma_offset
,
204 rmesa
->state
.elt_dma_bo
,
205 rmesa
->state
.elt_dma_offset
,
206 RADEON_GEM_DOMAIN_GTT
, 0, 0);
207 OUT_BATCH(vertex_count
);
209 OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER
, 2);
210 OUT_BATCH(R300_EB_UNK1
| (0 << 16) | R300_EB_UNK2
);
211 OUT_BATCH(rmesa
->state
.elt_dma_offset
);
212 OUT_BATCH(vertex_count
);
213 radeon_cs_write_reloc(rmesa
->radeon
.cmdbuf
.cs
,
214 rmesa
->state
.elt_dma_bo
,
215 RADEON_GEM_DOMAIN_GTT
, 0, 0);
221 static void r300EmitAOS(r300ContextPtr rmesa
, GLuint nr
, GLuint offset
)
223 BATCH_LOCALS(&rmesa
->radeon
);
225 int sz
= 1 + (nr
>> 1) * 3 + (nr
& 1) * 2;
228 if (RADEON_DEBUG
& DEBUG_VERTS
)
229 fprintf(stderr
, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__
, nr
,
233 if (!rmesa
->radeon
.radeonScreen
->kernel_mm
) {
234 BEGIN_BATCH(sz
+2+(nr
* 2));
235 OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR
, sz
- 1);
238 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
239 OUT_BATCH((rmesa
->state
.aos
[i
].components
<< 0) |
240 (rmesa
->state
.aos
[i
].stride
<< 8) |
241 (rmesa
->state
.aos
[i
+ 1].components
<< 16) |
242 (rmesa
->state
.aos
[i
+ 1].stride
<< 24));
244 voffset
= rmesa
->state
.aos
[i
+ 0].offset
+
245 offset
* 4 * rmesa
->state
.aos
[i
+ 0].stride
;
246 OUT_BATCH_RELOC(voffset
,
247 rmesa
->state
.aos
[i
].bo
,
249 RADEON_GEM_DOMAIN_GTT
,
251 voffset
= rmesa
->state
.aos
[i
+ 1].offset
+
252 offset
* 4 * rmesa
->state
.aos
[i
+ 1].stride
;
253 OUT_BATCH_RELOC(voffset
,
254 rmesa
->state
.aos
[i
+1].bo
,
256 RADEON_GEM_DOMAIN_GTT
,
261 OUT_BATCH((rmesa
->state
.aos
[nr
- 1].components
<< 0) |
262 (rmesa
->state
.aos
[nr
- 1].stride
<< 8));
263 voffset
= rmesa
->state
.aos
[nr
- 1].offset
+
264 offset
* 4 * rmesa
->state
.aos
[nr
- 1].stride
;
265 OUT_BATCH_RELOC(voffset
,
266 rmesa
->state
.aos
[nr
- 1].bo
,
268 RADEON_GEM_DOMAIN_GTT
,
274 BEGIN_BATCH(sz
+2+(nr
* 2));
275 OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR
, sz
- 1);
278 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
279 OUT_BATCH((rmesa
->state
.aos
[i
].components
<< 0) |
280 (rmesa
->state
.aos
[i
].stride
<< 8) |
281 (rmesa
->state
.aos
[i
+ 1].components
<< 16) |
282 (rmesa
->state
.aos
[i
+ 1].stride
<< 24));
284 voffset
= rmesa
->state
.aos
[i
+ 0].offset
+
285 offset
* 4 * rmesa
->state
.aos
[i
+ 0].stride
;
287 voffset
= rmesa
->state
.aos
[i
+ 1].offset
+
288 offset
* 4 * rmesa
->state
.aos
[i
+ 1].stride
;
293 OUT_BATCH((rmesa
->state
.aos
[nr
- 1].components
<< 0) |
294 (rmesa
->state
.aos
[nr
- 1].stride
<< 8));
295 voffset
= rmesa
->state
.aos
[nr
- 1].offset
+
296 offset
* 4 * rmesa
->state
.aos
[nr
- 1].stride
;
299 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
300 voffset
= rmesa
->state
.aos
[i
+ 0].offset
+
301 offset
* 4 * rmesa
->state
.aos
[i
+ 0].stride
;
302 radeon_cs_write_reloc(rmesa
->radeon
.cmdbuf
.cs
,
303 rmesa
->state
.aos
[i
+0].bo
,
304 RADEON_GEM_DOMAIN_GTT
,
306 voffset
= rmesa
->state
.aos
[i
+ 1].offset
+
307 offset
* 4 * rmesa
->state
.aos
[i
+ 1].stride
;
308 radeon_cs_write_reloc(rmesa
->radeon
.cmdbuf
.cs
,
309 rmesa
->state
.aos
[i
+1].bo
,
310 RADEON_GEM_DOMAIN_GTT
,
314 voffset
= rmesa
->state
.aos
[nr
- 1].offset
+
315 offset
* 4 * rmesa
->state
.aos
[nr
- 1].stride
;
316 radeon_cs_write_reloc(rmesa
->radeon
.cmdbuf
.cs
,
317 rmesa
->state
.aos
[nr
-1].bo
,
318 RADEON_GEM_DOMAIN_GTT
,
326 static void r300FireAOS(r300ContextPtr rmesa
, int vertex_count
, int type
)
328 BATCH_LOCALS(&rmesa
->radeon
);
331 OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2
, 0);
332 OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST
| (vertex_count
<< 16) | type
);
336 static void r300RunRenderPrimitive(r300ContextPtr rmesa
, GLcontext
* ctx
,
337 int start
, int end
, int prim
)
339 BATCH_LOCALS(&rmesa
->radeon
);
341 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
342 struct vertex_buffer
*vb
= &tnl
->vb
;
344 type
= r300PrimitiveType(rmesa
, prim
);
345 num_verts
= r300NumVerts(rmesa
, end
- start
, prim
);
347 if (type
< 0 || num_verts
<= 0)
350 /* Make space for at least 64 dwords.
351 * This is supposed to ensure that we can get all rendering
352 * commands into a single command buffer.
354 rcommonEnsureCmdBufSpace(&rmesa
->radeon
, 64, __FUNCTION__
);
357 if (num_verts
> 65535) {
358 /* not implemented yet */
359 WARN_ONCE("Too many elts\n");
362 /* Note: The following is incorrect, but it's the best I can do
363 * without a major refactoring of how DMA memory is handled.
364 * The problem: Ensuring that both vertex arrays *and* index
365 * arrays are at the right position, and then ensuring that
366 * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are emitted
369 * So why is the following incorrect? Well, it seems like
370 * allocating the index array might actually evict the vertex
373 r300EmitElts(ctx
, vb
->Elts
, num_verts
);
374 r300EmitAOS(rmesa
, rmesa
->state
.aos_count
, start
);
375 r300FireEB(rmesa
, num_verts
, type
);
377 r300EmitAOS(rmesa
, rmesa
->state
.aos_count
, start
);
378 r300FireAOS(rmesa
, num_verts
, type
);
383 static GLboolean
r300RunRender(GLcontext
* ctx
,
384 struct tnl_pipeline_stage
*stage
)
386 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
388 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
389 struct vertex_buffer
*vb
= &tnl
->vb
;
391 if (RADEON_DEBUG
& DEBUG_PRIMS
)
392 fprintf(stderr
, "%s\n", __FUNCTION__
);
394 r300UpdateShaders(rmesa
);
395 if (r300EmitArrays(ctx
))
398 r300UpdateShaderStates(rmesa
);
400 r300EmitCacheFlush(rmesa
);
401 r300EmitState(rmesa
);
403 for (i
= 0; i
< vb
->PrimitiveCount
; i
++) {
404 GLuint prim
= _tnl_translate_prim(&vb
->Primitive
[i
]);
405 GLuint start
= vb
->Primitive
[i
].start
;
406 GLuint end
= vb
->Primitive
[i
].start
+ vb
->Primitive
[i
].count
;
407 r300RunRenderPrimitive(rmesa
, ctx
, start
, end
, prim
);
410 r300EmitCacheFlush(rmesa
);
412 r300ReleaseArrays(ctx
);
/**
 * Bail out of the calling function with R300_FALLBACK_RAST when \a expr
 * is true, logging the failing condition (once per call site) when
 * fallback debugging is enabled.  Only usable inside functions that
 * return the R300_FALLBACK_* codes (see r300Fallback()).
 *
 * Fix: the condition used to read "if (1 || RADEON_DEBUG & DEBUG_FALLBACKS)",
 * a debugging leftover that forced the warning regardless of the debug
 * flags; the gate now respects DEBUG_FALLBACKS as intended.
 */
#define FALLBACK_IF(expr)						\
	do {								\
		if (expr) {						\
			if (RADEON_DEBUG & DEBUG_FALLBACKS)		\
				WARN_ONCE("Software fallback:%s\n",	\
					  #expr);			\
			return R300_FALLBACK_RAST;			\
		}							\
	} while (0)
427 static int r300Fallback(GLcontext
* ctx
)
429 r300ContextPtr r300
= R300_CONTEXT(ctx
);
430 /* Do we need to use new-style shaders?
431 * Also is there a better way to do this? */
432 if (r300
->radeon
.radeonScreen
->chip_family
>= CHIP_FAMILY_RV515
) {
433 struct r500_fragment_program
*fp
= (struct r500_fragment_program
*)
434 (char *)ctx
->FragmentProgram
._Current
;
436 if (!fp
->translated
) {
437 r500TranslateFragmentShader(r300
, fp
);
438 FALLBACK_IF(!fp
->translated
);
442 struct r300_fragment_program
*fp
= (struct r300_fragment_program
*)
443 (char *)ctx
->FragmentProgram
._Current
;
445 if (!fp
->translated
) {
446 r300TranslateFragmentShader(r300
, fp
);
447 FALLBACK_IF(!fp
->translated
);
452 FALLBACK_IF(ctx
->RenderMode
!= GL_RENDER
);
454 FALLBACK_IF(ctx
->Stencil
._TestTwoSide
455 && (ctx
->Stencil
.Ref
[0] != ctx
->Stencil
.Ref
[1]
456 || ctx
->Stencil
.ValueMask
[0] !=
457 ctx
->Stencil
.ValueMask
[1]
458 || ctx
->Stencil
.WriteMask
[0] !=
459 ctx
->Stencil
.WriteMask
[1]));
461 if (ctx
->Extensions
.NV_point_sprite
|| ctx
->Extensions
.ARB_point_sprite
)
462 FALLBACK_IF(ctx
->Point
.PointSprite
);
464 if (!r300
->disable_lowimpact_fallback
) {
465 FALLBACK_IF(ctx
->Polygon
.StippleFlag
);
466 FALLBACK_IF(ctx
->Multisample
._Enabled
);
467 FALLBACK_IF(ctx
->Line
.StippleFlag
);
468 FALLBACK_IF(ctx
->Line
.SmoothFlag
);
469 FALLBACK_IF(ctx
->Point
.SmoothFlag
);
472 return R300_FALLBACK_NONE
;
475 static GLboolean
r300RunNonTCLRender(GLcontext
* ctx
,
476 struct tnl_pipeline_stage
*stage
)
478 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
480 if (RADEON_DEBUG
& DEBUG_PRIMS
)
481 fprintf(stderr
, "%s\n", __FUNCTION__
);
483 if (r300Fallback(ctx
) >= R300_FALLBACK_RAST
)
486 if (!(rmesa
->radeon
.radeonScreen
->chip_flags
& RADEON_CHIPSET_TCL
))
489 return r300RunRender(ctx
, stage
);
492 static GLboolean
r300RunTCLRender(GLcontext
* ctx
,
493 struct tnl_pipeline_stage
*stage
)
495 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
496 struct r300_vertex_program
*vp
;
498 hw_tcl_on
= future_hw_tcl_on
;
500 if (RADEON_DEBUG
& DEBUG_PRIMS
)
501 fprintf(stderr
, "%s\n", __FUNCTION__
);
503 if (hw_tcl_on
== GL_FALSE
)
506 if (r300Fallback(ctx
) >= R300_FALLBACK_TCL
) {
507 hw_tcl_on
= GL_FALSE
;
511 if (!r300ValidateBuffers(ctx
))
514 r300UpdateShaders(rmesa
);
516 vp
= (struct r300_vertex_program
*)CURRENT_VERTEX_SHADER(ctx
);
517 if (vp
->native
== GL_FALSE
) {
518 hw_tcl_on
= GL_FALSE
;
522 return r300RunRender(ctx
, stage
);
525 const struct tnl_pipeline_stage _r300_render_stage
= {
526 "r300 Hardware Rasterization",
534 const struct tnl_pipeline_stage _r300_tcl_stage
= {
535 "r300 Hardware Transform, Clipping and Lighting",