/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/
/**
 * \file
 *
 * \brief R300 Render (Vertex Buffer Implementation)
 *
 * The immediate implementation has been removed from CVS in favor of the
 * vertex buffer implementation.
 *
 * The render functions are called by the pipeline manager to render a batch
 * of primitives. They return TRUE to pass on to the next stage (i.e. software
 * rasterization) or FALSE to indicate that the pipeline has finished after
 * rendering something.
 *
 * When falling back to software TCL, still attempt to use hardware
 * rasterization.
 *
 * I am not sure that the cache related registers are set up correctly, but
 * obviously this does work... Further investigation is needed.
 *
 * \author Nicolai Haehnle <prefect_@gmx.net>
 *
 * \todo Add immediate implementation back? Perhaps this is useful if there are
 * rendering backends that copy from the VB.
 */
#include "main/glheader.h"
#include "main/state.h"
#include "main/imports.h"
#include "main/enums.h"
#include "main/macros.h"
#include "main/context.h"
#include "main/dd.h"
#include "main/simple_list.h"
#include "main/api_arrayelt.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
#include "vbo/vbo.h"
#include "tnl/tnl.h"
#include "tnl/t_vp_build.h"
#include "radeon_reg.h"
#include "radeon_macros.h"
#include "r300_context.h"
#include "r300_ioctl.h"
#include "r300_state.h"
#include "r300_reg.h"
#include "r300_tex.h"
#include "r300_emit.h"
#include "r300_fragprog_common.h"
/**
 * \brief Convert an OpenGL primitive type into an R300 primitive type.
 */
int r300PrimitiveType(r300ContextPtr rmesa, int prim)
{
	switch (prim & PRIM_MODE_MASK) {
	case GL_POINTS:
		return R300_VAP_VF_CNTL__PRIM_POINTS;
	case GL_LINES:
		return R300_VAP_VF_CNTL__PRIM_LINES;
	case GL_LINE_STRIP:
		return R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
	case GL_LINE_LOOP:
		return R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
	case GL_TRIANGLES:
		return R300_VAP_VF_CNTL__PRIM_TRIANGLES;
	case GL_TRIANGLE_STRIP:
		return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
	case GL_TRIANGLE_FAN:
		return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
	case GL_QUADS:
		return R300_VAP_VF_CNTL__PRIM_QUADS;
	case GL_QUAD_STRIP:
		return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
	case GL_POLYGON:
		return R300_VAP_VF_CNTL__PRIM_POLYGON;
	default:
		assert(0);
		return -1;
	}
}
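
/**
 * \brief Truncate a vertex count so that it covers only complete primitives.
 *
 * Trailing vertices that cannot form a whole primitive (e.g. an odd vertex
 * for GL_LINES) are dropped, and counts too small for the primitive type
 * (e.g. two vertices for a triangle strip) collapse to zero.
 */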
int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
{
	int verts_off = 0;

	switch (prim & PRIM_MODE_MASK) {
	case GL_POINTS:
		verts_off = 0;
		break;
	case GL_LINES:
		verts_off = num_verts % 2;
		break;
	case GL_LINE_STRIP:
		if (num_verts < 2)
			verts_off = num_verts;
		break;
	case GL_LINE_LOOP:
		if (num_verts < 2)
			verts_off = num_verts;
		break;
	case GL_TRIANGLES:
		verts_off = num_verts % 3;
		break;
	case GL_TRIANGLE_STRIP:
		if (num_verts < 3)
			verts_off = num_verts;
		break;
	case GL_TRIANGLE_FAN:
		if (num_verts < 3)
			verts_off = num_verts;
		break;
	case GL_QUADS:
		verts_off = num_verts % 4;
		break;
	case GL_QUAD_STRIP:
		if (num_verts < 4)
			verts_off = num_verts;
		else
			verts_off = num_verts % 2;
		break;
	case GL_POLYGON:
		if (num_verts < 3)
			verts_off = num_verts;
		break;
	default:
		assert(0);
		return -1;
	}

	return num_verts - verts_off;
}
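
/**
 * \brief Copy the 32-bit element (index) array into a newly allocated GART
 * DMA region, from which the INDX_BUFFER packet will read it.
 */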
static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	void *out;

	radeonAllocDmaRegion(&rmesa->radeon, &rmesa->radeon.tcl.elt_dma_bo,
			     &rmesa->radeon.tcl.elt_dma_offset, n_elts * 4, 4);
	radeon_bo_map(rmesa->radeon.tcl.elt_dma_bo, 1);
	out = rmesa->radeon.tcl.elt_dma_bo->ptr + rmesa->radeon.tcl.elt_dma_offset;
	memcpy(out, elts, n_elts * 4);
	radeon_bo_unmap(rmesa->radeon.tcl.elt_dma_bo);
}
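
/**
 * \brief Emit a DRAW_INDX_2 packet followed by the INDX_BUFFER packet that
 * points the hardware at the element buffer uploaded by r300EmitElts().
 */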
static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type)
{
	BATCH_LOCALS(&rmesa->radeon);

	if (vertex_count > 0) {
		BEGIN_BATCH(10);
		OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
		OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
			  ((vertex_count + 0) << 16) |
			  type |
			  R300_VAP_VF_CNTL__INDEX_SIZE_32bit);

		if (!rmesa->radeon.radeonScreen->kernel_mm) {
			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
			OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
				  (R300_VAP_PORT_IDX0 >> 2));
			OUT_BATCH_RELOC(rmesa->radeon.tcl.elt_dma_offset,
					rmesa->radeon.tcl.elt_dma_bo,
					rmesa->radeon.tcl.elt_dma_offset,
					RADEON_GEM_DOMAIN_GTT, 0, 0);
			OUT_BATCH(vertex_count);
		} else {
			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
			OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
				  (R300_VAP_PORT_IDX0 >> 2));
			OUT_BATCH(rmesa->radeon.tcl.elt_dma_offset);
			OUT_BATCH(vertex_count);
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->radeon.tcl.elt_dma_bo,
					      RADEON_GEM_DOMAIN_GTT, 0, 0);
		}
		END_BATCH();
	}
}
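
/**
 * \brief Emit the 3D_LOAD_VBPNTR packet describing the vertex arrays:
 * component count, stride and (relocated) offset of each array, packed two
 * arrays per control dword. The legacy and kernel memory manager paths
 * differ only in how the buffer relocations are written.
 */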
static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
{
	BATCH_LOCALS(&rmesa->radeon);
	uint32_t voffset;
	int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
	int i;

	if (RADEON_DEBUG & DEBUG_VERTS)
		fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
			offset);

	if (!rmesa->radeon.radeonScreen->kernel_mm) {
		BEGIN_BATCH(sz + 2 + (nr * 2));
		OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
		OUT_BATCH(nr);

		for (i = 0; i + 1 < nr; i += 2) {
			OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
				  (rmesa->radeon.tcl.aos[i].stride << 8) |
				  (rmesa->radeon.tcl.aos[i + 1].components << 16) |
				  (rmesa->radeon.tcl.aos[i + 1].stride << 24));

			voffset = rmesa->radeon.tcl.aos[i + 0].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->radeon.tcl.aos[i].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
			voffset = rmesa->radeon.tcl.aos[i + 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->radeon.tcl.aos[i + 1].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
		}

		if (nr & 1) {
			OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
				  (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
			voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->radeon.tcl.aos[nr - 1].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
		}
		END_BATCH();
	} else {
		BEGIN_BATCH(sz + 2 + (nr * 2));
		OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
		OUT_BATCH(nr);

		for (i = 0; i + 1 < nr; i += 2) {
			OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
				  (rmesa->radeon.tcl.aos[i].stride << 8) |
				  (rmesa->radeon.tcl.aos[i + 1].components << 16) |
				  (rmesa->radeon.tcl.aos[i + 1].stride << 24));

			voffset = rmesa->radeon.tcl.aos[i + 0].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
			OUT_BATCH(voffset);
			voffset = rmesa->radeon.tcl.aos[i + 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
			OUT_BATCH(voffset);
		}

		if (nr & 1) {
			OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
				  (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
			voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
			OUT_BATCH(voffset);
		}

		for (i = 0; i + 1 < nr; i += 2) {
			voffset = rmesa->radeon.tcl.aos[i + 0].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->radeon.tcl.aos[i + 0].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
			voffset = rmesa->radeon.tcl.aos[i + 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->radeon.tcl.aos[i + 1].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
		}
		if (nr & 1) {
			voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->radeon.tcl.aos[nr - 1].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
		}
		END_BATCH();
	}
}
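
/**
 * \brief Fire a non-indexed draw that walks the vertex arrays set up by
 * r300EmitAOS() in list order.
 */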
static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
{
	BATCH_LOCALS(&rmesa->radeon);

	BEGIN_BATCH(3);
	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
	END_BATCH();
}
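
/**
 * \brief Render the primitive run [start, end) from the TNL vertex buffer,
 * using the indexed path when an element list is present.
 */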
static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
				   int start, int end, int prim)
{
	int type, num_verts;
	TNLcontext *tnl = TNL_CONTEXT(ctx);
	struct vertex_buffer *vb = &tnl->vb;

	type = r300PrimitiveType(rmesa, prim);
	num_verts = r300NumVerts(rmesa, end - start, prim);

	if (type < 0 || num_verts <= 0)
		return;

	/* Make space for at least 64 dwords.
	 * This is supposed to ensure that we can get all rendering
	 * commands into a single command buffer.
	 */
	rcommonEnsureCmdBufSpace(&rmesa->radeon, 64, __FUNCTION__);

	if (vb->Elts) {
		if (num_verts > 65535) {
			/* not implemented yet */
			WARN_ONCE("Too many elts\n");
			return;
		}
		/* Note: The following is incorrect, but it's the best I can do
		 * without a major refactoring of how DMA memory is handled.
		 * The problem: Ensuring that both vertex arrays *and* index
		 * arrays are at the right position, and then ensuring that
		 * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are emitted
		 * consecutively.
		 *
		 * So why is the following incorrect? Well, it seems like
		 * allocating the index array might actually evict the vertex
		 * arrays. *sigh*
		 */
		r300EmitElts(ctx, vb->Elts, num_verts);
		r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start);
		r300FireEB(rmesa, num_verts, type);
	} else {
		r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start);
		r300FireAOS(rmesa, num_verts, type);
	}
	COMMIT_BATCH();
}
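
/**
 * \brief Render the whole TNL vertex buffer with hardware.
 *
 * Returns GL_FALSE when rendering completed here, GL_TRUE to hand the batch
 * on to the next (software) pipeline stage.
 */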
static GLboolean r300RunRender(GLcontext * ctx,
			       struct tnl_pipeline_stage *stage)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	int i;
	TNLcontext *tnl = TNL_CONTEXT(ctx);
	struct vertex_buffer *vb = &tnl->vb;

	if (RADEON_DEBUG & DEBUG_PRIMS)
		fprintf(stderr, "%s\n", __FUNCTION__);

	r300UpdateShaders(rmesa);
	if (r300EmitArrays(ctx))
		return GL_TRUE;

	r300UpdateShaderStates(rmesa);

	r300EmitCacheFlush(rmesa);
	radeonEmitState(&rmesa->radeon);

	for (i = 0; i < vb->PrimitiveCount; i++) {
		GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
		GLuint start = vb->Primitive[i].start;
		GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
		r300RunRenderPrimitive(rmesa, ctx, start, end, prim);
	}

	r300EmitCacheFlush(rmesa);

	radeonReleaseArrays(ctx, ~0);

	return GL_FALSE;
}
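
/**
 * \brief Return R300_FALLBACK_RAST from the enclosing function when \a expr
 * is true, warning once about the fallback reason.
 */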
#define FALLBACK_IF(expr)						\
	do {								\
		if (expr) {						\
			if (1 || RADEON_DEBUG & DEBUG_FALLBACKS)	\
				WARN_ONCE("Software fallback:%s\n",	\
					  #expr);			\
			return R300_FALLBACK_RAST;			\
		}							\
	} while (0)
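
/**
 * \brief Check for rasterization state the hardware cannot handle and decide
 * whether a software fallback is required.
 */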
static int r300Fallback(GLcontext * ctx)
{
	r300ContextPtr r300 = R300_CONTEXT(ctx);
	const unsigned back = ctx->Stencil._BackFace;

	FALLBACK_IF(r300->radeon.Fallback);

	struct r300_fragment_program *fp =
		(struct r300_fragment_program *) ctx->FragmentProgram._Current;
	if (fp && !fp->translated) {
		r300TranslateFragmentShader(ctx, ctx->FragmentProgram._Current);
		FALLBACK_IF(fp->error);
	}

	FALLBACK_IF(ctx->RenderMode != GL_RENDER);

	FALLBACK_IF(ctx->Stencil.Enabled && (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back]
		|| ctx->Stencil.ValueMask[0] != ctx->Stencil.ValueMask[back]
		|| ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[back]));

	if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite)
		FALLBACK_IF(ctx->Point.PointSprite);

	if (!r300->disable_lowimpact_fallback) {
		FALLBACK_IF(ctx->Polygon.StippleFlag);
		FALLBACK_IF(ctx->Multisample._Enabled);
		FALLBACK_IF(ctx->Line.StippleFlag);
		FALLBACK_IF(ctx->Line.SmoothFlag);
		FALLBACK_IF(ctx->Point.SmoothFlag);
	}

	return R300_FALLBACK_NONE;
}
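
/**
 * \brief Pipeline stage entry point for hardware rasterization of vertices
 * that were transformed in software.
 */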
static GLboolean r300RunNonTCLRender(GLcontext * ctx,
				     struct tnl_pipeline_stage *stage)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);

	if (RADEON_DEBUG & DEBUG_PRIMS)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (r300Fallback(ctx) >= R300_FALLBACK_RAST)
		return GL_TRUE;

	if (rmesa->options.hw_tcl_enabled == GL_FALSE)
		return GL_TRUE;

	if (!r300ValidateBuffers(ctx))
		return GL_TRUE;

	return r300RunRender(ctx, stage);
}
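
/**
 * \brief Pipeline stage entry point for full hardware TCL. Falls back by
 * clearing hw_tcl_enabled when a TCL-level fallback or a non-native vertex
 * program is detected.
 */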
static GLboolean r300RunTCLRender(GLcontext * ctx,
				  struct tnl_pipeline_stage *stage)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	struct r300_vertex_program *vp;

	if (RADEON_DEBUG & DEBUG_PRIMS)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (rmesa->options.hw_tcl_enabled == GL_FALSE)
		return GL_TRUE;

	if (r300Fallback(ctx) >= R300_FALLBACK_TCL) {
		rmesa->options.hw_tcl_enabled = GL_FALSE;
		return GL_TRUE;
	}

	if (!r300ValidateBuffers(ctx))
		return GL_TRUE;

	r300UpdateShaders(rmesa);

	vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
	if (vp->native == GL_FALSE) {
		rmesa->options.hw_tcl_enabled = GL_FALSE;
		return GL_TRUE;
	}

	return r300RunRender(ctx, stage);
}
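
/**
 * \brief TNL pipeline stage descriptors; only the run callback is needed,
 * all other hooks stay NULL.
 */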
const struct tnl_pipeline_stage _r300_render_stage = {
	"r300 Hardware Rasterization",
	NULL,
	NULL,
	NULL,
	NULL,
	r300RunNonTCLRender
};

const struct tnl_pipeline_stage _r300_tcl_stage = {
	"r300 Hardware Transform, Clipping and Lighting",
	NULL,
	NULL,
	NULL,
	NULL,
	r300RunTCLRender
};