1 /**************************************************************************
3 Copyright (C) 2004 Nicolai Haehnle.
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
31 * \brief R300 Render (Vertex Buffer Implementation)
33 * The immediate implementation has been removed from CVS in favor of the vertex
34 * buffer implementation.
36 * The render functions are called by the pipeline manager to render a batch of
37 * primitives. They return TRUE to pass on to the next stage (i.e. software
38 * rasterization) or FALSE to indicate that the pipeline has finished after
39 * rendering something.
41 * When falling back to software TCL still attempt to use hardware
44 * I am not sure that the cache related registers are setup correctly, but
45 * obviously this does work... Further investigation is needed.
47 * \author Nicolai Haehnle <prefect_@gmx.net>
49 * \todo Add immediate implementation back? Perhaps this is useful if there are
53 #include "main/glheader.h"
54 #include "main/state.h"
55 #include "main/imports.h"
56 #include "main/enums.h"
57 #include "main/macros.h"
58 #include "main/context.h"
60 #include "main/simple_list.h"
61 #include "main/api_arrayelt.h"
62 #include "swrast/swrast.h"
63 #include "swrast_setup/swrast_setup.h"
66 #include "tnl/t_vp_build.h"
67 #include "radeon_reg.h"
68 #include "radeon_macros.h"
69 #include "r300_context.h"
70 #include "r300_ioctl.h"
71 #include "r300_state.h"
74 #include "r300_emit.h"
75 #include "r300_fragprog_common.h"
77 extern int future_hw_tcl_on
;
80 * \brief Convert a OpenGL primitive type into a R300 primitive type.
82 int r300PrimitiveType(r300ContextPtr rmesa
, int prim
)
84 switch (prim
& PRIM_MODE_MASK
) {
86 return R300_VAP_VF_CNTL__PRIM_POINTS
;
89 return R300_VAP_VF_CNTL__PRIM_LINES
;
92 return R300_VAP_VF_CNTL__PRIM_LINE_STRIP
;
95 return R300_VAP_VF_CNTL__PRIM_LINE_LOOP
;
98 return R300_VAP_VF_CNTL__PRIM_TRIANGLES
;
100 case GL_TRIANGLE_STRIP
:
101 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP
;
103 case GL_TRIANGLE_FAN
:
104 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN
;
107 return R300_VAP_VF_CNTL__PRIM_QUADS
;
110 return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP
;
113 return R300_VAP_VF_CNTL__PRIM_POLYGON
;
122 int r300NumVerts(r300ContextPtr rmesa
, int num_verts
, int prim
)
126 switch (prim
& PRIM_MODE_MASK
) {
131 verts_off
= num_verts
% 2;
135 verts_off
= num_verts
;
139 verts_off
= num_verts
;
142 verts_off
= num_verts
% 3;
144 case GL_TRIANGLE_STRIP
:
146 verts_off
= num_verts
;
148 case GL_TRIANGLE_FAN
:
150 verts_off
= num_verts
;
153 verts_off
= num_verts
% 4;
157 verts_off
= num_verts
;
159 verts_off
= num_verts
% 2;
163 verts_off
= num_verts
;
171 return num_verts
- verts_off
;
174 static void r300EmitElts(GLcontext
* ctx
, void *elts
, unsigned long n_elts
)
176 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
179 radeonAllocDmaRegion(&rmesa
->radeon
, &rmesa
->radeon
.tcl
.elt_dma_bo
,
180 &rmesa
->radeon
.tcl
.elt_dma_offset
, n_elts
* 4, 4);
181 radeon_bo_map(rmesa
->radeon
.tcl
.elt_dma_bo
, 1);
182 out
= rmesa
->radeon
.tcl
.elt_dma_bo
->ptr
+ rmesa
->radeon
.tcl
.elt_dma_offset
;
183 memcpy(out
, elts
, n_elts
* 4);
184 radeon_bo_unmap(rmesa
->radeon
.tcl
.elt_dma_bo
);
187 static void r300FireEB(r300ContextPtr rmesa
, int vertex_count
, int type
)
189 BATCH_LOCALS(&rmesa
->radeon
);
191 if (vertex_count
> 0) {
193 OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2
, 0);
194 OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES
|
195 ((vertex_count
+ 0) << 16) |
197 R300_VAP_VF_CNTL__INDEX_SIZE_32bit
);
199 if (!rmesa
->radeon
.radeonScreen
->kernel_mm
) {
200 OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER
, 2);
201 OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR
| (0 << R300_INDX_BUFFER_SKIP_SHIFT
) |
202 (R300_VAP_PORT_IDX0
>> 2));
203 OUT_BATCH_RELOC(rmesa
->radeon
.tcl
.elt_dma_offset
,
204 rmesa
->radeon
.tcl
.elt_dma_bo
,
205 rmesa
->radeon
.tcl
.elt_dma_offset
,
206 RADEON_GEM_DOMAIN_GTT
, 0, 0);
207 OUT_BATCH(vertex_count
);
209 OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER
, 2);
210 OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR
| (0 << R300_INDX_BUFFER_SKIP_SHIFT
) |
211 (R300_VAP_PORT_IDX0
>> 2));
212 OUT_BATCH(rmesa
->radeon
.tcl
.elt_dma_offset
);
213 OUT_BATCH(vertex_count
);
214 radeon_cs_write_reloc(rmesa
->radeon
.cmdbuf
.cs
,
215 rmesa
->radeon
.tcl
.elt_dma_bo
,
216 RADEON_GEM_DOMAIN_GTT
, 0, 0);
222 static void r300EmitAOS(r300ContextPtr rmesa
, GLuint nr
, GLuint offset
)
224 BATCH_LOCALS(&rmesa
->radeon
);
226 int sz
= 1 + (nr
>> 1) * 3 + (nr
& 1) * 2;
229 if (RADEON_DEBUG
& DEBUG_VERTS
)
230 fprintf(stderr
, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__
, nr
,
234 if (!rmesa
->radeon
.radeonScreen
->kernel_mm
) {
235 BEGIN_BATCH(sz
+2+(nr
* 2));
236 OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR
, sz
- 1);
239 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
240 OUT_BATCH((rmesa
->radeon
.tcl
.aos
[i
].components
<< 0) |
241 (rmesa
->radeon
.tcl
.aos
[i
].stride
<< 8) |
242 (rmesa
->radeon
.tcl
.aos
[i
+ 1].components
<< 16) |
243 (rmesa
->radeon
.tcl
.aos
[i
+ 1].stride
<< 24));
245 voffset
= rmesa
->radeon
.tcl
.aos
[i
+ 0].offset
+
246 offset
* 4 * rmesa
->radeon
.tcl
.aos
[i
+ 0].stride
;
247 OUT_BATCH_RELOC(voffset
,
248 rmesa
->radeon
.tcl
.aos
[i
].bo
,
250 RADEON_GEM_DOMAIN_GTT
,
252 voffset
= rmesa
->radeon
.tcl
.aos
[i
+ 1].offset
+
253 offset
* 4 * rmesa
->radeon
.tcl
.aos
[i
+ 1].stride
;
254 OUT_BATCH_RELOC(voffset
,
255 rmesa
->radeon
.tcl
.aos
[i
+1].bo
,
257 RADEON_GEM_DOMAIN_GTT
,
262 OUT_BATCH((rmesa
->radeon
.tcl
.aos
[nr
- 1].components
<< 0) |
263 (rmesa
->radeon
.tcl
.aos
[nr
- 1].stride
<< 8));
264 voffset
= rmesa
->radeon
.tcl
.aos
[nr
- 1].offset
+
265 offset
* 4 * rmesa
->radeon
.tcl
.aos
[nr
- 1].stride
;
266 OUT_BATCH_RELOC(voffset
,
267 rmesa
->radeon
.tcl
.aos
[nr
- 1].bo
,
269 RADEON_GEM_DOMAIN_GTT
,
275 BEGIN_BATCH(sz
+2+(nr
* 2));
276 OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR
, sz
- 1);
279 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
280 OUT_BATCH((rmesa
->radeon
.tcl
.aos
[i
].components
<< 0) |
281 (rmesa
->radeon
.tcl
.aos
[i
].stride
<< 8) |
282 (rmesa
->radeon
.tcl
.aos
[i
+ 1].components
<< 16) |
283 (rmesa
->radeon
.tcl
.aos
[i
+ 1].stride
<< 24));
285 voffset
= rmesa
->radeon
.tcl
.aos
[i
+ 0].offset
+
286 offset
* 4 * rmesa
->radeon
.tcl
.aos
[i
+ 0].stride
;
288 voffset
= rmesa
->radeon
.tcl
.aos
[i
+ 1].offset
+
289 offset
* 4 * rmesa
->radeon
.tcl
.aos
[i
+ 1].stride
;
294 OUT_BATCH((rmesa
->radeon
.tcl
.aos
[nr
- 1].components
<< 0) |
295 (rmesa
->radeon
.tcl
.aos
[nr
- 1].stride
<< 8));
296 voffset
= rmesa
->radeon
.tcl
.aos
[nr
- 1].offset
+
297 offset
* 4 * rmesa
->radeon
.tcl
.aos
[nr
- 1].stride
;
300 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
301 voffset
= rmesa
->radeon
.tcl
.aos
[i
+ 0].offset
+
302 offset
* 4 * rmesa
->radeon
.tcl
.aos
[i
+ 0].stride
;
303 radeon_cs_write_reloc(rmesa
->radeon
.cmdbuf
.cs
,
304 rmesa
->radeon
.tcl
.aos
[i
+0].bo
,
305 RADEON_GEM_DOMAIN_GTT
,
307 voffset
= rmesa
->radeon
.tcl
.aos
[i
+ 1].offset
+
308 offset
* 4 * rmesa
->radeon
.tcl
.aos
[i
+ 1].stride
;
309 radeon_cs_write_reloc(rmesa
->radeon
.cmdbuf
.cs
,
310 rmesa
->radeon
.tcl
.aos
[i
+1].bo
,
311 RADEON_GEM_DOMAIN_GTT
,
315 voffset
= rmesa
->radeon
.tcl
.aos
[nr
- 1].offset
+
316 offset
* 4 * rmesa
->radeon
.tcl
.aos
[nr
- 1].stride
;
317 radeon_cs_write_reloc(rmesa
->radeon
.cmdbuf
.cs
,
318 rmesa
->radeon
.tcl
.aos
[nr
-1].bo
,
319 RADEON_GEM_DOMAIN_GTT
,
327 static void r300FireAOS(r300ContextPtr rmesa
, int vertex_count
, int type
)
329 BATCH_LOCALS(&rmesa
->radeon
);
332 OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2
, 0);
333 OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST
| (vertex_count
<< 16) | type
);
337 static void r300RunRenderPrimitive(r300ContextPtr rmesa
, GLcontext
* ctx
,
338 int start
, int end
, int prim
)
341 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
342 struct vertex_buffer
*vb
= &tnl
->vb
;
344 type
= r300PrimitiveType(rmesa
, prim
);
345 num_verts
= r300NumVerts(rmesa
, end
- start
, prim
);
347 if (type
< 0 || num_verts
<= 0)
350 /* Make space for at least 64 dwords.
351 * This is supposed to ensure that we can get all rendering
352 * commands into a single command buffer.
354 rcommonEnsureCmdBufSpace(&rmesa
->radeon
, 64, __FUNCTION__
);
357 if (num_verts
> 65535) {
358 /* not implemented yet */
359 WARN_ONCE("Too many elts\n");
362 /* Note: The following is incorrect, but it's the best I can do
363 * without a major refactoring of how DMA memory is handled.
364 * The problem: Ensuring that both vertex arrays *and* index
365 * arrays are at the right position, and then ensuring that
366 * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are emitted
369 * So why is the following incorrect? Well, it seems like
370 * allocating the index array might actually evict the vertex
373 r300EmitElts(ctx
, vb
->Elts
, num_verts
);
374 r300EmitAOS(rmesa
, rmesa
->radeon
.tcl
.aos_count
, start
);
375 r300FireEB(rmesa
, num_verts
, type
);
377 r300EmitAOS(rmesa
, rmesa
->radeon
.tcl
.aos_count
, start
);
378 r300FireAOS(rmesa
, num_verts
, type
);
383 static GLboolean
r300RunRender(GLcontext
* ctx
,
384 struct tnl_pipeline_stage
*stage
)
386 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
388 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
389 struct vertex_buffer
*vb
= &tnl
->vb
;
391 if (RADEON_DEBUG
& DEBUG_PRIMS
)
392 fprintf(stderr
, "%s\n", __FUNCTION__
);
394 r300UpdateShaders(rmesa
);
395 if (r300EmitArrays(ctx
))
398 r300UpdateShaderStates(rmesa
);
400 r300EmitCacheFlush(rmesa
);
401 radeonEmitState(&rmesa
->radeon
);
403 for (i
= 0; i
< vb
->PrimitiveCount
; i
++) {
404 GLuint prim
= _tnl_translate_prim(&vb
->Primitive
[i
]);
405 GLuint start
= vb
->Primitive
[i
].start
;
406 GLuint end
= vb
->Primitive
[i
].start
+ vb
->Primitive
[i
].count
;
407 r300RunRenderPrimitive(rmesa
, ctx
, start
, end
, prim
);
410 r300EmitCacheFlush(rmesa
);
412 radeonReleaseArrays(ctx
, ~0);
417 #define FALLBACK_IF(expr) \
420 if (1 || RADEON_DEBUG & DEBUG_FALLBACKS) \
421 WARN_ONCE("Software fallback:%s\n", \
423 return R300_FALLBACK_RAST; \
427 static int r300Fallback(GLcontext
* ctx
)
429 r300ContextPtr r300
= R300_CONTEXT(ctx
);
430 const unsigned back
= ctx
->Stencil
._BackFace
;
432 FALLBACK_IF(r300
->radeon
.Fallback
);
434 struct r300_fragment_program
*fp
= (struct r300_fragment_program
*) ctx
->FragmentProgram
._Current
;
435 if (fp
&& !fp
->translated
) {
436 r300TranslateFragmentShader(ctx
, ctx
->FragmentProgram
._Current
);
437 FALLBACK_IF(fp
->error
);
440 FALLBACK_IF(ctx
->RenderMode
!= GL_RENDER
);
442 /* If GL_EXT_stencil_two_side is disabled, this fallback check can
445 FALLBACK_IF(ctx
->Stencil
.Ref
[0] != ctx
->Stencil
.Ref
[back
]
446 || ctx
->Stencil
.ValueMask
[0] !=
447 ctx
->Stencil
.ValueMask
[back
]
448 || ctx
->Stencil
.WriteMask
[0] !=
449 ctx
->Stencil
.WriteMask
[back
]);
451 if (ctx
->Extensions
.NV_point_sprite
|| ctx
->Extensions
.ARB_point_sprite
)
452 FALLBACK_IF(ctx
->Point
.PointSprite
);
454 if (!r300
->disable_lowimpact_fallback
) {
455 FALLBACK_IF(ctx
->Polygon
.StippleFlag
);
456 FALLBACK_IF(ctx
->Multisample
._Enabled
);
457 FALLBACK_IF(ctx
->Line
.StippleFlag
);
458 FALLBACK_IF(ctx
->Line
.SmoothFlag
);
459 FALLBACK_IF(ctx
->Point
.SmoothFlag
);
462 return R300_FALLBACK_NONE
;
465 static GLboolean
r300RunNonTCLRender(GLcontext
* ctx
,
466 struct tnl_pipeline_stage
*stage
)
468 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
470 if (RADEON_DEBUG
& DEBUG_PRIMS
)
471 fprintf(stderr
, "%s\n", __FUNCTION__
);
473 if (r300Fallback(ctx
) >= R300_FALLBACK_RAST
)
476 if (!(rmesa
->radeon
.radeonScreen
->chip_flags
& RADEON_CHIPSET_TCL
))
479 if (!r300ValidateBuffers(ctx
))
482 return r300RunRender(ctx
, stage
);
485 static GLboolean
r300RunTCLRender(GLcontext
* ctx
,
486 struct tnl_pipeline_stage
*stage
)
488 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
489 struct r300_vertex_program
*vp
;
491 hw_tcl_on
= future_hw_tcl_on
;
493 if (RADEON_DEBUG
& DEBUG_PRIMS
)
494 fprintf(stderr
, "%s\n", __FUNCTION__
);
496 if (hw_tcl_on
== GL_FALSE
)
499 if (r300Fallback(ctx
) >= R300_FALLBACK_TCL
) {
500 hw_tcl_on
= GL_FALSE
;
504 if (!r300ValidateBuffers(ctx
))
507 r300UpdateShaders(rmesa
);
509 vp
= (struct r300_vertex_program
*)CURRENT_VERTEX_SHADER(ctx
);
510 if (vp
->native
== GL_FALSE
) {
511 hw_tcl_on
= GL_FALSE
;
515 return r300RunRender(ctx
, stage
);
518 const struct tnl_pipeline_stage _r300_render_stage
= {
519 "r300 Hardware Rasterization",
527 const struct tnl_pipeline_stage _r300_tcl_stage
= {
528 "r300 Hardware Transform, Clipping and Lighting",