1 /**************************************************************************
3 Copyright (C) 2004 Nicolai Haehnle.
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
31 * \brief R300 Render (Vertex Buffer Implementation)
33 * The immediate implementation has been removed from CVS in favor of the vertex
34 * buffer implementation.
36 * The render functions are called by the pipeline manager to render a batch of
37 * primitives. They return TRUE to pass on to the next stage (i.e. software
38 * rasterization) or FALSE to indicate that the pipeline has finished after
39 * rendering something.
41 * When falling back to software TCL still attempt to use hardware
44 * I am not sure that the cache related registers are setup correctly, but
45 * obviously this does work... Further investigation is needed.
47 * \author Nicolai Haehnle <prefect_@gmx.net>
49 * \todo Add immediate implementation back? Perhaps this is useful if there are
53 #include "r300_render.h"
55 #include "main/glheader.h"
56 #include "main/state.h"
57 #include "main/imports.h"
58 #include "main/enums.h"
59 #include "main/macros.h"
60 #include "main/context.h"
62 #include "main/simple_list.h"
63 #include "main/api_arrayelt.h"
64 #include "swrast/swrast.h"
65 #include "swrast_setup/swrast_setup.h"
68 #include "tnl/t_vp_build.h"
69 #include "radeon_reg.h"
70 #include "radeon_macros.h"
71 #include "r300_context.h"
72 #include "r300_ioctl.h"
73 #include "r300_state.h"
76 #include "r300_emit.h"
77 #include "r300_fragprog_common.h"
78 #include "r300_swtcl.h"
81 * \brief Convert a OpenGL primitive type into a R300 primitive type.
83 int r300PrimitiveType(r300ContextPtr rmesa
, int prim
)
85 switch (prim
& PRIM_MODE_MASK
) {
87 return R300_VAP_VF_CNTL__PRIM_POINTS
;
90 return R300_VAP_VF_CNTL__PRIM_LINES
;
93 return R300_VAP_VF_CNTL__PRIM_LINE_STRIP
;
96 return R300_VAP_VF_CNTL__PRIM_LINE_LOOP
;
99 return R300_VAP_VF_CNTL__PRIM_TRIANGLES
;
101 case GL_TRIANGLE_STRIP
:
102 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP
;
104 case GL_TRIANGLE_FAN
:
105 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN
;
108 return R300_VAP_VF_CNTL__PRIM_QUADS
;
111 return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP
;
114 return R300_VAP_VF_CNTL__PRIM_POLYGON
;
123 int r300NumVerts(r300ContextPtr rmesa
, int num_verts
, int prim
)
127 switch (prim
& PRIM_MODE_MASK
) {
132 verts_off
= num_verts
% 2;
136 verts_off
= num_verts
;
140 verts_off
= num_verts
;
143 verts_off
= num_verts
% 3;
145 case GL_TRIANGLE_STRIP
:
147 verts_off
= num_verts
;
149 case GL_TRIANGLE_FAN
:
151 verts_off
= num_verts
;
154 verts_off
= num_verts
% 4;
158 verts_off
= num_verts
;
160 verts_off
= num_verts
% 2;
164 verts_off
= num_verts
;
172 return num_verts
- verts_off
;
175 static void r300EmitElts(GLcontext
* ctx
, unsigned long n_elts
)
177 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
181 size
= ((rmesa
->ind_buf
.is_32bit
? 4 : 2) * n_elts
+ 3) & ~3;
183 radeonAllocDmaRegion(&rmesa
->radeon
, &rmesa
->radeon
.tcl
.elt_dma_bo
,
184 &rmesa
->radeon
.tcl
.elt_dma_offset
, size
, 4);
185 radeon_bo_map(rmesa
->radeon
.tcl
.elt_dma_bo
, 1);
186 out
= rmesa
->radeon
.tcl
.elt_dma_bo
->ptr
+ rmesa
->radeon
.tcl
.elt_dma_offset
;
187 memcpy(out
, rmesa
->ind_buf
.ptr
, size
);
188 radeon_bo_unmap(rmesa
->radeon
.tcl
.elt_dma_bo
);
191 static void r300FireEB(r300ContextPtr rmesa
, int vertex_count
, int type
)
193 BATCH_LOCALS(&rmesa
->radeon
);
195 r300_emit_scissor(rmesa
->radeon
.glCtx
);
196 if (vertex_count
> 0) {
200 OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2
, 0);
201 if (rmesa
->ind_buf
.is_32bit
) {
203 OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES
|
204 ((vertex_count
+ 0) << 16) | type
|
205 R300_VAP_VF_CNTL__INDEX_SIZE_32bit
);
207 size
= (vertex_count
+ 1) >> 1;
208 OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES
|
209 ((vertex_count
+ 0) << 16) | type
);
212 if (!rmesa
->radeon
.radeonScreen
->kernel_mm
) {
213 OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER
, 2);
214 OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR
| (0 << R300_INDX_BUFFER_SKIP_SHIFT
) |
215 (R300_VAP_PORT_IDX0
>> 2));
216 OUT_BATCH_RELOC(rmesa
->radeon
.tcl
.elt_dma_offset
,
217 rmesa
->radeon
.tcl
.elt_dma_bo
,
218 rmesa
->radeon
.tcl
.elt_dma_offset
,
219 RADEON_GEM_DOMAIN_GTT
, 0, 0);
222 OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER
, 2);
223 OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR
| (0 << R300_INDX_BUFFER_SKIP_SHIFT
) |
224 (R300_VAP_PORT_IDX0
>> 2));
225 OUT_BATCH(rmesa
->radeon
.tcl
.elt_dma_offset
);
227 radeon_cs_write_reloc(rmesa
->radeon
.cmdbuf
.cs
,
228 rmesa
->radeon
.tcl
.elt_dma_bo
,
229 RADEON_GEM_DOMAIN_GTT
, 0, 0);
235 static void r300EmitAOS(r300ContextPtr rmesa
, GLuint nr
, GLuint offset
)
237 BATCH_LOCALS(&rmesa
->radeon
);
239 int sz
= 1 + (nr
>> 1) * 3 + (nr
& 1) * 2;
242 if (RADEON_DEBUG
& DEBUG_VERTS
)
243 fprintf(stderr
, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__
, nr
,
246 if (!rmesa
->radeon
.radeonScreen
->kernel_mm
) {
247 BEGIN_BATCH(sz
+2+(nr
* 2));
248 OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR
, sz
- 1);
251 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
252 OUT_BATCH((rmesa
->radeon
.tcl
.aos
[i
].components
<< 0) |
253 (rmesa
->radeon
.tcl
.aos
[i
].stride
<< 8) |
254 (rmesa
->radeon
.tcl
.aos
[i
+ 1].components
<< 16) |
255 (rmesa
->radeon
.tcl
.aos
[i
+ 1].stride
<< 24));
257 voffset
= rmesa
->radeon
.tcl
.aos
[i
+ 0].offset
+
258 offset
* 4 * rmesa
->radeon
.tcl
.aos
[i
+ 0].stride
;
259 OUT_BATCH_RELOC(voffset
,
260 rmesa
->radeon
.tcl
.aos
[i
].bo
,
262 RADEON_GEM_DOMAIN_GTT
,
264 voffset
= rmesa
->radeon
.tcl
.aos
[i
+ 1].offset
+
265 offset
* 4 * rmesa
->radeon
.tcl
.aos
[i
+ 1].stride
;
266 OUT_BATCH_RELOC(voffset
,
267 rmesa
->radeon
.tcl
.aos
[i
+1].bo
,
269 RADEON_GEM_DOMAIN_GTT
,
274 OUT_BATCH((rmesa
->radeon
.tcl
.aos
[nr
- 1].components
<< 0) |
275 (rmesa
->radeon
.tcl
.aos
[nr
- 1].stride
<< 8));
276 voffset
= rmesa
->radeon
.tcl
.aos
[nr
- 1].offset
+
277 offset
* 4 * rmesa
->radeon
.tcl
.aos
[nr
- 1].stride
;
278 OUT_BATCH_RELOC(voffset
,
279 rmesa
->radeon
.tcl
.aos
[nr
- 1].bo
,
281 RADEON_GEM_DOMAIN_GTT
,
287 BEGIN_BATCH(sz
+2+(nr
* 2));
288 OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR
, sz
- 1);
291 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
292 OUT_BATCH((rmesa
->radeon
.tcl
.aos
[i
].components
<< 0) |
293 (rmesa
->radeon
.tcl
.aos
[i
].stride
<< 8) |
294 (rmesa
->radeon
.tcl
.aos
[i
+ 1].components
<< 16) |
295 (rmesa
->radeon
.tcl
.aos
[i
+ 1].stride
<< 24));
297 voffset
= rmesa
->radeon
.tcl
.aos
[i
+ 0].offset
+
298 offset
* 4 * rmesa
->radeon
.tcl
.aos
[i
+ 0].stride
;
300 voffset
= rmesa
->radeon
.tcl
.aos
[i
+ 1].offset
+
301 offset
* 4 * rmesa
->radeon
.tcl
.aos
[i
+ 1].stride
;
306 OUT_BATCH((rmesa
->radeon
.tcl
.aos
[nr
- 1].components
<< 0) |
307 (rmesa
->radeon
.tcl
.aos
[nr
- 1].stride
<< 8));
308 voffset
= rmesa
->radeon
.tcl
.aos
[nr
- 1].offset
+
309 offset
* 4 * rmesa
->radeon
.tcl
.aos
[nr
- 1].stride
;
312 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
313 voffset
= rmesa
->radeon
.tcl
.aos
[i
+ 0].offset
+
314 offset
* 4 * rmesa
->radeon
.tcl
.aos
[i
+ 0].stride
;
315 radeon_cs_write_reloc(rmesa
->radeon
.cmdbuf
.cs
,
316 rmesa
->radeon
.tcl
.aos
[i
+0].bo
,
317 RADEON_GEM_DOMAIN_GTT
,
319 voffset
= rmesa
->radeon
.tcl
.aos
[i
+ 1].offset
+
320 offset
* 4 * rmesa
->radeon
.tcl
.aos
[i
+ 1].stride
;
321 radeon_cs_write_reloc(rmesa
->radeon
.cmdbuf
.cs
,
322 rmesa
->radeon
.tcl
.aos
[i
+1].bo
,
323 RADEON_GEM_DOMAIN_GTT
,
327 voffset
= rmesa
->radeon
.tcl
.aos
[nr
- 1].offset
+
328 offset
* 4 * rmesa
->radeon
.tcl
.aos
[nr
- 1].stride
;
329 radeon_cs_write_reloc(rmesa
->radeon
.cmdbuf
.cs
,
330 rmesa
->radeon
.tcl
.aos
[nr
-1].bo
,
331 RADEON_GEM_DOMAIN_GTT
,
339 static void r300FireAOS(r300ContextPtr rmesa
, int vertex_count
, int type
)
341 BATCH_LOCALS(&rmesa
->radeon
);
343 r300_emit_scissor(rmesa
->radeon
.glCtx
);
345 OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2
, 0);
346 OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST
| (vertex_count
<< 16) | type
);
350 void r300RunRenderPrimitive(GLcontext
* ctx
, int start
, int end
, int prim
)
352 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
353 BATCH_LOCALS(&rmesa
->radeon
);
356 type
= r300PrimitiveType(rmesa
, prim
);
357 num_verts
= r300NumVerts(rmesa
, end
- start
, prim
);
359 if (type
< 0 || num_verts
<= 0)
362 /* Make space for at least 128 dwords.
363 * This is supposed to ensure that we can get all rendering
364 * commands into a single command buffer.
366 rcommonEnsureCmdBufSpace(&rmesa
->radeon
, 128, __FUNCTION__
);
368 if (rmesa
->ind_buf
.ptr
) {
369 r300EmitElts(ctx
, num_verts
);
370 r300EmitAOS(rmesa
, rmesa
->radeon
.tcl
.aos_count
, 0);
371 if (rmesa
->radeon
.radeonScreen
->kernel_mm
) {
372 BEGIN_BATCH_NO_AUTOSTATE(2);
373 OUT_BATCH_REGSEQ(R300_VAP_VF_MAX_VTX_INDX
, 1);
374 OUT_BATCH(rmesa
->radeon
.tcl
.aos
[0].count
);
377 r300FireEB(rmesa
, num_verts
, type
);
379 r300EmitAOS(rmesa
, rmesa
->radeon
.tcl
.aos_count
, start
);
380 r300FireAOS(rmesa
, num_verts
, type
);
385 static void r300RunRender(GLcontext
* ctx
, struct tnl_pipeline_stage
*stage
)
387 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
389 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
390 struct vertex_buffer
*vb
= &tnl
->vb
;
392 if (RADEON_DEBUG
& DEBUG_PRIMS
)
393 fprintf(stderr
, "%s\n", __FUNCTION__
);
395 r300UpdateShaders(rmesa
);
398 r300UpdateShaderStates(rmesa
);
400 r300EmitCacheFlush(rmesa
);
401 radeonEmitState(&rmesa
->radeon
);
403 for (i
= 0; i
< vb
->PrimitiveCount
; i
++) {
404 GLuint prim
= _tnl_translate_prim(&vb
->Primitive
[i
]);
405 GLuint start
= vb
->Primitive
[i
].start
;
406 GLuint end
= vb
->Primitive
[i
].start
+ vb
->Primitive
[i
].count
;
407 r300RunRenderPrimitive(ctx
, start
, end
, prim
);
410 r300EmitCacheFlush(rmesa
);
412 radeonReleaseArrays(ctx
, ~0);
416 static const char *getFallbackString(uint32_t bit
)
419 case R300_FALLBACK_VERTEX_PROGRAM
:
420 return "vertex program";
421 case R300_FALLBACK_LINE_SMOOTH
:
422 return "smooth lines";
423 case R300_FALLBACK_POINT_SMOOTH
:
424 return "smooth points";
425 case R300_FALLBACK_POLYGON_SMOOTH
:
426 return "smooth polygons";
427 case R300_FALLBACK_LINE_STIPPLE
:
428 return "line stipple";
429 case R300_FALLBACK_POLYGON_STIPPLE
:
430 return "polygon stipple";
431 case R300_FALLBACK_STENCIL_TWOSIDE
:
432 return "two-sided stencil";
433 case R300_FALLBACK_RENDER_MODE
:
434 return "render mode != GL_RENDER";
435 case R300_FALLBACK_FRAGMENT_PROGRAM
:
436 return "fragment program";
437 case R300_FALLBACK_AOS_LIMIT
:
439 case R300_FALLBACK_INVALID_BUFFERS
:
440 return "invalid buffers";
446 void r300SwitchFallback(GLcontext
*ctx
, uint32_t bit
, GLboolean mode
)
448 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
449 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
450 uint32_t old_fallback
= rmesa
->fallback
;
451 static uint32_t fallback_warn
= 0;
454 if ((fallback_warn
& bit
) == 0) {
455 if (RADEON_DEBUG
& DEBUG_FALLBACKS
)
456 _mesa_fprintf(stderr
, "WARNING! Falling back to software for %s\n", getFallbackString(bit
));
457 fallback_warn
|= bit
;
459 rmesa
->fallback
|= bit
;
461 /* update only if we change from no tcl fallbacks to some tcl fallbacks */
462 if (rmesa
->options
.hw_tcl_enabled
) {
463 if (((old_fallback
& R300_TCL_FALLBACK_MASK
) == 0) &&
464 ((bit
& R300_TCL_FALLBACK_MASK
) > 0)) {
465 R300_STATECHANGE(rmesa
, vap_cntl_status
);
466 rmesa
->hw
.vap_cntl_status
.cmd
[1] |= R300_VAP_TCL_BYPASS
;
470 /* update only if we change from no raster fallbacks to some raster fallbacks */
471 if (((old_fallback
& R300_RASTER_FALLBACK_MASK
) == 0) &&
472 ((bit
& R300_RASTER_FALLBACK_MASK
) > 0)) {
474 radeon_firevertices(&rmesa
->radeon
);
475 rmesa
->radeon
.swtcl
.RenderIndex
= ~0;
476 _swsetup_Wakeup( ctx
);
479 rmesa
->fallback
&= ~bit
;
481 /* update only if we have disabled all tcl fallbacks */
482 if (rmesa
->options
.hw_tcl_enabled
) {
483 if ((old_fallback
& R300_RASTER_FALLBACK_MASK
) == bit
) {
484 R300_STATECHANGE(rmesa
, vap_cntl_status
);
485 rmesa
->hw
.vap_cntl_status
.cmd
[1] &= ~R300_VAP_TCL_BYPASS
;
489 /* update only if we have disabled all raster fallbacks */
490 if ((old_fallback
& R300_RASTER_FALLBACK_MASK
) == bit
) {
491 _swrast_flush( ctx
);
493 tnl
->Driver
.Render
.Start
= r300RenderStart
;
494 tnl
->Driver
.Render
.Finish
= r300RenderFinish
;
495 tnl
->Driver
.Render
.PrimitiveNotify
= r300RenderPrimitive
;
496 tnl
->Driver
.Render
.ResetLineStipple
= r300ResetLineStipple
;
497 tnl
->Driver
.Render
.BuildVertices
= _tnl_build_vertices
;
498 tnl
->Driver
.Render
.CopyPV
= _tnl_copy_pv
;
499 tnl
->Driver
.Render
.Interp
= _tnl_interp
;
501 _tnl_invalidate_vertex_state( ctx
, ~0 );
502 _tnl_invalidate_vertices( ctx
, ~0 );
508 static GLboolean
r300RunNonTCLRender(GLcontext
* ctx
,
509 struct tnl_pipeline_stage
*stage
)
511 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
513 if (RADEON_DEBUG
& DEBUG_PRIMS
)
514 fprintf(stderr
, "%s\n", __FUNCTION__
);
516 if (rmesa
->fallback
& R300_RASTER_FALLBACK_MASK
)
519 if (rmesa
->options
.hw_tcl_enabled
== GL_FALSE
)
522 r300RunRender(ctx
, stage
);
527 const struct tnl_pipeline_stage _r300_render_stage
= {
528 "r300 Hardware Rasterization",