1 /**************************************************************************
3 Copyright (C) 2004 Nicolai Haehnle.
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
/**
 * \file
 *
 * \brief R300 Render (Vertex Buffer Implementation)
 *
 * The immediate implementation has been removed from CVS in favor of the vertex
 * buffer implementation.
 *
 * The render functions are called by the pipeline manager to render a batch of
 * primitives. They return TRUE to pass on to the next stage (i.e. software
 * rasterization) or FALSE to indicate that the pipeline has finished after
 * rendering something.
 *
 * When falling back to software TCL still attempt to use hardware
 * rasterization.
 *
 * I am not sure that the cache related registers are setup correctly, but
 * obviously this does work... Further investigation is needed.
 *
 * \author Nicolai Haehnle <prefect_@gmx.net>
 *
 * \todo Add immediate implementation back? Perhaps this is useful if there are
 * no bugs...
 */
53 #include "r300_render.h"
55 #include "main/glheader.h"
56 #include "main/state.h"
57 #include "main/imports.h"
58 #include "main/enums.h"
59 #include "main/macros.h"
60 #include "main/context.h"
62 #include "main/simple_list.h"
63 #include "main/api_arrayelt.h"
64 #include "swrast/swrast.h"
65 #include "swrast_setup/swrast_setup.h"
68 #include "tnl/t_vp_build.h"
69 #include "radeon_reg.h"
70 #include "radeon_macros.h"
71 #include "r300_context.h"
72 #include "r300_ioctl.h"
73 #include "r300_state.h"
76 #include "r300_emit.h"
77 #include "r300_fragprog_common.h"
78 #include "r300_swtcl.h"
81 * \brief Convert a OpenGL primitive type into a R300 primitive type.
83 int r300PrimitiveType(r300ContextPtr rmesa
, int prim
)
85 switch (prim
& PRIM_MODE_MASK
) {
87 return R300_VAP_VF_CNTL__PRIM_POINTS
;
90 return R300_VAP_VF_CNTL__PRIM_LINES
;
93 return R300_VAP_VF_CNTL__PRIM_LINE_STRIP
;
96 return R300_VAP_VF_CNTL__PRIM_LINE_LOOP
;
99 return R300_VAP_VF_CNTL__PRIM_TRIANGLES
;
101 case GL_TRIANGLE_STRIP
:
102 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP
;
104 case GL_TRIANGLE_FAN
:
105 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN
;
108 return R300_VAP_VF_CNTL__PRIM_QUADS
;
111 return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP
;
114 return R300_VAP_VF_CNTL__PRIM_POLYGON
;
123 int r300NumVerts(r300ContextPtr rmesa
, int num_verts
, int prim
)
127 switch (prim
& PRIM_MODE_MASK
) {
132 verts_off
= num_verts
% 2;
136 verts_off
= num_verts
;
140 verts_off
= num_verts
;
143 verts_off
= num_verts
% 3;
145 case GL_TRIANGLE_STRIP
:
147 verts_off
= num_verts
;
149 case GL_TRIANGLE_FAN
:
151 verts_off
= num_verts
;
154 verts_off
= num_verts
% 4;
158 verts_off
= num_verts
;
160 verts_off
= num_verts
% 2;
164 verts_off
= num_verts
;
172 return num_verts
- verts_off
;
175 static void r300EmitElts(GLcontext
* ctx
, unsigned long n_elts
)
177 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
181 size
= ((rmesa
->ind_buf
.is_32bit
? 4 : 2) * n_elts
+ 3) & ~3;
183 radeonAllocDmaRegion(&rmesa
->radeon
, &rmesa
->radeon
.tcl
.elt_dma_bo
,
184 &rmesa
->radeon
.tcl
.elt_dma_offset
, size
, 4);
185 radeon_bo_map(rmesa
->radeon
.tcl
.elt_dma_bo
, 1);
186 out
= rmesa
->radeon
.tcl
.elt_dma_bo
->ptr
+ rmesa
->radeon
.tcl
.elt_dma_offset
;
187 memcpy(out
, rmesa
->ind_buf
.ptr
, size
);
188 radeon_bo_unmap(rmesa
->radeon
.tcl
.elt_dma_bo
);
191 static void r300FireEB(r300ContextPtr rmesa
, int vertex_count
, int type
)
193 BATCH_LOCALS(&rmesa
->radeon
);
195 r300_emit_scissor(rmesa
->radeon
.glCtx
);
196 if (vertex_count
> 0) {
200 OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2
, 0);
201 if (rmesa
->ind_buf
.is_32bit
) {
203 OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES
|
204 ((vertex_count
+ 0) << 16) | type
|
205 R300_VAP_VF_CNTL__INDEX_SIZE_32bit
);
207 size
= (vertex_count
+ 1) >> 1;
208 OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES
|
209 ((vertex_count
+ 0) << 16) | type
);
212 if (!rmesa
->radeon
.radeonScreen
->kernel_mm
) {
213 OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER
, 2);
214 OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR
| (0 << R300_INDX_BUFFER_SKIP_SHIFT
) |
215 (R300_VAP_PORT_IDX0
>> 2));
216 OUT_BATCH_RELOC(rmesa
->radeon
.tcl
.elt_dma_offset
,
217 rmesa
->radeon
.tcl
.elt_dma_bo
,
218 rmesa
->radeon
.tcl
.elt_dma_offset
,
219 RADEON_GEM_DOMAIN_GTT
, 0, 0);
222 OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER
, 2);
223 OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR
| (0 << R300_INDX_BUFFER_SKIP_SHIFT
) |
224 (R300_VAP_PORT_IDX0
>> 2));
225 OUT_BATCH(rmesa
->radeon
.tcl
.elt_dma_offset
);
227 radeon_cs_write_reloc(rmesa
->radeon
.cmdbuf
.cs
,
228 rmesa
->radeon
.tcl
.elt_dma_bo
,
229 RADEON_GEM_DOMAIN_GTT
, 0, 0);
235 static void r300EmitAOS(r300ContextPtr rmesa
, GLuint nr
, GLuint offset
)
237 BATCH_LOCALS(&rmesa
->radeon
);
239 int sz
= 1 + (nr
>> 1) * 3 + (nr
& 1) * 2;
242 if (RADEON_DEBUG
& DEBUG_VERTS
)
243 fprintf(stderr
, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__
, nr
,
247 if (!rmesa
->radeon
.radeonScreen
->kernel_mm
) {
248 BEGIN_BATCH(sz
+2+(nr
* 2));
249 OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR
, sz
- 1);
252 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
253 OUT_BATCH((rmesa
->radeon
.tcl
.aos
[i
].components
<< 0) |
254 (rmesa
->radeon
.tcl
.aos
[i
].stride
<< 8) |
255 (rmesa
->radeon
.tcl
.aos
[i
+ 1].components
<< 16) |
256 (rmesa
->radeon
.tcl
.aos
[i
+ 1].stride
<< 24));
258 voffset
= rmesa
->radeon
.tcl
.aos
[i
+ 0].offset
+
259 offset
* 4 * rmesa
->radeon
.tcl
.aos
[i
+ 0].stride
;
260 OUT_BATCH_RELOC(voffset
,
261 rmesa
->radeon
.tcl
.aos
[i
].bo
,
263 RADEON_GEM_DOMAIN_GTT
,
265 voffset
= rmesa
->radeon
.tcl
.aos
[i
+ 1].offset
+
266 offset
* 4 * rmesa
->radeon
.tcl
.aos
[i
+ 1].stride
;
267 OUT_BATCH_RELOC(voffset
,
268 rmesa
->radeon
.tcl
.aos
[i
+1].bo
,
270 RADEON_GEM_DOMAIN_GTT
,
275 OUT_BATCH((rmesa
->radeon
.tcl
.aos
[nr
- 1].components
<< 0) |
276 (rmesa
->radeon
.tcl
.aos
[nr
- 1].stride
<< 8));
277 voffset
= rmesa
->radeon
.tcl
.aos
[nr
- 1].offset
+
278 offset
* 4 * rmesa
->radeon
.tcl
.aos
[nr
- 1].stride
;
279 OUT_BATCH_RELOC(voffset
,
280 rmesa
->radeon
.tcl
.aos
[nr
- 1].bo
,
282 RADEON_GEM_DOMAIN_GTT
,
288 BEGIN_BATCH(sz
+2+(nr
* 2));
289 OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR
, sz
- 1);
292 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
293 OUT_BATCH((rmesa
->radeon
.tcl
.aos
[i
].components
<< 0) |
294 (rmesa
->radeon
.tcl
.aos
[i
].stride
<< 8) |
295 (rmesa
->radeon
.tcl
.aos
[i
+ 1].components
<< 16) |
296 (rmesa
->radeon
.tcl
.aos
[i
+ 1].stride
<< 24));
298 voffset
= rmesa
->radeon
.tcl
.aos
[i
+ 0].offset
+
299 offset
* 4 * rmesa
->radeon
.tcl
.aos
[i
+ 0].stride
;
301 voffset
= rmesa
->radeon
.tcl
.aos
[i
+ 1].offset
+
302 offset
* 4 * rmesa
->radeon
.tcl
.aos
[i
+ 1].stride
;
307 OUT_BATCH((rmesa
->radeon
.tcl
.aos
[nr
- 1].components
<< 0) |
308 (rmesa
->radeon
.tcl
.aos
[nr
- 1].stride
<< 8));
309 voffset
= rmesa
->radeon
.tcl
.aos
[nr
- 1].offset
+
310 offset
* 4 * rmesa
->radeon
.tcl
.aos
[nr
- 1].stride
;
313 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
314 voffset
= rmesa
->radeon
.tcl
.aos
[i
+ 0].offset
+
315 offset
* 4 * rmesa
->radeon
.tcl
.aos
[i
+ 0].stride
;
316 radeon_cs_write_reloc(rmesa
->radeon
.cmdbuf
.cs
,
317 rmesa
->radeon
.tcl
.aos
[i
+0].bo
,
318 RADEON_GEM_DOMAIN_GTT
,
320 voffset
= rmesa
->radeon
.tcl
.aos
[i
+ 1].offset
+
321 offset
* 4 * rmesa
->radeon
.tcl
.aos
[i
+ 1].stride
;
322 radeon_cs_write_reloc(rmesa
->radeon
.cmdbuf
.cs
,
323 rmesa
->radeon
.tcl
.aos
[i
+1].bo
,
324 RADEON_GEM_DOMAIN_GTT
,
328 voffset
= rmesa
->radeon
.tcl
.aos
[nr
- 1].offset
+
329 offset
* 4 * rmesa
->radeon
.tcl
.aos
[nr
- 1].stride
;
330 radeon_cs_write_reloc(rmesa
->radeon
.cmdbuf
.cs
,
331 rmesa
->radeon
.tcl
.aos
[nr
-1].bo
,
332 RADEON_GEM_DOMAIN_GTT
,
340 static void r300FireAOS(r300ContextPtr rmesa
, int vertex_count
, int type
)
342 BATCH_LOCALS(&rmesa
->radeon
);
344 r300_emit_scissor(rmesa
->radeon
.glCtx
);
346 OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2
, 0);
347 OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST
| (vertex_count
<< 16) | type
);
351 void r300RunRenderPrimitive(GLcontext
* ctx
, int start
, int end
, int prim
)
353 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
354 BATCH_LOCALS(&rmesa
->radeon
);
357 type
= r300PrimitiveType(rmesa
, prim
);
358 num_verts
= r300NumVerts(rmesa
, end
- start
, prim
);
360 if (type
< 0 || num_verts
<= 0)
363 /* Make space for at least 64 dwords.
364 * This is supposed to ensure that we can get all rendering
365 * commands into a single command buffer.
367 rcommonEnsureCmdBufSpace(&rmesa
->radeon
, 128, __FUNCTION__
);
369 if (rmesa
->ind_buf
.ptr
) {
370 if (num_verts
> 65535) {
371 /* not implemented yet */
372 WARN_ONCE("Too many elts\n");
375 /* Note: The following is incorrect, but it's the best I can do
376 * without a major refactoring of how DMA memory is handled.
377 * The problem: Ensuring that both vertex arrays *and* index
378 * arrays are at the right position, and then ensuring that
379 * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are emitted
382 * So why is the following incorrect? Well, it seems like
383 * allocating the index array might actually evict the vertex
386 r300EmitElts(ctx
, num_verts
);
387 r300EmitAOS(rmesa
, rmesa
->radeon
.tcl
.aos_count
, start
);
388 if (rmesa
->radeon
.radeonScreen
->kernel_mm
) {
389 BEGIN_BATCH_NO_AUTOSTATE(2);
390 OUT_BATCH_REGSEQ(R300_VAP_VF_MAX_VTX_INDX
, 1);
391 OUT_BATCH(rmesa
->radeon
.tcl
.aos
[0].count
);
394 r300FireEB(rmesa
, num_verts
, type
);
396 r300EmitAOS(rmesa
, rmesa
->radeon
.tcl
.aos_count
, start
);
397 r300FireAOS(rmesa
, num_verts
, type
);
402 static void r300RunRender(GLcontext
* ctx
, struct tnl_pipeline_stage
*stage
)
404 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
406 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
407 struct vertex_buffer
*vb
= &tnl
->vb
;
409 if (RADEON_DEBUG
& DEBUG_PRIMS
)
410 fprintf(stderr
, "%s\n", __FUNCTION__
);
412 r300UpdateShaders(rmesa
);
415 r300UpdateShaderStates(rmesa
);
417 r300EmitCacheFlush(rmesa
);
418 radeonEmitState(&rmesa
->radeon
);
420 for (i
= 0; i
< vb
->PrimitiveCount
; i
++) {
421 GLuint prim
= _tnl_translate_prim(&vb
->Primitive
[i
]);
422 GLuint start
= vb
->Primitive
[i
].start
;
423 GLuint end
= vb
->Primitive
[i
].start
+ vb
->Primitive
[i
].count
;
424 r300RunRenderPrimitive(ctx
, start
, end
, prim
);
427 r300EmitCacheFlush(rmesa
);
429 radeonReleaseArrays(ctx
, ~0);
433 static const char *getFallbackString(uint32_t bit
)
436 case R300_FALLBACK_VERTEX_PROGRAM
:
437 return "vertex program";
438 case R300_FALLBACK_LINE_SMOOTH
:
439 return "smooth lines";
440 case R300_FALLBACK_POINT_SMOOTH
:
441 return "smooth points";
442 case R300_FALLBACK_POLYGON_SMOOTH
:
443 return "smooth polygons";
444 case R300_FALLBACK_LINE_STIPPLE
:
445 return "line stipple";
446 case R300_FALLBACK_POLYGON_STIPPLE
:
447 return "polygon stipple";
448 case R300_FALLBACK_STENCIL_TWOSIDE
:
449 return "two-sided stencil";
450 case R300_FALLBACK_RENDER_MODE
:
451 return "render mode != GL_RENDER";
452 case R300_FALLBACK_FRAGMENT_PROGRAM
:
453 return "fragment program";
454 case R300_FALLBACK_AOS_LIMIT
:
456 case R300_FALLBACK_INVALID_BUFFERS
:
457 return "invalid buffers";
463 void r300SwitchFallback(GLcontext
*ctx
, uint32_t bit
, GLboolean mode
)
465 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
466 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
467 uint32_t old_fallback
= rmesa
->fallback
;
468 static uint32_t fallback_warn
= 0;
471 if ((fallback_warn
& bit
) == 0) {
472 _mesa_fprintf(stderr
, "WARNING! Falling back to software for %s\n", getFallbackString(bit
));
473 fallback_warn
|= bit
;
475 rmesa
->fallback
|= bit
;
477 /* update only if we change from no tcl fallbacks to some tcl fallbacks */
478 if (rmesa
->options
.hw_tcl_enabled
) {
479 if (((old_fallback
& R300_TCL_FALLBACK_MASK
) == 0) &&
480 ((bit
& R300_TCL_FALLBACK_MASK
) > 0)) {
481 R300_STATECHANGE(rmesa
, vap_cntl_status
);
482 rmesa
->hw
.vap_cntl_status
.cmd
[1] |= R300_VAP_TCL_BYPASS
;
486 /* update only if we change from no raster fallbacks to some raster fallbacks */
487 if (((old_fallback
& R300_RASTER_FALLBACK_MASK
) == 0) &&
488 ((bit
& R300_RASTER_FALLBACK_MASK
) > 0)) {
490 radeon_firevertices(&rmesa
->radeon
);
491 rmesa
->radeon
.swtcl
.RenderIndex
= ~0;
492 _swsetup_Wakeup( ctx
);
495 rmesa
->fallback
&= ~bit
;
497 /* update only if we have disabled all tcl fallbacks */
498 if (rmesa
->options
.hw_tcl_enabled
) {
499 if ((old_fallback
& R300_RASTER_FALLBACK_MASK
) == bit
) {
500 R300_STATECHANGE(rmesa
, vap_cntl_status
);
501 rmesa
->hw
.vap_cntl_status
.cmd
[1] &= ~R300_VAP_TCL_BYPASS
;
505 /* update only if we have disabled all raster fallbacks */
506 if ((old_fallback
& R300_RASTER_FALLBACK_MASK
) == bit
) {
507 _swrast_flush( ctx
);
509 tnl
->Driver
.Render
.Start
= r300RenderStart
;
510 tnl
->Driver
.Render
.Finish
= r300RenderFinish
;
511 tnl
->Driver
.Render
.PrimitiveNotify
= r300RenderPrimitive
;
512 tnl
->Driver
.Render
.ResetLineStipple
= r300ResetLineStipple
;
513 tnl
->Driver
.Render
.BuildVertices
= _tnl_build_vertices
;
514 tnl
->Driver
.Render
.CopyPV
= _tnl_copy_pv
;
515 tnl
->Driver
.Render
.Interp
= _tnl_interp
;
517 _tnl_invalidate_vertex_state( ctx
, ~0 );
518 _tnl_invalidate_vertices( ctx
, ~0 );
524 static GLboolean
r300RunNonTCLRender(GLcontext
* ctx
,
525 struct tnl_pipeline_stage
*stage
)
527 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
529 if (RADEON_DEBUG
& DEBUG_PRIMS
)
530 fprintf(stderr
, "%s\n", __FUNCTION__
);
532 if (rmesa
->fallback
& R300_RASTER_FALLBACK_MASK
)
535 if (rmesa
->options
.hw_tcl_enabled
== GL_FALSE
)
538 r300RunRender(ctx
, stage
);
543 const struct tnl_pipeline_stage _r300_render_stage
= {
544 "r300 Hardware Rasterization",