1 /**************************************************************************
3 Copyright (C) 2004 Nicolai Haehnle.
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
31 * \brief R300 Render (Vertex Buffer Implementation)
33 * The immediate implementation has been removed from CVS in favor of the vertex
34 * buffer implementation.
36 * The render functions are called by the pipeline manager to render a batch of
37 * primitives. They return TRUE to pass on to the next stage (i.e. software
38 * rasterization) or FALSE to indicate that the pipeline has finished after
39 * rendering something.
41 * When falling back to software TCL, still attempt to use hardware
44 * I am not sure that the cache-related registers are set up correctly, but
45 * obviously this does work... Further investigation is needed.
47 * \author Nicolai Haehnle <prefect_@gmx.net>
49 * \todo Add immediate implementation back? Perhaps this is useful if there are
60 #include "simple_list.h"
61 #include "api_arrayelt.h"
62 #include "swrast/swrast.h"
63 #include "swrast_setup/swrast_setup.h"
66 #include "tnl/t_vp_build.h"
67 #include "radeon_reg.h"
68 #include "radeon_macros.h"
69 #include "radeon_ioctl.h"
70 #include "radeon_state.h"
71 #include "r300_context.h"
72 #include "r300_ioctl.h"
73 #include "r300_state.h"
76 #include "r300_emit.h"
77 #include "r300_fragprog.h"
/* Declared here and defined elsewhere.  Within this file it is read by
 * r300RunTCLRender(), which assigns its value to hw_tcl_on. */
78 extern int future_hw_tcl_on
;
81 * \brief Convert an OpenGL primitive type into an R300 primitive type.
/*
 * Map the primitive mode in `prim` to an R300_VAP_VF_CNTL__PRIM_* value.
 *
 * Only the bits selected by PRIM_MODE_MASK take part in the switch; any
 * other bits of `prim` are ignored.  Ten different PRIM_* values are
 * returned (points, lines, line strip, line loop, triangles, triangle
 * strip, triangle fan, quads, quad strip, polygon).
 *
 * `rmesa` is not referenced in the lines shown here.
 *
 * The caller r300RunRenderPrimitive() tests the result with `type < 0`.
 * NOTE(review): the branch that would produce a negative value
 * (presumably a default case for unknown modes) is not visible in this
 * view -- confirm against the full source.
 */
83 int r300PrimitiveType(r300ContextPtr rmesa
, int prim
)
85 switch (prim
& PRIM_MODE_MASK
) {
87 return R300_VAP_VF_CNTL__PRIM_POINTS
;
90 return R300_VAP_VF_CNTL__PRIM_LINES
;
93 return R300_VAP_VF_CNTL__PRIM_LINE_STRIP
;
96 return R300_VAP_VF_CNTL__PRIM_LINE_LOOP
;
99 return R300_VAP_VF_CNTL__PRIM_TRIANGLES
;
101 case GL_TRIANGLE_STRIP
:
102 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP
;
104 case GL_TRIANGLE_FAN
:
105 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN
;
108 return R300_VAP_VF_CNTL__PRIM_QUADS
;
111 return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP
;
114 return R300_VAP_VF_CNTL__PRIM_POLYGON
;
/*
 * Compute how many of `num_verts` vertices should be submitted for the
 * primitive mode `prim & PRIM_MODE_MASK`.
 *
 * The switch selects a per-mode surplus `verts_off`; the function returns
 * num_verts - verts_off.  Two kinds of assignment are visible:
 *   - verts_off = num_verts % k  (k = 2, 3 or 4), which rounds the result
 *     down to a multiple of k;
 *   - verts_off = num_verts, which makes the result 0.
 *
 * `rmesa` is not referenced in the lines shown here.
 *
 * NOTE(review): the conditions guarding the `verts_off = num_verts`
 * assignments are not visible in this view -- presumably
 * minimum-vertex-count checks per mode; confirm against the full source.
 * The caller r300RunRenderPrimitive() skips on `num_verts <= 0`.
 */
123 int r300NumVerts(r300ContextPtr rmesa
, int num_verts
, int prim
)
127 switch (prim
& PRIM_MODE_MASK
) {
132 verts_off
= num_verts
% 2;
136 verts_off
= num_verts
;
140 verts_off
= num_verts
;
143 verts_off
= num_verts
% 3;
145 case GL_TRIANGLE_STRIP
:
147 verts_off
= num_verts
;
149 case GL_TRIANGLE_FAN
:
151 verts_off
= num_verts
;
154 verts_off
= num_verts
% 4;
158 verts_off
= num_verts
;
160 verts_off
= num_verts
% 2;
164 verts_off
= num_verts
;
172 return num_verts
- verts_off
;
/*
 * Make the element (index) data at `elts` reachable through
 * rmesa->state.elt_dma.
 *
 * The data is treated as n_elts * 4 bytes throughout (4 bytes per
 * element).
 *
 *  - If r300IsGartMemory() accepts the full n_elts * 4 byte range,
 *    elt_dma is pointed at the existing memory: `address` becomes the
 *    gartTextures map and `start` the byte distance of `elts` from it.
 *    An expression based on gart_texture_offset follows; its assignment
 *    target is not visible here (presumably rvb->aos_offset -- confirm).
 *  - Otherwise, if the same check succeeds with a length argument of 1,
 *    a one-time warning is issued.
 *  - The remaining visible statements allocate a DMA region of
 *    n_elts * 4 bytes (the trailing argument 4 is presumably the
 *    alignment -- TODO confirm), store GET_START(rvb) in aos_offset and
 *    memcpy() the elements to address + start.
 *
 * NOTE(review): the statements that leave the function early from the
 * first two branches are not visible in this view.
 */
175 static void r300EmitElts(GLcontext
* ctx
, void *elts
, unsigned long n_elts
)
177 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
178 struct r300_dma_region
*rvb
= &rmesa
->state
.elt_dma
;
181 if (r300IsGartMemory(rmesa
, elts
, n_elts
* 4)) {
182 rvb
->address
= rmesa
->radeon
.radeonScreen
->gartTextures
.map
;
183 rvb
->start
= ((char *)elts
) - rvb
->address
;
185 rmesa
->radeon
.radeonScreen
->gart_texture_offset
+
188 } else if (r300IsGartMemory(rmesa
, elts
, 1)) {
189 WARN_ONCE("Pointer not within GART memory!\n");
193 r300AllocDmaRegion(rmesa
, rvb
, n_elts
* 4, 4);
194 rvb
->aos_offset
= GET_START(rvb
);
196 out
= rvb
->address
+ rvb
->start
;
197 memcpy(out
, elts
, n_elts
* 4);
/*
 * Emit the command packets for an indexed draw.
 *
 * Two packets are started:
 *   1. 3D_DRAW_INDX_2, whose dword ORs together PRIM_WALK_INDICES,
 *      vertex_count shifted left by 16, the primitive `type` and the
 *      32-bit index size flag.
 *   2. INDX_BUFFER, whose first dword is R300_EB_UNK1 | R300_EB_UNK2
 *      (the `0 << 16` term contributes nothing).
 *
 * NOTE(review): the dwords that consume `addr` are not visible in this
 * view.  `cmd_reserved` and `cmd` are presumably scratch state used by
 * the start_packet3()/e32() macros -- TODO confirm.
 */
200 static void r300FireEB(r300ContextPtr rmesa
, unsigned long addr
,
201 int vertex_count
, int type
)
203 int cmd_reserved
= 0;
205 drm_radeon_cmd_header_t
*cmd
= NULL
;
207 start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_INDX_2
, 0), 0);
208 e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES
| (vertex_count
<< 16) | type
| R300_VAP_VF_CNTL__INDEX_SIZE_32bit
);
210 start_packet3(CP_PACKET3(R300_PACKET3_INDX_BUFFER
, 2), 2);
211 e32(R300_EB_UNK1
| (0 << 16) | R300_EB_UNK2
);
/*
 * Emit a 3D_LOAD_VBPNTR packet describing the first `nr` entries of
 * rmesa->state.aos.
 *
 * Layout produced by the visible code:
 *   - Arrays are handled in pairs.  Each pair yields three dwords: one
 *     packing aos_size/aos_stride of both arrays at bit offsets 0, 8, 16
 *     and 24, then one address dword per array.
 *   - The last array (index nr - 1) yields two dwords: size/stride at
 *     bit offsets 0 and 8, then its address.
 *   - Each address is aos_offset + offset * 4 * aos_stride.
 *
 * sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2 is the dword count; the packet
 * is started with sz - 1.
 *
 * NOTE(review): the leading "1" presumably accounts for a dword carrying
 * the array count, and the trailing single-array part is presumably
 * guarded by `nr & 1`; neither statement is visible in this view.
 */
216 static void r300EmitAOS(r300ContextPtr rmesa
, GLuint nr
, GLuint offset
)
218 int sz
= 1 + (nr
>> 1) * 3 + (nr
& 1) * 2;
220 int cmd_reserved
= 0;
222 drm_radeon_cmd_header_t
*cmd
= NULL
;
224 if (RADEON_DEBUG
& DEBUG_VERTS
)
225 fprintf(stderr
, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__
, nr
,
228 start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR
, sz
- 1), sz
- 1);
/* Paired arrays: stops while at least two entries remain. */
231 for (i
= 0; i
+ 1 < nr
; i
+= 2) {
232 e32((rmesa
->state
.aos
[i
].aos_size
<< 0) |
233 (rmesa
->state
.aos
[i
].aos_stride
<< 8) |
234 (rmesa
->state
.aos
[i
+ 1].aos_size
<< 16) |
235 (rmesa
->state
.aos
[i
+ 1].aos_stride
<< 24));
237 e32(rmesa
->state
.aos
[i
].aos_offset
+ offset
* 4 * rmesa
->state
.aos
[i
].aos_stride
);
238 e32(rmesa
->state
.aos
[i
+ 1].aos_offset
+ offset
* 4 * rmesa
->state
.aos
[i
+ 1].aos_stride
);
/* Unpaired last array (index nr - 1). */
242 e32((rmesa
->state
.aos
[nr
- 1].aos_size
<< 0) |
243 (rmesa
->state
.aos
[nr
- 1].aos_stride
<< 8));
244 e32(rmesa
->state
.aos
[nr
- 1].aos_offset
+ offset
* 4 * rmesa
->state
.aos
[nr
- 1].aos_stride
);
/*
 * Emit a 3D_DRAW_VBUF_2 packet for a non-indexed draw.
 *
 * The single dword ORs together PRIM_WALK_VERTEX_LIST, vertex_count
 * shifted left by 16 and the primitive `type`.
 *
 * `cmd_reserved` and `cmd` are presumably scratch state used by the
 * start_packet3()/e32() macros -- TODO confirm.
 */
248 static void r300FireAOS(r300ContextPtr rmesa
, int vertex_count
, int type
)
250 int cmd_reserved
= 0;
252 drm_radeon_cmd_header_t
*cmd
= NULL
;
254 start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2
, 0), 0);
255 e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST
| (vertex_count
<< 16) | type
);
/*
 * Emit the draw commands for one primitive covering vertices
 * [start, end) of the TNL vertex buffer (tnl->vb).
 *
 * Steps visible here:
 *   - `type` comes from r300PrimitiveType(prim) and `num_verts` from
 *     r300NumVerts(end - start, prim); the pair is then tested with
 *     `type < 0 || num_verts <= 0`.
 *   - A count above 65535 triggers the one-time "Too many elts" warning.
 *   - Indexed sequence: r300EmitElts(vb->Elts, num_verts), r300EmitAOS()
 *     and r300FireEB() with elt_dma.aos_offset as the address.
 *   - Non-indexed sequence: r300EmitAOS() followed by r300FireAOS().
 *   Both sequences pass aos_count and `start` to r300EmitAOS().
 *
 * NOTE(review): the statements executed when the tests above hit, and
 * the condition choosing between the two sequences (presumably whether
 * vb->Elts is set), are not visible in this view -- confirm against the
 * full source.
 */
258 static void r300RunRenderPrimitive(r300ContextPtr rmesa
, GLcontext
* ctx
,
259 int start
, int end
, int prim
)
262 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
263 struct vertex_buffer
*vb
= &tnl
->vb
;
265 type
= r300PrimitiveType(rmesa
, prim
);
266 num_verts
= r300NumVerts(rmesa
, end
- start
, prim
);
268 if (type
< 0 || num_verts
<= 0)
272 if (num_verts
> 65535) {
273 /* not implemented yet */
274 WARN_ONCE("Too many elts\n");
277 /* Note: The following is incorrect, but it's the best I can do
278 * without a major refactoring of how DMA memory is handled.
279 * The problem: Ensuring that both vertex arrays *and* index
280 * arrays are at the right position, and then ensuring that
281 * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are emitted
284 * So why is the following incorrect? Well, it seems like
285 * allocating the index array might actually evict the vertex
288 r300EmitElts(ctx
, vb
->Elts
, num_verts
);
289 r300EmitAOS(rmesa
, rmesa
->state
.aos_count
, start
);
290 r300FireEB(rmesa
, rmesa
->state
.elt_dma
.aos_offset
, num_verts
, type
);
292 r300EmitAOS(rmesa
, rmesa
->state
.aos_count
, start
);
293 r300FireAOS(rmesa
, num_verts
, type
);
/*
 * Render every primitive of the current TNL vertex buffer on hardware.
 *
 * Visible call order:
 *   1. r300UpdateShaders()
 *   2. r300EmitArrays() -- its result is tested; the statement taken on
 *      a non-zero result is not visible here.
 *   3. r300UpdateShaderStates()
 *   4. r300EmitCacheFlush(), then r300EmitState()
 *   5. for each of the vb->PrimitiveCount primitives: translate the mode
 *      with _tnl_translate_prim() and call r300RunRenderPrimitive() with
 *      the range [start, start + count)
 *   6. r300EmitCacheFlush() again
 *   7. r300ReleaseArrays()
 *
 * `stage` is not referenced in the lines shown here.
 * NOTE(review): the return statements are not visible in this view.
 */
297 static GLboolean
r300RunRender(GLcontext
* ctx
,
298 struct tnl_pipeline_stage
*stage
)
300 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
302 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
303 struct vertex_buffer
*vb
= &tnl
->vb
;
306 if (RADEON_DEBUG
& DEBUG_PRIMS
)
307 fprintf(stderr
, "%s\n", __FUNCTION__
);
309 r300UpdateShaders(rmesa
);
310 if (r300EmitArrays(ctx
))
313 r300UpdateShaderStates(rmesa
);
315 r300EmitCacheFlush(rmesa
);
316 r300EmitState(rmesa
);
318 for (i
= 0; i
< vb
->PrimitiveCount
; i
++) {
319 GLuint prim
= _tnl_translate_prim(&vb
->Primitive
[i
]);
320 GLuint start
= vb
->Primitive
[i
].start
;
321 GLuint end
= vb
->Primitive
[i
].start
+ vb
->Primitive
[i
].count
;
322 r300RunRenderPrimitive(rmesa
, ctx
, start
, end
, prim
);
325 r300EmitCacheFlush(rmesa
);
331 r300ReleaseArrays(ctx
);
/*
 * FALLBACK_IF(expr): make the enclosing function return
 * R300_FALLBACK_RAST, after printing a one-time "Software fallback"
 * warning.
 *
 * The `1 ||` in the debug test makes the warning unconditional, whatever
 * the state of DEBUG_FALLBACKS.
 *
 * Because the expansion contains a `return`, the macro is only usable
 * inside functions whose return type accepts R300_FALLBACK_RAST.
 *
 * NOTE(review): the line testing `expr` is not visible in this view;
 * presumably the warning and return happen only when `expr` is true.
 */
336 #define FALLBACK_IF(expr) \
339 if (1 || RADEON_DEBUG & DEBUG_FALLBACKS) \
340 WARN_ONCE("Software fallback:%s\n", \
342 return R300_FALLBACK_RAST; \
/*
 * Decide whether the current GL state requires a software fallback.
 *
 * Returns R300_FALLBACK_NONE when no check fires.  Every FALLBACK_IF()
 * below can return R300_FALLBACK_RAST from this function early.
 *
 * Checks, in order:
 *   - the current fragment program: it is translated on demand if not
 *     yet translated, and a fallback is requested if translation did not
 *     succeed.  Chips with chip_family >= CHIP_FAMILY_RV515 use the r500
 *     program type and translator; the r300 variant follows (presumably
 *     in the else branch, whose keyword is not visible here);
 *   - RenderMode other than GL_RENDER;
 *   - two-sided stencil testing where front and back differ in Ref,
 *     ValueMask or WriteMask;
 *   - point sprites, checked only when NV_point_sprite or
 *     ARB_point_sprite is enabled;
 *   - unless disable_lowimpact_fallback is set: polygon stipple,
 *     multisampling, line stipple, line smoothing, point smoothing.
 */
346 static int r300Fallback(GLcontext
* ctx
)
348 r300ContextPtr r300
= R300_CONTEXT(ctx
);
349 /* Do we need to use new-style shaders?
350 * Also is there a better way to do this? */
351 if (r300
->radeon
.radeonScreen
->chip_family
>= CHIP_FAMILY_RV515
) {
352 struct r500_fragment_program
*fp
= (struct r500_fragment_program
*)
353 (char *)ctx
->FragmentProgram
._Current
;
355 if (!fp
->translated
) {
356 r500TranslateFragmentShader(r300
, fp
);
357 FALLBACK_IF(!fp
->translated
);
361 struct r300_fragment_program
*fp
= (struct r300_fragment_program
*)
362 (char *)ctx
->FragmentProgram
._Current
;
364 if (!fp
->translated
) {
365 r300TranslateFragmentShader(r300
, fp
);
366 FALLBACK_IF(!fp
->translated
);
371 FALLBACK_IF(ctx
->RenderMode
!= GL_RENDER
);
/* Two-sided stencil is only a problem when the two faces disagree. */
373 FALLBACK_IF(ctx
->Stencil
._TestTwoSide
374 && (ctx
->Stencil
.Ref
[0] != ctx
->Stencil
.Ref
[1]
375 || ctx
->Stencil
.ValueMask
[0] !=
376 ctx
->Stencil
.ValueMask
[1]
377 || ctx
->Stencil
.WriteMask
[0] !=
378 ctx
->Stencil
.WriteMask
[1]));
380 if (ctx
->Extensions
.NV_point_sprite
|| ctx
->Extensions
.ARB_point_sprite
)
381 FALLBACK_IF(ctx
->Point
.PointSprite
);
/* These checks can be switched off via disable_lowimpact_fallback. */
383 if (!r300
->disable_lowimpact_fallback
) {
384 FALLBACK_IF(ctx
->Polygon
.StippleFlag
);
385 FALLBACK_IF(ctx
->Multisample
._Enabled
);
386 FALLBACK_IF(ctx
->Line
.StippleFlag
);
387 FALLBACK_IF(ctx
->Line
.SmoothFlag
);
388 FALLBACK_IF(ctx
->Point
.SmoothFlag
);
391 return R300_FALLBACK_NONE
;
/*
 * Pipeline-stage run function that ends in r300RunRender().
 * Presumably the run callback of _r300_render_stage -- the initializer
 * fields are not visible in this view.
 *
 * Two checks are visible before the final call:
 *   - r300Fallback(ctx) >= R300_FALLBACK_RAST
 *   - the screen's chip_flags lacking RADEON_CHIPSET_TCL
 *
 * NOTE(review): the statements executed when either check hits are not
 * visible here (presumably an early return that passes the batch to the
 * next pipeline stage) -- confirm against the full source.
 */
394 static GLboolean
r300RunNonTCLRender(GLcontext
* ctx
,
395 struct tnl_pipeline_stage
*stage
)
397 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
399 if (RADEON_DEBUG
& DEBUG_PRIMS
)
400 fprintf(stderr
, "%s\n", __FUNCTION__
);
402 if (r300Fallback(ctx
) >= R300_FALLBACK_RAST
)
405 if (!(rmesa
->radeon
.radeonScreen
->chip_flags
& RADEON_CHIPSET_TCL
))
408 return r300RunRender(ctx
, stage
);
/*
 * Pipeline-stage run function for the hardware TCL path; ends in
 * r300RunRender().  Presumably the run callback of _r300_tcl_stage --
 * the initializer fields are not visible in this view.
 *
 * Visible sequence:
 *   - hw_tcl_on is refreshed from future_hw_tcl_on (hw_tcl_on is not
 *     declared in this block, so it is presumably a file-scope or global
 *     variable);
 *   - hw_tcl_on == GL_FALSE is tested;
 *   - if r300Fallback(ctx) >= R300_FALLBACK_TCL, hw_tcl_on is cleared;
 *   - r300UpdateShaders() runs, then the current vertex program is
 *     fetched; if it is not native (vp->native == GL_FALSE), hw_tcl_on
 *     is cleared.
 *
 * NOTE(review): the statements that follow each of these tests
 * (presumably early returns handing the batch to later stages) are not
 * visible here -- confirm against the full source.
 */
411 static GLboolean
r300RunTCLRender(GLcontext
* ctx
,
412 struct tnl_pipeline_stage
*stage
)
414 r300ContextPtr rmesa
= R300_CONTEXT(ctx
);
415 struct r300_vertex_program
*vp
;
417 hw_tcl_on
= future_hw_tcl_on
;
419 if (RADEON_DEBUG
& DEBUG_PRIMS
)
420 fprintf(stderr
, "%s\n", __FUNCTION__
);
422 if (hw_tcl_on
== GL_FALSE
)
425 if (r300Fallback(ctx
) >= R300_FALLBACK_TCL
) {
426 hw_tcl_on
= GL_FALSE
;
430 r300UpdateShaders(rmesa
);
432 vp
= (struct r300_vertex_program
*)CURRENT_VERTEX_SHADER(ctx
);
433 if (vp
->native
== GL_FALSE
) {
434 hw_tcl_on
= GL_FALSE
;
438 return r300RunRender(ctx
, stage
);
/*
 * TNL pipeline stage descriptor named "r300 Hardware Rasterization".
 * NOTE(review): only the name string is visible in this view; the
 * callback fields (presumably including r300RunNonTCLRender as the run
 * function) must be confirmed against the full source.
 */
441 const struct tnl_pipeline_stage _r300_render_stage
= {
442 "r300 Hardware Rasterization",
450 const struct tnl_pipeline_stage _r300_tcl_stage
= {
451 "r300 Hardware Transform, Clipping and Lighting",