/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

/**
 * \file
 *
 * \brief R300 Render (Vertex Buffer Implementation)
 *
 * The immediate implementation has been removed from CVS in favor of the
 * vertex buffer implementation.
 *
 * The render functions are called by the pipeline manager to render a batch
 * of primitives. They return TRUE to pass on to the next stage (i.e. software
 * rasterization) or FALSE to indicate that the pipeline has finished after
 * rendering something.
 *
 * When falling back to software TCL, we still attempt to use hardware
 * rasterization.
 *
 * I am not sure that the cache-related registers are set up correctly, but
 * obviously this does work... Further investigation is needed.
 *
 * \author Nicolai Haehnle <prefect_@gmx.net>
 *
 * \todo Add the immediate implementation back? Perhaps this is useful if
 * there are no bugs...
 */

#include "main/glheader.h"
#include "main/state.h"
#include "main/imports.h"
#include "main/enums.h"
#include "main/macros.h"
#include "main/context.h"
#include "main/dd.h"
#include "main/simple_list.h"
#include "main/api_arrayelt.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
#include "vbo/vbo.h"
#include "tnl/tnl.h"
#include "tnl/t_vp_build.h"
#include "radeon_reg.h"
#include "radeon_macros.h"
#include "r300_context.h"
#include "r300_ioctl.h"
#include "r300_state.h"
#include "r300_reg.h"
#include "r300_tex.h"
#include "r300_emit.h"
#include "r300_fragprog_common.h"

/**
 * \brief Convert an OpenGL primitive type into an R300 primitive type.
 */
int r300PrimitiveType(r300ContextPtr rmesa, int prim)
{
	switch (prim & PRIM_MODE_MASK) {
	case GL_POINTS:
		return R300_VAP_VF_CNTL__PRIM_POINTS;
	case GL_LINES:
		return R300_VAP_VF_CNTL__PRIM_LINES;
	case GL_LINE_STRIP:
		return R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
	case GL_LINE_LOOP:
		return R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
	case GL_TRIANGLES:
		return R300_VAP_VF_CNTL__PRIM_TRIANGLES;
	case GL_TRIANGLE_STRIP:
		return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
	case GL_TRIANGLE_FAN:
		return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
	case GL_QUADS:
		return R300_VAP_VF_CNTL__PRIM_QUADS;
	case GL_QUAD_STRIP:
		return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
	case GL_POLYGON:
		return R300_VAP_VF_CNTL__PRIM_POLYGON;
	default:
		assert(0);
		return -1;
	}
}

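/**
 * \brief Clamp a vertex count down to a number that forms complete
 * primitives of the given type.
 *
 * Leftover vertices that cannot form a complete primitive (e.g. the odd
 * vertex of an unfinished line) are dropped. Returns -1 on an unknown
 * primitive type.
 */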
int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
{
	int verts_off = 0;

	switch (prim & PRIM_MODE_MASK) {
	case GL_POINTS:
		verts_off = 0;
		break;
	case GL_LINES:
		verts_off = num_verts % 2;
		break;
	case GL_LINE_STRIP:
		if (num_verts < 2)
			verts_off = num_verts;
		break;
	case GL_LINE_LOOP:
		if (num_verts < 2)
			verts_off = num_verts;
		break;
	case GL_TRIANGLES:
		verts_off = num_verts % 3;
		break;
	case GL_TRIANGLE_STRIP:
		if (num_verts < 3)
			verts_off = num_verts;
		break;
	case GL_TRIANGLE_FAN:
		if (num_verts < 3)
			verts_off = num_verts;
		break;
	case GL_QUADS:
		verts_off = num_verts % 4;
		break;
	case GL_QUAD_STRIP:
		if (num_verts < 4)
			verts_off = num_verts;
		else
			verts_off = num_verts % 2;
		break;
	case GL_POLYGON:
		if (num_verts < 3)
			verts_off = num_verts;
		break;
	default:
		assert(0);
		return -1;
	}

	return num_verts - verts_off;
}

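/**
 * \brief Copy the index array into a freshly allocated DMA region.
 *
 * Indices are emitted as 32-bit values, hence the n_elts * 4 sizing.
 */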
static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	void *out;

	radeonAllocDmaRegion(&rmesa->radeon, &rmesa->radeon.tcl.elt_dma_bo,
			     &rmesa->radeon.tcl.elt_dma_offset, n_elts * 4, 4);
	radeon_bo_map(rmesa->radeon.tcl.elt_dma_bo, 1); /* map for writing */
	out = rmesa->radeon.tcl.elt_dma_bo->ptr + rmesa->radeon.tcl.elt_dma_offset;
	memcpy(out, elts, n_elts * 4);
	radeon_bo_unmap(rmesa->radeon.tcl.elt_dma_bo);
}

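/**
 * \brief Fire an indexed draw: emit the DRAW_INDX_2 packet followed by the
 * INDX_BUFFER packet that points the hardware at the previously uploaded
 * element buffer.
 *
 * The two branches differ only in how the buffer relocation is emitted
 * (legacy vs. kernel memory manager).
 */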
static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type)
{
	BATCH_LOCALS(&rmesa->radeon);

	if (vertex_count > 0) {
		BEGIN_BATCH(10);
		OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
		OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
			  (vertex_count << 16) |
			  type |
			  R300_VAP_VF_CNTL__INDEX_SIZE_32bit);

		if (!rmesa->radeon.radeonScreen->kernel_mm) {
			/* Legacy path: the relocation is emitted inline. */
			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
			OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
				  (R300_VAP_PORT_IDX0 >> 2));
			OUT_BATCH_RELOC(rmesa->radeon.tcl.elt_dma_offset,
					rmesa->radeon.tcl.elt_dma_bo,
					rmesa->radeon.tcl.elt_dma_offset,
					RADEON_GEM_DOMAIN_GTT, 0, 0);
			OUT_BATCH(vertex_count);
		} else {
			/* Kernel memory manager: emit the raw offset, then a
			 * separate relocation on the command stream. */
			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
			OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
				  (R300_VAP_PORT_IDX0 >> 2));
			OUT_BATCH(rmesa->radeon.tcl.elt_dma_offset);
			OUT_BATCH(vertex_count);
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->radeon.tcl.elt_dma_bo,
					      RADEON_GEM_DOMAIN_GTT, 0, 0);
		}
		END_BATCH();
	}
}

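/**
 * \brief Emit the LOAD_VBPNTR packet that describes the vertex arrays:
 * component count, stride and offset of each array.
 *
 * Attribute descriptors are packed two per dword, so the packet body is
 * one dword for the array count plus three dwords per pair of arrays
 * (and two dwords for a trailing unpaired array) -- hence the sz formula.
 */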
static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
{
	BATCH_LOCALS(&rmesa->radeon);
	uint32_t voffset;
	int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
	int i;

	if (RADEON_DEBUG & DEBUG_VERTS)
		fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
			offset);

	if (!rmesa->radeon.radeonScreen->kernel_mm) {
		/* Legacy path: relocations are emitted inline with the
		 * vertex array offsets. */
		BEGIN_BATCH(sz+2+(nr * 2));
		OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
		OUT_BATCH(nr);

		for (i = 0; i + 1 < nr; i += 2) {
			OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
				  (rmesa->radeon.tcl.aos[i].stride << 8) |
				  (rmesa->radeon.tcl.aos[i + 1].components << 16) |
				  (rmesa->radeon.tcl.aos[i + 1].stride << 24));

			voffset = rmesa->radeon.tcl.aos[i + 0].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->radeon.tcl.aos[i].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
			voffset = rmesa->radeon.tcl.aos[i + 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->radeon.tcl.aos[i+1].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
		}

		if (nr & 1) {
			OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
				  (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
			voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->radeon.tcl.aos[nr - 1].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
		}
		END_BATCH();
	} else {
		/* Kernel memory manager: emit the raw offsets first, then
		 * one relocation per buffer object on the command stream. */
		BEGIN_BATCH(sz+2+(nr * 2));
		OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
		OUT_BATCH(nr);

		for (i = 0; i + 1 < nr; i += 2) {
			OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
				  (rmesa->radeon.tcl.aos[i].stride << 8) |
				  (rmesa->radeon.tcl.aos[i + 1].components << 16) |
				  (rmesa->radeon.tcl.aos[i + 1].stride << 24));

			voffset = rmesa->radeon.tcl.aos[i + 0].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
			OUT_BATCH(voffset);
			voffset = rmesa->radeon.tcl.aos[i + 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
			OUT_BATCH(voffset);
		}

		if (nr & 1) {
			OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
				  (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
			voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
			OUT_BATCH(voffset);
		}
		for (i = 0; i + 1 < nr; i += 2) {
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->radeon.tcl.aos[i+0].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->radeon.tcl.aos[i+1].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
		}
		if (nr & 1) {
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->radeon.tcl.aos[nr-1].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
		}
		END_BATCH();
	}
}

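/**
 * \brief Fire a non-indexed draw: the hardware walks the vertex arrays
 * linearly (VERTEX_LIST) for the given number of vertices.
 */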
static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
{
	BATCH_LOCALS(&rmesa->radeon);

	BEGIN_BATCH(3);
	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
	END_BATCH();
}

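/**
 * \brief Render a single primitive run: translate the primitive type,
 * clamp the vertex count, then emit either the indexed (element buffer)
 * or the linear (vertex list) draw path.
 */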
static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
				   int start, int end, int prim)
{
	int type, num_verts;
	TNLcontext *tnl = TNL_CONTEXT(ctx);
	struct vertex_buffer *vb = &tnl->vb;

	type = r300PrimitiveType(rmesa, prim);
	num_verts = r300NumVerts(rmesa, end - start, prim);

	if (type < 0 || num_verts <= 0)
		return;

	/* Make space for at least 64 dwords.
	 * This is supposed to ensure that we can get all rendering
	 * commands into a single command buffer.
	 */
	rcommonEnsureCmdBufSpace(&rmesa->radeon, 64, __FUNCTION__);

	if (vb->Elts) {
		if (num_verts > 65535) {
			/* not implemented yet */
			WARN_ONCE("Too many elts\n");
			return;
		}
		/* Note: The following is incorrect, but it's the best I can do
		 * without a major refactoring of how DMA memory is handled.
		 * The problem: Ensuring that both vertex arrays *and* index
		 * arrays are at the right position, and then ensuring that
		 * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are emitted
		 * at once.
		 *
		 * So why is the following incorrect? Well, it seems like
		 * allocating the index array might actually evict the vertex
		 * arrays. *sigh*
		 */
		r300EmitElts(ctx, vb->Elts, num_verts);
		r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start);
		r300FireEB(rmesa, num_verts, type);
	} else {
		r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start);
		r300FireAOS(rmesa, num_verts, type);
	}
	COMMIT_BATCH();
}

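/**
 * \brief Render the current vertex buffer: update shaders and state,
 * then walk the primitive list. Returns GL_FALSE to signal that the
 * pipeline is done, or GL_TRUE to fall through to software rasterization
 * if the vertex arrays could not be emitted.
 */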
static GLboolean r300RunRender(GLcontext * ctx,
			       struct tnl_pipeline_stage *stage)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	int i;
	TNLcontext *tnl = TNL_CONTEXT(ctx);
	struct vertex_buffer *vb = &tnl->vb;

	if (RADEON_DEBUG & DEBUG_PRIMS)
		fprintf(stderr, "%s\n", __FUNCTION__);

	r300UpdateShaders(rmesa);
	if (r300EmitArrays(ctx))
		return GL_TRUE;

	r300UpdateShaderStates(rmesa);

	r300EmitCacheFlush(rmesa);
	radeonEmitState(&rmesa->radeon);

	for (i = 0; i < vb->PrimitiveCount; i++) {
		GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
		GLuint start = vb->Primitive[i].start;
		GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
		r300RunRenderPrimitive(rmesa, ctx, start, end, prim);
	}

	r300EmitCacheFlush(rmesa);

	radeonReleaseArrays(ctx, ~0);

	return GL_FALSE;
}

/* The "1 ||" makes the fallback warning unconditional; drop it to warn
 * only when DEBUG_FALLBACKS is set. */
#define FALLBACK_IF(expr)						\
	do {								\
		if (expr) {						\
			if (1 || RADEON_DEBUG & DEBUG_FALLBACKS)	\
				WARN_ONCE("Software fallback:%s\n",	\
					  #expr);			\
			return R300_FALLBACK_RAST;			\
		}							\
	} while(0)

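/**
 * \brief Check for conditions the hardware cannot handle and report the
 * required software fallback, if any.
 */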
static int r300Fallback(GLcontext * ctx)
{
	r300ContextPtr r300 = R300_CONTEXT(ctx);
	const unsigned back = ctx->Stencil._BackFace;
	struct r300_fragment_program *fp =
		(struct r300_fragment_program *) ctx->FragmentProgram._Current;

	FALLBACK_IF(r300->radeon.Fallback);

	if (fp && !fp->translated) {
		r300TranslateFragmentShader(ctx, ctx->FragmentProgram._Current);
		FALLBACK_IF(fp->error);
	}

	FALLBACK_IF(ctx->RenderMode != GL_RENDER);

	FALLBACK_IF(ctx->Stencil.Enabled && (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back]
		    || ctx->Stencil.ValueMask[0] != ctx->Stencil.ValueMask[back]
		    || ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[back]));

	if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite)
		FALLBACK_IF(ctx->Point.PointSprite);

	if (!r300->disable_lowimpact_fallback) {
		FALLBACK_IF(ctx->Polygon.StippleFlag);
		FALLBACK_IF(ctx->Multisample._Enabled);
		FALLBACK_IF(ctx->Line.StippleFlag);
		FALLBACK_IF(ctx->Line.SmoothFlag);
		FALLBACK_IF(ctx->Point.SmoothFlag);
	}

	return R300_FALLBACK_NONE;
}

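/**
 * \brief Run function for the hardware rasterization pipeline stage.
 * Returns GL_TRUE to hand the batch on to the next (software) stage
 * whenever a fallback condition is hit; otherwise renders with the
 * hardware and returns the result of r300RunRender().
 */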
static GLboolean r300RunNonTCLRender(GLcontext * ctx,
				     struct tnl_pipeline_stage *stage)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);

	if (RADEON_DEBUG & DEBUG_PRIMS)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (r300Fallback(ctx) >= R300_FALLBACK_RAST)
		return GL_TRUE;

	if (rmesa->options.hw_tcl_enabled == GL_FALSE)
		return GL_TRUE;

	if (!r300ValidateBuffers(ctx))
		return GL_TRUE;

	return r300RunRender(ctx, stage);
}

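/**
 * \brief Run function for the hardware TCL pipeline stage. Disables
 * hardware TCL for the context (deferring to the software pipeline)
 * when a TCL fallback is required or the vertex program is not native.
 */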
static GLboolean r300RunTCLRender(GLcontext * ctx,
				  struct tnl_pipeline_stage *stage)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	struct r300_vertex_program *vp;

	if (RADEON_DEBUG & DEBUG_PRIMS)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (rmesa->options.hw_tcl_enabled == GL_FALSE)
		return GL_TRUE;

	if (r300Fallback(ctx) >= R300_FALLBACK_TCL) {
		rmesa->options.hw_tcl_enabled = GL_FALSE;
		return GL_TRUE;
	}

	if (!r300ValidateBuffers(ctx))
		return GL_TRUE;

	r300UpdateShaders(rmesa);

	vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
	if (vp->native == GL_FALSE) {
		rmesa->options.hw_tcl_enabled = GL_FALSE;
		return GL_TRUE;
	}

	return r300RunRender(ctx, stage);
}

const struct tnl_pipeline_stage _r300_render_stage = {
	"r300 Hardware Rasterization",
	NULL,			/* private_data */
	NULL,			/* create */
	NULL,			/* destroy */
	NULL,			/* validate */
	r300RunNonTCLRender	/* run */
};

const struct tnl_pipeline_stage _r300_tcl_stage = {
	"r300 Hardware Transform, Clipping and Lighting",
	NULL,			/* private_data */
	NULL,			/* create */
	NULL,			/* destroy */
	NULL,			/* validate */
	r300RunTCLRender	/* run */
};