23d068c339928071fdbbd4ea0e087ddd17c1bc32
[mesa.git] / src / mesa / drivers / dri / r300 / r300_render.c
1 /**************************************************************************
2
3 Copyright (C) 2004 Nicolai Haehnle.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /**
29 * \file
30 *
31 * \brief R300 Render (Vertex Buffer Implementation)
32 *
33 * The immediate implementation has been removed from CVS in favor of the vertex
34 * buffer implementation.
35 *
36 * The render functions are called by the pipeline manager to render a batch of
37 * primitives. They return TRUE to pass on to the next stage (i.e. software
38 * rasterization) or FALSE to indicate that the pipeline has finished after
39 * rendering something.
40 *
 * When falling back to software TCL, we still attempt to use hardware
 * rasterization.
43 *
44 * I am not sure that the cache related registers are setup correctly, but
45 * obviously this does work... Further investigation is needed.
46 *
47 * \author Nicolai Haehnle <prefect_@gmx.net>
48 *
49 * \todo Add immediate implementation back? Perhaps this is useful if there are
50 * no bugs...
51 */
52
53 #include "main/glheader.h"
54 #include "main/state.h"
55 #include "main/imports.h"
56 #include "main/enums.h"
57 #include "main/macros.h"
58 #include "main/context.h"
59 #include "main/dd.h"
60 #include "main/simple_list.h"
61 #include "main/api_arrayelt.h"
62 #include "swrast/swrast.h"
63 #include "swrast_setup/swrast_setup.h"
64 #include "vbo/vbo.h"
65 #include "tnl/tnl.h"
66 #include "tnl/t_vp_build.h"
67 #include "radeon_reg.h"
68 #include "radeon_macros.h"
69 #include "radeon_ioctl.h"
70 #include "radeon_state.h"
71 #include "r300_context.h"
72 #include "r300_ioctl.h"
73 #include "r300_state.h"
74 #include "r300_reg.h"
75 #include "r300_tex.h"
76 #include "r300_emit.h"
77 #include "r300_fragprog.h"
78 extern int future_hw_tcl_on;
79
80 /**
81 * \brief Convert a OpenGL primitive type into a R300 primitive type.
82 */
83 int r300PrimitiveType(r300ContextPtr rmesa, int prim)
84 {
85 switch (prim & PRIM_MODE_MASK) {
86 case GL_POINTS:
87 return R300_VAP_VF_CNTL__PRIM_POINTS;
88 break;
89 case GL_LINES:
90 return R300_VAP_VF_CNTL__PRIM_LINES;
91 break;
92 case GL_LINE_STRIP:
93 return R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
94 break;
95 case GL_LINE_LOOP:
96 return R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
97 break;
98 case GL_TRIANGLES:
99 return R300_VAP_VF_CNTL__PRIM_TRIANGLES;
100 break;
101 case GL_TRIANGLE_STRIP:
102 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
103 break;
104 case GL_TRIANGLE_FAN:
105 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
106 break;
107 case GL_QUADS:
108 return R300_VAP_VF_CNTL__PRIM_QUADS;
109 break;
110 case GL_QUAD_STRIP:
111 return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
112 break;
113 case GL_POLYGON:
114 return R300_VAP_VF_CNTL__PRIM_POLYGON;
115 break;
116 default:
117 assert(0);
118 return -1;
119 break;
120 }
121 }
122
123 int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
124 {
125 int verts_off = 0;
126
127 switch (prim & PRIM_MODE_MASK) {
128 case GL_POINTS:
129 verts_off = 0;
130 break;
131 case GL_LINES:
132 verts_off = num_verts % 2;
133 break;
134 case GL_LINE_STRIP:
135 if (num_verts < 2)
136 verts_off = num_verts;
137 break;
138 case GL_LINE_LOOP:
139 if (num_verts < 2)
140 verts_off = num_verts;
141 break;
142 case GL_TRIANGLES:
143 verts_off = num_verts % 3;
144 break;
145 case GL_TRIANGLE_STRIP:
146 if (num_verts < 3)
147 verts_off = num_verts;
148 break;
149 case GL_TRIANGLE_FAN:
150 if (num_verts < 3)
151 verts_off = num_verts;
152 break;
153 case GL_QUADS:
154 verts_off = num_verts % 4;
155 break;
156 case GL_QUAD_STRIP:
157 if (num_verts < 4)
158 verts_off = num_verts;
159 else
160 verts_off = num_verts % 2;
161 break;
162 case GL_POLYGON:
163 if (num_verts < 3)
164 verts_off = num_verts;
165 break;
166 default:
167 assert(0);
168 return -1;
169 break;
170 }
171
172 return num_verts - verts_off;
173 }
174
175 static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts)
176 {
177 r300ContextPtr rmesa = R300_CONTEXT(ctx);
178 void *out;
179
180 radeonAllocDmaRegion(&rmesa->radeon, &rmesa->state.elt_dma_bo,
181 &rmesa->state.elt_dma_offset, n_elts * 4, 4);
182 radeon_bo_map(rmesa->state.elt_dma_bo, 1);
183 out = rmesa->state.elt_dma_bo->ptr + rmesa->state.elt_dma_offset;
184 memcpy(out, elts, n_elts * 4);
185 radeon_bo_unmap(rmesa->state.elt_dma_bo);
186 }
187
/**
 * \brief Emit a 3D_DRAW_INDX_2 draw using the element buffer previously
 * uploaded by r300EmitElts().
 *
 * \param rmesa        r300 context
 * \param vertex_count number of 32-bit indices to draw; no-op if <= 0
 * \param type         R300_VAP_VF_CNTL__PRIM_* code from r300PrimitiveType()
 *
 * Legacy path: the buffer address is emitted inline with OUT_BATCH_RELOC.
 * Kernel-mm path: the raw offset goes into the packet and the relocation
 * is appended afterwards via radeon_cs_write_reloc.
 */
static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type)
{
	BATCH_LOCALS(&rmesa->radeon);

	if (vertex_count > 0) {
		BEGIN_BATCH(10);
		OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
		/* Indexed walk, count in the high half, 32-bit index size. */
		OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
			  ((vertex_count + 0) << 16) |
			  type |
			  R300_VAP_VF_CNTL__INDEX_SIZE_32bit);

		if (!rmesa->radeon.radeonScreen->kernel_mm) {
			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
			/* R300_EB_UNK1/UNK2: purpose not fully understood,
			 * required for the packet to work (see file header
			 * note about cache registers). */
			OUT_BATCH(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
			OUT_BATCH_RELOC(rmesa->state.elt_dma_offset,
					rmesa->state.elt_dma_bo,
					rmesa->state.elt_dma_offset,
					RADEON_GEM_DOMAIN_GTT, 0, 0);
			OUT_BATCH(vertex_count);
		} else {
			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
			OUT_BATCH(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
			OUT_BATCH(rmesa->state.elt_dma_offset);
			OUT_BATCH(vertex_count);
			/* Reloc is recorded out-of-band for the new kernel
			 * memory manager. */
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->state.elt_dma_bo,
					      RADEON_GEM_DOMAIN_GTT, 0, 0);
		}
		END_BATCH();
	}
}
220
/**
 * \brief Emit the 3D_LOAD_VBPNTR packet describing the vertex arrays
 * (array-of-structures) for the upcoming draw.
 *
 * \param rmesa  r300 context; arrays come from rmesa->state.aos[]
 * \param nr     number of vertex arrays
 * \param offset index of the first vertex; folded into each array's start
 *               address as offset * 4 * stride (strides appear to be in
 *               dwords -- hence the * 4)
 *
 * Packet layout: one count dword, then array descriptors packed two per
 * dword (components/stride in the low and high halves), each pair
 * followed by its two addresses; a trailing odd array gets its own
 * descriptor dword and one address.  That is what
 * sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2 counts.
 *
 * Legacy path: addresses carry inline relocs (OUT_BATCH_RELOC).
 * Kernel-mm path: raw offsets are written into the packet and all
 * relocations are appended afterwards, in the same array order.
 */
static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
{
	BATCH_LOCALS(&rmesa->radeon);
	uint32_t voffset;
	int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
	int i;

	if (RADEON_DEBUG & DEBUG_VERTS)
		fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
			offset);


	if (!rmesa->radeon.radeonScreen->kernel_mm) {
		BEGIN_BATCH(sz+2+(nr * 2));
		OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
		OUT_BATCH(nr);

		/* Two arrays per descriptor dword, each followed by its
		 * relocated address. */
		for (i = 0; i + 1 < nr; i += 2) {
			OUT_BATCH((rmesa->state.aos[i].components << 0) |
				  (rmesa->state.aos[i].stride << 8) |
				  (rmesa->state.aos[i + 1].components << 16) |
				  (rmesa->state.aos[i + 1].stride << 24));

			voffset = rmesa->state.aos[i + 0].offset +
				offset * 4 * rmesa->state.aos[i + 0].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->state.aos[i].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
			voffset = rmesa->state.aos[i + 1].offset +
				offset * 4 * rmesa->state.aos[i + 1].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->state.aos[i+1].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
		}

		/* Odd array count: last descriptor dword holds one array. */
		if (nr & 1) {
			OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) |
				  (rmesa->state.aos[nr - 1].stride << 8));
			voffset = rmesa->state.aos[nr - 1].offset +
				offset * 4 * rmesa->state.aos[nr - 1].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->state.aos[nr - 1].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
		}
		END_BATCH();
	} else {

		BEGIN_BATCH(sz+2+(nr * 2));
		OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
		OUT_BATCH(nr);

		/* Same packet body, but addresses are raw offsets... */
		for (i = 0; i + 1 < nr; i += 2) {
			OUT_BATCH((rmesa->state.aos[i].components << 0) |
				  (rmesa->state.aos[i].stride << 8) |
				  (rmesa->state.aos[i + 1].components << 16) |
				  (rmesa->state.aos[i + 1].stride << 24));

			voffset = rmesa->state.aos[i + 0].offset +
				offset * 4 * rmesa->state.aos[i + 0].stride;
			OUT_BATCH(voffset);
			voffset = rmesa->state.aos[i + 1].offset +
				offset * 4 * rmesa->state.aos[i + 1].stride;
			OUT_BATCH(voffset);
		}

		if (nr & 1) {
			OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) |
				  (rmesa->state.aos[nr - 1].stride << 8));
			voffset = rmesa->state.aos[nr - 1].offset +
				offset * 4 * rmesa->state.aos[nr - 1].stride;
			OUT_BATCH(voffset);
		}
		/* ...and the relocations for every array are written after
		 * the packet, in the same order as the addresses above. */
		for (i = 0; i + 1 < nr; i += 2) {
			voffset = rmesa->state.aos[i + 0].offset +
				offset * 4 * rmesa->state.aos[i + 0].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->state.aos[i+0].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
			voffset = rmesa->state.aos[i + 1].offset +
				offset * 4 * rmesa->state.aos[i + 1].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->state.aos[i+1].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
		}
		if (nr & 1) {
			voffset = rmesa->state.aos[nr - 1].offset +
				offset * 4 * rmesa->state.aos[nr - 1].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->state.aos[nr-1].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
		}
		END_BATCH();
	}

}
325
/**
 * \brief Emit a 3D_DRAW_VBUF_2 draw: \a vertex_count sequential vertices
 * pulled from the arrays set up by r300EmitAOS().
 *
 * \param type R300_VAP_VF_CNTL__PRIM_* code from r300PrimitiveType().
 */
static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
{
	BATCH_LOCALS(&rmesa->radeon);

	BEGIN_BATCH(3);
	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
	/* Sequential vertex-list walk; count in the high half. */
	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
	END_BATCH();
}
335
/**
 * \brief Render one primitive run [\a start, \a end) from the current
 * tnl vertex buffer.
 *
 * Picks the indexed path (EmitElts + FireEB) when the vertex buffer has
 * an element list, the sequential path (FireAOS) otherwise.  Runs that
 * are empty after clamping to whole primitives are silently dropped, as
 * are indexed draws of more than 65535 vertices (not implemented yet).
 */
static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
				   int start, int end, int prim)
{
	BATCH_LOCALS(&rmesa->radeon);
	int type, num_verts;
	TNLcontext *tnl = TNL_CONTEXT(ctx);
	struct vertex_buffer *vb = &tnl->vb;

	type = r300PrimitiveType(rmesa, prim);
	num_verts = r300NumVerts(rmesa, end - start, prim);

	/* Unknown primitive or nothing drawable after clamping. */
	if (type < 0 || num_verts <= 0)
		return;

	/* Make space for at least 64 dwords.
	 * This is supposed to ensure that we can get all rendering
	 * commands into a single command buffer.
	 */
	rcommonEnsureCmdBufSpace(&rmesa->radeon, 64, __FUNCTION__);

	if (vb->Elts) {
		if (num_verts > 65535) {
			/* not implemented yet */
			WARN_ONCE("Too many elts\n");
			return;
		}
		/* Note: The following is incorrect, but it's the best I can do
		 * without a major refactoring of how DMA memory is handled.
		 * The problem: Ensuring that both vertex arrays *and* index
		 * arrays are at the right position, and then ensuring that
		 * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are emitted
		 * at once.
		 *
		 * So why is the following incorrect? Well, it seems like
		 * allocating the index array might actually evict the vertex
		 * arrays. *sigh*
		 */
		r300EmitElts(ctx, vb->Elts, num_verts);
		r300EmitAOS(rmesa, rmesa->state.aos_count, start);
		r300FireEB(rmesa, num_verts, type);
	} else {
		r300EmitAOS(rmesa, rmesa->state.aos_count, start);
		r300FireAOS(rmesa, num_verts, type);
	}
	COMMIT_BATCH();
}
382
383 static GLboolean r300RunRender(GLcontext * ctx,
384 struct tnl_pipeline_stage *stage)
385 {
386 r300ContextPtr rmesa = R300_CONTEXT(ctx);
387 int i;
388 TNLcontext *tnl = TNL_CONTEXT(ctx);
389 struct vertex_buffer *vb = &tnl->vb;
390
391 if (RADEON_DEBUG & DEBUG_PRIMS)
392 fprintf(stderr, "%s\n", __FUNCTION__);
393
394 r300UpdateShaders(rmesa);
395 if (r300EmitArrays(ctx))
396 return GL_TRUE;
397
398 r300UpdateShaderStates(rmesa);
399
400 r300EmitCacheFlush(rmesa);
401 r300EmitState(rmesa);
402
403 for (i = 0; i < vb->PrimitiveCount; i++) {
404 GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
405 GLuint start = vb->Primitive[i].start;
406 GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
407 r300RunRenderPrimitive(rmesa, ctx, start, end, prim);
408 }
409
410 r300EmitCacheFlush(rmesa);
411
412 r300ReleaseArrays(ctx);
413
414 return GL_FALSE;
415 }
416
/**
 * \brief Return R300_FALLBACK_RAST from the enclosing function when
 * \a expr is true, warning once when fallback debugging is enabled.
 *
 * Fix: the condition previously read `1 || RADEON_DEBUG & ...`, a
 * leftover debug override that made the warning unconditional and the
 * DEBUG_FALLBACKS flag dead; the override has been removed.
 */
#define FALLBACK_IF(expr)					\
	do {							\
		if (expr) {					\
			if (RADEON_DEBUG & DEBUG_FALLBACKS)	\
				WARN_ONCE("Software fallback:%s\n", \
					  #expr);		\
			return R300_FALLBACK_RAST;		\
		}						\
	} while(0)
426
/**
 * \brief Decide whether the current GL state can be rasterized in
 * hardware.
 *
 * First makes sure the current fragment program is translated (r500
 * flavour for RV515 and newer, r300 otherwise), falling back if the
 * translation fails.  Then checks render mode, two-sided stencil
 * limits, point sprites and, unless low-impact fallbacks are disabled,
 * stipple/smooth/multisample state.
 *
 * \return R300_FALLBACK_RAST when software rasterization is required,
 *         R300_FALLBACK_NONE otherwise.
 *
 * Side effect: may translate the fragment program in place.
 */
static int r300Fallback(GLcontext * ctx)
{
	r300ContextPtr r300 = R300_CONTEXT(ctx);
	/* Do we need to use new-style shaders?
	 * Also is there a better way to do this? */
	if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
		/* (char *) intermediate cast: presumably silences an
		 * incompatible-pointer warning -- TODO confirm. */
		struct r500_fragment_program *fp = (struct r500_fragment_program *)
			(char *)ctx->FragmentProgram._Current;
		if (fp) {
			if (!fp->translated) {
				r500TranslateFragmentShader(r300, fp);
				FALLBACK_IF(!fp->translated);
			}
		}
	} else {
		struct r300_fragment_program *fp = (struct r300_fragment_program *)
			(char *)ctx->FragmentProgram._Current;
		if (fp) {
			if (!fp->translated) {
				r300TranslateFragmentShader(r300, fp);
				FALLBACK_IF(!fp->translated);
			}
		}
	}

	/* Feedback/select modes are not handled in hardware. */
	FALLBACK_IF(ctx->RenderMode != GL_RENDER);

	/* Two-sided stencil only works when ref/mask values match on
	 * both faces. */
	FALLBACK_IF(ctx->Stencil._TestTwoSide
		    && (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[1]
			|| ctx->Stencil.ValueMask[0] !=
			ctx->Stencil.ValueMask[1]
			|| ctx->Stencil.WriteMask[0] !=
			ctx->Stencil.WriteMask[1]));

	if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite)
		FALLBACK_IF(ctx->Point.PointSprite);

	/* "Low impact" fallbacks: visually minor features the user may
	 * choose to render (incorrectly) in hardware instead. */
	if (!r300->disable_lowimpact_fallback) {
		FALLBACK_IF(ctx->Polygon.StippleFlag);
		FALLBACK_IF(ctx->Multisample._Enabled);
		FALLBACK_IF(ctx->Line.StippleFlag);
		FALLBACK_IF(ctx->Line.SmoothFlag);
		FALLBACK_IF(ctx->Point.SmoothFlag);
	}

	return R300_FALLBACK_NONE;
}
474
475 static GLboolean r300RunNonTCLRender(GLcontext * ctx,
476 struct tnl_pipeline_stage *stage)
477 {
478 r300ContextPtr rmesa = R300_CONTEXT(ctx);
479
480 if (RADEON_DEBUG & DEBUG_PRIMS)
481 fprintf(stderr, "%s\n", __FUNCTION__);
482
483 if (r300Fallback(ctx) >= R300_FALLBACK_RAST)
484 return GL_TRUE;
485
486 if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
487 return GL_TRUE;
488
489 return r300RunRender(ctx, stage);
490 }
491
/**
 * \brief Pipeline stage callback: render with hardware TCL.
 *
 * \return GL_TRUE to pass the batch to the next stage, GL_FALSE once
 *         the hardware rendered it.
 *
 * Side effect: maintains the hw_tcl_on global -- it is refreshed from
 * future_hw_tcl_on on every call and cleared when a TCL fallback is hit
 * or the vertex program cannot run natively.
 */
static GLboolean r300RunTCLRender(GLcontext * ctx,
				  struct tnl_pipeline_stage *stage)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	struct r300_vertex_program *vp;

	/* Pick up any deferred TCL on/off request. */
	hw_tcl_on = future_hw_tcl_on;

	if (RADEON_DEBUG & DEBUG_PRIMS)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (hw_tcl_on == GL_FALSE)
		return GL_TRUE;

	if (r300Fallback(ctx) >= R300_FALLBACK_TCL) {
		hw_tcl_on = GL_FALSE;
		return GL_TRUE;
	}

	if (!r300ValidateBuffers(ctx))
		return GL_TRUE;

	r300UpdateShaders(rmesa);

	/* A vertex program the hardware cannot execute forces software
	 * TCL. */
	vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
	if (vp->native == GL_FALSE) {
		hw_tcl_on = GL_FALSE;
		return GL_TRUE;
	}

	return r300RunRender(ctx, stage);
}
524
/**
 * \brief TNL pipeline stage: hardware rasterization (software TCL path).
 *
 * Only the name and the run callback are provided; the remaining hooks
 * are NULL.  NOTE(review): slot meanings follow struct
 * tnl_pipeline_stage -- confirm field order against tnl/t_context.h.
 */
const struct tnl_pipeline_stage _r300_render_stage = {
	"r300 Hardware Rasterization",
	NULL,
	NULL,
	NULL,
	NULL,
	r300RunNonTCLRender
};
533
/**
 * \brief TNL pipeline stage: full hardware TCL path.
 *
 * Only the name and the run callback are provided; the remaining hooks
 * are NULL.  NOTE(review): slot meanings follow struct
 * tnl_pipeline_stage -- confirm field order against tnl/t_context.h.
 */
const struct tnl_pipeline_stage _r300_tcl_stage = {
	"r300 Hardware Transform, Clipping and Lighting",
	NULL,
	NULL,
	NULL,
	NULL,
	r300RunTCLRender
};