393bf754ff209e9836038df8bdb13daf97b165c9
[mesa.git] / src / mesa / drivers / dri / r300 / r300_render.c
1 /**************************************************************************
2
3 Copyright (C) 2004 Nicolai Haehnle.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /**
29 * \file
30 *
31 * \brief R300 Render (Vertex Buffer Implementation)
32 *
33 * The immediate implementation has been removed from CVS in favor of the vertex
34 * buffer implementation.
35 *
36 * The render functions are called by the pipeline manager to render a batch of
37 * primitives. They return TRUE to pass on to the next stage (i.e. software
38 * rasterization) or FALSE to indicate that the pipeline has finished after
39 * rendering something.
40 *
41 * When falling back to software TCL still attempt to use hardware
42 * rasterization.
43 *
44 * I am not sure that the cache related registers are setup correctly, but
45 * obviously this does work... Further investigation is needed.
46 *
47 * \author Nicolai Haehnle <prefect_@gmx.net>
48 *
49 * \todo Add immediate implementation back? Perhaps this is useful if there are
50 * no bugs...
51 */
52
53 #include "main/glheader.h"
54 #include "main/state.h"
55 #include "main/imports.h"
56 #include "main/enums.h"
57 #include "main/macros.h"
58 #include "main/context.h"
59 #include "main/dd.h"
60 #include "main/simple_list.h"
61 #include "main/api_arrayelt.h"
62 #include "swrast/swrast.h"
63 #include "swrast_setup/swrast_setup.h"
64 #include "vbo/vbo.h"
65 #include "tnl/tnl.h"
66 #include "tnl/t_vp_build.h"
67 #include "radeon_reg.h"
68 #include "radeon_macros.h"
69 #include "r300_context.h"
70 #include "r300_ioctl.h"
71 #include "r300_state.h"
72 #include "r300_reg.h"
73 #include "r300_tex.h"
74 #include "r300_emit.h"
75 #include "r300_fragprog.h"
76 extern int future_hw_tcl_on;
77
78 /**
79 * \brief Convert a OpenGL primitive type into a R300 primitive type.
80 */
81 int r300PrimitiveType(r300ContextPtr rmesa, int prim)
82 {
83 switch (prim & PRIM_MODE_MASK) {
84 case GL_POINTS:
85 return R300_VAP_VF_CNTL__PRIM_POINTS;
86 break;
87 case GL_LINES:
88 return R300_VAP_VF_CNTL__PRIM_LINES;
89 break;
90 case GL_LINE_STRIP:
91 return R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
92 break;
93 case GL_LINE_LOOP:
94 return R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
95 break;
96 case GL_TRIANGLES:
97 return R300_VAP_VF_CNTL__PRIM_TRIANGLES;
98 break;
99 case GL_TRIANGLE_STRIP:
100 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
101 break;
102 case GL_TRIANGLE_FAN:
103 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
104 break;
105 case GL_QUADS:
106 return R300_VAP_VF_CNTL__PRIM_QUADS;
107 break;
108 case GL_QUAD_STRIP:
109 return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
110 break;
111 case GL_POLYGON:
112 return R300_VAP_VF_CNTL__PRIM_POLYGON;
113 break;
114 default:
115 assert(0);
116 return -1;
117 break;
118 }
119 }
120
121 int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
122 {
123 int verts_off = 0;
124
125 switch (prim & PRIM_MODE_MASK) {
126 case GL_POINTS:
127 verts_off = 0;
128 break;
129 case GL_LINES:
130 verts_off = num_verts % 2;
131 break;
132 case GL_LINE_STRIP:
133 if (num_verts < 2)
134 verts_off = num_verts;
135 break;
136 case GL_LINE_LOOP:
137 if (num_verts < 2)
138 verts_off = num_verts;
139 break;
140 case GL_TRIANGLES:
141 verts_off = num_verts % 3;
142 break;
143 case GL_TRIANGLE_STRIP:
144 if (num_verts < 3)
145 verts_off = num_verts;
146 break;
147 case GL_TRIANGLE_FAN:
148 if (num_verts < 3)
149 verts_off = num_verts;
150 break;
151 case GL_QUADS:
152 verts_off = num_verts % 4;
153 break;
154 case GL_QUAD_STRIP:
155 if (num_verts < 4)
156 verts_off = num_verts;
157 else
158 verts_off = num_verts % 2;
159 break;
160 case GL_POLYGON:
161 if (num_verts < 3)
162 verts_off = num_verts;
163 break;
164 default:
165 assert(0);
166 return -1;
167 break;
168 }
169
170 return num_verts - verts_off;
171 }
172
/**
 * Copy the index (element) array into a freshly allocated DMA region so the
 * hardware can fetch it during an indexed draw.
 *
 * Indices are treated as 4 bytes each (32-bit), matching the
 * R300_VAP_VF_CNTL__INDEX_SIZE_32bit flag emitted by r300FireEB().
 * The resulting buffer object and offset are stored in
 * rmesa->state.elt_dma_bo / elt_dma_offset for r300FireEB() to reference.
 */
static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	void *out;

	/* n_elts 32-bit indices, 4-byte aligned. */
	radeonAllocDmaRegion(&rmesa->radeon, &rmesa->state.elt_dma_bo,
			     &rmesa->state.elt_dma_offset, n_elts * 4, 4);
	radeon_bo_map(rmesa->state.elt_dma_bo, 1); /* 1 = map for write */
	out = rmesa->state.elt_dma_bo->ptr + rmesa->state.elt_dma_offset;
	memcpy(out, elts, n_elts * 4);
	radeon_bo_unmap(rmesa->state.elt_dma_bo);
}
185
/**
 * Emit an indexed draw: a 3D_DRAW_INDX_2 packet describing the primitive,
 * followed by an INDX_BUFFER packet pointing at the element buffer that
 * r300EmitElts() set up in rmesa->state.elt_dma_bo/elt_dma_offset.
 *
 * \param vertex_count  number of indices to draw; no-op when zero.
 * \param type          R300_VAP_VF_CNTL__PRIM_* code from r300PrimitiveType().
 */
static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type)
{
	BATCH_LOCALS(&rmesa->radeon);

	if (vertex_count > 0) {
		BEGIN_BATCH(10);
		OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
		/* Walk indices, 32-bit each; count lives in bits 16+. */
		OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
			  ((vertex_count + 0) << 16) |
			  type |
			  R300_VAP_VF_CNTL__INDEX_SIZE_32bit);

		if (!rmesa->radeon.radeonScreen->kernel_mm) {
			/* Legacy (non-KMS) path: the buffer address is
			 * patched in-line via OUT_BATCH_RELOC. */
			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
			OUT_BATCH(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
			OUT_BATCH_RELOC(rmesa->state.elt_dma_offset,
					rmesa->state.elt_dma_bo,
					rmesa->state.elt_dma_offset,
					RADEON_GEM_DOMAIN_GTT, 0, 0);
			OUT_BATCH(vertex_count);
		} else {
			/* KMS path: emit the raw offset, then register the
			 * relocation with the command stream afterwards. */
			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
			OUT_BATCH(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
			OUT_BATCH(rmesa->state.elt_dma_offset);
			OUT_BATCH(vertex_count);
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->state.elt_dma_bo,
					      RADEON_GEM_DOMAIN_GTT, 0, 0);
		}
		END_BATCH();
	}
}
218
/**
 * Emit the LOAD_VBPNTR packet describing the vertex arrays (arrays of
 * structures) for this draw.
 *
 * Arrays are packed two per control dword (components/stride for a pair),
 * followed by one address dword per array. The two branches emit the same
 * packet layout; they differ only in how buffer addresses are relocated:
 * the legacy path patches addresses in-line (OUT_BATCH_RELOC), while the
 * KMS path emits raw offsets and appends all relocations after the packet.
 *
 * \param nr      number of vertex arrays in rmesa->state.aos[].
 * \param offset  starting vertex index; scaled by 4 * stride to get a byte
 *                offset (strides are in dwords).
 */
static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
{
	BATCH_LOCALS(&rmesa->radeon);
	uint32_t voffset;
	/* Packet payload size: 1 count dword, 3 dwords per array pair
	 * (control + two addresses), 2 dwords for a trailing odd array. */
	int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
	int i;

	if (RADEON_DEBUG & DEBUG_VERTS)
		fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
			offset);


	if (!rmesa->radeon.radeonScreen->kernel_mm) {
		/* Legacy path: relocations are patched in-line. */
		BEGIN_BATCH(sz+2+(nr * 2));
		OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
		OUT_BATCH(nr);

		/* Emit arrays in pairs: one control dword, two addresses. */
		for (i = 0; i + 1 < nr; i += 2) {
			OUT_BATCH((rmesa->state.aos[i].components << 0) |
				  (rmesa->state.aos[i].stride << 8) |
				  (rmesa->state.aos[i + 1].components << 16) |
				  (rmesa->state.aos[i + 1].stride << 24));

			voffset = rmesa->state.aos[i + 0].offset +
				offset * 4 * rmesa->state.aos[i + 0].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->state.aos[i].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
			voffset = rmesa->state.aos[i + 1].offset +
				offset * 4 * rmesa->state.aos[i + 1].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->state.aos[i+1].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
		}

		/* Trailing odd array: control dword then one address. */
		if (nr & 1) {
			OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) |
				  (rmesa->state.aos[nr - 1].stride << 8));
			voffset = rmesa->state.aos[nr - 1].offset +
				offset * 4 * rmesa->state.aos[nr - 1].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->state.aos[nr - 1].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
		}
		END_BATCH();
	} else {
		/* KMS path: emit raw offsets in the packet, then register
		 * every relocation with the CS in the same order. */
		BEGIN_BATCH(sz+2+(nr * 2));
		OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
		OUT_BATCH(nr);

		for (i = 0; i + 1 < nr; i += 2) {
			OUT_BATCH((rmesa->state.aos[i].components << 0) |
				  (rmesa->state.aos[i].stride << 8) |
				  (rmesa->state.aos[i + 1].components << 16) |
				  (rmesa->state.aos[i + 1].stride << 24));

			voffset = rmesa->state.aos[i + 0].offset +
				offset * 4 * rmesa->state.aos[i + 0].stride;
			OUT_BATCH(voffset);
			voffset = rmesa->state.aos[i + 1].offset +
				offset * 4 * rmesa->state.aos[i + 1].stride;
			OUT_BATCH(voffset);
		}

		if (nr & 1) {
			OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) |
				  (rmesa->state.aos[nr - 1].stride << 8));
			voffset = rmesa->state.aos[nr - 1].offset +
				offset * 4 * rmesa->state.aos[nr - 1].stride;
			OUT_BATCH(voffset);
		}
		/* Relocations, one per array, mirroring the address dwords
		 * emitted above. (voffset is recomputed but unused here.) */
		for (i = 0; i + 1 < nr; i += 2) {
			voffset = rmesa->state.aos[i + 0].offset +
				offset * 4 * rmesa->state.aos[i + 0].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->state.aos[i+0].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
			voffset = rmesa->state.aos[i + 1].offset +
				offset * 4 * rmesa->state.aos[i + 1].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->state.aos[i+1].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
		}
		if (nr & 1) {
			voffset = rmesa->state.aos[nr - 1].offset +
				offset * 4 * rmesa->state.aos[nr - 1].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->state.aos[nr-1].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
		}
		END_BATCH();
	}

}
323
/**
 * Emit a non-indexed draw (3D_DRAW_VBUF_2): walk the vertex arrays set up
 * by r300EmitAOS() in list order.
 *
 * \param vertex_count  number of vertices to draw (packed into bits 16+).
 * \param type          R300_VAP_VF_CNTL__PRIM_* code from r300PrimitiveType().
 */
static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
{
	BATCH_LOCALS(&rmesa->radeon);

	BEGIN_BATCH(3);
	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
	END_BATCH();
}
333
/**
 * Render one TNL primitive run [start, end) with the given GL primitive
 * mode: translate the mode, trim the vertex count to whole primitives,
 * then emit the vertex arrays plus either an indexed (EB) or sequential
 * (AOS) draw, and commit the batch.
 *
 * Silently skips runs with an unknown mode or too few vertices.
 */
static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
				   int start, int end, int prim)
{
	BATCH_LOCALS(&rmesa->radeon);
	int type, num_verts;
	TNLcontext *tnl = TNL_CONTEXT(ctx);
	struct vertex_buffer *vb = &tnl->vb;

	type = r300PrimitiveType(rmesa, prim);
	num_verts = r300NumVerts(rmesa, end - start, prim);

	/* Unknown primitive mode or not enough vertices to draw anything. */
	if (type < 0 || num_verts <= 0)
		return;

	/* Make space for at least 64 dwords.
	 * This is supposed to ensure that we can get all rendering
	 * commands into a single command buffer.
	 */
	rcommonEnsureCmdBufSpace(&rmesa->radeon, 64, __FUNCTION__);

	if (vb->Elts) {
		/* Indexed draw: vertex_count is a 16-bit field in the
		 * DRAW_INDX_2 packet, so larger runs are unsupported. */
		if (num_verts > 65535) {
			/* not implemented yet */
			WARN_ONCE("Too many elts\n");
			return;
		}
		/* Note: The following is incorrect, but it's the best I can do
		 * without a major refactoring of how DMA memory is handled.
		 * The problem: Ensuring that both vertex arrays *and* index
		 * arrays are at the right position, and then ensuring that
		 * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are emitted
		 * at once.
		 *
		 * So why is the following incorrect? Well, it seems like
		 * allocating the index array might actually evict the vertex
		 * arrays. *sigh*
		 */
		r300EmitElts(ctx, vb->Elts, num_verts);
		r300EmitAOS(rmesa, rmesa->state.aos_count, start);
		r300FireEB(rmesa, num_verts, type);
	} else {
		r300EmitAOS(rmesa, rmesa->state.aos_count, start);
		r300FireAOS(rmesa, num_verts, type);
	}
	COMMIT_BATCH();
}
380
/**
 * Render the whole TNL vertex buffer with the hardware rasterizer.
 *
 * Flow: update shaders, upload vertex arrays (r300EmitArrays), emit shader
 * state and a cache flush, then render each primitive run, flush again and
 * release the arrays.
 *
 * \return GL_TRUE to fall through to the next pipeline stage (software
 *         rasterization) when array setup fails, GL_FALSE when the
 *         primitives were rendered here.
 */
static GLboolean r300RunRender(GLcontext * ctx,
			       struct tnl_pipeline_stage *stage)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	int i;
	TNLcontext *tnl = TNL_CONTEXT(ctx);
	struct vertex_buffer *vb = &tnl->vb;

	if (RADEON_DEBUG & DEBUG_PRIMS)
		fprintf(stderr, "%s\n", __FUNCTION__);

	r300UpdateShaders(rmesa);
	/* Non-zero means array setup failed: pass on to software. */
	if (r300EmitArrays(ctx))
		return GL_TRUE;

	r300UpdateShaderStates(rmesa);

	r300EmitCacheFlush(rmesa);
	r300EmitState(rmesa);

	for (i = 0; i < vb->PrimitiveCount; i++) {
		GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
		GLuint start = vb->Primitive[i].start;
		GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
		r300RunRenderPrimitive(rmesa, ctx, start, end, prim);
	}

	r300EmitCacheFlush(rmesa);

	r300ReleaseArrays(ctx);

	/* GL_FALSE = pipeline finished; nothing left for later stages. */
	return GL_FALSE;
}
414
/* Bail out of r300Fallback() with R300_FALLBACK_RAST when `expr` holds,
 * warning once per call site.
 * NOTE(review): the `1 ||` makes the warning unconditional, ignoring
 * DEBUG_FALLBACKS — presumably a deliberate leftover so fallbacks are
 * always visible; confirm before removing. */
#define FALLBACK_IF(expr) \
	do { \
		if (expr) { \
			if (1 || RADEON_DEBUG & DEBUG_FALLBACKS) \
				WARN_ONCE("Software fallback:%s\n", \
					  #expr); \
			return R300_FALLBACK_RAST; \
		} \
	} while(0)
424
/**
 * Decide whether the current GL state can be handled by the hardware.
 *
 * Checks, in order: fragment program translatability (r500 vs r300 path by
 * chip family), render mode, two-sided stencil with differing parameters,
 * point sprites, and — unless disabled — several low-impact features
 * (stipple, multisample, smooth lines/points).
 *
 * \return R300_FALLBACK_NONE when hardware can render, or
 *         R300_FALLBACK_RAST (via FALLBACK_IF) to request software
 *         rasterization.
 */
static int r300Fallback(GLcontext * ctx)
{
	r300ContextPtr r300 = R300_CONTEXT(ctx);
	/* Do we need to use new-style shaders?
	 * Also is there a better way to do this? */
	if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
		/* RV515 and newer use the r500 fragment program path. */
		struct r500_fragment_program *fp = (struct r500_fragment_program *)
		    (char *)ctx->FragmentProgram._Current;
		if (fp) {
			if (!fp->translated) {
				/* Translate lazily; fall back if it fails. */
				r500TranslateFragmentShader(r300, fp);
				FALLBACK_IF(!fp->translated);
			}
		}
	} else {
		struct r300_fragment_program *fp = (struct r300_fragment_program *)
		    (char *)ctx->FragmentProgram._Current;
		if (fp) {
			if (!fp->translated) {
				r300TranslateFragmentShader(r300, fp);
				FALLBACK_IF(!fp->translated);
			}
		}
	}

	/* Feedback/select modes are not hardware-accelerated. */
	FALLBACK_IF(ctx->RenderMode != GL_RENDER);

	/* Two-sided stencil only works when both faces share ref/masks. */
	FALLBACK_IF(ctx->Stencil._TestTwoSide
		    && (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[1]
			|| ctx->Stencil.ValueMask[0] !=
			ctx->Stencil.ValueMask[1]
			|| ctx->Stencil.WriteMask[0] !=
			ctx->Stencil.WriteMask[1]));

	if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite)
		FALLBACK_IF(ctx->Point.PointSprite);

	/* Low-impact fallbacks can be disabled via driconf. */
	if (!r300->disable_lowimpact_fallback) {
		FALLBACK_IF(ctx->Polygon.StippleFlag);
		FALLBACK_IF(ctx->Multisample._Enabled);
		FALLBACK_IF(ctx->Line.StippleFlag);
		FALLBACK_IF(ctx->Line.SmoothFlag);
		FALLBACK_IF(ctx->Point.SmoothFlag);
	}

	return R300_FALLBACK_NONE;
}
472
473 static GLboolean r300RunNonTCLRender(GLcontext * ctx,
474 struct tnl_pipeline_stage *stage)
475 {
476 r300ContextPtr rmesa = R300_CONTEXT(ctx);
477
478 if (RADEON_DEBUG & DEBUG_PRIMS)
479 fprintf(stderr, "%s\n", __FUNCTION__);
480
481 if (r300Fallback(ctx) >= R300_FALLBACK_RAST)
482 return GL_TRUE;
483
484 if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
485 return GL_TRUE;
486
487 return r300RunRender(ctx, stage);
488 }
489
/**
 * Pipeline hook for the hardware-TCL path.
 *
 * Sets the global `hw_tcl_on` from `future_hw_tcl_on`, then bails out
 * (returning GL_TRUE so later stages run) when TCL is disabled, a fallback
 * is required, buffers fail validation, or the vertex program is not
 * native. On fallback/non-native paths it also clears `hw_tcl_on` so the
 * rest of the pipeline knows TCL is off.
 */
static GLboolean r300RunTCLRender(GLcontext * ctx,
				  struct tnl_pipeline_stage *stage)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	struct r300_vertex_program *vp;

	/* Latch the TCL enable chosen for this frame. */
	hw_tcl_on = future_hw_tcl_on;

	if (RADEON_DEBUG & DEBUG_PRIMS)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (hw_tcl_on == GL_FALSE)
		return GL_TRUE;

	if (r300Fallback(ctx) >= R300_FALLBACK_TCL) {
		hw_tcl_on = GL_FALSE;
		return GL_TRUE;
	}

	if (!r300ValidateBuffers(ctx))
		return GL_TRUE;

	r300UpdateShaders(rmesa);

	/* Vertex programs the hardware can't run natively force the
	 * software-TCL path. */
	vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
	if (vp->native == GL_FALSE) {
		hw_tcl_on = GL_FALSE;
		return GL_TRUE;
	}

	return r300RunRender(ctx, stage);
}
522
/* TNL pipeline stage for hardware rasterization with software TCL.
 * Positional init: name first, unused stage hooks as NULL, run callback
 * last (field names per tnl/t_context.h — confirm against that header). */
const struct tnl_pipeline_stage _r300_render_stage = {
	"r300 Hardware Rasterization",
	NULL,
	NULL,
	NULL,
	NULL,
	r300RunNonTCLRender
};
531
/* TNL pipeline stage for full hardware TCL; mirrors _r300_render_stage
 * but dispatches through r300RunTCLRender. */
const struct tnl_pipeline_stage _r300_tcl_stage = {
	"r300 Hardware Transform, Clipping and Lighting",
	NULL,
	NULL,
	NULL,
	NULL,
	r300RunTCLRender
};