r300: always route 4 texcoord components to RS
[mesa.git] / src / mesa / drivers / dri / r300 / r300_render.c
1 /**************************************************************************
2
3 Copyright (C) 2004 Nicolai Haehnle.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /**
29 * \file
30 *
31 * \brief R300 Render (Vertex Buffer Implementation)
32 *
33 * The immediate implementation has been removed from CVS in favor of the vertex
34 * buffer implementation.
35 *
36 * The render functions are called by the pipeline manager to render a batch of
37 * primitives. They return TRUE to pass on to the next stage (i.e. software
38 * rasterization) or FALSE to indicate that the pipeline has finished after
39 * rendering something.
40 *
41 * When falling back to software TCL still attempt to use hardware
42 * rasterization.
43 *
44 * I am not sure that the cache related registers are setup correctly, but
45 * obviously this does work... Further investigation is needed.
46 *
47 * \author Nicolai Haehnle <prefect_@gmx.net>
48 *
49 * \todo Add immediate implementation back? Perhaps this is useful if there are
50 * no bugs...
51 */
52
53 #include "main/glheader.h"
54 #include "main/state.h"
55 #include "main/imports.h"
56 #include "main/enums.h"
57 #include "main/macros.h"
58 #include "main/context.h"
59 #include "main/dd.h"
60 #include "main/simple_list.h"
61 #include "main/api_arrayelt.h"
62 #include "swrast/swrast.h"
63 #include "swrast_setup/swrast_setup.h"
64 #include "vbo/vbo.h"
65 #include "tnl/tnl.h"
66 #include "tnl/t_vp_build.h"
67 #include "radeon_reg.h"
68 #include "radeon_macros.h"
69 #include "r300_context.h"
70 #include "r300_ioctl.h"
71 #include "r300_state.h"
72 #include "r300_reg.h"
73 #include "r300_tex.h"
74 #include "r300_emit.h"
75 #include "r300_fragprog_common.h"
76
77 extern int future_hw_tcl_on;
78
79 /**
80 * \brief Convert a OpenGL primitive type into a R300 primitive type.
81 */
82 int r300PrimitiveType(r300ContextPtr rmesa, int prim)
83 {
84 switch (prim & PRIM_MODE_MASK) {
85 case GL_POINTS:
86 return R300_VAP_VF_CNTL__PRIM_POINTS;
87 break;
88 case GL_LINES:
89 return R300_VAP_VF_CNTL__PRIM_LINES;
90 break;
91 case GL_LINE_STRIP:
92 return R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
93 break;
94 case GL_LINE_LOOP:
95 return R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
96 break;
97 case GL_TRIANGLES:
98 return R300_VAP_VF_CNTL__PRIM_TRIANGLES;
99 break;
100 case GL_TRIANGLE_STRIP:
101 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
102 break;
103 case GL_TRIANGLE_FAN:
104 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
105 break;
106 case GL_QUADS:
107 return R300_VAP_VF_CNTL__PRIM_QUADS;
108 break;
109 case GL_QUAD_STRIP:
110 return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
111 break;
112 case GL_POLYGON:
113 return R300_VAP_VF_CNTL__PRIM_POLYGON;
114 break;
115 default:
116 assert(0);
117 return -1;
118 break;
119 }
120 }
121
122 int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
123 {
124 int verts_off = 0;
125
126 switch (prim & PRIM_MODE_MASK) {
127 case GL_POINTS:
128 verts_off = 0;
129 break;
130 case GL_LINES:
131 verts_off = num_verts % 2;
132 break;
133 case GL_LINE_STRIP:
134 if (num_verts < 2)
135 verts_off = num_verts;
136 break;
137 case GL_LINE_LOOP:
138 if (num_verts < 2)
139 verts_off = num_verts;
140 break;
141 case GL_TRIANGLES:
142 verts_off = num_verts % 3;
143 break;
144 case GL_TRIANGLE_STRIP:
145 if (num_verts < 3)
146 verts_off = num_verts;
147 break;
148 case GL_TRIANGLE_FAN:
149 if (num_verts < 3)
150 verts_off = num_verts;
151 break;
152 case GL_QUADS:
153 verts_off = num_verts % 4;
154 break;
155 case GL_QUAD_STRIP:
156 if (num_verts < 4)
157 verts_off = num_verts;
158 else
159 verts_off = num_verts % 2;
160 break;
161 case GL_POLYGON:
162 if (num_verts < 3)
163 verts_off = num_verts;
164 break;
165 default:
166 assert(0);
167 return -1;
168 break;
169 }
170
171 return num_verts - verts_off;
172 }
173
174 static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts)
175 {
176 r300ContextPtr rmesa = R300_CONTEXT(ctx);
177 void *out;
178
179 radeonAllocDmaRegion(&rmesa->radeon, &rmesa->radeon.tcl.elt_dma_bo,
180 &rmesa->radeon.tcl.elt_dma_offset, n_elts * 4, 4);
181 radeon_bo_map(rmesa->radeon.tcl.elt_dma_bo, 1);
182 out = rmesa->radeon.tcl.elt_dma_bo->ptr + rmesa->radeon.tcl.elt_dma_offset;
183 memcpy(out, elts, n_elts * 4);
184 radeon_bo_unmap(rmesa->radeon.tcl.elt_dma_bo);
185 }
186
/**
 * \brief Emit an indexed draw: a 3D_DRAW_INDX_2 packet followed by the
 * INDX_BUFFER packet pointing at the index array uploaded by r300EmitElts().
 *
 * \param vertex_count number of 32-bit indices to draw; no-op if <= 0
 * \param type         hardware primitive type (R300_VAP_VF_CNTL__PRIM_*)
 */
static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type)
{
	BATCH_LOCALS(&rmesa->radeon);

	if (vertex_count > 0) {
		BEGIN_BATCH(10);
		OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
		/* Vertex count lives in the high 16 bits; indices are
		 * always emitted as 32-bit values. */
		OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
			  ((vertex_count + 0) << 16) |
			  type |
			  R300_VAP_VF_CNTL__INDEX_SIZE_32bit);

		if (!rmesa->radeon.radeonScreen->kernel_mm) {
			/* Legacy path: relocation is emitted inline inside
			 * the INDX_BUFFER packet. */
			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
			OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
				  (R300_VAP_PORT_IDX0 >> 2));
			OUT_BATCH_RELOC(rmesa->radeon.tcl.elt_dma_offset,
					rmesa->radeon.tcl.elt_dma_bo,
					rmesa->radeon.tcl.elt_dma_offset,
					RADEON_GEM_DOMAIN_GTT, 0, 0);
			OUT_BATCH(vertex_count);
		} else {
			/* Kernel memory-manager path: emit the raw offset and
			 * record the relocation separately on the CS. */
			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
			OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
				  (R300_VAP_PORT_IDX0 >> 2));
			OUT_BATCH(rmesa->radeon.tcl.elt_dma_offset);
			OUT_BATCH(vertex_count);
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->radeon.tcl.elt_dma_bo,
					      RADEON_GEM_DOMAIN_GTT, 0, 0);
		}
		END_BATCH();
	}
}
221
/**
 * \brief Emit the vertex array (AOS) pointers via a 3D_LOAD_VBPNTR packet.
 *
 * \param nr     number of arrays to emit from rmesa->radeon.tcl.aos[]
 * \param offset start vertex; folded into each array's buffer offset as
 *               offset * 4 * stride (stride is counted in dwords)
 *
 * Arrays are packed two per control dword (components/stride pairs), each
 * followed by its buffer address. An odd trailing array gets its own
 * control dword. The legacy and kernel_mm paths emit the same packet
 * layout; they differ only in how relocations are recorded.
 */
static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
{
	BATCH_LOCALS(&rmesa->radeon);
	uint32_t voffset;
	/* Packet body size: 1 count dword + 3 dwords per array pair
	 * (control + 2 pointers) + 2 dwords for an odd trailing array. */
	int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
	int i;

	if (RADEON_DEBUG & DEBUG_VERTS)
		fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
			offset);


	if (!rmesa->radeon.radeonScreen->kernel_mm) {
		/* Legacy path: relocations are emitted inline with each
		 * buffer address. */
		BEGIN_BATCH(sz+2+(nr * 2));
		OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
		OUT_BATCH(nr);

		for (i = 0; i + 1 < nr; i += 2) {
			OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
				  (rmesa->radeon.tcl.aos[i].stride << 8) |
				  (rmesa->radeon.tcl.aos[i + 1].components << 16) |
				  (rmesa->radeon.tcl.aos[i + 1].stride << 24));

			voffset = rmesa->radeon.tcl.aos[i + 0].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->radeon.tcl.aos[i].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
			voffset = rmesa->radeon.tcl.aos[i + 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->radeon.tcl.aos[i+1].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
		}

		if (nr & 1) {
			/* Odd array count: last array gets its own control dword. */
			OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
				  (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
			voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->radeon.tcl.aos[nr - 1].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
		}
		END_BATCH();
	} else {
		/* Kernel memory-manager path: emit raw offsets in the packet,
		 * then record all relocations on the CS afterwards, in the
		 * same order the addresses were emitted. */
		BEGIN_BATCH(sz+2+(nr * 2));
		OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
		OUT_BATCH(nr);

		for (i = 0; i + 1 < nr; i += 2) {
			OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
				  (rmesa->radeon.tcl.aos[i].stride << 8) |
				  (rmesa->radeon.tcl.aos[i + 1].components << 16) |
				  (rmesa->radeon.tcl.aos[i + 1].stride << 24));

			voffset = rmesa->radeon.tcl.aos[i + 0].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
			OUT_BATCH(voffset);
			voffset = rmesa->radeon.tcl.aos[i + 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
			OUT_BATCH(voffset);
		}

		if (nr & 1) {
			OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
				  (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
			voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
			OUT_BATCH(voffset);
		}
		for (i = 0; i + 1 < nr; i += 2) {
			voffset = rmesa->radeon.tcl.aos[i + 0].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->radeon.tcl.aos[i+0].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
			voffset = rmesa->radeon.tcl.aos[i + 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->radeon.tcl.aos[i+1].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
		}
		if (nr & 1) {
			voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->radeon.tcl.aos[nr-1].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
		}
		END_BATCH();
	}

}
326
/**
 * \brief Emit a non-indexed draw (3D_DRAW_VBUF_2) walking the vertex
 * arrays previously set up by r300EmitAOS().
 *
 * \param vertex_count number of vertices to draw (high 16 bits of VF_CNTL)
 * \param type         hardware primitive type (R300_VAP_VF_CNTL__PRIM_*)
 */
static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
{
	BATCH_LOCALS(&rmesa->radeon);

	BEGIN_BATCH(3);
	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
	END_BATCH();
}
336
/**
 * \brief Render one primitive run [start, end) from the TNL vertex buffer.
 *
 * Translates the GL primitive to the hardware type, trims the vertex count
 * to whole primitives, then emits either an indexed (Elts) or sequential
 * draw. Silently returns when the primitive is unknown or empty.
 */
static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
				   int start, int end, int prim)
{
	int type, num_verts;
	TNLcontext *tnl = TNL_CONTEXT(ctx);
	struct vertex_buffer *vb = &tnl->vb;

	type = r300PrimitiveType(rmesa, prim);
	num_verts = r300NumVerts(rmesa, end - start, prim);

	if (type < 0 || num_verts <= 0)
		return;

	/* Make space for at least 64 dwords.
	 * This is supposed to ensure that we can get all rendering
	 * commands into a single command buffer.
	 */
	rcommonEnsureCmdBufSpace(&rmesa->radeon, 64, __FUNCTION__);

	if (vb->Elts) {
		/* Indexed path: DRAW_INDX_2 carries only a 16-bit count. */
		if (num_verts > 65535) {
			/* not implemented yet */
			WARN_ONCE("Too many elts\n");
			return;
		}
		/* Note: The following is incorrect, but it's the best I can do
		 * without a major refactoring of how DMA memory is handled.
		 * The problem: Ensuring that both vertex arrays *and* index
		 * arrays are at the right position, and then ensuring that
		 * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are emitted
		 * at once.
		 *
		 * So why is the following incorrect? Well, it seems like
		 * allocating the index array might actually evict the vertex
		 * arrays. *sigh*
		 */
		r300EmitElts(ctx, vb->Elts, num_verts);
		r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start);
		r300FireEB(rmesa, num_verts, type);
	} else {
		r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start);
		r300FireAOS(rmesa, num_verts, type);
	}
	COMMIT_BATCH();
}
382
/**
 * \brief Render the whole TNL vertex buffer with hardware rasterization.
 *
 * Updates shaders/state, emits cache flushes around the draw, then renders
 * each primitive run. Returns GL_TRUE to pass on to the next pipeline
 * stage (e.g. if vertex array setup failed), GL_FALSE when rendering
 * completed here.
 */
static GLboolean r300RunRender(GLcontext * ctx,
			       struct tnl_pipeline_stage *stage)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	int i;
	TNLcontext *tnl = TNL_CONTEXT(ctx);
	struct vertex_buffer *vb = &tnl->vb;

	if (RADEON_DEBUG & DEBUG_PRIMS)
		fprintf(stderr, "%s\n", __FUNCTION__);

	r300UpdateShaders(rmesa);
	/* Non-zero means the vertex arrays could not be emitted; let the
	 * next (software) stage handle it. */
	if (r300EmitArrays(ctx))
		return GL_TRUE;

	r300UpdateShaderStates(rmesa);

	/* Flush caches and emit the accumulated state before drawing. */
	r300EmitCacheFlush(rmesa);
	radeonEmitState(&rmesa->radeon);

	for (i = 0; i < vb->PrimitiveCount; i++) {
		GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
		GLuint start = vb->Primitive[i].start;
		GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
		r300RunRenderPrimitive(rmesa, ctx, start, end, prim);
	}

	r300EmitCacheFlush(rmesa);

	/* Drop references to all vertex array DMA buffers. */
	radeonReleaseArrays(ctx, ~0);

	return GL_FALSE;
}
416
/**
 * \brief Bail out of the fallback check with R300_FALLBACK_RAST when
 * \a expr is true, warning once if fallback debugging is enabled.
 *
 * Note: the previous version had a leftover "1 ||" that forced the
 * warning regardless of RADEON_DEBUG; the flag is now honored.
 */
#define FALLBACK_IF(expr) \
	do { \
		if (expr) { \
			if (RADEON_DEBUG & DEBUG_FALLBACKS) \
				WARN_ONCE("Software fallback:%s\n", \
					  #expr); \
			return R300_FALLBACK_RAST; \
		} \
	} while(0)
426
427 static int r300Fallback(GLcontext * ctx)
428 {
429 r300ContextPtr r300 = R300_CONTEXT(ctx);
430 const unsigned back = ctx->Stencil._BackFace;
431
432 FALLBACK_IF(r300->radeon.Fallback);
433
434 struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current;
435 if (fp && !fp->translated) {
436 r300TranslateFragmentShader(ctx, ctx->FragmentProgram._Current);
437 FALLBACK_IF(fp->error);
438 }
439
440 FALLBACK_IF(ctx->RenderMode != GL_RENDER);
441
442 /* If GL_EXT_stencil_two_side is disabled, this fallback check can
443 * be removed.
444 */
445 FALLBACK_IF(ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back]
446 || ctx->Stencil.ValueMask[0] !=
447 ctx->Stencil.ValueMask[back]
448 || ctx->Stencil.WriteMask[0] !=
449 ctx->Stencil.WriteMask[back]);
450
451 if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite)
452 FALLBACK_IF(ctx->Point.PointSprite);
453
454 if (!r300->disable_lowimpact_fallback) {
455 FALLBACK_IF(ctx->Polygon.StippleFlag);
456 FALLBACK_IF(ctx->Multisample._Enabled);
457 FALLBACK_IF(ctx->Line.StippleFlag);
458 FALLBACK_IF(ctx->Line.SmoothFlag);
459 FALLBACK_IF(ctx->Point.SmoothFlag);
460 }
461
462 return R300_FALLBACK_NONE;
463 }
464
465 static GLboolean r300RunNonTCLRender(GLcontext * ctx,
466 struct tnl_pipeline_stage *stage)
467 {
468 r300ContextPtr rmesa = R300_CONTEXT(ctx);
469
470 if (RADEON_DEBUG & DEBUG_PRIMS)
471 fprintf(stderr, "%s\n", __FUNCTION__);
472
473 if (r300Fallback(ctx) >= R300_FALLBACK_RAST)
474 return GL_TRUE;
475
476 if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
477 return GL_TRUE;
478
479 if (!r300ValidateBuffers(ctx))
480 return GL_TRUE;
481
482 return r300RunRender(ctx, stage);
483 }
484
/**
 * \brief Pipeline entry point for full hardware TCL rendering.
 *
 * Sets the global hw_tcl_on flag from future_hw_tcl_on, and clears it
 * again when a TCL fallback or a non-native vertex program forces the
 * software path. Returns GL_TRUE to continue with the next pipeline
 * stage, GL_FALSE when rendering finished here.
 */
static GLboolean r300RunTCLRender(GLcontext * ctx,
				  struct tnl_pipeline_stage *stage)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	struct r300_vertex_program *vp;

	/* Latch the requested TCL mode for this draw. */
	hw_tcl_on = future_hw_tcl_on;

	if (RADEON_DEBUG & DEBUG_PRIMS)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (hw_tcl_on == GL_FALSE)
		return GL_TRUE;

	if (r300Fallback(ctx) >= R300_FALLBACK_TCL) {
		hw_tcl_on = GL_FALSE;
		return GL_TRUE;
	}

	if (!r300ValidateBuffers(ctx))
		return GL_TRUE;

	r300UpdateShaders(rmesa);

	/* The vertex program must fit the hardware; otherwise drop to
	 * software TCL for this draw. */
	vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
	if (vp->native == GL_FALSE) {
		hw_tcl_on = GL_FALSE;
		return GL_TRUE;
	}

	return r300RunRender(ctx, stage);
}
517
/* TNL pipeline stage: hardware rasterization of software-TCL vertices.
 * Only the stage name and the run callback are provided; the four NULL
 * slots are optional hooks -- presumably create/destroy/validate-style
 * callbacks, see tnl/t_context.h for the exact slot meanings. */
const struct tnl_pipeline_stage _r300_render_stage = {
	"r300 Hardware Rasterization",
	NULL,
	NULL,
	NULL,
	NULL,
	r300RunNonTCLRender
};
526
/* TNL pipeline stage: full hardware transform, clipping and lighting.
 * Only the stage name and the run callback are provided; the four NULL
 * slots are optional hooks -- presumably create/destroy/validate-style
 * callbacks, see tnl/t_context.h for the exact slot meanings. */
const struct tnl_pipeline_stage _r300_tcl_stage = {
	"r300 Hardware Transform, Clipping and Lighting",
	NULL,
	NULL,
	NULL,
	NULL,
	r300RunTCLRender
};