r300: move some more function to generic
[mesa.git] / src / mesa / drivers / dri / r300 / r300_render.c
1 /**************************************************************************
2
3 Copyright (C) 2004 Nicolai Haehnle.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /**
29 * \file
30 *
31 * \brief R300 Render (Vertex Buffer Implementation)
32 *
33 * The immediate implementation has been removed from CVS in favor of the vertex
34 * buffer implementation.
35 *
36 * The render functions are called by the pipeline manager to render a batch of
37 * primitives. They return TRUE to pass on to the next stage (i.e. software
38 * rasterization) or FALSE to indicate that the pipeline has finished after
39 * rendering something.
40 *
 * When falling back to software TCL, we still attempt to use hardware
 * rasterization.
43 *
44 * I am not sure that the cache related registers are setup correctly, but
45 * obviously this does work... Further investigation is needed.
46 *
47 * \author Nicolai Haehnle <prefect_@gmx.net>
48 *
49 * \todo Add immediate implementation back? Perhaps this is useful if there are
50 * no bugs...
51 */
52
53 #include "main/glheader.h"
54 #include "main/state.h"
55 #include "main/imports.h"
56 #include "main/enums.h"
57 #include "main/macros.h"
58 #include "main/context.h"
59 #include "main/dd.h"
60 #include "main/simple_list.h"
61 #include "main/api_arrayelt.h"
62 #include "swrast/swrast.h"
63 #include "swrast_setup/swrast_setup.h"
64 #include "vbo/vbo.h"
65 #include "tnl/tnl.h"
66 #include "tnl/t_vp_build.h"
67 #include "radeon_reg.h"
68 #include "radeon_macros.h"
69 #include "radeon_ioctl.h"
70 #include "radeon_state.h"
71 #include "r300_context.h"
72 #include "r300_ioctl.h"
73 #include "r300_state.h"
74 #include "r300_reg.h"
75 #include "r300_tex.h"
76 #include "r300_emit.h"
77 #include "r300_fragprog.h"
78 extern int future_hw_tcl_on;
79
80 /**
81 * \brief Convert a OpenGL primitive type into a R300 primitive type.
82 */
83 int r300PrimitiveType(r300ContextPtr rmesa, int prim)
84 {
85 switch (prim & PRIM_MODE_MASK) {
86 case GL_POINTS:
87 return R300_VAP_VF_CNTL__PRIM_POINTS;
88 break;
89 case GL_LINES:
90 return R300_VAP_VF_CNTL__PRIM_LINES;
91 break;
92 case GL_LINE_STRIP:
93 return R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
94 break;
95 case GL_LINE_LOOP:
96 return R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
97 break;
98 case GL_TRIANGLES:
99 return R300_VAP_VF_CNTL__PRIM_TRIANGLES;
100 break;
101 case GL_TRIANGLE_STRIP:
102 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
103 break;
104 case GL_TRIANGLE_FAN:
105 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
106 break;
107 case GL_QUADS:
108 return R300_VAP_VF_CNTL__PRIM_QUADS;
109 break;
110 case GL_QUAD_STRIP:
111 return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
112 break;
113 case GL_POLYGON:
114 return R300_VAP_VF_CNTL__PRIM_POLYGON;
115 break;
116 default:
117 assert(0);
118 return -1;
119 break;
120 }
121 }
122
123 int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
124 {
125 int verts_off = 0;
126
127 switch (prim & PRIM_MODE_MASK) {
128 case GL_POINTS:
129 verts_off = 0;
130 break;
131 case GL_LINES:
132 verts_off = num_verts % 2;
133 break;
134 case GL_LINE_STRIP:
135 if (num_verts < 2)
136 verts_off = num_verts;
137 break;
138 case GL_LINE_LOOP:
139 if (num_verts < 2)
140 verts_off = num_verts;
141 break;
142 case GL_TRIANGLES:
143 verts_off = num_verts % 3;
144 break;
145 case GL_TRIANGLE_STRIP:
146 if (num_verts < 3)
147 verts_off = num_verts;
148 break;
149 case GL_TRIANGLE_FAN:
150 if (num_verts < 3)
151 verts_off = num_verts;
152 break;
153 case GL_QUADS:
154 verts_off = num_verts % 4;
155 break;
156 case GL_QUAD_STRIP:
157 if (num_verts < 4)
158 verts_off = num_verts;
159 else
160 verts_off = num_verts % 2;
161 break;
162 case GL_POLYGON:
163 if (num_verts < 3)
164 verts_off = num_verts;
165 break;
166 default:
167 assert(0);
168 return -1;
169 break;
170 }
171
172 return num_verts - verts_off;
173 }
174
/**
 * \brief Upload an index (element) array into a freshly allocated GTT buffer.
 *
 * Allocates a buffer object big enough for \a n_elts 32-bit indices,
 * maps it, copies the indices in, and unmaps it again.  The buffer and
 * its offset are stashed in rmesa->state for the following r300FireEB().
 *
 * NOTE(review): the result of radeon_bo_open() is used unchecked;
 * presumably allocation failure is fatal here — confirm.
 */
static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	void *out;

	/* 4 bytes per index: the draw uses INDEX_SIZE_32bit (see r300FireEB). */
	rmesa->state.elt_dma_bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom,
						 0, n_elts * 4, 4,
						 RADEON_GEM_DOMAIN_GTT, 0);
	rmesa->state.elt_dma_offset = 0;
	radeon_bo_map(rmesa->state.elt_dma_bo, 1);
	out = rmesa->state.elt_dma_bo->ptr + rmesa->state.elt_dma_offset;
	memcpy(out, elts, n_elts * 4);
	radeon_bo_unmap(rmesa->state.elt_dma_bo);
}
189
/**
 * \brief Emit the DRAW_INDX_2 + INDX_BUFFER packets that draw from the
 * element buffer previously uploaded by r300EmitElts().
 *
 * \param vertex_count  Number of indices to draw; nothing is emitted if <= 0.
 * \param type          An R300_VAP_VF_CNTL__PRIM_* primitive type.
 *
 * The two paths differ only in how the element buffer is referenced:
 * the legacy (non-kernel_mm) path embeds the relocation via
 * OUT_BATCH_RELOC, while the kernel memory-manager path writes the
 * offset directly and appends the relocation with radeon_cs_write_reloc().
 */
static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type)
{
	BATCH_LOCALS(&rmesa->radeon);

	if (vertex_count > 0) {
		BEGIN_BATCH(8);
		OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
		/* Indexed draw: walk the index list, 32-bit indices,
		 * count in the upper half of the dword. */
		OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
			  ((vertex_count + 0) << 16) |
			  type |
			  R300_VAP_VF_CNTL__INDEX_SIZE_32bit);

		if (!rmesa->radeon.radeonScreen->kernel_mm) {
			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
			OUT_BATCH(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
			OUT_BATCH_RELOC(rmesa->state.elt_dma_offset,
					rmesa->state.elt_dma_bo,
					rmesa->state.elt_dma_offset,
					RADEON_GEM_DOMAIN_GTT, 0, 0);
			OUT_BATCH(vertex_count);
		} else {
			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
			OUT_BATCH(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
			OUT_BATCH(rmesa->state.elt_dma_offset);
			OUT_BATCH(vertex_count);
			/* Relocation is recorded out-of-band on the CS
			 * rather than inline in the packet. */
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->state.elt_dma_bo,
					      RADEON_GEM_DOMAIN_GTT, 0, 0);
		}
		END_BATCH();
	}
}
222
/**
 * \brief Emit the 3D_LOAD_VBPNTR packet describing the vertex arrays
 * (arrays-of-structures) for this draw.
 *
 * \param nr      Number of vertex arrays in rmesa->state.aos[].
 * \param offset  Starting vertex; converted per-array into a byte offset
 *                as offset * 4 * stride (stride presumably in dwords —
 *                confirm against r300EmitArrays).
 *
 * Arrays are packed two per control dword (components/stride pairs),
 * with a trailing half-dword when nr is odd — hence
 * sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2.
 *
 * Legacy path: relocations are emitted inline with OUT_BATCH_RELOC.
 * kernel_mm path: raw offsets go into the packet first, then all
 * relocations are appended to the CS afterwards, in the same array
 * order, via radeon_cs_write_reloc().
 */
static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
{
	BATCH_LOCALS(&rmesa->radeon);
	uint32_t voffset;
	/* 1 count dword + 3 dwords per array pair + 2 for an odd leftover. */
	int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
	int i;

	if (RADEON_DEBUG & DEBUG_VERTS)
		fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
			offset);

	BEGIN_BATCH(sz+2);
	OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
	OUT_BATCH(nr);


	if (!rmesa->radeon.radeonScreen->kernel_mm) {
		/* Pairs of arrays: one control dword, then one reloc'd
		 * pointer dword per array. */
		for (i = 0; i + 1 < nr; i += 2) {
			OUT_BATCH((rmesa->state.aos[i].components << 0) |
				  (rmesa->state.aos[i].stride << 8) |
				  (rmesa->state.aos[i + 1].components << 16) |
				  (rmesa->state.aos[i + 1].stride << 24));

			voffset = rmesa->state.aos[i + 0].offset +
				offset * 4 * rmesa->state.aos[i + 0].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->state.aos[i].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
			voffset = rmesa->state.aos[i + 1].offset +
				offset * 4 * rmesa->state.aos[i + 1].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->state.aos[i+1].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
		}

		/* Odd leftover array: control dword carries only the low
		 * half. */
		if (nr & 1) {
			OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) |
				  (rmesa->state.aos[nr - 1].stride << 8));
			voffset = rmesa->state.aos[nr - 1].offset +
				offset * 4 * rmesa->state.aos[nr - 1].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->state.aos[nr - 1].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
		}
	} else {
		/* kernel_mm: same packet layout, but pointer dwords are
		 * plain offsets ... */
		for (i = 0; i + 1 < nr; i += 2) {
			OUT_BATCH((rmesa->state.aos[i].components << 0) |
				  (rmesa->state.aos[i].stride << 8) |
				  (rmesa->state.aos[i + 1].components << 16) |
				  (rmesa->state.aos[i + 1].stride << 24));

			voffset = rmesa->state.aos[i + 0].offset +
				offset * 4 * rmesa->state.aos[i + 0].stride;
			OUT_BATCH(voffset);
			voffset = rmesa->state.aos[i + 1].offset +
				offset * 4 * rmesa->state.aos[i + 1].stride;
			OUT_BATCH(voffset);
		}

		if (nr & 1) {
			OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) |
				  (rmesa->state.aos[nr - 1].stride << 8));
			voffset = rmesa->state.aos[nr - 1].offset +
				offset * 4 * rmesa->state.aos[nr - 1].stride;
			OUT_BATCH(voffset);
		}
		/* ... and relocations follow the whole packet, one per
		 * array, in the same order as the pointer dwords above. */
		for (i = 0; i + 1 < nr; i += 2) {
			voffset = rmesa->state.aos[i + 0].offset +
				offset * 4 * rmesa->state.aos[i + 0].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->state.aos[i+0].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
			voffset = rmesa->state.aos[i + 1].offset +
				offset * 4 * rmesa->state.aos[i + 1].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->state.aos[i+1].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
		}
		if (nr & 1) {
			voffset = rmesa->state.aos[nr - 1].offset +
				offset * 4 * rmesa->state.aos[nr - 1].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->state.aos[nr-1].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
		}
	}
	END_BATCH();
}
320
/**
 * \brief Emit a DRAW_VBUF_2 packet drawing \a vertex_count vertices
 * sequentially from the arrays loaded by r300EmitAOS().
 *
 * \param vertex_count  Number of vertices to draw (upper half of the
 *                      VAP_VF_CNTL dword).
 * \param type          An R300_VAP_VF_CNTL__PRIM_* primitive type.
 */
static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
{
	BATCH_LOCALS(&rmesa->radeon);

	BEGIN_BATCH(3);
	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
	END_BATCH();
}
330
/**
 * \brief Render one TNL primitive run [start, end) as either an indexed
 * (element) or sequential (vertex-list) hardware draw.
 *
 * Converts the GL primitive to its R300 type, clamps the vertex count to
 * whole primitives, reserves command-buffer space, then emits the vertex
 * arrays plus the matching draw packet.  Silently returns on an unknown
 * primitive type or an empty run.
 */
static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
				   int start, int end, int prim)
{
	/* Needed by COMMIT_BATCH() below. */
	BATCH_LOCALS(&rmesa->radeon);
	int type, num_verts;
	TNLcontext *tnl = TNL_CONTEXT(ctx);
	struct vertex_buffer *vb = &tnl->vb;

	type = r300PrimitiveType(rmesa, prim);
	num_verts = r300NumVerts(rmesa, end - start, prim);

	if (type < 0 || num_verts <= 0)
		return;

	/* Make space for at least 64 dwords.
	 * This is supposed to ensure that we can get all rendering
	 * commands into a single command buffer.
	 */
	rcommonEnsureCmdBufSpace(&rmesa->radeon, 64, __FUNCTION__);

	if (vb->Elts) {
		/* Indexed path: the 16-bit count field in VAP_VF_CNTL
		 * limits us to 65535 indices per draw. */
		if (num_verts > 65535) {
			/* not implemented yet */
			WARN_ONCE("Too many elts\n");
			return;
		}
		/* Note: The following is incorrect, but it's the best I can do
		 * without a major refactoring of how DMA memory is handled.
		 * The problem: Ensuring that both vertex arrays *and* index
		 * arrays are at the right position, and then ensuring that
		 * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are emitted
		 * at once.
		 *
		 * So why is the following incorrect? Well, it seems like
		 * allocating the index array might actually evict the vertex
		 * arrays. *sigh*
		 */
		r300EmitElts(ctx, vb->Elts, num_verts);
		r300EmitAOS(rmesa, rmesa->state.aos_count, start);
		r300FireEB(rmesa, num_verts, type);
	} else {
		/* Sequential path: arrays then a vertex-list draw. */
		r300EmitAOS(rmesa, rmesa->state.aos_count, start);
		r300FireAOS(rmesa, num_verts, type);
	}
	COMMIT_BATCH();
}
377
/**
 * \brief Render the whole TNL vertex buffer on hardware.
 *
 * Updates shaders and arrays, emits state, then draws each primitive run
 * via r300RunRenderPrimitive(), bracketed by cache flushes.
 *
 * \return GL_TRUE to pass the batch on to the next (software) pipeline
 *         stage — here only when r300EmitArrays() fails — or GL_FALSE
 *         when rendering was handled.
 */
static GLboolean r300RunRender(GLcontext * ctx,
			       struct tnl_pipeline_stage *stage)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	int i;
	TNLcontext *tnl = TNL_CONTEXT(ctx);
	struct vertex_buffer *vb = &tnl->vb;

	if (RADEON_DEBUG & DEBUG_PRIMS)
		fprintf(stderr, "%s\n", __FUNCTION__);

	r300UpdateShaders(rmesa);
	/* Array setup failed: fall through to software rendering. */
	if (r300EmitArrays(ctx))
		return GL_TRUE;

	r300UpdateShaderStates(rmesa);

	/* Flush caches and emit the full state before any draw packets. */
	r300EmitCacheFlush(rmesa);
	r300EmitState(rmesa);

	for (i = 0; i < vb->PrimitiveCount; i++) {
		GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
		GLuint start = vb->Primitive[i].start;
		GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
		r300RunRenderPrimitive(rmesa, ctx, start, end, prim);
	}

	r300EmitCacheFlush(rmesa);

	r300ReleaseArrays(ctx);

	/* Rendering done; do not run further pipeline stages. */
	return GL_FALSE;
}
411
/**
 * \brief Bail out of r300Fallback() with R300_FALLBACK_RAST when \a expr
 * holds, warning once when fallback debugging is enabled.
 *
 * Fix: the previous `if (1 || ...)` was a debugging leftover that made
 * the DEBUG_FALLBACKS gate dead and warned unconditionally; the warning
 * is now emitted only when the debug flag is set.
 */
#define FALLBACK_IF(expr)						\
	do {								\
		if (expr) {						\
			if (RADEON_DEBUG & DEBUG_FALLBACKS)		\
				WARN_ONCE("Software fallback:%s\n",	\
					  #expr);			\
			return R300_FALLBACK_RAST;			\
		}							\
	} while(0)
421
/**
 * \brief Decide whether rendering must fall back to software.
 *
 * \return R300_FALLBACK_NONE when hardware can handle the current state,
 *         otherwise R300_FALLBACK_RAST (via FALLBACK_IF).
 *
 * Note: this is not a pure predicate — it lazily translates the current
 * fragment program (r300/r500 variant by chip family) as a side effect,
 * and falls back if translation fails.
 */
static int r300Fallback(GLcontext * ctx)
{
	r300ContextPtr r300 = R300_CONTEXT(ctx);
	/* Do we need to use new-style shaders?
	 * Also is there a better way to do this? */
	if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
		struct r500_fragment_program *fp = (struct r500_fragment_program *)
			(char *)ctx->FragmentProgram._Current;
		if (fp) {
			if (!fp->translated) {
				r500TranslateFragmentShader(r300, fp);
				FALLBACK_IF(!fp->translated);
			}
		}
	} else {
		struct r300_fragment_program *fp = (struct r300_fragment_program *)
			(char *)ctx->FragmentProgram._Current;
		if (fp) {
			if (!fp->translated) {
				r300TranslateFragmentShader(r300, fp);
				FALLBACK_IF(!fp->translated);
			}
		}
	}

	/* Feedback/select modes are not hardware-accelerated. */
	FALLBACK_IF(ctx->RenderMode != GL_RENDER);

	/* Two-sided stencil only works when both sides share ref/masks. */
	FALLBACK_IF(ctx->Stencil._TestTwoSide
		    && (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[1]
			|| ctx->Stencil.ValueMask[0] !=
			ctx->Stencil.ValueMask[1]
			|| ctx->Stencil.WriteMask[0] !=
			ctx->Stencil.WriteMask[1]));

	if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite)
		FALLBACK_IF(ctx->Point.PointSprite);

	/* Low-impact features the hardware path does not implement;
	 * can be overridden via disable_lowimpact_fallback. */
	if (!r300->disable_lowimpact_fallback) {
		FALLBACK_IF(ctx->Polygon.StippleFlag);
		FALLBACK_IF(ctx->Multisample._Enabled);
		FALLBACK_IF(ctx->Line.StippleFlag);
		FALLBACK_IF(ctx->Line.SmoothFlag);
		FALLBACK_IF(ctx->Point.SmoothFlag);
	}

	return R300_FALLBACK_NONE;
}
469
470 static GLboolean r300RunNonTCLRender(GLcontext * ctx,
471 struct tnl_pipeline_stage *stage)
472 {
473 r300ContextPtr rmesa = R300_CONTEXT(ctx);
474
475 if (RADEON_DEBUG & DEBUG_PRIMS)
476 fprintf(stderr, "%s\n", __FUNCTION__);
477
478 if (r300Fallback(ctx) >= R300_FALLBACK_RAST)
479 return GL_TRUE;
480
481 if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
482 return GL_TRUE;
483
484 return r300RunRender(ctx, stage);
485 }
486
/**
 * \brief TNL pipeline stage entry point for full hardware TCL rendering.
 *
 * Syncs the global hw_tcl_on flag from future_hw_tcl_on, then bails to
 * the next stage (returning GL_TRUE) when hardware TCL is off, a TCL
 * fallback is required, or the current vertex program is not native.
 * The latter two cases also force hw_tcl_on back off.
 */
static GLboolean r300RunTCLRender(GLcontext * ctx,
				  struct tnl_pipeline_stage *stage)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	struct r300_vertex_program *vp;

	/* Apply any deferred TCL enable/disable request. */
	hw_tcl_on = future_hw_tcl_on;

	if (RADEON_DEBUG & DEBUG_PRIMS)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (hw_tcl_on == GL_FALSE)
		return GL_TRUE;

	if (r300Fallback(ctx) >= R300_FALLBACK_TCL) {
		hw_tcl_on = GL_FALSE;
		return GL_TRUE;
	}

	/* Must run before CURRENT_VERTEX_SHADER is consulted below. */
	r300UpdateShaders(rmesa);

	/* Vertex program that the hardware cannot run natively: disable
	 * hardware TCL and let software take over. */
	vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
	if (vp->native == GL_FALSE) {
		hw_tcl_on = GL_FALSE;
		return GL_TRUE;
	}

	return r300RunRender(ctx, stage);
}
516
/** TNL pipeline stage: hardware rasterization (TCL done elsewhere).
 * Only the run hook is provided; the optional create/destroy/validate
 * hooks are unused. */
const struct tnl_pipeline_stage _r300_render_stage = {
	"r300 Hardware Rasterization",
	NULL,
	NULL,
	NULL,
	NULL,
	r300RunNonTCLRender
};
525
/** TNL pipeline stage: full hardware transform, clipping and lighting.
 * Only the run hook is provided; the optional create/destroy/validate
 * hooks are unused. */
const struct tnl_pipeline_stage _r300_tcl_stage = {
	"r300 Hardware Transform, Clipping and Lighting",
	NULL,
	NULL,
	NULL,
	NULL,
	r300RunTCLRender
};