Merge remote branch 'main/master' into radeon-rewrite
[mesa.git] / src / mesa / drivers / dri / r300 / r300_render.c
1 /**************************************************************************
2
3 Copyright (C) 2004 Nicolai Haehnle.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /**
29 * \file
30 *
31 * \brief R300 Render (Vertex Buffer Implementation)
32 *
33 * The immediate implementation has been removed from CVS in favor of the vertex
34 * buffer implementation.
35 *
36 * The render functions are called by the pipeline manager to render a batch of
37 * primitives. They return TRUE to pass on to the next stage (i.e. software
38 * rasterization) or FALSE to indicate that the pipeline has finished after
39 * rendering something.
40 *
41 * When falling back to software TCL, we still attempt to use hardware
42 * rasterization.
43 *
44 * I am not sure that the cache related registers are setup correctly, but
45 * obviously this does work... Further investigation is needed.
46 *
47 * \author Nicolai Haehnle <prefect_@gmx.net>
48 *
49 * \todo Add immediate implementation back? Perhaps this is useful if there are
50 * no bugs...
51 */
52
53 #include "main/glheader.h"
54 #include "main/state.h"
55 #include "main/imports.h"
56 #include "main/enums.h"
57 #include "main/macros.h"
58 #include "main/context.h"
59 #include "main/dd.h"
60 #include "main/simple_list.h"
61 #include "main/api_arrayelt.h"
62 #include "swrast/swrast.h"
63 #include "swrast_setup/swrast_setup.h"
64 #include "vbo/vbo.h"
65 #include "tnl/tnl.h"
66 #include "tnl/t_vp_build.h"
67 #include "radeon_reg.h"
68 #include "radeon_macros.h"
69 #include "r300_context.h"
70 #include "r300_ioctl.h"
71 #include "r300_state.h"
72 #include "r300_reg.h"
73 #include "r300_tex.h"
74 #include "r300_emit.h"
75 #include "r300_fragprog.h"
76 extern int future_hw_tcl_on;
77
78 /**
79 * \brief Convert a OpenGL primitive type into a R300 primitive type.
80 */
81 int r300PrimitiveType(r300ContextPtr rmesa, int prim)
82 {
83 switch (prim & PRIM_MODE_MASK) {
84 case GL_POINTS:
85 return R300_VAP_VF_CNTL__PRIM_POINTS;
86 break;
87 case GL_LINES:
88 return R300_VAP_VF_CNTL__PRIM_LINES;
89 break;
90 case GL_LINE_STRIP:
91 return R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
92 break;
93 case GL_LINE_LOOP:
94 return R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
95 break;
96 case GL_TRIANGLES:
97 return R300_VAP_VF_CNTL__PRIM_TRIANGLES;
98 break;
99 case GL_TRIANGLE_STRIP:
100 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
101 break;
102 case GL_TRIANGLE_FAN:
103 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
104 break;
105 case GL_QUADS:
106 return R300_VAP_VF_CNTL__PRIM_QUADS;
107 break;
108 case GL_QUAD_STRIP:
109 return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
110 break;
111 case GL_POLYGON:
112 return R300_VAP_VF_CNTL__PRIM_POLYGON;
113 break;
114 default:
115 assert(0);
116 return -1;
117 break;
118 }
119 }
120
121 int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
122 {
123 int verts_off = 0;
124
125 switch (prim & PRIM_MODE_MASK) {
126 case GL_POINTS:
127 verts_off = 0;
128 break;
129 case GL_LINES:
130 verts_off = num_verts % 2;
131 break;
132 case GL_LINE_STRIP:
133 if (num_verts < 2)
134 verts_off = num_verts;
135 break;
136 case GL_LINE_LOOP:
137 if (num_verts < 2)
138 verts_off = num_verts;
139 break;
140 case GL_TRIANGLES:
141 verts_off = num_verts % 3;
142 break;
143 case GL_TRIANGLE_STRIP:
144 if (num_verts < 3)
145 verts_off = num_verts;
146 break;
147 case GL_TRIANGLE_FAN:
148 if (num_verts < 3)
149 verts_off = num_verts;
150 break;
151 case GL_QUADS:
152 verts_off = num_verts % 4;
153 break;
154 case GL_QUAD_STRIP:
155 if (num_verts < 4)
156 verts_off = num_verts;
157 else
158 verts_off = num_verts % 2;
159 break;
160 case GL_POLYGON:
161 if (num_verts < 3)
162 verts_off = num_verts;
163 break;
164 default:
165 assert(0);
166 return -1;
167 break;
168 }
169
170 return num_verts - verts_off;
171 }
172
/**
 * Upload an element (index) array into a freshly allocated DMA region so
 * the hardware can fetch it as an index buffer.
 *
 * The resulting buffer object and offset are stored in
 * rmesa->state.elt_dma_bo / elt_dma_offset for r300FireEB() to reference.
 *
 * \param ctx     GL context.
 * \param elts    host pointer to the indices; the copy below moves
 *                n_elts * 4 bytes, i.e. indices are assumed 32-bit wide.
 * \param n_elts  number of indices to upload.
 */
static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	void *out;

	/* n_elts * 4 bytes, 4-byte aligned. */
	radeonAllocDmaRegion(&rmesa->radeon, &rmesa->state.elt_dma_bo,
			     &rmesa->state.elt_dma_offset, n_elts * 4, 4);
	/* Map for write, copy the indices in, unmap again. */
	radeon_bo_map(rmesa->state.elt_dma_bo, 1);
	out = rmesa->state.elt_dma_bo->ptr + rmesa->state.elt_dma_offset;
	memcpy(out, elts, n_elts * 4);
	radeon_bo_unmap(rmesa->state.elt_dma_bo);
}
185
/**
 * Emit the draw command for an indexed primitive.
 *
 * Emits a 3D_DRAW_INDX_2 packet describing the primitive, followed by an
 * INDX_BUFFER packet pointing the hardware at the index buffer previously
 * uploaded by r300EmitElts() (rmesa->state.elt_dma_bo/offset).
 *
 * \param vertex_count  number of 32-bit indices to draw; no-op when <= 0.
 * \param type          R300_VAP_VF_CNTL__PRIM_* primitive type.
 */
static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type)
{
	BATCH_LOCALS(&rmesa->radeon);

	if (vertex_count > 0) {
		BEGIN_BATCH(10);
		OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
		/* Count lives in the upper 16 bits of VAP_VF_CNTL. */
		OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
			  ((vertex_count + 0) << 16) |
			  type |
			  R300_VAP_VF_CNTL__INDEX_SIZE_32bit);

		/* Both branches emit the same INDX_BUFFER packet; they
		 * differ only in how the buffer relocation is recorded:
		 * inline (legacy memory manager) vs. appended to the
		 * command stream (kernel memory manager). */
		if (!rmesa->radeon.radeonScreen->kernel_mm) {
			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
			OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
				  (R300_VAP_PORT_IDX0 >> 2));
			OUT_BATCH_RELOC(rmesa->state.elt_dma_offset,
					rmesa->state.elt_dma_bo,
					rmesa->state.elt_dma_offset,
					RADEON_GEM_DOMAIN_GTT, 0, 0);
			OUT_BATCH(vertex_count);
		} else {
			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
			OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
				  (R300_VAP_PORT_IDX0 >> 2));
			OUT_BATCH(rmesa->state.elt_dma_offset);
			OUT_BATCH(vertex_count);
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->state.elt_dma_bo,
					      RADEON_GEM_DOMAIN_GTT, 0, 0);
		}
		END_BATCH();
	}
}
220
/**
 * Emit a 3D_LOAD_VBPNTR packet describing the currently bound vertex
 * arrays (AOS = arrays of structures) in rmesa->state.aos.
 *
 * Array descriptors are packed two per dword (components + stride for a
 * pair of arrays), each followed by that array's buffer offset.
 *
 * \param nr      number of arrays in rmesa->state.aos to emit.
 * \param offset  index of the first vertex; converted to a byte offset as
 *                offset * 4 * stride (strides appear to be in dwords —
 *                TODO confirm against r300EmitArrays).
 */
static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
{
	BATCH_LOCALS(&rmesa->radeon);
	uint32_t voffset;
	/* Packet body size: 1 dword for the array count, 3 dwords per
	 * array pair (descriptor + two offsets), 2 for a trailing odd
	 * array (descriptor + one offset). */
	int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
	int i;

	if (RADEON_DEBUG & DEBUG_VERTS)
		fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
			offset);


	if (!rmesa->radeon.radeonScreen->kernel_mm) {
		/* Legacy memory manager: relocations are emitted inline
		 * with each offset via OUT_BATCH_RELOC. */
		BEGIN_BATCH(sz+2+(nr * 2));
		OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
		OUT_BATCH(nr);

		for (i = 0; i + 1 < nr; i += 2) {
			/* One descriptor dword covers arrays i and i+1. */
			OUT_BATCH((rmesa->state.aos[i].components << 0) |
				  (rmesa->state.aos[i].stride << 8) |
				  (rmesa->state.aos[i + 1].components << 16) |
				  (rmesa->state.aos[i + 1].stride << 24));

			voffset = rmesa->state.aos[i + 0].offset +
				offset * 4 * rmesa->state.aos[i + 0].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->state.aos[i].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
			voffset = rmesa->state.aos[i + 1].offset +
				offset * 4 * rmesa->state.aos[i + 1].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->state.aos[i+1].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
		}

		/* Odd array count: last descriptor dword carries one array. */
		if (nr & 1) {
			OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) |
				  (rmesa->state.aos[nr - 1].stride << 8));
			voffset = rmesa->state.aos[nr - 1].offset +
				offset * 4 * rmesa->state.aos[nr - 1].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->state.aos[nr - 1].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
		}
		END_BATCH();
	} else {
		/* Kernel memory manager: offsets go into the packet as-is,
		 * then the relocations are appended afterwards in the same
		 * array order. */
		BEGIN_BATCH(sz+2+(nr * 2));
		OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
		OUT_BATCH(nr);

		for (i = 0; i + 1 < nr; i += 2) {
			OUT_BATCH((rmesa->state.aos[i].components << 0) |
				  (rmesa->state.aos[i].stride << 8) |
				  (rmesa->state.aos[i + 1].components << 16) |
				  (rmesa->state.aos[i + 1].stride << 24));

			voffset = rmesa->state.aos[i + 0].offset +
				offset * 4 * rmesa->state.aos[i + 0].stride;
			OUT_BATCH(voffset);
			voffset = rmesa->state.aos[i + 1].offset +
				offset * 4 * rmesa->state.aos[i + 1].stride;
			OUT_BATCH(voffset);
		}

		if (nr & 1) {
			OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) |
				  (rmesa->state.aos[nr - 1].stride << 8));
			voffset = rmesa->state.aos[nr - 1].offset +
				offset * 4 * rmesa->state.aos[nr - 1].stride;
			OUT_BATCH(voffset);
		}
		/* Record the relocations — one per array, mirroring the
		 * offsets written above. */
		for (i = 0; i + 1 < nr; i += 2) {
			voffset = rmesa->state.aos[i + 0].offset +
				offset * 4 * rmesa->state.aos[i + 0].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->state.aos[i+0].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
			voffset = rmesa->state.aos[i + 1].offset +
				offset * 4 * rmesa->state.aos[i + 1].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->state.aos[i+1].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
		}
		if (nr & 1) {
			voffset = rmesa->state.aos[nr - 1].offset +
				offset * 4 * rmesa->state.aos[nr - 1].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->state.aos[nr-1].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
		}
		END_BATCH();
	}

}
325
/**
 * Emit the draw command for a non-indexed primitive: the hardware walks
 * the vertex arrays set up by r300EmitAOS() in order.
 *
 * \param vertex_count  number of vertices to draw (upper 16 bits of the
 *                      VAP_VF_CNTL dword).
 * \param type          R300_VAP_VF_CNTL__PRIM_* primitive type.
 */
static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
{
	BATCH_LOCALS(&rmesa->radeon);

	BEGIN_BATCH(3);
	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
	END_BATCH();
}
335
/**
 * Render one primitive run [start, end) from the TNL vertex buffer,
 * taking the indexed path when vb->Elts is set.
 *
 * \param start  index of the first vertex of the run.
 * \param end    one past the last vertex of the run.
 * \param prim   TNL primitive flags (mode in the PRIM_MODE_MASK bits).
 */
static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
				   int start, int end, int prim)
{
	int type, num_verts;
	TNLcontext *tnl = TNL_CONTEXT(ctx);
	struct vertex_buffer *vb = &tnl->vb;

	type = r300PrimitiveType(rmesa, prim);
	num_verts = r300NumVerts(rmesa, end - start, prim);

	/* Unknown primitive mode, or too few vertices to form one. */
	if (type < 0 || num_verts <= 0)
		return;

	/* Make space for at least 64 dwords.
	 * This is supposed to ensure that we can get all rendering
	 * commands into a single command buffer.
	 */
	rcommonEnsureCmdBufSpace(&rmesa->radeon, 64, __FUNCTION__);

	if (vb->Elts) {
		/* The draw packet's count field is 16 bits wide (see the
		 * << 16 in r300FireEB), so larger runs can't be emitted. */
		if (num_verts > 65535) {
			/* not implemented yet */
			WARN_ONCE("Too many elts\n");
			return;
		}
		/* Note: The following is incorrect, but it's the best I can do
		 * without a major refactoring of how DMA memory is handled.
		 * The problem: Ensuring that both vertex arrays *and* index
		 * arrays are at the right position, and then ensuring that
		 * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are emitted
		 * at once.
		 *
		 * So why is the following incorrect? Well, it seems like
		 * allocating the index array might actually evict the vertex
		 * arrays. *sigh*
		 */
		r300EmitElts(ctx, vb->Elts, num_verts);
		r300EmitAOS(rmesa, rmesa->state.aos_count, start);
		r300FireEB(rmesa, num_verts, type);
	} else {
		r300EmitAOS(rmesa, rmesa->state.aos_count, start);
		r300FireAOS(rmesa, num_verts, type);
	}
	COMMIT_BATCH();
}
381
/**
 * Render the current TNL vertex buffer in hardware.
 *
 * Updates shaders/state, emits them, then draws each primitive run in
 * the vertex buffer via r300RunRenderPrimitive().
 *
 * \return GL_TRUE to pass the batch on to the next (software) pipeline
 *         stage, GL_FALSE when the hardware has rendered everything.
 */
static GLboolean r300RunRender(GLcontext * ctx,
			       struct tnl_pipeline_stage *stage)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	int i;
	TNLcontext *tnl = TNL_CONTEXT(ctx);
	struct vertex_buffer *vb = &tnl->vb;

	if (RADEON_DEBUG & DEBUG_PRIMS)
		fprintf(stderr, "%s\n", __FUNCTION__);

	r300UpdateShaders(rmesa);
	/* Non-zero return means the vertex arrays could not be set up;
	 * fall through to software rasterization. */
	if (r300EmitArrays(ctx))
		return GL_TRUE;

	r300UpdateShaderStates(rmesa);

	r300EmitCacheFlush(rmesa);
	radeonEmitState(&rmesa->radeon);

	for (i = 0; i < vb->PrimitiveCount; i++) {
		GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
		GLuint start = vb->Primitive[i].start;
		GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
		r300RunRenderPrimitive(rmesa, ctx, start, end, prim);
	}

	r300EmitCacheFlush(rmesa);

	r300ReleaseArrays(ctx);

	return GL_FALSE;
}
415
/**
 * Return R300_FALLBACK_RAST from the enclosing function when \a expr is
 * true, emitting a one-time warning when fallback debugging is enabled.
 *
 * The previous version had a leftover "1 ||" that forced the warning
 * regardless of RADEON_DEBUG; the debug-flag check is now honored.
 */
#define FALLBACK_IF(expr)					\
	do {							\
		if (expr) {					\
			if (RADEON_DEBUG & DEBUG_FALLBACKS)	\
				WARN_ONCE("Software fallback:%s\n", \
					  #expr);		\
			return R300_FALLBACK_RAST;		\
		}						\
	} while (0)
425
/**
 * Check for states the hardware rasterizer cannot handle.
 *
 * Each FALLBACK_IF returns R300_FALLBACK_RAST from this function when its
 * condition holds.
 *
 * \return R300_FALLBACK_RAST when software rasterization is required,
 *         R300_FALLBACK_NONE otherwise.
 */
static int r300Fallback(GLcontext * ctx)
{
	r300ContextPtr r300 = R300_CONTEXT(ctx);
	const unsigned back = ctx->Stencil._BackFace;

	FALLBACK_IF(r300->radeon.Fallback);
	/* Do we need to use new-style shaders?
	 * Also is there a better way to do this? */
	if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
		/* RV515 and newer take the r500 fragment program path;
		 * fall back if the program cannot be translated. */
		struct r500_fragment_program *fp = (struct r500_fragment_program *)
			(char *)ctx->FragmentProgram._Current;
		if (fp) {
			if (!fp->translated) {
				r500TranslateFragmentShader(r300, fp);
				FALLBACK_IF(!fp->translated);
			}
		}
	} else {
		struct r300_fragment_program *fp = (struct r300_fragment_program *)
			(char *)ctx->FragmentProgram._Current;
		if (fp) {
			if (!fp->translated) {
				r300TranslateFragmentShader(r300, fp);
				FALLBACK_IF(!fp->translated);
			}
		}
	}

	/* Selection/feedback modes are not hardware-accelerated. */
	FALLBACK_IF(ctx->RenderMode != GL_RENDER);

	/* If GL_EXT_stencil_two_side is disabled, this fallback check can
	 * be removed.
	 */
	FALLBACK_IF(ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back]
		    || ctx->Stencil.ValueMask[0] !=
		    ctx->Stencil.ValueMask[back]
		    || ctx->Stencil.WriteMask[0] !=
		    ctx->Stencil.WriteMask[back]);

	if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite)
		FALLBACK_IF(ctx->Point.PointSprite);

	/* Low-impact fallbacks: features the user may opt to render
	 * incorrectly in hardware instead (disable_lowimpact_fallback). */
	if (!r300->disable_lowimpact_fallback) {
		FALLBACK_IF(ctx->Polygon.StippleFlag);
		FALLBACK_IF(ctx->Multisample._Enabled);
		FALLBACK_IF(ctx->Line.StippleFlag);
		FALLBACK_IF(ctx->Line.SmoothFlag);
		FALLBACK_IF(ctx->Point.SmoothFlag);
	}

	return R300_FALLBACK_NONE;
}
478
479 static GLboolean r300RunNonTCLRender(GLcontext * ctx,
480 struct tnl_pipeline_stage *stage)
481 {
482 r300ContextPtr rmesa = R300_CONTEXT(ctx);
483
484 if (RADEON_DEBUG & DEBUG_PRIMS)
485 fprintf(stderr, "%s\n", __FUNCTION__);
486
487 if (r300Fallback(ctx) >= R300_FALLBACK_RAST)
488 return GL_TRUE;
489
490 if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
491 return GL_TRUE;
492
493 return r300RunRender(ctx, stage);
494 }
495
/**
 * Pipeline entry point for fully hardware-accelerated rendering
 * (hardware TCL + rasterization).
 *
 * Side effect: updates the global hw_tcl_on from future_hw_tcl_on, and
 * clears it when a TCL fallback is required or the vertex program is not
 * native.
 *
 * \return GL_TRUE to hand the batch to the next (software TCL) stage,
 *         GL_FALSE when rendering is done.
 */
static GLboolean r300RunTCLRender(GLcontext * ctx,
				  struct tnl_pipeline_stage *stage)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	struct r300_vertex_program *vp;

	hw_tcl_on = future_hw_tcl_on;

	if (RADEON_DEBUG & DEBUG_PRIMS)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (hw_tcl_on == GL_FALSE)
		return GL_TRUE;

	if (r300Fallback(ctx) >= R300_FALLBACK_TCL) {
		hw_tcl_on = GL_FALSE;
		return GL_TRUE;
	}

	if (!r300ValidateBuffers(ctx))
		return GL_TRUE;

	r300UpdateShaders(rmesa);

	/* The vertex program must run natively on the hardware; otherwise
	 * fall back to software TCL for this batch. */
	vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
	if (vp->native == GL_FALSE) {
		hw_tcl_on = GL_FALSE;
		return GL_TRUE;
	}

	return r300RunRender(ctx, stage);
}
528
/**
 * TNL pipeline stage used when transformation is done in software and only
 * rasterization runs in hardware. The final member is the run callback;
 * the NULL entries are unused stage hooks.
 */
const struct tnl_pipeline_stage _r300_render_stage = {
	"r300 Hardware Rasterization",
	NULL,
	NULL,
	NULL,
	NULL,
	r300RunNonTCLRender
};
537
/**
 * TNL pipeline stage for the fully hardware-accelerated path (hardware
 * TCL). The final member is the run callback; the NULL entries are unused
 * stage hooks.
 */
const struct tnl_pipeline_stage _r300_tcl_stage = {
	"r300 Hardware Transform, Clipping and Lighting",
	NULL,
	NULL,
	NULL,
	NULL,
	r300RunTCLRender
};