radeon: make DRI1 work with new CS mechanism
[mesa.git] / src / mesa / drivers / dri / r300 / r300_render.c
/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

/**
 * \file
 *
 * \brief R300 Render (Vertex Buffer Implementation)
 *
 * The immediate implementation has been removed from CVS in favor of the
 * vertex buffer implementation.
 *
 * The render functions are called by the pipeline manager to render a batch
 * of primitives. They return TRUE to pass on to the next stage (i.e. software
 * rasterization) or FALSE to indicate that the pipeline has finished after
 * rendering something.
 *
 * When falling back to software TCL, we still attempt to use hardware
 * rasterization.
 *
 * I am not sure that the cache-related registers are set up correctly, but
 * this obviously does work... Further investigation is needed.
 *
 * \author Nicolai Haehnle <prefect_@gmx.net>
 *
 * \todo Add the immediate implementation back? Perhaps this is useful if
 * there are no bugs...
 */

#include "main/glheader.h"
#include "main/state.h"
#include "main/imports.h"
#include "main/enums.h"
#include "main/macros.h"
#include "main/context.h"
#include "main/dd.h"
#include "main/simple_list.h"
#include "main/api_arrayelt.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
#include "vbo/vbo.h"
#include "tnl/tnl.h"
#include "tnl/t_vp_build.h"
#include "radeon_reg.h"
#include "radeon_macros.h"
#include "radeon_ioctl.h"
#include "radeon_state.h"
#include "r300_context.h"
#include "r300_ioctl.h"
#include "r300_state.h"
#include "r300_reg.h"
#include "r300_tex.h"
#include "r300_emit.h"
#include "r300_fragprog.h"

extern int future_hw_tcl_on;

/**
 * \brief Convert an OpenGL primitive type into an R300 primitive type.
 */
int r300PrimitiveType(r300ContextPtr rmesa, int prim)
{
	switch (prim & PRIM_MODE_MASK) {
	case GL_POINTS:
		return R300_VAP_VF_CNTL__PRIM_POINTS;
	case GL_LINES:
		return R300_VAP_VF_CNTL__PRIM_LINES;
	case GL_LINE_STRIP:
		return R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
	case GL_LINE_LOOP:
		return R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
	case GL_TRIANGLES:
		return R300_VAP_VF_CNTL__PRIM_TRIANGLES;
	case GL_TRIANGLE_STRIP:
		return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
	case GL_TRIANGLE_FAN:
		return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
	case GL_QUADS:
		return R300_VAP_VF_CNTL__PRIM_QUADS;
	case GL_QUAD_STRIP:
		return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
	case GL_POLYGON:
		return R300_VAP_VF_CNTL__PRIM_POLYGON;
	default:
		assert(0);
		return -1;
	}
}

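/**
 * \brief Clamp a vertex count to a whole number of primitives.
 *
 * Returns the largest count <= num_verts that forms complete primitives of
 * the given type (e.g. a multiple of 3 for GL_TRIANGLES), or -1 for an
 * unknown primitive type. Incomplete trailing vertices are dropped.
 */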
int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
{
	int verts_off = 0;

	switch (prim & PRIM_MODE_MASK) {
	case GL_POINTS:
		verts_off = 0;
		break;
	case GL_LINES:
		verts_off = num_verts % 2;
		break;
	case GL_LINE_STRIP:
		if (num_verts < 2)
			verts_off = num_verts;
		break;
	case GL_LINE_LOOP:
		if (num_verts < 2)
			verts_off = num_verts;
		break;
	case GL_TRIANGLES:
		verts_off = num_verts % 3;
		break;
	case GL_TRIANGLE_STRIP:
		if (num_verts < 3)
			verts_off = num_verts;
		break;
	case GL_TRIANGLE_FAN:
		if (num_verts < 3)
			verts_off = num_verts;
		break;
	case GL_QUADS:
		verts_off = num_verts % 4;
		break;
	case GL_QUAD_STRIP:
		if (num_verts < 4)
			verts_off = num_verts;
		else
			verts_off = num_verts % 2;
		break;
	case GL_POLYGON:
		if (num_verts < 3)
			verts_off = num_verts;
		break;
	default:
		assert(0);
		return -1;
	}

	return num_verts - verts_off;
}

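/**
 * \brief Upload the index (element) array into a freshly allocated GTT
 * buffer object so the hardware can fetch it.
 *
 * Indices are assumed to be 32 bits wide, hence n_elts * 4 bytes.
 */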
static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	void *out;

	rmesa->state.elt_dma_bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom,
						 0, n_elts * 4, 4,
						 RADEON_GEM_DOMAIN_GTT, 0);
	rmesa->state.elt_dma_offset = 0;
	radeon_bo_map(rmesa->state.elt_dma_bo, 1);
	out = rmesa->state.elt_dma_bo->ptr + rmesa->state.elt_dma_offset;
	memcpy(out, elts, n_elts * 4);
	radeon_bo_unmap(rmesa->state.elt_dma_bo);
}

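/**
 * \brief Fire an indexed draw: a 3D_DRAW_INDX_2 packet followed by an
 * INDX_BUFFER packet pointing at the element buffer uploaded above.
 *
 * On the old DRI1 path the buffer relocation is emitted inline via
 * OUT_BATCH_RELOC; with the new kernel memory manager (kernel_mm) the raw
 * offset is written into the packet and the relocation is recorded
 * separately through radeon_cs_write_reloc.
 */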
static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type)
{
	BATCH_LOCALS(rmesa);

	if (vertex_count > 0) {
		BEGIN_BATCH(8);
		OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
		OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
			  (vertex_count << 16) |
			  type |
			  R300_VAP_VF_CNTL__INDEX_SIZE_32bit);

		if (!rmesa->radeon.radeonScreen->kernel_mm) {
			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
			OUT_BATCH(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
			OUT_BATCH_RELOC(rmesa->state.elt_dma_offset,
					rmesa->state.elt_dma_bo,
					rmesa->state.elt_dma_offset,
					RADEON_GEM_DOMAIN_GTT, 0, 0);
			OUT_BATCH(vertex_count);
		} else {
			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
			OUT_BATCH(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
			OUT_BATCH(rmesa->state.elt_dma_offset);
			OUT_BATCH(vertex_count);
			radeon_cs_write_reloc(rmesa->cmdbuf.cs,
					      rmesa->state.elt_dma_bo,
					      0,
					      rmesa->state.elt_dma_bo->size,
					      RADEON_GEM_DOMAIN_GTT, 0, 0);
		}
		END_BATCH();
	}
}

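/**
 * \brief Emit the vertex arrays via a 3D_LOAD_VBPNTR packet.
 *
 * The packet is sized as one count dword, plus 3 dwords per pair of arrays
 * (a packed components/stride dword and two pointers) and 2 dwords for a
 * trailing unpaired array: sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2.
 *
 * As in r300FireEB, relocations are emitted inline on DRI1; with kernel_mm
 * the raw offsets are written first and all relocations are recorded
 * afterwards through radeon_cs_write_reloc.
 */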
static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
{
	BATCH_LOCALS(rmesa);
	uint32_t voffset;
	int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
	int i;

	if (RADEON_DEBUG & DEBUG_VERTS)
		fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
			offset);

	BEGIN_BATCH(sz + 2);
	OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
	OUT_BATCH(nr);

	if (!rmesa->radeon.radeonScreen->kernel_mm) {
		for (i = 0; i + 1 < nr; i += 2) {
			OUT_BATCH((rmesa->state.aos[i].components << 0) |
				  (rmesa->state.aos[i].stride << 8) |
				  (rmesa->state.aos[i + 1].components << 16) |
				  (rmesa->state.aos[i + 1].stride << 24));

			voffset = rmesa->state.aos[i + 0].offset +
			    offset * 4 * rmesa->state.aos[i + 0].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->state.aos[i].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT, 0, 0);
			voffset = rmesa->state.aos[i + 1].offset +
			    offset * 4 * rmesa->state.aos[i + 1].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->state.aos[i + 1].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT, 0, 0);
		}

		if (nr & 1) {
			OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) |
				  (rmesa->state.aos[nr - 1].stride << 8));
			voffset = rmesa->state.aos[nr - 1].offset +
			    offset * 4 * rmesa->state.aos[nr - 1].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->state.aos[nr - 1].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT, 0, 0);
		}
	} else {
		for (i = 0; i + 1 < nr; i += 2) {
			OUT_BATCH((rmesa->state.aos[i].components << 0) |
				  (rmesa->state.aos[i].stride << 8) |
				  (rmesa->state.aos[i + 1].components << 16) |
				  (rmesa->state.aos[i + 1].stride << 24));

			voffset = rmesa->state.aos[i + 0].offset +
			    offset * 4 * rmesa->state.aos[i + 0].stride;
			OUT_BATCH(voffset);
			voffset = rmesa->state.aos[i + 1].offset +
			    offset * 4 * rmesa->state.aos[i + 1].stride;
			OUT_BATCH(voffset);
		}

		if (nr & 1) {
			OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) |
				  (rmesa->state.aos[nr - 1].stride << 8));
			voffset = rmesa->state.aos[nr - 1].offset +
			    offset * 4 * rmesa->state.aos[nr - 1].stride;
			OUT_BATCH(voffset);
		}

		for (i = 0; i + 1 < nr; i += 2) {
			voffset = rmesa->state.aos[i + 0].offset +
			    offset * 4 * rmesa->state.aos[i + 0].stride;
			radeon_cs_write_reloc(rmesa->cmdbuf.cs,
					      rmesa->state.aos[i + 0].bo,
					      voffset,
					      rmesa->state.aos[i + 0].bo->size,
					      RADEON_GEM_DOMAIN_GTT, 0, 0);
			voffset = rmesa->state.aos[i + 1].offset +
			    offset * 4 * rmesa->state.aos[i + 1].stride;
			radeon_cs_write_reloc(rmesa->cmdbuf.cs,
					      rmesa->state.aos[i + 1].bo,
					      voffset,
					      rmesa->state.aos[i + 1].bo->size,
					      RADEON_GEM_DOMAIN_GTT, 0, 0);
		}
		if (nr & 1) {
			voffset = rmesa->state.aos[nr - 1].offset +
			    offset * 4 * rmesa->state.aos[nr - 1].stride;
			radeon_cs_write_reloc(rmesa->cmdbuf.cs,
					      rmesa->state.aos[nr - 1].bo,
					      voffset,
					      rmesa->state.aos[nr - 1].bo->size,
					      RADEON_GEM_DOMAIN_GTT, 0, 0);
		}
	}
	END_BATCH();
}

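/**
 * \brief Fire a non-indexed draw: a single 3D_DRAW_VBUF_2 packet that walks
 * the vertex arrays in list order.
 */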
static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
{
	BATCH_LOCALS(rmesa);

	BEGIN_BATCH(3);
	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST |
		  (vertex_count << 16) | type);
	END_BATCH();
}

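/**
 * \brief Render a single primitive: translate the GL primitive type, clamp
 * the vertex count, emit the vertex arrays and fire either an indexed (EB)
 * or sequential (AOS) draw.
 */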
static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
				   int start, int end, int prim)
{
	BATCH_LOCALS(rmesa);
	int type, num_verts;
	TNLcontext *tnl = TNL_CONTEXT(ctx);
	struct vertex_buffer *vb = &tnl->vb;

	type = r300PrimitiveType(rmesa, prim);
	num_verts = r300NumVerts(rmesa, end - start, prim);

	if (type < 0 || num_verts <= 0)
		return;

	/* Make space for at least 64 dwords.
	 * This is supposed to ensure that we can get all rendering
	 * commands into a single command buffer.
	 */
	r300EnsureCmdBufSpace(rmesa, 64, __FUNCTION__);

	if (vb->Elts) {
		if (num_verts > 65535) {
			/* not implemented yet */
			WARN_ONCE("Too many elts\n");
			return;
		}
		/* Note: The following is incorrect, but it's the best I can do
		 * without a major refactoring of how DMA memory is handled.
		 * The problem: Ensuring that both vertex arrays *and* index
		 * arrays are at the right position, and then ensuring that
		 * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are
		 * emitted at once.
		 *
		 * So why is the following incorrect? Well, it seems like
		 * allocating the index array might actually evict the vertex
		 * arrays. *sigh*
		 */
		r300EmitElts(ctx, vb->Elts, num_verts);
		r300EmitAOS(rmesa, rmesa->state.aos_count, start);
		r300FireEB(rmesa, num_verts, type);
	} else {
		r300EmitAOS(rmesa, rmesa->state.aos_count, start);
		r300FireAOS(rmesa, num_verts, type);
	}
	COMMIT_BATCH();
}

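/**
 * \brief Render the whole TNL vertex buffer with hardware rasterization.
 *
 * Emits shader and driver state, renders every primitive in the buffer, and
 * returns GL_FALSE ("pipeline finished") on success, or GL_TRUE to pass on
 * to software rasterization if the vertex arrays cannot be emitted.
 */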
static GLboolean r300RunRender(GLcontext * ctx,
			       struct tnl_pipeline_stage *stage)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	int i;
	TNLcontext *tnl = TNL_CONTEXT(ctx);
	struct vertex_buffer *vb = &tnl->vb;

	if (RADEON_DEBUG & DEBUG_PRIMS)
		fprintf(stderr, "%s\n", __FUNCTION__);

	r300UpdateShaders(rmesa);
	if (r300EmitArrays(ctx))
		return GL_TRUE;

	r300UpdateShaderStates(rmesa);

	r300EmitCacheFlush(rmesa);
	r300EmitState(rmesa);

	for (i = 0; i < vb->PrimitiveCount; i++) {
		GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
		GLuint start = vb->Primitive[i].start;
		GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
		r300RunRenderPrimitive(rmesa, ctx, start, end, prim);
	}

	r300EmitCacheFlush(rmesa);

	r300ReleaseArrays(ctx);

	return GL_FALSE;
}

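/**
 * \brief Bail out of r300Fallback() with R300_FALLBACK_RAST when the given
 * condition holds, warning once per condition. Note the "1 ||" keeps the
 * warning enabled regardless of RADEON_DEBUG.
 */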
#define FALLBACK_IF(expr)						\
	do {								\
		if (expr) {						\
			if (1 || RADEON_DEBUG & DEBUG_FALLBACKS)	\
				WARN_ONCE("Software fallback:%s\n",	\
					  #expr);			\
			return R300_FALLBACK_RAST;			\
		}							\
	} while (0)

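/**
 * \brief Check for state the hardware cannot handle.
 *
 * Translates the current fragment program first (R500 or R300 flavor,
 * depending on the chip family), then tests the remaining fallback
 * conditions. Returns an R300_FALLBACK_* level; R300_FALLBACK_NONE means
 * the hardware can render the current state.
 */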
static int r300Fallback(GLcontext * ctx)
{
	r300ContextPtr r300 = R300_CONTEXT(ctx);

	/* Do we need to use new-style shaders?
	 * Also, is there a better way to do this? */
	if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
		struct r500_fragment_program *fp =
		    (struct r500_fragment_program *)(char *)
		    ctx->FragmentProgram._Current;
		if (fp) {
			if (!fp->translated) {
				r500TranslateFragmentShader(r300, fp);
				FALLBACK_IF(!fp->translated);
			}
		}
	} else {
		struct r300_fragment_program *fp =
		    (struct r300_fragment_program *)(char *)
		    ctx->FragmentProgram._Current;
		if (fp) {
			if (!fp->translated) {
				r300TranslateFragmentShader(r300, fp);
				FALLBACK_IF(!fp->translated);
			}
		}
	}

	FALLBACK_IF(ctx->RenderMode != GL_RENDER);

	FALLBACK_IF(ctx->Stencil._TestTwoSide
		    && (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[1]
			|| ctx->Stencil.ValueMask[0] != ctx->Stencil.ValueMask[1]
			|| ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[1]));

	if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite)
		FALLBACK_IF(ctx->Point.PointSprite);

	if (!r300->disable_lowimpact_fallback) {
		FALLBACK_IF(ctx->Polygon.StippleFlag);
		FALLBACK_IF(ctx->Multisample._Enabled);
		FALLBACK_IF(ctx->Line.StippleFlag);
		FALLBACK_IF(ctx->Line.SmoothFlag);
		FALLBACK_IF(ctx->Point.SmoothFlag);
	}

	return R300_FALLBACK_NONE;
}

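/**
 * \brief Run callback for the non-TCL render stage.
 *
 * Returns GL_TRUE (pass on to software rasterization) if a rasterization
 * fallback is required or the chip is not flagged as TCL-capable; otherwise
 * renders via r300RunRender.
 */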
static GLboolean r300RunNonTCLRender(GLcontext * ctx,
				     struct tnl_pipeline_stage *stage)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);

	if (RADEON_DEBUG & DEBUG_PRIMS)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (r300Fallback(ctx) >= R300_FALLBACK_RAST)
		return GL_TRUE;

	if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
		return GL_TRUE;

	return r300RunRender(ctx, stage);
}

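/**
 * \brief Run callback for the hardware-TCL render stage.
 *
 * Disables hardware TCL (hw_tcl_on) and passes on to the next stage when a
 * TCL fallback is needed or the current vertex program is not native;
 * otherwise renders via r300RunRender.
 */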
static GLboolean r300RunTCLRender(GLcontext * ctx,
				  struct tnl_pipeline_stage *stage)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	struct r300_vertex_program *vp;

	hw_tcl_on = future_hw_tcl_on;

	if (RADEON_DEBUG & DEBUG_PRIMS)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (hw_tcl_on == GL_FALSE)
		return GL_TRUE;

	if (r300Fallback(ctx) >= R300_FALLBACK_TCL) {
		hw_tcl_on = GL_FALSE;
		return GL_TRUE;
	}

	r300UpdateShaders(rmesa);

	vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
	if (vp->native == GL_FALSE) {
		hw_tcl_on = GL_FALSE;
		return GL_TRUE;
	}

	return r300RunRender(ctx, stage);
}

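/* The TNL pipeline stages exported by this file. Only the stage name and
 * the run callback are provided; the remaining tnl_pipeline_stage hooks are
 * left NULL.
 */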
const struct tnl_pipeline_stage _r300_render_stage = {
	"r300 Hardware Rasterization",
	NULL,
	NULL,
	NULL,
	NULL,
	r300RunNonTCLRender
};

const struct tnl_pipeline_stage _r300_tcl_stage = {
	"r300 Hardware Transform, Clipping and Lighting",
	NULL,
	NULL,
	NULL,
	NULL,
	r300RunTCLRender
};