draw: corrections to allow for different cliptest cases
[mesa.git] / src / mesa / drivers / dri / r300 / r300_render.c
1 /**************************************************************************
2
3 Copyright (C) 2004 Nicolai Haehnle.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /**
29 * \file
30 *
31 * \brief R300 Render (Vertex Buffer Implementation)
32 *
33 * The immediate implementation has been removed from CVS in favor of the vertex
34 * buffer implementation.
35 *
36 * The render functions are called by the pipeline manager to render a batch of
37 * primitives. They return TRUE to pass on to the next stage (i.e. software
38 * rasterization) or FALSE to indicate that the pipeline has finished after
39 * rendering something.
40 *
41 * When falling back to software TCL still attempt to use hardware
42 * rasterization.
43 *
44 * I am not sure that the cache-related registers are set up correctly, but
45 * obviously this does work... Further investigation is needed.
46 *
47 * \author Nicolai Haehnle <prefect_@gmx.net>
48 *
49 * \todo Add immediate implementation back? Perhaps this is useful if there are
50 * no bugs...
51 */
52
53 #include "r300_render.h"
54
55 #include "main/glheader.h"
56 #include "main/imports.h"
57 #include "main/enums.h"
58 #include "main/macros.h"
59 #include "main/context.h"
60 #include "main/dd.h"
61 #include "main/simple_list.h"
62 #include "main/api_arrayelt.h"
63 #include "swrast/swrast.h"
64 #include "swrast_setup/swrast_setup.h"
65 #include "vbo/vbo.h"
66 #include "vbo/vbo_split.h"
67 #include "r300_context.h"
68 #include "r300_state.h"
69 #include "r300_reg.h"
70 #include "r300_emit.h"
71 #include "r300_swtcl.h"
72
73 /**
74 * \brief Convert a OpenGL primitive type into a R300 primitive type.
75 */
76 int r300PrimitiveType(r300ContextPtr rmesa, int prim)
77 {
78 switch (prim & PRIM_MODE_MASK) {
79 case GL_POINTS:
80 return R300_VAP_VF_CNTL__PRIM_POINTS;
81 break;
82 case GL_LINES:
83 return R300_VAP_VF_CNTL__PRIM_LINES;
84 break;
85 case GL_LINE_STRIP:
86 return R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
87 break;
88 case GL_LINE_LOOP:
89 return R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
90 break;
91 case GL_TRIANGLES:
92 return R300_VAP_VF_CNTL__PRIM_TRIANGLES;
93 break;
94 case GL_TRIANGLE_STRIP:
95 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
96 break;
97 case GL_TRIANGLE_FAN:
98 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
99 break;
100 case GL_QUADS:
101 return R300_VAP_VF_CNTL__PRIM_QUADS;
102 break;
103 case GL_QUAD_STRIP:
104 return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
105 break;
106 case GL_POLYGON:
107 return R300_VAP_VF_CNTL__PRIM_POLYGON;
108 break;
109 default:
110 assert(0);
111 return -1;
112 break;
113 }
114 }
115
116 int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
117 {
118 int verts_off = 0;
119
120 switch (prim & PRIM_MODE_MASK) {
121 case GL_POINTS:
122 verts_off = 0;
123 break;
124 case GL_LINES:
125 verts_off = num_verts % 2;
126 break;
127 case GL_LINE_STRIP:
128 if (num_verts < 2)
129 verts_off = num_verts;
130 break;
131 case GL_LINE_LOOP:
132 if (num_verts < 2)
133 verts_off = num_verts;
134 break;
135 case GL_TRIANGLES:
136 verts_off = num_verts % 3;
137 break;
138 case GL_TRIANGLE_STRIP:
139 if (num_verts < 3)
140 verts_off = num_verts;
141 break;
142 case GL_TRIANGLE_FAN:
143 if (num_verts < 3)
144 verts_off = num_verts;
145 break;
146 case GL_QUADS:
147 verts_off = num_verts % 4;
148 break;
149 case GL_QUAD_STRIP:
150 if (num_verts < 4)
151 verts_off = num_verts;
152 else
153 verts_off = num_verts % 2;
154 break;
155 case GL_POLYGON:
156 if (num_verts < 3)
157 verts_off = num_verts;
158 break;
159 default:
160 assert(0);
161 return -1;
162 break;
163 }
164
165 return num_verts - verts_off;
166 }
167
168 static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type, int offset)
169 {
170 BATCH_LOCALS(&rmesa->radeon);
171 int size;
172
173 /* offset is in indices */
174 BEGIN_BATCH(10);
175 OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
176 if (rmesa->ind_buf.is_32bit) {
177 /* convert to bytes */
178 offset *= 4;
179 size = vertex_count;
180 OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
181 (vertex_count << 16) | type |
182 R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
183 } else {
184 /* convert to bytes */
185 offset *= 2;
186 size = (vertex_count + 1) >> 1;
187 OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
188 (vertex_count << 16) | type);
189 }
190
191 if (!rmesa->radeon.radeonScreen->kernel_mm) {
192 OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
193 OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
194 (R300_VAP_PORT_IDX0 >> 2));
195 OUT_BATCH_RELOC(0, rmesa->ind_buf.bo, rmesa->ind_buf.bo_offset + offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
196 OUT_BATCH(size);
197 } else {
198 OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
199 OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
200 (R300_VAP_PORT_IDX0 >> 2));
201 OUT_BATCH(rmesa->ind_buf.bo_offset + offset);
202 OUT_BATCH(size);
203 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
204 rmesa->ind_buf.bo, RADEON_GEM_DOMAIN_GTT, 0, 0);
205 }
206 END_BATCH();
207 }
208
209 static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
210 {
211 BATCH_LOCALS(&rmesa->radeon);
212 uint32_t voffset;
213 int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
214 int i;
215
216 if (RADEON_DEBUG & RADEON_VERTS)
217 fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
218 offset);
219
220 if (!rmesa->radeon.radeonScreen->kernel_mm) {
221 BEGIN_BATCH(sz+2+(nr * 2));
222 OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
223 OUT_BATCH(nr);
224
225 for (i = 0; i + 1 < nr; i += 2) {
226 OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
227 (rmesa->radeon.tcl.aos[i].stride << 8) |
228 (rmesa->radeon.tcl.aos[i + 1].components << 16) |
229 (rmesa->radeon.tcl.aos[i + 1].stride << 24));
230
231 voffset = rmesa->radeon.tcl.aos[i + 0].offset +
232 offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
233 OUT_BATCH_RELOC(voffset,
234 rmesa->radeon.tcl.aos[i].bo,
235 voffset,
236 RADEON_GEM_DOMAIN_GTT,
237 0, 0);
238 voffset = rmesa->radeon.tcl.aos[i + 1].offset +
239 offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
240 OUT_BATCH_RELOC(voffset,
241 rmesa->radeon.tcl.aos[i+1].bo,
242 voffset,
243 RADEON_GEM_DOMAIN_GTT,
244 0, 0);
245 }
246
247 if (nr & 1) {
248 OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
249 (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
250 voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
251 offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
252 OUT_BATCH_RELOC(voffset,
253 rmesa->radeon.tcl.aos[nr - 1].bo,
254 voffset,
255 RADEON_GEM_DOMAIN_GTT,
256 0, 0);
257 }
258 END_BATCH();
259 } else {
260
261 BEGIN_BATCH(sz+2+(nr * 2));
262 OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
263 OUT_BATCH(nr);
264
265 for (i = 0; i + 1 < nr; i += 2) {
266 OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
267 (rmesa->radeon.tcl.aos[i].stride << 8) |
268 (rmesa->radeon.tcl.aos[i + 1].components << 16) |
269 (rmesa->radeon.tcl.aos[i + 1].stride << 24));
270
271 voffset = rmesa->radeon.tcl.aos[i + 0].offset +
272 offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
273 OUT_BATCH(voffset);
274 voffset = rmesa->radeon.tcl.aos[i + 1].offset +
275 offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
276 OUT_BATCH(voffset);
277 }
278
279 if (nr & 1) {
280 OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
281 (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
282 voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
283 offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
284 OUT_BATCH(voffset);
285 }
286 for (i = 0; i + 1 < nr; i += 2) {
287 voffset = rmesa->radeon.tcl.aos[i + 0].offset +
288 offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
289 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
290 rmesa->radeon.tcl.aos[i+0].bo,
291 RADEON_GEM_DOMAIN_GTT,
292 0, 0);
293 voffset = rmesa->radeon.tcl.aos[i + 1].offset +
294 offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
295 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
296 rmesa->radeon.tcl.aos[i+1].bo,
297 RADEON_GEM_DOMAIN_GTT,
298 0, 0);
299 }
300 if (nr & 1) {
301 voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
302 offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
303 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
304 rmesa->radeon.tcl.aos[nr-1].bo,
305 RADEON_GEM_DOMAIN_GTT,
306 0, 0);
307 }
308 END_BATCH();
309 }
310
311 }
312
313 static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
314 {
315 BATCH_LOCALS(&rmesa->radeon);
316
317 r300_emit_scissor(rmesa->radeon.glCtx);
318 BEGIN_BATCH(3);
319 OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
320 OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
321 END_BATCH();
322 }
323
324 void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
325 {
326 r300ContextPtr rmesa = R300_CONTEXT(ctx);
327 BATCH_LOCALS(&rmesa->radeon);
328 int type, num_verts;
329
330 radeon_prepare_render(&rmesa->radeon);
331
332 type = r300PrimitiveType(rmesa, prim);
333 num_verts = r300NumVerts(rmesa, end - start, prim);
334
335 if (type < 0 || num_verts <= 0)
336 return;
337
338 if (rmesa->ind_buf.bo) {
339 GLuint first, incr, offset = 0;
340
341 if (!split_prim_inplace(prim & PRIM_MODE_MASK, &first, &incr) &&
342 num_verts > 65500) {
343 WARN_ONCE("Fixme: can't handle spliting prim %d\n", prim);
344 return;
345 }
346
347
348 r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, 0);
349 if (rmesa->radeon.radeonScreen->kernel_mm) {
350 BEGIN_BATCH_NO_AUTOSTATE(2);
351 OUT_BATCH_REGSEQ(R300_VAP_VF_MAX_VTX_INDX, 1);
352 OUT_BATCH(rmesa->radeon.tcl.aos[0].count);
353 END_BATCH();
354 }
355
356 r300_emit_scissor(rmesa->radeon.glCtx);
357 while (num_verts > 0) {
358 int nr;
359 int align;
360
361 nr = MIN2(num_verts, 65535);
362 nr -= (nr - first) % incr;
363
364 /* get alignment for IB correct */
365 if (nr != num_verts) {
366 do {
367 align = nr * (rmesa->ind_buf.is_32bit ? 4 : 2);
368 if (align % 4)
369 nr -= incr;
370 } while(align % 4);
371 if (nr <= 0) {
372 WARN_ONCE("did the impossible happen? we never aligned nr to dword\n");
373 return;
374 }
375
376 }
377 r300FireEB(rmesa, nr, type, offset);
378
379 num_verts -= nr;
380 offset += nr;
381 }
382
383 } else {
384 GLuint first, incr, offset = 0;
385
386 if (!split_prim_inplace(prim & PRIM_MODE_MASK, &first, &incr) &&
387 num_verts > 65535) {
388 WARN_ONCE("Fixme: can't handle spliting prim %d\n", prim);
389 return;
390 }
391
392 if (rmesa->radeon.radeonScreen->kernel_mm) {
393 BEGIN_BATCH_NO_AUTOSTATE(2);
394 OUT_BATCH_REGSEQ(R300_VAP_VF_MAX_VTX_INDX, 1);
395 OUT_BATCH(rmesa->radeon.tcl.aos[0].count);
396 END_BATCH();
397 }
398
399 r300_emit_scissor(rmesa->radeon.glCtx);
400 while (num_verts > 0) {
401 int nr;
402 nr = MIN2(num_verts, 65535);
403 nr -= (nr - first) % incr;
404 r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start + offset);
405 r300FireAOS(rmesa, nr, type);
406 num_verts -= nr;
407 offset += nr;
408 }
409 }
410 COMMIT_BATCH();
411 }
412
413 static const char *getFallbackString(r300ContextPtr rmesa, uint32_t bit)
414 {
415 static char common_fallback_str[32];
416 switch (bit) {
417 case R300_FALLBACK_VERTEX_PROGRAM :
418 return "vertex program";
419 case R300_FALLBACK_LINE_SMOOTH:
420 return "smooth lines";
421 case R300_FALLBACK_POINT_SMOOTH:
422 return "smooth points";
423 case R300_FALLBACK_POLYGON_SMOOTH:
424 return "smooth polygons";
425 case R300_FALLBACK_LINE_STIPPLE:
426 return "line stipple";
427 case R300_FALLBACK_POLYGON_STIPPLE:
428 return "polygon stipple";
429 case R300_FALLBACK_STENCIL_TWOSIDE:
430 return "two-sided stencil";
431 case R300_FALLBACK_RENDER_MODE:
432 return "render mode != GL_RENDER";
433 case R300_FALLBACK_FRAGMENT_PROGRAM:
434 return "fragment program";
435 case R300_FALLBACK_RADEON_COMMON:
436 snprintf(common_fallback_str, 32, "radeon common 0x%08x", rmesa->radeon.Fallback);
437 return common_fallback_str;
438 case R300_FALLBACK_AOS_LIMIT:
439 return "aos limit";
440 case R300_FALLBACK_INVALID_BUFFERS:
441 return "invalid buffers";
442 default:
443 return "unknown";
444 }
445 }
446
447 void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode)
448 {
449 TNLcontext *tnl = TNL_CONTEXT(ctx);
450 r300ContextPtr rmesa = R300_CONTEXT(ctx);
451 uint32_t old_fallback = rmesa->fallback;
452 static uint32_t fallback_warn = 0;
453
454 if (mode) {
455 if ((fallback_warn & bit) == 0) {
456 if (RADEON_DEBUG & RADEON_FALLBACKS)
457 fprintf(stderr, "WARNING! Falling back to software for %s\n", getFallbackString(rmesa, bit));
458 fallback_warn |= bit;
459 }
460 rmesa->fallback |= bit;
461
462 /* update only if we change from no tcl fallbacks to some tcl fallbacks */
463 if (rmesa->options.hw_tcl_enabled) {
464 if (((old_fallback & R300_TCL_FALLBACK_MASK) == 0) &&
465 ((bit & R300_TCL_FALLBACK_MASK) > 0)) {
466 R300_STATECHANGE(rmesa, vap_cntl_status);
467 rmesa->hw.vap_cntl_status.cmd[1] |= R300_VAP_TCL_BYPASS;
468 }
469 }
470
471 /* update only if we change from no raster fallbacks to some raster fallbacks */
472 if (((old_fallback & R300_RASTER_FALLBACK_MASK) == 0) &&
473 ((bit & R300_RASTER_FALLBACK_MASK) > 0)) {
474
475 radeon_firevertices(&rmesa->radeon);
476 rmesa->radeon.swtcl.RenderIndex = ~0;
477 _swsetup_Wakeup( ctx );
478 }
479 } else {
480 rmesa->fallback &= ~bit;
481
482 /* update only if we have disabled all tcl fallbacks */
483 if (rmesa->options.hw_tcl_enabled) {
484 if ((old_fallback & R300_TCL_FALLBACK_MASK) == bit) {
485 R300_STATECHANGE(rmesa, vap_cntl_status);
486 rmesa->hw.vap_cntl_status.cmd[1] &= ~R300_VAP_TCL_BYPASS;
487 }
488 }
489
490 /* update only if we have disabled all raster fallbacks */
491 if ((old_fallback & R300_RASTER_FALLBACK_MASK) == bit) {
492 _swrast_flush( ctx );
493
494 tnl->Driver.Render.Start = r300RenderStart;
495 tnl->Driver.Render.Finish = r300RenderFinish;
496 tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive;
497 tnl->Driver.Render.ResetLineStipple = r300ResetLineStipple;
498 tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
499 tnl->Driver.Render.CopyPV = _tnl_copy_pv;
500 tnl->Driver.Render.Interp = _tnl_interp;
501
502 _tnl_invalidate_vertex_state( ctx, ~0 );
503 _tnl_invalidate_vertices( ctx, ~0 );
504 }
505 }
506
507 }