Merge branch 'asm-shader-rework-1'
[mesa.git] / src / mesa / drivers / dri / r300 / r300_render.c
1 /**************************************************************************
2
3 Copyright (C) 2004 Nicolai Haehnle.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /**
29 * \file
30 *
31 * \brief R300 Render (Vertex Buffer Implementation)
32 *
33 * The immediate implementation has been removed from CVS in favor of the vertex
34 * buffer implementation.
35 *
36 * The render functions are called by the pipeline manager to render a batch of
37 * primitives. They return TRUE to pass on to the next stage (i.e. software
38 * rasterization) or FALSE to indicate that the pipeline has finished after
39 * rendering something.
40 *
41 * When falling back to software TCL still attempt to use hardware
42 * rasterization.
43 *
44 * I am not sure that the cache-related registers are set up correctly, but
45 * evidently this does work... Further investigation is needed.
46 *
47 * \author Nicolai Haehnle <prefect_@gmx.net>
48 *
49 * \todo Add immediate implementation back? Perhaps this is useful if there are
50 * no bugs...
51 */
52
53 #include "r300_render.h"
54
55 #include "main/glheader.h"
56 #include "main/state.h"
57 #include "main/imports.h"
58 #include "main/enums.h"
59 #include "main/macros.h"
60 #include "main/context.h"
61 #include "main/dd.h"
62 #include "main/simple_list.h"
63 #include "main/api_arrayelt.h"
64 #include "swrast/swrast.h"
65 #include "swrast_setup/swrast_setup.h"
66 #include "vbo/vbo.h"
67 #include "vbo/vbo_split.h"
68 #include "tnl/tnl.h"
69 #include "tnl/t_vp_build.h"
70 #include "radeon_reg.h"
71 #include "radeon_macros.h"
72 #include "r300_context.h"
73 #include "r300_ioctl.h"
74 #include "r300_state.h"
75 #include "r300_reg.h"
76 #include "r300_tex.h"
77 #include "r300_emit.h"
78 #include "r300_fragprog_common.h"
79 #include "r300_swtcl.h"
80
81 /**
82 * \brief Convert a OpenGL primitive type into a R300 primitive type.
83 */
84 int r300PrimitiveType(r300ContextPtr rmesa, int prim)
85 {
86 switch (prim & PRIM_MODE_MASK) {
87 case GL_POINTS:
88 return R300_VAP_VF_CNTL__PRIM_POINTS;
89 break;
90 case GL_LINES:
91 return R300_VAP_VF_CNTL__PRIM_LINES;
92 break;
93 case GL_LINE_STRIP:
94 return R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
95 break;
96 case GL_LINE_LOOP:
97 return R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
98 break;
99 case GL_TRIANGLES:
100 return R300_VAP_VF_CNTL__PRIM_TRIANGLES;
101 break;
102 case GL_TRIANGLE_STRIP:
103 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
104 break;
105 case GL_TRIANGLE_FAN:
106 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
107 break;
108 case GL_QUADS:
109 return R300_VAP_VF_CNTL__PRIM_QUADS;
110 break;
111 case GL_QUAD_STRIP:
112 return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
113 break;
114 case GL_POLYGON:
115 return R300_VAP_VF_CNTL__PRIM_POLYGON;
116 break;
117 default:
118 assert(0);
119 return -1;
120 break;
121 }
122 }
123
124 int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
125 {
126 int verts_off = 0;
127
128 switch (prim & PRIM_MODE_MASK) {
129 case GL_POINTS:
130 verts_off = 0;
131 break;
132 case GL_LINES:
133 verts_off = num_verts % 2;
134 break;
135 case GL_LINE_STRIP:
136 if (num_verts < 2)
137 verts_off = num_verts;
138 break;
139 case GL_LINE_LOOP:
140 if (num_verts < 2)
141 verts_off = num_verts;
142 break;
143 case GL_TRIANGLES:
144 verts_off = num_verts % 3;
145 break;
146 case GL_TRIANGLE_STRIP:
147 if (num_verts < 3)
148 verts_off = num_verts;
149 break;
150 case GL_TRIANGLE_FAN:
151 if (num_verts < 3)
152 verts_off = num_verts;
153 break;
154 case GL_QUADS:
155 verts_off = num_verts % 4;
156 break;
157 case GL_QUAD_STRIP:
158 if (num_verts < 4)
159 verts_off = num_verts;
160 else
161 verts_off = num_verts % 2;
162 break;
163 case GL_POLYGON:
164 if (num_verts < 3)
165 verts_off = num_verts;
166 break;
167 default:
168 assert(0);
169 return -1;
170 break;
171 }
172
173 return num_verts - verts_off;
174 }
175
176 static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type, int offset)
177 {
178 BATCH_LOCALS(&rmesa->radeon);
179 int size;
180
181 /* offset is in indices */
182 BEGIN_BATCH(10);
183 OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
184 if (rmesa->ind_buf.is_32bit) {
185 /* convert to bytes */
186 offset *= 4;
187 size = vertex_count;
188 OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
189 (vertex_count << 16) | type |
190 R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
191 } else {
192 /* convert to bytes */
193 offset *= 2;
194 size = (vertex_count + 1) >> 1;
195 OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
196 (vertex_count << 16) | type);
197 }
198
199 if (!rmesa->radeon.radeonScreen->kernel_mm) {
200 OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
201 OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
202 (R300_VAP_PORT_IDX0 >> 2));
203 OUT_BATCH_RELOC(0, rmesa->ind_buf.bo, rmesa->ind_buf.bo_offset + offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
204 OUT_BATCH(size);
205 } else {
206 OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
207 OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
208 (R300_VAP_PORT_IDX0 >> 2));
209 OUT_BATCH(rmesa->ind_buf.bo_offset + offset);
210 OUT_BATCH(size);
211 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
212 rmesa->ind_buf.bo, RADEON_GEM_DOMAIN_GTT, 0, 0);
213 }
214 END_BATCH();
215 }
216
217 static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
218 {
219 BATCH_LOCALS(&rmesa->radeon);
220 uint32_t voffset;
221 int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
222 int i;
223
224 if (RADEON_DEBUG & DEBUG_VERTS)
225 fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
226 offset);
227
228 if (!rmesa->radeon.radeonScreen->kernel_mm) {
229 BEGIN_BATCH(sz+2+(nr * 2));
230 OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
231 OUT_BATCH(nr);
232
233 for (i = 0; i + 1 < nr; i += 2) {
234 OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
235 (rmesa->radeon.tcl.aos[i].stride << 8) |
236 (rmesa->radeon.tcl.aos[i + 1].components << 16) |
237 (rmesa->radeon.tcl.aos[i + 1].stride << 24));
238
239 voffset = rmesa->radeon.tcl.aos[i + 0].offset +
240 offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
241 OUT_BATCH_RELOC(voffset,
242 rmesa->radeon.tcl.aos[i].bo,
243 voffset,
244 RADEON_GEM_DOMAIN_GTT,
245 0, 0);
246 voffset = rmesa->radeon.tcl.aos[i + 1].offset +
247 offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
248 OUT_BATCH_RELOC(voffset,
249 rmesa->radeon.tcl.aos[i+1].bo,
250 voffset,
251 RADEON_GEM_DOMAIN_GTT,
252 0, 0);
253 }
254
255 if (nr & 1) {
256 OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
257 (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
258 voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
259 offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
260 OUT_BATCH_RELOC(voffset,
261 rmesa->radeon.tcl.aos[nr - 1].bo,
262 voffset,
263 RADEON_GEM_DOMAIN_GTT,
264 0, 0);
265 }
266 END_BATCH();
267 } else {
268
269 BEGIN_BATCH(sz+2+(nr * 2));
270 OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
271 OUT_BATCH(nr);
272
273 for (i = 0; i + 1 < nr; i += 2) {
274 OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
275 (rmesa->radeon.tcl.aos[i].stride << 8) |
276 (rmesa->radeon.tcl.aos[i + 1].components << 16) |
277 (rmesa->radeon.tcl.aos[i + 1].stride << 24));
278
279 voffset = rmesa->radeon.tcl.aos[i + 0].offset +
280 offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
281 OUT_BATCH(voffset);
282 voffset = rmesa->radeon.tcl.aos[i + 1].offset +
283 offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
284 OUT_BATCH(voffset);
285 }
286
287 if (nr & 1) {
288 OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
289 (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
290 voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
291 offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
292 OUT_BATCH(voffset);
293 }
294 for (i = 0; i + 1 < nr; i += 2) {
295 voffset = rmesa->radeon.tcl.aos[i + 0].offset +
296 offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
297 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
298 rmesa->radeon.tcl.aos[i+0].bo,
299 RADEON_GEM_DOMAIN_GTT,
300 0, 0);
301 voffset = rmesa->radeon.tcl.aos[i + 1].offset +
302 offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
303 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
304 rmesa->radeon.tcl.aos[i+1].bo,
305 RADEON_GEM_DOMAIN_GTT,
306 0, 0);
307 }
308 if (nr & 1) {
309 voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
310 offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
311 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
312 rmesa->radeon.tcl.aos[nr-1].bo,
313 RADEON_GEM_DOMAIN_GTT,
314 0, 0);
315 }
316 END_BATCH();
317 }
318
319 }
320
321 static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
322 {
323 BATCH_LOCALS(&rmesa->radeon);
324
325 r300_emit_scissor(rmesa->radeon.glCtx);
326 BEGIN_BATCH(3);
327 OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
328 OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
329 END_BATCH();
330 }
331
332 void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
333 {
334 r300ContextPtr rmesa = R300_CONTEXT(ctx);
335 BATCH_LOCALS(&rmesa->radeon);
336 int type, num_verts;
337
338 type = r300PrimitiveType(rmesa, prim);
339 num_verts = r300NumVerts(rmesa, end - start, prim);
340
341 if (type < 0 || num_verts <= 0)
342 return;
343
344 /* Make space for at least 128 dwords.
345 * This is supposed to ensure that we can get all rendering
346 * commands into a single command buffer.
347 */
348 rcommonEnsureCmdBufSpace(&rmesa->radeon, 128, __FUNCTION__);
349
350 if (rmesa->ind_buf.bo) {
351 GLuint first, incr, offset = 0;
352
353 if (!split_prim_inplace(prim & PRIM_MODE_MASK, &first, &incr) &&
354 num_verts > 65500) {
355 WARN_ONCE("Fixme: can't handle spliting prim %d\n", prim);
356 return;
357 }
358
359
360 r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, 0);
361 if (rmesa->radeon.radeonScreen->kernel_mm) {
362 BEGIN_BATCH_NO_AUTOSTATE(2);
363 OUT_BATCH_REGSEQ(R300_VAP_VF_MAX_VTX_INDX, 1);
364 OUT_BATCH(rmesa->radeon.tcl.aos[0].count);
365 END_BATCH();
366 }
367
368 r300_emit_scissor(rmesa->radeon.glCtx);
369 while (num_verts > 0) {
370 int nr;
371 int align;
372
373 nr = MIN2(num_verts, 65535);
374 nr -= (nr - first) % incr;
375
376 /* get alignment for IB correct */
377 if (nr != num_verts) {
378 do {
379 align = nr * (rmesa->ind_buf.is_32bit ? 4 : 2);
380 if (align % 4)
381 nr -= incr;
382 } while(align % 4);
383 if (nr <= 0) {
384 WARN_ONCE("did the impossible happen? we never aligned nr to dword\n");
385 return;
386 }
387
388 }
389 r300FireEB(rmesa, nr, type, offset);
390
391 num_verts -= nr;
392 offset += nr;
393 }
394
395 } else {
396 GLuint first, incr, offset = 0;
397
398 if (!split_prim_inplace(prim & PRIM_MODE_MASK, &first, &incr) &&
399 num_verts > 65500) {
400 WARN_ONCE("Fixme: can't handle spliting prim %d\n", prim);
401 return;
402 }
403 r300_emit_scissor(rmesa->radeon.glCtx);
404 while (num_verts > 0) {
405 int nr;
406 nr = MIN2(num_verts, 65535);
407 nr -= (nr - first) % incr;
408 r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start + offset);
409 r300FireAOS(rmesa, nr, type);
410 num_verts -= nr;
411 offset += nr;
412 }
413 }
414 COMMIT_BATCH();
415 }
416
417 static const char *getFallbackString(uint32_t bit)
418 {
419 switch (bit) {
420 case R300_FALLBACK_VERTEX_PROGRAM :
421 return "vertex program";
422 case R300_FALLBACK_LINE_SMOOTH:
423 return "smooth lines";
424 case R300_FALLBACK_POINT_SMOOTH:
425 return "smooth points";
426 case R300_FALLBACK_POLYGON_SMOOTH:
427 return "smooth polygons";
428 case R300_FALLBACK_LINE_STIPPLE:
429 return "line stipple";
430 case R300_FALLBACK_POLYGON_STIPPLE:
431 return "polygon stipple";
432 case R300_FALLBACK_STENCIL_TWOSIDE:
433 return "two-sided stencil";
434 case R300_FALLBACK_RENDER_MODE:
435 return "render mode != GL_RENDER";
436 case R300_FALLBACK_FRAGMENT_PROGRAM:
437 return "fragment program";
438 case R300_FALLBACK_AOS_LIMIT:
439 return "aos limit";
440 case R300_FALLBACK_INVALID_BUFFERS:
441 return "invalid buffers";
442 default:
443 return "unknown";
444 }
445 }
446
447 void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode)
448 {
449 TNLcontext *tnl = TNL_CONTEXT(ctx);
450 r300ContextPtr rmesa = R300_CONTEXT(ctx);
451 uint32_t old_fallback = rmesa->fallback;
452 static uint32_t fallback_warn = 0;
453
454 if (mode) {
455 if ((fallback_warn & bit) == 0) {
456 if (RADEON_DEBUG & DEBUG_FALLBACKS)
457 _mesa_fprintf(stderr, "WARNING! Falling back to software for %s\n", getFallbackString(bit));
458 fallback_warn |= bit;
459 }
460 rmesa->fallback |= bit;
461
462 /* update only if we change from no tcl fallbacks to some tcl fallbacks */
463 if (rmesa->options.hw_tcl_enabled) {
464 if (((old_fallback & R300_TCL_FALLBACK_MASK) == 0) &&
465 ((bit & R300_TCL_FALLBACK_MASK) > 0)) {
466 R300_STATECHANGE(rmesa, vap_cntl_status);
467 rmesa->hw.vap_cntl_status.cmd[1] |= R300_VAP_TCL_BYPASS;
468 }
469 }
470
471 /* update only if we change from no raster fallbacks to some raster fallbacks */
472 if (((old_fallback & R300_RASTER_FALLBACK_MASK) == 0) &&
473 ((bit & R300_RASTER_FALLBACK_MASK) > 0)) {
474
475 radeon_firevertices(&rmesa->radeon);
476 rmesa->radeon.swtcl.RenderIndex = ~0;
477 _swsetup_Wakeup( ctx );
478 }
479 } else {
480 rmesa->fallback &= ~bit;
481
482 /* update only if we have disabled all tcl fallbacks */
483 if (rmesa->options.hw_tcl_enabled) {
484 if ((old_fallback & R300_RASTER_FALLBACK_MASK) == bit) {
485 R300_STATECHANGE(rmesa, vap_cntl_status);
486 rmesa->hw.vap_cntl_status.cmd[1] &= ~R300_VAP_TCL_BYPASS;
487 }
488 }
489
490 /* update only if we have disabled all raster fallbacks */
491 if ((old_fallback & R300_RASTER_FALLBACK_MASK) == bit) {
492 _swrast_flush( ctx );
493
494 tnl->Driver.Render.Start = r300RenderStart;
495 tnl->Driver.Render.Finish = r300RenderFinish;
496 tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive;
497 tnl->Driver.Render.ResetLineStipple = r300ResetLineStipple;
498 tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
499 tnl->Driver.Render.CopyPV = _tnl_copy_pv;
500 tnl->Driver.Render.Interp = _tnl_interp;
501
502 _tnl_invalidate_vertex_state( ctx, ~0 );
503 _tnl_invalidate_vertices( ctx, ~0 );
504 }
505 }
506
507 }