r300: Endianness fixes for recent vertex path changes.
[mesa.git] / src / mesa / drivers / dri / r300 / r300_render.c
1 /**************************************************************************
2
3 Copyright (C) 2004 Nicolai Haehnle.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /**
29 * \file
30 *
31 * \brief R300 Render (Vertex Buffer Implementation)
32 *
33 * The immediate implementation has been removed from CVS in favor of the vertex
34 * buffer implementation.
35 *
36 * The render functions are called by the pipeline manager to render a batch of
37 * primitives. They return TRUE to pass on to the next stage (i.e. software
38 * rasterization) or FALSE to indicate that the pipeline has finished after
39 * rendering something.
40 *
41 * When falling back to software TCL still attempt to use hardware
42 * rasterization.
43 *
44 * I am not sure that the cache related registers are setup correctly, but
45 * obviously this does work... Further investigation is needed.
46 *
47 * \author Nicolai Haehnle <prefect_@gmx.net>
48 *
49 * \todo Add immediate implementation back? Perhaps this is useful if there are
50 * no bugs...
51 */
52
53 #include "r300_render.h"
54
55 #include "main/glheader.h"
56 #include "main/state.h"
57 #include "main/imports.h"
58 #include "main/enums.h"
59 #include "main/macros.h"
60 #include "main/context.h"
61 #include "main/dd.h"
62 #include "main/simple_list.h"
63 #include "main/api_arrayelt.h"
64 #include "swrast/swrast.h"
65 #include "swrast_setup/swrast_setup.h"
66 #include "vbo/vbo.h"
67 #include "tnl/tnl.h"
68 #include "tnl/t_vp_build.h"
69 #include "radeon_reg.h"
70 #include "radeon_macros.h"
71 #include "r300_context.h"
72 #include "r300_ioctl.h"
73 #include "r300_state.h"
74 #include "r300_reg.h"
75 #include "r300_tex.h"
76 #include "r300_emit.h"
77 #include "r300_fragprog_common.h"
78 #include "r300_swtcl.h"
79
80 /**
81 * \brief Convert a OpenGL primitive type into a R300 primitive type.
82 */
83 int r300PrimitiveType(r300ContextPtr rmesa, int prim)
84 {
85 switch (prim & PRIM_MODE_MASK) {
86 case GL_POINTS:
87 return R300_VAP_VF_CNTL__PRIM_POINTS;
88 break;
89 case GL_LINES:
90 return R300_VAP_VF_CNTL__PRIM_LINES;
91 break;
92 case GL_LINE_STRIP:
93 return R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
94 break;
95 case GL_LINE_LOOP:
96 return R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
97 break;
98 case GL_TRIANGLES:
99 return R300_VAP_VF_CNTL__PRIM_TRIANGLES;
100 break;
101 case GL_TRIANGLE_STRIP:
102 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
103 break;
104 case GL_TRIANGLE_FAN:
105 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
106 break;
107 case GL_QUADS:
108 return R300_VAP_VF_CNTL__PRIM_QUADS;
109 break;
110 case GL_QUAD_STRIP:
111 return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
112 break;
113 case GL_POLYGON:
114 return R300_VAP_VF_CNTL__PRIM_POLYGON;
115 break;
116 default:
117 assert(0);
118 return -1;
119 break;
120 }
121 }
122
123 int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
124 {
125 int verts_off = 0;
126
127 switch (prim & PRIM_MODE_MASK) {
128 case GL_POINTS:
129 verts_off = 0;
130 break;
131 case GL_LINES:
132 verts_off = num_verts % 2;
133 break;
134 case GL_LINE_STRIP:
135 if (num_verts < 2)
136 verts_off = num_verts;
137 break;
138 case GL_LINE_LOOP:
139 if (num_verts < 2)
140 verts_off = num_verts;
141 break;
142 case GL_TRIANGLES:
143 verts_off = num_verts % 3;
144 break;
145 case GL_TRIANGLE_STRIP:
146 if (num_verts < 3)
147 verts_off = num_verts;
148 break;
149 case GL_TRIANGLE_FAN:
150 if (num_verts < 3)
151 verts_off = num_verts;
152 break;
153 case GL_QUADS:
154 verts_off = num_verts % 4;
155 break;
156 case GL_QUAD_STRIP:
157 if (num_verts < 4)
158 verts_off = num_verts;
159 else
160 verts_off = num_verts % 2;
161 break;
162 case GL_POLYGON:
163 if (num_verts < 3)
164 verts_off = num_verts;
165 break;
166 default:
167 assert(0);
168 return -1;
169 break;
170 }
171
172 return num_verts - verts_off;
173 }
174
/**
 * \brief Upload the current index buffer into a freshly allocated DMA
 * region so the hardware INDX_BUFFER packet can reference it.
 *
 * The destination bo/offset are stored in rmesa->radeon.tcl.elt_dma_bo /
 * elt_dma_offset for the subsequent r300FireEB().
 *
 * \param n_elts  number of indices to upload
 */
static void r300EmitElts(GLcontext * ctx, unsigned long n_elts)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	void *out;
	GLuint size;

	/* 2 or 4 bytes per index, rounded up to a dword multiple */
	size = ((rmesa->ind_buf.is_32bit ? 4 : 2) * n_elts + 3) & ~3;

	radeonAllocDmaRegion(&rmesa->radeon, &rmesa->radeon.tcl.elt_dma_bo,
			     &rmesa->radeon.tcl.elt_dma_offset, size, 4);
	radeon_bo_map(rmesa->radeon.tcl.elt_dma_bo, 1);
	out = rmesa->radeon.tcl.elt_dma_bo->ptr + rmesa->radeon.tcl.elt_dma_offset;
	memcpy(out, rmesa->ind_buf.ptr, size);
	radeon_bo_unmap(rmesa->radeon.tcl.elt_dma_bo);
}
190
/**
 * \brief Emit an indexed draw: 3D_DRAW_INDX_2 followed by an INDX_BUFFER
 * packet pointing at the index data previously uploaded by r300EmitElts().
 *
 * \param vertex_count  number of indices to draw; no-op when <= 0
 * \param type          R300_VAP_VF_CNTL__PRIM_* primitive type
 */
static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type)
{
	BATCH_LOCALS(&rmesa->radeon);

	r300_emit_scissor(rmesa->radeon.glCtx);
	if (vertex_count > 0) {
		int size;

		BEGIN_BATCH(10);
		OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
		if (rmesa->ind_buf.is_32bit) {
			/* 32-bit indices: one dword each */
			size = vertex_count;
			OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
				  ((vertex_count + 0) << 16) | type |
				  R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
		} else {
			/* 16-bit indices: two packed per dword, round up */
			size = (vertex_count + 1) >> 1;
			OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
				  ((vertex_count + 0) << 16) | type);
		}

		if (!rmesa->radeon.radeonScreen->kernel_mm) {
			/* legacy memory manager: relocation emitted inline */
			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
			OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
				  (R300_VAP_PORT_IDX0 >> 2));
			OUT_BATCH_RELOC(rmesa->radeon.tcl.elt_dma_offset,
					rmesa->radeon.tcl.elt_dma_bo,
					rmesa->radeon.tcl.elt_dma_offset,
					RADEON_GEM_DOMAIN_GTT, 0, 0);
			OUT_BATCH(size);
		} else {
			/* kernel memory manager: raw offset in the packet,
			 * relocation appended to the CS afterwards */
			OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
			OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
				  (R300_VAP_PORT_IDX0 >> 2));
			OUT_BATCH(rmesa->radeon.tcl.elt_dma_offset);
			OUT_BATCH(size);
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->radeon.tcl.elt_dma_bo,
					      RADEON_GEM_DOMAIN_GTT, 0, 0);
		}
		END_BATCH();
	}
}
234
/**
 * \brief Emit the 3D_LOAD_VBPNTR packet describing the vertex arrays
 * (arrays-of-structures) for the next draw.
 *
 * Packet layout: one header dword with the attribute count, then one
 * dword per *pair* of attributes packing components/stride for both,
 * followed by one offset dword per attribute.  With the legacy memory
 * manager the offsets are emitted as relocations inline; with the kernel
 * memory manager raw offsets go into the packet and the relocations are
 * appended to the command stream afterwards, in the same order.
 *
 * \param nr      number of vertex attribute arrays in rmesa->radeon.tcl.aos
 * \param offset  starting vertex index; scaled by 4*stride into a byte
 *                offset per array (strides are in dwords)
 */
static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
{
	BATCH_LOCALS(&rmesa->radeon);
	uint32_t voffset;
	/* header + 3 dwords per attribute pair + 2 for a trailing odd one */
	int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
	int i;

	if (RADEON_DEBUG & DEBUG_VERTS)
		fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
			offset);


	if (!rmesa->radeon.radeonScreen->kernel_mm) {
		/* legacy path: offsets are emitted as inline relocations */
		BEGIN_BATCH(sz+2+(nr * 2));
		OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
		OUT_BATCH(nr);

		for (i = 0; i + 1 < nr; i += 2) {
			/* two attributes packed into one descriptor dword */
			OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
				  (rmesa->radeon.tcl.aos[i].stride << 8) |
				  (rmesa->radeon.tcl.aos[i + 1].components << 16) |
				  (rmesa->radeon.tcl.aos[i + 1].stride << 24));

			voffset = rmesa->radeon.tcl.aos[i + 0].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->radeon.tcl.aos[i].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
			voffset = rmesa->radeon.tcl.aos[i + 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->radeon.tcl.aos[i+1].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
		}

		if (nr & 1) {
			/* odd attribute count: final descriptor holds one attribute */
			OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
				  (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
			voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
			OUT_BATCH_RELOC(voffset,
					rmesa->radeon.tcl.aos[nr - 1].bo,
					voffset,
					RADEON_GEM_DOMAIN_GTT,
					0, 0);
		}
		END_BATCH();
	} else {
		/* kernel_mm path: raw offsets in the packet body... */
		BEGIN_BATCH(sz+2+(nr * 2));
		OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
		OUT_BATCH(nr);

		for (i = 0; i + 1 < nr; i += 2) {
			OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
				  (rmesa->radeon.tcl.aos[i].stride << 8) |
				  (rmesa->radeon.tcl.aos[i + 1].components << 16) |
				  (rmesa->radeon.tcl.aos[i + 1].stride << 24));

			voffset = rmesa->radeon.tcl.aos[i + 0].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
			OUT_BATCH(voffset);
			voffset = rmesa->radeon.tcl.aos[i + 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
			OUT_BATCH(voffset);
		}

		if (nr & 1) {
			OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
				  (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
			voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
			OUT_BATCH(voffset);
		}
		/* ...then one relocation per attribute, same order as the
		 * offsets above (voffset recomputed but unused here) */
		for (i = 0; i + 1 < nr; i += 2) {
			voffset = rmesa->radeon.tcl.aos[i + 0].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->radeon.tcl.aos[i+0].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
			voffset = rmesa->radeon.tcl.aos[i + 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->radeon.tcl.aos[i+1].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
		}
		if (nr & 1) {
			voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
				offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
					      rmesa->radeon.tcl.aos[nr-1].bo,
					      RADEON_GEM_DOMAIN_GTT,
					      0, 0);
		}
		END_BATCH();
	}

}
339
/**
 * \brief Emit a non-indexed draw (3D_DRAW_VBUF_2) walking the vertex
 * arrays previously set up by r300EmitAOS() as a plain vertex list.
 *
 * \param vertex_count  number of vertices to draw
 * \param type          R300_VAP_VF_CNTL__PRIM_* primitive type
 */
static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
{
	BATCH_LOCALS(&rmesa->radeon);

	r300_emit_scissor(rmesa->radeon.glCtx);
	BEGIN_BATCH(3);
	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
	END_BATCH();
}
350
/**
 * \brief Render one primitive run from the TNL vertex buffer, either as
 * an indexed draw (when an index buffer is bound) or a straight vertex
 * list.
 *
 * Silently returns on unknown primitive types, empty runs, or (for the
 * indexed path) more than 65535 elements.
 *
 * \param start  first vertex of the run
 * \param end    one past the last vertex of the run
 * \param prim   TNL primitive word
 */
void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	int type, num_verts;

	type = r300PrimitiveType(rmesa, prim);
	num_verts = r300NumVerts(rmesa, end - start, prim);

	if (type < 0 || num_verts <= 0)
		return;

	/* Make space for at least 64 dwords.
	 * This is supposed to ensure that we can get all rendering
	 * commands into a single command buffer.
	 */
	rcommonEnsureCmdBufSpace(&rmesa->radeon, 64, __FUNCTION__);

	if (rmesa->ind_buf.ptr) {
		if (num_verts > 65535) {
			/* not implemented yet */
			WARN_ONCE("Too many elts\n");
			return;
		}
		/* Note: The following is incorrect, but it's the best I can do
		 * without a major refactoring of how DMA memory is handled.
		 * The problem: Ensuring that both vertex arrays *and* index
		 * arrays are at the right position, and then ensuring that
		 * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are emitted
		 * at once.
		 *
		 * So why is the following incorrect? Well, it seems like
		 * allocating the index array might actually evict the vertex
		 * arrays. *sigh*
		 */
		r300EmitElts(ctx, num_verts);
		r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start);
		r300FireEB(rmesa, num_verts, type);
	} else {
		r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start);
		r300FireAOS(rmesa, num_verts, type);
	}
	COMMIT_BATCH();
}
394
/**
 * \brief Render the whole TNL vertex buffer with hardware TCL: update
 * shaders and state, emit the arrays, then draw each primitive run.
 *
 * Called from r300RunNonTCLRender(); \p stage is unused here.
 */
static void r300RunRender(GLcontext * ctx, struct tnl_pipeline_stage *stage)
{
	r300ContextPtr rmesa = R300_CONTEXT(ctx);
	int i;
	TNLcontext *tnl = TNL_CONTEXT(ctx);
	struct vertex_buffer *vb = &tnl->vb;

	if (RADEON_DEBUG & DEBUG_PRIMS)
		fprintf(stderr, "%s\n", __FUNCTION__);

	/* compile/update programs before emitting vertex arrays and state */
	r300UpdateShaders(rmesa);
	r300EmitArrays(ctx);

	r300UpdateShaderStates(rmesa);

	r300EmitCacheFlush(rmesa);
	radeonEmitState(&rmesa->radeon);

	/* one draw per primitive run in the TNL vertex buffer */
	for (i = 0; i < vb->PrimitiveCount; i++) {
		GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
		GLuint start = vb->Primitive[i].start;
		GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
		r300RunRenderPrimitive(ctx, start, end, prim);
	}

	r300EmitCacheFlush(rmesa);

	/* drop references to the vertex array buffers */
	radeonReleaseArrays(ctx, ~0);
}
424
425
426 static const char *getFallbackString(uint32_t bit)
427 {
428 switch (bit) {
429 case R300_FALLBACK_VERTEX_PROGRAM :
430 return "vertex program";
431 case R300_FALLBACK_LINE_SMOOTH:
432 return "smooth lines";
433 case R300_FALLBACK_POINT_SMOOTH:
434 return "smooth points";
435 case R300_FALLBACK_POLYGON_SMOOTH:
436 return "smooth polygons";
437 case R300_FALLBACK_LINE_STIPPLE:
438 return "line stipple";
439 case R300_FALLBACK_POLYGON_STIPPLE:
440 return "polygon stipple";
441 case R300_FALLBACK_STENCIL_TWOSIDE:
442 return "two-sided stencil";
443 case R300_FALLBACK_RENDER_MODE:
444 return "render mode != GL_RENDER";
445 case R300_FALLBACK_FRAGMENT_PROGRAM:
446 return "fragment program";
447 case R300_FALLBACK_AOS_LIMIT:
448 return "aos limit";
449 case R300_FALLBACK_INVALID_BUFFERS:
450 return "invalid buffers";
451 default:
452 return "unknown";
453 }
454 }
455
456 void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode)
457 {
458 TNLcontext *tnl = TNL_CONTEXT(ctx);
459 r300ContextPtr rmesa = R300_CONTEXT(ctx);
460 uint32_t old_fallback = rmesa->fallback;
461 static uint32_t fallback_warn = 0;
462
463 if (mode) {
464 if ((fallback_warn & bit) == 0) {
465 _mesa_fprintf(stderr, "WARNING! Falling back to software for %s\n", getFallbackString(bit));
466 fallback_warn |= bit;
467 }
468 rmesa->fallback |= bit;
469
470 /* update only if we change from no tcl fallbacks to some tcl fallbacks */
471 if (rmesa->options.hw_tcl_enabled) {
472 if (((old_fallback & R300_TCL_FALLBACK_MASK) == 0) &&
473 ((bit & R300_TCL_FALLBACK_MASK) > 0)) {
474 R300_STATECHANGE(rmesa, vap_cntl_status);
475 rmesa->hw.vap_cntl_status.cmd[1] |= R300_VAP_TCL_BYPASS;
476 }
477 }
478
479 /* update only if we change from no raster fallbacks to some raster fallbacks */
480 if (((old_fallback & R300_RASTER_FALLBACK_MASK) == 0) &&
481 ((bit & R300_RASTER_FALLBACK_MASK) > 0)) {
482
483 radeon_firevertices(&rmesa->radeon);
484 rmesa->radeon.swtcl.RenderIndex = ~0;
485 _swsetup_Wakeup( ctx );
486 }
487 } else {
488 rmesa->fallback &= ~bit;
489
490 /* update only if we have disabled all tcl fallbacks */
491 if (rmesa->options.hw_tcl_enabled) {
492 if ((old_fallback & R300_RASTER_FALLBACK_MASK) == bit) {
493 R300_STATECHANGE(rmesa, vap_cntl_status);
494 rmesa->hw.vap_cntl_status.cmd[1] &= ~R300_VAP_TCL_BYPASS;
495 }
496 }
497
498 /* update only if we have disabled all raster fallbacks */
499 if ((old_fallback & R300_RASTER_FALLBACK_MASK) == bit) {
500 _swrast_flush( ctx );
501
502 tnl->Driver.Render.Start = r300RenderStart;
503 tnl->Driver.Render.Finish = r300RenderFinish;
504 tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive;
505 tnl->Driver.Render.ResetLineStipple = r300ResetLineStipple;
506 tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
507 tnl->Driver.Render.CopyPV = _tnl_copy_pv;
508 tnl->Driver.Render.Interp = _tnl_interp;
509
510 _tnl_invalidate_vertex_state( ctx, ~0 );
511 _tnl_invalidate_vertices( ctx, ~0 );
512 }
513 }
514
515 }
516
517 static GLboolean r300RunNonTCLRender(GLcontext * ctx,
518 struct tnl_pipeline_stage *stage)
519 {
520 r300ContextPtr rmesa = R300_CONTEXT(ctx);
521
522 if (RADEON_DEBUG & DEBUG_PRIMS)
523 fprintf(stderr, "%s\n", __FUNCTION__);
524
525 if (rmesa->fallback & R300_RASTER_FALLBACK_MASK)
526 return GL_TRUE;
527
528 if (rmesa->options.hw_tcl_enabled == GL_FALSE)
529 return GL_TRUE;
530
531 r300RunRender(ctx, stage);
532
533 return GL_FALSE;
534 }
535
/**
 * \brief TNL pipeline stage performing hardware rasterization.
 *
 * Only the run callback (r300RunNonTCLRender) is provided; the remaining
 * optional hooks are left NULL.
 * NOTE(review): field meanings follow struct tnl_pipeline_stage — confirm
 * order against tnl/t_pipeline.h.
 */
const struct tnl_pipeline_stage _r300_render_stage = {
	"r300 Hardware Rasterization",
	NULL,
	NULL,
	NULL,
	NULL,
	r300RunNonTCLRender
};