f7b178d0747b555e5ebd05ff584d34ab8e7deb0c
[mesa.git] / src / mesa / drivers / dri / r300 / r300_render.c
1 /**************************************************************************
2
3 Copyright (C) 2004 Nicolai Haehnle.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /**
29 * \file
30 *
31 * \brief R300 Render (Vertex Buffer Implementation)
32 *
33 * The immediate implementation has been removed from CVS in favor of the vertex
34 * buffer implementation.
35 *
36 * The render functions are called by the pipeline manager to render a batch of
37 * primitives. They return TRUE to pass on to the next stage (i.e. software
38 * rasterization) or FALSE to indicate that the pipeline has finished after
39 * rendering something.
40 *
41 * When falling back to software TCL, still attempt to use hardware
42 * rasterization.
43 *
44 * I am not sure that the cache-related registers are set up correctly, but
45 * obviously this does work... Further investigation is needed.
46 *
47 * \author Nicolai Haehnle <prefect_@gmx.net>
48 */
49
50 #include "glheader.h"
51 #include "state.h"
52 #include "imports.h"
53 #include "enums.h"
54 #include "macros.h"
55 #include "context.h"
56 #include "dd.h"
57 #include "simple_list.h"
58 #include "api_arrayelt.h"
59 #include "swrast/swrast.h"
60 #include "swrast_setup/swrast_setup.h"
61 #include "vbo/vbo.h"
62 #include "tnl/tnl.h"
63 #include "tnl/t_vp_build.h"
64 #include "radeon_reg.h"
65 #include "radeon_macros.h"
66 #include "radeon_ioctl.h"
67 #include "radeon_state.h"
68 #include "r300_context.h"
69 #include "r300_ioctl.h"
70 #include "r300_state.h"
71 #include "r300_reg.h"
72 #include "r300_tex.h"
73 #include "r300_emit.h"
74 extern int future_hw_tcl_on;
75
76 /**
77 * \brief Convert a OpenGL primitive type into a R300 primitive type.
78 */
79 static int r300PrimitiveType(r300ContextPtr rmesa, GLcontext * ctx, int prim)
80 {
81 switch (prim & PRIM_MODE_MASK) {
82 case GL_POINTS:
83 return R300_VAP_VF_CNTL__PRIM_POINTS;
84 break;
85 case GL_LINES:
86 return R300_VAP_VF_CNTL__PRIM_LINES;
87 break;
88 case GL_LINE_STRIP:
89 return R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
90 break;
91 case GL_LINE_LOOP:
92 return R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
93 break;
94 case GL_TRIANGLES:
95 return R300_VAP_VF_CNTL__PRIM_TRIANGLES;
96 break;
97 case GL_TRIANGLE_STRIP:
98 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
99 break;
100 case GL_TRIANGLE_FAN:
101 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
102 break;
103 case GL_QUADS:
104 return R300_VAP_VF_CNTL__PRIM_QUADS;
105 break;
106 case GL_QUAD_STRIP:
107 return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
108 break;
109 case GL_POLYGON:
110 return R300_VAP_VF_CNTL__PRIM_POLYGON;
111 break;
112 default:
113 fprintf(stderr,
114 "%s:%s Do not know how to handle primitive 0x%04x - help me !\n",
115 __FILE__, __FUNCTION__, prim & PRIM_MODE_MASK);
116 return -1;
117 break;
118 }
119 }
120
121 static int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
122 {
123 int verts_off = 0;
124
125 switch (prim & PRIM_MODE_MASK) {
126 case GL_POINTS:
127 verts_off = 0;
128 break;
129 case GL_LINES:
130 verts_off = num_verts % 2;
131 break;
132 case GL_LINE_STRIP:
133 if (num_verts < 2)
134 verts_off = num_verts;
135 break;
136 case GL_LINE_LOOP:
137 if (num_verts < 2)
138 verts_off = num_verts;
139 break;
140 case GL_TRIANGLES:
141 verts_off = num_verts % 3;
142 break;
143 case GL_TRIANGLE_STRIP:
144 if (num_verts < 3)
145 verts_off = num_verts;
146 break;
147 case GL_TRIANGLE_FAN:
148 if (num_verts < 3)
149 verts_off = num_verts;
150 break;
151 case GL_QUADS:
152 verts_off = num_verts % 4;
153 break;
154 case GL_QUAD_STRIP:
155 if (num_verts < 4)
156 verts_off = num_verts;
157 else
158 verts_off = num_verts % 2;
159 break;
160 case GL_POLYGON:
161 if (num_verts < 3)
162 verts_off = num_verts;
163 break;
164 default:
165 fprintf(stderr,
166 "%s:%s Do not know how to handle primitive 0x%04x - help me !\n",
167 __FILE__, __FUNCTION__, prim & PRIM_MODE_MASK);
168 return -1;
169 break;
170 }
171
172 return num_verts - verts_off;
173 }
174
175 static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts,
176 int elt_size)
177 {
178 r300ContextPtr rmesa = R300_CONTEXT(ctx);
179 struct r300_dma_region *rvb = &rmesa->state.elt_dma;
180 void *out;
181
182 assert(elt_size == 2 || elt_size == 4);
183
184 if (r300IsGartMemory(rmesa, elts, n_elts * elt_size)) {
185 rvb->address = rmesa->radeon.radeonScreen->gartTextures.map;
186 rvb->start = ((char *)elts) - rvb->address;
187 rvb->aos_offset =
188 rmesa->radeon.radeonScreen->gart_texture_offset +
189 rvb->start;
190 return;
191 } else if (r300IsGartMemory(rmesa, elts, 1)) {
192 WARN_ONCE("Pointer not within GART memory!\n");
193 _mesa_exit(-1);
194 }
195
196 r300AllocDmaRegion(rmesa, rvb, n_elts * elt_size, elt_size);
197 rvb->aos_offset = GET_START(rvb);
198
199 out = rvb->address + rvb->start;
200 memcpy(out, elts, n_elts * elt_size);
201 }
202
203 static void r300FireEB(r300ContextPtr rmesa, unsigned long addr,
204 int vertex_count, int type, int elt_size)
205 {
206 int cmd_reserved = 0;
207 int cmd_written = 0;
208 drm_radeon_cmd_header_t *cmd = NULL;
209 unsigned long t_addr;
210 unsigned long magic_1, magic_2;
211
212 assert(elt_size == 2 || elt_size == 4);
213
214 if (addr & (elt_size - 1)) {
215 WARN_ONCE("Badly aligned buffer\n");
216 return;
217 }
218
219 magic_1 = (addr % 32) / 4;
220 t_addr = addr & ~0x1d;
221 magic_2 = (vertex_count + 1 + (t_addr & 0x2)) / 2 + magic_1;
222
223 start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2, 0);
224 if (elt_size == 4) {
225 e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
226 (vertex_count << 16) | type |
227 R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
228 } else {
229 e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
230 (vertex_count << 16) | type);
231 }
232
233 start_packet3(RADEON_CP_PACKET3_INDX_BUFFER, 2);
234 #ifdef OPTIMIZE_ELTS
235 if (elt_size == 4) {
236 e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
237 e32(addr);
238 } else {
239 e32(R300_EB_UNK1 | (magic_1 << 16) | R300_EB_UNK2);
240 e32(t_addr);
241 }
242 #else
243 e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
244 e32(addr);
245 #endif
246
247 if (elt_size == 4) {
248 e32(vertex_count);
249 } else {
250 #ifdef OPTIMIZE_ELTS
251 e32(magic_2);
252 #else
253 e32((vertex_count + 1) / 2);
254 #endif
255 }
256 }
257
258 static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
259 {
260 int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
261 int i;
262 int cmd_reserved = 0;
263 int cmd_written = 0;
264 drm_radeon_cmd_header_t *cmd = NULL;
265
266 if (RADEON_DEBUG & DEBUG_VERTS)
267 fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __func__, nr,
268 offset);
269
270 start_packet3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, sz - 1);
271 e32(nr);
272 for (i = 0; i + 1 < nr; i += 2) {
273 e32((rmesa->state.aos[i].aos_size << 0)
274 | (rmesa->state.aos[i].aos_stride << 8)
275 | (rmesa->state.aos[i + 1].aos_size << 16)
276 | (rmesa->state.aos[i + 1].aos_stride << 24)
277 );
278 e32(rmesa->state.aos[i].aos_offset +
279 offset * 4 * rmesa->state.aos[i].aos_stride);
280 e32(rmesa->state.aos[i + 1].aos_offset +
281 offset * 4 * rmesa->state.aos[i + 1].aos_stride);
282 }
283
284 if (nr & 1) {
285 e32((rmesa->state.aos[nr - 1].aos_size << 0)
286 | (rmesa->state.aos[nr - 1].aos_stride << 8)
287 );
288 e32(rmesa->state.aos[nr - 1].aos_offset +
289 offset * 4 * rmesa->state.aos[nr - 1].aos_stride);
290 }
291 }
292
293 static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
294 {
295 int cmd_reserved = 0;
296 int cmd_written = 0;
297 drm_radeon_cmd_header_t *cmd = NULL;
298
299 start_packet3(RADEON_CP_PACKET3_3D_DRAW_VBUF_2, 0);
300 e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16)
301 | type);
302 }
303
304 static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
305 int start, int end, int prim)
306 {
307 int type, num_verts;
308
309 type = r300PrimitiveType(rmesa, ctx, prim);
310 num_verts = r300NumVerts(rmesa, end - start, prim);
311
312 if (type < 0 || num_verts <= 0)
313 return;
314
315 if (rmesa->state.VB.Elts) {
316 r300EmitAOS(rmesa, rmesa->state.aos_count, start);
317 if (num_verts > 65535) {
318 /* not implemented yet */
319 WARN_ONCE("Too many elts\n");
320 return;
321 }
322 r300EmitElts(ctx, rmesa->state.VB.Elts, num_verts,
323 rmesa->state.VB.elt_size);
324 r300FireEB(rmesa, rmesa->state.elt_dma.aos_offset,
325 num_verts, type, rmesa->state.VB.elt_size);
326 } else {
327 r300EmitAOS(rmesa, rmesa->state.aos_count, start);
328 r300FireAOS(rmesa, num_verts, type);
329 }
330 }
331
/*
 * Copy the description of one vertex attribute array (size, stride, data
 * pointer) from the TNL vertex buffer member `b` into slot `a` of the driver
 * vertex buffer; the element type is always recorded as GL_FLOAT.
 *
 * Relies on variables named `rvb` and `vb` being in scope at the point of
 * use.  The expansion is a single parenthesized comma expression.
 */
#define CONV_VB(a, b) \
	(rvb->AttribPtr[(a)].size = vb->b->size, \
	 rvb->AttribPtr[(a)].type = GL_FLOAT, \
	 rvb->AttribPtr[(a)].stride = vb->b->stride, \
	 rvb->AttribPtr[(a)].data = vb->b->data)
336
337 static void radeon_vb_to_rvb(r300ContextPtr rmesa,
338 struct radeon_vertex_buffer *rvb,
339 struct vertex_buffer *vb)
340 {
341 int i;
342 GLcontext *ctx;
343 ctx = rmesa->radeon.glCtx;
344
345 memset(rvb, 0, sizeof(*rvb));
346
347 rvb->Elts = vb->Elts;
348 rvb->elt_size = 4;
349 rvb->elt_min = 0;
350 rvb->elt_max = vb->Count;
351
352 rvb->Count = vb->Count;
353
354 if (hw_tcl_on) {
355 CONV_VB(VERT_ATTRIB_POS, ObjPtr);
356 } else {
357 assert(vb->ClipPtr);
358 CONV_VB(VERT_ATTRIB_POS, ClipPtr);
359 }
360
361 CONV_VB(VERT_ATTRIB_NORMAL, NormalPtr);
362 CONV_VB(VERT_ATTRIB_COLOR0, ColorPtr[0]);
363 CONV_VB(VERT_ATTRIB_COLOR1, SecondaryColorPtr[0]);
364 CONV_VB(VERT_ATTRIB_FOG, FogCoordPtr);
365
366 for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++)
367 CONV_VB(VERT_ATTRIB_TEX0 + i, TexCoordPtr[i]);
368
369 for (i = 0; i < MAX_VERTEX_PROGRAM_ATTRIBS; i++)
370 CONV_VB(VERT_ATTRIB_GENERIC0 + i,
371 AttribPtr[VERT_ATTRIB_GENERIC0 + i]);
372
373 rvb->Primitive = vb->Primitive;
374 rvb->PrimitiveCount = vb->PrimitiveCount;
375 rvb->LockFirst = rvb->LockCount = 0;
376 rvb->lock_uptodate = GL_FALSE;
377 }
378
379 static GLboolean r300RunRender(GLcontext * ctx,
380 struct tnl_pipeline_stage *stage)
381 {
382 r300ContextPtr rmesa = R300_CONTEXT(ctx);
383 struct radeon_vertex_buffer *VB = &rmesa->state.VB;
384 int i;
385 int cmd_reserved = 0;
386 int cmd_written = 0;
387 drm_radeon_cmd_header_t *cmd = NULL;
388
389 if (RADEON_DEBUG & DEBUG_PRIMS)
390 fprintf(stderr, "%s\n", __FUNCTION__);
391
392 if (stage) {
393 TNLcontext *tnl = TNL_CONTEXT(ctx);
394 radeon_vb_to_rvb(rmesa, VB, &tnl->vb);
395 }
396
397 r300UpdateShaders(rmesa);
398 if (r300EmitArrays(ctx))
399 return GL_TRUE;
400
401 r300UpdateShaderStates(rmesa);
402
403 reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0);
404 e32(R300_RB3D_DSTCACHE_UNKNOWN_0A);
405
406 reg_start(R300_RB3D_ZCACHE_CTLSTAT, 0);
407 e32(R300_RB3D_ZCACHE_UNKNOWN_03);
408
409 r300EmitState(rmesa);
410
411 for (i = 0; i < VB->PrimitiveCount; i++) {
412 GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
413 GLuint start = VB->Primitive[i].start;
414 GLuint end = VB->Primitive[i].start + VB->Primitive[i].count;
415 r300RunRenderPrimitive(rmesa, ctx, start, end, prim);
416 }
417
418 reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0);
419 e32(R300_RB3D_DSTCACHE_UNKNOWN_0A);
420
421 reg_start(R300_RB3D_ZCACHE_CTLSTAT, 0);
422 e32(R300_RB3D_ZCACHE_UNKNOWN_03);
423
424 #ifdef USER_BUFFERS
425 r300UseArrays(ctx);
426 #endif
427
428 r300ReleaseArrays(ctx);
429
430 return GL_FALSE;
431 }
432
/*
 * Return R300_FALLBACK_RAST from the enclosing function when `expr` is true.
 *
 * The text of `expr` is stringified into the warning, so the spelling used
 * at the call site is what gets reported.  The leading `1 ||` makes the
 * warning unconditional; the DEBUG_FALLBACKS test is currently dead.
 */
#define FALLBACK_IF(expr)						\
	do {								\
		if (expr) {						\
			if (1 || (RADEON_DEBUG & DEBUG_FALLBACKS))	\
				WARN_ONCE("Software fallback:%s\n",	\
					  #expr);			\
			return R300_FALLBACK_RAST;			\
		}							\
	} while (0)
442
443 static int r300Fallback(GLcontext * ctx)
444 {
445 r300ContextPtr r300 = R300_CONTEXT(ctx);
446 struct r300_fragment_program *fp = (struct r300_fragment_program *)
447 (char *)ctx->FragmentProgram._Current;
448
449 if (fp) {
450 if (!fp->translated)
451 r300TranslateFragmentShader(r300, fp);
452 FALLBACK_IF(!fp->translated);
453 }
454
455 FALLBACK_IF(ctx->RenderMode != GL_RENDER);
456
457 FALLBACK_IF(ctx->Stencil._TestTwoSide
458 && (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[1]
459 || ctx->Stencil.ValueMask[0] !=
460 ctx->Stencil.ValueMask[1]
461 || ctx->Stencil.WriteMask[0] !=
462 ctx->Stencil.WriteMask[1]));
463
464 FALLBACK_IF(ctx->Color.ColorLogicOpEnabled);
465
466 if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite)
467 FALLBACK_IF(ctx->Point.PointSprite);
468
469 if (!r300->disable_lowimpact_fallback) {
470 FALLBACK_IF(ctx->Polygon.OffsetPoint);
471 FALLBACK_IF(ctx->Polygon.OffsetLine);
472 FALLBACK_IF(ctx->Polygon.StippleFlag);
473 FALLBACK_IF(ctx->Multisample.Enabled);
474 FALLBACK_IF(ctx->Line.StippleFlag);
475 FALLBACK_IF(ctx->Line.SmoothFlag);
476 FALLBACK_IF(ctx->Point.SmoothFlag);
477 }
478
479 return R300_FALLBACK_NONE;
480 }
481
482 static GLboolean r300RunNonTCLRender(GLcontext * ctx,
483 struct tnl_pipeline_stage *stage)
484 {
485 if (RADEON_DEBUG & DEBUG_PRIMS)
486 fprintf(stderr, "%s\n", __FUNCTION__);
487
488 if (r300Fallback(ctx) >= R300_FALLBACK_RAST)
489 return GL_TRUE;
490
491 return r300RunRender(ctx, stage);
492 }
493
494 static GLboolean r300RunTCLRender(GLcontext * ctx,
495 struct tnl_pipeline_stage *stage)
496 {
497 r300ContextPtr rmesa = R300_CONTEXT(ctx);
498 struct r300_vertex_program *vp;
499
500 hw_tcl_on = future_hw_tcl_on;
501
502 if (RADEON_DEBUG & DEBUG_PRIMS)
503 fprintf(stderr, "%s\n", __FUNCTION__);
504
505 if (hw_tcl_on == GL_FALSE)
506 return GL_TRUE;
507
508 if (r300Fallback(ctx) >= R300_FALLBACK_TCL) {
509 hw_tcl_on = GL_FALSE;
510 return GL_TRUE;
511 }
512
513 r300UpdateShaders(rmesa);
514
515 vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
516 if (vp->native == GL_FALSE) {
517 hw_tcl_on = GL_FALSE;
518 return GL_TRUE;
519 }
520
521 return r300RunRender(ctx, stage);
522 }
523
524 const struct tnl_pipeline_stage _r300_render_stage = {
525 "r300 Hardware Rasterization",
526 NULL,
527 NULL,
528 NULL,
529 NULL,
530 r300RunNonTCLRender
531 };
532
533 const struct tnl_pipeline_stage _r300_tcl_stage = {
534 "r300 Hardware Transform, Clipping and Lighting",
535 NULL,
536 NULL,
537 NULL,
538 NULL,
539 r300RunTCLRender
540 };