Merge branch 'master' of git+ssh://keithw@git.freedesktop.org/git/mesa/mesa into...
[mesa.git] / src / mesa / drivers / dri / r300 / r300_render.c
1 /**************************************************************************
2
3 Copyright (C) 2004 Nicolai Haehnle.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Nicolai Haehnle <prefect_@gmx.net>
31 */
32
33 #include "glheader.h"
34 #include "state.h"
35 #include "imports.h"
36 #include "enums.h"
37 #include "macros.h"
38 #include "context.h"
39 #include "dd.h"
40 #include "simple_list.h"
41
42 #include "api_arrayelt.h"
43 #include "swrast/swrast.h"
44 #include "swrast_setup/swrast_setup.h"
45 #include "vbo/vbo.h"
46 #include "tnl/tnl.h"
47 #include "tnl/t_vp_build.h"
48
49 #include "radeon_reg.h"
50 #include "radeon_macros.h"
51 #include "radeon_ioctl.h"
52 #include "radeon_state.h"
53 #include "r300_context.h"
54 #include "r300_ioctl.h"
55 #include "r300_state.h"
56 #include "r300_reg.h"
57 #include "r300_program.h"
58 #include "r300_tex.h"
59 #include "r300_maos.h"
60 #include "r300_emit.h"
61
62 extern int future_hw_tcl_on;
63
64 /**********************************************************************
65 * Hardware rasterization
66 *
67 * When we fell back to software TCL, we still try to use the
68 * rasterization hardware for rendering.
69 **********************************************************************/
70
71 static int r300_get_primitive_type(r300ContextPtr rmesa, GLcontext *ctx, int prim)
72 {
73 int type=-1;
74
75 switch (prim & PRIM_MODE_MASK) {
76 case GL_POINTS:
77 type=R300_VAP_VF_CNTL__PRIM_POINTS;
78 break;
79 case GL_LINES:
80 type=R300_VAP_VF_CNTL__PRIM_LINES;
81 break;
82 case GL_LINE_STRIP:
83 type=R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
84 break;
85 case GL_LINE_LOOP:
86 type=R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
87 break;
88 case GL_TRIANGLES:
89 type=R300_VAP_VF_CNTL__PRIM_TRIANGLES;
90 break;
91 case GL_TRIANGLE_STRIP:
92 type=R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
93 break;
94 case GL_TRIANGLE_FAN:
95 type=R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
96 break;
97 case GL_QUADS:
98 type=R300_VAP_VF_CNTL__PRIM_QUADS;
99 break;
100 case GL_QUAD_STRIP:
101 type=R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
102 break;
103 case GL_POLYGON:
104 type=R300_VAP_VF_CNTL__PRIM_POLYGON;
105 break;
106 default:
107 fprintf(stderr, "%s:%s Do not know how to handle primitive %02x - help me !\n",
108 __FILE__, __FUNCTION__,
109 prim & PRIM_MODE_MASK);
110 return -1;
111 break;
112 }
113 return type;
114 }
115
116 int r300_get_num_verts(r300ContextPtr rmesa, int num_verts, int prim)
117 {
118 int verts_off=0;
119 char *name="UNKNOWN";
120
121 switch (prim & PRIM_MODE_MASK) {
122 case GL_POINTS:
123 name="P";
124 verts_off = 0;
125 break;
126 case GL_LINES:
127 name="L";
128 verts_off = num_verts % 2;
129 break;
130 case GL_LINE_STRIP:
131 name="LS";
132 if(num_verts < 2)
133 verts_off = num_verts;
134 break;
135 case GL_LINE_LOOP:
136 name="LL";
137 if(num_verts < 2)
138 verts_off = num_verts;
139 break;
140 case GL_TRIANGLES:
141 name="T";
142 verts_off = num_verts % 3;
143 break;
144 case GL_TRIANGLE_STRIP:
145 name="TS";
146 if(num_verts < 3)
147 verts_off = num_verts;
148 break;
149 case GL_TRIANGLE_FAN:
150 name="TF";
151 if(num_verts < 3)
152 verts_off = num_verts;
153 break;
154 case GL_QUADS:
155 name="Q";
156 verts_off = num_verts % 4;
157 break;
158 case GL_QUAD_STRIP:
159 name="QS";
160 if(num_verts < 4)
161 verts_off = num_verts;
162 else
163 verts_off = num_verts % 2;
164 break;
165 case GL_POLYGON:
166 name="P";
167 if(num_verts < 3)
168 verts_off = num_verts;
169 break;
170 default:
171 fprintf(stderr, "%s:%s Do not know how to handle primitive %02x - help me !\n",
172 __FILE__, __FUNCTION__,
173 prim & PRIM_MODE_MASK);
174 return -1;
175 break;
176 }
177
178 if (RADEON_DEBUG & DEBUG_VERTS) {
179 if (num_verts - verts_off == 0) {
180 WARN_ONCE("user error: Need more than %d vertices to draw primitive %s !\n", num_verts, name);
181 return 0;
182 }
183
184 if (verts_off > 0) {
185 WARN_ONCE("user error: %d is not a valid number of vertices for primitive %s !\n", num_verts, name);
186 }
187 }
188
189 return num_verts - verts_off;
190 }
191
192 /* Immediate implementation has been removed from CVS. */
193
194 /* vertex buffer implementation */
195
196 static void inline fire_EB(r300ContextPtr rmesa, unsigned long addr, int vertex_count, int type, int elt_size)
197 {
198 int cmd_reserved = 0;
199 int cmd_written = 0;
200 drm_radeon_cmd_header_t *cmd = NULL;
201 unsigned long addr_a;
202 unsigned long t_addr;
203 unsigned long magic_1, magic_2;
204 GLcontext *ctx;
205 ctx = rmesa->radeon.glCtx;
206
207 assert(elt_size == 2 || elt_size == 4);
208
209 if(addr & (elt_size-1)){
210 WARN_ONCE("Badly aligned buffer\n");
211 return ;
212 }
213 #ifdef OPTIMIZE_ELTS
214 addr_a = 0;
215
216 magic_1 = (addr % 32) / 4;
217 t_addr = addr & (~0x1d);
218 magic_2 = (vertex_count + 1 + (t_addr & 0x2)) / 2 + magic_1;
219
220 check_space(6);
221
222 start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2, 0);
223 if(elt_size == 4){
224 e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
225 } else {
226 e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type);
227 }
228
229 start_packet3(RADEON_CP_PACKET3_INDX_BUFFER, 2);
230 if(elt_size == 4){
231 e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
232 e32(addr /*& 0xffffffe3*/);
233 } else {
234 e32(R300_EB_UNK1 | (magic_1 << 16) | R300_EB_UNK2);
235 e32(t_addr);
236 }
237
238 if(elt_size == 4){
239 e32(vertex_count /*+ addr_a/4*/); /* Total number of dwords needed? */
240 } else {
241 e32(magic_2); /* Total number of dwords needed? */
242 }
243 //cp_delay(rmesa, 1);
244 #if 0
245 fprintf(stderr, "magic_1 %d\n", magic_1);
246 fprintf(stderr, "t_addr %x\n", t_addr);
247 fprintf(stderr, "magic_2 %d\n", magic_2);
248 exit(1);
249 #endif
250 #else
251 (void)magic_2, (void)magic_1, (void)t_addr;
252
253 addr_a = 0;
254
255 check_space(6);
256
257 start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2, 0);
258 if(elt_size == 4){
259 e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
260 } else {
261 e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type);
262 }
263
264 start_packet3(RADEON_CP_PACKET3_INDX_BUFFER, 2);
265 e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
266 e32(addr /*& 0xffffffe3*/);
267
268 if(elt_size == 4){
269 e32(vertex_count /*+ addr_a/4*/); /* Total number of dwords needed? */
270 } else {
271 e32((vertex_count+1)/2 /*+ addr_a/4*/); /* Total number of dwords needed? */
272 }
273 //cp_delay(rmesa, 1);
274 #endif
275 }
276
277 static void r300_render_vb_primitive(r300ContextPtr rmesa,
278 GLcontext *ctx,
279 int start,
280 int end,
281 int prim)
282 {
283 int type, num_verts;
284
285 type=r300_get_primitive_type(rmesa, ctx, prim);
286 num_verts=r300_get_num_verts(rmesa, end-start, prim);
287
288 if(type<0 || num_verts <= 0)return;
289
290 if(rmesa->state.VB.Elts){
291 r300EmitAOS(rmesa, rmesa->state.aos_count, /*0*/start);
292 #if 0
293 int cmd_reserved = 0;
294 int cmd_written = 0;
295 drm_radeon_cmd_header_t *cmd = NULL;
296 int i;
297 start_index32_packet(num_verts, type);
298 for(i=0; i < num_verts; i++)
299 e32(((unsigned long *)rmesa->state.VB.Elts)[i]/*rmesa->state.Elts[start+i]*/); /* start ? */
300 #else
301 if(num_verts == 1){
302 //start_index32_packet(num_verts, type);
303 //e32(rmesa->state.Elts[start]);
304 return;
305 }
306
307 if(num_verts > 65535){ /* not implemented yet */
308 WARN_ONCE("Too many elts\n");
309 return;
310 }
311
312 r300EmitElts(ctx, rmesa->state.VB.Elts, num_verts, rmesa->state.VB.elt_size);
313 fire_EB(rmesa, rmesa->state.elt_dma.aos_offset, num_verts, type, rmesa->state.VB.elt_size);
314 #endif
315 }else{
316 r300EmitAOS(rmesa, rmesa->state.aos_count, start);
317 fire_AOS(rmesa, num_verts, type);
318 }
319 }
320
321 GLboolean r300_run_vb_render(GLcontext *ctx,
322 struct tnl_pipeline_stage *stage)
323 {
324 r300ContextPtr rmesa = R300_CONTEXT(ctx);
325 struct radeon_vertex_buffer *VB = &rmesa->state.VB;
326 int i;
327 int cmd_reserved = 0;
328 int cmd_written = 0;
329 drm_radeon_cmd_header_t *cmd = NULL;
330
331
332 if (RADEON_DEBUG & DEBUG_PRIMS)
333 fprintf(stderr, "%s\n", __FUNCTION__);
334
335 if (stage) {
336 TNLcontext *tnl = TNL_CONTEXT(ctx);
337 radeon_vb_to_rvb(rmesa, VB, &tnl->vb);
338 }
339
340 r300UpdateShaders(rmesa);
341 if (r300EmitArrays(ctx))
342 return GL_TRUE;
343
344 r300UpdateShaderStates(rmesa);
345
346 reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0);
347 e32(0x0000000a);
348
349 reg_start(0x4f18,0);
350 e32(0x00000003);
351
352 r300EmitState(rmesa);
353
354 for(i=0; i < VB->PrimitiveCount; i++){
355 GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
356 GLuint start = VB->Primitive[i].start;
357 GLuint length = VB->Primitive[i].count;
358
359 r300_render_vb_primitive(rmesa, ctx, start, start + length, prim);
360 }
361
362 reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0);
363 e32(0x0000000a/*0x2*/);
364
365 reg_start(0x4f18,0);
366 e32(0x00000003/*0x1*/);
367
368 #ifdef USER_BUFFERS
369 r300UseArrays(ctx);
370 #endif
371 r300ReleaseArrays(ctx);
372 return GL_FALSE;
373 }
374
/*
 * Return R300_FALLBACK_RAST from the enclosing function when expr is true.
 * The text of expr is reported through WARN_ONCE first; the leading "1 ||"
 * makes that report unconditional regardless of DEBUG_FALLBACKS.
 */
#define FALLBACK_IF(expr)						\
	do {								\
		if (expr) {						\
			if (1 || RADEON_DEBUG & DEBUG_FALLBACKS)	\
				WARN_ONCE("Software fallback:%s\n",	\
					  #expr);			\
			return R300_FALLBACK_RAST;			\
		}							\
	} while(0)
384
385 int r300Fallback(GLcontext *ctx)
386 {
387 r300ContextPtr r300 = R300_CONTEXT(ctx);
388 int i;
389
390 /* We do not do SELECT or FEEDBACK (yet ?)
391 * Is it worth doing them ?
392 */
393 FALLBACK_IF(ctx->RenderMode != GL_RENDER);
394
395 #if 0
396 /* These should work now.. */
397 FALLBACK_IF(ctx->Color.DitherFlag);
398 /* GL_ALPHA_TEST */
399 FALLBACK_IF(ctx->Color.AlphaEnabled);
400 /* GL_BLEND */
401 FALLBACK_IF(ctx->Color.BlendEnabled);
402 /* GL_POLYGON_OFFSET_FILL */
403 FALLBACK_IF(ctx->Polygon.OffsetFill);
404 /* FOG seems to trigger an unknown output
405 * in vertex program.
406 */
407 FALLBACK_IF(ctx->Fog.Enabled);
408 #endif
409
410 if(!r300->disable_lowimpact_fallback){
411 /* GL_POLYGON_OFFSET_POINT */
412 FALLBACK_IF(ctx->Polygon.OffsetPoint);
413 /* GL_POLYGON_OFFSET_LINE */
414 FALLBACK_IF(ctx->Polygon.OffsetLine);
415 #if 0
416 /* GL_STENCIL_TEST */
417 FALLBACK_IF(ctx->Stencil.Enabled);
418 /* GL_POLYGON_SMOOTH disabling to get blender going */
419 FALLBACK_IF(ctx->Polygon.SmoothFlag);
420 #endif
421 /* GL_POLYGON_STIPPLE */
422 FALLBACK_IF(ctx->Polygon.StippleFlag);
423 /* GL_MULTISAMPLE_ARB */
424 FALLBACK_IF(ctx->Multisample.Enabled);
425 /* blender ? */
426 FALLBACK_IF(ctx->Line.StippleFlag);
427 /* GL_LINE_SMOOTH */
428 FALLBACK_IF(ctx->Line.SmoothFlag);
429 /* GL_POINT_SMOOTH */
430 FALLBACK_IF(ctx->Point.SmoothFlag);
431 }
432
433 /* Fallback for LOGICOP */
434 FALLBACK_IF(ctx->Color.ColorLogicOpEnabled);
435
436 /* Rest could be done with vertex fragments */
437 if (ctx->Extensions.NV_point_sprite ||
438 ctx->Extensions.ARB_point_sprite)
439 /* GL_POINT_SPRITE_NV */
440 FALLBACK_IF(ctx->Point.PointSprite);
441
442 /* Fallback for rectangular texture */
443 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
444 if (ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_RECT_BIT)
445 return R300_FALLBACK_TCL;
446
447 return R300_FALLBACK_NONE;
448 }
449
450 /**
451 * Called by the pipeline manager to render a batch of primitives.
452 * We can return true to pass on to the next stage (i.e. software
453 * rasterization) or false to indicate that the pipeline has finished
454 * after we render something.
455 */
456 static GLboolean r300_run_render(GLcontext *ctx,
457 struct tnl_pipeline_stage *stage)
458 {
459
460 if (RADEON_DEBUG & DEBUG_PRIMS)
461 fprintf(stderr, "%s\n", __FUNCTION__);
462
463 if (r300Fallback(ctx) >= R300_FALLBACK_RAST)
464 return GL_TRUE;
465
466 return r300_run_vb_render(ctx, stage);
467 }
468
469 const struct tnl_pipeline_stage _r300_render_stage = {
470 "r300 hw rasterize",
471 NULL,
472 NULL,
473 NULL,
474 NULL,
475 r300_run_render /* run */
476 };
477
478 static GLboolean r300_run_tcl_render(GLcontext *ctx,
479 struct tnl_pipeline_stage *stage)
480 {
481 r300ContextPtr rmesa = R300_CONTEXT(ctx);
482 struct r300_vertex_program *vp;
483
484 hw_tcl_on=future_hw_tcl_on;
485
486 if (RADEON_DEBUG & DEBUG_PRIMS)
487 fprintf(stderr, "%s\n", __FUNCTION__);
488 if(hw_tcl_on == GL_FALSE)
489 return GL_TRUE;
490
491 if (r300Fallback(ctx) >= R300_FALLBACK_TCL) {
492 hw_tcl_on = GL_FALSE;
493 return GL_TRUE;
494 }
495
496 r300UpdateShaders(rmesa);
497
498 vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
499 #if 0 /* Draw every second request with software arb vp */
500 vp->native++;
501 vp->native &= 1;
502 //vp->native = GL_FALSE;
503 #endif
504
505 #if 0 /* You dont want to know what this does... */
506 TNLcontext *tnl = TNL_CONTEXT(ctx);
507 struct tnl_cache *cache;
508 struct tnl_cache_item *c;
509
510 cache = tnl->vp_cache;
511 c = cache->items[0xc000cc0e % cache->size];
512
513 if(c && c->data == vp)
514 vp->native = GL_FALSE;
515
516 #endif
517 #if 0
518 vp->native = GL_FALSE;
519 #endif
520 if (vp->native == GL_FALSE) {
521 hw_tcl_on = GL_FALSE;
522 return GL_TRUE;
523 }
524 //r300UpdateShaderStates(rmesa);
525
526 return r300_run_vb_render(ctx, stage);
527 }
528
529 const struct tnl_pipeline_stage _r300_tcl_stage = {
530 "r300 tcl",
531 NULL,
532 NULL,
533 NULL,
534 NULL,
535 r300_run_tcl_render /* run */
536 };
537
538 /* R300 texture rectangle expects coords in 0..1 range, not 0..dimension
539 * as in the extension spec. Need to translate here.
540 *
541 * Note that swrast expects 0..dimension, so if a fallback is active,
542 * don't do anything. (Maybe need to configure swrast to match hw)
543 */
544 struct texrect_stage_data {
545 GLvector4f texcoord[MAX_TEXTURE_UNITS];
546 };
547
/*
 * Fetch the texrect stage's private data from a pipeline stage pointer.
 * The argument is parenthesised so that any pointer-valued expression
 * (e.g. "stages + 1") expands correctly, not just a plain identifier.
 */
#define TEXRECT_STAGE_DATA(stage) ((struct texrect_stage_data *)(stage)->privatePtr)
549
550
551 static GLboolean run_texrect_stage( GLcontext *ctx,
552 struct tnl_pipeline_stage *stage )
553 {
554 struct texrect_stage_data *store = TEXRECT_STAGE_DATA(stage);
555 r300ContextPtr rmesa = R300_CONTEXT(ctx);
556 TNLcontext *tnl = TNL_CONTEXT(ctx);
557 struct vertex_buffer *VB = &tnl->vb;
558 GLuint i;
559
560 if (rmesa->radeon.Fallback)
561 return GL_TRUE;
562
563 for (i = 0 ; i < ctx->Const.MaxTextureUnits ; i++) {
564 if (ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_RECT_BIT) {
565 struct gl_texture_object *texObj = ctx->Texture.Unit[i].CurrentRect;
566 struct gl_texture_image *texImage = texObj->Image[0][texObj->BaseLevel];
567 const GLfloat iw = 1.0/texImage->Width;
568 const GLfloat ih = 1.0/texImage->Height;
569 GLfloat *in = (GLfloat *)VB->TexCoordPtr[i]->data;
570 GLint instride = VB->TexCoordPtr[i]->stride;
571 GLfloat (*out)[4] = store->texcoord[i].data;
572 GLint j;
573
574 store->texcoord[i].size = VB->TexCoordPtr[i]->size;
575 for (j = 0 ; j < VB->Count ; j++) {
576 switch (VB->TexCoordPtr[i]->size) {
577 case 4:
578 out[j][3] = in[3];
579 /* fallthrough */
580 case 3:
581 out[j][2] = in[2];
582 /* fallthrough */
583 default:
584 out[j][0] = in[0] * iw;
585 out[j][1] = in[1] * ih;
586 }
587 in = (GLfloat *)((GLubyte *)in + instride);
588 }
589
590 VB->AttribPtr[VERT_ATTRIB_TEX0+i] = VB->TexCoordPtr[i] = &store->texcoord[i];
591 }
592 }
593
594 return GL_TRUE;
595 }
596
597
598 /* Called the first time stage->run() is invoked.
599 */
600 static GLboolean alloc_texrect_data( GLcontext *ctx,
601 struct tnl_pipeline_stage *stage )
602 {
603 struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
604 struct texrect_stage_data *store;
605 GLuint i;
606
607 stage->privatePtr = CALLOC(sizeof(*store));
608 store = TEXRECT_STAGE_DATA(stage);
609 if (!store)
610 return GL_FALSE;
611
612 for (i = 0 ; i < ctx->Const.MaxTextureUnits ; i++)
613 _mesa_vector4f_alloc( &store->texcoord[i], 0, VB->Size, 32 );
614
615 return GL_TRUE;
616 }
617
618 static void free_texrect_data( struct tnl_pipeline_stage *stage )
619 {
620 struct texrect_stage_data *store = TEXRECT_STAGE_DATA(stage);
621 GLuint i;
622
623 if (store) {
624 for (i = 0 ; i < MAX_TEXTURE_UNITS ; i++)
625 if (store->texcoord[i].data)
626 _mesa_vector4f_free( &store->texcoord[i] );
627 FREE( store );
628 stage->privatePtr = NULL;
629 }
630 }
631
632 const struct tnl_pipeline_stage _r300_texrect_stage =
633 {
634 "r300 texrect stage", /* name */
635 NULL,
636 alloc_texrect_data,
637 free_texrect_data,
638 NULL,
639 run_texrect_stage
640 };
641