91305cb5a22352137e8ddf0fd93c364eaa219af9
[mesa.git] / src / mesa / drivers / dri / r300 / r300_render.c
1 /**************************************************************************
2
3 Copyright (C) 2004 Nicolai Haehnle.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Nicolai Haehnle <prefect_@gmx.net>
31 */
32
33 #include "glheader.h"
34 #include "state.h"
35 #include "imports.h"
36 #include "enums.h"
37 #include "macros.h"
38 #include "context.h"
39 #include "dd.h"
40 #include "simple_list.h"
41
42 #include "api_arrayelt.h"
43 #include "swrast/swrast.h"
44 #include "swrast_setup/swrast_setup.h"
45 #include "array_cache/acache.h"
46 #include "tnl/tnl.h"
47 #include "tnl/t_vp_build.h"
48
49 #include "radeon_reg.h"
50 #include "radeon_macros.h"
51 #include "radeon_ioctl.h"
52 #include "radeon_state.h"
53 #include "r300_context.h"
54 #include "r300_ioctl.h"
55 #include "r300_state.h"
56 #include "r300_reg.h"
57 #include "r300_program.h"
58 #include "r300_tex.h"
59 #include "r300_maos.h"
60 #include "r300_emit.h"
61
62 extern int future_hw_tcl_on;
63
64 /**********************************************************************
65 * Hardware rasterization
66 *
67 * When we fell back to software TCL, we still try to use the
68 * rasterization hardware for rendering.
69 **********************************************************************/
70
71 static int r300_get_primitive_type(r300ContextPtr rmesa, GLcontext *ctx, int prim)
72 {
73 int type=-1;
74
75 switch (prim & PRIM_MODE_MASK) {
76 case GL_POINTS:
77 type=R300_VAP_VF_CNTL__PRIM_POINTS;
78 break;
79 case GL_LINES:
80 type=R300_VAP_VF_CNTL__PRIM_LINES;
81 break;
82 case GL_LINE_STRIP:
83 type=R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
84 break;
85 case GL_LINE_LOOP:
86 type=R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
87 break;
88 case GL_TRIANGLES:
89 type=R300_VAP_VF_CNTL__PRIM_TRIANGLES;
90 break;
91 case GL_TRIANGLE_STRIP:
92 type=R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
93 break;
94 case GL_TRIANGLE_FAN:
95 type=R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
96 break;
97 case GL_QUADS:
98 type=R300_VAP_VF_CNTL__PRIM_QUADS;
99 break;
100 case GL_QUAD_STRIP:
101 type=R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
102 break;
103 case GL_POLYGON:
104 type=R300_VAP_VF_CNTL__PRIM_POLYGON;
105 break;
106 default:
107 fprintf(stderr, "%s:%s Do not know how to handle primitive %02x - help me !\n",
108 __FILE__, __FUNCTION__,
109 prim & PRIM_MODE_MASK);
110 return -1;
111 break;
112 }
113 return type;
114 }
115
116 int r300_get_num_verts(r300ContextPtr rmesa, int num_verts, int prim)
117 {
118 int verts_off=0;
119 char *name="UNKNOWN";
120
121 switch (prim & PRIM_MODE_MASK) {
122 case GL_POINTS:
123 name="P";
124 verts_off = 0;
125 break;
126 case GL_LINES:
127 name="L";
128 verts_off = num_verts % 2;
129 break;
130 case GL_LINE_STRIP:
131 name="LS";
132 if(num_verts < 2)
133 verts_off = num_verts;
134 break;
135 case GL_LINE_LOOP:
136 name="LL";
137 if(num_verts < 2)
138 verts_off = num_verts;
139 break;
140 case GL_TRIANGLES:
141 name="T";
142 verts_off = num_verts % 3;
143 break;
144 case GL_TRIANGLE_STRIP:
145 name="TS";
146 if(num_verts < 3)
147 verts_off = num_verts;
148 break;
149 case GL_TRIANGLE_FAN:
150 name="TF";
151 if(num_verts < 3)
152 verts_off = num_verts;
153 break;
154 case GL_QUADS:
155 name="Q";
156 verts_off = num_verts % 4;
157 break;
158 case GL_QUAD_STRIP:
159 name="QS";
160 if(num_verts < 4)
161 verts_off = num_verts;
162 else
163 verts_off = num_verts % 2;
164 break;
165 case GL_POLYGON:
166 name="P";
167 if(num_verts < 3)
168 verts_off = num_verts;
169 break;
170 default:
171 fprintf(stderr, "%s:%s Do not know how to handle primitive %02x - help me !\n",
172 __FILE__, __FUNCTION__,
173 prim & PRIM_MODE_MASK);
174 return -1;
175 break;
176 }
177
178 if (RADEON_DEBUG & DEBUG_VERTS) {
179 if (num_verts - verts_off == 0) {
180 WARN_ONCE("user error: Need more than %d vertices to draw primitive %s !\n", num_verts, name);
181 return 0;
182 }
183
184 if (verts_off > 0) {
185 WARN_ONCE("user error: %d is not a valid number of vertices for primitive %s !\n", num_verts, name);
186 }
187 }
188
189 return num_verts - verts_off;
190 }
191
192 /* Immediate implementation has been removed from CVS. */
193
194 /* vertex buffer implementation */
195
196 static void inline fire_EB(r300ContextPtr rmesa, unsigned long addr, int vertex_count, int type, int elt_size)
197 {
198 int cmd_reserved = 0;
199 int cmd_written = 0;
200 drm_radeon_cmd_header_t *cmd = NULL;
201 unsigned long addr_a;
202 unsigned long t_addr;
203 unsigned long magic_1, magic_2;
204 GLcontext *ctx;
205 ctx = rmesa->radeon.glCtx;
206
207 assert(elt_size == 2 || elt_size == 4);
208
209 if(addr & (elt_size-1)){
210 WARN_ONCE("Badly aligned buffer\n");
211 return ;
212 }
213 #ifdef OPTIMIZE_ELTS
214 addr_a = 0;
215
216 magic_1 = (addr % 32) / 4;
217 t_addr = addr & (~0x1d);
218 magic_2 = (vertex_count + 1 + (t_addr & 0x2)) / 2 + magic_1;
219
220 check_space(6);
221
222 start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2, 0);
223 if(elt_size == 4){
224 e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
225 } else {
226 e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type);
227 }
228
229 start_packet3(RADEON_CP_PACKET3_INDX_BUFFER, 2);
230 if(elt_size == 4){
231 e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
232 e32(addr /*& 0xffffffe3*/);
233 } else {
234 e32(R300_EB_UNK1 | (magic_1 << 16) | R300_EB_UNK2);
235 e32(t_addr);
236 }
237
238 if(elt_size == 4){
239 e32(vertex_count /*+ addr_a/4*/); /* Total number of dwords needed? */
240 } else {
241 e32(magic_2); /* Total number of dwords needed? */
242 }
243 //cp_delay(rmesa, 1);
244 #if 0
245 fprintf(stderr, "magic_1 %d\n", magic_1);
246 fprintf(stderr, "t_addr %x\n", t_addr);
247 fprintf(stderr, "magic_2 %d\n", magic_2);
248 exit(1);
249 #endif
250 #else
251 (void)magic_2, (void)magic_1, (void)t_addr;
252
253 addr_a = 0;
254
255 check_space(6);
256
257 start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2, 0);
258 if(elt_size == 4){
259 e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
260 } else {
261 e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type);
262 }
263
264 start_packet3(RADEON_CP_PACKET3_INDX_BUFFER, 2);
265 e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
266 e32(addr /*& 0xffffffe3*/);
267
268 if(elt_size == 4){
269 e32(vertex_count /*+ addr_a/4*/); /* Total number of dwords needed? */
270 } else {
271 e32((vertex_count+1)/2 /*+ addr_a/4*/); /* Total number of dwords needed? */
272 }
273 //cp_delay(rmesa, 1);
274 #endif
275 }
276
277 static void r300_render_vb_primitive(r300ContextPtr rmesa,
278 GLcontext *ctx,
279 int start,
280 int end,
281 int prim)
282 {
283 int type, num_verts;
284
285 type=r300_get_primitive_type(rmesa, ctx, prim);
286 num_verts=r300_get_num_verts(rmesa, end-start, prim);
287
288 if(type<0 || num_verts <= 0)return;
289
290 if(rmesa->state.VB.Elts){
291 r300EmitAOS(rmesa, rmesa->state.aos_count, /*0*/start);
292 #if 0
293 int cmd_reserved = 0;
294 int cmd_written = 0;
295 drm_radeon_cmd_header_t *cmd = NULL;
296 int i;
297 start_index32_packet(num_verts, type);
298 for(i=0; i < num_verts; i++)
299 e32(((unsigned long *)rmesa->state.VB.Elts)[i]/*rmesa->state.Elts[start+i]*/); /* start ? */
300 #else
301 if(num_verts == 1){
302 //start_index32_packet(num_verts, type);
303 //e32(rmesa->state.Elts[start]);
304 return;
305 }
306
307 if(num_verts > 65535){ /* not implemented yet */
308 WARN_ONCE("Too many elts\n");
309 return;
310 }
311
312 r300EmitElts(ctx, rmesa->state.VB.Elts, num_verts, rmesa->state.VB.elt_size);
313 fire_EB(rmesa, rmesa->state.elt_dma.aos_offset, num_verts, type, rmesa->state.VB.elt_size);
314 #endif
315 }else{
316 r300EmitAOS(rmesa, rmesa->state.aos_count, start);
317 fire_AOS(rmesa, num_verts, type);
318 }
319 }
320
321 GLboolean r300_run_vb_render(GLcontext *ctx,
322 struct tnl_pipeline_stage *stage)
323 {
324 r300ContextPtr rmesa = R300_CONTEXT(ctx);
325 struct radeon_vertex_buffer *VB = &rmesa->state.VB;
326 int i;
327 int cmd_reserved = 0;
328 int cmd_written = 0;
329 drm_radeon_cmd_header_t *cmd = NULL;
330
331
332 if (RADEON_DEBUG & DEBUG_PRIMS)
333 fprintf(stderr, "%s\n", __FUNCTION__);
334
335 if (stage) {
336 TNLcontext *tnl = TNL_CONTEXT(ctx);
337 radeon_vb_to_rvb(rmesa, VB, &tnl->vb);
338 }
339
340 r300UpdateShaders(rmesa);
341 if (r300EmitArrays(ctx))
342 return GL_TRUE;
343
344 r300UpdateShaderStates(rmesa);
345
346 reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0);
347 e32(0x0000000a);
348
349 reg_start(0x4f18,0);
350 e32(0x00000003);
351
352 r300EmitState(rmesa);
353
354 for(i=0; i < VB->PrimitiveCount; i++){
355 GLuint prim = VB->Primitive[i].mode;
356 GLuint start = VB->Primitive[i].start;
357 GLuint length = VB->Primitive[i].count;
358
359 r300_render_vb_primitive(rmesa, ctx, start, start + length, prim);
360 }
361
362 reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0);
363 e32(0x0000000a/*0x2*/);
364
365 reg_start(0x4f18,0);
366 e32(0x00000003/*0x1*/);
367
368 #ifdef USER_BUFFERS
369 r300UseArrays(ctx);
370 #endif
371 r300ReleaseArrays(ctx);
372 return GL_FALSE;
373 }
374
/*
 * If expr is true, warn once (quoting the expression text) and make the
 * enclosing function return R300_FALLBACK_RAST.  The leading "1 ||" makes
 * the warning unconditional, whatever DEBUG_FALLBACKS is set to.
 */
#define FALLBACK_IF(expr)						\
	do {								\
		if (expr) {						\
			if (1 || RADEON_DEBUG & DEBUG_FALLBACKS)	\
				WARN_ONCE("Software fallback:%s\n",	\
					  #expr);			\
			return R300_FALLBACK_RAST;			\
		}							\
	} while(0)
384
385 int r300Fallback(GLcontext *ctx)
386 {
387 r300ContextPtr r300 = R300_CONTEXT(ctx);
388 int i;
389
390 /* We do not do SELECT or FEEDBACK (yet ?)
391 * Is it worth doing them ?
392 */
393 FALLBACK_IF(ctx->RenderMode != GL_RENDER);
394
395 #if 0
396 /* These should work now.. */
397 FALLBACK_IF(ctx->Color.DitherFlag);
398 /* GL_ALPHA_TEST */
399 FALLBACK_IF(ctx->Color.AlphaEnabled);
400 /* GL_BLEND */
401 FALLBACK_IF(ctx->Color.BlendEnabled);
402 /* GL_POLYGON_OFFSET_FILL */
403 FALLBACK_IF(ctx->Polygon.OffsetFill);
404 /* FOG seems to trigger an unknown output
405 * in vertex program.
406 */
407 FALLBACK_IF(ctx->Fog.Enabled);
408 #endif
409 FALLBACK_IF(ctx->Stencil._TestTwoSide &&
410 (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[1] ||
411 ctx->Stencil.ValueMask[0] != ctx->Stencil.ValueMask[1] ||
412 ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[1]));
413
414 if(!r300->disable_lowimpact_fallback){
415 /* GL_POLYGON_OFFSET_POINT */
416 FALLBACK_IF(ctx->Polygon.OffsetPoint);
417 /* GL_POLYGON_OFFSET_LINE */
418 FALLBACK_IF(ctx->Polygon.OffsetLine);
419 #if 0
420 /* GL_STENCIL_TEST */
421 FALLBACK_IF(ctx->Stencil.Enabled);
422 /* GL_POLYGON_SMOOTH disabling to get blender going */
423 FALLBACK_IF(ctx->Polygon.SmoothFlag);
424 #endif
425 /* GL_POLYGON_STIPPLE */
426 FALLBACK_IF(ctx->Polygon.StippleFlag);
427 /* GL_MULTISAMPLE_ARB */
428 FALLBACK_IF(ctx->Multisample.Enabled);
429 /* blender ? */
430 FALLBACK_IF(ctx->Line.StippleFlag);
431 /* GL_LINE_SMOOTH */
432 FALLBACK_IF(ctx->Line.SmoothFlag);
433 /* GL_POINT_SMOOTH */
434 FALLBACK_IF(ctx->Point.SmoothFlag);
435 }
436
437 /* Fallback for LOGICOP */
438 FALLBACK_IF(ctx->Color.ColorLogicOpEnabled);
439
440 /* Rest could be done with vertex fragments */
441 if (ctx->Extensions.NV_point_sprite ||
442 ctx->Extensions.ARB_point_sprite)
443 /* GL_POINT_SPRITE_NV */
444 FALLBACK_IF(ctx->Point.PointSprite);
445
446 /* Fallback for rectangular texture */
447 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
448 if (ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_RECT_BIT)
449 return R300_FALLBACK_TCL;
450
451 return R300_FALLBACK_NONE;
452 }
453
454 /**
455 * Called by the pipeline manager to render a batch of primitives.
456 * We can return true to pass on to the next stage (i.e. software
457 * rasterization) or false to indicate that the pipeline has finished
458 * after we render something.
459 */
460 static GLboolean r300_run_render(GLcontext *ctx,
461 struct tnl_pipeline_stage *stage)
462 {
463
464 if (RADEON_DEBUG & DEBUG_PRIMS)
465 fprintf(stderr, "%s\n", __FUNCTION__);
466
467 if (r300Fallback(ctx) >= R300_FALLBACK_RAST)
468 return GL_TRUE;
469
470 return r300_run_vb_render(ctx, stage);
471 }
472
473 const struct tnl_pipeline_stage _r300_render_stage = {
474 "r300 hw rasterize",
475 NULL,
476 NULL,
477 NULL,
478 NULL,
479 r300_run_render /* run */
480 };
481
482 static GLboolean r300_run_tcl_render(GLcontext *ctx,
483 struct tnl_pipeline_stage *stage)
484 {
485 r300ContextPtr rmesa = R300_CONTEXT(ctx);
486 struct r300_vertex_program *vp;
487
488 hw_tcl_on=future_hw_tcl_on;
489
490 if (RADEON_DEBUG & DEBUG_PRIMS)
491 fprintf(stderr, "%s\n", __FUNCTION__);
492 if(hw_tcl_on == GL_FALSE)
493 return GL_TRUE;
494
495 if (r300Fallback(ctx) >= R300_FALLBACK_TCL) {
496 hw_tcl_on = GL_FALSE;
497 return GL_TRUE;
498 }
499
500 r300UpdateShaders(rmesa);
501
502 vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
503 #if 0 /* Draw every second request with software arb vp */
504 vp->native++;
505 vp->native &= 1;
506 //vp->native = GL_FALSE;
507 #endif
508
509 #if 0 /* You dont want to know what this does... */
510 TNLcontext *tnl = TNL_CONTEXT(ctx);
511 struct tnl_cache *cache;
512 struct tnl_cache_item *c;
513
514 cache = tnl->vp_cache;
515 c = cache->items[0xc000cc0e % cache->size];
516
517 if(c && c->data == vp)
518 vp->native = GL_FALSE;
519
520 #endif
521 #if 0
522 vp->native = GL_FALSE;
523 #endif
524 if (vp->native == GL_FALSE) {
525 hw_tcl_on = GL_FALSE;
526 return GL_TRUE;
527 }
528 //r300UpdateShaderStates(rmesa);
529
530 return r300_run_vb_render(ctx, stage);
531 }
532
533 const struct tnl_pipeline_stage _r300_tcl_stage = {
534 "r300 tcl",
535 NULL,
536 NULL,
537 NULL,
538 NULL,
539 r300_run_tcl_render /* run */
540 };
541
542 /* R300 texture rectangle expects coords in 0..1 range, not 0..dimension
543 * as in the extension spec. Need to translate here.
544 *
545 * Note that swrast expects 0..dimension, so if a fallback is active,
546 * don't do anything. (Maybe need to configure swrast to match hw)
547 */
548 struct texrect_stage_data {
549 GLvector4f texcoord[MAX_TEXTURE_UNITS];
550 };
551
/* Fetch the texrect private data hanging off a pipeline stage pointer.
 * The argument is parenthesized so that any pointer expression, not just a
 * plain identifier, can be passed. */
#define TEXRECT_STAGE_DATA(stage) ((struct texrect_stage_data *)(stage)->privatePtr)
553
554
555 static GLboolean run_texrect_stage( GLcontext *ctx,
556 struct tnl_pipeline_stage *stage )
557 {
558 struct texrect_stage_data *store = TEXRECT_STAGE_DATA(stage);
559 r300ContextPtr rmesa = R300_CONTEXT(ctx);
560 TNLcontext *tnl = TNL_CONTEXT(ctx);
561 struct vertex_buffer *VB = &tnl->vb;
562 GLuint i;
563
564 if (rmesa->radeon.Fallback)
565 return GL_TRUE;
566
567 for (i = 0 ; i < ctx->Const.MaxTextureUnits ; i++) {
568 if (ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_RECT_BIT) {
569 struct gl_texture_object *texObj = ctx->Texture.Unit[i].CurrentRect;
570 struct gl_texture_image *texImage = texObj->Image[0][texObj->BaseLevel];
571 const GLfloat iw = 1.0/texImage->Width;
572 const GLfloat ih = 1.0/texImage->Height;
573 GLfloat *in = (GLfloat *)VB->TexCoordPtr[i]->data;
574 GLint instride = VB->TexCoordPtr[i]->stride;
575 GLfloat (*out)[4] = store->texcoord[i].data;
576 GLint j;
577
578 store->texcoord[i].size = VB->TexCoordPtr[i]->size;
579 for (j = 0 ; j < VB->Count ; j++) {
580 switch (VB->TexCoordPtr[i]->size) {
581 case 4:
582 out[j][3] = in[3];
583 /* fallthrough */
584 case 3:
585 out[j][2] = in[2];
586 /* fallthrough */
587 default:
588 out[j][0] = in[0] * iw;
589 out[j][1] = in[1] * ih;
590 }
591 in = (GLfloat *)((GLubyte *)in + instride);
592 }
593
594 VB->AttribPtr[VERT_ATTRIB_TEX0+i] = VB->TexCoordPtr[i] = &store->texcoord[i];
595 }
596 }
597
598 return GL_TRUE;
599 }
600
601
602 /* Called the first time stage->run() is invoked.
603 */
604 static GLboolean alloc_texrect_data( GLcontext *ctx,
605 struct tnl_pipeline_stage *stage )
606 {
607 struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
608 struct texrect_stage_data *store;
609 GLuint i;
610
611 stage->privatePtr = CALLOC(sizeof(*store));
612 store = TEXRECT_STAGE_DATA(stage);
613 if (!store)
614 return GL_FALSE;
615
616 for (i = 0 ; i < ctx->Const.MaxTextureUnits ; i++)
617 _mesa_vector4f_alloc( &store->texcoord[i], 0, VB->Size, 32 );
618
619 return GL_TRUE;
620 }
621
622 static void free_texrect_data( struct tnl_pipeline_stage *stage )
623 {
624 struct texrect_stage_data *store = TEXRECT_STAGE_DATA(stage);
625 GLuint i;
626
627 if (store) {
628 for (i = 0 ; i < MAX_TEXTURE_UNITS ; i++)
629 if (store->texcoord[i].data)
630 _mesa_vector4f_free( &store->texcoord[i] );
631 FREE( store );
632 stage->privatePtr = NULL;
633 }
634 }
635
636 const struct tnl_pipeline_stage _r300_texrect_stage =
637 {
638 "r300 texrect stage", /* name */
639 NULL,
640 alloc_texrect_data,
641 free_texrect_data,
642 NULL,
643 run_texrect_stage
644 };
645