merge of glsl-compiler-1 branch
[mesa.git] / src / mesa / drivers / dri / r300 / r300_render.c
1 /**************************************************************************
2
3 Copyright (C) 2004 Nicolai Haehnle.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Nicolai Haehnle <prefect_@gmx.net>
31 */
32
33 #include "glheader.h"
34 #include "state.h"
35 #include "imports.h"
36 #include "enums.h"
37 #include "macros.h"
38 #include "context.h"
39 #include "dd.h"
40 #include "simple_list.h"
41
42 #include "api_arrayelt.h"
43 #include "swrast/swrast.h"
44 #include "swrast_setup/swrast_setup.h"
45 #include "vbo/vbo.h"
46 #include "tnl/tnl.h"
47 #include "tnl/t_vp_build.h"
48
49 #include "radeon_reg.h"
50 #include "radeon_macros.h"
51 #include "radeon_ioctl.h"
52 #include "radeon_state.h"
53 #include "r300_context.h"
54 #include "r300_ioctl.h"
55 #include "r300_state.h"
56 #include "r300_reg.h"
57 #include "r300_program.h"
58 #include "r300_tex.h"
59 #include "r300_maos.h"
60 #include "r300_emit.h"
61
62 extern int future_hw_tcl_on;
63
64 /**********************************************************************
65 * Hardware rasterization
66 *
67 * When we fell back to software TCL, we still try to use the
68 * rasterization hardware for rendering.
69 **********************************************************************/
70
71 static int r300_get_primitive_type(r300ContextPtr rmesa, GLcontext *ctx, int prim)
72 {
73 int type=-1;
74
75 switch (prim & PRIM_MODE_MASK) {
76 case GL_POINTS:
77 type=R300_VAP_VF_CNTL__PRIM_POINTS;
78 break;
79 case GL_LINES:
80 type=R300_VAP_VF_CNTL__PRIM_LINES;
81 break;
82 case GL_LINE_STRIP:
83 type=R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
84 break;
85 case GL_LINE_LOOP:
86 type=R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
87 break;
88 case GL_TRIANGLES:
89 type=R300_VAP_VF_CNTL__PRIM_TRIANGLES;
90 break;
91 case GL_TRIANGLE_STRIP:
92 type=R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
93 break;
94 case GL_TRIANGLE_FAN:
95 type=R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
96 break;
97 case GL_QUADS:
98 type=R300_VAP_VF_CNTL__PRIM_QUADS;
99 break;
100 case GL_QUAD_STRIP:
101 type=R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
102 break;
103 case GL_POLYGON:
104 type=R300_VAP_VF_CNTL__PRIM_POLYGON;
105 break;
106 default:
107 fprintf(stderr, "%s:%s Do not know how to handle primitive %02x - help me !\n",
108 __FILE__, __FUNCTION__,
109 prim & PRIM_MODE_MASK);
110 return -1;
111 break;
112 }
113 return type;
114 }
115
116 int r300_get_num_verts(r300ContextPtr rmesa, int num_verts, int prim)
117 {
118 int verts_off=0;
119 char *name="UNKNOWN";
120
121 switch (prim & PRIM_MODE_MASK) {
122 case GL_POINTS:
123 name="P";
124 verts_off = 0;
125 break;
126 case GL_LINES:
127 name="L";
128 verts_off = num_verts % 2;
129 break;
130 case GL_LINE_STRIP:
131 name="LS";
132 if(num_verts < 2)
133 verts_off = num_verts;
134 break;
135 case GL_LINE_LOOP:
136 name="LL";
137 if(num_verts < 2)
138 verts_off = num_verts;
139 break;
140 case GL_TRIANGLES:
141 name="T";
142 verts_off = num_verts % 3;
143 break;
144 case GL_TRIANGLE_STRIP:
145 name="TS";
146 if(num_verts < 3)
147 verts_off = num_verts;
148 break;
149 case GL_TRIANGLE_FAN:
150 name="TF";
151 if(num_verts < 3)
152 verts_off = num_verts;
153 break;
154 case GL_QUADS:
155 name="Q";
156 verts_off = num_verts % 4;
157 break;
158 case GL_QUAD_STRIP:
159 name="QS";
160 if(num_verts < 4)
161 verts_off = num_verts;
162 else
163 verts_off = num_verts % 2;
164 break;
165 case GL_POLYGON:
166 name="P";
167 if(num_verts < 3)
168 verts_off = num_verts;
169 break;
170 default:
171 fprintf(stderr, "%s:%s Do not know how to handle primitive %02x - help me !\n",
172 __FILE__, __FUNCTION__,
173 prim & PRIM_MODE_MASK);
174 return -1;
175 break;
176 }
177
178 if (RADEON_DEBUG & DEBUG_VERTS) {
179 if (num_verts - verts_off == 0) {
180 WARN_ONCE("user error: Need more than %d vertices to draw primitive %s !\n", num_verts, name);
181 return 0;
182 }
183
184 if (verts_off > 0) {
185 WARN_ONCE("user error: %d is not a valid number of vertices for primitive %s !\n", num_verts, name);
186 }
187 }
188
189 return num_verts - verts_off;
190 }
191
192 /* Immediate implementation has been removed from CVS. */
193
194 /* vertex buffer implementation */
195
/**
 * Emit the command packets for an indexed draw from an element buffer.
 *
 * Two packets are written: a RADEON_CP_PACKET3_3D_DRAW_INDX_2 packet whose
 * single dword carries the walk mode, index count and primitive type, then a
 * RADEON_CP_PACKET3_INDX_BUFFER packet carrying a control dword, the buffer
 * address and a size dword.
 *
 * rmesa        - driver context.
 * addr         - address/offset of the index data; must be a multiple of
 *                elt_size, otherwise a warning is issued and nothing is
 *                emitted.
 * vertex_count - number of indices; it is shifted into bits 16 and up of the
 *                draw dword.
 * type         - primitive bits OR-ed into the draw dword (the caller in this
 *                file passes the result of r300_get_primitive_type()).
 * elt_size     - size of one index in bytes; asserted to be 2 or 4.
 *
 * NOTE(review): cmd_reserved, cmd_written and cmd are never referenced
 * directly below; presumably check_space()/start_packet3()/e32() are macros
 * that use them by name - confirm before renaming or removing.
 */
196 static void inline fire_EB(r300ContextPtr rmesa, unsigned long addr, int vertex_count, int type, int elt_size)
197 {
198 int cmd_reserved = 0;
199 int cmd_written = 0;
200 drm_radeon_cmd_header_t *cmd = NULL;
201 unsigned long addr_a;
202 unsigned long t_addr;
203 unsigned long magic_1, magic_2;
204 GLcontext *ctx;
205 ctx = rmesa->radeon.glCtx;
206
207 assert(elt_size == 2 || elt_size == 4);
208
/* Reject index data that is not aligned to the index size. */
209 if(addr & (elt_size-1)){
210 WARN_ONCE("Badly aligned buffer\n");
211 return ;
212 }
213 #ifdef OPTIMIZE_ELTS
/* addr_a is only assigned; its remaining uses are commented out. */
214 addr_a = 0;
215
/*
 * Values used only for 16-bit indices in this variant:
 *   magic_1 - dword offset of addr within its 32-byte block,
 *   t_addr  - addr with bits 0, 2, 3 and 4 cleared (bit 1 is kept),
 *   magic_2 - size dword; counts two indices per dword, adds the kept
 *             bit 1 of t_addr before halving, then adds magic_1.
 * NOTE(review): the hardware meaning of these values is not documented in
 * this file.
 */
216 magic_1 = (addr % 32) / 4;
217 t_addr = addr & (~0x1d);
218 magic_2 = (vertex_count + 1 + (t_addr & 0x2)) / 2 + magic_1;
219
/* presumably 6 = 2 dwords for the draw packet + 4 for the index-buffer packet - TODO confirm */
220 check_space(6);
221
222 start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2, 0);
223 if(elt_size == 4){
224 e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
225 } else {
226 e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type);
227 }
228
229 start_packet3(RADEON_CP_PACKET3_INDX_BUFFER, 2);
230 if(elt_size == 4){
231 e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
232 e32(addr /*& 0xffffffe3*/);
233 } else {
/* 16-bit indices: masked address, with magic_1 in bits 16 and up of the control dword. */
234 e32(R300_EB_UNK1 | (magic_1 << 16) | R300_EB_UNK2);
235 e32(t_addr);
236 }
237
238 if(elt_size == 4){
239 e32(vertex_count /*+ addr_a/4*/); /* Total number of dwords needed? */
240 } else {
241 e32(magic_2); /* Total number of dwords needed? */
242 }
243 //cp_delay(rmesa, 1);
244 #if 0
245 fprintf(stderr, "magic_1 %d\n", magic_1);
246 fprintf(stderr, "t_addr %x\n", t_addr);
247 fprintf(stderr, "magic_2 %d\n", magic_2);
248 exit(1);
249 #endif
250 #else
/* The magic values are unused in this variant; the casts only mark them as used. */
251 (void)magic_2, (void)magic_1, (void)t_addr;
252
253 addr_a = 0;
254
/* presumably 6 = 2 dwords for the draw packet + 4 for the index-buffer packet - TODO confirm */
255 check_space(6);
256
257 start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2, 0);
258 if(elt_size == 4){
259 e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
260 } else {
261 e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type);
262 }
263
/* Unlike the OPTIMIZE_ELTS variant, the address is passed through unmodified for both index sizes. */
264 start_packet3(RADEON_CP_PACKET3_INDX_BUFFER, 2);
265 e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2);
266 e32(addr /*& 0xffffffe3*/);
267
/* Size dword: one dword per 32-bit index, or two 16-bit indices per dword, rounded up. */
268 if(elt_size == 4){
269 e32(vertex_count /*+ addr_a/4*/); /* Total number of dwords needed? */
270 } else {
271 e32((vertex_count+1)/2 /*+ addr_a/4*/); /* Total number of dwords needed? */
272 }
273 //cp_delay(rmesa, 1);
274 #endif
275 }
276
277 static void r300_render_vb_primitive(r300ContextPtr rmesa,
278 GLcontext *ctx,
279 int start,
280 int end,
281 int prim)
282 {
283 int type, num_verts;
284
285 type=r300_get_primitive_type(rmesa, ctx, prim);
286 num_verts=r300_get_num_verts(rmesa, end-start, prim);
287
288 if(type<0 || num_verts <= 0)return;
289
290 if(rmesa->state.VB.Elts){
291 r300EmitAOS(rmesa, rmesa->state.aos_count, /*0*/start);
292 #if 0
293 int cmd_reserved = 0;
294 int cmd_written = 0;
295 drm_radeon_cmd_header_t *cmd = NULL;
296 int i;
297 start_index32_packet(num_verts, type);
298 for(i=0; i < num_verts; i++)
299 e32(((unsigned long *)rmesa->state.VB.Elts)[i]/*rmesa->state.Elts[start+i]*/); /* start ? */
300 #else
301 if(num_verts == 1){
302 //start_index32_packet(num_verts, type);
303 //e32(rmesa->state.Elts[start]);
304 return;
305 }
306
307 if(num_verts > 65535){ /* not implemented yet */
308 WARN_ONCE("Too many elts\n");
309 return;
310 }
311
312 r300EmitElts(ctx, rmesa->state.VB.Elts, num_verts, rmesa->state.VB.elt_size);
313 fire_EB(rmesa, rmesa->state.elt_dma.aos_offset, num_verts, type, rmesa->state.VB.elt_size);
314 #endif
315 }else{
316 r300EmitAOS(rmesa, rmesa->state.aos_count, start);
317 fire_AOS(rmesa, num_verts, type);
318 }
319 }
320
321 GLboolean r300_run_vb_render(GLcontext *ctx,
322 struct tnl_pipeline_stage *stage)
323 {
324 r300ContextPtr rmesa = R300_CONTEXT(ctx);
325 struct radeon_vertex_buffer *VB = &rmesa->state.VB;
326 int i;
327 int cmd_reserved = 0;
328 int cmd_written = 0;
329 drm_radeon_cmd_header_t *cmd = NULL;
330
331
332 if (RADEON_DEBUG & DEBUG_PRIMS)
333 fprintf(stderr, "%s\n", __FUNCTION__);
334
335 if (stage) {
336 TNLcontext *tnl = TNL_CONTEXT(ctx);
337 radeon_vb_to_rvb(rmesa, VB, &tnl->vb);
338 }
339
340 r300UpdateShaders(rmesa);
341 if (r300EmitArrays(ctx))
342 return GL_TRUE;
343
344 r300UpdateShaderStates(rmesa);
345
346 reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0);
347 e32(R300_RB3D_DSTCACHE_UNKNOWN_0A);
348
349 reg_start(R300_RB3D_ZCACHE_CTLSTAT,0);
350 e32(R300_RB3D_ZCACHE_UNKNOWN_03);
351
352 r300EmitState(rmesa);
353
354 for(i=0; i < VB->PrimitiveCount; i++){
355 GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
356 GLuint start = VB->Primitive[i].start;
357 GLuint length = VB->Primitive[i].count;
358
359 r300_render_vb_primitive(rmesa, ctx, start, start + length, prim);
360 }
361
362 reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0);
363 e32(R300_RB3D_DSTCACHE_UNKNOWN_0A /*R300_RB3D_DSTCACHE_UNKNOWN_02*/);
364
365 reg_start(R300_RB3D_ZCACHE_CTLSTAT,0);
366 e32(R300_RB3D_ZCACHE_UNKNOWN_03 /*R300_RB3D_ZCACHE_UNKNOWN_01*/);
367
368 #ifdef USER_BUFFERS
369 r300UseArrays(ctx);
370 #endif
371 r300ReleaseArrays(ctx);
372 return GL_FALSE;
373 }
374
/*
 * FALLBACK_IF(expr): when expr is true, report it through WARN_ONCE using
 * the stringified text of expr and make the enclosing function return
 * R300_FALLBACK_RAST.
 *
 * The leading "1 ||" makes the warning unconditional, so the
 * RADEON_DEBUG & DEBUG_FALLBACKS test currently has no effect.
 * Because the argument is stringified, editing the expression text at a
 * call site also changes the message that is printed.
 */
375 #define FALLBACK_IF(expr) \
376 do { \
377 if (expr) { \
378 if (1 || RADEON_DEBUG & DEBUG_FALLBACKS) \
379 WARN_ONCE("Software fallback:%s\n", \
380 #expr); \
381 return R300_FALLBACK_RAST; \
382 } \
383 } while(0)
384
/**
 * Decide whether the current GL state can be rendered by the hardware.
 *
 * The current fragment program, if any, is translated on demand; a program
 * that still is not marked translated afterwards forces a fallback.  A
 * series of GL state checks follows, each of which returns through
 * FALLBACK_IF when its condition holds.
 *
 * Returns R300_FALLBACK_RAST as soon as one check triggers, otherwise
 * R300_FALLBACK_NONE.
 */
385 int r300Fallback(GLcontext *ctx)
386 {
387 r300ContextPtr r300 = R300_CONTEXT(ctx);
388 struct r300_fragment_program *rp =
389 (struct r300_fragment_program *)
390 (char *)ctx->FragmentProgram._Current;
/* NOTE(review): i is declared but never used in this function. */
391 int i;
392
/* Translate the fragment program lazily; fall back if translation did not succeed. */
393 if (rp) {
394 if (!rp->translated)
395 r300_translate_fragment_shader(r300, rp);
396
397 FALLBACK_IF(!rp->translated);
398 }
399
400 /* We do not do SELECT or FEEDBACK (yet ?)
401 * Is it worth doing them ?
402 */
403 FALLBACK_IF(ctx->RenderMode != GL_RENDER);
404
405 #if 0
406 /* These should work now.. */
407 FALLBACK_IF(ctx->Color.DitherFlag);
408 /* GL_ALPHA_TEST */
409 FALLBACK_IF(ctx->Color.AlphaEnabled);
410 /* GL_BLEND */
411 FALLBACK_IF(ctx->Color.BlendEnabled);
412 /* GL_POLYGON_OFFSET_FILL */
413 FALLBACK_IF(ctx->Polygon.OffsetFill);
414 /* FOG seems to trigger an unknown output
415 * in vertex program.
416 */
417 FALLBACK_IF(ctx->Fog.Enabled);
418 #endif
/*
 * Two-sided stencil falls back only when the reference value, value mask or
 * write mask differs between entries 0 and 1.
 */
419 FALLBACK_IF(ctx->Stencil._TestTwoSide &&
420 (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[1] ||
421 ctx->Stencil.ValueMask[0] != ctx->Stencil.ValueMask[1] ||
422 ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[1]));
423
/* The checks in this group are skipped entirely when disable_lowimpact_fallback is set. */
424 if(!r300->disable_lowimpact_fallback){
425 /* GL_POLYGON_OFFSET_POINT */
426 FALLBACK_IF(ctx->Polygon.OffsetPoint);
427 /* GL_POLYGON_OFFSET_LINE */
428 FALLBACK_IF(ctx->Polygon.OffsetLine);
429 #if 0
430 /* GL_STENCIL_TEST */
431 FALLBACK_IF(ctx->Stencil.Enabled);
432 /* GL_POLYGON_SMOOTH disabling to get blender going */
433 FALLBACK_IF(ctx->Polygon.SmoothFlag);
434 #endif
435 /* GL_POLYGON_STIPPLE */
436 FALLBACK_IF(ctx->Polygon.StippleFlag);
437 /* GL_MULTISAMPLE_ARB */
438 FALLBACK_IF(ctx->Multisample.Enabled);
439 /* blender ? */
440 FALLBACK_IF(ctx->Line.StippleFlag);
441 /* GL_LINE_SMOOTH */
442 FALLBACK_IF(ctx->Line.SmoothFlag);
443 /* GL_POINT_SMOOTH */
444 FALLBACK_IF(ctx->Point.SmoothFlag);
445 }
446
447 /* Fallback for LOGICOP */
448 FALLBACK_IF(ctx->Color.ColorLogicOpEnabled);
449
450 /* Rest could be done with vertex fragments */
/* Point sprites are only checked when one of the point-sprite extensions is flagged in ctx->Extensions. */
451 if (ctx->Extensions.NV_point_sprite ||
452 ctx->Extensions.ARB_point_sprite)
453 /* GL_POINT_SPRITE_NV */
454 FALLBACK_IF(ctx->Point.PointSprite);
455
456 return R300_FALLBACK_NONE;
457 }
458
459 /**
460 * Called by the pipeline manager to render a batch of primitives.
461 * We can return true to pass on to the next stage (i.e. software
462 * rasterization) or false to indicate that the pipeline has finished
463 * after we render something.
464 */
465 static GLboolean r300_run_render(GLcontext *ctx,
466 struct tnl_pipeline_stage *stage)
467 {
468
469 if (RADEON_DEBUG & DEBUG_PRIMS)
470 fprintf(stderr, "%s\n", __FUNCTION__);
471
472 if (r300Fallback(ctx) >= R300_FALLBACK_RAST)
473 return GL_TRUE;
474
475 return r300_run_vb_render(ctx, stage);
476 }
477
/*
 * TNL pipeline stage descriptor for the hardware rasterization path.
 * Only the stage name and the run callback (r300_run_render) are filled in;
 * the four members in between are left NULL.
 */
478 const struct tnl_pipeline_stage _r300_render_stage = {
479 "r300 hw rasterize",
480 NULL,
481 NULL,
482 NULL,
483 NULL,
484 r300_run_render /* run */
485 };
486
487 static GLboolean r300_run_tcl_render(GLcontext *ctx,
488 struct tnl_pipeline_stage *stage)
489 {
490 r300ContextPtr rmesa = R300_CONTEXT(ctx);
491 struct r300_vertex_program *vp;
492
493 hw_tcl_on=future_hw_tcl_on;
494
495 if (RADEON_DEBUG & DEBUG_PRIMS)
496 fprintf(stderr, "%s\n", __FUNCTION__);
497 if(hw_tcl_on == GL_FALSE)
498 return GL_TRUE;
499
500 if (r300Fallback(ctx) >= R300_FALLBACK_TCL) {
501 hw_tcl_on = GL_FALSE;
502 return GL_TRUE;
503 }
504
505 r300UpdateShaders(rmesa);
506
507 vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
508 #if 0 /* Draw every second request with software arb vp */
509 vp->native++;
510 vp->native &= 1;
511 //vp->native = GL_FALSE;
512 #endif
513
514 #if 0 /* You dont want to know what this does... */
515 TNLcontext *tnl = TNL_CONTEXT(ctx);
516 struct tnl_cache *cache;
517 struct tnl_cache_item *c;
518
519 cache = tnl->vp_cache;
520 c = cache->items[0xc000cc0e % cache->size];
521
522 if(c && c->data == vp)
523 vp->native = GL_FALSE;
524
525 #endif
526 #if 0
527 vp->native = GL_FALSE;
528 #endif
529 if (vp->native == GL_FALSE) {
530 hw_tcl_on = GL_FALSE;
531 return GL_TRUE;
532 }
533 //r300UpdateShaderStates(rmesa);
534
535 return r300_run_vb_render(ctx, stage);
536 }
537
/*
 * TNL pipeline stage descriptor for the hardware TCL path.
 * Only the stage name and the run callback (r300_run_tcl_render) are filled
 * in; the four members in between are left NULL.
 */
538 const struct tnl_pipeline_stage _r300_tcl_stage = {
539 "r300 tcl",
540 NULL,
541 NULL,
542 NULL,
543 NULL,
544 r300_run_tcl_render /* run */
545 };
546