2f832d27513361ad261028a2d4d023dad27e1a80
[mesa.git] / src / mesa / drivers / dri / r300 / r300_render.c
1 /**************************************************************************
2
3 Copyright (C) 2004 Nicolai Haehnle.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Nicolai Haehnle <prefect_@gmx.net>
31 */
32
33 #include "glheader.h"
34 #include "state.h"
35 #include "imports.h"
36 #include "enums.h"
37 #include "macros.h"
38 #include "context.h"
39 #include "dd.h"
40 #include "simple_list.h"
41
42 #include "api_arrayelt.h"
43 #include "swrast/swrast.h"
44 #include "swrast_setup/swrast_setup.h"
45 #include "array_cache/acache.h"
46 #include "tnl/tnl.h"
47
48 #include "radeon_reg.h"
49 #include "radeon_macros.h"
50 #include "radeon_ioctl.h"
51 #include "radeon_state.h"
52 #include "r300_context.h"
53 #include "r300_ioctl.h"
54 #include "r300_state.h"
55 #include "r300_reg.h"
56 #include "r300_program.h"
57 #include "r300_tex.h"
58
59 #include "r300_emit.h"
60
61 /**********************************************************************
62 * Hardware rasterization
63 *
64 * When we fell back to software TCL, we still try to use the
65 * rasterization hardware for rendering.
66 **********************************************************************/
67
68 static int r300_get_primitive_type(r300ContextPtr rmesa,
69 GLcontext *ctx,
70 int start,
71 int end,
72 int prim)
73 {
74 TNLcontext *tnl = TNL_CONTEXT(ctx);
75 struct vertex_buffer *VB = &tnl->vb;
76 GLuint i;
77 int type=-1, min_vertices=0;
78 char *name="UNKNOWN";
79
80 if(end<=start)return -1; /* do we need to watch for this ? */
81
82 switch (prim & PRIM_MODE_MASK) {
83 case GL_POINTS:
84 name="P";
85 type=R300_VAP_VF_CNTL__PRIM_POINTS;
86 min_vertices=1;
87 break;
88 case GL_LINES:
89 name="L";
90 type=R300_VAP_VF_CNTL__PRIM_LINES;
91 min_vertices=2;
92 break;
93 case GL_LINE_STRIP:
94 name="LS";
95 type=R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
96 min_vertices=2;
97 break;
98 case GL_LINE_LOOP:
99 name="LL";
100 min_vertices=2;
101 return -1;
102 break;
103 case GL_TRIANGLES:
104 name="T";
105 type=R300_VAP_VF_CNTL__PRIM_TRIANGLES;
106 min_vertices=3;
107 break;
108 case GL_TRIANGLE_STRIP:
109 name="TS";
110 type=R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
111 min_vertices=3;
112 break;
113 case GL_TRIANGLE_FAN:
114 name="TF";
115 type=R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
116 min_vertices=3;
117 break;
118 case GL_QUADS:
119 name="Q";
120 type=R300_VAP_VF_CNTL__PRIM_QUADS;
121 min_vertices=4;
122 break;
123 case GL_QUAD_STRIP:
124 name="QS";
125 type=R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
126 min_vertices=4;
127 break;
128 case GL_POLYGON:
129 name="P";
130 type=R300_VAP_VF_CNTL__PRIM_POLYGON;
131 min_vertices=3;
132 break;
133 default:
134 fprintf(stderr, "%s:%s Do not know how to handle primitive %02x - help me !\n",
135 __FILE__, __FUNCTION__,
136 prim & PRIM_MODE_MASK);
137 return -1;
138 break;
139 }
140 #if 0
141 fprintf(stderr, "[%d-%d]%s ", start, end, name);
142 #endif
143 if(start+min_vertices>end){
144 static int warn_once=1;
145 if(warn_once){
146 fprintf(stderr, "%s:%s Not enough vertices to draw primitive %02x - help me !\n",
147 __FILE__, __FUNCTION__,
148 prim & PRIM_MODE_MASK);
149 warn_once=0;
150 }
151 return -1;
152 }
153 return type;
154 }
155
156 /* This function compiles GL context into state registers that
157 describe data routing inside of R300 pipeline.
158
159 In particular, it programs input_route, output_vtx_fmt, texture
160 unit configuration and gb_output_vtx_fmt
161
162 This function encompasses setup_AOS() from r300_lib.c
163 */
164
165
166
167
168 /* Immediate implementation - vertex data is sent via command stream */
169
170 static GLfloat default_vector[4]={0.0, 0.0, 0.0, 1.0};
171
172 #define output_vector(v, i) \
173 { \
174 int _i; \
175 for(_i=0;_i<v->size;_i++){ \
176 efloat(VEC_ELT(v, GLfloat, i)[_i]); \
177 } \
178 for(_i=v->size;_i<4;_i++){ \
179 efloat(default_vector[_i]); \
180 } \
181 }
182
183 /* Immediate implementation - vertex data is sent via command stream */
184
185 static void r300_render_immediate_primitive(r300ContextPtr rmesa,
186 GLcontext *ctx,
187 int start,
188 int end,
189 int prim)
190 {
191 TNLcontext *tnl = TNL_CONTEXT(ctx);
192 struct vertex_buffer *VB = &tnl->vb;
193 GLuint i;
194 int k, type;
195 LOCAL_VARS
196
197 type=r300_get_primitive_type(rmesa, ctx, start, end, prim);
198
199 #if 0
200 fprintf(stderr,"ObjPtr: size=%d stride=%d\n",
201 VB->ObjPtr->size, VB->ObjPtr->stride);
202 fprintf(stderr,"ColorPtr[0]: size=%d stride=%d\n",
203 VB->ColorPtr[0]->size, VB->ColorPtr[0]->stride);
204 fprintf(stderr,"TexCoordPtr[0]: size=%d stride=%d\n",
205 VB->TexCoordPtr[0]->size, VB->TexCoordPtr[0]->stride);
206 #endif
207
208 if(type<0)return;
209
210 /* A packet cannot have more than 16383 data words.. */
211 if(((end-start)*8+4*rmesa->state.texture.tc_count)>16380){
212 fprintf(stderr, "%s:%s: Too many vertices to paint. Fix me !\n");
213 return;
214 }
215
216 start_immediate_packet(end-start, type, 8+4*rmesa->state.texture.tc_count);
217
218 for(i=start;i<end;i++){
219 #if 0
220 fprintf(stderr, "* (%f %f %f %f) (%f %f %f %f)\n",
221 VEC_ELT(VB->ObjPtr, GLfloat, i)[0],
222 VEC_ELT(VB->ObjPtr, GLfloat, i)[1],
223 VEC_ELT(VB->ObjPtr, GLfloat, i)[2],
224 VEC_ELT(VB->ObjPtr, GLfloat, i)[3],
225
226 VEC_ELT(VB->ColorPtr[0], GLfloat, i)[0],
227 VEC_ELT(VB->ColorPtr[0], GLfloat, i)[1],
228 VEC_ELT(VB->ColorPtr[0], GLfloat, i)[2],
229 VEC_ELT(VB->ColorPtr[0], GLfloat, i)[3]
230 );
231 #endif
232
233
234 /* coordinates */
235 output_vector(VB->ObjPtr, i);
236
237 /* color components */
238 output_vector(VB->ColorPtr[0], i);
239
240 /* texture coordinates */
241 for(k=0;k < ctx->Const.MaxTextureUnits;k++)
242 if(ctx->Texture.Unit[k].Enabled)
243 output_vector(VB->TexCoordPtr[k], i);
244 }
245
246 }
247
248
249 static GLboolean r300_run_immediate_render(GLcontext *ctx,
250 struct tnl_pipeline_stage *stage)
251 {
252 r300ContextPtr rmesa = R300_CONTEXT(ctx);
253 TNLcontext *tnl = TNL_CONTEXT(ctx);
254 struct vertex_buffer *VB = &tnl->vb;
255 GLuint i;
256 /* Only do 2d textures */
257 struct gl_texture_object *to=ctx->Texture.Unit[0].Current2D;
258 r300TexObjPtr t=to->DriverData;
259 LOCAL_VARS
260
261
262 /* Update texture state - needs to be done only when actually changed..
263 All the time for now.. */
264
265
266 if (RADEON_DEBUG == DEBUG_PRIMS)
267 fprintf(stderr, "%s\n", __FUNCTION__);
268
269 #if 1 /* we need this, somehow */
270 /* Flush state - make sure command buffer is nice and large */
271 r300Flush(ctx);
272 /* Make sure we have enough space */
273 #else 0
274 /* Count is very imprecize, but should be good upper bound */
275 r300EnsureCmdBufSpace(rmesa, rmesa->hw.max_state_size + 4+2+30
276 +VB->PrimitiveCount*(1+8)+VB->Count*4*rmesa->state.texture.tc_count+4, __FUNCTION__);
277 #endif
278
279 /* needed before starting 3d operation .. */
280 reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0);
281 e32(0x0000000a);
282
283 reg_start(0x4f18,0);
284 e32(0x00000003);
285
286
287 #if 0 /* looks like the Z offset issue got fixed */
288 rmesa->hw.vte.cmd[1] = R300_VPORT_X_SCALE_ENA
289 | R300_VPORT_X_OFFSET_ENA
290 | R300_VPORT_Y_SCALE_ENA
291 | R300_VPORT_Y_OFFSET_ENA
292 | R300_VTX_W0_FMT;
293 R300_STATECHANGE(rmesa, vte);
294 #endif
295
296
297
298 /* Magic register - note it is right after 20b0 */
299
300
301 if(rmesa->state.texture.tc_count>0){
302 reg_start(0x20b4,0);
303 e32(0x0000000c);
304
305 }
306
307 r300EmitState(rmesa);
308
309 #if 0
310 reg_start(R300_RB3D_COLORMASK, 0);
311 e32(0xf);
312
313 vsf_start_fragment(0x406, 4);
314 efloat(0.0);
315 efloat(0.0);
316 efloat(0.0);
317 efloat(1.0);
318
319 vsf_start_fragment(0x400, 4);
320 efloat(0.0);
321 efloat(0.0);
322 efloat(0.0);
323 efloat(1.0);
324 #endif
325
326 /* We need LOAD_VBPNTR to setup AOS_ATTR fields.. the offsets are irrelevant */
327 r300EmitLOAD_VBPNTR(rmesa, 0);
328
329 for(i=0; i < VB->PrimitiveCount; i++){
330 GLuint prim = VB->Primitive[i].mode;
331 GLuint start = VB->Primitive[i].start;
332 GLuint length = VB->Primitive[i].count;
333 r300_render_immediate_primitive(rmesa, ctx, start, start + length, prim);
334 }
335
336 /* This sequence is required after any 3d drawing packet
337 I suspect it work arounds a bug (or deficiency) in hardware */
338
339 reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0);
340 e32(0x0000000a);
341
342 reg_start(0x4f18,0);
343 e32(0x00000003);
344
345 return GL_FALSE;
346 }
347
348
349 /* vertex buffer implementation */
350
351 /* We use the start part of GART texture buffer for vertices */
352
353
354 static void upload_vertex_buffer(r300ContextPtr rmesa, GLcontext *ctx)
355 {
356 TNLcontext *tnl = TNL_CONTEXT(ctx);
357 struct vertex_buffer *VB = &tnl->vb;
358 int idx=0;
359 int i,j,k;
360 radeonScreenPtr rsp=rmesa->radeon.radeonScreen;
361
362 /* A hack - we don't want to overwrite vertex buffers, so we
363 just use AGP space for them.. Fix me ! */
364 static int offset=0;
365 if(offset>2*1024*1024){
366 //fprintf(stderr, "Wrapping agp vertex buffer offset\n");
367 offset=0;
368 }
369 /* Not the most efficient implementation, but, for now, I just want something that
370 works */
371 /* to do - make single memcpy per column (is it possible ?) */
372 /* to do - use dirty flags to avoid redundant copies */
373 #define UPLOAD_VECTOR(v)\
374 { \
375 /* Is the data dirty ? */ \
376 if (v->flags & ((1<<v->size)-1)) { \
377 /* fprintf(stderr, "size=%d vs stride=%d\n", v->size, v->stride); */ \
378 if(v->size*4==v->stride){\
379 /* fast path */ \
380 memcpy(rsp->gartTextures.map+offset, v->data, v->stride*VB->Count); \
381 } else { \
382 for(i=0;i<VB->Count;i++){ \
383 /* copy one vertex at a time*/ \
384 memcpy(rsp->gartTextures.map+offset+i*v->size*4, VEC_ELT(v, GLfloat, i), v->size*4); \
385 } \
386 } \
387 /* v->flags &= ~((1<<v->size)-1);*/ \
388 } \
389 rmesa->state.aos[idx].offset=rsp->gartTextures.handle+offset; \
390 offset+=v->size*4*VB->Count; \
391 idx++; \
392 }
393
394 UPLOAD_VECTOR(VB->ObjPtr);
395 UPLOAD_VECTOR(VB->ColorPtr[0]);
396 /* texture coordinates */
397 for(k=0;k < ctx->Const.MaxTextureUnits;k++)
398 if(ctx->Texture.Unit[k].Enabled)
399 UPLOAD_VECTOR(VB->TexCoordPtr[k]);
400
401 if(idx>=R300_MAX_AOS_ARRAYS){
402 fprintf(stderr, "Aieee ! Maximum AOS arrays count exceeded.. \n");
403 exit(-1);
404 }
405 }
406
407 static void r300_render_vb_primitive(r300ContextPtr rmesa,
408 GLcontext *ctx,
409 int start,
410 int end,
411 int prim)
412 {
413 int type;
414 LOCAL_VARS
415
416 if(end<=start)return; /* do we need to watch for this ? */
417
418 type=r300_get_primitive_type(rmesa, ctx, start, end, prim);
419 if(type<0)return;
420
421 fire_AOS(PASS_PREFIX end-start, type);
422 }
423
424 static GLboolean r300_run_vb_render(GLcontext *ctx,
425 struct tnl_pipeline_stage *stage)
426 {
427 r300ContextPtr rmesa = R300_CONTEXT(ctx);
428 TNLcontext *tnl = TNL_CONTEXT(ctx);
429 struct vertex_buffer *VB = &tnl->vb;
430 int i, j;
431 LOCAL_VARS
432
433 if (RADEON_DEBUG == DEBUG_PRIMS)
434 fprintf(stderr, "%s\n", __FUNCTION__);
435
436
437 reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0);
438 e32(0x0000000a);
439
440 reg_start(0x4f18,0);
441 e32(0x00000003);
442
443 r300_setup_routing(ctx, GL_FALSE);
444
445 r300EmitState(rmesa);
446
447 /* setup array of structures data */
448 LOCK_HARDWARE(&(rmesa->radeon));
449
450 upload_vertex_buffer(rmesa, ctx);
451 //fprintf(stderr, "Using %d AOS arrays\n", n_arrays);
452
453 for(i=0; i < VB->PrimitiveCount; i++){
454 GLuint prim = VB->Primitive[i].mode;
455 GLuint start = VB->Primitive[i].start;
456 GLuint length = VB->Primitive[i].count;
457
458 /* We need LOAD_VBPNTR to setup AOS_ATTR fields.. */
459 r300EmitLOAD_VBPNTR(rmesa, start);
460
461 r300_render_vb_primitive(rmesa, ctx, start, start + length, prim);
462 }
463
464 /* This sequence is required after any 3d drawing packet
465 I suspect it works around a bug (or deficiency) in hardware */
466
467 reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0);
468 e32(0x0000000a);
469
470 reg_start(0x4f18,0);
471 e32(0x00000003);
472
473 end_3d(PASS_PREFIX_VOID);
474
475 /* Flush state - we are done drawing.. */
476 r300FlushCmdBufLocked(ctx, __FUNCTION__);
477 radeonWaitForIdleLocked(&(rmesa->radeon));
478
479 UNLOCK_HARDWARE(&(rmesa->radeon));
480 return GL_FALSE;
481 }
482
483
484 /**
485 * Called by the pipeline manager to render a batch of primitives.
486 * We can return true to pass on to the next stage (i.e. software
487 * rasterization) or false to indicate that the pipeline has finished
488 * after we render something.
489 */
490 static GLboolean r300_run_render(GLcontext *ctx,
491 struct tnl_pipeline_stage *stage)
492 {
493 r300ContextPtr rmesa = R300_CONTEXT(ctx);
494 TNLcontext *tnl = TNL_CONTEXT(ctx);
495 struct vertex_buffer *VB = &tnl->vb;
496 GLuint i;
497
498 if (RADEON_DEBUG == DEBUG_PRIMS)
499 fprintf(stderr, "%s\n", __FUNCTION__);
500
501
502 #if 1
503
504 #if 1
505 return r300_run_immediate_render(ctx, stage);
506 #else
507 return r300_run_vb_render(ctx, stage);
508 #endif
509 #else
510 return GL_TRUE;
511 #endif
512
513 #if 0
514 mgaContextPtr mmesa = MGA_CONTEXT(ctx);
515 TNLcontext *tnl = TNL_CONTEXT(ctx);
516 struct vertex_buffer *VB = &tnl->vb;
517 GLuint i;
518
519 /* Don't handle clipping or indexed vertices or vertex manipulations.
520 */
521 if (mmesa->RenderIndex != 0 ||
522 !mga_validate_render( ctx, VB )) {
523 return GL_TRUE;
524 }
525
526 tnl->Driver.Render.Start( ctx );
527 mmesa->SetupNewInputs = ~0;
528
529 for (i = 0 ; i < VB->PrimitiveCount ; i++)
530 {
531 GLuint prim = VB->Primitive[i].mode;
532 GLuint start = VB->Primitive[i].start;
533 GLuint length = VB->Primitive[i].count;
534
535 if (!length)
536 continue;
537
538 mga_render_tab_verts[prim & PRIM_MODE_MASK]( ctx, start, start + length,
539 prim);
540 }
541
542 tnl->Driver.Render.Finish( ctx );
543
544 return GL_FALSE; /* finished the pipe */
545 #endif
546 }
547
548
549 /**
550 * Called by the pipeline manager once before rendering.
551 * We check the GL state here to
552 * a) decide whether we can do the current state in hardware and
553 * b) update hardware registers
554 */
/*
 * If `expr` is true, deactivate the pipeline stage and return from the
 * enclosing function.  The text of `expr` is printed to stderr together
 * with the name of the calling function; the leading `1 ||` makes that
 * message unconditional regardless of the DEBUG_FALLBACKS flag.
 *
 * Must be used inside a void function that has a `stage` variable in scope.
 */
#define FALLBACK_IF(expr)						\
	do {								\
		if (expr) {						\
			if (1 || (RADEON_DEBUG & DEBUG_FALLBACKS))	\
				fprintf(stderr, "%s: fallback:%s\n",	\
					__FUNCTION__, #expr);		\
			stage->active = GL_FALSE;			\
			return;						\
		}							\
	} while (0)
565
566 static void r300_check_render(GLcontext *ctx, struct tnl_pipeline_stage *stage)
567 {
568 r300ContextPtr r300 = R300_CONTEXT(ctx);
569 int i;
570
571 if (RADEON_DEBUG & DEBUG_STATE)
572 fprintf(stderr, "%s\n", __FUNCTION__);
573
574 /* We only support rendering in hardware for now */
575 if (ctx->RenderMode != GL_RENDER) {
576 stage->active = GL_FALSE;
577 return;
578 }
579
580 // I failed to figure out how dither works in hardware,
581 // let's just ignore it for now
582 //FALLBACK_IF(ctx->Color.DitherFlag);
583
584 /* I'm almost certain I forgot something here */
585 #if 0 /* This should work now.. */
586 FALLBACK_IF(ctx->Color.AlphaEnabled); // GL_ALPHA_TEST
587 FALLBACK_IF(ctx->Color.BlendEnabled); // GL_BLEND
588 #endif
589 FALLBACK_IF(ctx->Fog.Enabled); // GL_FOG
590 FALLBACK_IF(ctx->Line.SmoothFlag); // GL_LINE_SMOOTH
591 FALLBACK_IF(ctx->Line.StippleFlag); // GL_LINE_STIPPLE
592 FALLBACK_IF(ctx->Point.SmoothFlag); // GL_POINT_SMOOTH
593 if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite)
594 FALLBACK_IF(ctx->Point.PointSprite); // GL_POINT_SPRITE_NV
595 FALLBACK_IF(ctx->Polygon.OffsetPoint); // GL_POLYGON_OFFSET_POINT
596 FALLBACK_IF(ctx->Polygon.OffsetLine); // GL_POLYGON_OFFSET_LINE
597 FALLBACK_IF(ctx->Polygon.OffsetFill); // GL_POLYGON_OFFSET_FILL
598 FALLBACK_IF(ctx->Polygon.SmoothFlag); // GL_POLYGON_SMOOTH
599 FALLBACK_IF(ctx->Polygon.StippleFlag); // GL_POLYGON_STIPPLE
600 //FALLBACK_IF(ctx->Stencil.Enabled); // GL_STENCIL_TEST
601 FALLBACK_IF(ctx->Multisample.Enabled); // GL_MULTISAMPLE_ARB
602
603 /* One step at a time - let one texture pass.. */
604 for (i = 1; i < ctx->Const.MaxTextureUnits; i++)
605 FALLBACK_IF(ctx->Texture.Unit[i].Enabled);
606
607
608 /* let r300_run_render do its job */
609 #if 0
610 stage->active = GL_FALSE;
611 #endif
612 }
613
614
/* Pipeline stage destructor: deliberately does nothing with the stage. */
static void dtr(struct tnl_pipeline_stage *stage)
{
	(void) stage;	/* intentionally unused */
}
619
620 const struct tnl_pipeline_stage _r300_render_stage = {
621 "r300 hw rasterize",
622 _NEW_ALL, /* re-check (always re-check for now) */
623 0, /* re-run (always runs) */
624 GL_TRUE, /* active */
625 0, 0, /* inputs (set in check_render), outputs */
626 0, 0, /* changed_inputs, private */
627 dtr, /* destructor */
628 r300_check_render, /* check */
629 r300_run_render /* run */
630 };