Slightly rework r300_get_primitive_type to make it clearer and more compact.
[mesa.git] / src / mesa / drivers / dri / r300 / r300_render.c
1 /**************************************************************************
2
3 Copyright (C) 2004 Nicolai Haehnle.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Nicolai Haehnle <prefect_@gmx.net>
31 */
32
33 #include "glheader.h"
34 #include "state.h"
35 #include "imports.h"
36 #include "enums.h"
37 #include "macros.h"
38 #include "context.h"
39 #include "dd.h"
40 #include "simple_list.h"
41
42 #include "api_arrayelt.h"
43 #include "swrast/swrast.h"
44 #include "swrast_setup/swrast_setup.h"
45 #include "array_cache/acache.h"
46 #include "tnl/tnl.h"
47
48 #include "radeon_ioctl.h"
49 #include "radeon_state.h"
50 #include "r300_context.h"
51 #include "r300_ioctl.h"
52 #include "r300_state.h"
53 #include "r300_reg.h"
54 #include "r300_program.h"
55
56 #include "r300_lib.h"
57
58
59 /**********************************************************************
60 * Hardware rasterization
61 *
62 * When we fell back to software TCL, we still try to use the
63 * rasterization hardware for rendering.
64 **********************************************************************/
65
66 static int r300_get_primitive_type(r300ContextPtr rmesa,
67 GLcontext *ctx,
68 int start,
69 int end,
70 int prim)
71 {
72 TNLcontext *tnl = TNL_CONTEXT(ctx);
73 struct vertex_buffer *VB = &tnl->vb;
74 GLuint i;
75 int type=-1, min_vertices=0;
76 char *name="UNKNOWN";
77
78 if(end<=start)return -1; /* do we need to watch for this ? */
79
80 switch (prim & PRIM_MODE_MASK) {
81 case GL_POINTS:
82 name="P";
83 type=R300_VAP_VF_CNTL__PRIM_POINTS;
84 min_vertices=1;
85 break;
86 case GL_LINES:
87 name="L";
88 type=R300_VAP_VF_CNTL__PRIM_LINES;
89 min_vertices=2;
90 break;
91 case GL_LINE_STRIP:
92 name="LS";
93 type=R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
94 min_vertices=2;
95 break;
96 case GL_LINE_LOOP:
97 name="LL";
98 min_vertices=2;
99 return -1;
100 break;
101 case GL_TRIANGLES:
102 name="T";
103 type=R300_VAP_VF_CNTL__PRIM_TRIANGLES;
104 min_vertices=3;
105 break;
106 case GL_TRIANGLE_STRIP:
107 name="TS";
108 type=R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
109 min_vertices=3;
110 break;
111 case GL_TRIANGLE_FAN:
112 name="TF";
113 type=R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
114 min_vertices=3;
115 break;
116 case GL_QUADS:
117 name="Q";
118 type=R300_VAP_VF_CNTL__PRIM_QUADS;
119 min_vertices=4;
120 break;
121 case GL_QUAD_STRIP:
122 name="QS";
123 type=R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
124 min_vertices=4;
125 break;
126 default:
127 fprintf(stderr, "Cannot handle primitive %02x ", prim & PRIM_MODE_MASK);
128 return -1;
129 break;
130 }
131 #if 1
132 fprintf(stderr, "[%d-%d]%s ", start, end, name);
133 #endif
134 if(start+min_vertices>=end){
135 fprintf(stderr, "Not enough vertices\n");
136 return -1;
137 }
138 return type;
139 }
140
141
142
143 /* Immediate implementation - vertex data is sent via command stream */
144
145 static GLfloat default_vector[4]={0.0, 0.0, 0.0, 1.0};
146
/*
 * Emit element i of vector v as exactly four floats through efloat(): first
 * the v->size components stored in the vector, then the entries of
 * default_vector for the remaining component positions.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * (safe in an unbraced if/else); the arguments are parenthesized so that any
 * pointer or index expression can be passed.
 */
#define output_vector(v, i) \
	do { \
		int _i; \
		for (_i = 0; _i < (v)->size; _i++) { \
			efloat(VEC_ELT((v), GLfloat, (i))[_i]); \
		} \
		for (_i = (v)->size; _i < 4; _i++) { \
			efloat(default_vector[_i]); \
		} \
	} while (0)
157
158 static void r300_render_flat_primitive(r300ContextPtr rmesa,
159 GLcontext *ctx,
160 int start,
161 int end,
162 int prim)
163 {
164 TNLcontext *tnl = TNL_CONTEXT(ctx);
165 struct vertex_buffer *VB = &tnl->vb;
166 GLuint i;
167 int k, type;
168 LOCAL_VARS
169
170 type=r300_get_primitive_type(rmesa, ctx, start, end, prim);
171 if(type<0)return;
172
173
174 start_immediate_packet(end-start, type, 8);
175
176 for(i=start;i<end;i++){
177 #if 0
178 fprintf(stderr, "* (%f %f %f %f) (%f %f %f %f)\n",
179 VEC_ELT(VB->ObjPtr, GLfloat, i)[0],
180 VEC_ELT(VB->ObjPtr, GLfloat, i)[1],
181 VEC_ELT(VB->ObjPtr, GLfloat, i)[2],
182 VEC_ELT(VB->ObjPtr, GLfloat, i)[3],
183
184 VEC_ELT(VB->ColorPtr[0], GLfloat, i)[0],
185 VEC_ELT(VB->ColorPtr[0], GLfloat, i)[1],
186 VEC_ELT(VB->ColorPtr[0], GLfloat, i)[2],
187 VEC_ELT(VB->ColorPtr[0], GLfloat, i)[3]
188 );
189 #endif
190
191
192 /* coordinates */
193 output_vector(VB->ObjPtr, i);
194
195 /* color components */
196 output_vector(VB->ColorPtr[0], i);
197 }
198
199 }
200
201 static GLboolean r300_run_flat_render(GLcontext *ctx,
202 struct tnl_pipeline_stage *stage)
203 {
204 r300ContextPtr rmesa = R300_CONTEXT(ctx);
205 TNLcontext *tnl = TNL_CONTEXT(ctx);
206 struct vertex_buffer *VB = &tnl->vb;
207 GLuint i;
208 AOS_DATA vb_arrays[2];
209 LOCAL_VARS
210
211 if (RADEON_DEBUG == DEBUG_PRIMS)
212 fprintf(stderr, "%s\n", __FUNCTION__);
213
214 /* setup array of structures data */
215
216 /* Note: immediate vertex data includes all coordinates.
217 To save bandwidth use either VBUF or state-based vertex generation */
218 /* xyz */
219 vb_arrays[0].element_size=4;
220 vb_arrays[0].stride=4;
221 vb_arrays[0].offset=0; /* Not used */
222 vb_arrays[0].format=AOS_FORMAT_FLOAT;
223 vb_arrays[0].ncomponents=4;
224 vb_arrays[0].reg=REG_COORDS;
225
226 /* color */
227 vb_arrays[1].element_size=4;
228 vb_arrays[1].stride=4;
229 vb_arrays[1].offset=0; /* Not used */
230 vb_arrays[1].format=AOS_FORMAT_FLOAT_COLOR;
231 vb_arrays[1].ncomponents=4;
232 vb_arrays[1].reg=REG_COLOR0;
233
234
235 /* needed before starting 3d operation .. */
236 reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0);
237 e32(0x0000000a);
238
239 reg_start(0x4f18,0);
240 e32(0x00000003);
241
242 r300EmitState(rmesa);
243
244 reg_start(0x20b0,0);
245 e32(0x0000043f);
246
247 FLAT_COLOR_PIPELINE.vertex_shader.matrix[0].length=16;
248 memcpy(FLAT_COLOR_PIPELINE.vertex_shader.matrix[0].body.f, ctx->_ModelProjectMatrix.m, 16*4);
249
250 FLAT_COLOR_PIPELINE.vertex_shader.unknown2.length=4;
251 FLAT_COLOR_PIPELINE.vertex_shader.unknown2.body.f[0]=0.0;
252 FLAT_COLOR_PIPELINE.vertex_shader.unknown2.body.f[1]=0.0;
253 FLAT_COLOR_PIPELINE.vertex_shader.unknown2.body.f[2]=1.0;
254 FLAT_COLOR_PIPELINE.vertex_shader.unknown2.body.f[3]=0.0;
255
256 program_pipeline(PASS_PREFIX &FLAT_COLOR_PIPELINE);
257
258 /* We need LOAD_VBPNTR to setup AOS_ATTR fields.. the offsets are irrelevant */
259 setup_AOS(PASS_PREFIX vb_arrays, 2);
260
261 for(i=0; i < VB->PrimitiveCount; i++){
262 GLuint prim = VB->Primitive[i].mode;
263 GLuint start = VB->Primitive[i].start;
264 GLuint length = VB->Primitive[i].count;
265 r300_render_flat_primitive(rmesa, ctx, start, start + length, prim);
266 }
267
268 end_3d(PASS_PREFIX_VOID);
269
270 fprintf(stderr, "\n");
271 return GL_FALSE;
272 }
273
274 /* vertex buffer implementation */
275
276 /* We use the start part of GART texture buffer for vertices */
277
278 #define R300_MAX_AOS_ARRAYS 16
279
280 static void upload_vertex_buffer(r300ContextPtr rmesa,
281 GLcontext *ctx, AOS_DATA *array, int *n_arrays)
282 {
283 TNLcontext *tnl = TNL_CONTEXT(ctx);
284 struct vertex_buffer *VB = &tnl->vb;
285 int offset=0, idx=0;
286 int i,j;
287 radeonScreenPtr rsp=rmesa->radeon.radeonScreen;
288 /* Not the most efficient implementation, but, for now, I just want something that
289 works */
290 /* to do - make single memcpy per column (is it possible ?) */
291 /* to do - use dirty flags to avoid redundant copies */
292 #define UPLOAD_VECTOR(v, r, f)\
293 { \
294 /* Is the data dirty ? */ \
295 if (v->flags & ((1<<v->size)-1)) { \
296 fprintf(stderr, "size=%d vs stride=%d\n", v->size, v->stride); \
297 if(v->size*4==v->stride){\
298 /* fast path */ \
299 memcpy(rsp->gartTextures.map+offset, v->data, v->stride*VB->Count); \
300 } else { \
301 for(i=0;i<VB->Count;i++){ \
302 /* copy one vertex at a time*/ \
303 memcpy(rsp->gartTextures.map+offset, VEC_ELT(v, GLfloat, i), v->size*4); \
304 } \
305 } \
306 /* v->flags &= ~((1<<v->size)-1);*/ \
307 } \
308 array[idx].element_size=v->size; \
309 array[idx].stride=v->size; \
310 array[idx].format=(f); \
311 array[idx].ncomponents=v->size; \
312 array[idx].offset=rsp->gartTextures.handle+offset; \
313 array[idx].reg=r; \
314 offset+=v->size*4*VB->Count; \
315 idx++; \
316 /* Fill in the rest with the components of default_vector */\
317 /* \
318 if(v->size<4){ \
319 array[idx].element_size=4-v->size; \
320 array[idx].stride=0; \
321 array[idx].format=(f); \
322 array[idx].ncomponents=4-v->size; \
323 array[idx].offset=rsp->gartTextures.handle+v->size*4;\
324 array[idx].reg=r; \
325 idx++; \
326 } \
327 */\
328 }
329
330 /* Put a copy of default vector */
331 memcpy(rsp->gartTextures.map, default_vector, 16);
332 offset+=16;
333
334 UPLOAD_VECTOR(VB->ObjPtr, REG_COORDS, AOS_FORMAT_FLOAT);
335 UPLOAD_VECTOR(VB->ColorPtr[0], REG_COLOR0, AOS_FORMAT_FLOAT_COLOR);
336
337 *n_arrays=idx;
338 if(idx>=R300_MAX_AOS_ARRAYS){
339 fprintf(stderr, "Aieee ! Maximum AOS arrays count exceeded.. \n");
340 exit(-1);
341 }
342 }
343
344 static void r300_render_vb_flat_primitive(r300ContextPtr rmesa,
345 GLcontext *ctx,
346 int start,
347 int end,
348 int prim)
349 {
350 TNLcontext *tnl = TNL_CONTEXT(ctx);
351 struct vertex_buffer *VB = &tnl->vb;
352 GLuint i;
353 int k, type, n_arrays;
354 LOCAL_VARS
355
356 if(end<=start)return; /* do we need to watch for this ? */
357
358 type=r300_get_primitive_type(rmesa, ctx, start, end, prim);
359 if(type<0)return;
360
361 fire_AOS(PASS_PREFIX end-start, type);
362 }
363
364 static VERTEX_SHADER_FRAGMENT default_vector_vsf={
365 length: 4,
366 body: {
367 f: {0.0, 0.0, 0.0, 1.0}
368 }
369 };
370
371 static GLboolean r300_run_vb_flat_render(GLcontext *ctx,
372 struct tnl_pipeline_stage *stage)
373 {
374 r300ContextPtr rmesa = R300_CONTEXT(ctx);
375 TNLcontext *tnl = TNL_CONTEXT(ctx);
376 struct vertex_buffer *VB = &tnl->vb;
377 int i, j, n_arrays;
378 AOS_DATA vb_arrays[R300_MAX_AOS_ARRAYS];
379 AOS_DATA vb_arrays2[R300_MAX_AOS_ARRAYS];
380 LOCAL_VARS
381
382 if (RADEON_DEBUG == DEBUG_PRIMS)
383 fprintf(stderr, "%s\n", __FUNCTION__);
384
385 /* setup array of structures data */
386
387 upload_vertex_buffer(rmesa, ctx, vb_arrays, &n_arrays);
388 fprintf(stderr, "Using %d AOS arrays\n", n_arrays);
389
390 reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0);
391 e32(0x0000000a);
392
393 reg_start(0x4f18,0);
394 e32(0x00000003);
395
396 r300EmitState(rmesa);
397
398 reg_start(0x20b0,0);
399 e32(0x0000043f);
400
401 FLAT_COLOR_PIPELINE.vertex_shader.matrix[0].length=16;
402 memcpy(FLAT_COLOR_PIPELINE.vertex_shader.matrix[0].body.f, ctx->_ModelProjectMatrix.m, 16*4);
403
404 FLAT_COLOR_PIPELINE.vertex_shader.unknown2.length=4;
405 FLAT_COLOR_PIPELINE.vertex_shader.unknown2.body.f[0]=0.0;
406 FLAT_COLOR_PIPELINE.vertex_shader.unknown2.body.f[1]=0.0;
407 FLAT_COLOR_PIPELINE.vertex_shader.unknown2.body.f[2]=1.0;
408 FLAT_COLOR_PIPELINE.vertex_shader.unknown2.body.f[3]=0.0;
409
410 program_pipeline(PASS_PREFIX &FLAT_COLOR_PIPELINE);
411
412 reg_start(R300_RE_OCCLUSION_CNTL, 0);
413 e32(R300_OCCLUSION_ON);
414
415 set_quad0(PASS_PREFIX 1.0,1.0,1.0,1.0);
416 set_init21(PASS_PREFIX 0.0,1.0);
417
418 for(i=0; i < VB->PrimitiveCount; i++){
419 GLuint prim = VB->Primitive[i].mode;
420 GLuint start = VB->Primitive[i].start;
421 GLuint length = VB->Primitive[i].count;
422
423 /* copy arrays */
424 memcpy(vb_arrays2, vb_arrays, sizeof(AOS_DATA)*n_arrays);
425 for(j=0;j<n_arrays;j++){
426 vb_arrays2[j].offset+=vb_arrays2[j].stride*start;
427 }
428
429 setup_AOS(PASS_PREFIX vb_arrays2, n_arrays);
430
431 r300_render_vb_flat_primitive(rmesa, ctx, start, start + length, prim);
432 }
433
434 end_3d(PASS_PREFIX_VOID);
435
436 /* Flush state - we are done drawing.. */
437 r300Flush(ctx);
438 fprintf(stderr, "\n");
439 return GL_FALSE;
440 }
441
442 /**
443 * Called by the pipeline manager to render a batch of primitives.
444 * We can return true to pass on to the next stage (i.e. software
445 * rasterization) or false to indicate that the pipeline has finished
446 * after we render something.
447 */
448 static GLboolean r300_run_render(GLcontext *ctx,
449 struct tnl_pipeline_stage *stage)
450 {
451 r300ContextPtr rmesa = R300_CONTEXT(ctx);
452 TNLcontext *tnl = TNL_CONTEXT(ctx);
453 struct vertex_buffer *VB = &tnl->vb;
454 GLuint i;
455
456 if (RADEON_DEBUG == DEBUG_PRIMS)
457 fprintf(stderr, "%s\n", __FUNCTION__);
458
459 #if 1
460 return r300_run_flat_render(ctx, stage);
461 #else
462 return GL_TRUE;
463 #endif
464
465 #if 0
466 mgaContextPtr mmesa = MGA_CONTEXT(ctx);
467 TNLcontext *tnl = TNL_CONTEXT(ctx);
468 struct vertex_buffer *VB = &tnl->vb;
469 GLuint i;
470
471 /* Don't handle clipping or indexed vertices or vertex manipulations.
472 */
473 if (mmesa->RenderIndex != 0 ||
474 !mga_validate_render( ctx, VB )) {
475 return GL_TRUE;
476 }
477
478 tnl->Driver.Render.Start( ctx );
479 mmesa->SetupNewInputs = ~0;
480
481 for (i = 0 ; i < VB->PrimitiveCount ; i++)
482 {
483 GLuint prim = VB->Primitive[i].mode;
484 GLuint start = VB->Primitive[i].start;
485 GLuint length = VB->Primitive[i].count;
486
487 if (!length)
488 continue;
489
490 mga_render_tab_verts[prim & PRIM_MODE_MASK]( ctx, start, start + length,
491 prim);
492 }
493
494 tnl->Driver.Render.Finish( ctx );
495
496 return GL_FALSE; /* finished the pipe */
497 #endif
498 }
499
500
501 /**
502 * Called by the pipeline manager once before rendering.
503 * We check the GL state here to
504 * a) decide whether we can do the current state in hardware and
505 * b) update hardware registers
506 */
507 #define FALLBACK_IF(expr) \
508 do { \
509 if (expr) { \
510 if (RADEON_DEBUG & DEBUG_FALLBACKS) \
511 fprintf(stderr, "%s: fallback:%s\n", \
512 __FUNCTION__, #expr); \
513 stage->active = GL_FALSE; \
514 return; \
515 } \
516 } while(0)
517
518 static void r300_check_render(GLcontext *ctx, struct tnl_pipeline_stage *stage)
519 {
520 r300ContextPtr r300 = R300_CONTEXT(ctx);
521 int i;
522
523 if (RADEON_DEBUG & DEBUG_STATE)
524 fprintf(stderr, "%s\n", __FUNCTION__);
525
526 /* We only support rendering in hardware for now */
527 if (ctx->RenderMode != GL_RENDER) {
528 stage->active = GL_FALSE;
529 return;
530 }
531
532 // I failed to figure out how dither works in hardware,
533 // let's just ignore it for now
534 //FALLBACK_IF(ctx->Color.DitherFlag);
535
536 /* I'm almost certain I forgot something here */
537 FALLBACK_IF(ctx->Color.AlphaEnabled); // GL_ALPHA_TEST
538 FALLBACK_IF(ctx->Color.BlendEnabled); // GL_BLEND
539 FALLBACK_IF(ctx->Fog.Enabled); // GL_FOG
540 FALLBACK_IF(ctx->Line.SmoothFlag); // GL_LINE_SMOOTH
541 FALLBACK_IF(ctx->Line.StippleFlag); // GL_LINE_STIPPLE
542 FALLBACK_IF(ctx->Point.SmoothFlag); // GL_POINT_SMOOTH
543 if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite)
544 FALLBACK_IF(ctx->Point.PointSprite); // GL_POINT_SPRITE_NV
545 FALLBACK_IF(ctx->Polygon.OffsetPoint); // GL_POLYGON_OFFSET_POINT
546 FALLBACK_IF(ctx->Polygon.OffsetLine); // GL_POLYGON_OFFSET_LINE
547 FALLBACK_IF(ctx->Polygon.OffsetFill); // GL_POLYGON_OFFSET_FILL
548 FALLBACK_IF(ctx->Polygon.SmoothFlag); // GL_POLYGON_SMOOTH
549 FALLBACK_IF(ctx->Polygon.StippleFlag); // GL_POLYGON_STIPPLE
550 FALLBACK_IF(ctx->Stencil.Enabled); // GL_STENCIL_TEST
551 FALLBACK_IF(ctx->Multisample.Enabled); // GL_MULTISAMPLE_ARB
552
553 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
554 FALLBACK_IF(ctx->Texture.Unit[i].Enabled);
555
556
557 /* let r300_run_render do its job */
558 #if 0
559 stage->active = GL_FALSE;
560 #endif
561 }
562
563
/* Stage destructor: does nothing. */
static void dtr(struct tnl_pipeline_stage *stage)
{
	(void) stage;
}
568
569 const struct tnl_pipeline_stage _r300_render_stage = {
570 "r300 hw rasterize",
571 _NEW_ALL, /* re-check (always re-check for now) */
572 0, /* re-run (always runs) */
573 GL_TRUE, /* active */
574 0, 0, /* inputs (set in check_render), outputs */
575 0, 0, /* changed_inputs, private */
576 dtr, /* destructor */
577 r300_check_render, /* check */
578 r300_run_render /* run */
579 };