48b7ebb56e44c50c9e9f3be342ef8c43033d933f
[mesa.git] / src / mesa / drivers / dri / r300 / r300_render.c
1 /**************************************************************************
2
3 Copyright (C) 2004 Nicolai Haehnle.
4
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Nicolai Haehnle <prefect_@gmx.net>
31 */
32
33 #include "glheader.h"
34 #include "state.h"
35 #include "imports.h"
36 #include "enums.h"
37 #include "macros.h"
38 #include "context.h"
39 #include "dd.h"
40 #include "simple_list.h"
41
42 #include "api_arrayelt.h"
43 #include "swrast/swrast.h"
44 #include "swrast_setup/swrast_setup.h"
45 #include "array_cache/acache.h"
46 #include "tnl/tnl.h"
47
48 #include "radeon_ioctl.h"
49 #include "radeon_state.h"
50 #include "r300_context.h"
51 #include "r300_ioctl.h"
52 #include "r300_state.h"
53 #include "r300_reg.h"
54 #include "r300_program.h"
55
56 #include "r300_lib.h"
57
58
59 /**********************************************************************
60 * Hardware rasterization
61 *
 * When we fall back to software TCL, we still try to use the
 * rasterization hardware for rendering.
64 **********************************************************************/
65
66 static int r300_get_primitive_type(r300ContextPtr rmesa,
67 GLcontext *ctx,
68 int start,
69 int end,
70 int prim)
71 {
72 TNLcontext *tnl = TNL_CONTEXT(ctx);
73 struct vertex_buffer *VB = &tnl->vb;
74 GLuint i;
75 int type=-1;
76
77 if(end<=start)return -1; /* do we need to watch for this ? */
78
79 fprintf(stderr, "[%d-%d]", start, end);
80 switch (prim & PRIM_MODE_MASK) {
81 case GL_LINES:
82 fprintf(stderr, "L ");
83 type=R300_VAP_VF_CNTL__PRIM_LINES;
84 if(end<start+2){
85 fprintf(stderr, "Not enough vertices\n");
86 return -1; /* need enough vertices for Q */
87 }
88 break;
89 case GL_LINE_STRIP:
90 fprintf(stderr, "LS ");
91 type=R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
92 if(end<start+2){
93 fprintf(stderr, "Not enough vertices\n");
94 return -1; /* need enough vertices for Q */
95 }
96 break;
97 case GL_LINE_LOOP:
98 fprintf(stderr, "LL ");
99 return -1;
100 if(end<start+2){
101 fprintf(stderr, "Not enough vertices\n");
102 return -1; /* need enough vertices for Q */
103 }
104 break;
105 case GL_TRIANGLES:
106 fprintf(stderr, "T ");
107 type=R300_VAP_VF_CNTL__PRIM_TRIANGLES;
108 if(end<start+3){
109 fprintf(stderr, "Not enough vertices\n");
110 return -1; /* need enough vertices for Q */
111 }
112 break;
113 case GL_TRIANGLE_STRIP:
114 fprintf(stderr, "TS ");
115 type=R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
116 if(end<start+3){
117 fprintf(stderr, "Not enough vertices\n");
118 return -1; /* need enough vertices for Q */
119 }
120 break;
121 case GL_TRIANGLE_FAN:
122 fprintf(stderr, "TF ");
123 type=R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
124 if(end<start+3){
125 fprintf(stderr, "Not enough vertices\n");
126 return -1; /* need enough vertices for Q */
127 }
128 break;
129 case GL_QUADS:
130 fprintf(stderr, "Q ");
131 type=R300_VAP_VF_CNTL__PRIM_QUADS;
132 if(end<start+4){
133 fprintf(stderr, "Not enough vertices\n");
134 return -1; /* need enough vertices for Q */
135 }
136 break;
137 case GL_QUAD_STRIP:
138 fprintf(stderr, "QS ");
139 type=R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
140 if(end<start+4){
141 fprintf(stderr, "Not enough vertices\n");
142 return -1; /* need enough vertices for Q */
143 }
144 break;
145 default:
146 fprintf(stderr, "Cannot handle primitive %02x ", prim & PRIM_MODE_MASK);
147 return -1;
148 break;
149 }
150 return type;
151 }
152
153
154
155 /* Immediate implementation - vertex data is sent via command stream */
156
157 static GLfloat default_vector[4]={0.0, 0.0, 0.0, 1.0};
158
159 #define output_vector(v, i) \
160 { \
161 int _i; \
162 for(_i=0;_i<v->size;_i++){ \
163 efloat(VEC_ELT(v, GLfloat, i)[_i]); \
164 } \
165 for(_i=v->size;_i<4;_i++){ \
166 efloat(default_vector[_i]); \
167 } \
168 }
169
170 static void r300_render_flat_primitive(r300ContextPtr rmesa,
171 GLcontext *ctx,
172 int start,
173 int end,
174 int prim)
175 {
176 TNLcontext *tnl = TNL_CONTEXT(ctx);
177 struct vertex_buffer *VB = &tnl->vb;
178 GLuint i;
179 int k, type;
180 LOCAL_VARS
181
182 type=r300_get_primitive_type(rmesa, ctx, start, end, prim);
183 if(type<0)return;
184
185
186 start_immediate_packet(end-start, type, 8);
187
188 for(i=start;i<end;i++){
189 #if 0
190 fprintf(stderr, "* (%f %f %f %f) (%f %f %f %f)\n",
191 VEC_ELT(VB->ObjPtr, GLfloat, i)[0],
192 VEC_ELT(VB->ObjPtr, GLfloat, i)[1],
193 VEC_ELT(VB->ObjPtr, GLfloat, i)[2],
194 VEC_ELT(VB->ObjPtr, GLfloat, i)[3],
195
196 VEC_ELT(VB->ColorPtr[0], GLfloat, i)[0],
197 VEC_ELT(VB->ColorPtr[0], GLfloat, i)[1],
198 VEC_ELT(VB->ColorPtr[0], GLfloat, i)[2],
199 VEC_ELT(VB->ColorPtr[0], GLfloat, i)[3]
200 );
201 #endif
202
203
204 /* coordinates */
205 output_vector(VB->ObjPtr, i);
206
207 /* color components */
208 output_vector(VB->ColorPtr[0], i);
209 }
210
211 }
212
213 static GLboolean r300_run_flat_render(GLcontext *ctx,
214 struct tnl_pipeline_stage *stage)
215 {
216 r300ContextPtr rmesa = R300_CONTEXT(ctx);
217 TNLcontext *tnl = TNL_CONTEXT(ctx);
218 struct vertex_buffer *VB = &tnl->vb;
219 GLuint i;
220 AOS_DATA vb_arrays[2];
221 LOCAL_VARS
222
223 if (RADEON_DEBUG == DEBUG_PRIMS)
224 fprintf(stderr, "%s\n", __FUNCTION__);
225
226 /* setup array of structures data */
227
228 /* Note: immediate vertex data includes all coordinates.
229 To save bandwidth use either VBUF or state-based vertex generation */
230 /* xyz */
231 vb_arrays[0].element_size=4;
232 vb_arrays[0].stride=4;
233 vb_arrays[0].offset=0; /* Not used */
234 vb_arrays[0].format=AOS_FORMAT_FLOAT;
235 vb_arrays[0].ncomponents=4;
236 vb_arrays[0].reg=REG_COORDS;
237
238 /* color */
239 vb_arrays[1].element_size=4;
240 vb_arrays[1].stride=4;
241 vb_arrays[1].offset=0; /* Not used */
242 vb_arrays[1].format=AOS_FORMAT_FLOAT_COLOR;
243 vb_arrays[1].ncomponents=4;
244 vb_arrays[1].reg=REG_COLOR0;
245
246
247 /* needed before starting 3d operation .. */
248 reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0);
249 e32(0x0000000a);
250
251 reg_start(0x4f18,0);
252 e32(0x00000003);
253
254 r300EmitState(rmesa);
255
256 reg_start(0x20b0,0);
257 e32(0x0000043f);
258
259 FLAT_COLOR_PIPELINE.vertex_shader.matrix[0].length=16;
260 memcpy(FLAT_COLOR_PIPELINE.vertex_shader.matrix[0].body.f, ctx->_ModelProjectMatrix.m, 16*4);
261
262 #if 0
263 FLAT_COLOR_PIPELINE.vertex_shader.unknown2.length=4;
264 FLAT_COLOR_PIPELINE.vertex_shader.unknown2.body.f[0]=0.0;
265 FLAT_COLOR_PIPELINE.vertex_shader.unknown2.body.f[1]=0.0;
266 FLAT_COLOR_PIPELINE.vertex_shader.unknown2.body.f[2]=1.0;
267 FLAT_COLOR_PIPELINE.vertex_shader.unknown2.body.f[3]=0.0;
268 #endif
269
270 program_pipeline(PASS_PREFIX &FLAT_COLOR_PIPELINE);
271
272 /* We need LOAD_VBPNTR to setup AOS_ATTR fields.. the offsets are irrelevant */
273 setup_AOS(PASS_PREFIX vb_arrays, 2);
274
275 for(i=0; i < VB->PrimitiveCount; i++){
276 GLuint prim = VB->Primitive[i].mode;
277 GLuint start = VB->Primitive[i].start;
278 GLuint length = VB->Primitive[i].count;
279 r300_render_flat_primitive(rmesa, ctx, start, start + length, prim);
280 }
281
282 end_3d(PASS_PREFIX_VOID);
283
284 fprintf(stderr, "\n");
285 return GL_FALSE;
286 }
287
288 /* vertex buffer implementation */
289
290 /* We use the start part of GART texture buffer for vertices */
291
292 #define R300_MAX_AOS_ARRAYS 16
293
294 static void upload_vertex_buffer(r300ContextPtr rmesa,
295 GLcontext *ctx, AOS_DATA *array, int *n_arrays)
296 {
297 TNLcontext *tnl = TNL_CONTEXT(ctx);
298 struct vertex_buffer *VB = &tnl->vb;
299 int offset=0, idx=0;
300 int i,j;
301 radeonScreenPtr rsp=rmesa->radeon.radeonScreen;
302 /* Not the most efficient implementation, but, for now, I just want something that
303 works */
304 /* to do - make single memcpy per column (is it possible ?) */
305 /* to do - use dirty flags to avoid redundant copies */
306 #define UPLOAD_VECTOR(v, r, f)\
307 { \
308 /* Is the data dirty ? */ \
309 if (v->flags & ((1<<v->size)-1)) { \
310 fprintf(stderr, "size=%d vs stride=%d\n", v->size, v->stride); \
311 if(v->size*4==v->stride){\
312 /* fast path */ \
313 memcpy(rsp->gartTextures.map+offset, v->data, v->stride*VB->Count); \
314 } else { \
315 for(i=0;i<VB->Count;i++){ \
316 /* copy one vertex at a time*/ \
317 memcpy(rsp->gartTextures.map+offset, VEC_ELT(v, GLfloat, i), v->size*4); \
318 } \
319 } \
320 /* v->flags &= ~((1<<v->size)-1);*/ \
321 } \
322 array[idx].element_size=v->size; \
323 array[idx].stride=v->size; \
324 array[idx].format=(f); \
325 array[idx].ncomponents=v->size; \
326 array[idx].offset=rsp->gartTextures.handle+offset; \
327 array[idx].reg=r; \
328 offset+=v->size*4*VB->Count; \
329 idx++; \
330 /* Fill in the rest with the components of default_vector */\
331 /* \
332 if(v->size<4){ \
333 array[idx].element_size=4-v->size; \
334 array[idx].stride=0; \
335 array[idx].format=(f); \
336 array[idx].ncomponents=4-v->size; \
337 array[idx].offset=rsp->gartTextures.handle+v->size*4;\
338 array[idx].reg=r; \
339 idx++; \
340 } \
341 */\
342 }
343
344 /* Put a copy of default vector */
345 memcpy(rsp->gartTextures.map, default_vector, 16);
346 offset+=16;
347
348 UPLOAD_VECTOR(VB->ObjPtr, REG_COORDS, AOS_FORMAT_FLOAT);
349 UPLOAD_VECTOR(VB->ColorPtr[0], REG_COLOR0, AOS_FORMAT_FLOAT_COLOR);
350
351 *n_arrays=idx;
352 if(idx>=R300_MAX_AOS_ARRAYS){
353 fprintf(stderr, "Aieee ! Maximum AOS arrays count exceeded.. \n");
354 exit(-1);
355 }
356 }
357
358 static void r300_render_vb_flat_primitive(r300ContextPtr rmesa,
359 GLcontext *ctx,
360 int start,
361 int end,
362 int prim)
363 {
364 TNLcontext *tnl = TNL_CONTEXT(ctx);
365 struct vertex_buffer *VB = &tnl->vb;
366 GLuint i;
367 int k, type, n_arrays;
368 LOCAL_VARS
369
370 if(end<=start)return; /* do we need to watch for this ? */
371
372 type=r300_get_primitive_type(rmesa, ctx, start, end, prim);
373 if(type<0)return;
374
375 fire_AOS(PASS_PREFIX end-start, type);
376 }
377
378 static VERTEX_SHADER_FRAGMENT default_vector_vsf={
379 length: 4,
380 body: {
381 f: {0.0, 0.0, 0.0, 1.0}
382 }
383 };
384
385 static GLboolean r300_run_vb_flat_render(GLcontext *ctx,
386 struct tnl_pipeline_stage *stage)
387 {
388 r300ContextPtr rmesa = R300_CONTEXT(ctx);
389 TNLcontext *tnl = TNL_CONTEXT(ctx);
390 struct vertex_buffer *VB = &tnl->vb;
391 int i, j, n_arrays;
392 AOS_DATA vb_arrays[R300_MAX_AOS_ARRAYS];
393 AOS_DATA vb_arrays2[R300_MAX_AOS_ARRAYS];
394 LOCAL_VARS
395
396 if (RADEON_DEBUG == DEBUG_PRIMS)
397 fprintf(stderr, "%s\n", __FUNCTION__);
398
399 /* setup array of structures data */
400
401 upload_vertex_buffer(rmesa, ctx, vb_arrays, &n_arrays);
402 fprintf(stderr, "Using %d AOS arrays\n", n_arrays);
403
404 r300EmitState(rmesa);
405
406 reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0);
407 e32(0x0000000a);
408
409 reg_start(0x4f18,0);
410 e32(0x00000003);
411
412 reg_start(0x20b0,0);
413 e32(0x0000043f);
414
415 program_pipeline(PASS_PREFIX &FLAT_COLOR_PIPELINE);
416
417 //upload_vertex_shader_fragment(PASS_PREFIX VSF_DEST_UNKNOWN1, &default_vector_vsf);
418
419 reg_start(R300_RE_OCCLUSION_CNTL, 0);
420 e32(R300_OCCLUSION_ON);
421
422 set_quad0(PASS_PREFIX 1.0,1.0,1.0,1.0);
423 set_init21(PASS_PREFIX 0.0,1.0);
424
425 for(i=0; i < VB->PrimitiveCount; i++){
426 GLuint prim = VB->Primitive[i].mode;
427 GLuint start = VB->Primitive[i].start;
428 GLuint length = VB->Primitive[i].count;
429
430 /* copy arrays */
431 memcpy(vb_arrays2, vb_arrays, sizeof(AOS_DATA)*n_arrays);
432 for(j=0;j<n_arrays;j++){
433 vb_arrays2[j].offset+=vb_arrays2[j].stride*start;
434 }
435
436 setup_AOS(PASS_PREFIX vb_arrays2, n_arrays);
437
438 r300_render_vb_flat_primitive(rmesa, ctx, start, start + length, prim);
439 }
440
441 end_3d(PASS_PREFIX_VOID);
442
443 /* Flush state - we are done drawing.. */
444 r300Flush(ctx);
445 fprintf(stderr, "\n");
446 return GL_FALSE;
447 }
448
449 /**
450 * Called by the pipeline manager to render a batch of primitives.
451 * We can return true to pass on to the next stage (i.e. software
452 * rasterization) or false to indicate that the pipeline has finished
453 * after we render something.
454 */
455 static GLboolean r300_run_render(GLcontext *ctx,
456 struct tnl_pipeline_stage *stage)
457 {
458 r300ContextPtr rmesa = R300_CONTEXT(ctx);
459 TNLcontext *tnl = TNL_CONTEXT(ctx);
460 struct vertex_buffer *VB = &tnl->vb;
461 GLuint i;
462
463 if (RADEON_DEBUG == DEBUG_PRIMS)
464 fprintf(stderr, "%s\n", __FUNCTION__);
465
466 #if 1
467 return r300_run_flat_render(ctx, stage);
468 #else
469 return GL_TRUE;
470 #endif
471
472 #if 0
473 mgaContextPtr mmesa = MGA_CONTEXT(ctx);
474 TNLcontext *tnl = TNL_CONTEXT(ctx);
475 struct vertex_buffer *VB = &tnl->vb;
476 GLuint i;
477
478 /* Don't handle clipping or indexed vertices or vertex manipulations.
479 */
480 if (mmesa->RenderIndex != 0 ||
481 !mga_validate_render( ctx, VB )) {
482 return GL_TRUE;
483 }
484
485 tnl->Driver.Render.Start( ctx );
486 mmesa->SetupNewInputs = ~0;
487
488 for (i = 0 ; i < VB->PrimitiveCount ; i++)
489 {
490 GLuint prim = VB->Primitive[i].mode;
491 GLuint start = VB->Primitive[i].start;
492 GLuint length = VB->Primitive[i].count;
493
494 if (!length)
495 continue;
496
497 mga_render_tab_verts[prim & PRIM_MODE_MASK]( ctx, start, start + length,
498 prim);
499 }
500
501 tnl->Driver.Render.Finish( ctx );
502
503 return GL_FALSE; /* finished the pipe */
504 #endif
505 }
506
507
508 /**
509 * Called by the pipeline manager once before rendering.
510 * We check the GL state here to
511 * a) decide whether we can do the current state in hardware and
512 * b) update hardware registers
513 */
/*
 * Deactivate the render stage and leave the enclosing function as soon as
 * expr is true.  With DEBUG_FALLBACKS set, the text of the triggering
 * expression is reported on stderr.
 *
 * Only usable inside a void function that has a `stage' variable in scope.
 */
#define FALLBACK_IF(expr)						\
	do {								\
		if (expr) {						\
			if (RADEON_DEBUG & DEBUG_FALLBACKS) {		\
				fprintf(stderr, "%s: fallback:%s\n",	\
					__FUNCTION__, #expr);		\
			}						\
			stage->active = GL_FALSE;			\
			return;						\
		}							\
	} while(0)
524
525 static void r300_check_render(GLcontext *ctx, struct tnl_pipeline_stage *stage)
526 {
527 r300ContextPtr r300 = R300_CONTEXT(ctx);
528 int i;
529
530 if (RADEON_DEBUG & DEBUG_STATE)
531 fprintf(stderr, "%s\n", __FUNCTION__);
532
533 /* We only support rendering in hardware for now */
534 if (ctx->RenderMode != GL_RENDER) {
535 stage->active = GL_FALSE;
536 return;
537 }
538
539 // I failed to figure out how dither works in hardware,
540 // let's just ignore it for now
541 //FALLBACK_IF(ctx->Color.DitherFlag);
542
543 /* I'm almost certain I forgot something here */
544 FALLBACK_IF(ctx->Color.AlphaEnabled); // GL_ALPHA_TEST
545 FALLBACK_IF(ctx->Color.BlendEnabled); // GL_BLEND
546 FALLBACK_IF(ctx->Fog.Enabled); // GL_FOG
547 FALLBACK_IF(ctx->Line.SmoothFlag); // GL_LINE_SMOOTH
548 FALLBACK_IF(ctx->Line.StippleFlag); // GL_LINE_STIPPLE
549 FALLBACK_IF(ctx->Point.SmoothFlag); // GL_POINT_SMOOTH
550 if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite)
551 FALLBACK_IF(ctx->Point.PointSprite); // GL_POINT_SPRITE_NV
552 FALLBACK_IF(ctx->Polygon.OffsetPoint); // GL_POLYGON_OFFSET_POINT
553 FALLBACK_IF(ctx->Polygon.OffsetLine); // GL_POLYGON_OFFSET_LINE
554 FALLBACK_IF(ctx->Polygon.OffsetFill); // GL_POLYGON_OFFSET_FILL
555 FALLBACK_IF(ctx->Polygon.SmoothFlag); // GL_POLYGON_SMOOTH
556 FALLBACK_IF(ctx->Polygon.StippleFlag); // GL_POLYGON_STIPPLE
557 FALLBACK_IF(ctx->Stencil.Enabled); // GL_STENCIL_TEST
558 FALLBACK_IF(ctx->Multisample.Enabled); // GL_MULTISAMPLE_ARB
559
560 for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
561 FALLBACK_IF(ctx->Texture.Unit[i].Enabled);
562
563
564 /* let r300_run_render do its job */
565 #if 0
566 stage->active = GL_FALSE;
567 #endif
568 }
569
570
/* Stage destructor: does nothing. */
static void dtr(struct tnl_pipeline_stage *stage)
{
	/* reference the argument only to mark it as intentionally unused */
	(void) stage;
}
575
576 const struct tnl_pipeline_stage _r300_render_stage = {
577 "r300 hw rasterize",
578 _NEW_ALL, /* re-check (always re-check for now) */
579 0, /* re-run (always runs) */
580 GL_TRUE, /* active */
581 0, 0, /* inputs (set in check_render), outputs */
582 0, 0, /* changed_inputs, private */
583 dtr, /* destructor */
584 r300_check_render, /* check */
585 r300_run_render /* run */
586 };