Merge commit 'origin/gallium-0.1' into gallium-0.1
[mesa.git] / src / gallium / drivers / cell / spu / spu_tri.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * Triangle rendering within a tile.
30 */
31
32 #include "pipe/p_compiler.h"
33 #include "pipe/p_format.h"
34 #include "pipe/p_util.h"
35 #include "spu_blend.h"
36 #include "spu_colorpack.h"
37 #include "spu_main.h"
38 #include "spu_texture.h"
39 #include "spu_tile.h"
40 #include "spu_tri.h"
41
42 #include "spu_ztest.h"
43
44
45 /** Masks are uint[4] vectors with each element being 0 or 0xffffffff */
46 typedef vector unsigned int mask_t;
47
48 typedef union
49 {
50 vector float v;
51 float f[4];
52 } float4;
53
54
55 /**
56 * Simplified types taken from other parts of Gallium
57 */
58 struct vertex_header {
59 vector float data[1];
60 };
61
62
63
/**
 * Exact ceiling of a float, computed without libm.
 *
 * Replaces the earlier approximation (float)(int)(X + 0.99999), which
 * rounded down values whose fractional part was below 0.00001 and, because
 * the int conversion truncates toward zero, returned the wrong result for
 * inputs below -1 (for example -1.5 produced 0 instead of -1).
 *
 * \param x  value to round up; must be representable as an int after
 *           truncation (the conversion is undefined otherwise)
 * \return   the smallest integral value that is not less than x
 */
static inline float
spu_tri_ceilf(float x)
{
   /* truncation rounds toward zero, so it only undershoots for
    * positive non-integral inputs
    */
   const float truncated = (float) (int) x;
   return (truncated < x) ? truncated + 1.0f : truncated;
}

#undef CEILF
#define CEILF(X) spu_tri_ceilf(X)
67
68
/* Index of each fragment within a 2x2 quad. */
#define QUAD_TOP_LEFT     0
#define QUAD_TOP_RIGHT    1
#define QUAD_BOTTOM_LEFT  2
#define QUAD_BOTTOM_RIGHT 3

/* One bit per quad fragment, derived from the indices above. */
#define MASK_TOP_LEFT     (1 << QUAD_TOP_LEFT)
#define MASK_TOP_RIGHT    (1 << QUAD_TOP_RIGHT)
#define MASK_BOTTOM_LEFT  (1 << QUAD_BOTTOM_LEFT)
#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT)

/* All four fragment bits set (0xf). */
#define MASK_ALL          (MASK_TOP_LEFT | MASK_TOP_RIGHT | \
                           MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT)


/* Set to 1 to compile in the vertex-dumping debug code. */
#define DEBUG_VERTS 0
81
/**
 * Rasterization state for a single triangle edge.
 */
struct edge {
   float dx;     /**< X(v1) - X(v0); only needed while setting up */
   float dy;     /**< Y(v1) - Y(v0); only needed while setting up */
   float dxdy;   /**< slope of the edge, dx/dy */
   float sx;     /**< X coordinate of the first sample point */
   float sy;     /**< Y coordinate of the first sample point */
   int lines;    /**< number of scanlines covered by this edge */
};
92
93
94 struct interp_coef
95 {
96 float4 a0;
97 float4 dadx;
98 float4 dady;
99 };
100
101
102 /**
103 * Triangle setup info (derived from draw_stage).
104 * Also used for line drawing (taking some liberties).
105 */
106 struct setup_stage {
107
108 /* Vertices are just an array of floats making up each attribute in
109 * turn. Currently fixed at 4 floats, but should change in time.
110 * Codegen will help cope with this.
111 */
112 const struct vertex_header *vmax;
113 const struct vertex_header *vmid;
114 const struct vertex_header *vmin;
115 const struct vertex_header *vprovoke;
116
117 struct edge ebot;
118 struct edge etop;
119 struct edge emaj;
120
121 float oneoverarea;
122
123 uint tx, ty;
124
125 int cliprect_minx, cliprect_maxx, cliprect_miny, cliprect_maxy;
126
127 #if 0
128 struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS];
129 #else
130 struct interp_coef coef[PIPE_MAX_SHADER_INPUTS];
131 #endif
132
133 #if 0
134 struct quad_header quad;
135 #endif
136
137 struct {
138 int left[2]; /**< [0] = row0, [1] = row1 */
139 int right[2];
140 int y;
141 unsigned y_flags;
142 unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */
143 } span;
144 };
145
146
147
148 static struct setup_stage setup;
149
150
151
152
#if 0
/**
 * Reinterpret a draw_stage pointer as a setup_stage pointer.
 * Compiled out; nothing in this file refers to it.
 */
static INLINE struct setup_stage *
setup_stage( struct draw_stage *stage )
{
   return (struct setup_stage *) stage;
}
#endif
162
#if 0
/**
 * Clip setup.quad against the scissor/surface bounds by clearing the mask
 * bits of the fragments that fall outside the cliprect.
 *
 * Compiled out.  NOTE(review): the body accesses the pointer parameter
 * with '.', and refers to 'softpipe' and 'quad' members that struct
 * setup_stage does not have, so it cannot be re-enabled as written.
 */
static INLINE void
quad_clip(struct setup_stage *setup)
{
   const struct pipe_scissor_state *cliprect = &setup.softpipe->cliprect;
   const int minx = (int) cliprect->minx;
   const int maxx = (int) cliprect->maxx;
   const int miny = (int) cliprect->miny;
   const int maxy = (int) cliprect->maxy;

   /* quad lies entirely outside the cliprect */
   if (setup.quad.x0 >= maxx ||
       setup.quad.y0 >= maxy ||
       setup.quad.x0 + 1 < minx ||
       setup.quad.y0 + 1 < miny) {
      setup.quad.mask = 0x0;
      return;
   }

   /* quad straddles an edge: keep only the fragments on the inside */
   if (setup.quad.x0 < minx)
      setup.quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
   if (setup.quad.y0 < miny)
      setup.quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
   if (setup.quad.x0 == maxx - 1)
      setup.quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
   if (setup.quad.y0 == maxy - 1)
      setup.quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
}
#endif
194
#if 0
/**
 * Clip the current quad and, if any fragment survives, hand it to the
 * first quad stage.
 * Compiled out, along with quad_clip() which it depends on.
 */
static INLINE void
clip_emit_quad(struct setup_stage *setup)
{
   quad_clip(setup);

   if (setup.quad.mask) {
      struct softpipe_context *sp = setup.softpipe;
      sp->quad.first->run(sp->quad.first, &setup.quad);
   }
}
#endif
209
210 /**
211 * Evaluate attribute coefficients (plane equations) to compute
212 * attribute values for the four fragments in a quad.
213 * Eg: four colors will be compute.
214 */
215 static INLINE void
216 eval_coeff(uint slot, float x, float y, vector float result[4])
217 {
218 switch (spu.vertex_info.interp_mode[slot]) {
219 case INTERP_CONSTANT:
220 result[QUAD_TOP_LEFT] =
221 result[QUAD_TOP_RIGHT] =
222 result[QUAD_BOTTOM_LEFT] =
223 result[QUAD_BOTTOM_RIGHT] = setup.coef[slot].a0.v;
224 break;
225
226 case INTERP_LINEAR:
227 /* fall-through, for now */
228 default:
229 {
230 register vector float dadx = setup.coef[slot].dadx.v;
231 register vector float dady = setup.coef[slot].dady.v;
232 register vector float topLeft
233 = spu_add(setup.coef[slot].a0.v,
234 spu_add(spu_mul(spu_splats(x), dadx),
235 spu_mul(spu_splats(y), dady)));
236
237 result[QUAD_TOP_LEFT] = topLeft;
238 result[QUAD_TOP_RIGHT] = spu_add(topLeft, dadx);
239 result[QUAD_BOTTOM_LEFT] = spu_add(topLeft, dady);
240 result[QUAD_BOTTOM_RIGHT] = spu_add(spu_add(topLeft, dadx), dady);
241 }
242 }
243 }
244
245
246 static INLINE vector float
247 eval_z(float x, float y)
248 {
249 const uint slot = 0;
250 const float dzdx = setup.coef[slot].dadx.f[2];
251 const float dzdy = setup.coef[slot].dady.f[2];
252 const float topLeft = setup.coef[slot].a0.f[2] + x * dzdx + y * dzdy;
253 const vector float topLeftv = spu_splats(topLeft);
254 const vector float derivs = (vector float) { 0.0, dzdx, dzdy, dzdx + dzdy };
255 return spu_add(topLeftv, derivs);
256 }
257
258
259 static INLINE mask_t
260 do_depth_test(int x, int y, mask_t quadmask)
261 {
262 float4 zvals;
263 mask_t mask;
264
265 zvals.v = eval_z((float) x, (float) y);
266
267 if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) {
268 int ix = (x - setup.cliprect_minx) / 4;
269 int iy = (y - setup.cliprect_miny) / 2;
270 mask = spu_z16_test_less(zvals.v, &spu.ztile.us8[iy][ix], x>>1, quadmask);
271 }
272 else {
273 int ix = (x - setup.cliprect_minx) / 2;
274 int iy = (y - setup.cliprect_miny) / 2;
275 mask = spu_z32_test_less(zvals.v, &spu.ztile.ui4[iy][ix], quadmask);
276 }
277
278 if (spu_extract(spu_orx(mask), 0))
279 spu.cur_ztile_status = TILE_STATUS_DIRTY;
280
281 return mask;
282 }
283
284
285 /**
286 * Emit a quad (pass to next stage). No clipping is done.
287 * Note: about 1/5 to 1/7 of the time, mask is zero and this function
288 * should be skipped. But adding the test for that slows things down
289 * overall.
290 */
291 static INLINE void
292 emit_quad( int x, int y, mask_t mask )
293 {
294 #if 0
295 struct softpipe_context *sp = setup.softpipe;
296 setup.quad.x0 = x;
297 setup.quad.y0 = y;
298 setup.quad.mask = mask;
299 sp->quad.first->run(sp->quad.first, &setup.quad);
300 #else
301
302 if (spu.depth_stencil.depth.enabled) {
303 mask = do_depth_test(x, y, mask);
304 }
305
306 /* If any bits in mask are set... */
307 if (spu_extract(spu_orx(mask), 0)) {
308 const int ix = x - setup.cliprect_minx;
309 const int iy = y - setup.cliprect_miny;
310 const vector unsigned char shuffle = spu.color_shuffle;
311 vector float colors[4];
312
313 spu.cur_ctile_status = TILE_STATUS_DIRTY;
314
315 if (spu.texture.start) {
316 /* texture mapping */
317 vector float texcoords[4];
318 eval_coeff(2, (float) x, (float) y, texcoords);
319
320 if (spu_extract(mask, 0))
321 colors[0] = spu.sample_texture(texcoords[0]);
322 if (spu_extract(mask, 1))
323 colors[1] = spu.sample_texture(texcoords[1]);
324 if (spu_extract(mask, 2))
325 colors[2] = spu.sample_texture(texcoords[2]);
326 if (spu_extract(mask, 3))
327 colors[3] = spu.sample_texture(texcoords[3]);
328 }
329 else {
330 /* simple shading */
331 eval_coeff(1, (float) x, (float) y, colors);
332 }
333
334 #if 1
335 if (spu.blend.blend_enable)
336 blend_quad(ix % TILE_SIZE, iy % TILE_SIZE, colors);
337 #endif
338
339 if (spu_extract(mask, 0))
340 spu.ctile.ui[iy][ix] = spu_pack_color_shuffle(colors[0], shuffle);
341 if (spu_extract(mask, 1))
342 spu.ctile.ui[iy][ix+1] = spu_pack_color_shuffle(colors[1], shuffle);
343 if (spu_extract(mask, 2))
344 spu.ctile.ui[iy+1][ix] = spu_pack_color_shuffle(colors[2], shuffle);
345 if (spu_extract(mask, 3))
346 spu.ctile.ui[iy+1][ix+1] = spu_pack_color_shuffle(colors[3], shuffle);
347
348 #if 0
349 /* SIMD_Z with swizzled color buffer (someday) */
350 vector unsigned int uicolors = *((vector unsigned int *) &colors);
351 spu.ctile.ui4[iy/2][ix/2] = spu_sel(spu.ctile.ui4[iy/2][ix/2], uicolors, mask);
352 #endif
353 }
354
355 #endif
356 }
357
358
359 /**
360 * Given an X or Y coordinate, return the block/quad coordinate that it
361 * belongs to.
362 */
363 static INLINE int block( int x )
364 {
365 return x & ~1;
366 }
367
368
369 /**
370 * Compute mask which indicates which pixels in the 2x2 quad are actually inside
371 * the triangle's bounds.
372 * The mask is a uint4 vector and each element will be 0 or 0xffffffff.
373 */
374 static INLINE mask_t calculate_mask( int x )
375 {
376 /* This is a little tricky.
377 * Use & instead of && to avoid branches.
378 * Use negation to convert true/false to ~0/0 values.
379 */
380 mask_t mask;
381 mask = spu_insert(-((x >= setup.span.left[0]) & (x < setup.span.right[0])), mask, 0);
382 mask = spu_insert(-((x+1 >= setup.span.left[0]) & (x+1 < setup.span.right[0])), mask, 1);
383 mask = spu_insert(-((x >= setup.span.left[1]) & (x < setup.span.right[1])), mask, 2);
384 mask = spu_insert(-((x+1 >= setup.span.left[1]) & (x+1 < setup.span.right[1])), mask, 3);
385 return mask;
386 }
387
388
389 /**
390 * Render a horizontal span of quads
391 */
392 static void flush_spans( void )
393 {
394 int minleft, maxright;
395 int x;
396
397 switch (setup.span.y_flags) {
398 case 0x3:
399 /* both odd and even lines written (both quad rows) */
400 minleft = MIN2(setup.span.left[0], setup.span.left[1]);
401 maxright = MAX2(setup.span.right[0], setup.span.right[1]);
402 break;
403
404 case 0x1:
405 /* only even line written (quad top row) */
406 minleft = setup.span.left[0];
407 maxright = setup.span.right[0];
408 break;
409
410 case 0x2:
411 /* only odd line written (quad bottom row) */
412 minleft = setup.span.left[1];
413 maxright = setup.span.right[1];
414 break;
415
416 default:
417 return;
418 }
419
420
421 /* OK, we're very likely to need the tile data now.
422 * clear or finish waiting if needed.
423 */
424 if (spu.cur_ctile_status == TILE_STATUS_GETTING) {
425 /* wait for mfc_get() to complete */
426 //printf("SPU: %u: waiting for ctile\n", spu.init.id);
427 wait_on_mask(1 << TAG_READ_TILE_COLOR);
428 spu.cur_ctile_status = TILE_STATUS_CLEAN;
429 }
430 else if (spu.cur_ctile_status == TILE_STATUS_CLEAR) {
431 //printf("SPU %u: clearing C tile %u, %u\n", spu.init.id, setup.tx, setup.ty);
432 clear_c_tile(&spu.ctile);
433 spu.cur_ctile_status = TILE_STATUS_DIRTY;
434 }
435 ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED);
436
437 if (spu.depth_stencil.depth.enabled) {
438 if (spu.cur_ztile_status == TILE_STATUS_GETTING) {
439 /* wait for mfc_get() to complete */
440 //printf("SPU: %u: waiting for ztile\n", spu.init.id);
441 wait_on_mask(1 << TAG_READ_TILE_Z);
442 spu.cur_ztile_status = TILE_STATUS_CLEAN;
443 }
444 else if (spu.cur_ztile_status == TILE_STATUS_CLEAR) {
445 //printf("SPU %u: clearing Z tile %u, %u\n", spu.init.id, setup.tx, setup.ty);
446 clear_z_tile(&spu.ztile);
447 spu.cur_ztile_status = TILE_STATUS_DIRTY;
448 }
449 ASSERT(spu.cur_ztile_status != TILE_STATUS_DEFINED);
450 }
451
452 /* XXX this loop could be moved into the above switch cases and
453 * calculate_mask() could be simplified a bit...
454 */
455 for (x = block(minleft); x <= block(maxright); x += 2) {
456 #if 1
457 emit_quad( x, setup.span.y, calculate_mask( x ) );
458 #endif
459 }
460
461 setup.span.y = 0;
462 setup.span.y_flags = 0;
463 setup.span.right[0] = 0;
464 setup.span.right[1] = 0;
465 }
466
#if DEBUG_VERTS
/**
 * Debug helper: dump every attribute of a vertex to stderr.
 * Only compiled when DEBUG_VERTS is non-zero.
 *
 * The attribute count comes from spu.vertex_info and components are read
 * with spu_extract(); the previous version referred to a 'quad' member
 * that struct setup_stage no longer has and therefore did not build.
 */
static void print_vertex(const struct vertex_header *v)
{
   int i;
   /* %p requires a void pointer */
   fprintf(stderr, "Vertex: (%p)\n", (const void *) v);
   for (i = 0; i < (int) spu.vertex_info.num_attribs; i++) {
      fprintf(stderr, " %d: %f %f %f %f\n", i,
              spu_extract(v->data[i], 0),
              spu_extract(v->data[i], 1),
              spu_extract(v->data[i], 2),
              spu_extract(v->data[i], 3));
   }
}
#endif
478
479
480 static boolean setup_sort_vertices(const struct vertex_header *v0,
481 const struct vertex_header *v1,
482 const struct vertex_header *v2)
483 {
484
485 #if DEBUG_VERTS
486 fprintf(stderr, "Triangle:\n");
487 print_vertex(v0);
488 print_vertex(v1);
489 print_vertex(v2);
490 #endif
491
492 setup.vprovoke = v2;
493
494 /* determine bottom to top order of vertices */
495 {
496 float y0 = spu_extract(v0->data[0], 1);
497 float y1 = spu_extract(v1->data[0], 1);
498 float y2 = spu_extract(v2->data[0], 1);
499 if (y0 <= y1) {
500 if (y1 <= y2) {
501 /* y0<=y1<=y2 */
502 setup.vmin = v0;
503 setup.vmid = v1;
504 setup.vmax = v2;
505 }
506 else if (y2 <= y0) {
507 /* y2<=y0<=y1 */
508 setup.vmin = v2;
509 setup.vmid = v0;
510 setup.vmax = v1;
511 }
512 else {
513 /* y0<=y2<=y1 */
514 setup.vmin = v0;
515 setup.vmid = v2;
516 setup.vmax = v1;
517 }
518 }
519 else {
520 if (y0 <= y2) {
521 /* y1<=y0<=y2 */
522 setup.vmin = v1;
523 setup.vmid = v0;
524 setup.vmax = v2;
525 }
526 else if (y2 <= y1) {
527 /* y2<=y1<=y0 */
528 setup.vmin = v2;
529 setup.vmid = v1;
530 setup.vmax = v0;
531 }
532 else {
533 /* y1<=y2<=y0 */
534 setup.vmin = v1;
535 setup.vmid = v2;
536 setup.vmax = v0;
537 }
538 }
539 }
540
541 /* Check if triangle is completely outside the tile bounds */
542 if (spu_extract(setup.vmin->data[0], 1) > setup.cliprect_maxy)
543 return FALSE;
544 if (spu_extract(setup.vmax->data[0], 1) < setup.cliprect_miny)
545 return FALSE;
546 if (spu_extract(setup.vmin->data[0], 0) < setup.cliprect_minx &&
547 spu_extract(setup.vmid->data[0], 0) < setup.cliprect_minx &&
548 spu_extract(setup.vmax->data[0], 0) < setup.cliprect_minx)
549 return FALSE;
550 if (spu_extract(setup.vmin->data[0], 0) > setup.cliprect_maxx &&
551 spu_extract(setup.vmid->data[0], 0) > setup.cliprect_maxx &&
552 spu_extract(setup.vmax->data[0], 0) > setup.cliprect_maxx)
553 return FALSE;
554
555 setup.ebot.dx = spu_extract(setup.vmid->data[0], 0) - spu_extract(setup.vmin->data[0], 0);
556 setup.ebot.dy = spu_extract(setup.vmid->data[0], 1) - spu_extract(setup.vmin->data[0], 1);
557 setup.emaj.dx = spu_extract(setup.vmax->data[0], 0) - spu_extract(setup.vmin->data[0], 0);
558 setup.emaj.dy = spu_extract(setup.vmax->data[0], 1) - spu_extract(setup.vmin->data[0], 1);
559 setup.etop.dx = spu_extract(setup.vmax->data[0], 0) - spu_extract(setup.vmid->data[0], 0);
560 setup.etop.dy = spu_extract(setup.vmax->data[0], 1) - spu_extract(setup.vmid->data[0], 1);
561
562 /*
563 * Compute triangle's area. Use 1/area to compute partial
564 * derivatives of attributes later.
565 *
566 * The area will be the same as prim->det, but the sign may be
567 * different depending on how the vertices get sorted above.
568 *
569 * To determine whether the primitive is front or back facing we
570 * use the prim->det value because its sign is correct.
571 */
572 {
573 const float area = (setup.emaj.dx * setup.ebot.dy -
574 setup.ebot.dx * setup.emaj.dy);
575
576 setup.oneoverarea = 1.0f / area;
577 /*
578 _mesa_printf("%s one-over-area %f area %f det %f\n",
579 __FUNCTION__, setup.oneoverarea, area, prim->det );
580 */
581 }
582
583 #if 0
584 /* We need to know if this is a front or back-facing triangle for:
585 * - the GLSL gl_FrontFacing fragment attribute (bool)
586 * - two-sided stencil test
587 */
588 setup.quad.facing = (prim->det > 0.0) ^ (setup.softpipe->rasterizer->front_winding == PIPE_WINDING_CW);
589 #endif
590
591 return TRUE;
592 }
593
594
595 /**
596 * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
597 * The value value comes from vertex->data[slot].
598 * The result will be put into setup.coef[slot].a0.
599 * \param slot which attribute slot
600 */
601 static INLINE void
602 const_coeff(uint slot)
603 {
604 setup.coef[slot].dadx.v = (vector float) {0.0, 0.0, 0.0, 0.0};
605 setup.coef[slot].dady.v = (vector float) {0.0, 0.0, 0.0, 0.0};
606 setup.coef[slot].a0.v = setup.vprovoke->data[slot];
607 }
608
609
610 /**
611 * Compute a0, dadx and dady for a linearly interpolated coefficient,
612 * for a triangle.
613 */
614 static INLINE void
615 tri_linear_coeff(uint slot, uint firstComp, uint lastComp)
616 {
617 uint i;
618 const float *vmin_d = (float *) &setup.vmin->data[slot];
619 const float *vmid_d = (float *) &setup.vmid->data[slot];
620 const float *vmax_d = (float *) &setup.vmax->data[slot];
621 const float x = spu_extract(setup.vmin->data[0], 0) - 0.5f;
622 const float y = spu_extract(setup.vmin->data[0], 1) - 0.5f;
623
624 for (i = firstComp; i < lastComp; i++) {
625 float botda = vmid_d[i] - vmin_d[i];
626 float majda = vmax_d[i] - vmin_d[i];
627 float a = setup.ebot.dy * majda - botda * setup.emaj.dy;
628 float b = setup.emaj.dx * botda - majda * setup.ebot.dx;
629
630 ASSERT(slot < PIPE_MAX_SHADER_INPUTS);
631
632 setup.coef[slot].dadx.f[i] = a * setup.oneoverarea;
633 setup.coef[slot].dady.f[i] = b * setup.oneoverarea;
634
635 /* calculate a0 as the value which would be sampled for the
636 * fragment at (0,0), taking into account that we want to sample at
637 * pixel centers, in other words (0.5, 0.5).
638 *
639 * this is neat but unfortunately not a good way to do things for
640 * triangles with very large values of dadx or dady as it will
641 * result in the subtraction and re-addition from a0 of a very
642 * large number, which means we'll end up loosing a lot of the
643 * fractional bits and precision from a0. the way to fix this is
644 * to define a0 as the sample at a pixel center somewhere near vmin
645 * instead - i'll switch to this later.
646 */
647 setup.coef[slot].a0.f[i] = (vmin_d[i] -
648 (setup.coef[slot].dadx.f[i] * x +
649 setup.coef[slot].dady.f[i] * y));
650 }
651
652 /*
653 _mesa_printf("attr[%d].%c: %f dx:%f dy:%f\n",
654 slot, "xyzw"[i],
655 setup.coef[slot].a0[i],
656 setup.coef[slot].dadx.f[i],
657 setup.coef[slot].dady.f[i]);
658 */
659 }
660
661
662 /**
663 * As above, but interp setup all four vector components.
664 */
665 static INLINE void
666 tri_linear_coeff4(uint slot)
667 {
668 const vector float vmin_d = setup.vmin->data[slot];
669 const vector float vmid_d = setup.vmid->data[slot];
670 const vector float vmax_d = setup.vmax->data[slot];
671 const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f);
672 const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f);
673
674 vector float botda = vmid_d - vmin_d;
675 vector float majda = vmax_d - vmin_d;
676
677 vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda),
678 spu_mul(botda, spu_splats(setup.emaj.dy)));
679 vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda),
680 spu_mul(majda, spu_splats(setup.ebot.dx)));
681
682 setup.coef[slot].dadx.v = spu_mul(a, spu_splats(setup.oneoverarea));
683 setup.coef[slot].dady.v = spu_mul(b, spu_splats(setup.oneoverarea));
684
685 vector float tempx = spu_mul(setup.coef[slot].dadx.v, xxxx);
686 vector float tempy = spu_mul(setup.coef[slot].dady.v, yyyy);
687
688 setup.coef[slot].a0.v = spu_sub(vmin_d, spu_add(tempx, tempy));
689 }
690
691
692
#if 0
/**
 * Compute a0, dadx and dady for a perspective-corrected interpolant,
 * for a triangle.
 * The vertex value is multiplied by 1/w (component 3 of slot 0) before
 * the plane coefficients are computed.  When the value is later evaluated
 * at a fragment, it is meant to be divided by the W interpolated at that
 * fragment.
 *
 * Compiled out; setup_tri_coefficients() currently treats perspective
 * attributes as linear.
 *
 * \param slot  which attribute slot
 * \param i     which component of the attribute (0..3)
 */
static void tri_persp_coeff( unsigned slot,
                             unsigned i )
{
   /* premultiply by 1/w:
    */
   float mina = setup.vmin->data[slot][i] * setup.vmin->data[0][3];
   float mida = setup.vmid->data[slot][i] * setup.vmid->data[0][3];
   float maxa = setup.vmax->data[slot][i] * setup.vmax->data[0][3];

   /* attribute deltas along the bottom and major edges */
   float botda = mida - mina;
   float majda = maxa - mina;

   float a = setup.ebot.dy * majda - botda * setup.emaj.dy;
   float b = setup.emaj.dx * botda - majda * setup.ebot.dx;

   assert(slot < PIPE_MAX_SHADER_INPUTS);
   assert(i <= 3);

   setup.coef[slot].dadx.f[i] = a * setup.oneoverarea;
   setup.coef[slot].dady.f[i] = b * setup.oneoverarea;

   /* extrapolate from vmin (shifted by half a pixel) back to the origin */
   setup.coef[slot].a0.f[i] = (mina -
                               (setup.coef[slot].dadx.f[i] * (setup.vmin->data[0][0] - 0.5f) +
                                setup.coef[slot].dady.f[i] * (setup.vmin->data[0][1] - 0.5f)));
}
#endif
734
735
736 /**
737 * Compute the setup.coef[] array dadx, dady, a0 values.
738 * Must be called after setup.vmin,vmid,vmax,vprovoke are initialized.
739 */
740 static void setup_tri_coefficients(void)
741 {
742 #if 1
743 uint i;
744
745 for (i = 0; i < spu.vertex_info.num_attribs; i++) {
746 switch (spu.vertex_info.interp_mode[i]) {
747 case INTERP_NONE:
748 break;
749 case INTERP_POS:
750 /*tri_linear_coeff(i, 2, 3);*/
751 /* XXX interp W if PERSPECTIVE... */
752 tri_linear_coeff4(i);
753 break;
754 case INTERP_CONSTANT:
755 const_coeff(i);
756 break;
757 case INTERP_LINEAR:
758 tri_linear_coeff4(i);
759 break;
760 case INTERP_PERSPECTIVE:
761 tri_linear_coeff4(i); /* temporary */
762 break;
763 default:
764 ASSERT(0);
765 }
766 }
767 #else
768 ASSERT(spu.vertex_info.interp_mode[0] == INTERP_POS);
769 ASSERT(spu.vertex_info.interp_mode[1] == INTERP_LINEAR ||
770 spu.vertex_info.interp_mode[1] == INTERP_CONSTANT);
771 tri_linear_coeff(0, 2, 3); /* slot 0, z */
772 tri_linear_coeff(1, 0, 4); /* slot 1, color */
773 #endif
774 }
775
776
777 static void setup_tri_edges(void)
778 {
779 float vmin_x = spu_extract(setup.vmin->data[0], 0) + 0.5f;
780 float vmid_x = spu_extract(setup.vmid->data[0], 0) + 0.5f;
781
782 float vmin_y = spu_extract(setup.vmin->data[0], 1) - 0.5f;
783 float vmid_y = spu_extract(setup.vmid->data[0], 1) - 0.5f;
784 float vmax_y = spu_extract(setup.vmax->data[0], 1) - 0.5f;
785
786 setup.emaj.sy = CEILF(vmin_y);
787 setup.emaj.lines = (int) CEILF(vmax_y - setup.emaj.sy);
788 setup.emaj.dxdy = setup.emaj.dx / setup.emaj.dy;
789 setup.emaj.sx = vmin_x + (setup.emaj.sy - vmin_y) * setup.emaj.dxdy;
790
791 setup.etop.sy = CEILF(vmid_y);
792 setup.etop.lines = (int) CEILF(vmax_y - setup.etop.sy);
793 setup.etop.dxdy = setup.etop.dx / setup.etop.dy;
794 setup.etop.sx = vmid_x + (setup.etop.sy - vmid_y) * setup.etop.dxdy;
795
796 setup.ebot.sy = CEILF(vmin_y);
797 setup.ebot.lines = (int) CEILF(vmid_y - setup.ebot.sy);
798 setup.ebot.dxdy = setup.ebot.dx / setup.ebot.dy;
799 setup.ebot.sx = vmin_x + (setup.ebot.sy - vmin_y) * setup.ebot.dxdy;
800 }
801
802
803 /**
804 * Render the upper or lower half of a triangle.
805 * Scissoring/cliprect is applied here too.
806 */
807 static void subtriangle( struct edge *eleft,
808 struct edge *eright,
809 unsigned lines )
810 {
811 const int minx = setup.cliprect_minx;
812 const int maxx = setup.cliprect_maxx;
813 const int miny = setup.cliprect_miny;
814 const int maxy = setup.cliprect_maxy;
815 int y, start_y, finish_y;
816 int sy = (int)eleft->sy;
817
818 ASSERT((int)eleft->sy == (int) eright->sy);
819
820 /* clip top/bottom */
821 start_y = sy;
822 finish_y = sy + lines;
823
824 if (start_y < miny)
825 start_y = miny;
826
827 if (finish_y > maxy)
828 finish_y = maxy;
829
830 start_y -= sy;
831 finish_y -= sy;
832
833 /*
834 _mesa_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);
835 */
836
837 for (y = start_y; y < finish_y; y++) {
838
839 /* avoid accumulating adds as floats don't have the precision to
840 * accurately iterate large triangle edges that way. luckily we
841 * can just multiply these days.
842 *
843 * this is all drowned out by the attribute interpolation anyway.
844 */
845 int left = (int)(eleft->sx + y * eleft->dxdy);
846 int right = (int)(eright->sx + y * eright->dxdy);
847
848 /* clip left/right */
849 if (left < minx)
850 left = minx;
851 if (right > maxx)
852 right = maxx;
853
854 if (left < right) {
855 int _y = sy + y;
856 if (block(_y) != setup.span.y) {
857 flush_spans();
858 setup.span.y = block(_y);
859 }
860
861 setup.span.left[_y&1] = left;
862 setup.span.right[_y&1] = right;
863 setup.span.y_flags |= 1<<(_y&1);
864 }
865 }
866
867
868 /* save the values so that emaj can be restarted:
869 */
870 eleft->sx += lines * eleft->dxdy;
871 eright->sx += lines * eright->dxdy;
872 eleft->sy += lines;
873 eright->sy += lines;
874 }
875
876
877 /**
878 * Draw triangle into tile at (tx, ty) (tile coords)
879 * The tile data should have already been fetched.
880 */
881 boolean
882 tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty)
883 {
884 setup.tx = tx;
885 setup.ty = ty;
886
887 /* set clipping bounds to tile bounds */
888 setup.cliprect_minx = tx * TILE_SIZE;
889 setup.cliprect_miny = ty * TILE_SIZE;
890 setup.cliprect_maxx = (tx + 1) * TILE_SIZE;
891 setup.cliprect_maxy = (ty + 1) * TILE_SIZE;
892
893 if (!setup_sort_vertices((struct vertex_header *) v0,
894 (struct vertex_header *) v1,
895 (struct vertex_header *) v2)) {
896 return FALSE; /* totally clipped */
897 }
898
899 setup_tri_coefficients();
900 setup_tri_edges();
901
902 setup.span.y = 0;
903 setup.span.y_flags = 0;
904 setup.span.right[0] = 0;
905 setup.span.right[1] = 0;
906 /* setup.span.z_mode = tri_z_mode( setup.ctx ); */
907
908 /* init_constant_attribs( setup ); */
909
910 if (setup.oneoverarea < 0.0) {
911 /* emaj on left:
912 */
913 subtriangle( &setup.emaj, &setup.ebot, setup.ebot.lines );
914 subtriangle( &setup.emaj, &setup.etop, setup.etop.lines );
915 }
916 else {
917 /* emaj on right:
918 */
919 subtriangle( &setup.ebot, &setup.emaj, setup.ebot.lines );
920 subtriangle( &setup.etop, &setup.emaj, setup.etop.lines );
921 }
922
923 flush_spans();
924
925 return TRUE;
926 }