cell: send rasterizer state to SPUs in proper way, remove front_winding hack
[mesa.git] / src / gallium / drivers / cell / spu / spu_tri.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * Triangle rendering within a tile.
30 */
31
32 #include <transpose_matrix4x4.h>
33 #include "pipe/p_compiler.h"
34 #include "pipe/p_format.h"
35 #include "util/u_math.h"
36 #include "spu_colorpack.h"
37 #include "spu_main.h"
38 #include "spu_texture.h"
39 #include "spu_tile.h"
40 #include "spu_tri.h"
41
42
43 /** Masks are uint[4] vectors with each element being 0 or 0xffffffff */
44 typedef vector unsigned int mask_t;
45
46 typedef union
47 {
48 vector float v;
49 float f[4];
50 } float4;
51
52
53 /**
54 * Simplified types taken from other parts of Gallium
55 */
56 struct vertex_header {
57 vector float data[1];
58 };
59
60
61
/*
 * XXX fix this.
 * Cheap stand-in for ceilf(): add just under 1.0 and truncate through an
 * int cast.  Because the cast truncates toward zero this is only an
 * approximation of a true ceiling.
 */
#undef CEILF
#define CEILF(X) ((float) (int) ((X) + 0.99999))
65
66
/* Index of each fragment inside a 2x2 quad */
#define QUAD_TOP_LEFT     0
#define QUAD_TOP_RIGHT    1
#define QUAD_BOTTOM_LEFT  2
#define QUAD_BOTTOM_RIGHT 3

/* One bit per quad fragment, derived from the indices above */
#define MASK_TOP_LEFT     (1 << QUAD_TOP_LEFT)
#define MASK_TOP_RIGHT    (1 << QUAD_TOP_RIGHT)
#define MASK_BOTTOM_LEFT  (1 << QUAD_BOTTOM_LEFT)
#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT)

/* All four fragment bits set */
#define MASK_ALL          0xf
76
77
/* Set to 1 to compile in the vertex dump done for each incoming triangle */
#define DEBUG_VERTS 0
79
/**
 * State for one triangle edge while it is being scan-converted.
 */
struct edge {
   float dx;     /**< X(v1) - X(v0), used only during setup */
   float dy;     /**< Y(v1) - Y(v0), used only during setup */
   float dxdy;   /**< dx/dy, the change in X per scanline */
   float sx;     /**< X coordinate of the first sample point */
   float sy;     /**< Y coordinate of the first sample point */
   int lines;    /**< number of scanlines covered by this edge */
};
90
91
92 struct interp_coef
93 {
94 float4 a0;
95 float4 dadx;
96 float4 dady;
97 };
98
99
100 /**
101 * Triangle setup info (derived from draw_stage).
102 * Also used for line drawing (taking some liberties).
103 */
104 struct setup_stage {
105
106 /* Vertices are just an array of floats making up each attribute in
107 * turn. Currently fixed at 4 floats, but should change in time.
108 * Codegen will help cope with this.
109 */
110 const struct vertex_header *vmax;
111 const struct vertex_header *vmid;
112 const struct vertex_header *vmin;
113 const struct vertex_header *vprovoke;
114
115 struct edge ebot;
116 struct edge etop;
117 struct edge emaj;
118
119 float oneOverArea;
120
121 uint facing;
122
123 uint tx, ty; /**< position of current tile (x, y) */
124
125 int cliprect_minx, cliprect_maxx, cliprect_miny, cliprect_maxy;
126
127 struct interp_coef coef[PIPE_MAX_SHADER_INPUTS];
128
129 struct {
130 int left[2]; /**< [0] = row0, [1] = row1 */
131 int right[2];
132 int y;
133 unsigned y_flags;
134 unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */
135 } span;
136 };
137
138
139 static struct setup_stage setup;
140
141
142 /**
143 * Evaluate attribute coefficients (plane equations) to compute
144 * attribute values for the four fragments in a quad.
145 * Eg: four colors will be computed (in AoS format).
146 */
147 static INLINE void
148 eval_coeff(uint slot, float x, float y, vector float w, vector float result[4])
149 {
150 switch (spu.vertex_info.attrib[slot].interp_mode) {
151 case INTERP_CONSTANT:
152 result[QUAD_TOP_LEFT] =
153 result[QUAD_TOP_RIGHT] =
154 result[QUAD_BOTTOM_LEFT] =
155 result[QUAD_BOTTOM_RIGHT] = setup.coef[slot].a0.v;
156 break;
157 case INTERP_LINEAR:
158 {
159 vector float dadx = setup.coef[slot].dadx.v;
160 vector float dady = setup.coef[slot].dady.v;
161 vector float topLeft =
162 spu_add(setup.coef[slot].a0.v,
163 spu_add(spu_mul(spu_splats(x), dadx),
164 spu_mul(spu_splats(y), dady)));
165
166 result[QUAD_TOP_LEFT] = topLeft;
167 result[QUAD_TOP_RIGHT] = spu_add(topLeft, dadx);
168 result[QUAD_BOTTOM_LEFT] = spu_add(topLeft, dady);
169 result[QUAD_BOTTOM_RIGHT] = spu_add(spu_add(topLeft, dadx), dady);
170 }
171 break;
172 case INTERP_PERSPECTIVE:
173 {
174 vector float dadx = setup.coef[slot].dadx.v;
175 vector float dady = setup.coef[slot].dady.v;
176 vector float topLeft =
177 spu_add(setup.coef[slot].a0.v,
178 spu_add(spu_mul(spu_splats(x), dadx),
179 spu_mul(spu_splats(y), dady)));
180
181 vector float wInv = spu_re(w); /* 1.0 / w */
182
183 result[QUAD_TOP_LEFT] = spu_mul(topLeft, wInv);
184 result[QUAD_TOP_RIGHT] = spu_mul(spu_add(topLeft, dadx), wInv);
185 result[QUAD_BOTTOM_LEFT] = spu_mul(spu_add(topLeft, dady), wInv);
186 result[QUAD_BOTTOM_RIGHT] = spu_mul(spu_add(spu_add(topLeft, dadx), dady), wInv);
187 }
188 break;
189 case INTERP_POS:
190 case INTERP_NONE:
191 break;
192 default:
193 ASSERT(0);
194 }
195 }
196
197
198 /**
199 * As above, but return 4 vectors in SOA format.
200 * XXX this will all be re-written someday.
201 */
202 static INLINE void
203 eval_coeff_soa(uint slot, float x, float y, vector float w, vector float result[4])
204 {
205 eval_coeff(slot, x, y, w, result);
206 _transpose_matrix4x4(result, result);
207 }
208
209
210 /** Evalute coefficients to get Z for four pixels in a quad */
211 static INLINE vector float
212 eval_z(float x, float y)
213 {
214 const uint slot = 0;
215 const float dzdx = setup.coef[slot].dadx.f[2];
216 const float dzdy = setup.coef[slot].dady.f[2];
217 const float topLeft = setup.coef[slot].a0.f[2] + x * dzdx + y * dzdy;
218 const vector float topLeftv = spu_splats(topLeft);
219 const vector float derivs = (vector float) { 0.0, dzdx, dzdy, dzdx + dzdy };
220 return spu_add(topLeftv, derivs);
221 }
222
223
224 /** Evalute coefficients to get W for four pixels in a quad */
225 static INLINE vector float
226 eval_w(float x, float y)
227 {
228 const uint slot = 0;
229 const float dwdx = setup.coef[slot].dadx.f[3];
230 const float dwdy = setup.coef[slot].dady.f[3];
231 const float topLeft = setup.coef[slot].a0.f[3] + x * dwdx + y * dwdy;
232 const vector float topLeftv = spu_splats(topLeft);
233 const vector float derivs = (vector float) { 0.0, dwdx, dwdy, dwdx + dwdy };
234 return spu_add(topLeftv, derivs);
235 }
236
237
238 /**
239 * Emit a quad (pass to next stage). No clipping is done.
240 * Note: about 1/5 to 1/7 of the time, mask is zero and this function
241 * should be skipped. But adding the test for that slows things down
242 * overall.
243 */
244 static INLINE void
245 emit_quad( int x, int y, mask_t mask)
246 {
247 /* If any bits in mask are set... */
248 if (spu_extract(spu_orx(mask), 0)) {
249 const int ix = x - setup.cliprect_minx;
250 const int iy = y - setup.cliprect_miny;
251
252 spu.cur_ctile_status = TILE_STATUS_DIRTY;
253 spu.cur_ztile_status = TILE_STATUS_DIRTY;
254
255 {
256 /*
257 * Run fragment shader, execute per-fragment ops, update fb/tile.
258 */
259 vector float inputs[4*4], outputs[2*4];
260 vector float fragZ = eval_z((float) x, (float) y);
261 vector float fragW = eval_w((float) x, (float) y);
262
263 /* setup inputs */
264 #if 0
265 eval_coeff_soa(1, (float) x, (float) y, fragW, inputs);
266 #else
267 uint i;
268 for (i = 0; i < spu.vertex_info.num_attribs; i++) {
269 eval_coeff_soa(i+1, (float) x, (float) y, fragW, inputs + i * 4);
270 }
271 #endif
272 ASSERT(spu.fragment_program);
273 ASSERT(spu.fragment_ops);
274
275 /* Execute the current fragment program */
276 spu.fragment_program(inputs, outputs, spu.constants);
277
278 /* Execute per-fragment/quad operations, including:
279 * alpha test, z test, stencil test, blend and framebuffer writing.
280 */
281 spu.fragment_ops(ix, iy, &spu.ctile, &spu.ztile,
282 fragZ,
283 outputs[0*4+0],
284 outputs[0*4+1],
285 outputs[0*4+2],
286 outputs[0*4+3],
287 mask,
288 setup.facing);
289 }
290 }
291 }
292
293
294 /**
295 * Given an X or Y coordinate, return the block/quad coordinate that it
296 * belongs to.
297 */
298 static INLINE int
299 block(int x)
300 {
301 return x & ~1;
302 }
303
304
305 /**
306 * Compute mask which indicates which pixels in the 2x2 quad are actually inside
307 * the triangle's bounds.
308 * The mask is a uint4 vector and each element will be 0 or 0xffffffff.
309 */
310 static INLINE mask_t
311 calculate_mask(int x)
312 {
313 /* This is a little tricky.
314 * Use & instead of && to avoid branches.
315 * Use negation to convert true/false to ~0/0 values.
316 */
317 mask_t mask;
318 mask = spu_insert(-((x >= setup.span.left[0]) & (x < setup.span.right[0])), mask, 0);
319 mask = spu_insert(-((x+1 >= setup.span.left[0]) & (x+1 < setup.span.right[0])), mask, 1);
320 mask = spu_insert(-((x >= setup.span.left[1]) & (x < setup.span.right[1])), mask, 2);
321 mask = spu_insert(-((x+1 >= setup.span.left[1]) & (x+1 < setup.span.right[1])), mask, 3);
322 return mask;
323 }
324
325
326 /**
327 * Render a horizontal span of quads
328 */
329 static void
330 flush_spans(void)
331 {
332 int minleft, maxright;
333 int x;
334
335 switch (setup.span.y_flags) {
336 case 0x3:
337 /* both odd and even lines written (both quad rows) */
338 minleft = MIN2(setup.span.left[0], setup.span.left[1]);
339 maxright = MAX2(setup.span.right[0], setup.span.right[1]);
340 break;
341
342 case 0x1:
343 /* only even line written (quad top row) */
344 minleft = setup.span.left[0];
345 maxright = setup.span.right[0];
346 break;
347
348 case 0x2:
349 /* only odd line written (quad bottom row) */
350 minleft = setup.span.left[1];
351 maxright = setup.span.right[1];
352 break;
353
354 default:
355 return;
356 }
357
358 /* OK, we're very likely to need the tile data now.
359 * clear or finish waiting if needed.
360 */
361 if (spu.cur_ctile_status == TILE_STATUS_GETTING) {
362 /* wait for mfc_get() to complete */
363 //printf("SPU: %u: waiting for ctile\n", spu.init.id);
364 wait_on_mask(1 << TAG_READ_TILE_COLOR);
365 spu.cur_ctile_status = TILE_STATUS_CLEAN;
366 }
367 else if (spu.cur_ctile_status == TILE_STATUS_CLEAR) {
368 //printf("SPU %u: clearing C tile %u, %u\n", spu.init.id, setup.tx, setup.ty);
369 clear_c_tile(&spu.ctile);
370 spu.cur_ctile_status = TILE_STATUS_DIRTY;
371 }
372 ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED);
373
374 if (spu.read_depth) {
375 if (spu.cur_ztile_status == TILE_STATUS_GETTING) {
376 /* wait for mfc_get() to complete */
377 //printf("SPU: %u: waiting for ztile\n", spu.init.id);
378 wait_on_mask(1 << TAG_READ_TILE_Z);
379 spu.cur_ztile_status = TILE_STATUS_CLEAN;
380 }
381 else if (spu.cur_ztile_status == TILE_STATUS_CLEAR) {
382 //printf("SPU %u: clearing Z tile %u, %u\n", spu.init.id, setup.tx, setup.ty);
383 clear_z_tile(&spu.ztile);
384 spu.cur_ztile_status = TILE_STATUS_DIRTY;
385 }
386 ASSERT(spu.cur_ztile_status != TILE_STATUS_DEFINED);
387 }
388
389 /* XXX this loop could be moved into the above switch cases and
390 * calculate_mask() could be simplified a bit...
391 */
392 for (x = block(minleft); x <= block(maxright); x += 2) {
393 emit_quad( x, setup.span.y, calculate_mask( x ));
394 }
395
396 setup.span.y = 0;
397 setup.span.y_flags = 0;
398 setup.span.right[0] = 0;
399 setup.span.right[1] = 0;
400 }
401
402
#if DEBUG_VERTS
/**
 * Debug helper: print the address of a vertex and the four components of
 * each of its spu.vertex_info.num_attribs attribute slots to stderr.
 */
static void
print_vertex(const struct vertex_header *v)
{
   uint i;
   /* %p requires a pointer to void */
   fprintf(stderr, " Vertex: (%p)\n", (const void *) v);
   for (i = 0; i < spu.vertex_info.num_attribs; i++) {
      /* i is unsigned, so print it with %u */
      fprintf(stderr, " %u: %f %f %f %f\n", i,
              spu_extract(v->data[i], 0),
              spu_extract(v->data[i], 1),
              spu_extract(v->data[i], 2),
              spu_extract(v->data[i], 3));
   }
}
#endif
418
419
420 static boolean
421 setup_sort_vertices(const struct vertex_header *v0,
422 const struct vertex_header *v1,
423 const struct vertex_header *v2)
424 {
425 #if DEBUG_VERTS
426 if (spu.init.id==0) {
427 fprintf(stderr, "SPU %u: Triangle:\n", spu.init.id);
428 print_vertex(v0);
429 print_vertex(v1);
430 print_vertex(v2);
431 }
432 #endif
433
434 setup.vprovoke = v2;
435
436 /* determine bottom to top order of vertices */
437 {
438 float y0 = spu_extract(v0->data[0], 1);
439 float y1 = spu_extract(v1->data[0], 1);
440 float y2 = spu_extract(v2->data[0], 1);
441 if (y0 <= y1) {
442 if (y1 <= y2) {
443 /* y0<=y1<=y2 */
444 setup.vmin = v0;
445 setup.vmid = v1;
446 setup.vmax = v2;
447 }
448 else if (y2 <= y0) {
449 /* y2<=y0<=y1 */
450 setup.vmin = v2;
451 setup.vmid = v0;
452 setup.vmax = v1;
453 }
454 else {
455 /* y0<=y2<=y1 */
456 setup.vmin = v0;
457 setup.vmid = v2;
458 setup.vmax = v1;
459 }
460 }
461 else {
462 if (y0 <= y2) {
463 /* y1<=y0<=y2 */
464 setup.vmin = v1;
465 setup.vmid = v0;
466 setup.vmax = v2;
467 }
468 else if (y2 <= y1) {
469 /* y2<=y1<=y0 */
470 setup.vmin = v2;
471 setup.vmid = v1;
472 setup.vmax = v0;
473 }
474 else {
475 /* y1<=y2<=y0 */
476 setup.vmin = v1;
477 setup.vmid = v2;
478 setup.vmax = v0;
479 }
480 }
481 }
482
483 /* Check if triangle is completely outside the tile bounds */
484 if (spu_extract(setup.vmin->data[0], 1) > setup.cliprect_maxy)
485 return FALSE;
486 if (spu_extract(setup.vmax->data[0], 1) < setup.cliprect_miny)
487 return FALSE;
488 if (spu_extract(setup.vmin->data[0], 0) < setup.cliprect_minx &&
489 spu_extract(setup.vmid->data[0], 0) < setup.cliprect_minx &&
490 spu_extract(setup.vmax->data[0], 0) < setup.cliprect_minx)
491 return FALSE;
492 if (spu_extract(setup.vmin->data[0], 0) > setup.cliprect_maxx &&
493 spu_extract(setup.vmid->data[0], 0) > setup.cliprect_maxx &&
494 spu_extract(setup.vmax->data[0], 0) > setup.cliprect_maxx)
495 return FALSE;
496
497 setup.ebot.dx = spu_extract(setup.vmid->data[0], 0) - spu_extract(setup.vmin->data[0], 0);
498 setup.ebot.dy = spu_extract(setup.vmid->data[0], 1) - spu_extract(setup.vmin->data[0], 1);
499 setup.emaj.dx = spu_extract(setup.vmax->data[0], 0) - spu_extract(setup.vmin->data[0], 0);
500 setup.emaj.dy = spu_extract(setup.vmax->data[0], 1) - spu_extract(setup.vmin->data[0], 1);
501 setup.etop.dx = spu_extract(setup.vmax->data[0], 0) - spu_extract(setup.vmid->data[0], 0);
502 setup.etop.dy = spu_extract(setup.vmax->data[0], 1) - spu_extract(setup.vmid->data[0], 1);
503
504 /*
505 * Compute triangle's area. Use 1/area to compute partial
506 * derivatives of attributes later.
507 *
508 * The area will be the same as prim->det, but the sign may be
509 * different depending on how the vertices get sorted above.
510 *
511 * To determine whether the primitive is front or back facing we
512 * use the prim->det value because its sign is correct.
513 */
514 {
515 const float area = (setup.emaj.dx * setup.ebot.dy -
516 setup.ebot.dx * setup.emaj.dy);
517
518 setup.oneOverArea = 1.0f / area;
519 /*
520 _mesa_printf("%s one-over-area %f area %f det %f\n",
521 __FUNCTION__, setup.oneOverArea, area, prim->det );
522 */
523 }
524
525 #if 0
526 /* We need to know if this is a front or back-facing triangle for:
527 * - the GLSL gl_FrontFacing fragment attribute (bool)
528 * - two-sided stencil test
529 */
530 setup.quad.facing = (prim->det > 0.0) ^ (setup.softpipe->rasterizer->front_winding == PIPE_WINDING_CW);
531 #endif
532
533 return TRUE;
534 }
535
536
537 /**
538 * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
539 * The value value comes from vertex->data[slot].
540 * The result will be put into setup.coef[slot].a0.
541 * \param slot which attribute slot
542 */
543 static INLINE void
544 const_coeff4(uint slot)
545 {
546 setup.coef[slot].dadx.v = (vector float) {0.0, 0.0, 0.0, 0.0};
547 setup.coef[slot].dady.v = (vector float) {0.0, 0.0, 0.0, 0.0};
548 setup.coef[slot].a0.v = setup.vprovoke->data[slot];
549 }
550
551
552 /**
553 * As above, but interp setup all four vector components.
554 */
555 static INLINE void
556 tri_linear_coeff4(uint slot)
557 {
558 const vector float vmin_d = setup.vmin->data[slot];
559 const vector float vmid_d = setup.vmid->data[slot];
560 const vector float vmax_d = setup.vmax->data[slot];
561 const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f);
562 const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f);
563
564 vector float botda = vmid_d - vmin_d;
565 vector float majda = vmax_d - vmin_d;
566
567 vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda),
568 spu_mul(botda, spu_splats(setup.emaj.dy)));
569 vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda),
570 spu_mul(majda, spu_splats(setup.ebot.dx)));
571
572 setup.coef[slot].dadx.v = spu_mul(a, spu_splats(setup.oneOverArea));
573 setup.coef[slot].dady.v = spu_mul(b, spu_splats(setup.oneOverArea));
574
575 vector float tempx = spu_mul(setup.coef[slot].dadx.v, xxxx);
576 vector float tempy = spu_mul(setup.coef[slot].dady.v, yyyy);
577
578 setup.coef[slot].a0.v = spu_sub(vmin_d, spu_add(tempx, tempy));
579 }
580
581
582 /**
583 * Compute a0, dadx and dady for a perspective-corrected interpolant,
584 * for a triangle.
585 * We basically multiply the vertex value by 1/w before computing
586 * the plane coefficients (a0, dadx, dady).
587 * Later, when we compute the value at a particular fragment position we'll
588 * divide the interpolated value by the interpolated W at that fragment.
589 */
590 static void
591 tri_persp_coeff4(uint slot)
592 {
593 const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f);
594 const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f);
595
596 const vector float vmin_w = spu_splats(spu_extract(setup.vmin->data[0], 3));
597 const vector float vmid_w = spu_splats(spu_extract(setup.vmid->data[0], 3));
598 const vector float vmax_w = spu_splats(spu_extract(setup.vmax->data[0], 3));
599
600 vector float vmin_d = setup.vmin->data[slot];
601 vector float vmid_d = setup.vmid->data[slot];
602 vector float vmax_d = setup.vmax->data[slot];
603
604 vmin_d = spu_mul(vmin_d, vmin_w);
605 vmid_d = spu_mul(vmid_d, vmid_w);
606 vmax_d = spu_mul(vmax_d, vmax_w);
607
608 vector float botda = vmid_d - vmin_d;
609 vector float majda = vmax_d - vmin_d;
610
611 vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda),
612 spu_mul(botda, spu_splats(setup.emaj.dy)));
613 vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda),
614 spu_mul(majda, spu_splats(setup.ebot.dx)));
615
616 setup.coef[slot].dadx.v = spu_mul(a, spu_splats(setup.oneOverArea));
617 setup.coef[slot].dady.v = spu_mul(b, spu_splats(setup.oneOverArea));
618
619 vector float tempx = spu_mul(setup.coef[slot].dadx.v, xxxx);
620 vector float tempy = spu_mul(setup.coef[slot].dady.v, yyyy);
621
622 setup.coef[slot].a0.v = spu_sub(vmin_d, spu_add(tempx, tempy));
623 }
624
625
626
627 /**
628 * Compute the setup.coef[] array dadx, dady, a0 values.
629 * Must be called after setup.vmin,vmid,vmax,vprovoke are initialized.
630 */
631 static void
632 setup_tri_coefficients(void)
633 {
634 uint i;
635
636 for (i = 0; i < spu.vertex_info.num_attribs; i++) {
637 switch (spu.vertex_info.attrib[i].interp_mode) {
638 case INTERP_NONE:
639 break;
640 case INTERP_CONSTANT:
641 const_coeff4(i);
642 break;
643 case INTERP_POS:
644 /* fall-through */
645 case INTERP_LINEAR:
646 tri_linear_coeff4(i);
647 break;
648 case INTERP_PERSPECTIVE:
649 tri_persp_coeff4(i);
650 break;
651 default:
652 ASSERT(0);
653 }
654 }
655 }
656
657
658 static void
659 setup_tri_edges(void)
660 {
661 float vmin_x = spu_extract(setup.vmin->data[0], 0) + 0.5f;
662 float vmid_x = spu_extract(setup.vmid->data[0], 0) + 0.5f;
663
664 float vmin_y = spu_extract(setup.vmin->data[0], 1) - 0.5f;
665 float vmid_y = spu_extract(setup.vmid->data[0], 1) - 0.5f;
666 float vmax_y = spu_extract(setup.vmax->data[0], 1) - 0.5f;
667
668 setup.emaj.sy = CEILF(vmin_y);
669 setup.emaj.lines = (int) CEILF(vmax_y - setup.emaj.sy);
670 setup.emaj.dxdy = setup.emaj.dx / setup.emaj.dy;
671 setup.emaj.sx = vmin_x + (setup.emaj.sy - vmin_y) * setup.emaj.dxdy;
672
673 setup.etop.sy = CEILF(vmid_y);
674 setup.etop.lines = (int) CEILF(vmax_y - setup.etop.sy);
675 setup.etop.dxdy = setup.etop.dx / setup.etop.dy;
676 setup.etop.sx = vmid_x + (setup.etop.sy - vmid_y) * setup.etop.dxdy;
677
678 setup.ebot.sy = CEILF(vmin_y);
679 setup.ebot.lines = (int) CEILF(vmid_y - setup.ebot.sy);
680 setup.ebot.dxdy = setup.ebot.dx / setup.ebot.dy;
681 setup.ebot.sx = vmin_x + (setup.ebot.sy - vmin_y) * setup.ebot.dxdy;
682 }
683
684
685 /**
686 * Render the upper or lower half of a triangle.
687 * Scissoring/cliprect is applied here too.
688 */
689 static void
690 subtriangle(struct edge *eleft, struct edge *eright, unsigned lines)
691 {
692 const int minx = setup.cliprect_minx;
693 const int maxx = setup.cliprect_maxx;
694 const int miny = setup.cliprect_miny;
695 const int maxy = setup.cliprect_maxy;
696 int y, start_y, finish_y;
697 int sy = (int)eleft->sy;
698
699 ASSERT((int)eleft->sy == (int) eright->sy);
700
701 /* clip top/bottom */
702 start_y = sy;
703 finish_y = sy + lines;
704
705 if (start_y < miny)
706 start_y = miny;
707
708 if (finish_y > maxy)
709 finish_y = maxy;
710
711 start_y -= sy;
712 finish_y -= sy;
713
714 /*
715 _mesa_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);
716 */
717
718 for (y = start_y; y < finish_y; y++) {
719
720 /* avoid accumulating adds as floats don't have the precision to
721 * accurately iterate large triangle edges that way. luckily we
722 * can just multiply these days.
723 *
724 * this is all drowned out by the attribute interpolation anyway.
725 */
726 int left = (int)(eleft->sx + y * eleft->dxdy);
727 int right = (int)(eright->sx + y * eright->dxdy);
728
729 /* clip left/right */
730 if (left < minx)
731 left = minx;
732 if (right > maxx)
733 right = maxx;
734
735 if (left < right) {
736 int _y = sy + y;
737 if (block(_y) != setup.span.y) {
738 flush_spans();
739 setup.span.y = block(_y);
740 }
741
742 setup.span.left[_y&1] = left;
743 setup.span.right[_y&1] = right;
744 setup.span.y_flags |= 1<<(_y&1);
745 }
746 }
747
748
749 /* save the values so that emaj can be restarted:
750 */
751 eleft->sx += lines * eleft->dxdy;
752 eright->sx += lines * eright->dxdy;
753 eleft->sy += lines;
754 eright->sy += lines;
755 }
756
757
/**
 * Z component of the cross product of the 2D edge vectors (v0 - v2) and
 * (v1 - v2).  Only elements [0] (x) and [1] (y) of each vertex are read.
 */
static float
determinant(const float *v0, const float *v1, const float *v2)
{
   /* edge vectors relative to v2 */
   const float e[2] = { v0[0] - v2[0], v0[1] - v2[1] };
   const float f[2] = { v1[0] - v2[0], v1[1] - v2[1] };

   /* det = cross(e,f).z */
   return e[0] * f[1] - e[1] * f[0];
}
770
771
772 /**
773 * Draw triangle into tile at (tx, ty) (tile coords)
774 * The tile data should have already been fetched.
775 */
776 boolean
777 tri_draw(const float *v0, const float *v1, const float *v2,
778 uint tx, uint ty)
779 {
780 setup.tx = tx;
781 setup.ty = ty;
782
783 /* set clipping bounds to tile bounds */
784 setup.cliprect_minx = tx * TILE_SIZE;
785 setup.cliprect_miny = ty * TILE_SIZE;
786 setup.cliprect_maxx = (tx + 1) * TILE_SIZE;
787 setup.cliprect_maxy = (ty + 1) * TILE_SIZE;
788
789 /* Before we sort vertices, determine the facing of the triangle,
790 * which will be needed for front/back-face stencil application
791 */
792 float det = determinant(v0, v1, v2);
793 setup.facing = (det > 0.0) ^ (spu.rasterizer.front_winding == PIPE_WINDING_CW);
794
795 if (!setup_sort_vertices((struct vertex_header *) v0,
796 (struct vertex_header *) v1,
797 (struct vertex_header *) v2)) {
798 return FALSE; /* totally clipped */
799 }
800
801 setup_tri_coefficients();
802 setup_tri_edges();
803
804 setup.span.y = 0;
805 setup.span.y_flags = 0;
806 setup.span.right[0] = 0;
807 setup.span.right[1] = 0;
808
809 if (setup.oneOverArea < 0.0) {
810 /* emaj on left */
811 subtriangle( &setup.emaj, &setup.ebot, setup.ebot.lines );
812 subtriangle( &setup.emaj, &setup.etop, setup.etop.lines );
813 }
814 else {
815 /* emaj on right */
816 subtriangle( &setup.ebot, &setup.emaj, setup.ebot.lines );
817 subtriangle( &setup.etop, &setup.emaj, setup.etop.lines );
818 }
819
820 flush_spans();
821
822 return TRUE;
823 }