Merge commit 'origin/gallium-0.1' into gallium-0.2
[mesa.git] / src / gallium / drivers / cell / spu / spu_tri.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * Triangle rendering within a tile.
30 */
31
32 #include <transpose_matrix4x4.h>
33 #include "pipe/p_compiler.h"
34 #include "pipe/p_format.h"
35 #include "util/u_math.h"
36 #include "spu_colorpack.h"
37 #include "spu_main.h"
38 #include "spu_texture.h"
39 #include "spu_tile.h"
40 #include "spu_tri.h"
41
42
/**
 * Per-quad coverage mask: a vector of four unsigned ints, one element per
 * fragment of a 2x2 quad (indexed by QUAD_TOP_LEFT .. QUAD_BOTTOM_RIGHT).
 * Each element is either 0 or 0xffffffff.
 */
typedef vector unsigned int mask_t;
45
46
47
/**
 * Simplified types taken from other parts of Gallium.
 *
 * A vertex is an array of 4-float vectors, one per attribute slot.
 * data[] is declared with a single element but the code in this file
 * indexes it by attribute slot (data[slot]).  Slot 0 is read as the
 * position: elements 0 and 1 are used as X and Y, element 3 as W.
 */
struct vertex_header {
   vector float data[1];
};
54
55
56
/**
 * Round a float up to the nearest integer value (ceiling), returned as a
 * float.
 *
 * Implemented with an int truncation so no libm call is needed.  The
 * truncation rounds toward zero, so one is added back whenever it landed
 * below the input.  This is exact for fractions of any size and for
 * negative inputs.  The input must be representable as an int.
 */
#undef CEILF
static INLINE float
tri_ceilf(float x)
{
   const float truncated = (float) (int) x;   /* rounds toward zero */
   return (truncated < x) ? truncated + 1.0f : truncated;
}
#define CEILF(X) tri_ceilf(X)
60
61
/*
 * Indices of the four fragments within a 2x2 quad, followed by the
 * matching single-bit masks (bit N corresponds to fragment index N).
 * MASK_ALL has all four fragment bits set.
 */
#define QUAD_TOP_LEFT 0
#define QUAD_TOP_RIGHT 1
#define QUAD_BOTTOM_LEFT 2
#define QUAD_BOTTOM_RIGHT 3
#define MASK_TOP_LEFT (1 << QUAD_TOP_LEFT)
#define MASK_TOP_RIGHT (1 << QUAD_TOP_RIGHT)
#define MASK_BOTTOM_LEFT (1 << QUAD_BOTTOM_LEFT)
#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT)
#define MASK_ALL 0xf
71
72
/** Set to 1 to compile in the code that dumps triangle vertices to stderr. */
#define DEBUG_VERTS 0
74
/**
 * Per-edge state for walking one triangle edge scanline by scanline.
 */
struct edge {
   float dx;     /**< X(v1) - X(v0), used only during setup */
   float dy;     /**< Y(v1) - Y(v0), used only during setup */
   float dxdy;   /**< dx/dy: change in X per scanline */
   float sx;     /**< X coordinate of the first sample point */
   float sy;     /**< Y coordinate of the first sample point */
   int lines;    /**< number of scanlines covered by this edge */
};
85
86
/**
 * Plane-equation coefficients for one 4-component attribute.
 * The attribute is evaluated as a0 + x * dadx + y * dady.
 */
struct interp_coef
{
   vector float a0;     /* constant term of the plane equation */
   vector float dadx;   /* change in the attribute per unit step in X */
   vector float dady;   /* change in the attribute per unit step in Y */
};
93
94
95 /**
96 * Triangle setup info (derived from draw_stage).
97 * Also used for line drawing (taking some liberties).
98 */
99 struct setup_stage {
100
101 /* Vertices are just an array of floats making up each attribute in
102 * turn. Currently fixed at 4 floats, but should change in time.
103 * Codegen will help cope with this.
104 */
105 const struct vertex_header *vmax;
106 const struct vertex_header *vmid;
107 const struct vertex_header *vmin;
108 const struct vertex_header *vprovoke;
109
110 struct edge ebot;
111 struct edge etop;
112 struct edge emaj;
113
114 float oneOverArea; /* XXX maybe make into vector? */
115
116 uint facing;
117
118 uint tx, ty; /**< position of current tile (x, y) */
119
120 int cliprect_minx, cliprect_maxx, cliprect_miny, cliprect_maxy;
121
122 struct interp_coef coef[PIPE_MAX_SHADER_INPUTS];
123
124 struct {
125 int left[2]; /**< [0] = row0, [1] = row1 */
126 int right[2];
127 int y;
128 unsigned y_flags;
129 unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */
130 } span;
131 };
132
133
/** The single file-scope setup state used by every function in this file. */
static struct setup_stage setup;
135
136
137 /**
138 * Evaluate attribute coefficients (plane equations) to compute
139 * attribute values for the four fragments in a quad.
140 * Eg: four colors will be computed (in AoS format).
141 */
142 static INLINE void
143 eval_coeff(uint slot, float x, float y, vector float w, vector float result[4])
144 {
145 switch (spu.vertex_info.attrib[slot].interp_mode) {
146 case INTERP_CONSTANT:
147 result[QUAD_TOP_LEFT] =
148 result[QUAD_TOP_RIGHT] =
149 result[QUAD_BOTTOM_LEFT] =
150 result[QUAD_BOTTOM_RIGHT] = setup.coef[slot].a0;
151 break;
152 case INTERP_LINEAR:
153 {
154 vector float dadx = setup.coef[slot].dadx;
155 vector float dady = setup.coef[slot].dady;
156 vector float topLeft =
157 spu_add(setup.coef[slot].a0,
158 spu_add(spu_mul(spu_splats(x), dadx),
159 spu_mul(spu_splats(y), dady)));
160
161 result[QUAD_TOP_LEFT] = topLeft;
162 result[QUAD_TOP_RIGHT] = spu_add(topLeft, dadx);
163 result[QUAD_BOTTOM_LEFT] = spu_add(topLeft, dady);
164 result[QUAD_BOTTOM_RIGHT] = spu_add(spu_add(topLeft, dadx), dady);
165 }
166 break;
167 case INTERP_PERSPECTIVE:
168 {
169 vector float dadx = setup.coef[slot].dadx;
170 vector float dady = setup.coef[slot].dady;
171 vector float topLeft =
172 spu_add(setup.coef[slot].a0,
173 spu_add(spu_mul(spu_splats(x), dadx),
174 spu_mul(spu_splats(y), dady)));
175
176 vector float wInv = spu_re(w); /* 1.0 / w */
177
178 result[QUAD_TOP_LEFT] = spu_mul(topLeft, wInv);
179 result[QUAD_TOP_RIGHT] = spu_mul(spu_add(topLeft, dadx), wInv);
180 result[QUAD_BOTTOM_LEFT] = spu_mul(spu_add(topLeft, dady), wInv);
181 result[QUAD_BOTTOM_RIGHT] = spu_mul(spu_add(spu_add(topLeft, dadx), dady), wInv);
182 }
183 break;
184 case INTERP_POS:
185 case INTERP_NONE:
186 break;
187 default:
188 ASSERT(0);
189 }
190 }
191
192
/**
 * As eval_coeff(), but return the 4 vectors in SOA format: the results
 * are computed by eval_coeff() and then transposed in place with
 * _transpose_matrix4x4().
 * XXX this will all be re-written someday.
 */
static INLINE void
eval_coeff_soa(uint slot, float x, float y, vector float w, vector float result[4])
{
   eval_coeff(slot, x, y, w, result);
   _transpose_matrix4x4(result, result);
}
203
204
205 /** Evalute coefficients to get Z for four pixels in a quad */
206 static INLINE vector float
207 eval_z(float x, float y)
208 {
209 const uint slot = 0;
210 const float dzdx = spu_extract(setup.coef[slot].dadx, 2);
211 const float dzdy = spu_extract(setup.coef[slot].dady, 2);
212 const float topLeft = spu_extract(setup.coef[slot].a0, 2) + x * dzdx + y * dzdy;
213 const vector float topLeftv = spu_splats(topLeft);
214 const vector float derivs = (vector float) { 0.0, dzdx, dzdy, dzdx + dzdy };
215 return spu_add(topLeftv, derivs);
216 }
217
218
219 /** Evalute coefficients to get W for four pixels in a quad */
220 static INLINE vector float
221 eval_w(float x, float y)
222 {
223 const uint slot = 0;
224 const float dwdx = spu_extract(setup.coef[slot].dadx, 3);
225 const float dwdy = spu_extract(setup.coef[slot].dady, 3);
226 const float topLeft = spu_extract(setup.coef[slot].a0, 3) + x * dwdx + y * dwdy;
227 const vector float topLeftv = spu_splats(topLeft);
228 const vector float derivs = (vector float) { 0.0, dwdx, dwdy, dwdx + dwdy };
229 return spu_add(topLeftv, derivs);
230 }
231
232
233 /**
234 * Emit a quad (pass to next stage). No clipping is done.
235 * Note: about 1/5 to 1/7 of the time, mask is zero and this function
236 * should be skipped. But adding the test for that slows things down
237 * overall.
238 */
239 static INLINE void
240 emit_quad( int x, int y, mask_t mask)
241 {
242 /* If any bits in mask are set... */
243 if (spu_extract(spu_orx(mask), 0)) {
244 const int ix = x - setup.cliprect_minx;
245 const int iy = y - setup.cliprect_miny;
246
247 spu.cur_ctile_status = TILE_STATUS_DIRTY;
248 spu.cur_ztile_status = TILE_STATUS_DIRTY;
249
250 {
251 /*
252 * Run fragment shader, execute per-fragment ops, update fb/tile.
253 */
254 vector float inputs[4*4], outputs[2*4];
255 vector float fragZ = eval_z((float) x, (float) y);
256 vector float fragW = eval_w((float) x, (float) y);
257 vector unsigned int kill_mask;
258
259 /* setup inputs */
260 #if 0
261 eval_coeff_soa(1, (float) x, (float) y, fragW, inputs);
262 #else
263 uint i;
264 for (i = 0; i < spu.vertex_info.num_attribs; i++) {
265 eval_coeff_soa(i+1, (float) x, (float) y, fragW, inputs + i * 4);
266 }
267 #endif
268 ASSERT(spu.fragment_program);
269 ASSERT(spu.fragment_ops);
270
271 /* Execute the current fragment program */
272 kill_mask = spu.fragment_program(inputs, outputs, spu.constants);
273
274 mask = spu_andc(mask, kill_mask);
275
276 /* Execute per-fragment/quad operations, including:
277 * alpha test, z test, stencil test, blend and framebuffer writing.
278 * Note that there are two different fragment operations functions
279 * that can be called, one for front-facing fragments, and one
280 * for back-facing fragments. (Often the two are the same;
281 * but in some cases, like two-sided stenciling, they can be
282 * very different.) So choose the correct function depending
283 * on the calculated facing.
284 */
285 spu.fragment_ops[setup.facing](ix, iy, &spu.ctile, &spu.ztile,
286 fragZ,
287 outputs[0*4+0],
288 outputs[0*4+1],
289 outputs[0*4+2],
290 outputs[0*4+3],
291 mask);
292 }
293 }
294 }
295
296
/**
 * Map an X or Y coordinate to the coordinate of the 2x2 block/quad that
 * contains it, i.e. round down to an even number.
 */
static INLINE int
block(int x)
{
   const int odd = x & 1;
   return x - odd;
}
306
307
308 /**
309 * Compute mask which indicates which pixels in the 2x2 quad are actually inside
310 * the triangle's bounds.
311 * The mask is a uint4 vector and each element will be 0 or 0xffffffff.
312 */
313 static INLINE mask_t
314 calculate_mask(int x)
315 {
316 /* This is a little tricky.
317 * Use & instead of && to avoid branches.
318 * Use negation to convert true/false to ~0/0 values.
319 */
320 mask_t mask;
321 mask = spu_insert(-((x >= setup.span.left[0]) & (x < setup.span.right[0])), mask, 0);
322 mask = spu_insert(-((x+1 >= setup.span.left[0]) & (x+1 < setup.span.right[0])), mask, 1);
323 mask = spu_insert(-((x >= setup.span.left[1]) & (x < setup.span.right[1])), mask, 2);
324 mask = spu_insert(-((x+1 >= setup.span.left[1]) & (x+1 < setup.span.right[1])), mask, 3);
325 return mask;
326 }
327
328
329 /**
330 * Render a horizontal span of quads
331 */
332 static void
333 flush_spans(void)
334 {
335 int minleft, maxright;
336 int x;
337
338 switch (setup.span.y_flags) {
339 case 0x3:
340 /* both odd and even lines written (both quad rows) */
341 minleft = MIN2(setup.span.left[0], setup.span.left[1]);
342 maxright = MAX2(setup.span.right[0], setup.span.right[1]);
343 break;
344
345 case 0x1:
346 /* only even line written (quad top row) */
347 minleft = setup.span.left[0];
348 maxright = setup.span.right[0];
349 break;
350
351 case 0x2:
352 /* only odd line written (quad bottom row) */
353 minleft = setup.span.left[1];
354 maxright = setup.span.right[1];
355 break;
356
357 default:
358 return;
359 }
360
361 /* OK, we're very likely to need the tile data now.
362 * clear or finish waiting if needed.
363 */
364 if (spu.cur_ctile_status == TILE_STATUS_GETTING) {
365 /* wait for mfc_get() to complete */
366 //printf("SPU: %u: waiting for ctile\n", spu.init.id);
367 wait_on_mask(1 << TAG_READ_TILE_COLOR);
368 spu.cur_ctile_status = TILE_STATUS_CLEAN;
369 }
370 else if (spu.cur_ctile_status == TILE_STATUS_CLEAR) {
371 //printf("SPU %u: clearing C tile %u, %u\n", spu.init.id, setup.tx, setup.ty);
372 clear_c_tile(&spu.ctile);
373 spu.cur_ctile_status = TILE_STATUS_DIRTY;
374 }
375 ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED);
376
377 if (spu.read_depth_stencil) {
378 if (spu.cur_ztile_status == TILE_STATUS_GETTING) {
379 /* wait for mfc_get() to complete */
380 //printf("SPU: %u: waiting for ztile\n", spu.init.id);
381 wait_on_mask(1 << TAG_READ_TILE_Z);
382 spu.cur_ztile_status = TILE_STATUS_CLEAN;
383 }
384 else if (spu.cur_ztile_status == TILE_STATUS_CLEAR) {
385 //printf("SPU %u: clearing Z tile %u, %u\n", spu.init.id, setup.tx, setup.ty);
386 clear_z_tile(&spu.ztile);
387 spu.cur_ztile_status = TILE_STATUS_DIRTY;
388 }
389 ASSERT(spu.cur_ztile_status != TILE_STATUS_DEFINED);
390 }
391
392 /* XXX this loop could be moved into the above switch cases and
393 * calculate_mask() could be simplified a bit...
394 */
395 for (x = block(minleft); x <= block(maxright); x += 2) {
396 emit_quad( x, setup.span.y, calculate_mask( x ));
397 }
398
399 setup.span.y = 0;
400 setup.span.y_flags = 0;
401 setup.span.right[0] = 0;
402 setup.span.right[1] = 0;
403 }
404
405
#if DEBUG_VERTS
/**
 * Debug helper: print the address of a vertex and the four components of
 * each of its attributes to stderr.
 */
static void
print_vertex(const struct vertex_header *v)
{
   uint i;
   /* %p requires a void pointer */
   fprintf(stderr, " Vertex: (%p)\n", (const void *) v);
   for (i = 0; i < spu.vertex_info.num_attribs; i++) {
      /* i is unsigned, so print it with %u */
      fprintf(stderr, " %u: %f %f %f %f\n", i,
              spu_extract(v->data[i], 0),
              spu_extract(v->data[i], 1),
              spu_extract(v->data[i], 2),
              spu_extract(v->data[i], 3));
   }
}
#endif
421
422
423 /**
424 * Sort vertices from top to bottom.
425 * Compute area and determine front vs. back facing.
426 * Do coarse clip test against tile bounds
427 * \return FALSE if tri is totally outside tile, TRUE otherwise
428 */
429 static boolean
430 setup_sort_vertices(const struct vertex_header *v0,
431 const struct vertex_header *v1,
432 const struct vertex_header *v2)
433 {
434 float area, sign;
435
436 #if DEBUG_VERTS
437 if (spu.init.id==0) {
438 fprintf(stderr, "SPU %u: Triangle:\n", spu.init.id);
439 print_vertex(v0);
440 print_vertex(v1);
441 print_vertex(v2);
442 }
443 #endif
444
445 /* determine bottom to top order of vertices */
446 {
447 float y0 = spu_extract(v0->data[0], 1);
448 float y1 = spu_extract(v1->data[0], 1);
449 float y2 = spu_extract(v2->data[0], 1);
450 if (y0 <= y1) {
451 if (y1 <= y2) {
452 /* y0<=y1<=y2 */
453 setup.vmin = v0;
454 setup.vmid = v1;
455 setup.vmax = v2;
456 sign = -1.0f;
457 }
458 else if (y2 <= y0) {
459 /* y2<=y0<=y1 */
460 setup.vmin = v2;
461 setup.vmid = v0;
462 setup.vmax = v1;
463 sign = -1.0f;
464 }
465 else {
466 /* y0<=y2<=y1 */
467 setup.vmin = v0;
468 setup.vmid = v2;
469 setup.vmax = v1;
470 sign = 1.0f;
471 }
472 }
473 else {
474 if (y0 <= y2) {
475 /* y1<=y0<=y2 */
476 setup.vmin = v1;
477 setup.vmid = v0;
478 setup.vmax = v2;
479 sign = 1.0f;
480 }
481 else if (y2 <= y1) {
482 /* y2<=y1<=y0 */
483 setup.vmin = v2;
484 setup.vmid = v1;
485 setup.vmax = v0;
486 sign = 1.0f;
487 }
488 else {
489 /* y1<=y2<=y0 */
490 setup.vmin = v1;
491 setup.vmid = v2;
492 setup.vmax = v0;
493 sign = -1.0f;
494 }
495 }
496 }
497
498 /* Check if triangle is completely outside the tile bounds */
499 if (spu_extract(setup.vmin->data[0], 1) > setup.cliprect_maxy)
500 return FALSE;
501 if (spu_extract(setup.vmax->data[0], 1) < setup.cliprect_miny)
502 return FALSE;
503 if (spu_extract(setup.vmin->data[0], 0) < setup.cliprect_minx &&
504 spu_extract(setup.vmid->data[0], 0) < setup.cliprect_minx &&
505 spu_extract(setup.vmax->data[0], 0) < setup.cliprect_minx)
506 return FALSE;
507 if (spu_extract(setup.vmin->data[0], 0) > setup.cliprect_maxx &&
508 spu_extract(setup.vmid->data[0], 0) > setup.cliprect_maxx &&
509 spu_extract(setup.vmax->data[0], 0) > setup.cliprect_maxx)
510 return FALSE;
511
512 setup.ebot.dx = spu_extract(setup.vmid->data[0], 0) - spu_extract(setup.vmin->data[0], 0);
513 setup.ebot.dy = spu_extract(setup.vmid->data[0], 1) - spu_extract(setup.vmin->data[0], 1);
514 setup.emaj.dx = spu_extract(setup.vmax->data[0], 0) - spu_extract(setup.vmin->data[0], 0);
515 setup.emaj.dy = spu_extract(setup.vmax->data[0], 1) - spu_extract(setup.vmin->data[0], 1);
516 setup.etop.dx = spu_extract(setup.vmax->data[0], 0) - spu_extract(setup.vmid->data[0], 0);
517 setup.etop.dy = spu_extract(setup.vmax->data[0], 1) - spu_extract(setup.vmid->data[0], 1);
518
519 /*
520 * Compute triangle's area. Use 1/area to compute partial
521 * derivatives of attributes later.
522 */
523 area = setup.emaj.dx * setup.ebot.dy - setup.ebot.dx * setup.emaj.dy;
524
525 setup.oneOverArea = 1.0f / area;
526
527 /* The product of area * sign indicates front/back orientation (0/1).
528 * Just in case someone gets the bright idea of switching the front
529 * and back constants without noticing that we're assuming their
530 * values in this operation, also assert that the values are
531 * what we think they are.
532 */
533 ASSERT(CELL_FACING_FRONT == 0);
534 ASSERT(CELL_FACING_BACK == 1);
535 setup.facing = (area * sign > 0.0f)
536 ^ (spu.rasterizer.front_winding == PIPE_WINDING_CW);
537
538 setup.vprovoke = v2;
539
540 return TRUE;
541 }
542
543
544 /**
545 * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
546 * The value value comes from vertex->data[slot].
547 * The result will be put into setup.coef[slot].a0.
548 * \param slot which attribute slot
549 */
550 static INLINE void
551 const_coeff4(uint slot)
552 {
553 setup.coef[slot].dadx = (vector float) {0.0, 0.0, 0.0, 0.0};
554 setup.coef[slot].dady = (vector float) {0.0, 0.0, 0.0, 0.0};
555 setup.coef[slot].a0 = setup.vprovoke->data[slot];
556 }
557
558
559 /**
560 * As above, but interp setup all four vector components.
561 */
562 static INLINE void
563 tri_linear_coeff4(uint slot)
564 {
565 const vector float vmin_d = setup.vmin->data[slot];
566 const vector float vmid_d = setup.vmid->data[slot];
567 const vector float vmax_d = setup.vmax->data[slot];
568 const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f);
569 const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f);
570
571 vector float botda = vmid_d - vmin_d;
572 vector float majda = vmax_d - vmin_d;
573
574 vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda),
575 spu_mul(botda, spu_splats(setup.emaj.dy)));
576 vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda),
577 spu_mul(majda, spu_splats(setup.ebot.dx)));
578
579 setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea));
580 setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea));
581
582 vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx);
583 vector float tempy = spu_mul(setup.coef[slot].dady, yyyy);
584
585 setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy));
586 }
587
588
589 /**
590 * Compute a0, dadx and dady for a perspective-corrected interpolant,
591 * for a triangle.
592 * We basically multiply the vertex value by 1/w before computing
593 * the plane coefficients (a0, dadx, dady).
594 * Later, when we compute the value at a particular fragment position we'll
595 * divide the interpolated value by the interpolated W at that fragment.
596 */
597 static void
598 tri_persp_coeff4(uint slot)
599 {
600 const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f);
601 const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f);
602
603 const vector float vmin_w = spu_splats(spu_extract(setup.vmin->data[0], 3));
604 const vector float vmid_w = spu_splats(spu_extract(setup.vmid->data[0], 3));
605 const vector float vmax_w = spu_splats(spu_extract(setup.vmax->data[0], 3));
606
607 vector float vmin_d = setup.vmin->data[slot];
608 vector float vmid_d = setup.vmid->data[slot];
609 vector float vmax_d = setup.vmax->data[slot];
610
611 vmin_d = spu_mul(vmin_d, vmin_w);
612 vmid_d = spu_mul(vmid_d, vmid_w);
613 vmax_d = spu_mul(vmax_d, vmax_w);
614
615 vector float botda = vmid_d - vmin_d;
616 vector float majda = vmax_d - vmin_d;
617
618 vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda),
619 spu_mul(botda, spu_splats(setup.emaj.dy)));
620 vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda),
621 spu_mul(majda, spu_splats(setup.ebot.dx)));
622
623 setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea));
624 setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea));
625
626 vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx);
627 vector float tempy = spu_mul(setup.coef[slot].dady, yyyy);
628
629 setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy));
630 }
631
632
633
634 /**
635 * Compute the setup.coef[] array dadx, dady, a0 values.
636 * Must be called after setup.vmin,vmid,vmax,vprovoke are initialized.
637 */
638 static void
639 setup_tri_coefficients(void)
640 {
641 uint i;
642
643 for (i = 0; i < spu.vertex_info.num_attribs; i++) {
644 switch (spu.vertex_info.attrib[i].interp_mode) {
645 case INTERP_NONE:
646 break;
647 case INTERP_CONSTANT:
648 const_coeff4(i);
649 break;
650 case INTERP_POS:
651 /* fall-through */
652 case INTERP_LINEAR:
653 tri_linear_coeff4(i);
654 break;
655 case INTERP_PERSPECTIVE:
656 tri_persp_coeff4(i);
657 break;
658 default:
659 ASSERT(0);
660 }
661 }
662 }
663
664
665 static void
666 setup_tri_edges(void)
667 {
668 float vmin_x = spu_extract(setup.vmin->data[0], 0) + 0.5f;
669 float vmid_x = spu_extract(setup.vmid->data[0], 0) + 0.5f;
670
671 float vmin_y = spu_extract(setup.vmin->data[0], 1) - 0.5f;
672 float vmid_y = spu_extract(setup.vmid->data[0], 1) - 0.5f;
673 float vmax_y = spu_extract(setup.vmax->data[0], 1) - 0.5f;
674
675 setup.emaj.sy = CEILF(vmin_y);
676 setup.emaj.lines = (int) CEILF(vmax_y - setup.emaj.sy);
677 setup.emaj.dxdy = setup.emaj.dx / setup.emaj.dy;
678 setup.emaj.sx = vmin_x + (setup.emaj.sy - vmin_y) * setup.emaj.dxdy;
679
680 setup.etop.sy = CEILF(vmid_y);
681 setup.etop.lines = (int) CEILF(vmax_y - setup.etop.sy);
682 setup.etop.dxdy = setup.etop.dx / setup.etop.dy;
683 setup.etop.sx = vmid_x + (setup.etop.sy - vmid_y) * setup.etop.dxdy;
684
685 setup.ebot.sy = CEILF(vmin_y);
686 setup.ebot.lines = (int) CEILF(vmid_y - setup.ebot.sy);
687 setup.ebot.dxdy = setup.ebot.dx / setup.ebot.dy;
688 setup.ebot.sx = vmin_x + (setup.ebot.sy - vmin_y) * setup.ebot.dxdy;
689 }
690
691
692 /**
693 * Render the upper or lower half of a triangle.
694 * Scissoring/cliprect is applied here too.
695 */
696 static void
697 subtriangle(struct edge *eleft, struct edge *eright, unsigned lines)
698 {
699 const int minx = setup.cliprect_minx;
700 const int maxx = setup.cliprect_maxx;
701 const int miny = setup.cliprect_miny;
702 const int maxy = setup.cliprect_maxy;
703 int y, start_y, finish_y;
704 int sy = (int)eleft->sy;
705
706 ASSERT((int)eleft->sy == (int) eright->sy);
707
708 /* clip top/bottom */
709 start_y = sy;
710 finish_y = sy + lines;
711
712 if (start_y < miny)
713 start_y = miny;
714
715 if (finish_y > maxy)
716 finish_y = maxy;
717
718 start_y -= sy;
719 finish_y -= sy;
720
721 /*
722 _mesa_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);
723 */
724
725 for (y = start_y; y < finish_y; y++) {
726
727 /* avoid accumulating adds as floats don't have the precision to
728 * accurately iterate large triangle edges that way. luckily we
729 * can just multiply these days.
730 *
731 * this is all drowned out by the attribute interpolation anyway.
732 */
733 int left = (int)(eleft->sx + y * eleft->dxdy);
734 int right = (int)(eright->sx + y * eright->dxdy);
735
736 /* clip left/right */
737 if (left < minx)
738 left = minx;
739 if (right > maxx)
740 right = maxx;
741
742 if (left < right) {
743 int _y = sy + y;
744 if (block(_y) != setup.span.y) {
745 flush_spans();
746 setup.span.y = block(_y);
747 }
748
749 setup.span.left[_y&1] = left;
750 setup.span.right[_y&1] = right;
751 setup.span.y_flags |= 1<<(_y&1);
752 }
753 }
754
755
756 /* save the values so that emaj can be restarted:
757 */
758 eleft->sx += lines * eleft->dxdy;
759 eright->sx += lines * eright->dxdy;
760 eleft->sy += lines;
761 eright->sy += lines;
762 }
763
764
765 /**
766 * Draw triangle into tile at (tx, ty) (tile coords)
767 * The tile data should have already been fetched.
768 */
769 boolean
770 tri_draw(const float *v0, const float *v1, const float *v2,
771 uint tx, uint ty)
772 {
773 setup.tx = tx;
774 setup.ty = ty;
775
776 /* set clipping bounds to tile bounds */
777 setup.cliprect_minx = tx * TILE_SIZE;
778 setup.cliprect_miny = ty * TILE_SIZE;
779 setup.cliprect_maxx = (tx + 1) * TILE_SIZE;
780 setup.cliprect_maxy = (ty + 1) * TILE_SIZE;
781
782 if (!setup_sort_vertices((struct vertex_header *) v0,
783 (struct vertex_header *) v1,
784 (struct vertex_header *) v2)) {
785 return FALSE; /* totally clipped */
786 }
787
788 setup_tri_coefficients();
789 setup_tri_edges();
790
791 setup.span.y = 0;
792 setup.span.y_flags = 0;
793 setup.span.right[0] = 0;
794 setup.span.right[1] = 0;
795
796 if (setup.oneOverArea < 0.0) {
797 /* emaj on left */
798 subtriangle( &setup.emaj, &setup.ebot, setup.ebot.lines );
799 subtriangle( &setup.emaj, &setup.etop, setup.etop.lines );
800 }
801 else {
802 /* emaj on right */
803 subtriangle( &setup.ebot, &setup.emaj, setup.ebot.lines );
804 subtriangle( &setup.etop, &setup.emaj, setup.etop.lines );
805 }
806
807 flush_spans();
808
809 return TRUE;
810 }