i965: fix bugs in projective texture coordinates
[mesa.git] / src / gallium / drivers / cell / spu / spu_tri.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * Triangle rendering within a tile.
30 */
31
32 #include "pipe/p_compiler.h"
33 #include "pipe/p_format.h"
34 #include "util/u_math.h"
35 #include "spu_colorpack.h"
36 #include "spu_main.h"
37 #include "spu_shuffle.h"
38 #include "spu_texture.h"
39 #include "spu_tile.h"
40 #include "spu_tri.h"
41
42
43 /** Masks are uint[4] vectors with each element being 0 or 0xffffffff */
44 typedef vector unsigned int mask_t;
45
46
47
48 /**
49 * Simplified types taken from other parts of Gallium
50 */
51 struct vertex_header {
52 vector float data[1];
53 };
54
55
56
/* XXX fix this.
 * Replaces any earlier CEILF definition with a cheap approximation of
 * ceil(): bias the value by just under one, then truncate through an
 * int cast.
 */
#undef CEILF
#define CEILF(X)   ((float) (int) ((X) + 0.99999f))
60
61
/* Index of each fragment within a 2x2 quad */
#define QUAD_TOP_LEFT     0
#define QUAD_TOP_RIGHT    1
#define QUAD_BOTTOM_LEFT  2
#define QUAD_BOTTOM_RIGHT 3

/* One bit per quad fragment, at the bit position given by its index */
#define MASK_TOP_LEFT     (1 << QUAD_TOP_LEFT)
#define MASK_TOP_RIGHT    (1 << QUAD_TOP_RIGHT)
#define MASK_BOTTOM_LEFT  (1 << QUAD_BOTTOM_LEFT)
#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT)

/* All four fragment bits set */
#define MASK_ALL          0xf
71
72
/* Component (lane) indices of a 4-float vector; used as x, y, z, w by
 * the splat helpers in this file.
 */
#define CHAN0   0
#define CHAN1   1
#define CHAN2   2
#define CHAN3   3
77
78
/* Set to non-zero to compile in the vertex-dumping debug code */
#define DEBUG_VERTS 0
80
81 /**
82 * Triangle edge info
83 */
84 struct edge {
85 union {
86 struct {
87 float dx; /**< X(v1) - X(v0), used only during setup */
88 float dy; /**< Y(v1) - Y(v0), used only during setup */
89 };
90 vec_float4 ds; /**< vector accessor for dx and dy */
91 };
92 float dxdy; /**< dx/dy */
93 float sx, sy; /**< first sample point coord */
94 int lines; /**< number of lines on this edge */
95 };
96
97
98 struct interp_coef
99 {
100 vector float a0;
101 vector float dadx;
102 vector float dady;
103 };
104
105
106 /**
107 * Triangle setup info (derived from draw_stage).
108 * Also used for line drawing (taking some liberties).
109 */
110 struct setup_stage {
111
112 /* Vertices are just an array of floats making up each attribute in
113 * turn. Currently fixed at 4 floats, but should change in time.
114 * Codegen will help cope with this.
115 */
116 union {
117 struct {
118 const struct vertex_header *vmin;
119 const struct vertex_header *vmid;
120 const struct vertex_header *vmax;
121 const struct vertex_header *vprovoke;
122 };
123 qword vertex_headers;
124 };
125
126 struct edge ebot;
127 struct edge etop;
128 struct edge emaj;
129
130 float oneOverArea; /* XXX maybe make into vector? */
131
132 uint facing;
133
134 uint tx, ty; /**< position of current tile (x, y) */
135
136 int cliprect_minx, cliprect_maxx, cliprect_miny, cliprect_maxy;
137
138 struct interp_coef coef[PIPE_MAX_SHADER_INPUTS];
139
140 struct {
141 vec_int4 quad; /**< [0] = row0, [1] = row1; {left[0],left[1],right[0],right[1]} */
142 int y;
143 unsigned y_flags;
144 unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */
145 } span;
146 };
147
148
149 static struct setup_stage setup;
150
151
152 static INLINE vector float
153 splatx(vector float v)
154 {
155 return spu_splats(spu_extract(v, CHAN0));
156 }
157
158 static INLINE vector float
159 splaty(vector float v)
160 {
161 return spu_splats(spu_extract(v, CHAN1));
162 }
163
164 static INLINE vector float
165 splatz(vector float v)
166 {
167 return spu_splats(spu_extract(v, CHAN2));
168 }
169
170 static INLINE vector float
171 splatw(vector float v)
172 {
173 return spu_splats(spu_extract(v, CHAN3));
174 }
175
176
177 /**
178 * Setup fragment shader inputs by evaluating triangle's vertex
179 * attribute coefficient info.
180 * \param x quad x pos
181 * \param y quad y pos
182 * \param fragZ returns quad Z values
183 * \param fragInputs returns fragment program inputs
184 * Note: this code could be incorporated into the fragment program
185 * itself to avoid the loop and switch.
186 */
187 static void
188 eval_inputs(float x, float y, vector float *fragZ, vector float fragInputs[])
189 {
190 static const vector float deltaX = (const vector float) {0, 1, 0, 1};
191 static const vector float deltaY = (const vector float) {0, 0, 1, 1};
192
193 const uint posSlot = 0;
194 const vector float pos = setup.coef[posSlot].a0;
195 const vector float dposdx = setup.coef[posSlot].dadx;
196 const vector float dposdy = setup.coef[posSlot].dady;
197 const vector float fragX = spu_splats(x) + deltaX;
198 const vector float fragY = spu_splats(y) + deltaY;
199 vector float fragW, wInv;
200 uint i;
201
202 *fragZ = splatz(pos) + fragX * splatz(dposdx) + fragY * splatz(dposdy);
203 fragW = splatw(pos) + fragX * splatw(dposdx) + fragY * splatw(dposdy);
204 wInv = spu_re(fragW); /* 1 / w */
205
206 /* loop over fragment program inputs */
207 for (i = 0; i < spu.vertex_info.num_attribs; i++) {
208 uint attr = i + 1;
209 enum interp_mode interp = spu.vertex_info.attrib[attr].interp_mode;
210
211 /* constant term */
212 vector float a0 = setup.coef[attr].a0;
213 vector float r0 = splatx(a0);
214 vector float r1 = splaty(a0);
215 vector float r2 = splatz(a0);
216 vector float r3 = splatw(a0);
217
218 if (interp == INTERP_LINEAR || interp == INTERP_PERSPECTIVE) {
219 /* linear term */
220 vector float dadx = setup.coef[attr].dadx;
221 vector float dady = setup.coef[attr].dady;
222 /* Use SPU intrinsics here to get slightly better code.
223 * originally: r0 += fragX * splatx(dadx) + fragY * splatx(dady);
224 */
225 r0 = spu_madd(fragX, splatx(dadx), spu_madd(fragY, splatx(dady), r0));
226 r1 = spu_madd(fragX, splaty(dadx), spu_madd(fragY, splaty(dady), r1));
227 r2 = spu_madd(fragX, splatz(dadx), spu_madd(fragY, splatz(dady), r2));
228 r3 = spu_madd(fragX, splatw(dadx), spu_madd(fragY, splatw(dady), r3));
229 if (interp == INTERP_PERSPECTIVE) {
230 /* perspective term */
231 r0 *= wInv;
232 r1 *= wInv;
233 r2 *= wInv;
234 r3 *= wInv;
235 }
236 }
237 fragInputs[CHAN0] = r0;
238 fragInputs[CHAN1] = r1;
239 fragInputs[CHAN2] = r2;
240 fragInputs[CHAN3] = r3;
241 fragInputs += 4;
242 }
243 }
244
245
246 /**
247 * Emit a quad (pass to next stage). No clipping is done.
248 * Note: about 1/5 to 1/7 of the time, mask is zero and this function
249 * should be skipped. But adding the test for that slows things down
250 * overall.
251 */
252 static INLINE void
253 emit_quad( int x, int y, mask_t mask)
254 {
255 /* If any bits in mask are set... */
256 if (spu_extract(spu_orx(mask), 0)) {
257 const int ix = x - setup.cliprect_minx;
258 const int iy = y - setup.cliprect_miny;
259
260 spu.cur_ctile_status = TILE_STATUS_DIRTY;
261 spu.cur_ztile_status = TILE_STATUS_DIRTY;
262
263 {
264 /*
265 * Run fragment shader, execute per-fragment ops, update fb/tile.
266 */
267 vector float inputs[4*4], outputs[2*4];
268 vector unsigned int kill_mask;
269 vector float fragZ;
270
271 eval_inputs((float) x, (float) y, &fragZ, inputs);
272
273 ASSERT(spu.fragment_program);
274 ASSERT(spu.fragment_ops);
275
276 /* Execute the current fragment program */
277 kill_mask = spu.fragment_program(inputs, outputs, spu.constants);
278
279 mask = spu_andc(mask, kill_mask);
280
281 /* Execute per-fragment/quad operations, including:
282 * alpha test, z test, stencil test, blend and framebuffer writing.
283 * Note that there are two different fragment operations functions
284 * that can be called, one for front-facing fragments, and one
285 * for back-facing fragments. (Often the two are the same;
286 * but in some cases, like two-sided stenciling, they can be
287 * very different.) So choose the correct function depending
288 * on the calculated facing.
289 */
290 spu.fragment_ops[setup.facing](ix, iy, &spu.ctile, &spu.ztile,
291 fragZ,
292 outputs[0*4+0],
293 outputs[0*4+1],
294 outputs[0*4+2],
295 outputs[0*4+3],
296 mask);
297 }
298 }
299 }
300
301
302 /**
303 * Given an X or Y coordinate, return the block/quad coordinate that it
304 * belongs to.
305 */
306 static INLINE int
307 block(int x)
308 {
309 return x & ~1;
310 }
311
312
313 /**
314 * Render a horizontal span of quads
315 */
316 static void
317 flush_spans(void)
318 {
319 int minleft, maxright;
320
321 const int l0 = spu_extract(setup.span.quad, 0);
322 const int l1 = spu_extract(setup.span.quad, 1);
323 const int r0 = spu_extract(setup.span.quad, 2);
324 const int r1 = spu_extract(setup.span.quad, 3);
325
326 switch (setup.span.y_flags) {
327 case 0x3:
328 /* both odd and even lines written (both quad rows) */
329 minleft = MIN2(l0, l1);
330 maxright = MAX2(r0, r1);
331 break;
332
333 case 0x1:
334 /* only even line written (quad top row) */
335 minleft = l0;
336 maxright = r0;
337 break;
338
339 case 0x2:
340 /* only odd line written (quad bottom row) */
341 minleft = l1;
342 maxright = r1;
343 break;
344
345 default:
346 return;
347 }
348
349 /* OK, we're very likely to need the tile data now.
350 * clear or finish waiting if needed.
351 */
352 if (spu.cur_ctile_status == TILE_STATUS_GETTING) {
353 /* wait for mfc_get() to complete */
354 //printf("SPU: %u: waiting for ctile\n", spu.init.id);
355 wait_on_mask(1 << TAG_READ_TILE_COLOR);
356 spu.cur_ctile_status = TILE_STATUS_CLEAN;
357 }
358 else if (spu.cur_ctile_status == TILE_STATUS_CLEAR) {
359 //printf("SPU %u: clearing C tile %u, %u\n", spu.init.id, setup.tx, setup.ty);
360 clear_c_tile(&spu.ctile);
361 spu.cur_ctile_status = TILE_STATUS_DIRTY;
362 }
363 ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED);
364
365 if (spu.read_depth_stencil) {
366 if (spu.cur_ztile_status == TILE_STATUS_GETTING) {
367 /* wait for mfc_get() to complete */
368 //printf("SPU: %u: waiting for ztile\n", spu.init.id);
369 wait_on_mask(1 << TAG_READ_TILE_Z);
370 spu.cur_ztile_status = TILE_STATUS_CLEAN;
371 }
372 else if (spu.cur_ztile_status == TILE_STATUS_CLEAR) {
373 //printf("SPU %u: clearing Z tile %u, %u\n", spu.init.id, setup.tx, setup.ty);
374 clear_z_tile(&spu.ztile);
375 spu.cur_ztile_status = TILE_STATUS_DIRTY;
376 }
377 ASSERT(spu.cur_ztile_status != TILE_STATUS_DEFINED);
378 }
379
380 /* XXX this loop could be moved into the above switch cases... */
381
382 /* Setup for mask calculation */
383 const vec_int4 quad_LlRr = setup.span.quad;
384 const vec_int4 quad_RrLl = spu_rlqwbyte(quad_LlRr, 8);
385 const vec_int4 quad_LLll = spu_shuffle(quad_LlRr, quad_LlRr, SHUFFLE4(A,A,B,B));
386 const vec_int4 quad_RRrr = spu_shuffle(quad_RrLl, quad_RrLl, SHUFFLE4(A,A,B,B));
387
388 const vec_int4 twos = spu_splats(2);
389
390 const int x = block(minleft);
391 vec_int4 xs = {x, x+1, x, x+1};
392
393 for (; spu_extract(xs, 0) <= block(maxright); xs += twos) {
394 /**
395 * Computes mask to indicate which pixels in the 2x2 quad are actually
396 * inside the triangle's bounds.
397 */
398
399 /* Calculate ({x,x+1,x,x+1} >= {l[0],l[0],l[1],l[1]}) */
400 const mask_t gt_LLll_xs = spu_cmpgt(quad_LLll, xs);
401 const mask_t gte_xs_LLll = spu_nand(gt_LLll_xs, gt_LLll_xs);
402
403 /* Calculate ({r[0],r[0],r[1],r[1]} > {x,x+1,x,x+1}) */
404 const mask_t gt_RRrr_xs = spu_cmpgt(quad_RRrr, xs);
405
406 /* Combine results to create mask */
407 const mask_t mask = spu_and(gte_xs_LLll, gt_RRrr_xs);
408
409 emit_quad(spu_extract(xs, 0), setup.span.y, mask);
410 }
411
412 setup.span.y = 0;
413 setup.span.y_flags = 0;
414 /* Zero right elements */
415 setup.span.quad = spu_shuffle(setup.span.quad, setup.span.quad, SHUFFLE4(A,B,0,0));
416 }
417
418
#if DEBUG_VERTS
/**
 * Debug helper: print the address of a vertex and each of its
 * attribute vectors (slots 0 .. num_attribs-1) to stderr.
 * \param v  the vertex to dump
 */
static void
print_vertex(const struct vertex_header *v)
{
   uint i;
   /* %p is only defined for pointers to void, so convert explicitly */
   fprintf(stderr, " Vertex: (%p)\n", (const void *) v);
   for (i = 0; i < spu.vertex_info.num_attribs; i++) {
      /* the slot index is unsigned, so it is printed with %u, not %d */
      fprintf(stderr, " %u: %f %f %f %f\n", i,
              spu_extract(v->data[i], 0),
              spu_extract(v->data[i], 1),
              spu_extract(v->data[i], 2),
              spu_extract(v->data[i], 3));
   }
}
#endif
434
435
436 /**
437 * Sort vertices from top to bottom.
438 * Compute area and determine front vs. back facing.
439 * Do coarse clip test against tile bounds
440 * \return FALSE if tri is totally outside tile, TRUE otherwise
441 */
442 static boolean
443 setup_sort_vertices(const struct vertex_header *v0,
444 const struct vertex_header *v1,
445 const struct vertex_header *v2)
446 {
447 float area, sign;
448
449 #if DEBUG_VERTS
450 if (spu.init.id==0) {
451 fprintf(stderr, "SPU %u: Triangle:\n", spu.init.id);
452 print_vertex(v0);
453 print_vertex(v1);
454 print_vertex(v2);
455 }
456 #endif
457
458 /* determine bottom to top order of vertices */
459 {
460 /* A table of shuffle patterns for putting vertex_header pointers into
461 correct order. Quite magical. */
462 const vec_uchar16 sort_order_patterns[] = {
463 SHUFFLE4(A,B,C,C),
464 SHUFFLE4(C,A,B,C),
465 SHUFFLE4(A,C,B,C),
466 SHUFFLE4(B,C,A,C),
467 SHUFFLE4(B,A,C,C),
468 SHUFFLE4(C,B,A,C) };
469
470 /* The vertex_header pointers, packed for easy shuffling later */
471 const vec_uint4 vs = {(unsigned)v0, (unsigned)v1, (unsigned)v2};
472
473 /* Collate y values into two vectors for comparison.
474 Using only one shuffle constant! ;) */
475 const vec_float4 y_02_ = spu_shuffle(v0->data[0], v2->data[0], SHUFFLE4(0,B,b,C));
476 const vec_float4 y_10_ = spu_shuffle(v1->data[0], v0->data[0], SHUFFLE4(0,B,b,C));
477 const vec_float4 y_012 = spu_shuffle(y_02_, v1->data[0], SHUFFLE4(0,B,b,C));
478 const vec_float4 y_120 = spu_shuffle(y_10_, v2->data[0], SHUFFLE4(0,B,b,C));
479
480 /* Perform comparison: {y0,y1,y2} > {y1,y2,y0} */
481 const vec_uint4 compare = spu_cmpgt(y_012, y_120);
482 /* Compress the result of the comparison into 4 bits */
483 const vec_uint4 gather = spu_gather(compare);
484 /* Subtract one to attain the index into the LUT. Magical. */
485 const unsigned int index = spu_extract(gather, 0) - 1;
486
487 /* Load the appropriate pattern and construct the desired vector. */
488 setup.vertex_headers = (qword)spu_shuffle(vs, vs, sort_order_patterns[index]);
489
490 /* Using the result of the comparison, set sign.
491 Very magical. */
492 sign = ((si_to_uint(si_cntb((qword)gather)) == 2) ? 1.0f : -1.0f);
493 }
494
495 /* Check if triangle is completely outside the tile bounds */
496 if (spu_extract(setup.vmin->data[0], 1) > setup.cliprect_maxy)
497 return FALSE;
498 if (spu_extract(setup.vmax->data[0], 1) < setup.cliprect_miny)
499 return FALSE;
500 if (spu_extract(setup.vmin->data[0], 0) < setup.cliprect_minx &&
501 spu_extract(setup.vmid->data[0], 0) < setup.cliprect_minx &&
502 spu_extract(setup.vmax->data[0], 0) < setup.cliprect_minx)
503 return FALSE;
504 if (spu_extract(setup.vmin->data[0], 0) > setup.cliprect_maxx &&
505 spu_extract(setup.vmid->data[0], 0) > setup.cliprect_maxx &&
506 spu_extract(setup.vmax->data[0], 0) > setup.cliprect_maxx)
507 return FALSE;
508
509 setup.ebot.ds = spu_sub(setup.vmid->data[0], setup.vmin->data[0]);
510 setup.emaj.ds = spu_sub(setup.vmax->data[0], setup.vmin->data[0]);
511 setup.etop.ds = spu_sub(setup.vmax->data[0], setup.vmid->data[0]);
512
513 /*
514 * Compute triangle's area. Use 1/area to compute partial
515 * derivatives of attributes later.
516 */
517 area = setup.emaj.dx * setup.ebot.dy - setup.ebot.dx * setup.emaj.dy;
518
519 setup.oneOverArea = 1.0f / area;
520
521 /* The product of area * sign indicates front/back orientation (0/1).
522 * Just in case someone gets the bright idea of switching the front
523 * and back constants without noticing that we're assuming their
524 * values in this operation, also assert that the values are
525 * what we think they are.
526 */
527 ASSERT(CELL_FACING_FRONT == 0);
528 ASSERT(CELL_FACING_BACK == 1);
529 setup.facing = (area * sign > 0.0f)
530 ^ (spu.rasterizer.front_winding == PIPE_WINDING_CW);
531
532 return TRUE;
533 }
534
535
536 /**
537 * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
538 * The value value comes from vertex->data[slot].
539 * The result will be put into setup.coef[slot].a0.
540 * \param slot which attribute slot
541 */
542 static INLINE void
543 const_coeff4(uint slot)
544 {
545 setup.coef[slot].dadx = (vector float) {0.0, 0.0, 0.0, 0.0};
546 setup.coef[slot].dady = (vector float) {0.0, 0.0, 0.0, 0.0};
547 setup.coef[slot].a0 = setup.vprovoke->data[slot];
548 }
549
550
551 /**
552 * As above, but interp setup all four vector components.
553 */
554 static INLINE void
555 tri_linear_coeff4(uint slot)
556 {
557 const vector float vmin_d = setup.vmin->data[slot];
558 const vector float vmid_d = setup.vmid->data[slot];
559 const vector float vmax_d = setup.vmax->data[slot];
560 const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f);
561 const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f);
562
563 vector float botda = vmid_d - vmin_d;
564 vector float majda = vmax_d - vmin_d;
565
566 vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda),
567 spu_mul(botda, spu_splats(setup.emaj.dy)));
568 vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda),
569 spu_mul(majda, spu_splats(setup.ebot.dx)));
570
571 setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea));
572 setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea));
573
574 vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx);
575 vector float tempy = spu_mul(setup.coef[slot].dady, yyyy);
576
577 setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy));
578 }
579
580
581 /**
582 * Compute a0, dadx and dady for a perspective-corrected interpolant,
583 * for a triangle.
584 * We basically multiply the vertex value by 1/w before computing
585 * the plane coefficients (a0, dadx, dady).
586 * Later, when we compute the value at a particular fragment position we'll
587 * divide the interpolated value by the interpolated W at that fragment.
588 */
589 static void
590 tri_persp_coeff4(uint slot)
591 {
592 const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f);
593 const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f);
594
595 const vector float vmin_w = spu_splats(spu_extract(setup.vmin->data[0], 3));
596 const vector float vmid_w = spu_splats(spu_extract(setup.vmid->data[0], 3));
597 const vector float vmax_w = spu_splats(spu_extract(setup.vmax->data[0], 3));
598
599 vector float vmin_d = setup.vmin->data[slot];
600 vector float vmid_d = setup.vmid->data[slot];
601 vector float vmax_d = setup.vmax->data[slot];
602
603 vmin_d = spu_mul(vmin_d, vmin_w);
604 vmid_d = spu_mul(vmid_d, vmid_w);
605 vmax_d = spu_mul(vmax_d, vmax_w);
606
607 vector float botda = vmid_d - vmin_d;
608 vector float majda = vmax_d - vmin_d;
609
610 vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda),
611 spu_mul(botda, spu_splats(setup.emaj.dy)));
612 vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda),
613 spu_mul(majda, spu_splats(setup.ebot.dx)));
614
615 setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea));
616 setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea));
617
618 vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx);
619 vector float tempy = spu_mul(setup.coef[slot].dady, yyyy);
620
621 setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy));
622 }
623
624
625
626 /**
627 * Compute the setup.coef[] array dadx, dady, a0 values.
628 * Must be called after setup.vmin,vmid,vmax,vprovoke are initialized.
629 */
630 static void
631 setup_tri_coefficients(void)
632 {
633 uint i;
634
635 for (i = 0; i < spu.vertex_info.num_attribs; i++) {
636 switch (spu.vertex_info.attrib[i].interp_mode) {
637 case INTERP_NONE:
638 break;
639 case INTERP_CONSTANT:
640 const_coeff4(i);
641 break;
642 case INTERP_POS:
643 /* fall-through */
644 case INTERP_LINEAR:
645 tri_linear_coeff4(i);
646 break;
647 case INTERP_PERSPECTIVE:
648 tri_persp_coeff4(i);
649 break;
650 default:
651 ASSERT(0);
652 }
653 }
654 }
655
656
657 static void
658 setup_tri_edges(void)
659 {
660 float vmin_x = spu_extract(setup.vmin->data[0], 0) + 0.5f;
661 float vmid_x = spu_extract(setup.vmid->data[0], 0) + 0.5f;
662
663 float vmin_y = spu_extract(setup.vmin->data[0], 1) - 0.5f;
664 float vmid_y = spu_extract(setup.vmid->data[0], 1) - 0.5f;
665 float vmax_y = spu_extract(setup.vmax->data[0], 1) - 0.5f;
666
667 setup.emaj.sy = CEILF(vmin_y);
668 setup.emaj.lines = (int) CEILF(vmax_y - setup.emaj.sy);
669 setup.emaj.dxdy = setup.emaj.dx / setup.emaj.dy;
670 setup.emaj.sx = vmin_x + (setup.emaj.sy - vmin_y) * setup.emaj.dxdy;
671
672 setup.etop.sy = CEILF(vmid_y);
673 setup.etop.lines = (int) CEILF(vmax_y - setup.etop.sy);
674 setup.etop.dxdy = setup.etop.dx / setup.etop.dy;
675 setup.etop.sx = vmid_x + (setup.etop.sy - vmid_y) * setup.etop.dxdy;
676
677 setup.ebot.sy = CEILF(vmin_y);
678 setup.ebot.lines = (int) CEILF(vmid_y - setup.ebot.sy);
679 setup.ebot.dxdy = setup.ebot.dx / setup.ebot.dy;
680 setup.ebot.sx = vmin_x + (setup.ebot.sy - vmin_y) * setup.ebot.dxdy;
681 }
682
683
684 /**
685 * Render the upper or lower half of a triangle.
686 * Scissoring/cliprect is applied here too.
687 */
688 static void
689 subtriangle(struct edge *eleft, struct edge *eright, unsigned lines)
690 {
691 const int minx = setup.cliprect_minx;
692 const int maxx = setup.cliprect_maxx;
693 const int miny = setup.cliprect_miny;
694 const int maxy = setup.cliprect_maxy;
695 int y, start_y, finish_y;
696 int sy = (int)eleft->sy;
697
698 ASSERT((int)eleft->sy == (int) eright->sy);
699
700 /* clip top/bottom */
701 start_y = sy;
702 finish_y = sy + lines;
703
704 if (start_y < miny)
705 start_y = miny;
706
707 if (finish_y > maxy)
708 finish_y = maxy;
709
710 start_y -= sy;
711 finish_y -= sy;
712
713 /*
714 _mesa_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);
715 */
716
717 for (y = start_y; y < finish_y; y++) {
718
719 /* avoid accumulating adds as floats don't have the precision to
720 * accurately iterate large triangle edges that way. luckily we
721 * can just multiply these days.
722 *
723 * this is all drowned out by the attribute interpolation anyway.
724 */
725 int left = (int)(eleft->sx + y * eleft->dxdy);
726 int right = (int)(eright->sx + y * eright->dxdy);
727
728 /* clip left/right */
729 if (left < minx)
730 left = minx;
731 if (right > maxx)
732 right = maxx;
733
734 if (left < right) {
735 int _y = sy + y;
736 if (block(_y) != setup.span.y) {
737 flush_spans();
738 setup.span.y = block(_y);
739 }
740
741 int offset = _y&1;
742 vec_int4 quad_LlRr = {left, left, right, right};
743 /* Store left and right in 0 or 1 row of quad based on offset */
744 setup.span.quad = spu_sel(quad_LlRr, setup.span.quad, spu_maskw(5<<offset));
745 setup.span.y_flags |= 1<<offset;
746 }
747 }
748
749
750 /* save the values so that emaj can be restarted:
751 */
752 eleft->sx += lines * eleft->dxdy;
753 eright->sx += lines * eright->dxdy;
754 eleft->sy += lines;
755 eright->sy += lines;
756 }
757
758
759 /**
760 * Draw triangle into tile at (tx, ty) (tile coords)
761 * The tile data should have already been fetched.
762 */
763 boolean
764 tri_draw(const float *v0, const float *v1, const float *v2,
765 uint tx, uint ty)
766 {
767 setup.tx = tx;
768 setup.ty = ty;
769
770 /* set clipping bounds to tile bounds */
771 setup.cliprect_minx = tx * TILE_SIZE;
772 setup.cliprect_miny = ty * TILE_SIZE;
773 setup.cliprect_maxx = (tx + 1) * TILE_SIZE;
774 setup.cliprect_maxy = (ty + 1) * TILE_SIZE;
775
776 if (!setup_sort_vertices((struct vertex_header *) v0,
777 (struct vertex_header *) v1,
778 (struct vertex_header *) v2)) {
779 return FALSE; /* totally clipped */
780 }
781
782 setup_tri_coefficients();
783 setup_tri_edges();
784
785 setup.span.y = 0;
786 setup.span.y_flags = 0;
787 /* Zero right elements */
788 setup.span.quad = spu_shuffle(setup.span.quad, setup.span.quad, SHUFFLE4(A,B,0,0));
789
790 if (setup.oneOverArea < 0.0) {
791 /* emaj on left */
792 subtriangle( &setup.emaj, &setup.ebot, setup.ebot.lines );
793 subtriangle( &setup.emaj, &setup.etop, setup.etop.lines );
794 }
795 else {
796 /* emaj on right */
797 subtriangle( &setup.ebot, &setup.emaj, setup.ebot.lines );
798 subtriangle( &setup.etop, &setup.emaj, setup.etop.lines );
799 }
800
801 flush_spans();
802
803 return TRUE;
804 }