d9f5a4667203a009cdb6b3deffd67c590ab20101
[mesa.git] / src / gallium / drivers / cell / spu / spu_tri.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * Triangle rendering within a tile.
30 */
31
32 #include "pipe/p_compiler.h"
33 #include "pipe/p_format.h"
34 #include "util/u_math.h"
35 #include "spu_colorpack.h"
36 #include "spu_main.h"
37 #include "spu_shuffle.h"
38 #include "spu_texture.h"
39 #include "spu_tile.h"
40 #include "spu_tri.h"
41
42
43 /** Masks are uint[4] vectors with each element being 0 or 0xffffffff */
44 typedef vector unsigned int mask_t;
45
46
47
48 /**
49 * Simplified types taken from other parts of Gallium
50 */
51 struct vertex_header {
52 vector float data[1];
53 };
54
55
56
/**
 * Exact ceiling for floats whose value fits in an int.
 *
 * The float -> int conversion truncates toward zero, so the truncated value
 * is already the ceiling for negative inputs and for exact integers; only a
 * positive input with a fractional part needs to be bumped up by one.
 * This replaces the old "(int) (x + 0.99999f)" trick, which was wrong for
 * negative inputs and for fractional parts smaller than 0.00001.
 *
 * \param x  value to round up (must be representable as an int)
 * \return smallest integral float >= x
 */
static inline float
spu_tri_ceilf(float x)
{
   const float truncated = (float) (int) x;
   return (truncated < x) ? truncated + 1.0f : truncated;
}

#undef CEILF
#define CEILF(X) spu_tri_ceilf(X)
60
61
/* Index of each pixel within a 2x2 quad. */
#define QUAD_TOP_LEFT     0
#define QUAD_TOP_RIGHT    1
#define QUAD_BOTTOM_LEFT  2
#define QUAD_BOTTOM_RIGHT 3

/* One bit per quad pixel, derived from the indices above. */
#define MASK_TOP_LEFT     (1 << QUAD_TOP_LEFT)
#define MASK_TOP_RIGHT    (1 << QUAD_TOP_RIGHT)
#define MASK_BOTTOM_LEFT  (1 << QUAD_BOTTOM_LEFT)
#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT)
#define MASK_ALL          0xf


/* Lane indices of a 4-component vector (x, y, z, w). */
#define CHAN0 0
#define CHAN1 1
#define CHAN2 2
#define CHAN3 3


/* Set to 1 to compile in the vertex dumping debug code. */
#define DEBUG_VERTS 0
80
81 /**
82 * Triangle edge info
83 */
84 struct edge {
85 union {
86 struct {
87 float dx; /**< X(v1) - X(v0), used only during setup */
88 float dy; /**< Y(v1) - Y(v0), used only during setup */
89 };
90 vec_float4 ds; /**< vector accessor for dx and dy */
91 };
92 float dxdy; /**< dx/dy */
93 float sx, sy; /**< first sample point coord */
94 int lines; /**< number of lines on this edge */
95 };
96
97
98 struct interp_coef
99 {
100 vector float a0;
101 vector float dadx;
102 vector float dady;
103 };
104
105
106 /**
107 * Triangle setup info (derived from draw_stage).
108 * Also used for line drawing (taking some liberties).
109 */
110 struct setup_stage {
111
112 /* Vertices are just an array of floats making up each attribute in
113 * turn. Currently fixed at 4 floats, but should change in time.
114 * Codegen will help cope with this.
115 */
116 union {
117 struct {
118 const struct vertex_header *vmin;
119 const struct vertex_header *vmid;
120 const struct vertex_header *vmax;
121 const struct vertex_header *vprovoke;
122 };
123 qword vertex_headers;
124 };
125
126 struct edge ebot;
127 struct edge etop;
128 struct edge emaj;
129
130 float oneOverArea; /* XXX maybe make into vector? */
131
132 uint facing;
133
134 uint tx, ty; /**< position of current tile (x, y) */
135
136 int cliprect_minx, cliprect_maxx, cliprect_miny, cliprect_maxy;
137
138 struct interp_coef coef[PIPE_MAX_SHADER_INPUTS];
139
140 struct {
141 vec_int4 quad; /**< [0] = row0, [1] = row1; {left[0],left[1],right[0],right[1]} */
142 int y;
143 unsigned y_flags;
144 unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */
145 } span;
146 };
147
148
149 static struct setup_stage setup;
150
151
152 static INLINE vector float
153 splatx(vector float v)
154 {
155 return spu_splats(spu_extract(v, CHAN0));
156 }
157
158 static INLINE vector float
159 splaty(vector float v)
160 {
161 return spu_splats(spu_extract(v, CHAN1));
162 }
163
164 static INLINE vector float
165 splatz(vector float v)
166 {
167 return spu_splats(spu_extract(v, CHAN2));
168 }
169
170 static INLINE vector float
171 splatw(vector float v)
172 {
173 return spu_splats(spu_extract(v, CHAN3));
174 }
175
176
177 /**
178 * Setup fragment shader inputs by evaluating triangle's vertex
179 * attribute coefficient info.
180 * \param x quad x pos
181 * \param y quad y pos
182 * \param fragZ returns quad Z values
183 * \param fragInputs returns fragment program inputs
184 * Note: this code could be incorporated into the fragment program
185 * itself to avoid the loop and switch.
186 */
187 static void
188 eval_inputs(float x, float y, vector float *fragZ, vector float fragInputs[])
189 {
190 static const vector float deltaX = (const vector float) {0, 1, 0, 1};
191 static const vector float deltaY = (const vector float) {0, 0, 1, 1};
192
193 const uint posSlot = 0;
194 const vector float pos = setup.coef[posSlot].a0;
195 const vector float dposdx = setup.coef[posSlot].dadx;
196 const vector float dposdy = setup.coef[posSlot].dady;
197 const vector float fragX = spu_splats(x) + deltaX;
198 const vector float fragY = spu_splats(y) + deltaY;
199 vector float fragW, wInv;
200 uint i;
201
202 *fragZ = splatz(pos) + fragX * splatz(dposdx) + fragY * splatz(dposdy);
203 fragW = splatw(pos) + fragX * splatw(dposdx) + fragY * splatw(dposdy);
204 wInv = spu_re(fragW); /* 1 / w */
205
206 /* loop over fragment program inputs */
207 for (i = 0; i < spu.vertex_info.num_attribs; i++) {
208 uint attr = i + 1;
209 enum interp_mode interp = spu.vertex_info.attrib[attr].interp_mode;
210
211 /* constant term */
212 vector float a0 = setup.coef[attr].a0;
213 vector float r0 = splatx(a0);
214 vector float r1 = splaty(a0);
215 vector float r2 = splatz(a0);
216 vector float r3 = splatw(a0);
217
218 if (interp == INTERP_LINEAR || interp == INTERP_PERSPECTIVE) {
219 /* linear term */
220 vector float dadx = setup.coef[attr].dadx;
221 vector float dady = setup.coef[attr].dady;
222 /* Use SPU intrinsics here to get slightly better code.
223 * originally: r0 += fragX * splatx(dadx) + fragY * splatx(dady);
224 */
225 r0 = spu_madd(fragX, splatx(dadx), spu_madd(fragY, splatx(dady), r0));
226 r1 = spu_madd(fragX, splaty(dadx), spu_madd(fragY, splaty(dady), r1));
227 r2 = spu_madd(fragX, splatz(dadx), spu_madd(fragY, splatz(dady), r2));
228 r3 = spu_madd(fragX, splatw(dadx), spu_madd(fragY, splatw(dady), r3));
229 if (interp == INTERP_PERSPECTIVE) {
230 /* perspective term */
231 r0 *= wInv;
232 r1 *= wInv;
233 r2 *= wInv;
234 r3 *= wInv;
235 }
236 }
237 fragInputs[CHAN0] = r0;
238 fragInputs[CHAN1] = r1;
239 fragInputs[CHAN2] = r2;
240 fragInputs[CHAN3] = r3;
241 fragInputs += 4;
242 }
243 }
244
245
246 /**
247 * Emit a quad (pass to next stage). No clipping is done.
248 * Note: about 1/5 to 1/7 of the time, mask is zero and this function
249 * should be skipped. But adding the test for that slows things down
250 * overall.
251 */
252 static INLINE void
253 emit_quad( int x, int y, mask_t mask)
254 {
255 /* If any bits in mask are set... */
256 if (spu_extract(spu_orx(mask), 0)) {
257 const int ix = x - setup.cliprect_minx;
258 const int iy = y - setup.cliprect_miny;
259
260 spu.cur_ctile_status = TILE_STATUS_DIRTY;
261 spu.cur_ztile_status = TILE_STATUS_DIRTY;
262
263 {
264 /*
265 * Run fragment shader, execute per-fragment ops, update fb/tile.
266 */
267 vector float inputs[4*4], outputs[2*4];
268 vector unsigned int kill_mask;
269 vector float fragZ;
270
271 eval_inputs((float) x, (float) y, &fragZ, inputs);
272
273 ASSERT(spu.fragment_program);
274 ASSERT(spu.fragment_ops);
275
276 /* Execute the current fragment program */
277 kill_mask = spu.fragment_program(inputs, outputs, spu.constants);
278
279 mask = spu_andc(mask, kill_mask);
280
281 /* Execute per-fragment/quad operations, including:
282 * alpha test, z test, stencil test, blend and framebuffer writing.
283 * Note that there are two different fragment operations functions
284 * that can be called, one for front-facing fragments, and one
285 * for back-facing fragments. (Often the two are the same;
286 * but in some cases, like two-sided stenciling, they can be
287 * very different.) So choose the correct function depending
288 * on the calculated facing.
289 */
290 spu.fragment_ops[setup.facing](ix, iy, &spu.ctile, &spu.ztile,
291 fragZ,
292 outputs[0*4+0],
293 outputs[0*4+1],
294 outputs[0*4+2],
295 outputs[0*4+3],
296 mask);
297 }
298 }
299 }
300
301
/**
 * Map an X or Y pixel coordinate to the coordinate of the 2x2 quad that
 * contains it, i.e. round down to an even number.
 */
static INLINE int
block(int x)
{
   const int low_bit = x & 1;
   return x - low_bit;
}
311
312
313 /**
314 * Render a horizontal span of quads
315 */
316 static void
317 flush_spans(void)
318 {
319 int minleft, maxright;
320
321 const int l0 = spu_extract(setup.span.quad, 0);
322 const int l1 = spu_extract(setup.span.quad, 1);
323 const int r0 = spu_extract(setup.span.quad, 2);
324 const int r1 = spu_extract(setup.span.quad, 3);
325
326 switch (setup.span.y_flags) {
327 case 0x3:
328 /* both odd and even lines written (both quad rows) */
329 minleft = MIN2(l0, l1);
330 maxright = MAX2(r0, r1);
331 break;
332
333 case 0x1:
334 /* only even line written (quad top row) */
335 minleft = l0;
336 maxright = r0;
337 break;
338
339 case 0x2:
340 /* only odd line written (quad bottom row) */
341 minleft = l1;
342 maxright = r1;
343 break;
344
345 default:
346 return;
347 }
348
349 /* OK, we're very likely to need the tile data now.
350 * clear or finish waiting if needed.
351 */
352 if (spu.cur_ctile_status == TILE_STATUS_GETTING) {
353 /* wait for mfc_get() to complete */
354 //printf("SPU: %u: waiting for ctile\n", spu.init.id);
355 wait_on_mask(1 << TAG_READ_TILE_COLOR);
356 spu.cur_ctile_status = TILE_STATUS_CLEAN;
357 }
358 else if (spu.cur_ctile_status == TILE_STATUS_CLEAR) {
359 //printf("SPU %u: clearing C tile %u, %u\n", spu.init.id, setup.tx, setup.ty);
360 clear_c_tile(&spu.ctile);
361 spu.cur_ctile_status = TILE_STATUS_DIRTY;
362 }
363 ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED);
364
365 if (spu.read_depth_stencil) {
366 if (spu.cur_ztile_status == TILE_STATUS_GETTING) {
367 /* wait for mfc_get() to complete */
368 //printf("SPU: %u: waiting for ztile\n", spu.init.id);
369 wait_on_mask(1 << TAG_READ_TILE_Z);
370 spu.cur_ztile_status = TILE_STATUS_CLEAN;
371 }
372 else if (spu.cur_ztile_status == TILE_STATUS_CLEAR) {
373 //printf("SPU %u: clearing Z tile %u, %u\n", spu.init.id, setup.tx, setup.ty);
374 clear_z_tile(&spu.ztile);
375 spu.cur_ztile_status = TILE_STATUS_DIRTY;
376 }
377 ASSERT(spu.cur_ztile_status != TILE_STATUS_DEFINED);
378 }
379
380 /* XXX this loop could be moved into the above switch cases... */
381
382 /* Setup for mask calculation */
383 const vec_int4 quad_LlRr = setup.span.quad;
384 const vec_int4 quad_RrLl = spu_rlqwbyte(quad_LlRr, 8);
385 const vec_int4 quad_LLll = spu_shuffle(quad_LlRr, quad_LlRr, SHUFFLE4(A,A,B,B));
386 const vec_int4 quad_RRrr = spu_shuffle(quad_RrLl, quad_RrLl, SHUFFLE4(A,A,B,B));
387
388 const vec_int4 twos = spu_splats(2);
389
390 const int x = block(minleft);
391 vec_int4 xs = {x, x+1, x, x+1};
392
393 for (; spu_extract(xs, 0) <= block(maxright); xs += twos) {
394 /**
395 * Computes mask to indicate which pixels in the 2x2 quad are actually
396 * inside the triangle's bounds.
397 */
398
399 /* Calculate ({x,x+1,x,x+1} >= {l[0],l[0],l[1],l[1]}) */
400 const mask_t gt_LLll_xs = spu_cmpgt(quad_LLll, xs);
401 const mask_t gte_xs_LLll = spu_nand(gt_LLll_xs, gt_LLll_xs);
402
403 /* Calculate ({r[0],r[0],r[1],r[1]} > {x,x+1,x,x+1}) */
404 const mask_t gt_RRrr_xs = spu_cmpgt(quad_RRrr, xs);
405
406 /* Combine results to create mask */
407 const mask_t mask = spu_and(gte_xs_LLll, gt_RRrr_xs);
408
409 emit_quad(spu_extract(xs, 0), setup.span.y, mask);
410 }
411
412 setup.span.y = 0;
413 setup.span.y_flags = 0;
414 /* Zero right elements */
415 setup.span.quad = spu_shuffle(setup.span.quad, setup.span.quad, SHUFFLE4(A,B,0,0));
416 }
417
418
#if DEBUG_VERTS
/**
 * Debug helper: dump the address of a vertex and the four float components
 * of each of its spu.vertex_info.num_attribs attributes to stderr.
 * Only compiled when DEBUG_VERTS is non-zero.
 */
static void
print_vertex(const struct vertex_header *v)
{
   uint i;
   /* %p requires a void pointer argument */
   fprintf(stderr, "  Vertex: (%p)\n", (const void *) v);
   for (i = 0; i < spu.vertex_info.num_attribs; i++) {
      /* i is unsigned, so print it with %u */
      fprintf(stderr, "    %u: %f %f %f %f\n", i,
              spu_extract(v->data[i], 0),
              spu_extract(v->data[i], 1),
              spu_extract(v->data[i], 2),
              spu_extract(v->data[i], 3));
   }
}
#endif
434
435
436 /**
437 * Sort vertices from top to bottom.
438 * Compute area and determine front vs. back facing.
439 * Do coarse clip test against tile bounds
440 * \return FALSE if tri is totally outside tile, TRUE otherwise
441 */
442 static boolean
443 setup_sort_vertices(const qword vs)
444 {
445 float area, sign;
446
447 #if DEBUG_VERTS
448 if (spu.init.id==0) {
449 fprintf(stderr, "SPU %u: Triangle:\n", spu.init.id);
450 print_vertex(v0);
451 print_vertex(v1);
452 print_vertex(v2);
453 }
454 #endif
455
456 /* determine bottom to top order of vertices */
457 {
458 /* A table of shuffle patterns for putting vertex_header pointers into
459 correct order. Quite magical. */
460 const qword sort_order_patterns[] = {
461 SHUFB4(A,B,C,C),
462 SHUFB4(C,A,B,C),
463 SHUFB4(A,C,B,C),
464 SHUFB4(B,C,A,C),
465 SHUFB4(B,A,C,C),
466 SHUFB4(C,B,A,C) };
467
468 /* Collate y values into two vectors for comparison.
469 Using only one shuffle constant! ;) */
470 const vector float f0 = ((const struct vertex_header*)si_to_ptr(vs))->data[0];
471 const vector float f1 = ((const struct vertex_header*)si_to_ptr(si_rotqbyi(vs, 4)))->data[0];
472 const vector float f2 = ((const struct vertex_header*)si_to_ptr(si_rotqbyi(vs, 8)))->data[0];
473 const vec_float4 y_02_ = spu_shuffle(f0, f2, SHUFFLE4(0,B,b,C));
474 const vec_float4 y_10_ = spu_shuffle(f1, f0, SHUFFLE4(0,B,b,C));
475 const vec_float4 y_012 = spu_shuffle(y_02_, f1, SHUFFLE4(0,B,b,C));
476 const vec_float4 y_120 = spu_shuffle(y_10_, f2, SHUFFLE4(0,B,b,C));
477
478 /* Perform comparison: {y0,y1,y2} > {y1,y2,y0} */
479 const vec_uint4 compare = spu_cmpgt(y_012, y_120);
480 /* Compress the result of the comparison into 4 bits */
481 const vec_uint4 gather = spu_gather(compare);
482 /* Subtract one to attain the index into the LUT. Magical. */
483 const unsigned int index = spu_extract(gather, 0) - 1;
484
485 /* Load the appropriate pattern and construct the desired vector. */
486 setup.vertex_headers = si_shufb(vs, vs, sort_order_patterns[index]);
487
488 /* Using the result of the comparison, set sign.
489 Very magical. */
490 sign = ((si_to_uint(si_cntb((qword)gather)) == 2) ? 1.0f : -1.0f);
491 }
492
493 /* Check if triangle is completely outside the tile bounds */
494 if (spu_extract(setup.vmin->data[0], 1) > setup.cliprect_maxy)
495 return FALSE;
496 if (spu_extract(setup.vmax->data[0], 1) < setup.cliprect_miny)
497 return FALSE;
498 if (spu_extract(setup.vmin->data[0], 0) < setup.cliprect_minx &&
499 spu_extract(setup.vmid->data[0], 0) < setup.cliprect_minx &&
500 spu_extract(setup.vmax->data[0], 0) < setup.cliprect_minx)
501 return FALSE;
502 if (spu_extract(setup.vmin->data[0], 0) > setup.cliprect_maxx &&
503 spu_extract(setup.vmid->data[0], 0) > setup.cliprect_maxx &&
504 spu_extract(setup.vmax->data[0], 0) > setup.cliprect_maxx)
505 return FALSE;
506
507 setup.ebot.ds = spu_sub(setup.vmid->data[0], setup.vmin->data[0]);
508 setup.emaj.ds = spu_sub(setup.vmax->data[0], setup.vmin->data[0]);
509 setup.etop.ds = spu_sub(setup.vmax->data[0], setup.vmid->data[0]);
510
511 /*
512 * Compute triangle's area. Use 1/area to compute partial
513 * derivatives of attributes later.
514 */
515 area = setup.emaj.dx * setup.ebot.dy - setup.ebot.dx * setup.emaj.dy;
516
517 setup.oneOverArea = 1.0f / area;
518
519 /* The product of area * sign indicates front/back orientation (0/1).
520 * Just in case someone gets the bright idea of switching the front
521 * and back constants without noticing that we're assuming their
522 * values in this operation, also assert that the values are
523 * what we think they are.
524 */
525 ASSERT(CELL_FACING_FRONT == 0);
526 ASSERT(CELL_FACING_BACK == 1);
527 setup.facing = (area * sign > 0.0f)
528 ^ (spu.rasterizer.front_winding == PIPE_WINDING_CW);
529
530 return TRUE;
531 }
532
533
534 /**
535 * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
536 * The value value comes from vertex->data[slot].
537 * The result will be put into setup.coef[slot].a0.
538 * \param slot which attribute slot
539 */
540 static INLINE void
541 const_coeff4(uint slot)
542 {
543 setup.coef[slot].dadx = (vector float) {0.0, 0.0, 0.0, 0.0};
544 setup.coef[slot].dady = (vector float) {0.0, 0.0, 0.0, 0.0};
545 setup.coef[slot].a0 = setup.vprovoke->data[slot];
546 }
547
548
549 /**
550 * As above, but interp setup all four vector components.
551 */
552 static INLINE void
553 tri_linear_coeff4(uint slot)
554 {
555 const vector float vmin_d = setup.vmin->data[slot];
556 const vector float vmid_d = setup.vmid->data[slot];
557 const vector float vmax_d = setup.vmax->data[slot];
558 const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f);
559 const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f);
560
561 vector float botda = vmid_d - vmin_d;
562 vector float majda = vmax_d - vmin_d;
563
564 vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda),
565 spu_mul(botda, spu_splats(setup.emaj.dy)));
566 vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda),
567 spu_mul(majda, spu_splats(setup.ebot.dx)));
568
569 setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea));
570 setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea));
571
572 vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx);
573 vector float tempy = spu_mul(setup.coef[slot].dady, yyyy);
574
575 setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy));
576 }
577
578
579 /**
580 * Compute a0, dadx and dady for a perspective-corrected interpolant,
581 * for a triangle.
582 * We basically multiply the vertex value by 1/w before computing
583 * the plane coefficients (a0, dadx, dady).
584 * Later, when we compute the value at a particular fragment position we'll
585 * divide the interpolated value by the interpolated W at that fragment.
586 */
587 static void
588 tri_persp_coeff4(uint slot)
589 {
590 const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f);
591 const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f);
592
593 const vector float vmin_w = spu_splats(spu_extract(setup.vmin->data[0], 3));
594 const vector float vmid_w = spu_splats(spu_extract(setup.vmid->data[0], 3));
595 const vector float vmax_w = spu_splats(spu_extract(setup.vmax->data[0], 3));
596
597 vector float vmin_d = setup.vmin->data[slot];
598 vector float vmid_d = setup.vmid->data[slot];
599 vector float vmax_d = setup.vmax->data[slot];
600
601 vmin_d = spu_mul(vmin_d, vmin_w);
602 vmid_d = spu_mul(vmid_d, vmid_w);
603 vmax_d = spu_mul(vmax_d, vmax_w);
604
605 vector float botda = vmid_d - vmin_d;
606 vector float majda = vmax_d - vmin_d;
607
608 vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda),
609 spu_mul(botda, spu_splats(setup.emaj.dy)));
610 vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda),
611 spu_mul(majda, spu_splats(setup.ebot.dx)));
612
613 setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea));
614 setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea));
615
616 vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx);
617 vector float tempy = spu_mul(setup.coef[slot].dady, yyyy);
618
619 setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy));
620 }
621
622
623
624 /**
625 * Compute the setup.coef[] array dadx, dady, a0 values.
626 * Must be called after setup.vmin,vmid,vmax,vprovoke are initialized.
627 */
628 static void
629 setup_tri_coefficients(void)
630 {
631 uint i;
632
633 for (i = 0; i < spu.vertex_info.num_attribs; i++) {
634 switch (spu.vertex_info.attrib[i].interp_mode) {
635 case INTERP_NONE:
636 break;
637 case INTERP_CONSTANT:
638 const_coeff4(i);
639 break;
640 case INTERP_POS:
641 /* fall-through */
642 case INTERP_LINEAR:
643 tri_linear_coeff4(i);
644 break;
645 case INTERP_PERSPECTIVE:
646 tri_persp_coeff4(i);
647 break;
648 default:
649 ASSERT(0);
650 }
651 }
652 }
653
654
655 static void
656 setup_tri_edges(void)
657 {
658 float vmin_x = spu_extract(setup.vmin->data[0], 0) + 0.5f;
659 float vmid_x = spu_extract(setup.vmid->data[0], 0) + 0.5f;
660
661 float vmin_y = spu_extract(setup.vmin->data[0], 1) - 0.5f;
662 float vmid_y = spu_extract(setup.vmid->data[0], 1) - 0.5f;
663 float vmax_y = spu_extract(setup.vmax->data[0], 1) - 0.5f;
664
665 setup.emaj.sy = CEILF(vmin_y);
666 setup.emaj.lines = (int) CEILF(vmax_y - setup.emaj.sy);
667 setup.emaj.dxdy = setup.emaj.dx / setup.emaj.dy;
668 setup.emaj.sx = vmin_x + (setup.emaj.sy - vmin_y) * setup.emaj.dxdy;
669
670 setup.etop.sy = CEILF(vmid_y);
671 setup.etop.lines = (int) CEILF(vmax_y - setup.etop.sy);
672 setup.etop.dxdy = setup.etop.dx / setup.etop.dy;
673 setup.etop.sx = vmid_x + (setup.etop.sy - vmid_y) * setup.etop.dxdy;
674
675 setup.ebot.sy = CEILF(vmin_y);
676 setup.ebot.lines = (int) CEILF(vmid_y - setup.ebot.sy);
677 setup.ebot.dxdy = setup.ebot.dx / setup.ebot.dy;
678 setup.ebot.sx = vmin_x + (setup.ebot.sy - vmin_y) * setup.ebot.dxdy;
679 }
680
681
682 /**
683 * Render the upper or lower half of a triangle.
684 * Scissoring/cliprect is applied here too.
685 */
686 static void
687 subtriangle(struct edge *eleft, struct edge *eright, unsigned lines)
688 {
689 const int minx = setup.cliprect_minx;
690 const int maxx = setup.cliprect_maxx;
691 const int miny = setup.cliprect_miny;
692 const int maxy = setup.cliprect_maxy;
693 int y, start_y, finish_y;
694 int sy = (int)eleft->sy;
695
696 ASSERT((int)eleft->sy == (int) eright->sy);
697
698 /* clip top/bottom */
699 start_y = sy;
700 finish_y = sy + lines;
701
702 if (start_y < miny)
703 start_y = miny;
704
705 if (finish_y > maxy)
706 finish_y = maxy;
707
708 start_y -= sy;
709 finish_y -= sy;
710
711 /*
712 _mesa_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);
713 */
714
715 for (y = start_y; y < finish_y; y++) {
716
717 /* avoid accumulating adds as floats don't have the precision to
718 * accurately iterate large triangle edges that way. luckily we
719 * can just multiply these days.
720 *
721 * this is all drowned out by the attribute interpolation anyway.
722 */
723 int left = (int)(eleft->sx + y * eleft->dxdy);
724 int right = (int)(eright->sx + y * eright->dxdy);
725
726 /* clip left/right */
727 if (left < minx)
728 left = minx;
729 if (right > maxx)
730 right = maxx;
731
732 if (left < right) {
733 int _y = sy + y;
734 if (block(_y) != setup.span.y) {
735 flush_spans();
736 setup.span.y = block(_y);
737 }
738
739 int offset = _y&1;
740 vec_int4 quad_LlRr = {left, left, right, right};
741 /* Store left and right in 0 or 1 row of quad based on offset */
742 setup.span.quad = spu_sel(quad_LlRr, setup.span.quad, spu_maskw(5<<offset));
743 setup.span.y_flags |= 1<<offset;
744 }
745 }
746
747
748 /* save the values so that emaj can be restarted:
749 */
750 eleft->sx += lines * eleft->dxdy;
751 eright->sx += lines * eright->dxdy;
752 eleft->sy += lines;
753 eright->sy += lines;
754 }
755
756
757 /**
758 * Draw triangle into tile at (tx, ty) (tile coords)
759 * The tile data should have already been fetched.
760 */
761 boolean
762 tri_draw(const qword vs,
763 uint tx, uint ty)
764 {
765 setup.tx = tx;
766 setup.ty = ty;
767
768 /* set clipping bounds to tile bounds */
769 setup.cliprect_minx = tx * TILE_SIZE;
770 setup.cliprect_miny = ty * TILE_SIZE;
771 setup.cliprect_maxx = (tx + 1) * TILE_SIZE;
772 setup.cliprect_maxy = (ty + 1) * TILE_SIZE;
773
774 if(!setup_sort_vertices(vs)) {
775 return FALSE; /* totally clipped */
776 }
777
778 setup_tri_coefficients();
779 setup_tri_edges();
780
781 setup.span.y = 0;
782 setup.span.y_flags = 0;
783 /* Zero right elements */
784 setup.span.quad = spu_shuffle(setup.span.quad, setup.span.quad, SHUFFLE4(A,B,0,0));
785
786 if (setup.oneOverArea < 0.0) {
787 /* emaj on left */
788 subtriangle( &setup.emaj, &setup.ebot, setup.ebot.lines );
789 subtriangle( &setup.emaj, &setup.etop, setup.etop.lines );
790 }
791 else {
792 /* emaj on right */
793 subtriangle( &setup.ebot, &setup.emaj, setup.ebot.lines );
794 subtriangle( &setup.etop, &setup.emaj, setup.etop.lines );
795 }
796
797 flush_spans();
798
799 return TRUE;
800 }