Cell: comment about emit_quad() mask
[mesa.git] / src / mesa / pipe / cell / spu / spu_tri.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * Triangle rendering within a tile.
30 */
31
32 #include "pipe/p_compiler.h"
33 #include "pipe/p_format.h"
34 #include "pipe/p_util.h"
35 #include "spu_colorpack.h"
36 #include "spu_main.h"
37 #include "spu_texture.h"
38 #include "spu_tile.h"
39 #include "spu_tri.h"
40
41 #include "spu_ztest.h"
42
43
44 /** Masks are uint[4] vectors with each element being 0 or 0xffffffff */
45 typedef vector unsigned int mask_t;
46
47
/**
 * Simplified vertex layout, reduced from the types used elsewhere in
 * Gallium.  A vertex is viewed as a sequence of 4-float attributes and is
 * indexed as data[attribute][component].  This file reads the window-space
 * position from attribute 0 (data[0][0] = x, data[0][1] = y).
 */
struct vertex_header {
   float data[0][4];
};
54
55
56
/*
 * XXX fix this.
 *
 * Cheap stand-in for ceilf(): add just under 1.0 and truncate toward zero.
 * This is only an approximation: fractions smaller than 0.00001 are not
 * rounded up, and because the truncation is toward zero the result is not
 * a true ceiling for negative inputs.
 */
#undef CEILF
#define CEILF(X) ((float) (int) ((X) + 0.99999))
60
61
/* Index of each fragment within a 2x2 quad. */
#define QUAD_TOP_LEFT     0
#define QUAD_TOP_RIGHT    1
#define QUAD_BOTTOM_LEFT  2
#define QUAD_BOTTOM_RIGHT 3

/* One bit per quad fragment, at the bit position given by its index. */
#define MASK_TOP_LEFT     (1 << QUAD_TOP_LEFT)
#define MASK_TOP_RIGHT    (1 << QUAD_TOP_RIGHT)
#define MASK_BOTTOM_LEFT  (1 << QUAD_BOTTOM_LEFT)
#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT)
#define MASK_ALL          0xf


/* Set to 1 to compile in the per-triangle vertex dump (print_vertex). */
#define DEBUG_VERTS 0
74
/**
 * Per-edge setup data for one triangle edge running from a lower vertex
 * v0 to a higher vertex v1.
 */
struct edge {
   float dx;    /**< X(v1) - X(v0); only needed while setting up */
   float dy;    /**< Y(v1) - Y(v0); only needed while setting up */
   float dxdy;  /**< slope: change in x per scanline (dx / dy) */
   float sx;    /**< x coordinate of the first sample point */
   float sy;    /**< y coordinate of the first sample point */
   int lines;   /**< number of scanlines this edge spans */
};
85
86
87 struct interp_coef
88 {
89 float4 a0;
90 float4 dadx;
91 float4 dady;
92 };
93
94
95 /**
96 * Triangle setup info (derived from draw_stage).
97 * Also used for line drawing (taking some liberties).
98 */
99 struct setup_stage {
100
101 /* Vertices are just an array of floats making up each attribute in
102 * turn. Currently fixed at 4 floats, but should change in time.
103 * Codegen will help cope with this.
104 */
105 const struct vertex_header *vmax;
106 const struct vertex_header *vmid;
107 const struct vertex_header *vmin;
108 const struct vertex_header *vprovoke;
109
110 struct edge ebot;
111 struct edge etop;
112 struct edge emaj;
113
114 float oneoverarea;
115
116 uint tx, ty;
117
118 int cliprect_minx, cliprect_maxx, cliprect_miny, cliprect_maxy;
119
120 #if 0
121 struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS];
122 #else
123 struct interp_coef coef[PIPE_MAX_SHADER_INPUTS];
124 #endif
125
126 #if 0
127 struct quad_header quad;
128 #endif
129
130 struct {
131 int left[2]; /**< [0] = row0, [1] = row1 */
132 int right[2];
133 int y;
134 unsigned y_flags;
135 unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */
136 } span;
137 };
138
139
140
141 static struct setup_stage setup;
142
143
144
145
#if 0
/**
 * Compiled out.  Softpipe leftover: reinterprets a draw_stage pointer as a
 * setup_stage pointer.
 */
static INLINE struct setup_stage *
setup_stage( struct draw_stage *stage )
{
   return (struct setup_stage *) stage;
}
#endif
155
#if 0
/**
 * Compiled out.  Softpipe leftover: trims setup.quad.mask against the
 * scissor/surface rectangle, clearing it completely when the quad lies
 * outside.
 *
 * NOTE(review): this cannot simply be re-enabled.  It uses 'softpipe' and
 * 'quad' members that struct setup_stage does not have in this file, and
 * it applies '.' to the pointer parameter 'setup'.
 */
static INLINE void
quad_clip(struct setup_stage *setup)
{
   const struct pipe_scissor_state *cliprect = &setup.softpipe->cliprect;
   const int minx = (int) cliprect->minx;
   const int maxx = (int) cliprect->maxx;
   const int miny = (int) cliprect->miny;
   const int maxy = (int) cliprect->maxy;

   if (setup.quad.x0 >= maxx ||
       setup.quad.y0 >= maxy ||
       setup.quad.x0 + 1 < minx ||
       setup.quad.y0 + 1 < miny) {
      /* totally clipped */
      setup.quad.mask = 0x0;
      return;
   }

   /* partially clipped: drop the column/row that falls outside */
   if (setup.quad.x0 < minx)
      setup.quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
   if (setup.quad.y0 < miny)
      setup.quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
   if (setup.quad.x0 == maxx - 1)
      setup.quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
   if (setup.quad.y0 == maxy - 1)
      setup.quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
}
#endif
187
#if 0
/**
 * Compiled out.  Softpipe leftover: clips the current quad with
 * quad_clip() and, if any fragment survives, hands it to the first quad
 * stage.
 */
static INLINE void
clip_emit_quad(struct setup_stage *setup)
{
   quad_clip(setup);
   if (setup.quad.mask) {
      struct softpipe_context *sp = setup.softpipe;
      sp->quad.first->run(sp->quad.first, &setup.quad);
   }
}
#endif
202
203 /**
204 * Evaluate attribute coefficients (plane equations) to compute
205 * attribute values for the four fragments in a quad.
206 * Eg: four colors will be compute.
207 */
208 static INLINE void
209 eval_coeff(uint slot, float x, float y, float4 result[4])
210 {
211 switch (spu.vertex_info.interp_mode[slot]) {
212 case INTERP_CONSTANT:
213 result[QUAD_TOP_LEFT] =
214 result[QUAD_TOP_RIGHT] =
215 result[QUAD_BOTTOM_LEFT] =
216 result[QUAD_BOTTOM_RIGHT] = setup.coef[slot].a0;
217 break;
218
219 case INTERP_LINEAR:
220 /* fall-through, for now */
221 default:
222 {
223 register vector float dadx = setup.coef[slot].dadx.v;
224 register vector float dady = setup.coef[slot].dady.v;
225 register vector float topLeft
226 = spu_add(setup.coef[slot].a0.v,
227 spu_add(spu_mul(spu_splats(x), dadx),
228 spu_mul(spu_splats(y), dady)));
229
230 result[QUAD_TOP_LEFT].v = topLeft;
231 result[QUAD_TOP_RIGHT].v = spu_add(topLeft, dadx);
232 result[QUAD_BOTTOM_LEFT].v = spu_add(topLeft, dady);
233 result[QUAD_BOTTOM_RIGHT].v = spu_add(spu_add(topLeft, dadx), dady);
234 }
235 }
236 }
237
238
239 static INLINE vector float
240 eval_z(float x, float y)
241 {
242 const uint slot = 0;
243 const float dzdx = setup.coef[slot].dadx.f[2];
244 const float dzdy = setup.coef[slot].dady.f[2];
245 const float topLeft = setup.coef[slot].a0.f[2] + x * dzdx + y * dzdy;
246 const vector float topLeftv = spu_splats(topLeft);
247 const vector float derivs = (vector float) { 0.0, dzdx, dzdy, dzdx + dzdy };
248 return spu_add(topLeftv, derivs);
249 }
250
251
252 static INLINE mask_t
253 do_depth_test(int x, int y, mask_t quadmask)
254 {
255 float4 zvals;
256 mask_t mask;
257
258 zvals.v = eval_z((float) x, (float) y);
259
260 if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) {
261 int ix = (x - setup.cliprect_minx) / 4;
262 int iy = (y - setup.cliprect_miny) / 2;
263 mask = spu_z16_test_less(zvals.v, &spu.ztile.us8[iy][ix], x>>1, quadmask);
264 }
265 else {
266 int ix = (x - setup.cliprect_minx) / 2;
267 int iy = (y - setup.cliprect_miny) / 2;
268 mask = spu_z32_test_less(zvals.v, &spu.ztile.ui4[iy][ix], quadmask);
269 }
270
271 if (spu_extract(spu_orx(mask), 0))
272 spu.cur_ztile_status = TILE_STATUS_DIRTY;
273
274 return mask;
275 }
276
277
278 /**
279 * Emit a quad (pass to next stage). No clipping is done.
280 * Note: about 1/5 to 1/7 of the time, mask is zero and this function
281 * should be skipped. But adding the test for that slows things down
282 * overall.
283 */
284 static INLINE void
285 emit_quad( int x, int y, mask_t mask )
286 {
287 #if 0
288 struct softpipe_context *sp = setup.softpipe;
289 setup.quad.x0 = x;
290 setup.quad.y0 = y;
291 setup.quad.mask = mask;
292 sp->quad.first->run(sp->quad.first, &setup.quad);
293 #else
294
295 if (spu.depth_stencil.depth.enabled) {
296 mask = do_depth_test(x, y, mask);
297 }
298
299 /* If any bits in mask are set... */
300 if (spu_extract(spu_orx(mask), 0)) {
301 const int ix = x - setup.cliprect_minx;
302 const int iy = y - setup.cliprect_miny;
303
304 spu.cur_ctile_status = TILE_STATUS_DIRTY;
305
306 if (spu.texture.start) {
307 /* texture mapping */
308 float4 texcoords[4];
309 eval_coeff(2, (float) x, (float) y, texcoords);
310
311 if (spu_extract(mask, 0))
312 spu.ctile.ui[iy][ix] = sample_texture(texcoords[0]);
313 if (spu_extract(mask, 1))
314 spu.ctile.ui[iy][ix+1] = sample_texture(texcoords[1]);
315 if (spu_extract(mask, 2))
316 spu.ctile.ui[iy+1][ix] = sample_texture(texcoords[2]);
317 if (spu_extract(mask, 3))
318 spu.ctile.ui[iy+1][ix+1] = sample_texture(texcoords[3]);
319 }
320 else {
321 /* simple shading */
322 const vector unsigned char shuffle = spu.color_shuffle;
323 float4 colors[4];
324 eval_coeff(1, (float) x, (float) y, colors);
325
326 if (spu_extract(mask, 0))
327 spu.ctile.ui[iy][ix] = spu_pack_color_shuffle(colors[0].v, shuffle);
328 if (spu_extract(mask, 1))
329 spu.ctile.ui[iy][ix+1] = spu_pack_color_shuffle(colors[1].v, shuffle);
330 if (spu_extract(mask, 2))
331 spu.ctile.ui[iy+1][ix] = spu_pack_color_shuffle(colors[2].v, shuffle);
332 if (spu_extract(mask, 3))
333 spu.ctile.ui[iy+1][ix+1] = spu_pack_color_shuffle(colors[3].v, shuffle);
334 }
335
336 #if 0
337 /* SIMD_Z with swizzled color buffer (someday) */
338 vector unsigned int uicolors = *((vector unsigned int *) &colors);
339 spu.ctile.ui4[iy/2][ix/2] = spu_sel(spu.ctile.ui4[iy/2][ix/2], uicolors, mask);
340 #endif
341 }
342
343 #endif
344 }
345
346
347 /**
348 * Given an X or Y coordinate, return the block/quad coordinate that it
349 * belongs to.
350 */
351 static INLINE int block( int x )
352 {
353 return x & ~1;
354 }
355
356
357 /**
358 * Compute mask which indicates which pixels in the 2x2 quad are actually inside
359 * the triangle's bounds.
360 * The mask is a uint4 vector and each element will be 0 or 0xffffffff.
361 */
362 static INLINE mask_t calculate_mask( int x )
363 {
364 /* This is a little tricky.
365 * Use & instead of && to avoid branches.
366 * Use negation to convert true/false to ~0/0 values.
367 */
368 mask_t mask;
369 mask = spu_insert(-((x >= setup.span.left[0]) & (x < setup.span.right[0])), mask, 0);
370 mask = spu_insert(-((x+1 >= setup.span.left[0]) & (x+1 < setup.span.right[0])), mask, 1);
371 mask = spu_insert(-((x >= setup.span.left[1]) & (x < setup.span.right[1])), mask, 2);
372 mask = spu_insert(-((x+1 >= setup.span.left[1]) & (x+1 < setup.span.right[1])), mask, 3);
373 return mask;
374 }
375
376
377 /**
378 * Render a horizontal span of quads
379 */
380 static void flush_spans( void )
381 {
382 int minleft, maxright;
383 int x;
384
385 switch (setup.span.y_flags) {
386 case 0x3:
387 /* both odd and even lines written (both quad rows) */
388 minleft = MIN2(setup.span.left[0], setup.span.left[1]);
389 maxright = MAX2(setup.span.right[0], setup.span.right[1]);
390 break;
391
392 case 0x1:
393 /* only even line written (quad top row) */
394 minleft = setup.span.left[0];
395 maxright = setup.span.right[0];
396 break;
397
398 case 0x2:
399 /* only odd line written (quad bottom row) */
400 minleft = setup.span.left[1];
401 maxright = setup.span.right[1];
402 break;
403
404 default:
405 return;
406 }
407
408
409 /* _really_ clear tiles now if needed */
410 if (spu.cur_ctile_status == TILE_STATUS_CLEAR) {
411 clear_c_tile(&spu.ctile);
412 spu.cur_ctile_status = TILE_STATUS_DIRTY;
413 }
414 if (spu.depth_stencil.depth.enabled &&
415 spu.cur_ztile_status == TILE_STATUS_CLEAR) {
416 clear_z_tile(&spu.ztile);
417 spu.cur_ztile_status = TILE_STATUS_DIRTY;
418 }
419
420 /* XXX this loop could be moved into the above switch cases and
421 * calculate_mask() could be simplified a bit...
422 */
423 for (x = block(minleft); x <= block(maxright); x += 2) {
424 emit_quad( x, setup.span.y, calculate_mask( x ) );
425 }
426
427 setup.span.y = 0;
428 setup.span.y_flags = 0;
429 setup.span.right[0] = 0;
430 setup.span.right[1] = 0;
431 }
432
#if DEBUG_VERTS
/**
 * Debug helper: dump every attribute of a vertex to stderr.
 * Only compiled when DEBUG_VERTS is non-zero.
 *
 * The attribute count comes from spu.vertex_info.num_attribs (the same
 * count setup_tri_coefficients() iterates over); struct setup_stage has
 * no 'quad' member in this file, so the former setup.quad.nr_attrs could
 * not compile.
 */
static void print_vertex(const struct vertex_header *v)
{
   int i;
   /* %p requires a pointer to void */
   fprintf(stderr, "Vertex: (%p)\n", (const void *) v);
   for (i = 0; i < (int) spu.vertex_info.num_attribs; i++) {
      fprintf(stderr, " %d: %f %f %f %f\n", i,
              v->data[i][0], v->data[i][1], v->data[i][2], v->data[i][3]);
   }
}
#endif
444
445 static boolean setup_sort_vertices(const struct vertex_header *v0,
446 const struct vertex_header *v1,
447 const struct vertex_header *v2)
448 {
449
450 #if DEBUG_VERTS
451 fprintf(stderr, "Triangle:\n");
452 print_vertex(v0);
453 print_vertex(v1);
454 print_vertex(v2);
455 #endif
456
457 setup.vprovoke = v2;
458
459 /* determine bottom to top order of vertices */
460 {
461 float y0 = v0->data[0][1];
462 float y1 = v1->data[0][1];
463 float y2 = v2->data[0][1];
464 if (y0 <= y1) {
465 if (y1 <= y2) {
466 /* y0<=y1<=y2 */
467 setup.vmin = v0;
468 setup.vmid = v1;
469 setup.vmax = v2;
470 }
471 else if (y2 <= y0) {
472 /* y2<=y0<=y1 */
473 setup.vmin = v2;
474 setup.vmid = v0;
475 setup.vmax = v1;
476 }
477 else {
478 /* y0<=y2<=y1 */
479 setup.vmin = v0;
480 setup.vmid = v2;
481 setup.vmax = v1;
482 }
483 }
484 else {
485 if (y0 <= y2) {
486 /* y1<=y0<=y2 */
487 setup.vmin = v1;
488 setup.vmid = v0;
489 setup.vmax = v2;
490 }
491 else if (y2 <= y1) {
492 /* y2<=y1<=y0 */
493 setup.vmin = v2;
494 setup.vmid = v1;
495 setup.vmax = v0;
496 }
497 else {
498 /* y1<=y2<=y0 */
499 setup.vmin = v1;
500 setup.vmid = v2;
501 setup.vmax = v0;
502 }
503 }
504 }
505
506 /* Check if triangle is completely outside the tile bounds */
507 if (setup.vmin->data[0][1] > setup.cliprect_maxy)
508 return FALSE;
509 if (setup.vmax->data[0][1] < setup.cliprect_miny)
510 return FALSE;
511 if (setup.vmin->data[0][0] < setup.cliprect_minx &&
512 setup.vmid->data[0][0] < setup.cliprect_minx &&
513 setup.vmax->data[0][0] < setup.cliprect_minx)
514 return FALSE;
515 if (setup.vmin->data[0][0] > setup.cliprect_maxx &&
516 setup.vmid->data[0][0] > setup.cliprect_maxx &&
517 setup.vmax->data[0][0] > setup.cliprect_maxx)
518 return FALSE;
519
520 setup.ebot.dx = setup.vmid->data[0][0] - setup.vmin->data[0][0];
521 setup.ebot.dy = setup.vmid->data[0][1] - setup.vmin->data[0][1];
522 setup.emaj.dx = setup.vmax->data[0][0] - setup.vmin->data[0][0];
523 setup.emaj.dy = setup.vmax->data[0][1] - setup.vmin->data[0][1];
524 setup.etop.dx = setup.vmax->data[0][0] - setup.vmid->data[0][0];
525 setup.etop.dy = setup.vmax->data[0][1] - setup.vmid->data[0][1];
526
527 /*
528 * Compute triangle's area. Use 1/area to compute partial
529 * derivatives of attributes later.
530 *
531 * The area will be the same as prim->det, but the sign may be
532 * different depending on how the vertices get sorted above.
533 *
534 * To determine whether the primitive is front or back facing we
535 * use the prim->det value because its sign is correct.
536 */
537 {
538 const float area = (setup.emaj.dx * setup.ebot.dy -
539 setup.ebot.dx * setup.emaj.dy);
540
541 setup.oneoverarea = 1.0f / area;
542 /*
543 _mesa_printf("%s one-over-area %f area %f det %f\n",
544 __FUNCTION__, setup.oneoverarea, area, prim->det );
545 */
546 }
547
548 #if 0
549 /* We need to know if this is a front or back-facing triangle for:
550 * - the GLSL gl_FrontFacing fragment attribute (bool)
551 * - two-sided stencil test
552 */
553 setup.quad.facing = (prim->det > 0.0) ^ (setup.softpipe->rasterizer->front_winding == PIPE_WINDING_CW);
554 #endif
555
556 return TRUE;
557 }
558
559
560 /**
561 * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
562 * The value value comes from vertex->data[slot].
563 * The result will be put into setup.coef[slot].a0.
564 * \param slot which attribute slot
565 */
566 static INLINE void const_coeff(uint slot)
567 {
568 setup.coef[slot].dadx.v = (vector float) {0.0, 0.0, 0.0, 0.0};
569 setup.coef[slot].dady.v = (vector float) {0.0, 0.0, 0.0, 0.0};
570 setup.coef[slot].a0.f[0] = setup.vprovoke->data[slot][0];
571 setup.coef[slot].a0.f[1] = setup.vprovoke->data[slot][1];
572 setup.coef[slot].a0.f[2] = setup.vprovoke->data[slot][2];
573 setup.coef[slot].a0.f[3] = setup.vprovoke->data[slot][3];
574 }
575
576
577 /**
578 * Compute a0, dadx and dady for a linearly interpolated coefficient,
579 * for a triangle.
580 */
581 static void tri_linear_coeff( uint slot, uint firstComp, uint lastComp )
582 {
583 uint i;
584 for (i = firstComp; i < lastComp; i++) {
585 float botda = setup.vmid->data[slot][i] - setup.vmin->data[slot][i];
586 float majda = setup.vmax->data[slot][i] - setup.vmin->data[slot][i];
587 float a = setup.ebot.dy * majda - botda * setup.emaj.dy;
588 float b = setup.emaj.dx * botda - majda * setup.ebot.dx;
589
590 ASSERT(slot < PIPE_MAX_SHADER_INPUTS);
591
592 setup.coef[slot].dadx.f[i] = a * setup.oneoverarea;
593 setup.coef[slot].dady.f[i] = b * setup.oneoverarea;
594
595 /* calculate a0 as the value which would be sampled for the
596 * fragment at (0,0), taking into account that we want to sample at
597 * pixel centers, in other words (0.5, 0.5).
598 *
599 * this is neat but unfortunately not a good way to do things for
600 * triangles with very large values of dadx or dady as it will
601 * result in the subtraction and re-addition from a0 of a very
602 * large number, which means we'll end up loosing a lot of the
603 * fractional bits and precision from a0. the way to fix this is
604 * to define a0 as the sample at a pixel center somewhere near vmin
605 * instead - i'll switch to this later.
606 */
607 setup.coef[slot].a0.f[i] = (setup.vmin->data[slot][i] -
608 (setup.coef[slot].dadx.f[i] * (setup.vmin->data[0][0] - 0.5f) +
609 setup.coef[slot].dady.f[i] * (setup.vmin->data[0][1] - 0.5f)));
610 }
611
612 /*
613 _mesa_printf("attr[%d].%c: %f dx:%f dy:%f\n",
614 slot, "xyzw"[i],
615 setup.coef[slot].a0[i],
616 setup.coef[slot].dadx.f[i],
617 setup.coef[slot].dady.f[i]);
618 */
619 }
620
621
#if 0
/**
 * Compiled out.
 *
 * Compute a0, dadx and dady of a perspective-corrected attribute
 * component for the current triangle.  The vertex values are multiplied
 * by 1/w (data[0][3]) before the plane coefficients are computed; at
 * each fragment the interpolated value must later be divided by the
 * interpolated W.
 */
static void tri_persp_coeff( unsigned slot,
                             unsigned i )
{
   /* premultiply by 1/w: */
   float mina = setup.vmin->data[slot][i] * setup.vmin->data[0][3];
   float mida = setup.vmid->data[slot][i] * setup.vmid->data[0][3];
   float maxa = setup.vmax->data[slot][i] * setup.vmax->data[0][3];

   float botda = mida - mina;
   float majda = maxa - mina;
   float a = setup.ebot.dy * majda - botda * setup.emaj.dy;
   float b = setup.emaj.dx * botda - majda * setup.ebot.dx;

   assert(slot < PIPE_MAX_SHADER_INPUTS);
   assert(i <= 3);

   setup.coef[slot].dadx.f[i] = a * setup.oneoverarea;
   setup.coef[slot].dady.f[i] = b * setup.oneoverarea;
   /* a0: value at the (0.5, 0.5) sample of the fragment at (0,0) */
   setup.coef[slot].a0.f[i] = (mina -
      (setup.coef[slot].dadx.f[i] * (setup.vmin->data[0][0] - 0.5f) +
       setup.coef[slot].dady.f[i] * (setup.vmin->data[0][1] - 0.5f)));
}
#endif
663
664
665 /**
666 * Compute the setup.coef[] array dadx, dady, a0 values.
667 * Must be called after setup.vmin,vmid,vmax,vprovoke are initialized.
668 */
669 static void setup_tri_coefficients(void)
670 {
671 #if 1
672 uint i;
673
674 for (i = 0; i < spu.vertex_info.num_attribs; i++) {
675 switch (spu.vertex_info.interp_mode[i]) {
676 case INTERP_NONE:
677 break;
678 case INTERP_POS:
679 tri_linear_coeff(i, 2, 3);
680 /* XXX interp W if PERSPECTIVE... */
681 break;
682 case INTERP_CONSTANT:
683 const_coeff(i);
684 break;
685 case INTERP_LINEAR:
686 tri_linear_coeff(i, 0, 4);
687 break;
688 case INTERP_PERSPECTIVE:
689 tri_linear_coeff(i, 0, 4); /* XXX temporary */
690 break;
691 default:
692 ASSERT(0);
693 }
694 }
695 #else
696 ASSERT(spu.vertex_info.interp_mode[0] == INTERP_POS);
697 ASSERT(spu.vertex_info.interp_mode[1] == INTERP_LINEAR ||
698 spu.vertex_info.interp_mode[1] == INTERP_CONSTANT);
699 tri_linear_coeff(0, 2, 3); /* slot 0, z */
700 tri_linear_coeff(1, 0, 4); /* slot 1, color */
701 #endif
702 }
703
704
705 static void setup_tri_edges(void)
706 {
707 float vmin_x = setup.vmin->data[0][0] + 0.5f;
708 float vmid_x = setup.vmid->data[0][0] + 0.5f;
709
710 float vmin_y = setup.vmin->data[0][1] - 0.5f;
711 float vmid_y = setup.vmid->data[0][1] - 0.5f;
712 float vmax_y = setup.vmax->data[0][1] - 0.5f;
713
714 setup.emaj.sy = CEILF(vmin_y);
715 setup.emaj.lines = (int) CEILF(vmax_y - setup.emaj.sy);
716 setup.emaj.dxdy = setup.emaj.dx / setup.emaj.dy;
717 setup.emaj.sx = vmin_x + (setup.emaj.sy - vmin_y) * setup.emaj.dxdy;
718
719 setup.etop.sy = CEILF(vmid_y);
720 setup.etop.lines = (int) CEILF(vmax_y - setup.etop.sy);
721 setup.etop.dxdy = setup.etop.dx / setup.etop.dy;
722 setup.etop.sx = vmid_x + (setup.etop.sy - vmid_y) * setup.etop.dxdy;
723
724 setup.ebot.sy = CEILF(vmin_y);
725 setup.ebot.lines = (int) CEILF(vmid_y - setup.ebot.sy);
726 setup.ebot.dxdy = setup.ebot.dx / setup.ebot.dy;
727 setup.ebot.sx = vmin_x + (setup.ebot.sy - vmin_y) * setup.ebot.dxdy;
728 }
729
730
731 /**
732 * Render the upper or lower half of a triangle.
733 * Scissoring/cliprect is applied here too.
734 */
735 static void subtriangle( struct edge *eleft,
736 struct edge *eright,
737 unsigned lines )
738 {
739 const int minx = setup.cliprect_minx;
740 const int maxx = setup.cliprect_maxx;
741 const int miny = setup.cliprect_miny;
742 const int maxy = setup.cliprect_maxy;
743 int y, start_y, finish_y;
744 int sy = (int)eleft->sy;
745
746 ASSERT((int)eleft->sy == (int) eright->sy);
747
748 /* clip top/bottom */
749 start_y = sy;
750 finish_y = sy + lines;
751
752 if (start_y < miny)
753 start_y = miny;
754
755 if (finish_y > maxy)
756 finish_y = maxy;
757
758 start_y -= sy;
759 finish_y -= sy;
760
761 /*
762 _mesa_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);
763 */
764
765 for (y = start_y; y < finish_y; y++) {
766
767 /* avoid accumulating adds as floats don't have the precision to
768 * accurately iterate large triangle edges that way. luckily we
769 * can just multiply these days.
770 *
771 * this is all drowned out by the attribute interpolation anyway.
772 */
773 int left = (int)(eleft->sx + y * eleft->dxdy);
774 int right = (int)(eright->sx + y * eright->dxdy);
775
776 /* clip left/right */
777 if (left < minx)
778 left = minx;
779 if (right > maxx)
780 right = maxx;
781
782 if (left < right) {
783 int _y = sy + y;
784 if (block(_y) != setup.span.y) {
785 flush_spans();
786 setup.span.y = block(_y);
787 }
788
789 setup.span.left[_y&1] = left;
790 setup.span.right[_y&1] = right;
791 setup.span.y_flags |= 1<<(_y&1);
792 }
793 }
794
795
796 /* save the values so that emaj can be restarted:
797 */
798 eleft->sx += lines * eleft->dxdy;
799 eright->sx += lines * eright->dxdy;
800 eleft->sy += lines;
801 eright->sy += lines;
802 }
803
804
805 /**
806 * Draw triangle into tile at (tx, ty) (tile coords)
807 * The tile data should have already been fetched.
808 */
809 boolean
810 tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty)
811 {
812 setup.tx = tx;
813 setup.ty = ty;
814
815 /* set clipping bounds to tile bounds */
816 setup.cliprect_minx = tx * TILE_SIZE;
817 setup.cliprect_miny = ty * TILE_SIZE;
818 setup.cliprect_maxx = (tx + 1) * TILE_SIZE;
819 setup.cliprect_maxy = (ty + 1) * TILE_SIZE;
820
821 if (!setup_sort_vertices((struct vertex_header *) v0,
822 (struct vertex_header *) v1,
823 (struct vertex_header *) v2)) {
824 return FALSE; /* totally clipped */
825 }
826
827 setup_tri_coefficients();
828 setup_tri_edges();
829
830 setup.span.y = 0;
831 setup.span.y_flags = 0;
832 setup.span.right[0] = 0;
833 setup.span.right[1] = 0;
834 /* setup.span.z_mode = tri_z_mode( setup.ctx ); */
835
836 /* init_constant_attribs( setup ); */
837
838 if (spu.cur_ctile_status == TILE_STATUS_GETTING) {
839 /* wait for mfc_get() to complete */
840 wait_on_mask(1 << TAG_READ_TILE_COLOR);
841 spu.cur_ctile_status = TILE_STATUS_CLEAN;
842 }
843 ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED);
844
845 if (spu.depth_stencil.depth.enabled) {
846 if (spu.cur_ztile_status == TILE_STATUS_GETTING) {
847 /* wait for mfc_get() to complete */
848 wait_on_mask(1 << TAG_READ_TILE_Z);
849 spu.cur_ztile_status = TILE_STATUS_CLEAN;
850 }
851 ASSERT(spu.cur_ztile_status != TILE_STATUS_DEFINED);
852 }
853
854
855 if (setup.oneoverarea < 0.0) {
856 /* emaj on left:
857 */
858 subtriangle( &setup.emaj, &setup.ebot, setup.ebot.lines );
859 subtriangle( &setup.emaj, &setup.etop, setup.etop.lines );
860 }
861 else {
862 /* emaj on right:
863 */
864 subtriangle( &setup.ebot, &setup.emaj, setup.ebot.lines );
865 subtriangle( &setup.etop, &setup.emaj, setup.etop.lines );
866 }
867
868 flush_spans();
869
870 return TRUE;
871 }