llvmpipe: fail cleanly on malloc failure in lp_setup_alloc_triangle
[mesa.git] / src / gallium / drivers / llvmpipe / lp_setup_tri.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /*
29 * Binning code for triangles
30 */
31
32 #include "util/u_math.h"
33 #include "util/u_memory.h"
34 #include "util/u_rect.h"
35 #include "util/u_sse.h"
36 #include "lp_perf.h"
37 #include "lp_setup_context.h"
38 #include "lp_setup_coef.h"
39 #include "lp_rast.h"
40 #include "lp_state_fs.h"
41
42 #define NUM_CHANNELS 4
43
44 #if defined(PIPE_ARCH_SSE)
45 #include <emmintrin.h>
46 #endif
47
48 static INLINE int
49 subpixel_snap(float a)
50 {
51 return util_iround(FIXED_ONE * a);
52 }
53
54 static INLINE float
55 fixed_to_float(int a)
56 {
57 return a * (1.0 / FIXED_ONE);
58 }
59
60
61
62
63
64
65
66 /**
67 * Alloc space for a new triangle plus the input.a0/dadx/dady arrays
68 * immediately after it.
69 * The memory is allocated from the per-scene pool, not per-tile.
70 * \param tri_size returns number of bytes allocated
71 * \param nr_inputs number of fragment shader inputs
72 * \return pointer to triangle space
73 */
74 struct lp_rast_triangle *
75 lp_setup_alloc_triangle(struct lp_scene *scene,
76 unsigned nr_inputs,
77 unsigned nr_planes,
78 unsigned *tri_size)
79 {
80 unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float);
81 unsigned plane_sz = nr_planes * sizeof(struct lp_rast_plane);
82 struct lp_rast_triangle *tri;
83
84 *tri_size = (sizeof(struct lp_rast_triangle) +
85 3 * input_array_sz +
86 plane_sz);
87
88 tri = lp_scene_alloc_aligned( scene, *tri_size, 16 );
89 if (tri == NULL)
90 return NULL;
91
92 tri->inputs.stride = input_array_sz;
93
94 {
95 char *a = (char *)tri;
96 char *b = (char *)&GET_PLANES(tri)[nr_planes];
97 assert(b - a == *tri_size);
98 }
99
100 return tri;
101 }
102
103 void
104 lp_setup_print_vertex(struct lp_setup_context *setup,
105 const char *name,
106 const float (*v)[4])
107 {
108 int i, j;
109
110 debug_printf(" wpos (%s[0]) xyzw %f %f %f %f\n",
111 name,
112 v[0][0], v[0][1], v[0][2], v[0][3]);
113
114 for (i = 0; i < setup->fs.nr_inputs; i++) {
115 const float *in = v[setup->fs.input[i].src_index];
116
117 debug_printf(" in[%d] (%s[%d]) %s%s%s%s ",
118 i,
119 name, setup->fs.input[i].src_index,
120 (setup->fs.input[i].usage_mask & 0x1) ? "x" : " ",
121 (setup->fs.input[i].usage_mask & 0x2) ? "y" : " ",
122 (setup->fs.input[i].usage_mask & 0x4) ? "z" : " ",
123 (setup->fs.input[i].usage_mask & 0x8) ? "w" : " ");
124
125 for (j = 0; j < 4; j++)
126 if (setup->fs.input[i].usage_mask & (1<<j))
127 debug_printf("%.5f ", in[j]);
128
129 debug_printf("\n");
130 }
131 }
132
133
/**
 * Print a triangle (for debug): its winding as derived from the sign of
 * the 2D cross product of two edges, then the attributes of each vertex.
 */
void
lp_setup_print_triangle(struct lp_setup_context *setup,
                        const float (*v0)[4],
                        const float (*v1)[4],
                        const float (*v2)[4])
{
   /* Edge vectors relative to v2. */
   const float ex = v0[0][0] - v2[0][0];
   const float ey = v0[0][1] - v2[0][1];
   const float fx = v1[0][0] - v2[0][0];
   const float fy = v1[0][1] - v2[0][1];

   /* det = cross(e,f).z */
   const float det = ex * fy - ey * fx;

   debug_printf("triangle\n");

   if (det > 0.0f) {
      debug_printf(" - cw\n");
   }
   else if (det < 0.0f) {
      debug_printf(" - ccw\n");
   }
   else {
      debug_printf(" - zero area\n");
   }

   lp_setup_print_vertex(setup, "v0", v0);
   lp_setup_print_vertex(setup, "v1", v1);
   lp_setup_print_vertex(setup, "v2", v2);
}
165
166
167 #define MAX_PLANES 8
168 static unsigned
169 lp_rast_tri_tab[MAX_PLANES+1] = {
170 0, /* should be impossible */
171 LP_RAST_OP_TRIANGLE_1,
172 LP_RAST_OP_TRIANGLE_2,
173 LP_RAST_OP_TRIANGLE_3,
174 LP_RAST_OP_TRIANGLE_4,
175 LP_RAST_OP_TRIANGLE_5,
176 LP_RAST_OP_TRIANGLE_6,
177 LP_RAST_OP_TRIANGLE_7,
178 LP_RAST_OP_TRIANGLE_8
179 };
180
181
182
183 /**
184 * The primitive covers the whole tile- shade whole tile.
185 *
186 * \param tx, ty the tile position in tiles, not pixels
187 */
188 static boolean
189 lp_setup_whole_tile(struct lp_setup_context *setup,
190 const struct lp_rast_shader_inputs *inputs,
191 int tx, int ty)
192 {
193 struct lp_scene *scene = setup->scene;
194
195 LP_COUNT(nr_fully_covered_64);
196
197 /* if variant is opaque and scissor doesn't effect the tile */
198 if (inputs->opaque) {
199 if (!scene->fb.zsbuf) {
200 /*
201 * All previous rendering will be overwritten so reset the bin.
202 */
203 lp_scene_bin_reset( scene, tx, ty );
204 }
205
206 LP_COUNT(nr_shade_opaque_64);
207 return lp_scene_bin_cmd_with_state( scene, tx, ty,
208 setup->fs.stored,
209 LP_RAST_OP_SHADE_TILE_OPAQUE,
210 lp_rast_arg_inputs(inputs) );
211 } else {
212 LP_COUNT(nr_shade_64);
213 return lp_scene_bin_cmd_with_state( scene, tx, ty,
214 setup->fs.stored,
215 LP_RAST_OP_SHADE_TILE,
216 lp_rast_arg_inputs(inputs) );
217 }
218 }
219
220
221 /**
222 * Do basic setup for triangle rasterization and determine which
223 * framebuffer tiles are touched. Put the triangle in the scene's
224 * bins for the tiles which we overlap.
225 */
226 static boolean
227 do_triangle_ccw(struct lp_setup_context *setup,
228 const float (*v0)[4],
229 const float (*v1)[4],
230 const float (*v2)[4],
231 boolean frontfacing )
232 {
233 struct lp_scene *scene = setup->scene;
234 struct lp_rast_triangle *tri;
235 struct lp_rast_plane *plane;
236 int x[4];
237 int y[4];
238 struct u_rect bbox;
239 unsigned tri_bytes;
240 int nr_planes = 3;
241
242 if (0)
243 lp_setup_print_triangle(setup, v0, v1, v2);
244
245 if (setup->scissor_test) {
246 nr_planes = 7;
247 }
248 else {
249 nr_planes = 3;
250 }
251
252 /* x/y positions in fixed point */
253 x[0] = subpixel_snap(v0[0][0] - setup->pixel_offset);
254 x[1] = subpixel_snap(v1[0][0] - setup->pixel_offset);
255 x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset);
256 x[3] = 0;
257 y[0] = subpixel_snap(v0[0][1] - setup->pixel_offset);
258 y[1] = subpixel_snap(v1[0][1] - setup->pixel_offset);
259 y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset);
260 y[3] = 0;
261
262
263 /* Bounding rectangle (in pixels) */
264 {
265 /* Yes this is necessary to accurately calculate bounding boxes
266 * with the two fill-conventions we support. GL (normally) ends
267 * up needing a bottom-left fill convention, which requires
268 * slightly different rounding.
269 */
270 int adj = (setup->pixel_offset != 0) ? 1 : 0;
271
272 bbox.x0 = (MIN3(x[0], x[1], x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER;
273 bbox.x1 = (MAX3(x[0], x[1], x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER;
274 bbox.y0 = (MIN3(y[0], y[1], y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
275 bbox.y1 = (MAX3(y[0], y[1], y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
276
277 /* Inclusive coordinates:
278 */
279 bbox.x1--;
280 bbox.y1--;
281 }
282
283 if (bbox.x1 < bbox.x0 ||
284 bbox.y1 < bbox.y0) {
285 if (0) debug_printf("empty bounding box\n");
286 LP_COUNT(nr_culled_tris);
287 return TRUE;
288 }
289
290 if (!u_rect_test_intersection(&setup->draw_region, &bbox)) {
291 if (0) debug_printf("offscreen\n");
292 LP_COUNT(nr_culled_tris);
293 return TRUE;
294 }
295
296 u_rect_find_intersection(&setup->draw_region, &bbox);
297
298 tri = lp_setup_alloc_triangle(scene,
299 setup->fs.nr_inputs,
300 nr_planes,
301 &tri_bytes);
302 if (!tri)
303 return FALSE;
304
305 #if 0
306 tri->v[0][0] = v0[0][0];
307 tri->v[1][0] = v1[0][0];
308 tri->v[2][0] = v2[0][0];
309 tri->v[0][1] = v0[0][1];
310 tri->v[1][1] = v1[0][1];
311 tri->v[2][1] = v2[0][1];
312 #endif
313
314 LP_COUNT(nr_tris);
315
316 /* Setup parameter interpolants:
317 */
318 lp_setup_tri_coef( setup, &tri->inputs, v0, v1, v2, frontfacing );
319
320 tri->inputs.frontfacing = frontfacing;
321 tri->inputs.disable = FALSE;
322 tri->inputs.opaque = setup->fs.current.variant->opaque;
323
324 plane = GET_PLANES(tri);
325
326 #if defined(PIPE_ARCH_SSE)
327 {
328 __m128i vertx, verty;
329 __m128i shufx, shufy;
330 __m128i dcdx, dcdy, c;
331 __m128i unused;
332 __m128i dcdx_neg_mask;
333 __m128i dcdy_neg_mask;
334 __m128i dcdx_zero_mask;
335 __m128i top_left_flag;
336 __m128i c_inc_mask, c_inc;
337 __m128i eo, p0, p1, p2;
338 __m128i zero = _mm_setzero_si128();
339
340 vertx = _mm_loadu_si128((__m128i *)x); /* vertex x coords */
341 verty = _mm_loadu_si128((__m128i *)y); /* vertex y coords */
342
343 shufx = _mm_shuffle_epi32(vertx, _MM_SHUFFLE(3,0,2,1));
344 shufy = _mm_shuffle_epi32(verty, _MM_SHUFFLE(3,0,2,1));
345
346 dcdx = _mm_sub_epi32(verty, shufy);
347 dcdy = _mm_sub_epi32(vertx, shufx);
348
349 dcdx_neg_mask = _mm_srai_epi32(dcdx, 31);
350 dcdx_zero_mask = _mm_cmpeq_epi32(dcdx, zero);
351 dcdy_neg_mask = _mm_srai_epi32(dcdy, 31);
352
353 top_left_flag = _mm_set1_epi32((setup->pixel_offset == 0) ? ~0 : 0);
354
355 c_inc_mask = _mm_or_si128(dcdx_neg_mask,
356 _mm_and_si128(dcdx_zero_mask,
357 _mm_xor_si128(dcdy_neg_mask,
358 top_left_flag)));
359
360 c_inc = _mm_srli_epi32(c_inc_mask, 31);
361
362 c = _mm_sub_epi32(mm_mullo_epi32(dcdx, vertx),
363 mm_mullo_epi32(dcdy, verty));
364
365 c = _mm_add_epi32(c, c_inc);
366
367 /* Scale up to match c:
368 */
369 dcdx = _mm_slli_epi32(dcdx, FIXED_ORDER);
370 dcdy = _mm_slli_epi32(dcdy, FIXED_ORDER);
371
372 /* Calculate trivial reject values:
373 */
374 eo = _mm_sub_epi32(_mm_andnot_si128(dcdy_neg_mask, dcdy),
375 _mm_and_si128(dcdx_neg_mask, dcdx));
376
377 /* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */
378
379 /* Pointless transpose which gets undone immediately in
380 * rasterization:
381 */
382 transpose4_epi32(&c, &dcdx, &dcdy, &eo,
383 &p0, &p1, &p2, &unused);
384
385 _mm_store_si128((__m128i *)&plane[0], p0);
386 _mm_store_si128((__m128i *)&plane[1], p1);
387 _mm_store_si128((__m128i *)&plane[2], p2);
388 }
389 #else
390 {
391 int i;
392 plane[0].dcdy = x[0] - x[1];
393 plane[1].dcdy = x[1] - x[2];
394 plane[2].dcdy = x[2] - x[0];
395 plane[0].dcdx = y[0] - y[1];
396 plane[1].dcdx = y[1] - y[2];
397 plane[2].dcdx = y[2] - y[0];
398
399 for (i = 0; i < 3; i++) {
400 /* half-edge constants, will be interated over the whole render
401 * target.
402 */
403 plane[i].c = plane[i].dcdx * x[i] - plane[i].dcdy * y[i];
404
405 /* correct for top-left vs. bottom-left fill convention.
406 *
407 * note that we're overloading gl_rasterization_rules to mean
408 * both (0.5,0.5) pixel centers *and* bottom-left filling
409 * convention.
410 *
411 * GL actually has a top-left filling convention, but GL's
412 * notion of "top" differs from gallium's...
413 *
414 * Also, sometimes (in FBO cases) GL will render upside down
415 * to its usual method, in which case it will probably want
416 * to use the opposite, top-left convention.
417 */
418 if (plane[i].dcdx < 0) {
419 /* both fill conventions want this - adjust for left edges */
420 plane[i].c++;
421 }
422 else if (plane[i].dcdx == 0) {
423 if (setup->pixel_offset == 0) {
424 /* correct for top-left fill convention:
425 */
426 if (plane[i].dcdy > 0) plane[i].c++;
427 }
428 else {
429 /* correct for bottom-left fill convention:
430 */
431 if (plane[i].dcdy < 0) plane[i].c++;
432 }
433 }
434
435 plane[i].dcdx *= FIXED_ONE;
436 plane[i].dcdy *= FIXED_ONE;
437
438 /* find trivial reject offsets for each edge for a single-pixel
439 * sized block. These will be scaled up at each recursive level to
440 * match the active blocksize. Scaling in this way works best if
441 * the blocks are square.
442 */
443 plane[i].eo = 0;
444 if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx;
445 if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy;
446 }
447 }
448 #endif
449
450 if (0) {
451 debug_printf("p0: %08x/%08x/%08x/%08x\n",
452 plane[0].c,
453 plane[0].dcdx,
454 plane[0].dcdy,
455 plane[0].eo);
456
457 debug_printf("p1: %08x/%08x/%08x/%08x\n",
458 plane[1].c,
459 plane[1].dcdx,
460 plane[1].dcdy,
461 plane[1].eo);
462
463 debug_printf("p0: %08x/%08x/%08x/%08x\n",
464 plane[2].c,
465 plane[2].dcdx,
466 plane[2].dcdy,
467 plane[2].eo);
468 }
469
470
471 /*
472 * When rasterizing scissored tris, use the intersection of the
473 * triangle bounding box and the scissor rect to generate the
474 * scissor planes.
475 *
476 * This permits us to cut off the triangle "tails" that are present
477 * in the intermediate recursive levels caused when two of the
478 * triangles edges don't diverge quickly enough to trivially reject
479 * exterior blocks from the triangle.
480 *
481 * It's not really clear if it's worth worrying about these tails,
482 * but since we generate the planes for each scissored tri, it's
483 * free to trim them in this case.
484 *
485 * Note that otherwise, the scissor planes only vary in 'C' value,
486 * and even then only on state-changes. Could alternatively store
487 * these planes elsewhere.
488 */
489 if (nr_planes == 7) {
490 plane[3].dcdx = -1;
491 plane[3].dcdy = 0;
492 plane[3].c = 1-bbox.x0;
493 plane[3].eo = 1;
494
495 plane[4].dcdx = 1;
496 plane[4].dcdy = 0;
497 plane[4].c = bbox.x1+1;
498 plane[4].eo = 0;
499
500 plane[5].dcdx = 0;
501 plane[5].dcdy = 1;
502 plane[5].c = 1-bbox.y0;
503 plane[5].eo = 1;
504
505 plane[6].dcdx = 0;
506 plane[6].dcdy = -1;
507 plane[6].c = bbox.y1+1;
508 plane[6].eo = 0;
509 }
510
511 return lp_setup_bin_triangle( setup, tri, &bbox, nr_planes );
512 }
513
/*
 * Round the input down to the nearest power of two that is less than or
 * equal to it.
 *
 * Returns 0 when no bit is set: the x86 path checks for zero explicitly
 * (the bsr result is undefined for a zero source) and the generic path
 * yields 0 naturally.
 */
static INLINE uint32_t
floor_pot(uint32_t n)
{
#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86)
   if (n == 0)
      return 0;

   __asm__("bsr %1,%0"
           : "=r" (n)
           : "rm" (n));

   /* Shift an unsigned one: n may be 31, and shifting a signed int
    * into its sign bit is undefined behaviour.
    */
   return (uint32_t)1 << n;
#else
   /* Smear the highest set bit into every lower position ... */
   n |= (n >> 1);
   n |= (n >> 2);
   n |= (n >> 4);
   n |= (n >> 8);
   n |= (n >> 16);

   /* ... then drop everything below the highest bit. */
   return n - (n >> 1);
#endif
}
539
540
541 boolean
542 lp_setup_bin_triangle( struct lp_setup_context *setup,
543 struct lp_rast_triangle *tri,
544 const struct u_rect *bbox,
545 int nr_planes )
546 {
547 struct lp_scene *scene = setup->scene;
548 int i;
549
550 /* What is the largest power-of-two boundary this triangle crosses:
551 */
552 int dx = floor_pot((bbox->x0 ^ bbox->x1) |
553 (bbox->y0 ^ bbox->y1));
554
555 /* The largest dimension of the rasterized area of the triangle
556 * (aligned to a 4x4 grid), rounded down to the nearest power of two:
557 */
558 int sz = floor_pot((bbox->x1 - (bbox->x0 & ~3)) |
559 (bbox->y1 - (bbox->y0 & ~3)));
560
561 /* Determine which tile(s) intersect the triangle's bounding box
562 */
563 if (dx < TILE_SIZE)
564 {
565 int ix0 = bbox->x0 / TILE_SIZE;
566 int iy0 = bbox->y0 / TILE_SIZE;
567 int px = bbox->x0 & 63 & ~3;
568 int py = bbox->y0 & 63 & ~3;
569 int mask = px | (py << 8);
570
571 assert(iy0 == bbox->y1 / TILE_SIZE &&
572 ix0 == bbox->x1 / TILE_SIZE);
573
574 if (nr_planes == 3) {
575 if (sz < 4)
576 {
577 /* Triangle is contained in a single 4x4 stamp:
578 */
579 return lp_scene_bin_cmd_with_state( scene, ix0, iy0,
580 setup->fs.stored,
581 LP_RAST_OP_TRIANGLE_3_4,
582 lp_rast_arg_triangle(tri, mask) );
583 }
584
585 if (sz < 16)
586 {
587 /* Triangle is contained in a single 16x16 block:
588 */
589 return lp_scene_bin_cmd_with_state( scene, ix0, iy0,
590 setup->fs.stored,
591 LP_RAST_OP_TRIANGLE_3_16,
592 lp_rast_arg_triangle(tri, mask) );
593 }
594 }
595 else if (nr_planes == 4 && sz < 16)
596 {
597 return lp_scene_bin_cmd_with_state(scene, ix0, iy0,
598 setup->fs.stored,
599 LP_RAST_OP_TRIANGLE_4_16,
600 lp_rast_arg_triangle(tri, mask) );
601 }
602
603
604 /* Triangle is contained in a single tile:
605 */
606 return lp_scene_bin_cmd_with_state( scene, ix0, iy0, setup->fs.stored,
607 lp_rast_tri_tab[nr_planes],
608 lp_rast_arg_triangle(tri, (1<<nr_planes)-1) );
609 }
610 else
611 {
612 struct lp_rast_plane *plane = GET_PLANES(tri);
613 int c[MAX_PLANES];
614 int ei[MAX_PLANES];
615 int eo[MAX_PLANES];
616 int xstep[MAX_PLANES];
617 int ystep[MAX_PLANES];
618 int x, y;
619
620 int ix0 = bbox->x0 / TILE_SIZE;
621 int iy0 = bbox->y0 / TILE_SIZE;
622 int ix1 = bbox->x1 / TILE_SIZE;
623 int iy1 = bbox->y1 / TILE_SIZE;
624
625 for (i = 0; i < nr_planes; i++) {
626 c[i] = (plane[i].c +
627 plane[i].dcdy * iy0 * TILE_SIZE -
628 plane[i].dcdx * ix0 * TILE_SIZE);
629
630 ei[i] = (plane[i].dcdy -
631 plane[i].dcdx -
632 plane[i].eo) << TILE_ORDER;
633
634 eo[i] = plane[i].eo << TILE_ORDER;
635 xstep[i] = -(plane[i].dcdx << TILE_ORDER);
636 ystep[i] = plane[i].dcdy << TILE_ORDER;
637 }
638
639
640
641 /* Test tile-sized blocks against the triangle.
642 * Discard blocks fully outside the tri. If the block is fully
643 * contained inside the tri, bin an lp_rast_shade_tile command.
644 * Else, bin a lp_rast_triangle command.
645 */
646 for (y = iy0; y <= iy1; y++)
647 {
648 boolean in = FALSE; /* are we inside the triangle? */
649 int cx[MAX_PLANES];
650
651 for (i = 0; i < nr_planes; i++)
652 cx[i] = c[i];
653
654 for (x = ix0; x <= ix1; x++)
655 {
656 int out = 0;
657 int partial = 0;
658
659 for (i = 0; i < nr_planes; i++) {
660 int planeout = cx[i] + eo[i];
661 int planepartial = cx[i] + ei[i] - 1;
662 out |= (planeout >> 31);
663 partial |= (planepartial >> 31) & (1<<i);
664 }
665
666 if (out) {
667 /* do nothing */
668 if (in)
669 break; /* exiting triangle, all done with this row */
670 LP_COUNT(nr_empty_64);
671 }
672 else if (partial) {
673 /* Not trivially accepted by at least one plane -
674 * rasterize/shade partial tile
675 */
676 int count = util_bitcount(partial);
677 in = TRUE;
678
679 if (!lp_scene_bin_cmd_with_state( scene, x, y,
680 setup->fs.stored,
681 lp_rast_tri_tab[count],
682 lp_rast_arg_triangle(tri, partial) ))
683 goto fail;
684
685 LP_COUNT(nr_partially_covered_64);
686 }
687 else {
688 /* triangle covers the whole tile- shade whole tile */
689 LP_COUNT(nr_fully_covered_64);
690 in = TRUE;
691 if (!lp_setup_whole_tile(setup, &tri->inputs, x, y))
692 goto fail;
693 }
694
695 /* Iterate cx values across the region:
696 */
697 for (i = 0; i < nr_planes; i++)
698 cx[i] += xstep[i];
699 }
700
701 /* Iterate c values down the region:
702 */
703 for (i = 0; i < nr_planes; i++)
704 c[i] += ystep[i];
705 }
706 }
707
708 return TRUE;
709
710 fail:
711 /* Need to disable any partially binned triangle. This is easier
712 * than trying to locate all the triangle, shade-tile, etc,
713 * commands which may have been binned.
714 */
715 tri->inputs.disable = TRUE;
716 return FALSE;
717 }
718
719
720 /**
721 * Try to draw the triangle, restart the scene on failure.
722 */
723 static void retry_triangle_ccw( struct lp_setup_context *setup,
724 const float (*v0)[4],
725 const float (*v1)[4],
726 const float (*v2)[4],
727 boolean front)
728 {
729 if (!do_triangle_ccw( setup, v0, v1, v2, front ))
730 {
731 if (!lp_setup_flush_and_restart(setup))
732 return;
733
734 if (!do_triangle_ccw( setup, v0, v1, v2, front ))
735 return;
736 }
737 }
738
739 static INLINE float
740 calc_area(const float (*v0)[4],
741 const float (*v1)[4],
742 const float (*v2)[4])
743 {
744 float dx01 = v0[0][0] - v1[0][0];
745 float dy01 = v0[0][1] - v1[0][1];
746 float dx20 = v2[0][0] - v0[0][0];
747 float dy20 = v2[0][1] - v0[0][1];
748 return dx01 * dy20 - dx20 * dy01;
749 }
750
751
752 /**
753 * Draw triangle if it's CW, cull otherwise.
754 */
755 static void triangle_cw( struct lp_setup_context *setup,
756 const float (*v0)[4],
757 const float (*v1)[4],
758 const float (*v2)[4] )
759 {
760 float area = calc_area(v0, v1, v2);
761
762 if (area < 0.0f)
763 retry_triangle_ccw(setup, v0, v2, v1, !setup->ccw_is_frontface);
764 }
765
766
767 static void triangle_ccw( struct lp_setup_context *setup,
768 const float (*v0)[4],
769 const float (*v1)[4],
770 const float (*v2)[4])
771 {
772 float area = calc_area(v0, v1, v2);
773
774 if (area > 0.0f)
775 retry_triangle_ccw(setup, v0, v1, v2, setup->ccw_is_frontface);
776 }
777
778 /**
779 * Draw triangle whether it's CW or CCW.
780 */
781 static void triangle_both( struct lp_setup_context *setup,
782 const float (*v0)[4],
783 const float (*v1)[4],
784 const float (*v2)[4] )
785 {
786 float area = calc_area(v0, v1, v2);
787
788 if (area > 0.0f)
789 retry_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface );
790 else if (area < 0.0f)
791 retry_triangle_ccw( setup, v0, v2, v1, !setup->ccw_is_frontface );
792 }
793
794
/**
 * Triangle handler that discards every triangle.
 */
static void triangle_nop( struct lp_setup_context *setup,
                          const float (*v0)[4],
                          const float (*v1)[4],
                          const float (*v2)[4] )
{
   /* Intentionally empty: nothing is drawn. */
   (void) setup;
   (void) v0;
   (void) v1;
   (void) v2;
}
801
802
803 void
804 lp_setup_choose_triangle( struct lp_setup_context *setup )
805 {
806 switch (setup->cullmode) {
807 case PIPE_FACE_NONE:
808 setup->triangle = triangle_both;
809 break;
810 case PIPE_FACE_BACK:
811 setup->triangle = setup->ccw_is_frontface ? triangle_ccw : triangle_cw;
812 break;
813 case PIPE_FACE_FRONT:
814 setup->triangle = setup->ccw_is_frontface ? triangle_cw : triangle_ccw;
815 break;
816 default:
817 setup->triangle = triangle_nop;
818 break;
819 }
820 }