1 /**************************************************************************
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 * Binning code for triangles
32 #include "util/u_math.h"
33 #include "util/u_memory.h"
34 #include "util/u_rect.h"
36 #include "lp_setup_context.h"
38 #include "lp_state_fs.h"
39 #include "lp_state_setup.h"
/* Number of float components stored per attribute slot;
 * lp_setup_alloc_triangle uses it to size each a0/dadx/dady array.
 */
41 #define NUM_CHANNELS 4
/* Convert a float coordinate to fixed point: scale by FIXED_ONE and pass
 * the product through util_iround().
 * NOTE(review): the return-type line and braces are missing from this
 * extract.
 */
46 subpixel_snap(float a
)
48 return util_iround(FIXED_ONE
* a
);
/* Inverse scaling: multiplies a value by 1.0 / FIXED_ONE.
 * NOTE(review): the header of the helper owning this return statement is
 * not visible in this extract.
 */
54 return a
* (1.0 / FIXED_ONE
);
64 * Alloc space for a new triangle plus the input.a0/dadx/dady arrays
65 * immediately after it.
66 * The memory is allocated from the per-scene pool, not per-tile.
67 * \param tri_size returns number of bytes allocated
68 * \param num_inputs number of fragment shader inputs
69 * \return pointer to triangle space
/* Requests a single chunk from the scene through lp_scene_alloc_aligned()
 * (alignment argument 16).  The chunk holds the lp_rast_triangle, sized up
 * to plane[nr_planes] and padded to a multiple of 16 bytes, followed by
 * three coefficient arrays of input_array_sz bytes each; inputs.a0,
 * inputs.dadx and inputs.dady are pointed at those arrays in that order.
 * NOTE(review): part of the parameter list, the declaration of `inputs`,
 * any check of the allocation result and the return statement are not
 * visible in this extract.
 */
71 struct lp_rast_triangle
*
72 lp_setup_alloc_triangle(struct lp_scene
*scene
,
/* bytes for one coefficient array: NUM_CHANNELS floats for each of
 * (num_inputs + 1) slots
 */
77 unsigned input_array_sz
= NUM_CHANNELS
* (num_inputs
+ 1) * sizeof(float);
78 struct lp_rast_triangle
*tri
;
79 unsigned tri_bytes
, bytes
;
82 tri_bytes
= align(Offset(struct lp_rast_triangle
, plane
[nr_planes
]), 16);
83 bytes
= tri_bytes
+ (3 * input_array_sz
);
85 tri
= lp_scene_alloc_aligned( scene
, bytes
, 16 );
/* the coefficient arrays start right after the padded triangle struct */
88 inputs
= ((char *)tri
) + tri_bytes
;
89 tri
->inputs
.a0
= (float (*)[4]) inputs
;
90 tri
->inputs
.dadx
= (float (*)[4]) (inputs
+ input_array_sz
);
91 tri
->inputs
.dady
= (float (*)[4]) (inputs
+ 2 * input_array_sz
);
/* Debug helper: prints v[0] as the xyzw window position, then for every
 * input listed in the current setup variant key prints its source attribute
 * index, its usage mask rendered as the letters x/y/z/w, and the values of
 * the components enabled in that mask.
 * NOTE(review): the remaining parameters, local declarations and some
 * debug_printf arguments are not visible in this extract.
 */
100 lp_setup_print_vertex(struct lp_setup_context
*setup
,
104 const struct lp_setup_variant_key
*key
= &setup
->setup
.variant
->key
;
107 debug_printf(" wpos (%s[0]) xyzw %f %f %f %f\n",
109 v
[0][0], v
[0][1], v
[0][2], v
[0][3]);
111 for (i
= 0; i
< key
->num_inputs
; i
++) {
/* attribute selected by this input's src_index */
112 const float *in
= v
[key
->inputs
[i
].src_index
];
114 debug_printf(" in[%d] (%s[%d]) %s%s%s%s ",
116 name
, key
->inputs
[i
].src_index
,
117 (key
->inputs
[i
].usage_mask
& 0x1) ? "x" : " ",
118 (key
->inputs
[i
].usage_mask
& 0x2) ? "y" : " ",
119 (key
->inputs
[i
].usage_mask
& 0x4) ? "z" : " ",
120 (key
->inputs
[i
].usage_mask
& 0x8) ? "w" : " ");
/* print only the components whose bit is set in usage_mask */
122 for (j
= 0; j
< 4; j
++)
123 if (key
->inputs
[i
].usage_mask
& (1<<j
))
124 debug_printf("%.5f ", in
[j
]);
132 * Print triangle vertex attribs (for debug).
/* Debug helper: prints "triangle", one orientation line (" - ccw", " - cw"
 * or " - zero area") and then the three vertices through
 * lp_setup_print_vertex().
 * NOTE(review): the tests on det that select the orientation message are
 * not visible in this extract.
 */
135 lp_setup_print_triangle(struct lp_setup_context
*setup
,
136 const float (*v0
)[4],
137 const float (*v1
)[4],
138 const float (*v2
)[4])
140 debug_printf("triangle\n");
/* edge vectors from attribute 0 (position): e = v0 - v2, f = v1 - v2 */
143 const float ex
= v0
[0][0] - v2
[0][0];
144 const float ey
= v0
[0][1] - v2
[0][1];
145 const float fx
= v1
[0][0] - v2
[0][0];
146 const float fy
= v1
[0][1] - v2
[0][1];
148 /* det = cross(e,f).z */
149 const float det
= ex
* fy
- ey
* fx
;
151 debug_printf(" - ccw\n");
153 debug_printf(" - cw\n");
155 debug_printf(" - zero area\n");
158 lp_setup_print_vertex(setup
, "v0", v0
);
159 lp_setup_print_vertex(setup
, "v1", v1
);
160 lp_setup_print_vertex(setup
, "v2", v2
);
/* Rasterizer opcode table indexed by plane count: entry n (1..8) is
 * LP_RAST_OP_TRIANGLE_n.  lp_setup_bin_triangle indexes it with nr_planes
 * or with the number of partially covering planes.
 * NOTE(review): the element-type line and the closing brace are missing
 * from this extract.
 */
165 lp_rast_tri_tab
[9] = {
166 0, /* should be impossible */
167 LP_RAST_OP_TRIANGLE_1
,
168 LP_RAST_OP_TRIANGLE_2
,
169 LP_RAST_OP_TRIANGLE_3
,
170 LP_RAST_OP_TRIANGLE_4
,
171 LP_RAST_OP_TRIANGLE_5
,
172 LP_RAST_OP_TRIANGLE_6
,
173 LP_RAST_OP_TRIANGLE_7
,
174 LP_RAST_OP_TRIANGLE_8
180 * The primitive covers the whole tile- shade whole tile.
182 * \param tx, ty the tile position in tiles, not pixels
/* Bins a whole-tile shade command for tile (tx, ty) and returns the result
 * of lp_scene_bin_command().
 * When inputs->opaque is set, LP_RAST_OP_SHADE_TILE_OPAQUE is binned, and
 * if the framebuffer has no zsbuf the bin is reset first.  A
 * LP_RAST_OP_SHADE_TILE command is binned by the later return; presumably
 * that is the non-opaque path, but the braces are missing here - confirm.
 * NOTE(review): nr_fully_covered_64 is counted here and also by the caller
 * in lp_setup_bin_triangle just before it calls this function, which looks
 * like double counting - confirm.
 */
185 lp_setup_whole_tile(struct lp_setup_context
*setup
,
186 const struct lp_rast_shader_inputs
*inputs
,
189 struct lp_scene
*scene
= setup
->scene
;
191 LP_COUNT(nr_fully_covered_64
);
193 /* if variant is opaque and scissor doesn't affect the tile */
194 if (inputs
->opaque
) {
195 if (!scene
->fb
.zsbuf
) {
197 * All previous rendering will be overwritten so reset the bin.
199 lp_scene_bin_reset( scene
, tx
, ty
);
202 LP_COUNT(nr_shade_opaque_64
);
203 return lp_scene_bin_command( scene
, tx
, ty
,
204 LP_RAST_OP_SHADE_TILE_OPAQUE
,
205 lp_rast_arg_inputs(inputs
) );
207 LP_COUNT(nr_shade_64
);
208 return lp_scene_bin_command( scene
, tx
, ty
,
209 LP_RAST_OP_SHADE_TILE
,
210 lp_rast_arg_inputs(inputs
) );
216 * Do basic setup for triangle rasterization and determine which
217 * framebuffer tiles are touched. Put the triangle in the scene's
218 * bins for the tiles which we overlap.
/* Sets up one triangle and hands it to lp_setup_bin_triangle(), whose
 * result is returned.  Visible steps, in order:
 *  - snap each vertex x/y (minus setup->pixel_offset) to fixed point;
 *  - derive an integer bounding box from the min/max of those values and
 *    clip it against setup->draw_region;
 *  - allocate the lp_rast_triangle and copy the vertex x/y into tri->v;
 *  - fill dcdx/dcdy for the three edge planes and compute `area` from them;
 *  - call the setup variant's jit_function and fill tri->inputs
 *    (facing, disable, opaque, state);
 *  - per edge plane compute c, apply the fill-convention adjustments,
 *    scale dcdx/dcdy by FIXED_ONE and derive eo and ei;
 *  - when nr_planes == 7, fill planes 3..6 from the bounding box.
 * nr_culled_tris is bumped on the empty-bbox path, on the path where the
 * bbox does not intersect the draw region, and on the path that gives the
 * allocation back with lp_scene_putback_data().
 * NOTE(review): many lines (local declarations, conditions, braces, early
 * returns, the scissor branch body) are missing from this extract.
 */
221 do_triangle_ccw(struct lp_setup_context
*setup
,
222 const float (*v0
)[4],
223 const float (*v1
)[4],
224 const float (*v2
)[4],
225 boolean frontfacing
)
227 struct lp_scene
*scene
= setup
->scene
;
228 const struct lp_setup_variant_key
*key
= &setup
->setup
.variant
->key
;
229 struct lp_rast_triangle
*tri
;
239 lp_setup_print_triangle(setup
, v0
, v1
, v2
);
241 if (setup
->scissor_test
) {
248 /* x/y positions in fixed point */
249 x
[0] = subpixel_snap(v0
[0][0] - setup
->pixel_offset
);
250 x
[1] = subpixel_snap(v1
[0][0] - setup
->pixel_offset
);
251 x
[2] = subpixel_snap(v2
[0][0] - setup
->pixel_offset
);
252 y
[0] = subpixel_snap(v0
[0][1] - setup
->pixel_offset
);
253 y
[1] = subpixel_snap(v1
[0][1] - setup
->pixel_offset
);
254 y
[2] = subpixel_snap(v2
[0][1] - setup
->pixel_offset
);
257 /* Bounding rectangle (in pixels) */
259 /* Yes this is necessary to accurately calculate bounding boxes
260 * with the two fill-conventions we support. GL (normally) ends
261 * up needing a bottom-left fill convention, which requires
262 * slightly different rounding.
264 int adj
= (setup
->pixel_offset
!= 0) ? 1 : 0;
266 bbox
.x0
= (MIN3(x
[0], x
[1], x
[2]) + (FIXED_ONE
-1)) >> FIXED_ORDER
;
267 bbox
.x1
= (MAX3(x
[0], x
[1], x
[2]) + (FIXED_ONE
-1)) >> FIXED_ORDER
;
268 bbox
.y0
= (MIN3(y
[0], y
[1], y
[2]) + (FIXED_ONE
-1) + adj
) >> FIXED_ORDER
;
269 bbox
.y1
= (MAX3(y
[0], y
[1], y
[2]) + (FIXED_ONE
-1) + adj
) >> FIXED_ORDER
;
271 /* Inclusive coordinates:
277 if (bbox
.x1
< bbox
.x0
||
279 if (0) debug_printf("empty bounding box\n");
280 LP_COUNT(nr_culled_tris
);
284 if (!u_rect_test_intersection(&setup
->draw_region
, &bbox
)) {
285 if (0) debug_printf("offscreen\n");
286 LP_COUNT(nr_culled_tris
);
290 u_rect_find_intersection(&setup
->draw_region
, &bbox
);
292 tri
= lp_setup_alloc_triangle(scene
,
300 tri
->v
[0][0] = v0
[0][0];
301 tri
->v
[1][0] = v1
[0][0];
302 tri
->v
[2][0] = v2
[0][0];
303 tri
->v
[0][1] = v0
[0][1];
304 tri
->v
[1][1] = v1
[0][1];
305 tri
->v
[2][1] = v2
[0][1];
308 tri
->plane
[0].dcdy
= x
[0] - x
[1];
309 tri
->plane
[1].dcdy
= x
[1] - x
[2];
310 tri
->plane
[2].dcdy
= x
[2] - x
[0];
312 tri
->plane
[0].dcdx
= y
[0] - y
[1];
313 tri
->plane
[1].dcdx
= y
[1] - y
[2];
314 tri
->plane
[2].dcdx
= y
[2] - y
[0];
316 area
= (tri
->plane
[0].dcdy
* tri
->plane
[2].dcdx
-
317 tri
->plane
[2].dcdy
* tri
->plane
[0].dcdx
);
321 /* Cull non-ccw and zero-sized triangles.
323 * XXX: subject to overflow??
326 lp_scene_putback_data( scene
, tri_bytes
);
327 LP_COUNT(nr_culled_tris
);
331 /* Setup parameter interpolants:
333 setup
->setup
.variant
->jit_function( v0
,
340 &setup
->setup
.variant
->key
);
342 tri
->inputs
.facing
= frontfacing
? 1.0F
: -1.0F
;
343 tri
->inputs
.disable
= FALSE
;
344 tri
->inputs
.opaque
= setup
->fs
.current
.variant
->opaque
;
345 tri
->inputs
.state
= setup
->fs
.stored
;
348 lp_dump_setup_coef(&setup
->setup
.variant
->key
,
349 (const float (*)[4])tri
->inputs
.a0
,
350 (const float (*)[4])tri
->inputs
.dadx
,
351 (const float (*)[4])tri
->inputs
.dady
);
353 for (i
= 0; i
< 3; i
++) {
354 struct lp_rast_plane
*plane
= &tri
->plane
[i
];
356 /* half-edge constants, will be iterated over the whole render
359 plane
->c
= plane
->dcdx
* x
[i
] - plane
->dcdy
* y
[i
];
361 /* correct for top-left vs. bottom-left fill convention.
363 * note that we're overloading gl_rasterization_rules to mean
364 * both (0.5,0.5) pixel centers *and* bottom-left filling
367 * GL actually has a top-left filling convention, but GL's
368 * notion of "top" differs from gallium's...
370 * Also, sometimes (in FBO cases) GL will render upside down
371 * to its usual method, in which case it will probably want
372 * to use the opposite, top-left convention.
374 if (plane
->dcdx
< 0) {
375 /* both fill conventions want this - adjust for left edges */
378 else if (plane
->dcdx
== 0) {
379 if (setup
->pixel_offset
== 0) {
380 /* correct for top-left fill convention:
382 if (plane
->dcdy
> 0) plane
->c
++;
385 /* correct for bottom-left fill convention:
387 if (plane
->dcdy
< 0) plane
->c
++;
391 plane
->dcdx
*= FIXED_ONE
;
392 plane
->dcdy
*= FIXED_ONE
;
394 /* find trivial reject offsets for each edge for a single-pixel
395 * sized block. These will be scaled up at each recursive level to
396 * match the active blocksize. Scaling in this way works best if
397 * the blocks are square.
400 if (plane
->dcdx
< 0) plane
->eo
-= plane
->dcdx
;
401 if (plane
->dcdy
> 0) plane
->eo
+= plane
->dcdy
;
403 /* Calculate trivial accept offsets from the above.
405 plane
->ei
= plane
->dcdy
- plane
->dcdx
- plane
->eo
;
410 * When rasterizing scissored tris, use the intersection of the
411 * triangle bounding box and the scissor rect to generate the
414 * This permits us to cut off the triangle "tails" that are present
415 * in the intermediate recursive levels caused when two of the
416 * triangles edges don't diverge quickly enough to trivially reject
417 * exterior blocks from the triangle.
419 * It's not really clear if it's worth worrying about these tails,
420 * but since we generate the planes for each scissored tri, it's
421 * free to trim them in this case.
423 * Note that otherwise, the scissor planes only vary in 'C' value,
424 * and even then only on state-changes. Could alternatively store
425 * these planes elsewhere.
427 if (nr_planes
== 7) {
428 tri
->plane
[3].dcdx
= -1;
429 tri
->plane
[3].dcdy
= 0;
430 tri
->plane
[3].c
= 1-bbox
.x0
;
431 tri
->plane
[3].ei
= 0;
432 tri
->plane
[3].eo
= 1;
434 tri
->plane
[4].dcdx
= 1;
435 tri
->plane
[4].dcdy
= 0;
436 tri
->plane
[4].c
= bbox
.x1
+1;
437 tri
->plane
[4].ei
= -1;
438 tri
->plane
[4].eo
= 0;
440 tri
->plane
[5].dcdx
= 0;
441 tri
->plane
[5].dcdy
= 1;
442 tri
->plane
[5].c
= 1-bbox
.y0
;
443 tri
->plane
[5].ei
= 0;
444 tri
->plane
[5].eo
= 1;
446 tri
->plane
[6].dcdx
= 0;
447 tri
->plane
[6].dcdy
= -1;
448 tri
->plane
[6].c
= bbox
.y1
+1;
449 tri
->plane
[6].ei
= -1;
450 tri
->plane
[6].eo
= 0;
453 return lp_setup_bin_triangle( setup
, tri
, &bbox
, nr_planes
);
457 * Round to nearest less or equal power of two of the input.
459 * Undefined if no bit set exists, so code should check against 0 first.
/* Helper used by lp_setup_bin_triangle on XOR/OR combinations of bounding
 * box coordinates.
 * NOTE(review): only the signature and the opening preprocessor guard
 * (a GCC + x86 specific path) are visible; the body is missing from this
 * extract.
 */
461 static INLINE
uint32_t
462 floor_pot(uint32_t n
)
464 #if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86)
/* Bins an already set-up triangle into the scene tiles covered by bbox.
 *  - With nr_planes == 3 and a small footprint (sz < 4 or sz < 16, each
 *    with dx < 64) a LP_RAST_OP_TRIANGLE_3_4 or LP_RAST_OP_TRIANGLE_3_16
 *    command is binned into tile (bbox->x0/64, bbox->y0/64); the mask
 *    argument packs the 4-aligned in-tile x offset in the low bits and the
 *    y offset shifted left by 8.
 *  - In the single-tile case (see the assert) one
 *    lp_rast_tri_tab[nr_planes] command with all plane bits set is binned.
 *  - Otherwise per-plane c/ei/eo/xstep/ystep values are scaled to tile
 *    size and every tile in [ix0..ix1] x [iy0..iy1] is tested: tiles with
 *    partially covering planes get a lp_rast_tri_tab[count] command,
 *    fully covered tiles go through lp_setup_whole_tile().
 * The last visible statement sets tri->inputs.disable to TRUE so that
 * commands already binned for this triangle are neutralised.
 * NOTE(review): the remaining parameter, local declarations, several
 * conditions, the step updates and the return statements are missing from
 * this extract.
 */
484 lp_setup_bin_triangle( struct lp_setup_context
*setup
,
485 struct lp_rast_triangle
*tri
,
486 const struct u_rect
*bbox
,
489 struct lp_scene
*scene
= setup
->scene
;
492 /* What is the largest power-of-two boundary this triangle crosses:
494 int dx
= floor_pot((bbox
->x0
^ bbox
->x1
) |
495 (bbox
->y0
^ bbox
->y1
));
497 /* The largest dimension of the rasterized area of the triangle
498 * (aligned to a 4x4 grid), rounded down to the nearest power of two:
500 int sz
= floor_pot((bbox
->x1
- (bbox
->x0
& ~3)) |
501 (bbox
->y1
- (bbox
->y0
& ~3)));
503 if (nr_planes
== 3) {
504 if (sz
< 4 && dx
< 64)
506 /* Triangle is contained in a single 4x4 stamp:
508 int mask
= (bbox
->x0
& 63 & ~3) | ((bbox
->y0
& 63 & ~3) << 8);
510 return lp_scene_bin_command( scene
,
511 bbox
->x0
/64, bbox
->y0
/64,
512 LP_RAST_OP_TRIANGLE_3_4
,
513 lp_rast_arg_triangle(tri
, mask
) );
516 if (sz
< 16 && dx
< 64)
518 int mask
= (bbox
->x0
& 63 & ~3) | ((bbox
->y0
& 63 & ~3) << 8);
520 /* Triangle is contained in a single 16x16 block:
522 return lp_scene_bin_command( scene
,
523 bbox
->x0
/64, bbox
->y0
/64,
524 LP_RAST_OP_TRIANGLE_3_16
,
525 lp_rast_arg_triangle(tri
, mask
) );
530 /* Determine which tile(s) intersect the triangle's bounding box
534 int ix0
= bbox
->x0
/ TILE_SIZE
;
535 int iy0
= bbox
->y0
/ TILE_SIZE
;
537 assert(iy0
== bbox
->y1
/ TILE_SIZE
&&
538 ix0
== bbox
->x1
/ TILE_SIZE
);
540 /* Triangle is contained in a single tile:
542 return lp_scene_bin_command( scene
, ix0
, iy0
,
543 lp_rast_tri_tab
[nr_planes
],
544 lp_rast_arg_triangle(tri
, (1<<nr_planes
)-1) );
555 int ix0
= bbox
->x0
/ TILE_SIZE
;
556 int iy0
= bbox
->y0
/ TILE_SIZE
;
557 int ix1
= bbox
->x1
/ TILE_SIZE
;
558 int iy1
= bbox
->y1
/ TILE_SIZE
;
560 for (i
= 0; i
< nr_planes
; i
++) {
561 c
[i
] = (tri
->plane
[i
].c
+
562 tri
->plane
[i
].dcdy
* iy0
* TILE_SIZE
-
563 tri
->plane
[i
].dcdx
* ix0
* TILE_SIZE
);
565 ei
[i
] = tri
->plane
[i
].ei
<< TILE_ORDER
;
566 eo
[i
] = tri
->plane
[i
].eo
<< TILE_ORDER
;
567 xstep
[i
] = -(tri
->plane
[i
].dcdx
<< TILE_ORDER
);
568 ystep
[i
] = tri
->plane
[i
].dcdy
<< TILE_ORDER
;
573 /* Test tile-sized blocks against the triangle.
574 * Discard blocks fully outside the tri. If the block is fully
575 * contained inside the tri, bin an lp_rast_shade_tile command.
576 * Else, bin a lp_rast_triangle command.
578 for (y
= iy0
; y
<= iy1
; y
++)
580 boolean in
= FALSE
; /* are we inside the triangle? */
583 for (i
= 0; i
< nr_planes
; i
++)
586 for (x
= ix0
; x
<= ix1
; x
++)
591 for (i
= 0; i
< nr_planes
; i
++) {
/* ">> 31" extracts the sign (assumes 32-bit int with arithmetic shift):
 * a negative cx + eo marks the tile as outside, a negative
 * cx + ei - 1 sets bit i of `partial` for this plane.
 */
592 int planeout
= cx
[i
] + eo
[i
];
593 int planepartial
= cx
[i
] + ei
[i
] - 1;
594 out
|= (planeout
>> 31);
595 partial
|= (planepartial
>> 31) & (1<<i
);
601 break; /* exiting triangle, all done with this row */
602 LP_COUNT(nr_empty_64
);
605 /* Not trivially accepted by at least one plane -
606 * rasterize/shade partial tile
608 int count
= util_bitcount(partial
);
610 if (!lp_scene_bin_command( scene
, x
, y
,
611 lp_rast_tri_tab
[count
],
612 lp_rast_arg_triangle(tri
, partial
) ))
615 LP_COUNT(nr_partially_covered_64
);
618 /* triangle covers the whole tile- shade whole tile */
619 LP_COUNT(nr_fully_covered_64
);
621 if (!lp_setup_whole_tile(setup
, &tri
->inputs
, x
, y
))
625 /* Iterate cx values across the region:
627 for (i
= 0; i
< nr_planes
; i
++)
631 /* Iterate c values down the region:
633 for (i
= 0; i
< nr_planes
; i
++)
641 /* Need to disable any partially binned triangle. This is easier
642 * than trying to locate all the triangle, shade-tile, etc,
643 * commands which may have been binned.
645 tri
->inputs
.disable
= TRUE
;
651 * Draw triangle if it's CW, cull otherwise.
/* Submits the triangle to do_triangle_ccw() with v0 and v1 swapped and
 * with facing set to !setup->ccw_is_frontface.  If that call fails,
 * lp_setup_flush_and_restart() is invoked and the same call is made a
 * second time.
 * NOTE(review): braces and whatever the second failure leads to are not
 * visible in this extract.
 */
653 static void triangle_cw( struct lp_setup_context
*setup
,
654 const float (*v0
)[4],
655 const float (*v1
)[4],
656 const float (*v2
)[4] )
658 if (!do_triangle_ccw( setup
, v1
, v0
, v2
, !setup
->ccw_is_frontface
))
660 lp_setup_flush_and_restart(setup
);
662 if (!do_triangle_ccw( setup
, v1
, v0
, v2
, !setup
->ccw_is_frontface
))
669 * Draw triangle if it's CCW, cull otherwise.
/* Submits the triangle to do_triangle_ccw() in its given vertex order with
 * facing set to setup->ccw_is_frontface.  If that call fails,
 * lp_setup_flush_and_restart() is invoked and the same call is made a
 * second time.
 * NOTE(review): braces and whatever the second failure leads to are not
 * visible in this extract.
 */
671 static void triangle_ccw( struct lp_setup_context
*setup
,
672 const float (*v0
)[4],
673 const float (*v1
)[4],
674 const float (*v2
)[4] )
676 if (!do_triangle_ccw( setup
, v0
, v1
, v2
, setup
->ccw_is_frontface
))
678 lp_setup_flush_and_restart(setup
);
679 if (!do_triangle_ccw( setup
, v0
, v1
, v2
, setup
->ccw_is_frontface
))
687 * Draw triangle whether it's CW or CCW.
/* Computes det from the x/y of attribute 0 of the three vertices and
 * dispatches to triangle_ccw() or triangle_cw() with the vertices in
 * their original order.
 * NOTE(review): the tests on det that choose between the two calls are not
 * visible in this extract.
 */
689 static void triangle_both( struct lp_setup_context
*setup
,
690 const float (*v0
)[4],
691 const float (*v1
)[4],
692 const float (*v2
)[4] )
694 /* edge vectors e = v0 - v2, f = v1 - v2 */
695 const float ex
= v0
[0][0] - v2
[0][0];
696 const float ey
= v0
[0][1] - v2
[0][1];
697 const float fx
= v1
[0][0] - v2
[0][0];
698 const float fy
= v1
[0][1] - v2
[0][1];
700 /* det = cross(e,f).z */
701 const float det
= ex
* fy
- ey
* fx
;
703 triangle_ccw( setup
, v0
, v1
, v2
);
705 triangle_cw( setup
, v0
, v1
, v2
);
/* Triangle handler with the same signature as triangle_cw/ccw/both;
 * lp_setup_choose_triangle installs it in its last visible switch arm.
 * NOTE(review): the body is missing from this extract - presumably empty,
 * confirm.
 */
709 static void triangle_nop( struct lp_setup_context
*setup
,
710 const float (*v0
)[4],
711 const float (*v1
)[4],
712 const float (*v2
)[4] )
/* Selects the setup->triangle handler from setup->cullmode.  The visible
 * arms install triangle_both, then a handler picked by ccw_is_frontface
 * (triangle_ccw when set, else triangle_cw), then for PIPE_FACE_FRONT the
 * opposite pick, and finally triangle_nop.
 * NOTE(review): all case labels except PIPE_FACE_FRONT, and the break
 * statements, are missing from this extract.
 */
718 lp_setup_choose_triangle( struct lp_setup_context
*setup
)
720 switch (setup
->cullmode
) {
722 setup
->triangle
= triangle_both
;
725 setup
->triangle
= setup
->ccw_is_frontface
? triangle_ccw
: triangle_cw
;
727 case PIPE_FACE_FRONT
:
728 setup
->triangle
= setup
->ccw_is_frontface
? triangle_cw
: triangle_ccw
;
731 setup
->triangle
= triangle_nop
;