X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fllvmpipe%2Flp_setup_tri.c;h=bc48eb8d1b54785287ed94a4a7f47f3f9448c2ee;hb=4195febeecd2d2f5571afdb90cbb185a4759f50a;hp=b56006154202b9456b832bd517d712d614ccbd32;hpb=149cb7682e37ce719d693f120e68dde60ba73bdf;p=mesa.git diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index b5600615420..bc48eb8d1b5 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -31,227 +31,32 @@ #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_rect.h" #include "lp_perf.h" #include "lp_setup_context.h" +#include "lp_setup_coef.h" #include "lp_rast.h" #include "lp_state_fs.h" #define NUM_CHANNELS 4 -/** - * Compute a0 for a constant-valued coefficient (GL_FLAT shading). - */ -static void constant_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, - unsigned slot, - const float value, - unsigned i ) -{ - tri->inputs.a0[slot][i] = value; - tri->inputs.dadx[slot][i] = 0.0f; - tri->inputs.dady[slot][i] = 0.0f; -} - - -/** - * Compute a0, dadx and dady for a linearly interpolated coefficient, - * for a triangle. - */ -static void linear_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, - float oneoverarea, - unsigned slot, - const float (*v1)[4], - const float (*v2)[4], - const float (*v3)[4], - unsigned vert_attr, - unsigned i) -{ - float a1 = v1[vert_attr][i]; - float a2 = v2[vert_attr][i]; - float a3 = v3[vert_attr][i]; - - float da12 = a1 - a2; - float da31 = a3 - a1; - float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * oneoverarea; - float dady = (da31 * tri->dx12 - tri->dx31 * da12) * oneoverarea; - - tri->inputs.dadx[slot][i] = dadx; - tri->inputs.dady[slot][i] = dady; - - /* calculate a0 as the value which would be sampled for the - * fragment at (0,0), taking into account that we want to sample at - * pixel centers, in other words (0.5, 0.5). - * - * this is neat but unfortunately not a good way to do things for - * triangles with very large values of dadx or dady as it will - * result in the subtraction and re-addition from a0 of a very - * large number, which means we'll end up loosing a lot of the - * fractional bits and precision from a0. the way to fix this is - * to define a0 as the sample at a pixel center somewhere near vmin - * instead - i'll switch to this later. - */ - tri->inputs.a0[slot][i] = (a1 - - (dadx * (v1[0][0] - setup->pixel_offset) + - dady * (v1[0][1] - setup->pixel_offset))); -} - - -/** - * Compute a0, dadx and dady for a perspective-corrected interpolant, - * for a triangle. - * We basically multiply the vertex value by 1/w before computing - * the plane coefficients (a0, dadx, dady). - * Later, when we compute the value at a particular fragment position we'll - * divide the interpolated value by the interpolated W at that fragment. - */ -static void perspective_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, - float oneoverarea, - unsigned slot, - const float (*v1)[4], - const float (*v2)[4], - const float (*v3)[4], - unsigned vert_attr, - unsigned i) -{ - /* premultiply by 1/w (v[0][3] is always 1/w): - */ - float a1 = v1[vert_attr][i] * v1[0][3]; - float a2 = v2[vert_attr][i] * v2[0][3]; - float a3 = v3[vert_attr][i] * v3[0][3]; - float da12 = a1 - a2; - float da31 = a3 - a1; - float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * oneoverarea; - float dady = (da31 * tri->dx12 - tri->dx31 * da12) * oneoverarea; - - tri->inputs.dadx[slot][i] = dadx; - tri->inputs.dady[slot][i] = dady; - tri->inputs.a0[slot][i] = (a1 - - (dadx * (v1[0][0] - setup->pixel_offset) + - dady * (v1[0][1] - setup->pixel_offset))); -} - - -/** - * Special coefficient setup for gl_FragCoord. - * X and Y are trivial - * Z and W are copied from position_coef which should have already been computed. - * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. - */ -static void -setup_fragcoord_coef(struct lp_setup_context *setup, - struct lp_rast_triangle *tri, - float oneoverarea, - unsigned slot, - const float (*v1)[4], - const float (*v2)[4], - const float (*v3)[4]) + +static INLINE int +subpixel_snap(float a) { - /*X*/ - tri->inputs.a0[slot][0] = 0.0; - tri->inputs.dadx[slot][0] = 1.0; - tri->inputs.dady[slot][0] = 0.0; - /*Y*/ - tri->inputs.a0[slot][1] = 0.0; - tri->inputs.dadx[slot][1] = 0.0; - tri->inputs.dady[slot][1] = 1.0; - /*Z*/ - linear_coef(setup, tri, oneoverarea, slot, v1, v2, v3, 0, 2); - /*W*/ - linear_coef(setup, tri, oneoverarea, slot, v1, v2, v3, 0, 3); + return util_iround(FIXED_ONE * a); } - -/** - * Setup the fragment input attribute with the front-facing value. - * \param frontface is the triangle front facing? - */ -static void setup_facing_coef( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, - unsigned slot, - boolean frontface ) +static INLINE float +fixed_to_float(int a) { - /* convert TRUE to 1.0 and FALSE to -1.0 */ - constant_coef( setup, tri, slot, 2.0f * frontface - 1.0f, 0 ); - constant_coef( setup, tri, slot, 0.0f, 1 ); /* wasted */ - constant_coef( setup, tri, slot, 0.0f, 2 ); /* wasted */ - constant_coef( setup, tri, slot, 0.0f, 3 ); /* wasted */ + return a * (1.0 / FIXED_ONE); } -/** - * Compute the tri->coef[] array dadx, dady, a0 values. - */ -static void setup_tri_coefficients( struct lp_setup_context *setup, - struct lp_rast_triangle *tri, - float oneoverarea, - const float (*v1)[4], - const float (*v2)[4], - const float (*v3)[4], - boolean frontface) -{ - unsigned slot; - - /* The internal position input is in slot zero: - */ - setup_fragcoord_coef(setup, tri, oneoverarea, 0, v1, v2, v3); - - /* setup interpolation for all the remaining attributes: - */ - for (slot = 0; slot < setup->fs.nr_inputs; slot++) { - unsigned vert_attr = setup->fs.input[slot].src_index; - unsigned i; - - switch (setup->fs.input[slot].interp) { - case LP_INTERP_CONSTANT: - if (setup->flatshade_first) { - for (i = 0; i < NUM_CHANNELS; i++) - if (setup->fs.input[slot].usage_mask & (1 << i)) - constant_coef(setup, tri, slot+1, v1[vert_attr][i], i); - } - else { - for (i = 0; i < NUM_CHANNELS; i++) - if (setup->fs.input[slot].usage_mask & (1 << i)) - constant_coef(setup, tri, slot+1, v3[vert_attr][i], i); - } - break; - - case LP_INTERP_LINEAR: - for (i = 0; i < NUM_CHANNELS; i++) - if (setup->fs.input[slot].usage_mask & (1 << i)) - linear_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i); - break; - - case LP_INTERP_PERSPECTIVE: - for (i = 0; i < NUM_CHANNELS; i++) - if (setup->fs.input[slot].usage_mask & (1 << i)) - perspective_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i); - break; - - case LP_INTERP_POSITION: - /* XXX: fix me - duplicates the values in slot zero. - */ - setup_fragcoord_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3); - break; - - case LP_INTERP_FACING: - setup_facing_coef(setup, tri, slot+1, frontface); - break; - - default: - assert(0); - } - } -} - -static INLINE int subpixel_snap( float a ) -{ - return util_iround(FIXED_ONE * a - (FIXED_ONE / 2)); -} @@ -263,22 +68,24 @@ static INLINE int subpixel_snap( float a ) * \param nr_inputs number of fragment shader inputs * \return pointer to triangle space */ -static INLINE struct lp_rast_triangle * -alloc_triangle(struct lp_scene *scene, unsigned nr_inputs, unsigned *tri_size) +struct lp_rast_triangle * +lp_setup_alloc_triangle(struct lp_scene *scene, + unsigned nr_inputs, + unsigned nr_planes, + unsigned *tri_size) { unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float); struct lp_rast_triangle *tri; - unsigned bytes; + unsigned tri_bytes, bytes; char *inputs; - assert(sizeof(*tri) % 16 == 0); - - bytes = sizeof(*tri) + (3 * input_array_sz); + tri_bytes = align(Offset(struct lp_rast_triangle, plane[nr_planes]), 16); + bytes = tri_bytes + (3 * input_array_sz); tri = lp_scene_alloc_aligned( scene, bytes, 16 ); if (tri) { - inputs = (char *) (tri + 1); + inputs = ((char *)tri) + tri_bytes; tri->inputs.a0 = (float (*)[4]) inputs; tri->inputs.dadx = (float (*)[4]) (inputs + input_array_sz); tri->inputs.dady = (float (*)[4]) (inputs + 2 * input_array_sz); @@ -289,30 +96,120 @@ alloc_triangle(struct lp_scene *scene, unsigned nr_inputs, unsigned *tri_size) return tri; } +void +lp_setup_print_vertex(struct lp_setup_context *setup, + const char *name, + const float (*v)[4]) +{ + int i, j; + + debug_printf(" wpos (%s[0]) xyzw %f %f %f %f\n", + name, + v[0][0], v[0][1], v[0][2], v[0][3]); + + for (i = 0; i < setup->fs.nr_inputs; i++) { + const float *in = v[setup->fs.input[i].src_index]; + + debug_printf(" in[%d] (%s[%d]) %s%s%s%s ", + i, + name, setup->fs.input[i].src_index, + (setup->fs.input[i].usage_mask & 0x1) ? "x" : " ", + (setup->fs.input[i].usage_mask & 0x2) ? "y" : " ", + (setup->fs.input[i].usage_mask & 0x4) ? "z" : " ", + (setup->fs.input[i].usage_mask & 0x8) ? "w" : " "); + + for (j = 0; j < 4; j++) + if (setup->fs.input[i].usage_mask & (1<fs.nr_inputs; i++) { - debug_printf(" v1[%d]: %f %f %f %f\n", i, - v1[i][0], v1[i][1], v1[i][2], v1[i][3]); - } - for (i = 0; i < setup->fs.nr_inputs; i++) { - debug_printf(" v2[%d]: %f %f %f %f\n", i, - v2[i][0], v2[i][1], v2[i][2], v2[i][3]); + { + const float ex = v0[0][0] - v2[0][0]; + const float ey = v0[0][1] - v2[0][1]; + const float fx = v1[0][0] - v2[0][0]; + const float fy = v1[0][1] - v2[0][1]; + + /* det = cross(e,f).z */ + const float det = ex * fy - ey * fx; + if (det < 0.0f) + debug_printf(" - ccw\n"); + else if (det > 0.0f) + debug_printf(" - cw\n"); + else + debug_printf(" - zero area\n"); } - for (i = 0; i < setup->fs.nr_inputs; i++) { - debug_printf(" v3[%d]: %f %f %f %f\n", i, - v3[i][0], v3[i][1], v3[i][2], v3[i][3]); + + lp_setup_print_vertex(setup, "v0", v0); + lp_setup_print_vertex(setup, "v1", v1); + lp_setup_print_vertex(setup, "v2", v2); +} + + +#define MAX_PLANES 8 +static unsigned +lp_rast_tri_tab[MAX_PLANES+1] = { + 0, /* should be impossible */ + LP_RAST_OP_TRIANGLE_1, + LP_RAST_OP_TRIANGLE_2, + LP_RAST_OP_TRIANGLE_3, + LP_RAST_OP_TRIANGLE_4, + LP_RAST_OP_TRIANGLE_5, + LP_RAST_OP_TRIANGLE_6, + LP_RAST_OP_TRIANGLE_7, + LP_RAST_OP_TRIANGLE_8 +}; + + + +/** + * The primitive covers the whole tile- shade whole tile. + * + * \param tx, ty the tile position in tiles, not pixels + */ +static boolean +lp_setup_whole_tile(struct lp_setup_context *setup, + const struct lp_rast_shader_inputs *inputs, + int tx, int ty) +{ + struct lp_scene *scene = setup->scene; + + LP_COUNT(nr_fully_covered_64); + + /* if variant is opaque and scissor doesn't effect the tile */ + if (inputs->opaque) { + if (!scene->fb.zsbuf) { + /* + * All previous rendering will be overwritten so reset the bin. + */ + lp_scene_bin_reset( scene, tx, ty ); + } + + LP_COUNT(nr_shade_opaque_64); + return lp_scene_bin_cmd_with_state( scene, tx, ty, + setup->fs.stored, + LP_RAST_OP_SHADE_TILE_OPAQUE, + lp_rast_arg_inputs(inputs) ); + } else { + LP_COUNT(nr_shade_64); + return lp_scene_bin_cmd_with_state( scene, tx, ty, + setup->fs.stored, + LP_RAST_OP_SHADE_TILE, + lp_rast_arg_inputs(inputs) ); } } @@ -322,329 +219,473 @@ print_triangle(struct lp_setup_context *setup, * framebuffer tiles are touched. Put the triangle in the scene's * bins for the tiles which we overlap. */ -static void +static boolean do_triangle_ccw(struct lp_setup_context *setup, + const float (*v0)[4], const float (*v1)[4], const float (*v2)[4], - const float (*v3)[4], boolean frontfacing ) { - /* x/y positions in fixed point */ - const int x1 = subpixel_snap(v1[0][0] + 0.5 - setup->pixel_offset); - const int x2 = subpixel_snap(v2[0][0] + 0.5 - setup->pixel_offset); - const int x3 = subpixel_snap(v3[0][0] + 0.5 - setup->pixel_offset); - const int y1 = subpixel_snap(v1[0][1] + 0.5 - setup->pixel_offset); - const int y2 = subpixel_snap(v2[0][1] + 0.5 - setup->pixel_offset); - const int y3 = subpixel_snap(v3[0][1] + 0.5 - setup->pixel_offset); - - struct lp_scene *scene = lp_setup_get_current_scene(setup); + struct lp_scene *scene = setup->scene; struct lp_rast_triangle *tri; - int area; - float oneoverarea; - int minx, maxx, miny, maxy; + int x[3]; + int y[3]; + struct u_rect bbox; unsigned tri_bytes; + int i; + int nr_planes = 3; if (0) - print_triangle(setup, v1, v2, v3); - - tri = alloc_triangle(scene, setup->fs.nr_inputs, &tri_bytes); - if (!tri) - return; + lp_setup_print_triangle(setup, v0, v1, v2); -#ifdef DEBUG - tri->v[0][0] = v1[0][0]; - tri->v[1][0] = v2[0][0]; - tri->v[2][0] = v3[0][0]; - tri->v[0][1] = v1[0][1]; - tri->v[1][1] = v2[0][1]; - tri->v[2][1] = v3[0][1]; -#endif + if (setup->scissor_test) { + nr_planes = 7; + } + else { + nr_planes = 3; + } - tri->dx12 = x1 - x2; - tri->dx23 = x2 - x3; - tri->dx31 = x3 - x1; + /* x/y positions in fixed point */ + x[0] = subpixel_snap(v0[0][0] - setup->pixel_offset); + x[1] = subpixel_snap(v1[0][0] - setup->pixel_offset); + x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset); + y[0] = subpixel_snap(v0[0][1] - setup->pixel_offset); + y[1] = subpixel_snap(v1[0][1] - setup->pixel_offset); + y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset); - tri->dy12 = y1 - y2; - tri->dy23 = y2 - y3; - tri->dy31 = y3 - y1; - area = (tri->dx12 * tri->dy31 - tri->dx31 * tri->dy12); + /* Bounding rectangle (in pixels) */ + { + /* Yes this is necessary to accurately calculate bounding boxes + * with the two fill-conventions we support. GL (normally) ends + * up needing a bottom-left fill convention, which requires + * slightly different rounding. + */ + int adj = (setup->pixel_offset != 0) ? 1 : 0; - LP_COUNT(nr_tris); + bbox.x0 = (MIN3(x[0], x[1], x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER; + bbox.x1 = (MAX3(x[0], x[1], x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER; + bbox.y0 = (MIN3(y[0], y[1], y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + bbox.y1 = (MAX3(y[0], y[1], y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; - /* Cull non-ccw and zero-sized triangles. - * - * XXX: subject to overflow?? - */ - if (area <= 0) { - lp_scene_putback_data( scene, tri_bytes ); - LP_COUNT(nr_culled_tris); - return; + /* Inclusive coordinates: + */ + bbox.x1--; + bbox.y1--; } - /* Bounding rectangle (in pixels) */ - minx = (MIN3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER; - maxx = (MAX3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER; - miny = (MIN3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER; - maxy = (MAX3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER; - - if (setup->scissor_test) { - minx = MAX2(minx, setup->scissor.current.minx); - maxx = MIN2(maxx, setup->scissor.current.maxx); - miny = MAX2(miny, setup->scissor.current.miny); - maxy = MIN2(maxy, setup->scissor.current.maxy); + if (bbox.x1 < bbox.x0 || + bbox.y1 < bbox.y0) { + if (0) debug_printf("empty bounding box\n"); + LP_COUNT(nr_culled_tris); + return TRUE; } - if (miny == maxy || - minx == maxx) { - lp_scene_putback_data( scene, tri_bytes ); + if (!u_rect_test_intersection(&setup->draw_region, &bbox)) { + if (0) debug_printf("offscreen\n"); LP_COUNT(nr_culled_tris); - return; + return TRUE; } - /* - */ - oneoverarea = ((float)FIXED_ONE) / (float)area; + u_rect_find_intersection(&setup->draw_region, &bbox); + + tri = lp_setup_alloc_triangle(scene, + setup->fs.nr_inputs, + nr_planes, + &tri_bytes); + if (!tri) + return FALSE; + +#ifdef DEBUG + tri->v[0][0] = v0[0][0]; + tri->v[1][0] = v1[0][0]; + tri->v[2][0] = v2[0][0]; + tri->v[0][1] = v0[0][1]; + tri->v[1][1] = v1[0][1]; + tri->v[2][1] = v2[0][1]; +#endif + + tri->plane[0].dcdy = x[0] - x[1]; + tri->plane[1].dcdy = x[1] - x[2]; + tri->plane[2].dcdy = x[2] - x[0]; + + tri->plane[0].dcdx = y[0] - y[1]; + tri->plane[1].dcdx = y[1] - y[2]; + tri->plane[2].dcdx = y[2] - y[0]; + + LP_COUNT(nr_tris); /* Setup parameter interpolants: */ - setup_tri_coefficients( setup, tri, oneoverarea, v1, v2, v3, frontfacing ); + lp_setup_tri_coef( setup, &tri->inputs, v0, v1, v2, frontfacing ); tri->inputs.facing = frontfacing ? 1.0F : -1.0F; + tri->inputs.disable = FALSE; + tri->inputs.opaque = setup->fs.current.variant->opaque; - /* half-edge constants, will be interated over the whole render target. - */ - tri->c1 = tri->dy12 * x1 - tri->dx12 * y1; - tri->c2 = tri->dy23 * x2 - tri->dx23 * y2; - tri->c3 = tri->dy31 * x3 - tri->dx31 * y3; + + for (i = 0; i < 3; i++) { + struct lp_rast_plane *plane = &tri->plane[i]; - /* correct for top-left fill convention: - */ - if (tri->dy12 < 0 || (tri->dy12 == 0 && tri->dx12 > 0)) tri->c1++; - if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) tri->c2++; - if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) tri->c3++; - - tri->dy12 *= FIXED_ONE; - tri->dy23 *= FIXED_ONE; - tri->dy31 *= FIXED_ONE; - - tri->dx12 *= FIXED_ONE; - tri->dx23 *= FIXED_ONE; - tri->dx31 *= FIXED_ONE; - - /* find trivial reject offsets for each edge for a single-pixel - * sized block. These will be scaled up at each recursive level to - * match the active blocksize. Scaling in this way works best if - * the blocks are square. - */ - tri->eo1 = 0; - if (tri->dy12 < 0) tri->eo1 -= tri->dy12; - if (tri->dx12 > 0) tri->eo1 += tri->dx12; + /* half-edge constants, will be interated over the whole render + * target. + */ + plane->c = plane->dcdx * x[i] - plane->dcdy * y[i]; + + /* correct for top-left vs. bottom-left fill convention. + * + * note that we're overloading gl_rasterization_rules to mean + * both (0.5,0.5) pixel centers *and* bottom-left filling + * convention. + * + * GL actually has a top-left filling convention, but GL's + * notion of "top" differs from gallium's... + * + * Also, sometimes (in FBO cases) GL will render upside down + * to its usual method, in which case it will probably want + * to use the opposite, top-left convention. + */ + if (plane->dcdx < 0) { + /* both fill conventions want this - adjust for left edges */ + plane->c++; + } + else if (plane->dcdx == 0) { + if (setup->pixel_offset == 0) { + /* correct for top-left fill convention: + */ + if (plane->dcdy > 0) plane->c++; + } + else { + /* correct for bottom-left fill convention: + */ + if (plane->dcdy < 0) plane->c++; + } + } - tri->eo2 = 0; - if (tri->dy23 < 0) tri->eo2 -= tri->dy23; - if (tri->dx23 > 0) tri->eo2 += tri->dx23; + plane->dcdx *= FIXED_ONE; + plane->dcdy *= FIXED_ONE; - tri->eo3 = 0; - if (tri->dy31 < 0) tri->eo3 -= tri->dy31; - if (tri->dx31 > 0) tri->eo3 += tri->dx31; + /* find trivial reject offsets for each edge for a single-pixel + * sized block. These will be scaled up at each recursive level to + * match the active blocksize. Scaling in this way works best if + * the blocks are square. + */ + plane->eo = 0; + if (plane->dcdx < 0) plane->eo -= plane->dcdx; + if (plane->dcdy > 0) plane->eo += plane->dcdy; + + /* Calculate trivial accept offsets from the above. + */ + plane->ei = plane->dcdy - plane->dcdx - plane->eo; + } - /* Calculate trivial accept offsets from the above. - */ - tri->ei1 = tri->dx12 - tri->dy12 - tri->eo1; - tri->ei2 = tri->dx23 - tri->dy23 - tri->eo2; - tri->ei3 = tri->dx31 - tri->dy31 - tri->eo3; - /* Fill in the inputs.step[][] arrays. - * We've manually unrolled some loops here. + /* + * When rasterizing scissored tris, use the intersection of the + * triangle bounding box and the scissor rect to generate the + * scissor planes. + * + * This permits us to cut off the triangle "tails" that are present + * in the intermediate recursive levels caused when two of the + * triangles edges don't diverge quickly enough to trivially reject + * exterior blocks from the triangle. + * + * It's not really clear if it's worth worrying about these tails, + * but since we generate the planes for each scissored tri, it's + * free to trim them in this case. + * + * Note that otherwise, the scissor planes only vary in 'C' value, + * and even then only on state-changes. Could alternatively store + * these planes elsewhere. */ - { - const int xstep1 = -tri->dy12; - const int xstep2 = -tri->dy23; - const int xstep3 = -tri->dy31; - const int ystep1 = tri->dx12; - const int ystep2 = tri->dx23; - const int ystep3 = tri->dx31; - -#define SETUP_STEP(i, x, y) \ - do { \ - tri->inputs.step[0][i] = x * xstep1 + y * ystep1; \ - tri->inputs.step[1][i] = x * xstep2 + y * ystep2; \ - tri->inputs.step[2][i] = x * xstep3 + y * ystep3; \ - } while (0) - - SETUP_STEP(0, 0, 0); - SETUP_STEP(1, 1, 0); - SETUP_STEP(2, 0, 1); - SETUP_STEP(3, 1, 1); - - SETUP_STEP(4, 2, 0); - SETUP_STEP(5, 3, 0); - SETUP_STEP(6, 2, 1); - SETUP_STEP(7, 3, 1); - - SETUP_STEP(8, 0, 2); - SETUP_STEP(9, 1, 2); - SETUP_STEP(10, 0, 3); - SETUP_STEP(11, 1, 3); - - SETUP_STEP(12, 2, 2); - SETUP_STEP(13, 3, 2); - SETUP_STEP(14, 2, 3); - SETUP_STEP(15, 3, 3); -#undef STEP + if (nr_planes == 7) { + tri->plane[3].dcdx = -1; + tri->plane[3].dcdy = 0; + tri->plane[3].c = 1-bbox.x0; + tri->plane[3].ei = 0; + tri->plane[3].eo = 1; + + tri->plane[4].dcdx = 1; + tri->plane[4].dcdy = 0; + tri->plane[4].c = bbox.x1+1; + tri->plane[4].ei = -1; + tri->plane[4].eo = 0; + + tri->plane[5].dcdx = 0; + tri->plane[5].dcdy = 1; + tri->plane[5].c = 1-bbox.y0; + tri->plane[5].ei = 0; + tri->plane[5].eo = 1; + + tri->plane[6].dcdx = 0; + tri->plane[6].dcdy = -1; + tri->plane[6].c = bbox.y1+1; + tri->plane[6].ei = -1; + tri->plane[6].eo = 0; } - /* - * All fields of 'tri' are now set. The remaining code here is - * concerned with binning. - */ + return lp_setup_bin_triangle( setup, tri, &bbox, nr_planes ); +} + +/* + * Round to nearest less or equal power of two of the input. + * + * Undefined if no bit set exists, so code should check against 0 first. + */ +static INLINE uint32_t +floor_pot(uint32_t n) +{ +#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86) + if (n == 0) + return 0; + + __asm__("bsr %1,%0" + : "=r" (n) + : "rm" (n)); + return 1 << n; +#else + n |= (n >> 1); + n |= (n >> 2); + n |= (n >> 4); + n |= (n >> 8); + n |= (n >> 16); + return n - (n >> 1); +#endif +} + - /* Convert to tile coordinates: +boolean +lp_setup_bin_triangle( struct lp_setup_context *setup, + struct lp_rast_triangle *tri, + const struct u_rect *bbox, + int nr_planes ) +{ + struct lp_scene *scene = setup->scene; + int i; + + /* What is the largest power-of-two boundary this triangle crosses: */ - minx = minx / TILE_SIZE; - miny = miny / TILE_SIZE; - maxx = maxx / TILE_SIZE; - maxy = maxy / TILE_SIZE; + int dx = floor_pot((bbox->x0 ^ bbox->x1) | + (bbox->y0 ^ bbox->y1)); - /* - * Clamp to framebuffer size + /* The largest dimension of the rasterized area of the triangle + * (aligned to a 4x4 grid), rounded down to the nearest power of two: */ - minx = MAX2(minx, 0); - miny = MAX2(miny, 0); - maxx = MIN2(maxx, scene->tiles_x - 1); - maxy = MIN2(maxy, scene->tiles_y - 1); + int sz = floor_pot((bbox->x1 - (bbox->x0 & ~3)) | + (bbox->y1 - (bbox->y0 & ~3))); /* Determine which tile(s) intersect the triangle's bounding box */ - if (miny == maxy && minx == maxx) + if (dx < TILE_SIZE) { + int ix0 = bbox->x0 / TILE_SIZE; + int iy0 = bbox->y0 / TILE_SIZE; + int px = bbox->x0 & 63 & ~3; + int py = bbox->y0 & 63 & ~3; + int mask = px | (py << 8); + + assert(iy0 == bbox->y1 / TILE_SIZE && + ix0 == bbox->x1 / TILE_SIZE); + + if (nr_planes == 3) { + if (sz < 4) + { + /* Triangle is contained in a single 4x4 stamp: + */ + return lp_scene_bin_cmd_with_state( scene, ix0, iy0, + setup->fs.stored, + LP_RAST_OP_TRIANGLE_3_4, + lp_rast_arg_triangle(tri, mask) ); + } + + if (sz < 16) + { + /* Triangle is contained in a single 16x16 block: + */ + return lp_scene_bin_cmd_with_state( scene, ix0, iy0, + setup->fs.stored, + LP_RAST_OP_TRIANGLE_3_16, + lp_rast_arg_triangle(tri, mask) ); + } + } + else if (nr_planes == 4 && sz < 16) + { + return lp_scene_bin_cmd_with_state(scene, ix0, iy0, + setup->fs.stored, + LP_RAST_OP_TRIANGLE_4_16, + lp_rast_arg_triangle(tri, mask) ); + } + + /* Triangle is contained in a single tile: */ - lp_scene_bin_command( scene, minx, miny, lp_rast_triangle, - lp_rast_arg_triangle(tri) ); + return lp_scene_bin_cmd_with_state( scene, ix0, iy0, setup->fs.stored, + lp_rast_tri_tab[nr_planes], + lp_rast_arg_triangle(tri, (1<c1 + - tri->dx12 * miny * TILE_SIZE - - tri->dy12 * minx * TILE_SIZE); - int c2 = (tri->c2 + - tri->dx23 * miny * TILE_SIZE - - tri->dy23 * minx * TILE_SIZE); - int c3 = (tri->c3 + - tri->dx31 * miny * TILE_SIZE - - tri->dy31 * minx * TILE_SIZE); - - int ei1 = tri->ei1 << TILE_ORDER; - int ei2 = tri->ei2 << TILE_ORDER; - int ei3 = tri->ei3 << TILE_ORDER; - - int eo1 = tri->eo1 << TILE_ORDER; - int eo2 = tri->eo2 << TILE_ORDER; - int eo3 = tri->eo3 << TILE_ORDER; - - int xstep1 = -(tri->dy12 << TILE_ORDER); - int xstep2 = -(tri->dy23 << TILE_ORDER); - int xstep3 = -(tri->dy31 << TILE_ORDER); - - int ystep1 = tri->dx12 << TILE_ORDER; - int ystep2 = tri->dx23 << TILE_ORDER; - int ystep3 = tri->dx31 << TILE_ORDER; + int c[MAX_PLANES]; + int ei[MAX_PLANES]; + int eo[MAX_PLANES]; + int xstep[MAX_PLANES]; + int ystep[MAX_PLANES]; int x, y; + int ix0 = bbox->x0 / TILE_SIZE; + int iy0 = bbox->y0 / TILE_SIZE; + int ix1 = bbox->x1 / TILE_SIZE; + int iy1 = bbox->y1 / TILE_SIZE; + + for (i = 0; i < nr_planes; i++) { + c[i] = (tri->plane[i].c + + tri->plane[i].dcdy * iy0 * TILE_SIZE - + tri->plane[i].dcdx * ix0 * TILE_SIZE); + + ei[i] = tri->plane[i].ei << TILE_ORDER; + eo[i] = tri->plane[i].eo << TILE_ORDER; + xstep[i] = -(tri->plane[i].dcdx << TILE_ORDER); + ystep[i] = tri->plane[i].dcdy << TILE_ORDER; + } + + /* Test tile-sized blocks against the triangle. * Discard blocks fully outside the tri. If the block is fully * contained inside the tri, bin an lp_rast_shade_tile command. * Else, bin a lp_rast_triangle command. */ - for (y = miny; y <= maxy; y++) + for (y = iy0; y <= iy1; y++) { - int cx1 = c1; - int cx2 = c2; - int cx3 = c3; boolean in = FALSE; /* are we inside the triangle? */ + int cx[MAX_PLANES]; - for (x = minx; x <= maxx; x++) + for (i = 0; i < nr_planes; i++) + cx[i] = c[i]; + + for (x = ix0; x <= ix1; x++) { - if (cx1 + eo1 < 0 || - cx2 + eo2 < 0 || - cx3 + eo3 < 0) - { - /* do nothing */ + int out = 0; + int partial = 0; + + for (i = 0; i < nr_planes; i++) { + int planeout = cx[i] + eo[i]; + int planepartial = cx[i] + ei[i] - 1; + out |= (planeout >> 31); + partial |= (planepartial >> 31) & (1< 0 && - cx2 + ei2 > 0 && - cx3 + ei3 > 0) - { + } + else if (partial) { + /* Not trivially accepted by at least one plane - + * rasterize/shade partial tile + */ + int count = util_bitcount(partial); + in = TRUE; + + if (!lp_scene_bin_cmd_with_state( scene, x, y, + setup->fs.stored, + lp_rast_tri_tab[count], + lp_rast_arg_triangle(tri, partial) )) + goto fail; + + LP_COUNT(nr_partially_covered_64); + } + else { /* triangle covers the whole tile- shade whole tile */ LP_COUNT(nr_fully_covered_64); - in = TRUE; - if (setup->fs.current.variant->opaque) { - lp_scene_bin_reset( scene, x, y ); - lp_scene_bin_command( scene, x, y, - lp_rast_set_state, - lp_rast_arg_state(setup->fs.stored) ); - } - lp_scene_bin_command( scene, x, y, - lp_rast_shade_tile, - lp_rast_arg_inputs(&tri->inputs) ); - } - else - { - /* rasterizer/shade partial tile */ - LP_COUNT(nr_partially_covered_64); - in = TRUE; - lp_scene_bin_command( scene, x, y, - lp_rast_triangle, - lp_rast_arg_triangle(tri) ); - } + in = TRUE; + if (!lp_setup_whole_tile(setup, &tri->inputs, x, y)) + goto fail; + } /* Iterate cx values across the region: */ - cx1 += xstep1; - cx2 += xstep2; - cx3 += xstep3; + for (i = 0; i < nr_planes; i++) + cx[i] += xstep[i]; } /* Iterate c values down the region: */ - c1 += ystep1; - c2 += ystep2; - c3 += ystep3; + for (i = 0; i < nr_planes; i++) + c[i] += ystep[i]; } } + + return TRUE; + +fail: + /* Need to disable any partially binned triangle. This is easier + * than trying to locate all the triangle, shade-tile, etc, + * commands which may have been binned. + */ + tri->inputs.disable = TRUE; + return FALSE; } /** - * Draw triangle if it's CW, cull otherwise. + * Try to draw the triangle, restart the scene on failure. */ -static void triangle_cw( struct lp_setup_context *setup, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4] ) +static void retry_triangle_ccw( struct lp_setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4], + boolean front) { - do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface ); + if (!do_triangle_ccw( setup, v0, v1, v2, front )) + { + if (!lp_setup_flush_and_restart(setup)) + return; + + if (!do_triangle_ccw( setup, v0, v1, v2, front )) + return; + } +} + +static INLINE float +calc_area(const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]) +{ + float dx01 = v0[0][0] - v1[0][0]; + float dy01 = v0[0][1] - v1[0][1]; + float dx20 = v2[0][0] - v0[0][0]; + float dy20 = v2[0][1] - v0[0][1]; + return dx01 * dy20 - dx20 * dy01; } /** - * Draw triangle if it's CCW, cull otherwise. + * Draw triangle if it's CW, cull otherwise. */ -static void triangle_ccw( struct lp_setup_context *setup, +static void triangle_cw( struct lp_setup_context *setup, const float (*v0)[4], const float (*v1)[4], const float (*v2)[4] ) { - do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface ); + float area = calc_area(v0, v1, v2); + + if (area < 0.0f) + retry_triangle_ccw(setup, v0, v2, v1, !setup->ccw_is_frontface); } +static void triangle_ccw( struct lp_setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]) +{ + float area = calc_area(v0, v1, v2); + + if (area > 0.0f) + retry_triangle_ccw(setup, v0, v1, v2, setup->ccw_is_frontface); +} /** * Draw triangle whether it's CW or CCW. @@ -654,17 +695,12 @@ static void triangle_both( struct lp_setup_context *setup, const float (*v1)[4], const float (*v2)[4] ) { - /* edge vectors e = v0 - v2, f = v1 - v2 */ - const float ex = v0[0][0] - v2[0][0]; - const float ey = v0[0][1] - v2[0][1]; - const float fx = v1[0][0] - v2[0][0]; - const float fy = v1[0][1] - v2[0][1]; - - /* det = cross(e,f).z */ - if (ex * fy - ey * fx < 0.0f) - triangle_ccw( setup, v0, v1, v2 ); - else - triangle_cw( setup, v0, v1, v2 ); + float area = calc_area(v0, v1, v2); + + if (area > 0.0f) + retry_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface ); + else if (area < 0.0f) + retry_triangle_ccw( setup, v0, v2, v1, !setup->ccw_is_frontface ); }