* Binning code for triangles
*/
-#include "lp_setup_context.h"
-#include "lp_rast.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "lp_perf.h"
+#include "lp_setup_context.h"
+#include "lp_rast.h"
#define NUM_CHANNELS 4
+
/**
* Compute a0 for a constant-valued coefficient (GL_FLAT shading).
*/
-static void constant_coef( struct lp_rast_triangle *tri,
+static void constant_coef( struct lp_setup_context *setup, /* accepted for a uniform signature; not read in this function */
+ struct lp_rast_triangle *tri, /* triangle whose inputs.a0/dadx/dady entries are written */
unsigned slot,
const float value,
unsigned i )
{
tri->inputs.a0[slot][i] = value;
- tri->inputs.dadx[slot][i] = 0;
- tri->inputs.dady[slot][i] = 0;
+ tri->inputs.dadx[slot][i] = 0.0f; /* constant attribute: no change along x */
+ tri->inputs.dady[slot][i] = 0.0f; /* constant attribute: no change along y */
}
+
/**
* Compute a0, dadx and dady for a linearly interpolated coefficient,
* for a triangle.
*/
-static void linear_coef( struct lp_rast_triangle *tri,
+static void linear_coef( struct lp_setup_context *setup,
+ struct lp_rast_triangle *tri,
+ float oneoverarea,
unsigned slot,
const float (*v1)[4],
const float (*v2)[4],
float da12 = a1 - a2;
float da31 = a3 - a1;
- float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea;
- float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea;
+ float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * oneoverarea;
+ float dady = (da31 * tri->dx12 - tri->dx31 * da12) * oneoverarea;
tri->inputs.dadx[slot][i] = dadx;
tri->inputs.dady[slot][i] = dady;
* to define a0 as the sample at a pixel center somewhere near vmin
* instead - i'll switch to this later.
*/
- tri->inputs.a0[slot][i] = (v1[vert_attr][i] -
- (dadx * (v1[0][0] - 0.5f) +
- dady * (v1[0][1] - 0.5f)));
+ tri->inputs.a0[slot][i] = (a1 -
+ (dadx * (v1[0][0] - setup->pixel_offset) +
+ dady * (v1[0][1] - setup->pixel_offset)));
}
* Later, when we compute the value at a particular fragment position we'll
* divide the interpolated value by the interpolated W at that fragment.
*/
-static void perspective_coef( struct lp_rast_triangle *tri,
+static void perspective_coef( struct lp_setup_context *setup,
+ struct lp_rast_triangle *tri,
+ float oneoverarea,
unsigned slot,
const float (*v1)[4],
const float (*v2)[4],
float a3 = v3[vert_attr][i] * v3[0][3];
float da12 = a1 - a2;
float da31 = a3 - a1;
- float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea;
- float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea;
+ float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * oneoverarea;
+ float dady = (da31 * tri->dx12 - tri->dx31 * da12) * oneoverarea;
tri->inputs.dadx[slot][i] = dadx;
tri->inputs.dady[slot][i] = dady;
tri->inputs.a0[slot][i] = (a1 -
- (dadx * (v1[0][0] - 0.5f) +
- dady * (v1[0][1] - 0.5f)));
+ (dadx * (v1[0][0] - setup->pixel_offset) +
+ dady * (v1[0][1] - setup->pixel_offset)));
}
/**
* Special coefficient setup for gl_FragCoord.
- * X and Y are trivial, though Y has to be inverted for OpenGL.
+ * X and Y are trivial
* Z and W are copied from position_coef which should have already been computed.
* We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
*/
static void
-setup_fragcoord_coef(struct lp_rast_triangle *tri,
+setup_fragcoord_coef(struct lp_setup_context *setup,
+ struct lp_rast_triangle *tri,
+ float oneoverarea,
unsigned slot,
const float (*v1)[4],
const float (*v2)[4],
tri->inputs.dadx[slot][1] = 0.0;
tri->inputs.dady[slot][1] = 1.0;
/*Z*/
- linear_coef(tri, slot, v1, v2, v3, 0, 2);
+ linear_coef(setup, tri, oneoverarea, slot, v1, v2, v3, 0, 2);
/*W*/
- linear_coef(tri, slot, v1, v2, v3, 0, 3);
+ linear_coef(setup, tri, oneoverarea, slot, v1, v2, v3, 0, 3);
}
-static void setup_facing_coef( struct lp_rast_triangle *tri,
+/**
+ * Setup the fragment input attribute with the front-facing value.
+ *
+ * Channel 0 of the slot receives +1.0 for a front-facing triangle and
+ * -1.0 otherwise; channels 1..3 are filled with zero.  All four channels
+ * are written as constants (zero dadx/dady) through constant_coef().
+ *
+ * \param setup  setup context, passed through to constant_coef()
+ * \param tri  triangle whose input coefficient arrays are written
+ * \param slot  index of the input slot to fill
+ * \param frontface is the triangle front facing?
+ */
+static void setup_facing_coef( struct lp_setup_context *setup,
+ struct lp_rast_triangle *tri,
unsigned slot,
boolean frontface )
{
- constant_coef( tri, slot, 1.0f - frontface, 0 );
- constant_coef( tri, slot, 0.0f, 1 ); /* wasted */
- constant_coef( tri, slot, 0.0f, 2 ); /* wasted */
- constant_coef( tri, slot, 0.0f, 3 ); /* wasted */
+ /* Convert TRUE to 1.0 and FALSE to -1.0.  A conditional is used rather
+  * than arithmetic on the boolean so that any non-zero value counts as
+  * TRUE; this matches how tri->inputs.facing is derived in
+  * do_triangle_ccw().
+  */
+ constant_coef( setup, tri, slot, frontface ? 1.0f : -1.0f, 0 );
+ constant_coef( setup, tri, slot, 0.0f, 1 ); /* wasted */
+ constant_coef( setup, tri, slot, 0.0f, 2 ); /* wasted */
+ constant_coef( setup, tri, slot, 0.0f, 3 ); /* wasted */
}
/**
* Compute the tri->coef[] array dadx, dady, a0 values.
*/
-static void setup_tri_coefficients( struct setup_context *setup,
+static void setup_tri_coefficients( struct lp_setup_context *setup,
struct lp_rast_triangle *tri,
+ float oneoverarea,
const float (*v1)[4],
const float (*v2)[4],
const float (*v3)[4],
- boolean frontface )
+ boolean frontface)
{
unsigned slot;
- /* Allocate space for the a0, dadx and dady arrays
- */
- {
- unsigned bytes;
- bytes = (setup->fs.nr_inputs + 1) * 4 * sizeof(float);
- tri->inputs.a0 = get_data_aligned( &setup->data, bytes, 16 );
- tri->inputs.dadx = get_data_aligned( &setup->data, bytes, 16 );
- tri->inputs.dady = get_data_aligned( &setup->data, bytes, 16 );
- }
-
/* The internal position input is in slot zero:
*/
- setup_fragcoord_coef(tri, 0, v1, v2, v3);
+ setup_fragcoord_coef(setup, tri, oneoverarea, 0, v1, v2, v3);
/* setup interpolation for all the remaining attributes:
*/
switch (setup->fs.input[slot].interp) {
case LP_INTERP_CONSTANT:
- for (i = 0; i < NUM_CHANNELS; i++)
- constant_coef(tri, slot+1, v3[vert_attr][i], i);
+ if (setup->flatshade_first) {
+ for (i = 0; i < NUM_CHANNELS; i++)
+ constant_coef(setup, tri, slot+1, v1[vert_attr][i], i);
+ }
+ else {
+ for (i = 0; i < NUM_CHANNELS; i++)
+ constant_coef(setup, tri, slot+1, v3[vert_attr][i], i);
+ }
break;
case LP_INTERP_LINEAR:
for (i = 0; i < NUM_CHANNELS; i++)
- linear_coef(tri, slot+1, v1, v2, v3, vert_attr, i);
+ linear_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i);
break;
case LP_INTERP_PERSPECTIVE:
for (i = 0; i < NUM_CHANNELS; i++)
- perspective_coef(tri, slot+1, v1, v2, v3, vert_attr, i);
+ perspective_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i);
break;
case LP_INTERP_POSITION:
/* XXX: fix me - duplicates the values in slot zero.
*/
- setup_fragcoord_coef(tri, slot+1, v1, v2, v3);
+ setup_fragcoord_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3);
break;
case LP_INTERP_FACING:
- setup_facing_coef(tri, slot+1, frontface);
+ setup_facing_coef(setup, tri, slot+1, frontface);
break;
default:
-static inline int subpixel_snap( float a )
+static INLINE int subpixel_snap( float a ) /* convert a float coordinate to a rounded fixed-point integer */
{
- return util_iround(FIXED_ONE * a);
+ return util_iround(FIXED_ONE * a - (FIXED_ONE / 2)); /* scale by FIXED_ONE, subtract half of FIXED_ONE, then round to int */
}
-#define MIN3(a,b,c) MIN2(MIN2(a,b),c)
-#define MAX3(a,b,c) MAX2(MAX2(a,b),c)
+
+/**
+ * Allocate one lp_rast_triangle together with its three coefficient
+ * arrays (inputs.a0, inputs.dadx, inputs.dady) in a single chunk.
+ *
+ * The chunk comes from lp_scene_alloc_aligned() with 16-byte alignment.
+ * The three arrays are laid out back to back directly after the triangle
+ * struct, each holding (nr_inputs + 1) rows of NUM_CHANNELS floats.
+ *
+ * \param scene  scene to allocate from
+ * \param nr_inputs  number of fragment shader inputs
+ * \param tri_size  returns the total number of bytes allocated
+ * \return pointer to the triangle at the start of the chunk
+ */
+static INLINE struct lp_rast_triangle *
+alloc_triangle(struct lp_scene *scene, unsigned nr_inputs, unsigned *tri_size)
+{
+   struct lp_rast_triangle *tri;
+   /* size in bytes of ONE of the three coefficient arrays */
+   const unsigned plane_sz = (nr_inputs + 1) * NUM_CHANNELS * sizeof(float);
+   /* triangle header followed by a0, dadx and dady */
+   const unsigned total_sz = sizeof(*tri) + (3 * plane_sz);
+   char *after_tri;
+
+   /* the arrays start right after the struct; its size must keep them
+    * on a 16-byte boundary
+    */
+   assert(sizeof(*tri) % 16 == 0);
+
+   tri = lp_scene_alloc_aligned( scene, total_sz, 16 );
+
+   after_tri = (char *) &tri[1];
+   tri->inputs.a0   = (float (*)[4]) (after_tri);
+   tri->inputs.dadx = (float (*)[4]) (after_tri + plane_sz);
+   tri->inputs.dady = (float (*)[4]) (after_tri + plane_sz + plane_sz);
+
+   *tri_size = total_sz;
+   return tri;
+}
+
+
+/**
+ * Print triangle vertex attribs (for debug).
+ *
+ * For each of the three vertices, prints attribute rows
+ * 0 .. setup->fs.nr_inputs - 1 as four floats via debug_printf().
+ *
+ * \param setup  setup context; only fs.nr_inputs is read
+ * \param v1  attribute array of the first vertex
+ * \param v2  attribute array of the second vertex
+ * \param v3  attribute array of the third vertex
+ */
+static void
+print_triangle(struct lp_setup_context *setup,
+               const float (*v1)[4],
+               const float (*v2)[4],
+               const float (*v3)[4])
+{
+   uint i;
+
+   debug_printf("llvmpipe triangle\n");
+   /* 'i' is unsigned, so it is printed with %u rather than %d */
+   for (i = 0; i < setup->fs.nr_inputs; i++) {
+      debug_printf(" v1[%u]: %f %f %f %f\n", i,
+                   v1[i][0], v1[i][1], v1[i][2], v1[i][3]);
+   }
+   for (i = 0; i < setup->fs.nr_inputs; i++) {
+      debug_printf(" v2[%u]: %f %f %f %f\n", i,
+                   v2[i][0], v2[i][1], v2[i][2], v2[i][3]);
+   }
+   for (i = 0; i < setup->fs.nr_inputs; i++) {
+      debug_printf(" v3[%u]: %f %f %f %f\n", i,
+                   v3[i][0], v3[i][1], v3[i][2], v3[i][3]);
+   }
+}
+
/**
* Do basic setup for triangle rasterization and determine which
- * framebuffer tiles are touched. Put the triangle in the bins for the
- * tiles which we overlap.
+ * framebuffer tiles are touched. Put the triangle in the scene's
+ * bins for the tiles which we overlap.
*/
static void
-do_triangle_ccw(struct setup_context *setup,
+do_triangle_ccw(struct lp_setup_context *setup,
const float (*v1)[4],
const float (*v2)[4],
const float (*v3)[4],
boolean frontfacing )
{
/* x/y positions in fixed point */
- const int x1 = subpixel_snap(v1[0][0]);
- const int x2 = subpixel_snap(v2[0][0]);
- const int x3 = subpixel_snap(v3[0][0]);
- const int y1 = subpixel_snap(v1[0][1]);
- const int y2 = subpixel_snap(v2[0][1]);
- const int y3 = subpixel_snap(v3[0][1]);
-
- struct lp_rast_triangle *tri = get_data( &setup->data, sizeof *tri );
- float area;
+ const int x1 = subpixel_snap(v1[0][0] + 0.5 - setup->pixel_offset);
+ const int x2 = subpixel_snap(v2[0][0] + 0.5 - setup->pixel_offset);
+ const int x3 = subpixel_snap(v3[0][0] + 0.5 - setup->pixel_offset);
+ const int y1 = subpixel_snap(v1[0][1] + 0.5 - setup->pixel_offset);
+ const int y2 = subpixel_snap(v2[0][1] + 0.5 - setup->pixel_offset);
+ const int y3 = subpixel_snap(v3[0][1] + 0.5 - setup->pixel_offset);
+
+ struct lp_scene *scene = lp_setup_get_current_scene(setup);
+ struct lp_rast_triangle *tri;
+ int area;
+ float oneoverarea;
int minx, maxx, miny, maxy;
+ unsigned tri_bytes;
+
+ if (0)
+ print_triangle(setup, v1, v2, v3);
+
+ tri = alloc_triangle(scene, setup->fs.nr_inputs, &tri_bytes);
+
+#ifdef DEBUG
+ tri->v[0][0] = v1[0][0];
+ tri->v[1][0] = v2[0][0];
+ tri->v[2][0] = v3[0][0];
+ tri->v[0][1] = v1[0][1];
+ tri->v[1][1] = v2[0][1];
+ tri->v[2][1] = v3[0][1];
+#endif
tri->dx12 = x1 - x2;
tri->dx23 = x2 - x3;
tri->dy23 = y2 - y3;
tri->dy31 = y3 - y1;
- area = (tri->dx12 * tri->dy31 -
- tri->dx31 * tri->dy12);
+ area = (tri->dx12 * tri->dy31 - tri->dx31 * tri->dy12);
+
+ LP_COUNT(nr_tris);
/* Cull non-ccw and zero-sized triangles.
*
* XXX: subject to overflow??
*/
if (area <= 0) {
- putback_data( &setup->data, sizeof *tri );
+ lp_scene_putback_data( scene, tri_bytes );
+ LP_COUNT(nr_culled_tris);
return;
}
/* Bounding rectangle (in pixels) */
- tri->minx = (MIN3(x1, x2, x3) + 0xf) >> FIXED_ORDER;
- tri->maxx = (MAX3(x1, x2, x3) + 0xf) >> FIXED_ORDER;
- tri->miny = (MIN3(y1, y2, y3) + 0xf) >> FIXED_ORDER;
- tri->maxy = (MAX3(y1, y2, y3) + 0xf) >> FIXED_ORDER;
+ minx = (MIN3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER;
+ maxx = (MAX3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER;
+ miny = (MIN3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER;
+ maxy = (MAX3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER;
- if (tri->miny == tri->maxy ||
- tri->minx == tri->maxx) {
- putback_data( &setup->data, sizeof *tri );
+ if (setup->scissor_test) {
+ minx = MAX2(minx, setup->scissor.current.minx);
+ maxx = MIN2(maxx, setup->scissor.current.maxx);
+ miny = MAX2(miny, setup->scissor.current.miny);
+ maxy = MIN2(maxy, setup->scissor.current.maxy);
+ }
+
+ if (miny == maxy ||
+ minx == maxx) {
+ lp_scene_putback_data( scene, tri_bytes );
+ LP_COUNT(nr_culled_tris);
return;
}
/*
*/
- tri->oneoverarea = ((float)FIXED_ONE) / (float)area;
+ oneoverarea = ((float)FIXED_ONE) / (float)area;
/* Setup parameter interpolants:
*/
- setup_tri_coefficients( setup, tri, v1, v2, v3, frontfacing );
+ setup_tri_coefficients( setup, tri, oneoverarea, v1, v2, v3, frontfacing );
+
+ tri->inputs.facing = frontfacing ? 1.0F : -1.0F;
- /* half-edge constants, will be interated over the whole
- * rendertarget.
+ /* half-edge constants, will be iterated over the whole render target.
*/
tri->c1 = tri->dy12 * x1 - tri->dx12 * y1;
tri->c2 = tri->dy23 * x2 - tri->dx23 * y2;
tri->ei2 = tri->dx23 - tri->dy23 - tri->eo2;
tri->ei3 = tri->dx31 - tri->dy31 - tri->eo3;
+ /* Fill in the inputs.step[][] arrays.
+ * We've manually unrolled some loops here.
+ */
{
- int xstep1 = -tri->dy12;
- int xstep2 = -tri->dy23;
- int xstep3 = -tri->dy31;
-
- int ystep1 = tri->dx12;
- int ystep2 = tri->dx23;
- int ystep3 = tri->dx31;
-
- int ix, iy;
- int i = 0;
-
- int c1 = 0;
- int c2 = 0;
- int c3 = 0;
-
- for (iy = 0; iy < 4; iy++) {
- int cx1 = c1;
- int cx2 = c2;
- int cx3 = c3;
-
- for (ix = 0; ix < 4; ix++, i++) {
- tri->step[0][i] = cx1;
- tri->step[1][i] = cx2;
- tri->step[2][i] = cx3;
- cx1 += xstep1;
- cx2 += xstep2;
- cx3 += xstep3;
- }
-
- c1 += ystep1;
- c2 += ystep2;
- c3 += ystep3;
- }
+ const int xstep1 = -tri->dy12;
+ const int xstep2 = -tri->dy23;
+ const int xstep3 = -tri->dy31;
+ const int ystep1 = tri->dx12;
+ const int ystep2 = tri->dx23;
+ const int ystep3 = tri->dx31;
+
+   /* SETUP_STEP(i, x, y) stores, for each of the three edges, the edge
+    * function offset of position (x, y) into inputs.step[edge][i].
+    * Entries are ordered in 2x2 groups: i = 0..3 covers x,y in {0,1},
+    * 4..7 covers x in {2,3}, 8..11 covers y in {2,3}, and 12..15 covers
+    * x,y in {2,3}.
+    */
+#define SETUP_STEP(i, x, y) \
+   do { \
+      tri->inputs.step[0][i] = (x) * xstep1 + (y) * ystep1; \
+      tri->inputs.step[1][i] = (x) * xstep2 + (y) * ystep2; \
+      tri->inputs.step[2][i] = (x) * xstep3 + (y) * ystep3; \
+   } while (0)
+
+   SETUP_STEP(0, 0, 0);
+   SETUP_STEP(1, 1, 0);
+   SETUP_STEP(2, 0, 1);
+   SETUP_STEP(3, 1, 1);
+
+   SETUP_STEP(4, 2, 0);
+   SETUP_STEP(5, 3, 0);
+   SETUP_STEP(6, 2, 1);
+   SETUP_STEP(7, 3, 1);
+
+   SETUP_STEP(8, 0, 2);
+   SETUP_STEP(9, 1, 2);
+   SETUP_STEP(10, 0, 3);
+   SETUP_STEP(11, 1, 3);
+
+   SETUP_STEP(12, 2, 2);
+   SETUP_STEP(13, 3, 2);
+   SETUP_STEP(14, 2, 3);
+   SETUP_STEP(15, 3, 3);
+   /* undefine the macro that was actually defined above so that it does
+    * not leak into the rest of the file
+    */
+#undef SETUP_STEP
}
/*
/* Convert to tile coordinates:
*/
- minx = tri->minx / TILE_SIZE;
- miny = tri->miny / TILE_SIZE;
- maxx = tri->maxx / TILE_SIZE;
- maxy = tri->maxy / TILE_SIZE;
+ minx = minx / TILE_SIZE;
+ miny = miny / TILE_SIZE;
+ maxx = maxx / TILE_SIZE;
+ maxy = maxy / TILE_SIZE;
+
+ /*
+ * Clamp to framebuffer size
+ */
+ minx = MAX2(minx, 0);
+ miny = MAX2(miny, 0);
+ maxx = MIN2(maxx, scene->tiles_x - 1);
+ maxy = MIN2(maxy, scene->tiles_y - 1);
/* Determine which tile(s) intersect the triangle's bounding box
*/
{
/* Triangle is contained in a single tile:
*/
- bin_command( &setup->tile[minx][miny], lp_rast_triangle,
- lp_rast_arg_triangle(tri) );
+ lp_scene_bin_command( scene, minx, miny, lp_rast_triangle,
+ lp_rast_arg_triangle(tri) );
}
else
{
int x, y;
- /* Trivially accept or reject blocks, else jump to per-pixel
- * examination above.
+ /* Test tile-sized blocks against the triangle.
+ * Discard blocks fully outside the tri. If the block is fully
+ * contained inside the tri, bin an lp_rast_shade_tile command.
+ * Else, bin a lp_rast_triangle command.
*/
for (y = miny; y <= maxy; y++)
{
int cx1 = c1;
int cx2 = c2;
int cx3 = c3;
- int in = 0;
+ boolean in = FALSE; /* are we inside the triangle? */
for (x = minx; x <= maxx; x++)
{
cx3 + eo3 < 0)
{
/* do nothing */
+ LP_COUNT(nr_empty_64);
if (in)
- break;
+ break; /* exiting triangle, all done with this row */
}
else if (cx1 + ei1 > 0 &&
cx2 + ei2 > 0 &&
cx3 + ei3 > 0)
{
- in = 1;
/* triangle covers the whole tile- shade whole tile */
- bin_command( &setup->tile[x][y],
- lp_rast_shade_tile,
- lp_rast_arg_inputs(&tri->inputs) );
+ LP_COUNT(nr_fully_covered_64);
+ in = TRUE;
+ if(setup->fs.current.opaque) {
+ lp_scene_bin_reset( scene, x, y );
+ lp_scene_bin_command( scene, x, y,
+ lp_rast_set_state,
+ lp_rast_arg_state(setup->fs.stored) );
+ }
+ lp_scene_bin_command( scene, x, y,
+ lp_rast_shade_tile,
+ lp_rast_arg_inputs(&tri->inputs) );
}
else
{
- in = 1;
- /* shade partial tile */
- bin_command( &setup->tile[x][y],
- lp_rast_triangle,
- lp_rast_arg_triangle(tri) );
+ /* rasterize/shade partial tile */
+ LP_COUNT(nr_partially_covered_64);
+ in = TRUE;
+ lp_scene_bin_command( scene, x, y,
+ lp_rast_triangle,
+ lp_rast_arg_triangle(tri) );
}
/* Iterate cx values across the region:
}
}
-static void triangle_cw( struct setup_context *setup,
+
+/**
+ * Draw triangle if it's CW, cull otherwise.
+ */
+static void triangle_cw( struct lp_setup_context *setup,
const float (*v0)[4],
const float (*v1)[4],
const float (*v2)[4] )
do_triangle_ccw( setup, v1, v0, v2, !setup->ccw_is_frontface );
}
-static void triangle_ccw( struct setup_context *setup,
+
+/**
+ * Draw triangle if it's CCW, cull otherwise.
+ */
+static void triangle_ccw( struct lp_setup_context *setup,
const float (*v0)[4],
const float (*v1)[4],
const float (*v2)[4] )
do_triangle_ccw( setup, v0, v1, v2, setup->ccw_is_frontface );
}
-static void triangle_both( struct setup_context *setup,
+
+
+/**
+ * Draw triangle whether it's CW or CCW.
+ */
+static void triangle_both( struct lp_setup_context *setup,
const float (*v0)[4],
const float (*v1)[4],
const float (*v2)[4] )
const float fy = v1[0][1] - v2[0][1];
/* det = cross(e,f).z */
- if (ex * fy - ey * fx < 0)
+ if (ex * fy - ey * fx < 0.0f)
triangle_ccw( setup, v0, v1, v2 );
else
triangle_cw( setup, v0, v1, v2 );
}
-static void triangle_nop( struct setup_context *setup,
+
+static void triangle_nop( struct lp_setup_context *setup,
const float (*v0)[4],
const float (*v1)[4],
const float (*v2)[4] )
void
-lp_setup_choose_triangle( struct setup_context *setup )
+lp_setup_choose_triangle( struct lp_setup_context *setup ) /* pick the setup->triangle handler from setup->cullmode */
{
switch (setup->cullmode) {
- case PIPE_WINDING_NONE:
+ case PIPE_FACE_NONE: /* nothing culled: handler accepts both windings */
setup->triangle = triangle_both;
break;
- case PIPE_WINDING_CCW:
- setup->triangle = triangle_cw;
+ case PIPE_FACE_BACK: /* back faces culled: keep only the front-facing winding */
+ setup->triangle = setup->ccw_is_frontface ? triangle_ccw : triangle_cw;
break;
- case PIPE_WINDING_CW:
- setup->triangle = triangle_ccw;
+ case PIPE_FACE_FRONT: /* front faces culled: keep only the back-facing winding */
+ setup->triangle = setup->ccw_is_frontface ? triangle_cw : triangle_ccw;
break;
default:
setup->triangle = triangle_nop;
break;
}
}
-
-