llvmpipe: slightly shrink the size of a binned triangle
author Keith Whitwell <keithw@vmware.com>
Fri, 15 Oct 2010 11:23:22 +0000 (12:23 +0100)
committer Keith Whitwell <keithw@vmware.com>
Fri, 15 Oct 2010 12:27:47 +0000 (13:27 +0100)
12 files changed:
src/gallium/drivers/llvmpipe/lp_rast.c
src/gallium/drivers/llvmpipe/lp_rast.h
src/gallium/drivers/llvmpipe/lp_rast_debug.c
src/gallium/drivers/llvmpipe/lp_rast_priv.h
src/gallium/drivers/llvmpipe/lp_rast_tri.c
src/gallium/drivers/llvmpipe/lp_rast_tri_tmp.h
src/gallium/drivers/llvmpipe/lp_setup_coef.c
src/gallium/drivers/llvmpipe/lp_setup_coef.h
src/gallium/drivers/llvmpipe/lp_setup_coef_intrin.c
src/gallium/drivers/llvmpipe/lp_setup_line.c
src/gallium/drivers/llvmpipe/lp_setup_point.c
src/gallium/drivers/llvmpipe/lp_setup_tri.c

index 8e9be755e074d37db30f9b41ade9781e563681e2..d358a983943080170e3d34bbe76b142375aa4cdd 100644 (file)
@@ -366,9 +366,9 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
          variant->jit_function[RAST_WHOLE]( &state->jit_context,
                                             tile_x + x, tile_y + y,
                                             inputs->frontfacing,
-                                            inputs->a0,
-                                            inputs->dadx,
-                                            inputs->dady,
+                                            GET_A0(inputs),
+                                            GET_DADX(inputs),
+                                            GET_DADY(inputs),
                                             color,
                                             depth,
                                             0xffff,
@@ -447,9 +447,9 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
    variant->jit_function[RAST_EDGE_TEST](&state->jit_context,
                                          x, y,
                                          inputs->frontfacing,
-                                         inputs->a0,
-                                         inputs->dadx,
-                                         inputs->dady,
+                                         GET_A0(inputs),
+                                         GET_DADX(inputs),
+                                         GET_DADY(inputs),
                                          color,
                                          depth,
                                          mask,
index c5fb15484c59308a27a016ee9f3ebc6808dcdb70..8d8b6210ec787fa524d247532116ebbd2d9c39fd 100644 (file)
@@ -78,13 +78,14 @@ struct lp_rast_state {
  * These pointers point into the bin data buffer.
  */
 struct lp_rast_shader_inputs {
-   unsigned frontfacing;     /** One for front-facing */
-   unsigned disable:1;  /** Partially binned, disable this command */
-   unsigned opaque:1;   /** Is opaque */
-
-   float (*a0)[4];
-   float (*dadx)[4];
-   float (*dady)[4];
+   unsigned frontfacing:1;      /** True for front-facing */
+   unsigned disable:1;          /** Partially binned, disable this command */
+   unsigned opaque:1;           /** Is opaque */
+   unsigned pad0:29;            /* wasted space */
+   unsigned stride;             /* how much to advance data between a0, dadx, dady */
+   unsigned pad2;               /* wasted space */
+   unsigned pad3;               /* wasted space */
+   /* followed by a0, dadx, dady and planes[] */
 };
 
 /* Note: the order of these values is important as they are loaded by
@@ -111,17 +112,24 @@ struct lp_rast_plane {
  * Objects of this type are put into the lp_setup_context::data buffer.
  */
 struct lp_rast_triangle {
-   /* inputs for the shader */
-   struct lp_rast_shader_inputs inputs;
-
 #ifdef DEBUG
    float v[3][2];
+   float pad0;
+   float pad1;
 #endif
 
-   struct lp_rast_plane plane[8]; /* NOTE: may allocate fewer planes */
+   /* inputs for the shader */
+   struct lp_rast_shader_inputs inputs;
+   /* planes are also allocated here */
 };
 
 
+#define GET_A0(inputs) ((float (*)[4])((inputs)+1))
+#define GET_DADX(inputs) ((float (*)[4])((char *)((inputs) + 1) + (inputs)->stride))
+#define GET_DADY(inputs) ((float (*)[4])((char *)((inputs) + 1) + 2 * (inputs)->stride))
+#define GET_PLANES(tri) ((struct lp_rast_plane *)((char *)(&(tri)->inputs + 1) + 3 * (tri)->inputs.stride))
+
+
 
 struct lp_rasterizer *
 lp_rast_create( unsigned num_threads );
index 3113e196c408ff89431f117b596b80032ba4ec61..e2783aa56833910ddf313b5ee5fe8d20f5bf73ab 100644 (file)
@@ -178,6 +178,7 @@ debug_triangle(int tilex, int tiley,
 {
    const struct lp_rast_triangle *tri = arg.triangle.tri;
    unsigned plane_mask = arg.triangle.plane_mask;
+   const struct lp_rast_plane *tri_plane = GET_PLANES(tri);
    struct lp_rast_plane plane[8];
    int x, y;
    int count = 0;
@@ -190,7 +191,7 @@ debug_triangle(int tilex, int tiley,
    }
 
    while (plane_mask) {
-      plane[nr_planes] = tri->plane[u_bit_scan(&plane_mask)];
+      plane[nr_planes] = tri_plane[u_bit_scan(&plane_mask)];
       plane[nr_planes].c = (plane[nr_planes].c +
                             plane[nr_planes].dcdy * tiley -
                             plane[nr_planes].dcdx * tilex);
index e5d04c65b00128b806ac91e4a573ee6808e2de42..b30408f097bf95e5f323ad74f1b0db07d27a84e3 100644 (file)
@@ -262,9 +262,9 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
    variant->jit_function[RAST_WHOLE]( &state->jit_context,
                                       x, y,
                                       inputs->frontfacing,
-                                      inputs->a0,
-                                      inputs->dadx,
-                                      inputs->dady,
+                                      GET_A0(inputs),
+                                      GET_DADX(inputs),
+                                      GET_DADY(inputs),
                                       color,
                                       depth,
                                       0xffff,
index bae772b9c507fb7d95f4dc681fd3967c3a4209d3..5bdf19712f453cca0936dcf384a57013fcb2bfb7 100644 (file)
@@ -311,7 +311,7 @@ lp_rast_triangle_3_16(struct lp_rasterizer_task *task,
                       const union lp_rast_cmd_arg arg)
 {
    const struct lp_rast_triangle *tri = arg.triangle.tri;
-   const struct lp_rast_plane *plane = tri->plane;
+   const struct lp_rast_plane *plane = GET_PLANES(tri);
    int x = (arg.triangle.plane_mask & 0xff) + task->x;
    int y = (arg.triangle.plane_mask >> 8) + task->y;
    unsigned i, j;
@@ -421,7 +421,7 @@ lp_rast_triangle_3_4(struct lp_rasterizer_task *task,
                      const union lp_rast_cmd_arg arg)
 {
    const struct lp_rast_triangle *tri = arg.triangle.tri;
-   const struct lp_rast_plane *plane = tri->plane;
+   const struct lp_rast_plane *plane = GET_PLANES(tri);
    int x = (arg.triangle.plane_mask & 0xff) + task->x;
    int y = (arg.triangle.plane_mask >> 8) + task->y;
 
index 2f032295126dce8bd524ec77a5daaa7f6dc5091a..9976996719aebb5cb941411d367e9122fe8d746b 100644 (file)
@@ -156,6 +156,7 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
 {
    const struct lp_rast_triangle *tri = arg.triangle.tri;
    unsigned plane_mask = arg.triangle.plane_mask;
+   const struct lp_rast_plane *tri_plane = GET_PLANES(tri);
    const int x = task->x, y = task->y;
    struct lp_rast_plane plane[NR_PLANES];
    int c[NR_PLANES];
@@ -172,7 +173,7 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
 
    while (plane_mask) {
       int i = ffs(plane_mask) - 1;
-      plane[j] = tri->plane[i];
+      plane[j] = tri_plane[i];
       plane_mask &= ~(1 << i);
       c[j] = plane[j].c + plane[j].dcdy * y - plane[j].dcdx * x;
 
@@ -255,7 +256,7 @@ TRI_16(struct lp_rasterizer_task *task,
        const union lp_rast_cmd_arg arg)
 {
    const struct lp_rast_triangle *tri = arg.triangle.tri;
-   const struct lp_rast_plane *plane = tri->plane;
+   const struct lp_rast_plane *plane = GET_PLANES(tri);
    unsigned mask = arg.triangle.plane_mask;
    unsigned outmask, partial_mask;
    unsigned j;
@@ -328,7 +329,7 @@ TRI_4(struct lp_rasterizer_task *task,
       const union lp_rast_cmd_arg arg)
 {
    const struct lp_rast_triangle *tri = arg.triangle.tri;
-   const struct lp_rast_plane *plane = tri->plane;
+   const struct lp_rast_plane *plane = GET_PLANES(tri);
    unsigned mask = arg.triangle.plane_mask;
    const int x = task->x + (mask & 0xff);
    const int y = task->y + (mask >> 8);
index 8dc2688ddb604f56f8110983b526ca9a483525ac..a835df6af24b04f9e23fe9553c8945f4a5c7f32f 100644 (file)
 /**
  * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
  */
-static void constant_coef( struct lp_rast_shader_inputs *inputs,
+static void constant_coef( struct lp_tri_info *info,
                            unsigned slot,
                           const float value,
                            unsigned i )
 {
-   inputs->a0[slot][i] = value;
-   inputs->dadx[slot][i] = 0.0f;
-   inputs->dady[slot][i] = 0.0f;
+   info->a0[slot][i] = value;
+   info->dadx[slot][i] = 0.0f;
+   info->dady[slot][i] = 0.0f;
 }
 
 
 
-static void linear_coef( struct lp_rast_shader_inputs *inputs,
-                         const struct lp_tri_info *info,
+static void linear_coef( struct lp_tri_info *info,
                          unsigned slot,
                          unsigned vert_attr,
                          unsigned i)
@@ -69,8 +68,8 @@ static void linear_coef( struct lp_rast_shader_inputs *inputs,
    float dadx = (da01 * info->dy20_ooa - info->dy01_ooa * da20);
    float dady = (da20 * info->dx01_ooa - info->dx20_ooa * da01);
 
-   inputs->dadx[slot][i] = dadx;
-   inputs->dady[slot][i] = dady;
+   info->dadx[slot][i] = dadx;
+   info->dady[slot][i] = dady;
 
    /* calculate a0 as the value which would be sampled for the
     * fragment at (0,0), taking into account that we want to sample at
@@ -84,7 +83,7 @@ static void linear_coef( struct lp_rast_shader_inputs *inputs,
     * to define a0 as the sample at a pixel center somewhere near vmin
     * instead - i'll switch to this later.
     */
-   inputs->a0[slot][i] = a0 - (dadx * info->x0_center +
+   info->a0[slot][i] = a0 - (dadx * info->x0_center +
                                   dady * info->y0_center);
 }
 
@@ -97,8 +96,7 @@ static void linear_coef( struct lp_rast_shader_inputs *inputs,
  * Later, when we compute the value at a particular fragment position we'll
  * divide the interpolated value by the interpolated W at that fragment.
  */
-static void perspective_coef( struct lp_rast_shader_inputs *inputs,
-                              const struct lp_tri_info *info,
+static void perspective_coef( struct lp_tri_info *info,
                               unsigned slot,
                              unsigned vert_attr,
                               unsigned i)
@@ -113,9 +111,9 @@ static void perspective_coef( struct lp_rast_shader_inputs *inputs,
    float dadx = da01 * info->dy20_ooa - info->dy01_ooa * da20;
    float dady = da20 * info->dx01_ooa - info->dx20_ooa * da01;
 
-   inputs->dadx[slot][i] = dadx;
-   inputs->dady[slot][i] = dady;
-   inputs->a0[slot][i] = a0 - (dadx * info->x0_center +
+   info->dadx[slot][i] = dadx;
+   info->dady[slot][i] = dady;
+   info->a0[slot][i] = a0 - (dadx * info->x0_center +
                                   dady * info->y0_center);
 }
 
@@ -127,23 +125,22 @@ static void perspective_coef( struct lp_rast_shader_inputs *inputs,
  * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
  */
 static void
-setup_fragcoord_coef(struct lp_rast_shader_inputs *inputs,
-                     const struct lp_tri_info *info,
+setup_fragcoord_coef(struct lp_tri_info *info,
                      unsigned slot,
                      unsigned usage_mask)
 {
    /*X*/
    if (usage_mask & TGSI_WRITEMASK_X) {
-      inputs->a0[slot][0] = 0.0;
-      inputs->dadx[slot][0] = 1.0;
-      inputs->dady[slot][0] = 0.0;
+      info->a0[slot][0] = 0.0;
+      info->dadx[slot][0] = 1.0;
+      info->dady[slot][0] = 0.0;
    }
 
    /*Y*/
    if (usage_mask & TGSI_WRITEMASK_Y) {
-      inputs->a0[slot][1] = 0.0;
-      inputs->dadx[slot][1] = 0.0;
-      inputs->dady[slot][1] = 1.0;
+      info->a0[slot][1] = 0.0;
+      info->dadx[slot][1] = 0.0;
+      info->dady[slot][1] = 1.0;
    }
 
    /*Z*/
@@ -162,23 +159,23 @@ setup_fragcoord_coef(struct lp_rast_shader_inputs *inputs,
  * Setup the fragment input attribute with the front-facing value.
  * \param frontface  is the triangle front facing?
  */
-static void setup_facing_coef( struct lp_rast_shader_inputs *inputs,
+static void setup_facing_coef( struct lp_tri_info *info,
                                unsigned slot,
                                boolean frontface,
                                unsigned usage_mask)
 {
    /* convert TRUE to 1.0 and FALSE to -1.0 */
    if (usage_mask & TGSI_WRITEMASK_X)
-      constant_coef( inputs, slot, 2.0f * frontface - 1.0f, 0 );
+      constant_coef( info, slot, 2.0f * frontface - 1.0f, 0 );
 
    if (usage_mask & TGSI_WRITEMASK_Y)
-      constant_coef( inputs, slot, 0.0f, 1 ); /* wasted */
+      constant_coef( info, slot, 0.0f, 1 ); /* wasted */
 
    if (usage_mask & TGSI_WRITEMASK_Z)
-      constant_coef( inputs, slot, 0.0f, 2 ); /* wasted */
+      constant_coef( info, slot, 0.0f, 2 ); /* wasted */
 
    if (usage_mask & TGSI_WRITEMASK_W)
-      constant_coef( inputs, slot, 0.0f, 3 ); /* wasted */
+      constant_coef( info, slot, 0.0f, 3 ); /* wasted */
 }
 
 
@@ -212,6 +209,10 @@ void lp_setup_tri_coef( struct lp_setup_context *setup,
    info.dx20_ooa  = dx20 * oneoverarea;
    info.dy01_ooa  = dy01 * oneoverarea;
    info.dy20_ooa  = dy20 * oneoverarea;
+   info.a0 = GET_A0(inputs);
+   info.dadx = GET_DADX(inputs);
+   info.dady = GET_DADY(inputs);
+      
 
 
    /* setup interpolation for all the remaining attributes:
@@ -225,25 +226,25 @@ void lp_setup_tri_coef( struct lp_setup_context *setup,
          if (setup->flatshade_first) {
             for (i = 0; i < NUM_CHANNELS; i++)
                if (usage_mask & (1 << i))
-                  constant_coef(inputs, slot+1, info.v0[vert_attr][i], i);
+                  constant_coef(&info, slot+1, info.v0[vert_attr][i], i);
          }
          else {
             for (i = 0; i < NUM_CHANNELS; i++)
                if (usage_mask & (1 << i))
-                  constant_coef(inputs, slot+1, info.v2[vert_attr][i], i);
+                  constant_coef(&info, slot+1, info.v2[vert_attr][i], i);
          }
          break;
 
       case LP_INTERP_LINEAR:
          for (i = 0; i < NUM_CHANNELS; i++)
             if (usage_mask & (1 << i))
-               linear_coef(inputs, &info, slot+1, vert_attr, i);
+               linear_coef(&info, slot+1, vert_attr, i);
          break;
 
       case LP_INTERP_PERSPECTIVE:
          for (i = 0; i < NUM_CHANNELS; i++)
             if (usage_mask & (1 << i))
-               perspective_coef(inputs, &info, slot+1, vert_attr, i);
+               perspective_coef(&info, slot+1, vert_attr, i);
          fragcoord_usage_mask |= TGSI_WRITEMASK_W;
          break;
 
@@ -257,7 +258,7 @@ void lp_setup_tri_coef( struct lp_setup_context *setup,
          break;
 
       case LP_INTERP_FACING:
-         setup_facing_coef(inputs, slot+1, info.frontfacing, usage_mask);
+         setup_facing_coef(&info, slot+1, info.frontfacing, usage_mask);
          break;
 
       default:
@@ -267,7 +268,7 @@ void lp_setup_tri_coef( struct lp_setup_context *setup,
 
    /* The internal position input is in slot zero:
     */
-   setup_fragcoord_coef(inputs, &info, 0, fragcoord_usage_mask);
+   setup_fragcoord_coef(&info, 0, fragcoord_usage_mask);
 }
 
 #else
index 87a3255ccc698fd80638a983ab61d34a911856dd..7b5b78edd53904bb2da4e3ae284923c785d62c47 100644 (file)
@@ -52,6 +52,10 @@ struct lp_tri_info {
    const float (*v2)[4];
 
    boolean frontfacing;                /* remove eventually */
+
+   float (*a0)[4];
+   float (*dadx)[4];
+   float (*dady)[4];
 };
 
 void lp_setup_tri_coef( struct lp_setup_context *setup,
index 3742fd672b2d74c3e41568a074afce2e807e2c8d..29714e27687a08b17287788401477b0737a121e9 100644 (file)
 #include <emmintrin.h>
 
 
-static void constant_coef4( struct lp_rast_shader_inputs *inputs,
-                           const struct lp_tri_info *info,
+static void constant_coef4( struct lp_tri_info *info,
                            unsigned slot,
                            const float *attr)
 {
-   *(__m128 *)inputs->a0[slot]   = *(__m128 *)attr;
-   *(__m128 *)inputs->dadx[slot] = _mm_set1_ps(0.0);
-   *(__m128 *)inputs->dady[slot] = _mm_set1_ps(0.0);
+   *(__m128 *)info->a0[slot]   = *(__m128 *)attr;
+   *(__m128 *)info->dadx[slot] = _mm_set1_ps(0.0);
+   *(__m128 *)info->dady[slot] = _mm_set1_ps(0.0);
 }
 
 
@@ -56,8 +55,7 @@ static void constant_coef4( struct lp_rast_shader_inputs *inputs,
  * Setup the fragment input attribute with the front-facing value.
  * \param frontface  is the triangle front facing?
  */
-static void setup_facing_coef( struct lp_rast_shader_inputs *inputs,
-                              const struct lp_tri_info *info,
+static void setup_facing_coef( struct lp_tri_info *info,
                               unsigned slot )
 {
    /* XXX: just pass frontface directly to the shader, don't bother
@@ -66,15 +64,14 @@ static void setup_facing_coef( struct lp_rast_shader_inputs *inputs,
    __m128 a0 = _mm_setr_ps(info->frontfacing ? 1.0 : -1.0,
                           0, 0, 0);
 
-   *(__m128 *)inputs->a0[slot]   = a0;
-   *(__m128 *)inputs->dadx[slot] = _mm_set1_ps(0.0);
-   *(__m128 *)inputs->dady[slot] = _mm_set1_ps(0.0);
+   *(__m128 *)info->a0[slot]   = a0;
+   *(__m128 *)info->dadx[slot] = _mm_set1_ps(0.0);
+   *(__m128 *)info->dady[slot] = _mm_set1_ps(0.0);
 }
 
 
 
-static void calc_coef4( struct lp_rast_shader_inputs *inputs,
-                       const struct lp_tri_info *info,
+static void calc_coef4(        const struct lp_tri_info *info,
                        unsigned slot,
                        __m128 a0,
                        __m128 a1,
@@ -96,14 +93,13 @@ static void calc_coef4( struct lp_rast_shader_inputs *inputs,
    __m128 attr_v0       = _mm_add_ps(dadx_x0, dady_y0);
    __m128 attr_0        = _mm_sub_ps(a0, attr_v0);
 
-   *(__m128 *)inputs->a0[slot]   = attr_0;
-   *(__m128 *)inputs->dadx[slot] = dadx;
-   *(__m128 *)inputs->dady[slot] = dady;
+   *(__m128 *)info->a0[slot]   = attr_0;
+   *(__m128 *)info->dadx[slot] = dadx;
+   *(__m128 *)info->dady[slot] = dady;
 }
 
 
-static void linear_coef( struct lp_rast_shader_inputs *inputs,
-                         const struct lp_tri_info *info,
+static void linear_coef( struct lp_tri_info *info,
                          unsigned slot,
                          unsigned vert_attr)
 {
@@ -111,7 +107,7 @@ static void linear_coef( struct lp_rast_shader_inputs *inputs,
    __m128 a1 = *(const __m128 *)info->v1[vert_attr];
    __m128 a2 = *(const __m128 *)info->v2[vert_attr];
 
-   calc_coef4(inputs, info, slot, a0, a1, a2);
+   calc_coef4(info, slot, a0, a1, a2);
 }
 
 
@@ -124,8 +120,7 @@ static void linear_coef( struct lp_rast_shader_inputs *inputs,
  * Later, when we compute the value at a particular fragment position we'll
  * divide the interpolated value by the interpolated W at that fragment.
  */
-static void perspective_coef( struct lp_rast_shader_inputs *inputs,
-                              const struct lp_tri_info *info,
+static void perspective_coef( const struct lp_tri_info *info,
                               unsigned slot,
                              unsigned vert_attr)
 {
@@ -139,7 +134,7 @@ static void perspective_coef( struct lp_rast_shader_inputs *inputs,
    __m128 a1_oow = _mm_mul_ps(a1, _mm_set1_ps(info->v1[0][3]));
    __m128 a2_oow = _mm_mul_ps(a2, _mm_set1_ps(info->v2[0][3]));
 
-   calc_coef4(inputs, info, slot, a0_oow, a1_oow, a2_oow);
+   calc_coef4(info, slot, a0_oow, a1_oow, a2_oow);
 }
 
 
@@ -174,11 +169,14 @@ void lp_setup_tri_coef( struct lp_setup_context *setup,
    info.dx20_ooa  = dx20 * oneoverarea;
    info.dy01_ooa  = dy01 * oneoverarea;
    info.dy20_ooa  = dy20 * oneoverarea;
+   info.a0 = GET_A0(inputs);
+   info.dadx = GET_DADX(inputs);
+   info.dady = GET_DADY(inputs);
 
 
    /* The internal position input is in slot zero:
     */
-   linear_coef(inputs, &info, 0, 0);
+   linear_coef(&info, 0, 0);
 
    /* setup interpolation for all the remaining attributes:
     */
@@ -188,19 +186,19 @@ void lp_setup_tri_coef( struct lp_setup_context *setup,
       switch (setup->fs.input[slot].interp) {
       case LP_INTERP_CONSTANT:
          if (setup->flatshade_first) {
-           constant_coef4(inputs, &info, slot+1, info.v0[vert_attr]);
+           constant_coef4(&info, slot+1, info.v0[vert_attr]);
          }
          else {
-           constant_coef4(inputs, &info, slot+1, info.v2[vert_attr]);
+           constant_coef4(&info, slot+1, info.v2[vert_attr]);
          }
          break;
 
       case LP_INTERP_LINEAR:
-        linear_coef(inputs, &info, slot+1, vert_attr);
+        linear_coef(&info, slot+1, vert_attr);
          break;
 
       case LP_INTERP_PERSPECTIVE:
-        perspective_coef(inputs, &info, slot+1, vert_attr);
+        perspective_coef(&info, slot+1, vert_attr);
          break;
 
       case LP_INTERP_POSITION:
@@ -211,7 +209,7 @@ void lp_setup_tri_coef( struct lp_setup_context *setup,
          break;
 
       case LP_INTERP_FACING:
-         setup_facing_coef(inputs, &info, slot+1);
+         setup_facing_coef(&info, slot+1);
          break;
 
       default:
index efc48eecfee679f9decde371f41349c73330fb8a..2fd9f2e2f2a4a2954b5ee87e20ed9d8ff5d226ab 100644 (file)
@@ -46,6 +46,10 @@ struct lp_line_info {
 
    const float (*v1)[4];
    const float (*v2)[4];
+
+   float (*a0)[4];
+   float (*dadx)[4];
+   float (*dady)[4];
 };
 
 
@@ -53,14 +57,14 @@ struct lp_line_info {
  * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
  */
 static void constant_coef( struct lp_setup_context *setup,
-                           struct lp_rast_triangle *tri,
+                           struct lp_line_info *info,
                            unsigned slot,
                            const float value,
                            unsigned i )
 {
-   tri->inputs.a0[slot][i] = value;
-   tri->inputs.dadx[slot][i] = 0.0f;
-   tri->inputs.dady[slot][i] = 0.0f;
+   info->a0[slot][i] = value;
+   info->dadx[slot][i] = 0.0f;
+   info->dady[slot][i] = 0.0f;
 }
 
 
@@ -69,7 +73,6 @@ static void constant_coef( struct lp_setup_context *setup,
  * for a triangle.
  */
 static void linear_coef( struct lp_setup_context *setup,
-                         struct lp_rast_triangle *tri,
                          struct lp_line_info *info,
                          unsigned slot,
                          unsigned vert_attr,
@@ -82,10 +85,10 @@ static void linear_coef( struct lp_setup_context *setup,
    float dadx = da21 * info->dx * info->oneoverarea;
    float dady = da21 * info->dy * info->oneoverarea;
 
-   tri->inputs.dadx[slot][i] = dadx;
-   tri->inputs.dady[slot][i] = dady;  
+   info->dadx[slot][i] = dadx;
+   info->dady[slot][i] = dady;  
    
-   tri->inputs.a0[slot][i] = (a1 -
+   info->a0[slot][i] = (a1 -
                               (dadx * (info->v1[0][0] - setup->pixel_offset) +
                                dady * (info->v1[0][1] - setup->pixel_offset)));
 }
@@ -100,7 +103,6 @@ static void linear_coef( struct lp_setup_context *setup,
  * divide the interpolated value by the interpolated W at that fragment.
  */
 static void perspective_coef( struct lp_setup_context *setup,
-                              struct lp_rast_triangle *tri,
                               struct lp_line_info *info,
                               unsigned slot,
                               unsigned vert_attr,
@@ -115,43 +117,42 @@ static void perspective_coef( struct lp_setup_context *setup,
    float dadx = da21 * info->dx * info->oneoverarea;
    float dady = da21 * info->dy * info->oneoverarea;
 
-   tri->inputs.dadx[slot][i] = dadx;
-   tri->inputs.dady[slot][i] = dady;
+   info->dadx[slot][i] = dadx;
+   info->dady[slot][i] = dady;
    
-   tri->inputs.a0[slot][i] = (a1 -
-                              (dadx * (info->v1[0][0] - setup->pixel_offset) +
-                               dady * (info->v1[0][1] - setup->pixel_offset)));
+   info->a0[slot][i] = (a1 -
+                        (dadx * (info->v1[0][0] - setup->pixel_offset) +
+                         dady * (info->v1[0][1] - setup->pixel_offset)));
 }
 
 static void
 setup_fragcoord_coef( struct lp_setup_context *setup,
-                      struct lp_rast_triangle *tri,
                       struct lp_line_info *info,
                       unsigned slot,
                       unsigned usage_mask)
 {
    /*X*/
    if (usage_mask & TGSI_WRITEMASK_X) {
-      tri->inputs.a0[slot][0] = 0.0;
-      tri->inputs.dadx[slot][0] = 1.0;
-      tri->inputs.dady[slot][0] = 0.0;
+      info->a0[slot][0] = 0.0;
+      info->dadx[slot][0] = 1.0;
+      info->dady[slot][0] = 0.0;
    }
 
    /*Y*/
    if (usage_mask & TGSI_WRITEMASK_Y) {
-      tri->inputs.a0[slot][1] = 0.0;
-      tri->inputs.dadx[slot][1] = 0.0;
-      tri->inputs.dady[slot][1] = 1.0;
+      info->a0[slot][1] = 0.0;
+      info->dadx[slot][1] = 0.0;
+      info->dady[slot][1] = 1.0;
    }
 
    /*Z*/
    if (usage_mask & TGSI_WRITEMASK_Z) {
-      linear_coef(setup, tri, info, slot, 0, 2);
+      linear_coef(setup, info, slot, 0, 2);
    }
 
    /*W*/
    if (usage_mask & TGSI_WRITEMASK_W) {
-      linear_coef(setup, tri, info, slot, 0, 3);
+      linear_coef(setup, info, slot, 0, 3);
    }
 }
 
@@ -159,7 +160,6 @@ setup_fragcoord_coef( struct lp_setup_context *setup,
  * Compute the tri->coef[] array dadx, dady, a0 values.
  */
 static void setup_line_coefficients( struct lp_setup_context *setup,
-                                     struct lp_rast_triangle *tri,
                                      struct lp_line_info *info)
 {
    unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
@@ -177,25 +177,25 @@ static void setup_line_coefficients( struct lp_setup_context *setup,
          if (setup->flatshade_first) {
             for (i = 0; i < NUM_CHANNELS; i++)
                if (usage_mask & (1 << i))
-                  constant_coef(setup, tri, slot+1, info->v1[vert_attr][i], i);
+                  constant_coef(setup, info, slot+1, info->v1[vert_attr][i], i);
          }
          else {
             for (i = 0; i < NUM_CHANNELS; i++)
                if (usage_mask & (1 << i))
-                  constant_coef(setup, tri, slot+1, info->v2[vert_attr][i], i);
+                  constant_coef(setup, info, slot+1, info->v2[vert_attr][i], i);
          }
          break;
 
       case LP_INTERP_LINEAR:
          for (i = 0; i < NUM_CHANNELS; i++)
             if (usage_mask & (1 << i))
-               linear_coef(setup, tri, info, slot+1, vert_attr, i);
+               linear_coef(setup, info, slot+1, vert_attr, i);
          break;
 
       case LP_INTERP_PERSPECTIVE:
          for (i = 0; i < NUM_CHANNELS; i++)
             if (usage_mask & (1 << i))
-               perspective_coef(setup, tri, info, slot+1, vert_attr, i);
+               perspective_coef(setup, info, slot+1, vert_attr, i);
          fragcoord_usage_mask |= TGSI_WRITEMASK_W;
          break;
 
@@ -211,7 +211,7 @@ static void setup_line_coefficients( struct lp_setup_context *setup,
       case LP_INTERP_FACING:
          for (i = 0; i < NUM_CHANNELS; i++)
             if (usage_mask & (1 << i))
-               constant_coef(setup, tri, slot+1, 1.0, i);
+               constant_coef(setup, info, slot+1, 1.0, i);
          break;
 
       default:
@@ -221,7 +221,7 @@ static void setup_line_coefficients( struct lp_setup_context *setup,
 
    /* The internal position input is in slot zero:
     */
-   setup_fragcoord_coef(setup, tri, info, 0,
+   setup_fragcoord_coef(setup, info, 0,
                         fragcoord_usage_mask);
 }
 
@@ -276,6 +276,7 @@ try_setup_line( struct lp_setup_context *setup,
 {
    struct lp_scene *scene = setup->scene;
    struct lp_rast_triangle *line;
+   struct lp_rast_plane *plane;
    struct lp_line_info info;
    float width = MAX2(1.0, setup->line_width);
    struct u_rect bbox;
@@ -581,32 +582,35 @@ try_setup_line( struct lp_setup_context *setup,
 #endif
 
    /* calculate the deltas */
-   line->plane[0].dcdy = x[0] - x[1];
-   line->plane[1].dcdy = x[1] - x[2];
-   line->plane[2].dcdy = x[2] - x[3];
-   line->plane[3].dcdy = x[3] - x[0];
+   plane = GET_PLANES(line);
+   plane[0].dcdy = x[0] - x[1];
+   plane[1].dcdy = x[1] - x[2];
+   plane[2].dcdy = x[2] - x[3];
+   plane[3].dcdy = x[3] - x[0];
 
-   line->plane[0].dcdx = y[0] - y[1];
-   line->plane[1].dcdx = y[1] - y[2];
-   line->plane[2].dcdx = y[2] - y[3];
-   line->plane[3].dcdx = y[3] - y[0];
+   plane[0].dcdx = y[0] - y[1];
+   plane[1].dcdx = y[1] - y[2];
+   plane[2].dcdx = y[2] - y[3];
+   plane[3].dcdx = y[3] - y[0];
 
 
    /* Setup parameter interpolants:
     */
-   setup_line_coefficients( setup, line, &info); 
+   info.a0 = GET_A0(&line->inputs);
+   info.dadx = GET_DADX(&line->inputs);
+   info.dady = GET_DADY(&line->inputs);
+   setup_line_coefficients(setup, &info); 
 
    line->inputs.frontfacing = TRUE;
    line->inputs.disable = FALSE;
    line->inputs.opaque = FALSE;
 
    for (i = 0; i < 4; i++) {
-      struct lp_rast_plane *plane = &line->plane[i];
 
       /* half-edge constants, will be interated over the whole render
        * target.
        */
-      plane->c = plane->dcdx * x[i] - plane->dcdy * y[i];
+      plane[i].c = plane[i].dcdx * x[i] - plane[i].dcdy * y[i];
 
       
       /* correct for top-left vs. bottom-left fill convention.  
@@ -622,38 +626,38 @@ try_setup_line( struct lp_setup_context *setup,
        * to its usual method, in which case it will probably want
        * to use the opposite, top-left convention.
        */         
-      if (plane->dcdx < 0) {
+      if (plane[i].dcdx < 0) {
          /* both fill conventions want this - adjust for left edges */
-         plane->c++;            
+         plane[i].c++;            
       }
-      else if (plane->dcdx == 0) {
+      else if (plane[i].dcdx == 0) {
          if (setup->pixel_offset == 0) {
             /* correct for top-left fill convention:
              */
-            if (plane->dcdy > 0) plane->c++;
+            if (plane[i].dcdy > 0) plane[i].c++;
          }
          else {
             /* correct for bottom-left fill convention:
              */
-            if (plane->dcdy < 0) plane->c++;
+            if (plane[i].dcdy < 0) plane[i].c++;
          }
       }
 
-      plane->dcdx *= FIXED_ONE;
-      plane->dcdy *= FIXED_ONE;
+      plane[i].dcdx *= FIXED_ONE;
+      plane[i].dcdy *= FIXED_ONE;
 
       /* find trivial reject offsets for each edge for a single-pixel
        * sized block.  These will be scaled up at each recursive level to
        * match the active blocksize.  Scaling in this way works best if
        * the blocks are square.
        */
-      plane->eo = 0;
-      if (plane->dcdx < 0) plane->eo -= plane->dcdx;
-      if (plane->dcdy > 0) plane->eo += plane->dcdy;
+      plane[i].eo = 0;
+      if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx;
+      if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy;
 
       /* Calculate trivial accept offsets from the above.
        */
-      plane->ei = plane->dcdy - plane->dcdx - plane->eo;
+      plane[i].ei = plane[i].dcdy - plane[i].dcdx - plane[i].eo;
    }
 
 
@@ -676,29 +680,29 @@ try_setup_line( struct lp_setup_context *setup,
     * these planes elsewhere.
     */
    if (nr_planes == 8) {
-      line->plane[4].dcdx = -1;
-      line->plane[4].dcdy = 0;
-      line->plane[4].c = 1-bbox.x0;
-      line->plane[4].ei = 0;
-      line->plane[4].eo = 1;
-
-      line->plane[5].dcdx = 1;
-      line->plane[5].dcdy = 0;
-      line->plane[5].c = bbox.x1+1;
-      line->plane[5].ei = -1;
-      line->plane[5].eo = 0;
-
-      line->plane[6].dcdx = 0;
-      line->plane[6].dcdy = 1;
-      line->plane[6].c = 1-bbox.y0;
-      line->plane[6].ei = 0;
-      line->plane[6].eo = 1;
-
-      line->plane[7].dcdx = 0;
-      line->plane[7].dcdy = -1;
-      line->plane[7].c = bbox.y1+1;
-      line->plane[7].ei = -1;
-      line->plane[7].eo = 0;
+      plane[4].dcdx = -1;
+      plane[4].dcdy = 0;
+      plane[4].c = 1-bbox.x0;
+      plane[4].ei = 0;
+      plane[4].eo = 1;
+
+      plane[5].dcdx = 1;
+      plane[5].dcdy = 0;
+      plane[5].c = bbox.x1+1;
+      plane[5].ei = -1;
+      plane[5].eo = 0;
+
+      plane[6].dcdx = 0;
+      plane[6].dcdy = 1;
+      plane[6].c = 1-bbox.y0;
+      plane[6].ei = 0;
+      plane[6].eo = 1;
+
+      plane[7].dcdx = 0;
+      plane[7].dcdy = -1;
+      plane[7].c = bbox.y1+1;
+      plane[7].ei = -1;
+      plane[7].eo = 0;
    }
 
    return lp_setup_bin_triangle(setup, line, &bbox, nr_planes);
index 108c831e66e5af3e4e19a13cacdf077e1230807e..e30e70e16d2eb1b4c1658a9cc033530295a99c81 100644 (file)
@@ -45,6 +45,10 @@ struct point_info {
    int dx01, dx12;
 
    const float (*v0)[4];
+
+   float (*a0)[4];
+   float (*dadx)[4];
+   float (*dady)[4];
 };   
 
 
@@ -53,20 +57,19 @@ struct point_info {
  */
 static void
 constant_coef(struct lp_setup_context *setup,
-              struct lp_rast_triangle *point,
+              struct point_info *info,
               unsigned slot,
               const float value,
               unsigned i)
 {
-   point->inputs.a0[slot][i] = value;
-   point->inputs.dadx[slot][i] = 0.0f;
-   point->inputs.dady[slot][i] = 0.0f;
+   info->a0[slot][i] = value;
+   info->dadx[slot][i] = 0.0f;
+   info->dady[slot][i] = 0.0f;
 }
 
 
 static void
 point_persp_coeff(struct lp_setup_context *setup,
-                  struct lp_rast_triangle *point,
                   const struct point_info *info,
                   unsigned slot,
                   unsigned i)
@@ -82,9 +85,9 @@ point_persp_coeff(struct lp_setup_context *setup,
 
    assert(i < 4);
 
-   point->inputs.a0[slot][i] = info->v0[slot][i]*w0;
-   point->inputs.dadx[slot][i] = 0.0f;
-   point->inputs.dady[slot][i] = 0.0f;
+   info->a0[slot][i] = info->v0[slot][i]*w0;
+   info->dadx[slot][i] = 0.0f;
+   info->dady[slot][i] = 0.0f;
 }
 
 
@@ -98,7 +101,6 @@ point_persp_coeff(struct lp_setup_context *setup,
  */
 static void
 texcoord_coef(struct lp_setup_context *setup,
-              struct lp_rast_triangle *point,
               const struct point_info *info,
               unsigned slot,
               unsigned i,
@@ -115,14 +117,14 @@ texcoord_coef(struct lp_setup_context *setup,
       float x0 = info->v0[0][0] - setup->pixel_offset;
       float y0 = info->v0[0][1] - setup->pixel_offset;
 
-      point->inputs.dadx[slot][0] = dadx;
-      point->inputs.dady[slot][0] = dady;
-      point->inputs.a0[slot][0] = 0.5 - (dadx * x0 + dady * y0);
+      info->dadx[slot][0] = dadx;
+      info->dady[slot][0] = dady;
+      info->a0[slot][0] = 0.5 - (dadx * x0 + dady * y0);
 
       if (perspective) {
-         point->inputs.dadx[slot][0] *= w0;
-         point->inputs.dady[slot][0] *= w0;
-         point->inputs.a0[slot][0] *= w0;
+         info->dadx[slot][0] *= w0;
+         info->dady[slot][0] *= w0;
+         info->a0[slot][0] *= w0;
       }
    }
    else if (i == 1) {
@@ -135,25 +137,25 @@ texcoord_coef(struct lp_setup_context *setup,
          dady = -dady;
       }
 
-      point->inputs.dadx[slot][1] = dadx;
-      point->inputs.dady[slot][1] = dady;
-      point->inputs.a0[slot][1] = 0.5 - (dadx * x0 + dady * y0);
+      info->dadx[slot][1] = dadx;
+      info->dady[slot][1] = dady;
+      info->a0[slot][1] = 0.5 - (dadx * x0 + dady * y0);
 
       if (perspective) {
-         point->inputs.dadx[slot][1] *= w0;
-         point->inputs.dady[slot][1] *= w0;
-         point->inputs.a0[slot][1] *= w0;
+         info->dadx[slot][1] *= w0;
+         info->dady[slot][1] *= w0;
+         info->a0[slot][1] *= w0;
       }
    }
    else if (i == 2) {
-      point->inputs.a0[slot][2] = 0.0f;
-      point->inputs.dadx[slot][2] = 0.0f;
-      point->inputs.dady[slot][2] = 0.0f;
+      info->a0[slot][2] = 0.0f;
+      info->dadx[slot][2] = 0.0f;
+      info->dady[slot][2] = 0.0f;
    }
    else {
-      point->inputs.a0[slot][3] = perspective ? w0 : 1.0f;
-      point->inputs.dadx[slot][3] = 0.0f;
-      point->inputs.dady[slot][3] = 0.0f;
+      info->a0[slot][3] = perspective ? w0 : 1.0f;
+      info->dadx[slot][3] = 0.0f;
+      info->dady[slot][3] = 0.0f;
    }
 }
 
@@ -166,33 +168,32 @@ texcoord_coef(struct lp_setup_context *setup,
  */
 static void
 setup_point_fragcoord_coef(struct lp_setup_context *setup,
-                           struct lp_rast_triangle *point,
-                           const struct point_info *info,
+                           struct point_info *info,
                            unsigned slot,
                            unsigned usage_mask)
 {
    /*X*/
    if (usage_mask & TGSI_WRITEMASK_X) {
-      point->inputs.a0[slot][0] = 0.0;
-      point->inputs.dadx[slot][0] = 1.0;
-      point->inputs.dady[slot][0] = 0.0;
+      info->a0[slot][0] = 0.0;
+      info->dadx[slot][0] = 1.0;
+      info->dady[slot][0] = 0.0;
    }
 
    /*Y*/
    if (usage_mask & TGSI_WRITEMASK_Y) {
-      point->inputs.a0[slot][1] = 0.0;
-      point->inputs.dadx[slot][1] = 0.0;
-      point->inputs.dady[slot][1] = 1.0;
+      info->a0[slot][1] = 0.0;
+      info->dadx[slot][1] = 0.0;
+      info->dady[slot][1] = 1.0;
    }
 
    /*Z*/
    if (usage_mask & TGSI_WRITEMASK_Z) {
-      constant_coef(setup, point, slot, info->v0[0][2], 2);
+      constant_coef(setup, info, slot, info->v0[0][2], 2);
    }
 
    /*W*/
    if (usage_mask & TGSI_WRITEMASK_W) {
-      constant_coef(setup, point, slot, info->v0[0][3], 3);
+      constant_coef(setup, info, slot, info->v0[0][3], 3);
    }
 }
 
@@ -202,8 +203,7 @@ setup_point_fragcoord_coef(struct lp_setup_context *setup,
  */
 static void   
 setup_point_coefficients( struct lp_setup_context *setup,
-                          struct lp_rast_triangle *point,
-                          const struct point_info *info)
+                          struct point_info *info)
 {
    const struct lp_fragment_shader *shader = setup->fs.current.variant->shader;
    unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
@@ -248,7 +248,7 @@ setup_point_coefficients( struct lp_setup_context *setup,
                 (setup->sprite_coord_enable & (1 << semantic_index))) {
                for (i = 0; i < NUM_CHANNELS; i++) {
                   if (usage_mask & (1 << i)) {
-                     texcoord_coef(setup, point, info, slot + 1, i,
+                     texcoord_coef(setup, info, slot + 1, i,
                                    setup->sprite_coord_origin,
                                    perspective);
                   }
@@ -261,10 +261,10 @@ setup_point_coefficients( struct lp_setup_context *setup,
          for (i = 0; i < NUM_CHANNELS; i++) {
             if (usage_mask & (1 << i)) {
                if (perspective) {
-                  point_persp_coeff(setup, point, info, slot+1, i);
+                  point_persp_coeff(setup, info, slot+1, i);
                }
                else {
-                  constant_coef(setup, point, slot+1, info->v0[vert_attr][i], i);
+                  constant_coef(setup, info, slot+1, info->v0[vert_attr][i], i);
                }
             }
          }
@@ -273,7 +273,7 @@ setup_point_coefficients( struct lp_setup_context *setup,
       case LP_INTERP_FACING:
          for (i = 0; i < NUM_CHANNELS; i++)
             if (usage_mask & (1 << i))
-               constant_coef(setup, point, slot+1, 1.0, i);
+               constant_coef(setup, info, slot+1, 1.0, i);
          break;
 
       default:
@@ -284,7 +284,7 @@ setup_point_coefficients( struct lp_setup_context *setup,
 
    /* The internal position input is in slot zero:
     */
-   setup_point_fragcoord_coef(setup, point, info, 0,
+   setup_point_fragcoord_coef(setup, info, 0,
                               fragcoord_usage_mask);
 }
 
@@ -368,39 +368,44 @@ try_setup_point( struct lp_setup_context *setup,
    info.dx12 = fixed_width;
    info.dy01 = fixed_width;
    info.dy12 = 0;
+   info.a0 = GET_A0(&point->inputs);
+   info.dadx = GET_DADX(&point->inputs);
+   info.dady = GET_DADY(&point->inputs);
    
    /* Setup parameter interpolants:
     */
-   setup_point_coefficients(setup, point, &info);
+   setup_point_coefficients(setup, &info);
 
    point->inputs.frontfacing = TRUE;
    point->inputs.disable = FALSE;
    point->inputs.opaque = FALSE;
 
    {
-      point->plane[0].dcdx = -1;
-      point->plane[0].dcdy = 0;
-      point->plane[0].c = 1-bbox.x0;
-      point->plane[0].ei = 0;
-      point->plane[0].eo = 1;
-
-      point->plane[1].dcdx = 1;
-      point->plane[1].dcdy = 0;
-      point->plane[1].c = bbox.x1+1;
-      point->plane[1].ei = -1;
-      point->plane[1].eo = 0;
-
-      point->plane[2].dcdx = 0;
-      point->plane[2].dcdy = 1;
-      point->plane[2].c = 1-bbox.y0;
-      point->plane[2].ei = 0;
-      point->plane[2].eo = 1;
-
-      point->plane[3].dcdx = 0;
-      point->plane[3].dcdy = -1;
-      point->plane[3].c = bbox.y1+1;
-      point->plane[3].ei = -1;
-      point->plane[3].eo = 0;
+      struct lp_rast_plane *plane = GET_PLANES(point);
+
+      plane[0].dcdx = -1;
+      plane[0].dcdy = 0;
+      plane[0].c = 1-bbox.x0;
+      plane[0].ei = 0;
+      plane[0].eo = 1;
+
+      plane[1].dcdx = 1;
+      plane[1].dcdy = 0;
+      plane[1].c = bbox.x1+1;
+      plane[1].ei = -1;
+      plane[1].eo = 0;
+
+      plane[2].dcdx = 0;
+      plane[2].dcdy = 1;
+      plane[2].c = 1-bbox.y0;
+      plane[2].ei = 0;
+      plane[2].eo = 1;
+
+      plane[3].dcdx = 0;
+      plane[3].dcdy = -1;
+      plane[3].c = bbox.y1+1;
+      plane[3].ei = -1;
+      plane[3].eo = 0;
    }
 
    return lp_setup_bin_triangle(setup, point, &bbox, nr_planes);
index 3bf0b2d25224f79de0b748ec86a3c82dbbf412c0..937821b4c3a5ea9ab878784d3e0f6e7879e06974 100644 (file)
@@ -75,24 +75,25 @@ lp_setup_alloc_triangle(struct lp_scene *scene,
                         unsigned *tri_size)
 {
    unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float);
+   unsigned plane_sz = nr_planes * sizeof(struct lp_rast_plane);
    struct lp_rast_triangle *tri;
-   unsigned tri_bytes, bytes;
-   char *inputs;
 
-   tri_bytes = align(Offset(struct lp_rast_triangle, plane[nr_planes]), 16);
-   bytes = tri_bytes + (3 * input_array_sz);
-
-   tri = lp_scene_alloc_aligned( scene, bytes, 16 );
+   *tri_size = (sizeof(struct lp_rast_triangle) +
+                3 * input_array_sz +
+                plane_sz);
 
+   tri = lp_scene_alloc_aligned( scene, *tri_size, 16 );
    if (tri) {
-      inputs = ((char *)tri) + tri_bytes;
-      tri->inputs.a0   = (float (*)[4]) inputs;
-      tri->inputs.dadx = (float (*)[4]) (inputs + input_array_sz);
-      tri->inputs.dady = (float (*)[4]) (inputs + 2 * input_array_sz);
+      tri->inputs.stride = input_array_sz;
+   }
 
-      *tri_size = bytes;
+   {
+      char *a = (char *)tri;
+      char *b = (char *)&GET_PLANES(tri)[nr_planes];
+      assert(b - a == *tri_size);
    }
 
+
    return tri;
 }
 
@@ -228,6 +229,7 @@ do_triangle_ccw(struct lp_setup_context *setup,
 {
    struct lp_scene *scene = setup->scene;
    struct lp_rast_triangle *tri;
+   struct lp_rast_plane *plane;
    int x[3];
    int y[3];
    struct u_rect bbox;
@@ -296,7 +298,7 @@ do_triangle_ccw(struct lp_setup_context *setup,
    if (!tri)
       return FALSE;
 
-#ifdef DEBUG
+#if 0
    tri->v[0][0] = v0[0][0];
    tri->v[1][0] = v1[0][0];
    tri->v[2][0] = v2[0][0];
@@ -305,13 +307,14 @@ do_triangle_ccw(struct lp_setup_context *setup,
    tri->v[2][1] = v2[0][1];
 #endif
 
-   tri->plane[0].dcdy = x[0] - x[1];
-   tri->plane[1].dcdy = x[1] - x[2];
-   tri->plane[2].dcdy = x[2] - x[0];
+   plane = GET_PLANES(tri);
+   plane[0].dcdy = x[0] - x[1];
+   plane[1].dcdy = x[1] - x[2];
+   plane[2].dcdy = x[2] - x[0];
 
-   tri->plane[0].dcdx = y[0] - y[1];
-   tri->plane[1].dcdx = y[1] - y[2];
-   tri->plane[2].dcdx = y[2] - y[0];
+   plane[0].dcdx = y[0] - y[1];
+   plane[1].dcdx = y[1] - y[2];
+   plane[2].dcdx = y[2] - y[0];
 
    LP_COUNT(nr_tris);
 
@@ -325,12 +328,10 @@ do_triangle_ccw(struct lp_setup_context *setup,
 
   
    for (i = 0; i < 3; i++) {
-      struct lp_rast_plane *plane = &tri->plane[i];
-
       /* half-edge constants, will be iterated over the whole render
        * target.
        */
-      plane->c = plane->dcdx * x[i] - plane->dcdy * y[i];
+      plane[i].c = plane[i].dcdx * x[i] - plane[i].dcdy * y[i];
 
       /* correct for top-left vs. bottom-left fill convention.  
        *
@@ -345,38 +346,38 @@ do_triangle_ccw(struct lp_setup_context *setup,
        * to its usual method, in which case it will probably want
        * to use the opposite, top-left convention.
        */         
-      if (plane->dcdx < 0) {
+      if (plane[i].dcdx < 0) {
          /* both fill conventions want this - adjust for left edges */
-         plane->c++;            
+         plane[i].c++;            
       }
-      else if (plane->dcdx == 0) {
+      else if (plane[i].dcdx == 0) {
          if (setup->pixel_offset == 0) {
             /* correct for top-left fill convention:
              */
-            if (plane->dcdy > 0) plane->c++;
+            if (plane[i].dcdy > 0) plane[i].c++;
          }
          else {
             /* correct for bottom-left fill convention:
              */
-            if (plane->dcdy < 0) plane->c++;
+            if (plane[i].dcdy < 0) plane[i].c++;
          }
       }
 
-      plane->dcdx *= FIXED_ONE;
-      plane->dcdy *= FIXED_ONE;
+      plane[i].dcdx *= FIXED_ONE;
+      plane[i].dcdy *= FIXED_ONE;
 
       /* find trivial reject offsets for each edge for a single-pixel
        * sized block.  These will be scaled up at each recursive level to
        * match the active blocksize.  Scaling in this way works best if
        * the blocks are square.
        */
-      plane->eo = 0;
-      if (plane->dcdx < 0) plane->eo -= plane->dcdx;
-      if (plane->dcdy > 0) plane->eo += plane->dcdy;
+      plane[i].eo = 0;
+      if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx;
+      if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy;
 
       /* Calculate trivial accept offsets from the above.
        */
-      plane->ei = plane->dcdy - plane->dcdx - plane->eo;
+      plane[i].ei = plane[i].dcdy - plane[i].dcdx - plane[i].eo;
    }
 
 
@@ -399,29 +400,29 @@ do_triangle_ccw(struct lp_setup_context *setup,
     * these planes elsewhere.
     */
    if (nr_planes == 7) {
-      tri->plane[3].dcdx = -1;
-      tri->plane[3].dcdy = 0;
-      tri->plane[3].c = 1-bbox.x0;
-      tri->plane[3].ei = 0;
-      tri->plane[3].eo = 1;
-
-      tri->plane[4].dcdx = 1;
-      tri->plane[4].dcdy = 0;
-      tri->plane[4].c = bbox.x1+1;
-      tri->plane[4].ei = -1;
-      tri->plane[4].eo = 0;
-
-      tri->plane[5].dcdx = 0;
-      tri->plane[5].dcdy = 1;
-      tri->plane[5].c = 1-bbox.y0;
-      tri->plane[5].ei = 0;
-      tri->plane[5].eo = 1;
-
-      tri->plane[6].dcdx = 0;
-      tri->plane[6].dcdy = -1;
-      tri->plane[6].c = bbox.y1+1;
-      tri->plane[6].ei = -1;
-      tri->plane[6].eo = 0;
+      plane[3].dcdx = -1;
+      plane[3].dcdy = 0;
+      plane[3].c = 1-bbox.x0;
+      plane[3].ei = 0;
+      plane[3].eo = 1;
+
+      plane[4].dcdx = 1;
+      plane[4].dcdy = 0;
+      plane[4].c = bbox.x1+1;
+      plane[4].ei = -1;
+      plane[4].eo = 0;
+
+      plane[5].dcdx = 0;
+      plane[5].dcdy = 1;
+      plane[5].c = 1-bbox.y0;
+      plane[5].ei = 0;
+      plane[5].eo = 1;
+
+      plane[6].dcdx = 0;
+      plane[6].dcdy = -1;
+      plane[6].c = bbox.y1+1;
+      plane[6].ei = -1;
+      plane[6].eo = 0;
    }
 
    return lp_setup_bin_triangle( setup, tri, &bbox, nr_planes );
@@ -525,6 +526,7 @@ lp_setup_bin_triangle( struct lp_setup_context *setup,
    }
    else
    {
+      struct lp_rast_plane *plane = GET_PLANES(tri);
       int c[MAX_PLANES];
       int ei[MAX_PLANES];
       int eo[MAX_PLANES];
@@ -538,14 +540,14 @@ lp_setup_bin_triangle( struct lp_setup_context *setup,
       int iy1 = bbox->y1 / TILE_SIZE;
       
       for (i = 0; i < nr_planes; i++) {
-         c[i] = (tri->plane[i].c + 
-                 tri->plane[i].dcdy * iy0 * TILE_SIZE - 
-                 tri->plane[i].dcdx * ix0 * TILE_SIZE);
-
-         ei[i] = tri->plane[i].ei << TILE_ORDER;
-         eo[i] = tri->plane[i].eo << TILE_ORDER;
-         xstep[i] = -(tri->plane[i].dcdx << TILE_ORDER);
-         ystep[i] = tri->plane[i].dcdy << TILE_ORDER;
+         c[i] = (plane[i].c + 
+                 plane[i].dcdy * iy0 * TILE_SIZE - 
+                 plane[i].dcdx * ix0 * TILE_SIZE);
+
+         ei[i] = plane[i].ei << TILE_ORDER;
+         eo[i] = plane[i].eo << TILE_ORDER;
+         xstep[i] = -(plane[i].dcdx << TILE_ORDER);
+         ystep[i] = plane[i].dcdy << TILE_ORDER;
       }