radeonsi: force NaNs to 0
[mesa.git] / src / gallium / drivers / llvmpipe / lp_setup_line.c
index 930207ae33a4bf13bc5c4dd677aee1be8f980499..6c05b90e64a062a5c2875dbedc584c3d076e89ec 100644 (file)
@@ -1,6 +1,6 @@
 /**************************************************************************
  *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2007 VMware, Inc.
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -18,7 +18,7 @@
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "lp_setup_context.h"
 #include "lp_rast.h"
 #include "lp_state_fs.h"
+#include "lp_state_setup.h"
+#include "lp_context.h"
+#include "draw/draw_context.h"
 
 #define NUM_CHANNELS 4
 
+struct lp_line_info {
 
-static const int step_scissor_minx[16] = {
-   0, 1, 0, 1,
-   2, 3, 2, 3,
-   0, 1, 0, 1,
-   2, 3, 2, 3
-};
-
-static const int step_scissor_maxx[16] = {
-    0, -1,  0, -1,
-   -2, -3, -2, -3,
-    0, -1,  0, -1,
-   -2, -3, -2, -3
-};
+   float dx;
+   float dy;
+   float oneoverarea;
+   boolean frontfacing;
 
-static const int step_scissor_miny[16] = {
-   0, 0, 1, 1,
-   0, 0, 1, 1,
-   2, 2, 3, 3,
-   2, 2, 3, 3
-};
+   const float (*v1)[4];
+   const float (*v2)[4];
 
-static const int step_scissor_maxy[16] = {
-    0,  0, -1, -1,
-    0,  0, -1, -1,
-   -2, -2, -3, -3,
-   -2, -2, -3, -3
+   float (*a0)[4];
+   float (*dadx)[4];
+   float (*dady)[4];
 };
 
 
-
 /**
  * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
  */
 static void constant_coef( struct lp_setup_context *setup,
-                           struct lp_rast_triangle *tri,
+                           struct lp_line_info *info,
                            unsigned slot,
                            const float value,
                            unsigned i )
 {
-   tri->inputs.a0[slot][i] = value;
-   tri->inputs.dadx[slot][i] = 0.0f;
-   tri->inputs.dady[slot][i] = 0.0f;
+   info->a0[slot][i] = value;
+   info->dadx[slot][i] = 0.0f;
+   info->dady[slot][i] = 0.0f;
 }
 
 
@@ -89,27 +77,24 @@ static void constant_coef( struct lp_setup_context *setup,
  * for a triangle.
  */
 static void linear_coef( struct lp_setup_context *setup,
-                         struct lp_rast_triangle *tri,
-                         float oneoverarea,
+                         struct lp_line_info *info,
                          unsigned slot,
-                         const float (*v1)[4],
-                         const float (*v2)[4],
                          unsigned vert_attr,
                          unsigned i)
 {
-   float a1 = v1[vert_attr][i]; 
-   float a2 = v2[vert_attr][i];
+   float a1 = info->v1[vert_attr][i]; 
+   float a2 = info->v2[vert_attr][i];
       
    float da21 = a1 - a2;   
-   float dadx = da21 * tri->dx * oneoverarea;
-   float dady = da21 * tri->dy * oneoverarea;
+   float dadx = da21 * info->dx * info->oneoverarea;
+   float dady = da21 * info->dy * info->oneoverarea;
 
-   tri->inputs.dadx[slot][i] = dadx;
-   tri->inputs.dady[slot][i] = dady;  
+   info->dadx[slot][i] = dadx;
+   info->dady[slot][i] = dady;  
    
-   tri->inputs.a0[slot][i] = (a1 -
-                              (dadx * (v1[0][0] - setup->pixel_offset) +
-                               dady * (v1[0][1] - setup->pixel_offset)));
+   info->a0[slot][i] = (a1 -
+                              (dadx * (info->v1[0][0] - setup->pixel_offset) +
+                               dady * (info->v1[0][1] - setup->pixel_offset)));
 }
 
 
@@ -122,74 +107,100 @@ static void linear_coef( struct lp_setup_context *setup,
  * divide the interpolated value by the interpolated W at that fragment.
  */
 static void perspective_coef( struct lp_setup_context *setup,
-                              struct lp_rast_triangle *tri,
-                              float oneoverarea,
+                              struct lp_line_info *info,
                               unsigned slot,
-                              const float (*v1)[4],
-                              const float (*v2)[4],
                               unsigned vert_attr,
                               unsigned i)
 {
    /* premultiply by 1/w  (v[0][3] is always 1/w):
     */
-   float a1 = v1[vert_attr][i] * v1[0][3];
-   float a2 = v2[vert_attr][i] * v2[0][3];
+   float a1 = info->v1[vert_attr][i] * info->v1[0][3];
+   float a2 = info->v2[vert_attr][i] * info->v2[0][3];
 
    float da21 = a1 - a2;   
-   float dadx = da21 * tri->dx * oneoverarea;
-   float dady = da21 * tri->dy * oneoverarea;
+   float dadx = da21 * info->dx * info->oneoverarea;
+   float dady = da21 * info->dy * info->oneoverarea;
 
-   tri->inputs.dadx[slot][i] = dadx;
-   tri->inputs.dady[slot][i] = dady;
+   info->dadx[slot][i] = dadx;
+   info->dady[slot][i] = dady;
    
-   tri->inputs.a0[slot][i] = (a1 -
-                              (dadx * (v1[0][0] - setup->pixel_offset) +
-                               dady * (v1[0][1] - setup->pixel_offset)));
+   info->a0[slot][i] = (a1 -
+                        (dadx * (info->v1[0][0] - setup->pixel_offset) +
+                         dady * (info->v1[0][1] - setup->pixel_offset)));
+}
+
+static void
+setup_fragcoord_coef( struct lp_setup_context *setup,
+                      struct lp_line_info *info,
+                      unsigned slot,
+                      unsigned usage_mask)
+{
+   /* Write the position (fragcoord) coefficients into info->a0/dadx/dady
+    * at `slot`, touching only the channels selected by usage_mask.
+    *
+    * X: a0 = 0, dadx = 1, dady = 0.
+    */
+   if (usage_mask & TGSI_WRITEMASK_X) {
+      info->a0[slot][0] = 0.0;
+      info->dadx[slot][0] = 1.0;
+      info->dady[slot][0] = 0.0;
+   }
+
+   /* Y: a0 = 0, dadx = 0, dady = 1. */
+   if (usage_mask & TGSI_WRITEMASK_Y) {
+      info->a0[slot][1] = 0.0;
+      info->dadx[slot][1] = 0.0;
+      info->dady[slot][1] = 1.0;
+   }
+
+   /* Z: linear_coef() on channel 2 of vertex attribute 0. */
+   if (usage_mask & TGSI_WRITEMASK_Z) {
+      linear_coef(setup, info, slot, 0, 2);
+   }
+
+   /* W: linear_coef() on channel 3 of vertex attribute 0. */
+   if (usage_mask & TGSI_WRITEMASK_W) {
+      linear_coef(setup, info, slot, 0, 3);
+   }
+}
 
 /**
  * Compute the tri->coef[] array dadx, dady, a0 values.
  */
 static void setup_line_coefficients( struct lp_setup_context *setup,
-                                     struct lp_rast_triangle *tri,
-                                     float oneoverarea,
-                                     const float (*v1)[4],
-                                     const float (*v2)[4])
+                                     struct lp_line_info *info)
 {
+   const struct lp_setup_variant_key *key = &setup->setup.variant->key;
    unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
    unsigned slot;
 
    /* setup interpolation for all the remaining attributes:
     */
-   for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
-      unsigned vert_attr = setup->fs.input[slot].src_index;
-      unsigned usage_mask = setup->fs.input[slot].usage_mask;
+   for (slot = 0; slot < key->num_inputs; slot++) {
+      unsigned vert_attr = key->inputs[slot].src_index;
+      unsigned usage_mask = key->inputs[slot].usage_mask;
       unsigned i;
            
-      switch (setup->fs.input[slot].interp) {
+      switch (key->inputs[slot].interp) {
       case LP_INTERP_CONSTANT:
-         if (setup->flatshade_first) {
+         if (key->flatshade_first) {
             for (i = 0; i < NUM_CHANNELS; i++)
                if (usage_mask & (1 << i))
-                  constant_coef(setup, tri, slot+1, v1[vert_attr][i], i);
+                  constant_coef(setup, info, slot+1, info->v1[vert_attr][i], i);
          }
          else {
             for (i = 0; i < NUM_CHANNELS; i++)
                if (usage_mask & (1 << i))
-                  constant_coef(setup, tri, slot+1, v2[vert_attr][i], i);
+                  constant_coef(setup, info, slot+1, info->v2[vert_attr][i], i);
          }
          break;
 
       case LP_INTERP_LINEAR:
          for (i = 0; i < NUM_CHANNELS; i++)
             if (usage_mask & (1 << i))
-               linear_coef(setup, tri, oneoverarea, slot+1, v1, v2, vert_attr, i);
+               linear_coef(setup, info, slot+1, vert_attr, i);
          break;
 
       case LP_INTERP_PERSPECTIVE:
          for (i = 0; i < NUM_CHANNELS; i++)
             if (usage_mask & (1 << i))
-               perspective_coef(setup, tri, oneoverarea, slot+1, v1, v2, vert_attr, i);
+               perspective_coef(setup, info, slot+1, vert_attr, i);
          fragcoord_usage_mask |= TGSI_WRITEMASK_W;
          break;
 
@@ -202,6 +213,13 @@ static void setup_line_coefficients( struct lp_setup_context *setup,
          fragcoord_usage_mask |= usage_mask;
          break;
 
+      case LP_INTERP_FACING:
+         for (i = 0; i < NUM_CHANNELS; i++)
+            if (usage_mask & (1 << i))
+               constant_coef(setup, info, slot+1,
+                             info->frontfacing ? 1.0f : -1.0f, i);
+         break;
+
       default:
          assert(0);
       }
@@ -209,8 +227,8 @@ static void setup_line_coefficients( struct lp_setup_context *setup,
 
    /* The internal position input is in slot zero:
     */
-   lp_setup_fragcoord_coef(setup, tri, oneoverarea, 0, v1, v2, v2,
-                            fragcoord_usage_mask);
+   setup_fragcoord_coef(setup, info, 0,
+                        fragcoord_usage_mask);
 }
 
 
@@ -229,127 +247,305 @@ print_line(struct lp_setup_context *setup,
            const float (*v1)[4],
            const float (*v2)[4])
 {
+   const struct lp_setup_variant_key *key = &setup->setup.variant->key;
    uint i;
 
    debug_printf("llvmpipe line\n");
-   for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
+   for (i = 0; i < 1 + key->num_inputs; i++) {
       debug_printf("  v1[%d]:  %f %f %f %f\n", i,
                    v1[i][0], v1[i][1], v1[i][2], v1[i][3]);
    }
-   for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
+   for (i = 0; i < 1 + key->num_inputs; i++) {
       debug_printf("  v2[%d]:  %f %f %f %f\n", i,
                    v2[i][0], v2[i][1], v2[i][2], v2[i][3]);
    }
 }
 
 
-static void
-lp_setup_line( struct lp_setup_context *setup,
+static INLINE boolean sign(float x){
+   return x >= 0;  /* zero counts as positive; negative x gives 0 */
+}  
+
+
+/* Fractional part of f, computed as f - floorf(f).
+ * Used on positive floats only.
+ */
+static INLINE float fracf(float f)
+{
+   return f - floorf(f);
+}
+
+
+
+static boolean
+try_setup_line( struct lp_setup_context *setup,
                const float (*v1)[4],
                const float (*v2)[4])
 {
-   struct lp_scene *scene = lp_setup_get_current_scene(setup);
+   struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe;
+   struct lp_scene *scene = setup->scene;
+   const struct lp_setup_variant_key *key = &setup->setup.variant->key;
    struct lp_rast_triangle *line;
-   float oneoverarea;
-   float half_width = setup->line_width / 2;
-   int minx, maxx, miny, maxy;
-   int ix0, ix1, iy0, iy1;
+   struct lp_rast_plane *plane;
+   struct lp_line_info info;
+   float width = MAX2(1.0, setup->line_width);
+   struct u_rect bbox;
    unsigned tri_bytes;
    int x[4]; 
    int y[4];
    int i;
    int nr_planes = 4;
-   boolean opaque;
-         
+   unsigned viewport_index = 0;
+   unsigned layer = 0;
+   
+   /* linewidth should be interpreted as integer */
+   int fixed_width = util_iround(width) * FIXED_ONE;
+
+   float x_offset=0;
+   float y_offset=0;
+   float x_offset_end=0;
+   float y_offset_end=0;
+      
+   float x1diff;
+   float y1diff;
+   float x2diff;
+   float y2diff;
+   float dx, dy;
+   float area;
+
+   boolean draw_start;
+   boolean draw_end;
+   boolean will_draw_start;
+   boolean will_draw_end;
+
    if (0)
       print_line(setup, v1, v2);
 
    if (setup->scissor_test) {
       nr_planes = 8;
+      if (setup->viewport_index_slot > 0) {
+         unsigned *udata = (unsigned*)v1[setup->viewport_index_slot];
+         viewport_index = lp_clamp_viewport_idx(*udata);
+      }
    }
    else {
       nr_planes = 4;
    }
 
-   line = lp_setup_alloc_triangle(scene,
-                                  setup->fs.nr_inputs,
-                                  nr_planes,
-                                  &tri_bytes);
-   if (!line)
-      return;
+   if (setup->layer_slot > 0) {
+      layer = *(unsigned*)v1[setup->layer_slot];
+      layer = MIN2(layer, scene->fb_max_layer);
+   }
 
-#ifndef DEBUG
-   line->v[0][0] = v1[0][0];
-   line->v[1][0] = v2[0][0];   
-   line->v[0][1] = v1[0][1];
-   line->v[1][1] = v2[0][1];
-#endif
+   dx = v1[0][0] - v2[0][0];
+   dy = v1[0][1] - v2[0][1];
+   area = (dx * dx  + dy * dy);
+   if (area == 0) {
+      LP_COUNT(nr_culled_tris);
+      return TRUE;
+   }
 
-   /* pre-calculation(based on given vertices) to determine if line is
-    * more horizontal or more vertical
-    */
-   line->dx = v1[0][0] - v2[0][0];
-   line->dy = v1[0][1] - v2[0][1];
-   
-   /* x-major line */
-   if (fabsf(line->dx) >= fabsf(line->dy)) {
-      if (line->dx < 0) {
+   info.oneoverarea = 1.0f / area;
+   info.dx = dx;
+   info.dy = dy;
+   info.v1 = v1;
+   info.v2 = v2;
+
+  
+   /* X-MAJOR LINE */
+   if (fabsf(dx) >= fabsf(dy)) {
+      float dydx = dy / dx;
+
+      x1diff = v1[0][0] - (float) floor(v1[0][0]) - 0.5;
+      y1diff = v1[0][1] - (float) floor(v1[0][1]) - 0.5;
+      x2diff = v2[0][0] - (float) floor(v2[0][0]) - 0.5;
+      y2diff = v2[0][1] - (float) floor(v2[0][1]) - 0.5;
+
+      if (y2diff==-0.5 && dy<0){
+         y2diff = 0.5;
+      }
+      
+      /* 
+       * Diamond exit rule test for starting point 
+       */    
+      if (fabsf(x1diff) + fabsf(y1diff) < 0.5) {
+         draw_start = TRUE;
+      }
+      else if (sign(x1diff) == sign(-dx)) {
+         draw_start = FALSE;
+      }
+      else if (sign(-y1diff) != sign(dy)) {
+         draw_start = TRUE;
+      }
+      else {
+         /* do intersection test */
+         float yintersect = fracf(v1[0][1]) + x1diff * dydx;
+         draw_start = (yintersect < 1.0 && yintersect > 0.0);
+      }
+
+
+      /* 
+       * Diamond exit rule test for ending point 
+       */    
+      if (fabsf(x2diff) + fabsf(y2diff) < 0.5) {
+         draw_end = FALSE;
+      }
+      else if (sign(x2diff) != sign(-dx)) {
+         draw_end = FALSE;
+      }
+      else if (sign(-y2diff) == sign(dy)) {
+         draw_end = TRUE;
+      }
+      else {
+         /* do intersection test */
+         float yintersect = fracf(v2[0][1]) + x2diff * dydx;
+         draw_end = (yintersect < 1.0 && yintersect > 0.0);
+      }
+
+      /* Are we already drawing start/end?
+       */
+      will_draw_start = sign(-x1diff) != sign(dx);
+      will_draw_end = (sign(x2diff) == sign(-dx)) || x2diff==0;
+
+      if (dx < 0) {
          /* if v2 is to the right of v1, swap pointers */
          const float (*temp)[4] = v1;
          v1 = v2;
          v2 = temp;
-         line->dx = -line->dx;
-         line->dy = -line->dy;
+         dx = -dx;
+         dy = -dy;
+         /* Otherwise shift planes appropriately */
+         if (will_draw_start != draw_start) {
+            x_offset_end = - x1diff - 0.5;
+            y_offset_end = x_offset_end * dydx;
+
+         }
+         if (will_draw_end != draw_end) {
+            x_offset = - x2diff - 0.5;
+            y_offset = x_offset * dydx;
+         }
+
       }
-      
+      else{
+         /* Otherwise shift planes appropriately */
+         if (will_draw_start != draw_start) {
+            x_offset = - x1diff + 0.5;
+            y_offset = x_offset * dydx;
+         }
+         if (will_draw_end != draw_end) {
+            x_offset_end = - x2diff + 0.5;
+            y_offset_end = x_offset_end * dydx;
+         }
+      }
+  
       /* x/y positions in fixed point */
-      x[0] = subpixel_snap(v1[0][0] - setup->pixel_offset);
-      x[1] = subpixel_snap(v2[0][0] - setup->pixel_offset);
-      x[2] = subpixel_snap(v2[0][0] - setup->pixel_offset);
-      x[3] = subpixel_snap(v1[0][0] - setup->pixel_offset);
+      x[0] = subpixel_snap(v1[0][0] + x_offset     - setup->pixel_offset);
+      x[1] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset);
+      x[2] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset);
+      x[3] = subpixel_snap(v1[0][0] + x_offset     - setup->pixel_offset);
+      
+      y[0] = subpixel_snap(v1[0][1] + y_offset     - setup->pixel_offset) - fixed_width/2;
+      y[1] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset) - fixed_width/2;
+      y[2] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset) + fixed_width/2;
+      y[3] = subpixel_snap(v1[0][1] + y_offset     - setup->pixel_offset) + fixed_width/2;
       
-      y[0] = subpixel_snap(v1[0][1] - half_width - setup->pixel_offset);
-      y[1] = subpixel_snap(v2[0][1] - half_width - setup->pixel_offset);
-      y[2] = subpixel_snap(v2[0][1] + half_width - setup->pixel_offset);
-      y[3] = subpixel_snap(v1[0][1] + half_width - setup->pixel_offset);
    }
-   else{
-      /* y-major line */
-      if (line->dy > 0) {
+   else {
+      const float dxdy = dx / dy;
+
+      /* Y-MAJOR LINE */      
+      x1diff = v1[0][0] - (float) floor(v1[0][0]) - 0.5;
+      y1diff = v1[0][1] - (float) floor(v1[0][1]) - 0.5;
+      x2diff = v2[0][0] - (float) floor(v2[0][0]) - 0.5;
+      y2diff = v2[0][1] - (float) floor(v2[0][1]) - 0.5;
+
+      if (x2diff==-0.5 && dx<0) {
+         x2diff = 0.5;
+      }
+
+      /* 
+       * Diamond exit rule test for starting point 
+       */    
+      if (fabsf(x1diff) + fabsf(y1diff) < 0.5) {
+         draw_start = TRUE;
+      }
+      else if (sign(-y1diff) == sign(dy)) {
+         draw_start = FALSE;
+      }
+      else if (sign(x1diff) != sign(-dx)) {
+         draw_start = TRUE;
+      }
+      else {
+         /* do intersection test */
+         float xintersect = fracf(v1[0][0]) + y1diff * dxdy;
+         draw_start = (xintersect < 1.0 && xintersect > 0.0);
+      }
+
+      /* 
+       * Diamond exit rule test for ending point 
+       */    
+      if (fabsf(x2diff) + fabsf(y2diff) < 0.5) {
+         draw_end = FALSE;
+      }
+      else if (sign(-y2diff) != sign(dy) ) {
+         draw_end = FALSE;
+      }
+      else if (sign(x2diff) == sign(-dx) ) {
+         draw_end = TRUE;
+      }
+      else {
+         /* do intersection test */
+         float xintersect = fracf(v2[0][0]) + y2diff * dxdy;
+         draw_end = (xintersect < 1.0 && xintersect >= 0.0);
+      }
+
+      /* Are we already drawing start/end?
+       */
+      will_draw_start = sign(y1diff) == sign(dy);
+      will_draw_end = (sign(-y2diff) == sign(dy)) || y2diff==0;
+
+      if (dy > 0) {
          /* if v2 is on top of v1, swap pointers */
          const float (*temp)[4] = v1;
          v1 = v2;
          v2 = temp; 
-         line->dx = -line->dx;
-         line->dy = -line->dy;
+         dx = -dx;
+         dy = -dy;
+
+         /* Otherwise shift planes appropriately */
+         if (will_draw_start != draw_start) {
+            y_offset_end = - y1diff + 0.5;
+            x_offset_end = y_offset_end * dxdy;
+         }
+         if (will_draw_end != draw_end) {
+            y_offset = - y2diff + 0.5;
+            x_offset = y_offset * dxdy;
+         }
       }
-      x[0] = subpixel_snap(v1[0][0] - half_width - setup->pixel_offset);
-      x[1] = subpixel_snap(v2[0][0] - half_width - setup->pixel_offset);
-      x[2] = subpixel_snap(v2[0][0] + half_width - setup->pixel_offset);
-      x[3] = subpixel_snap(v1[0][0] + half_width - setup->pixel_offset);
+      else {
+         /* Otherwise shift planes appropriately */
+         if (will_draw_start != draw_start) {
+            y_offset = - y1diff - 0.5;
+            x_offset = y_offset * dxdy;
+                     
+         }
+         if (will_draw_end != draw_end) {
+            y_offset_end = - y2diff - 0.5;
+            x_offset_end = y_offset_end * dxdy;
+         }
+      }
+
+      /* x/y positions in fixed point */
+      x[0] = subpixel_snap(v1[0][0] + x_offset     - setup->pixel_offset) - fixed_width/2;
+      x[1] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset) - fixed_width/2;
+      x[2] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset) + fixed_width/2;
+      x[3] = subpixel_snap(v1[0][0] + x_offset     - setup->pixel_offset) + fixed_width/2;
      
-      y[0] = subpixel_snap(v1[0][1] - setup->pixel_offset);
-      y[1] = subpixel_snap(v2[0][1] - setup->pixel_offset);
-      y[2] = subpixel_snap(v2[0][1] - setup->pixel_offset);
-      y[3] = subpixel_snap(v1[0][1] - setup->pixel_offset);
+      y[0] = subpixel_snap(v1[0][1] + y_offset     - setup->pixel_offset); 
+      y[1] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset);
+      y[2] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset);
+      y[3] = subpixel_snap(v1[0][1] + y_offset     - setup->pixel_offset);
    }
 
-   /* calculate the deltas */
-   line->plane[0].dcdy = x[0] - x[1];
-   line->plane[1].dcdy = x[1] - x[2];
-   line->plane[2].dcdy = x[2] - x[3];
-   line->plane[3].dcdy = x[3] - x[0];
-
-   line->plane[0].dcdx = y[0] - y[1];
-   line->plane[1].dcdx = y[1] - y[2];
-   line->plane[2].dcdx = y[2] - y[3];
-   line->plane[3].dcdx = y[3] - y[0];
-
-
-   LP_COUNT(nr_tris);
-
    /* Bounding rectangle (in pixels) */
    {
       /* Yes this is necessary to accurately calculate bounding boxes
@@ -357,122 +553,128 @@ lp_setup_line( struct lp_setup_context *setup,
        * up needing a bottom-left fill convention, which requires
        * slightly different rounding.
        */
-      int adj = (setup->pixel_offset != 0) ? 1 : 0;
+      int adj = (setup->bottom_edge_rule != 0) ? 1 : 0;
+
+      bbox.x0 = (MIN4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER;
+      bbox.x1 = (MAX4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER;
+      bbox.y0 = (MIN4(y[0], y[1], y[2], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
+      bbox.y1 = (MAX4(y[0], y[1], y[2], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
 
-      minx = (MIN4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER;
-      maxx = (MAX4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER;
-      miny = (MIN4(y[0], y[1], y[3], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
-      maxy = (MAX4(y[0], y[1], y[3], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
+      /* Inclusive coordinates:
+       */
+      bbox.x1--;
+      bbox.y1--;
    }
 
-   if (setup->scissor_test) {
-      minx = MAX2(minx, setup->scissor.current.minx);
-      maxx = MIN2(maxx, setup->scissor.current.maxx);
-      miny = MAX2(miny, setup->scissor.current.miny);
-      maxy = MIN2(maxy, setup->scissor.current.maxy);
+   if (bbox.x1 < bbox.x0 ||
+       bbox.y1 < bbox.y0) {
+      if (0) debug_printf("empty bounding box\n");
+      LP_COUNT(nr_culled_tris);
+      return TRUE;
    }
-   else {
-      minx = MAX2(minx, 0);
-      miny = MAX2(miny, 0);
-      maxx = MIN2(maxx, scene->fb.width);
-      maxy = MIN2(maxy, scene->fb.height);
+
+   if (!u_rect_test_intersection(&setup->draw_regions[viewport_index], &bbox)) {
+      if (0) debug_printf("offscreen\n");
+      LP_COUNT(nr_culled_tris);
+      return TRUE;
    }
 
+   /* Can safely discard negative regions:
+    */
+   bbox.x0 = MAX2(bbox.x0, 0);
+   bbox.y0 = MAX2(bbox.y0, 0);
+
+   line = lp_setup_alloc_triangle(scene,
+                                  key->num_inputs,
+                                  nr_planes,
+                                  &tri_bytes);
+   if (!line)
+      return FALSE;
+
+#ifdef DEBUG
+   line->v[0][0] = v1[0][0];
+   line->v[1][0] = v2[0][0];   
+   line->v[0][1] = v1[0][1];
+   line->v[1][1] = v2[0][1];
+#endif
+
+   LP_COUNT(nr_tris);
 
-   if (miny >= maxy || minx >= maxx) {
-      lp_scene_putback_data( scene, tri_bytes );
-      return;
+   if (lp_context->active_statistics_queries &&
+       !llvmpipe_rasterization_disabled(lp_context)) {
+      lp_context->pipeline_statistics.c_primitives++;
    }
 
-   oneoverarea = 1.0f / (line->dx * line->dx  + line->dy * line->dy);    
+   /* calculate the deltas */
+   plane = GET_PLANES(line);
+   plane[0].dcdy = x[0] - x[1];
+   plane[1].dcdy = x[1] - x[2];
+   plane[2].dcdy = x[2] - x[3];
+   plane[3].dcdy = x[3] - x[0];
+
+   plane[0].dcdx = y[0] - y[1];
+   plane[1].dcdx = y[1] - y[2];
+   plane[2].dcdx = y[2] - y[3];
+   plane[3].dcdx = y[3] - y[0];
+
+   if (draw_will_inject_frontface(lp_context->draw) &&
+       setup->face_slot > 0) {
+      line->inputs.frontfacing = v1[setup->face_slot][0];
+   } else {
+      line->inputs.frontfacing = TRUE;
+   }
 
    /* Setup parameter interpolants:
     */
-   setup_line_coefficients( setup, line, oneoverarea, v1, v2); 
+   info.a0 = GET_A0(&line->inputs);
+   info.dadx = GET_DADX(&line->inputs);
+   info.dady = GET_DADY(&line->inputs);
+   info.frontfacing = line->inputs.frontfacing;
+   setup_line_coefficients(setup, &info); 
+
+   line->inputs.disable = FALSE;
+   line->inputs.opaque = FALSE;
+   line->inputs.layer = layer;
+   line->inputs.viewport_index = viewport_index;
 
    for (i = 0; i < 4; i++) {
-      struct lp_rast_plane *plane = &line->plane[i];
 
       /* half-edge constants, will be interated over the whole render
        * target.
        */
-      plane->c = plane->dcdx * x[i] - plane->dcdy * y[i];
+      plane[i].c = IMUL64(plane[i].dcdx, x[i]) - IMUL64(plane[i].dcdy, y[i]);
 
       
       /* correct for top-left vs. bottom-left fill convention.  
-       *
-       * note that we're overloading gl_rasterization_rules to mean
-       * both (0.5,0.5) pixel centers *and* bottom-left filling
-       * convention.
-       *
-       * GL actually has a top-left filling convention, but GL's
-       * notion of "top" differs from gallium's...
-       *
-       * Also, sometimes (in FBO cases) GL will render upside down
-       * to its usual method, in which case it will probably want
-       * to use the opposite, top-left convention.
        */         
-      if (plane->dcdx < 0) {
+      if (plane[i].dcdx < 0) {
          /* both fill conventions want this - adjust for left edges */
-         plane->c++;            
+         plane[i].c++;            
       }
-      else if (plane->dcdx == 0) {
+      else if (plane[i].dcdx == 0) {
          if (setup->pixel_offset == 0) {
             /* correct for top-left fill convention:
              */
-            if (plane->dcdy > 0) plane->c++;
+            if (plane[i].dcdy > 0) plane[i].c++;
          }
          else {
             /* correct for bottom-left fill convention:
              */
-            if (plane->dcdy < 0) plane->c++;
+            if (plane[i].dcdy < 0) plane[i].c++;
          }
       }
 
-      plane->dcdx *= FIXED_ONE;
-      plane->dcdy *= FIXED_ONE;
+      plane[i].dcdx *= FIXED_ONE;
+      plane[i].dcdy *= FIXED_ONE;
 
       /* find trivial reject offsets for each edge for a single-pixel
        * sized block.  These will be scaled up at each recursive level to
        * match the active blocksize.  Scaling in this way works best if
        * the blocks are square.
        */
-      plane->eo = 0;
-      if (plane->dcdx < 0) plane->eo -= plane->dcdx;
-      if (plane->dcdy > 0) plane->eo += plane->dcdy;
-
-      /* Calculate trivial accept offsets from the above.
-       */
-      plane->ei = plane->dcdy - plane->dcdx - plane->eo;
-
-      plane->step = line->step[i];
-
-      /* Fill in the inputs.step[][] arrays.
-       * We've manually unrolled some loops here.
-       */
-#define SETUP_STEP(j, x, y) \
-      line->step[i][j] = y * plane->dcdy - x * plane->dcdx                                     
-      
-      SETUP_STEP(0, 0, 0);
-      SETUP_STEP(1, 1, 0);
-      SETUP_STEP(2, 0, 1);
-      SETUP_STEP(3, 1, 1);
-
-      SETUP_STEP(4, 2, 0);
-      SETUP_STEP(5, 3, 0);
-      SETUP_STEP(6, 2, 1);
-      SETUP_STEP(7, 3, 1);
-
-      SETUP_STEP(8, 0, 2);
-      SETUP_STEP(9, 1, 2);
-      SETUP_STEP(10, 0, 3);
-      SETUP_STEP(11, 1, 3);
-
-      SETUP_STEP(12, 2, 2);
-      SETUP_STEP(13, 3, 2);
-      SETUP_STEP(14, 2, 3);
-      SETUP_STEP(15, 3, 3);
-#undef STEP
+      plane[i].eo = 0;
+      if (plane[i].dcdx < 0) plane[i].eo -= plane[i].dcdx;
+      if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy;
    }
 
 
@@ -495,151 +697,45 @@ lp_setup_line( struct lp_setup_context *setup,
     * these planes elsewhere.
     */
    if (nr_planes == 8) {
-      line->plane[4].step = step_scissor_maxx;
-      line->plane[4].dcdx = 1;
-      line->plane[4].dcdy = 0;
-      line->plane[4].c = maxx;
-      line->plane[4].ei = -1;
-      line->plane[4].eo = 0;
-
-      line->plane[5].step = step_scissor_miny;
-      line->plane[5].dcdx = 0;
-      line->plane[5].dcdy = 1;
-      line->plane[5].c = 1-miny;
-      line->plane[5].ei = 0;
-      line->plane[5].eo = 1;
-
-      line->plane[6].step = step_scissor_maxy;
-      line->plane[6].dcdx = 0;
-      line->plane[6].dcdy = -1;
-      line->plane[6].c = maxy;
-      line->plane[6].ei = -1;
-      line->plane[6].eo = 0;
-
-      line->plane[7].step = step_scissor_minx;
-      line->plane[7].dcdx = -1;
-      line->plane[7].dcdy = 0;
-      line->plane[7].c = 1-minx;
-      line->plane[7].ei = 0;
-      line->plane[7].eo = 1;
+      const struct u_rect *scissor =
+         &setup->scissors[viewport_index];
+
+      plane[4].dcdx = -1;
+      plane[4].dcdy = 0;
+      plane[4].c = 1-scissor->x0;
+      plane[4].eo = 1;
+
+      plane[5].dcdx = 1;
+      plane[5].dcdy = 0;
+      plane[5].c = scissor->x1+1;
+      plane[5].eo = 0;
+
+      plane[6].dcdx = 0;
+      plane[6].dcdy = 1;
+      plane[6].c = 1-scissor->y0;
+      plane[6].eo = 1;
+
+      plane[7].dcdx = 0;
+      plane[7].dcdy = -1;
+      plane[7].c = scissor->y1+1;
+      plane[7].eo = 0;
    }
 
+   return lp_setup_bin_triangle(setup, line, &bbox, nr_planes, viewport_index);
+}
 
-   /*
-    * All fields of 'tri' are now set.  The remaining code here is
-    * concerned with binning.
-    */
-
-   /* Convert to tile coordinates, and inclusive ranges:
-    */
-   ix0 = minx / TILE_SIZE;
-   iy0 = miny / TILE_SIZE;
-   ix1 = (maxx-1) / TILE_SIZE;
-   iy1 = (maxy-1) / TILE_SIZE;
 
-   /*
-    * Clamp to framebuffer size
-    */
-   assert(ix0 == MAX2(ix0, 0));
-   assert(iy0 == MAX2(iy0, 0));
-   assert(ix1 == MIN2(ix1, scene->tiles_x - 1));
-   assert(iy1 == MIN2(iy1, scene->tiles_y - 1));
-
-   /* Determine which tile(s) intersect the triangle's bounding box
-    */
-   if (iy0 == iy1 && ix0 == ix1)
-   {
-      /* Triangle is contained in a single tile:
-       */
-      lp_scene_bin_command( scene, ix0, iy0,
-                            lp_rast_tri_tab[nr_planes], 
-                            lp_rast_arg_triangle(line, (1<<nr_planes)-1) );
-   }
-   else
+static void lp_setup_line( struct lp_setup_context *setup,
+                           const float (*v0)[4],
+                           const float (*v1)[4] )
+{
+   if (!try_setup_line( setup, v0, v1 ))
    {
-      int c[8];
-      int ei[8];
-      int eo[8];
-      int xstep[8];
-      int ystep[8];
-      int x, y;
-      int is_blit = -1; /* undetermined */
-      
-      for (i = 0; i < nr_planes; i++) {
-         c[i] = (line->plane[i].c + 
-                 line->plane[i].dcdy * iy0 * TILE_SIZE - 
-                 line->plane[i].dcdx * ix0 * TILE_SIZE);
-
-         ei[i] = line->plane[i].ei << TILE_ORDER;
-         eo[i] = line->plane[i].eo << TILE_ORDER;
-         xstep[i] = -(line->plane[i].dcdx << TILE_ORDER);
-         ystep[i] = line->plane[i].dcdy << TILE_ORDER;
-      }
-
-
+      if (!lp_setup_flush_and_restart(setup))
+         return;
 
-      /* Test tile-sized blocks against the triangle.
-       * Discard blocks fully outside the tri.  If the block is fully
-       * contained inside the tri, bin an lp_rast_shade_tile command.
-       * Else, bin a lp_rast_triangle command.
-       */
-      for (y = iy0; y <= iy1; y++)
-      {
-         boolean in = FALSE;  /* are we inside the triangle? */
-         int cx[8];
-
-         for (i = 0; i < nr_planes; i++)
-            cx[i] = c[i];
-
-         for (x = ix0; x <= ix1; x++)
-         {
-            int out = 0;
-            int partial = 0;
-
-            for (i = 0; i < nr_planes; i++) {
-               int planeout = cx[i] + eo[i];
-               int planepartial = cx[i] + ei[i] - 1;
-               out |= (planeout >> 31);
-               partial |= (planepartial >> 31) & (1<<i);      
-            }
-            if (out) {
-               /* do nothing */
-               if (in)
-                  break;  /* exiting triangle, all done with this row */
-               LP_COUNT(nr_empty_64);
-            }
-            else if (partial) {
-               /* Not trivially accepted by at least one plane - 
-                * rasterize/shade partial tile
-                */
-               int count = util_bitcount(partial);
-               in = TRUE;
-               lp_scene_bin_command( scene, x, y,
-                                     lp_rast_tri_tab[count], 
-                                     lp_rast_arg_triangle(line, partial) );
-
-               LP_COUNT(nr_partially_covered_64);
-            }
-            else {
-               /* triangle covers the whole tile- shade whole tile */
-               LP_COUNT(nr_fully_covered_64);
-               in = TRUE;
-               /* leverages on existing code in lp_setup_tri.c */ 
-               do_triangle_ccw_whole_tile(setup, scene, line, x, y,
-                                          opaque, &is_blit);
-            }
-
-            /* Iterate cx values across the region:
-             */
-            for (i = 0; i < nr_planes; i++)
-               cx[i] += xstep[i];
-         }
-      
-         /* Iterate c values down the region:
-          */
-         for (i = 0; i < nr_planes; i++)
-            c[i] += ystep[i];
-      }
+      if (!try_setup_line( setup, v0, v1 ))
+         return;
    }
 }