llvmpipe: reintroduce SET_STATE binner command
[mesa.git] / src / gallium / drivers / llvmpipe / lp_setup_line.c
index be41c44e6f5d51444aa7a286199137fb4b1e8732..e4cff9aa42ce287b458ac8cee4b4270269a4d010 100644 (file)
  * Binning code for lines
  */
 
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "lp_perf.h"
 #include "lp_setup_context.h"
+#include "lp_rast.h"
+#include "lp_state_fs.h"
 
-static void line_nop( struct lp_setup_context *setup,
-                      const float (*v0)[4],
-                      const float (*v1)[4] )
+#define NUM_CHANNELS 4
+
+struct lp_line_info {
+   /* Per-line values shared by the coefficient setup helpers below. */
+   float dx;            /* v1.x - v2.x, as computed in try_setup_line() */
+   float dy;            /* v1.y - v2.y, as computed in try_setup_line() */
+   float oneoverarea;   /* 1 / (dx*dx + dy*dy) */
+
+   const float (*v1)[4];   /* endpoints in the order given to try_setup_line(); */
+   const float (*v2)[4];   /* attribute 0 of each vertex holds its position */
+};
+
+
+/**
+ * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
+ *
+ * Stores 'value' as a0 for channel 'i' of coefficient slot 'slot' and
+ * zeroes both gradients, so the attribute is the same at every fragment.
+ * The 'setup' argument is not used by this helper.
+ */
+static void constant_coef( struct lp_setup_context *setup,
+                           struct lp_rast_triangle *tri,
+                           unsigned slot,
+                           const float value,
+                           unsigned i )
+{
+   tri->inputs.a0[slot][i] = value;
+   tri->inputs.dadx[slot][i] = 0.0f;
+   tri->inputs.dady[slot][i] = 0.0f;
+}
+
+
+/**
+ * Compute a0, dadx and dady for a linearly interpolated coefficient,
+ * for a line.
+ *
+ * The attribute difference between the two endpoints is spread along the
+ * line direction:
+ *    (dadx, dady) = (a1 - a2) * (dx, dy) / (dx*dx + dy*dy)
+ * a0 is then chosen so that the resulting plane evaluates to a1 at the
+ * position of v1 (less setup->pixel_offset).
+ *
+ * 'vert_attr' selects the vertex attribute to read, 'slot' the coefficient
+ * slot to write and 'i' the channel within both.
+ */
+static void linear_coef( struct lp_setup_context *setup,
+                         struct lp_rast_triangle *tri,
+                         struct lp_line_info *info,
+                         unsigned slot,
+                         unsigned vert_attr,
+                         unsigned i)
+{
+   float a1 = info->v1[vert_attr][i]; 
+   float a2 = info->v2[vert_attr][i];
+      
+   float da21 = a1 - a2;   /* same v1 - v2 orientation as info->dx / info->dy */
+   float dadx = da21 * info->dx * info->oneoverarea;
+   float dady = da21 * info->dy * info->oneoverarea;
+
+   tri->inputs.dadx[slot][i] = dadx;
+   tri->inputs.dady[slot][i] = dady;  
+   
+   tri->inputs.a0[slot][i] = (a1 -
+                              (dadx * (info->v1[0][0] - setup->pixel_offset) +
+                               dady * (info->v1[0][1] - setup->pixel_offset)));
+}
+
+
+/**
+ * Compute a0, dadx and dady for a perspective-corrected interpolant,
+ * for a line.
+ * We basically multiply the vertex value by 1/w before computing
+ * the plane coefficients (a0, dadx, dady), which are otherwise derived
+ * exactly as in linear_coef().
+ * Later, when we compute the value at a particular fragment position we'll
+ * divide the interpolated value by the interpolated W at that fragment.
+ */
+static void perspective_coef( struct lp_setup_context *setup,
+                              struct lp_rast_triangle *tri,
+                              struct lp_line_info *info,
+                              unsigned slot,
+                              unsigned vert_attr,
+                              unsigned i)
+{
+   /* premultiply by 1/w  (v[0][3] is always 1/w):
+    */
+   float a1 = info->v1[vert_attr][i] * info->v1[0][3];
+   float a2 = info->v2[vert_attr][i] * info->v2[0][3];
+
+   float da21 = a1 - a2;   /* same v1 - v2 orientation as info->dx / info->dy */
+   float dadx = da21 * info->dx * info->oneoverarea;
+   float dady = da21 * info->dy * info->oneoverarea;
+
+   tri->inputs.dadx[slot][i] = dadx;
+   tri->inputs.dady[slot][i] = dady;
+   
+   tri->inputs.a0[slot][i] = (a1 -
+                              (dadx * (info->v1[0][0] - setup->pixel_offset) +
+                               dady * (info->v1[0][1] - setup->pixel_offset)));
+}
+
+/**
+ * Set up the position (fragcoord) coefficients in 'slot' for the channels
+ * selected by 'usage_mask'.  X and Y get fixed unit gradients; Z and W are
+ * interpolated linearly from channels 2 and 3 of vertex attribute 0.
+ */
+static void
+setup_fragcoord_coef( struct lp_setup_context *setup,
+                      struct lp_rast_triangle *tri,
+                      struct lp_line_info *info,
+                      unsigned slot,
+                      unsigned usage_mask)
+{
+   /* X: a0 = 0, dadx = 1, dady = 0 */
+   if (usage_mask & TGSI_WRITEMASK_X) {
+      tri->inputs.a0[slot][0] = 0.0;
+      tri->inputs.dadx[slot][0] = 1.0;
+      tri->inputs.dady[slot][0] = 0.0;
+   }
+
+   /* Y: a0 = 0, dadx = 0, dady = 1 */
+   if (usage_mask & TGSI_WRITEMASK_Y) {
+      tri->inputs.a0[slot][1] = 0.0;
+      tri->inputs.dadx[slot][1] = 0.0;
+      tri->inputs.dady[slot][1] = 1.0;
+   }
+
+   /* Z: linear interpolation of position channel 2 */
+   if (usage_mask & TGSI_WRITEMASK_Z) {
+      linear_coef(setup, tri, info, slot, 0, 2);
+   }
+
+   /* W: linear interpolation of position channel 3 */
+   if (usage_mask & TGSI_WRITEMASK_W) {
+      linear_coef(setup, tri, info, slot, 0, 3);
+   }
+}
+
+/**
+ * Compute the tri->inputs a0, dadx and dady values for every fragment
+ * shader input of a line, followed by the position coefficients.
+ */
+static void setup_line_coefficients( struct lp_setup_context *setup,
+                                     struct lp_rast_triangle *tri,
+                                     struct lp_line_info *info)
 {
+   unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
+   unsigned slot;
+
+   /* setup interpolation for all the remaining attributes.
+    * Shader input N is written to coefficient slot N+1, because slot 0
+    * is reserved for the position (see the end of this function).
+    */
+   for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
+      unsigned vert_attr = setup->fs.input[slot].src_index;
+      unsigned usage_mask = setup->fs.input[slot].usage_mask;
+      unsigned i;
+           
+      switch (setup->fs.input[slot].interp) {
+      case LP_INTERP_CONSTANT:
+         /* flat shading: take the value from v1 when flatshade_first is
+          * set, from v2 otherwise.
+          */
+         if (setup->flatshade_first) {
+            for (i = 0; i < NUM_CHANNELS; i++)
+               if (usage_mask & (1 << i))
+                  constant_coef(setup, tri, slot+1, info->v1[vert_attr][i], i);
+         }
+         else {
+            for (i = 0; i < NUM_CHANNELS; i++)
+               if (usage_mask & (1 << i))
+                  constant_coef(setup, tri, slot+1, info->v2[vert_attr][i], i);
+         }
+         break;
+
+      case LP_INTERP_LINEAR:
+         for (i = 0; i < NUM_CHANNELS; i++)
+            if (usage_mask & (1 << i))
+               linear_coef(setup, tri, info, slot+1, vert_attr, i);
+         break;
+
+      case LP_INTERP_PERSPECTIVE:
+         for (i = 0; i < NUM_CHANNELS; i++)
+            if (usage_mask & (1 << i))
+               perspective_coef(setup, tri, info, slot+1, vert_attr, i);
+         fragcoord_usage_mask |= TGSI_WRITEMASK_W;   /* the W coefficients must be set up too */
+         break;
+
+      case LP_INTERP_POSITION:
+         /*
+          * The generated pixel interpolators will pick up the coeffs from
+          * slot 0, so we need to ensure that the usage mask covers all
+          * usages.
+          */
+         fragcoord_usage_mask |= usage_mask;
+         break;
+
+      case LP_INTERP_FACING:
+         /* facing is a constant 1.0 for lines */
+         for (i = 0; i < NUM_CHANNELS; i++)
+            if (usage_mask & (1 << i))
+               constant_coef(setup, tri, slot+1, 1.0, i);
+         break;
+
+      default:
+         assert(0);
+      }
+   }
+
+   /* The internal position input is in slot zero:
+    */
+   setup_fragcoord_coef(setup, tri, info, 0,
+                        fragcoord_usage_mask);
 }
 
 
-void 
-lp_setup_choose_line( struct lp_setup_context *setup )
+/**
+ * Convert a floating-point coordinate to fixed point by scaling it with
+ * FIXED_ONE and rounding the result with util_iround().
+ */
+static INLINE int subpixel_snap( float a )
 {
-   setup->line = line_nop;
+   return util_iround(FIXED_ONE * a);
+}
+
+
+/**
+ * Print line vertex attribs (for debug).
+ *
+ * Dumps 1 + setup->fs.nr_inputs four-component attributes for v1, then
+ * the same for v2, through debug_printf().
+ *
+ * NOTE(review): 'i' is a uint but is printed with %d below.
+ */
+static void
+print_line(struct lp_setup_context *setup,
+           const float (*v1)[4],
+           const float (*v2)[4])
+{
+   uint i;
+
+   debug_printf("llvmpipe line\n");
+   for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
+      debug_printf("  v1[%d]:  %f %f %f %f\n", i,
+                   v1[i][0], v1[i][1], v1[i][2], v1[i][3]);
+   }
+   for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
+      debug_printf("  v2[%d]:  %f %f %f %f\n", i,
+                   v2[i][0], v2[i][1], v2[i][2], v2[i][3]);
+   }
+}
+
+/**
+ * Return TRUE if x is non-negative (zero included), FALSE otherwise.
+ * Used by the diamond-exit tests to compare directions.
+ */
+static INLINE boolean sign(float x){
+   return x >= 0;  
+}  
+
+
+/* Fractional part of f, computed as f - floorf(f).
+ * Used on positive floats only.
+ */
+static INLINE float fracf(float f)
+{
+   return f - floorf(f);
+}
+
+
+/**
+ * Set up the line v1 -> v2 as a four-sided primitive and bin it.
+ *
+ * The endpoints are shifted according to the diamond-exit tests below, the
+ * four corners are snapped to fixed point, the edge planes and attribute
+ * interpolants are computed, and the result is passed to
+ * lp_setup_bin_triangle().
+ *
+ * Returns TRUE when the line is culled (zero length, empty bounding box or
+ * no overlap with the draw region), FALSE when lp_setup_alloc_triangle()
+ * fails, and otherwise whatever lp_setup_bin_triangle() returns.
+ * lp_setup_line() reacts to FALSE by flushing and retrying once.
+ */
+static boolean
+try_setup_line( struct lp_setup_context *setup,
+               const float (*v1)[4],
+               const float (*v2)[4])
+{
+   struct lp_scene *scene = setup->scene;
+   struct lp_rast_triangle *line;
+   struct lp_line_info info;
+   float width = MAX2(1.0, setup->line_width);
+   struct u_rect bbox;
+   unsigned tri_bytes;
+   int x[4]; 
+   int y[4];
+   int i;
+   int nr_planes = 4;
+   
+   /* linewidth should be interpreted as integer: round it first, then
+    * convert to fixed point */
+   int fixed_width = util_iround(width) * FIXED_ONE;
+
+   float x_offset=0;
+   float y_offset=0;
+   float x_offset_end=0;
+   float y_offset_end=0;
+      
+   float x1diff;
+   float y1diff;
+   float x2diff;
+   float y2diff;
+   float dx, dy;
+   float area;
+
+   boolean draw_start;
+   boolean draw_end;
+   boolean will_draw_start;
+   boolean will_draw_end;
+
+   if (0)
+      print_line(setup, v1, v2);
+
+   if (setup->scissor_test) {   /* four extra planes for the scissor, filled in at the end */
+      nr_planes = 8;
+   }
+   else {
+      nr_planes = 4;
+   }
+
+
+   dx = v1[0][0] - v2[0][0];
+   dy = v1[0][1] - v2[0][1];
+   area = (dx * dx  + dy * dy);   /* squared length of the line */
+   if (area == 0) {   /* zero-length line: count it as culled */
+      LP_COUNT(nr_culled_tris);
+      return TRUE;
+   }
+
+   info.oneoverarea = 1.0f / area;
+   info.dx = dx;
+   info.dy = dy;
+   info.v1 = v1;   /* info keeps the endpoints as passed in, before any swap below */
+   info.v2 = v2;
+
+  
+   /* X-MAJOR LINE: |dx| >= |dy|.
+    * x1diff/y1diff/x2diff/y2diff are the offsets of each endpoint from the
+    * centre of the pixel containing it, each in [-0.5, 0.5).
+    */
+   if (fabsf(dx) >= fabsf(dy)) {
+      float dydx = dy / dx;
+
+      x1diff = v1[0][0] - (float) floor(v1[0][0]) - 0.5;
+      y1diff = v1[0][1] - (float) floor(v1[0][1]) - 0.5;
+      x2diff = v2[0][0] - (float) floor(v2[0][0]) - 0.5;
+      y2diff = v2[0][1] - (float) floor(v2[0][1]) - 0.5;
+
+      if (y2diff==-0.5 && dy<0){   /* v2.y lies exactly on a pixel boundary */
+         y2diff = 0.5;
+      }
+      
+      /* 
+       * Diamond exit rule test for starting point 
+       */    
+      if (fabsf(x1diff) + fabsf(y1diff) < 0.5) {
+         draw_start = TRUE;
+      }
+      else if (sign(x1diff) == sign(-dx)) {
+         draw_start = FALSE;
+      }
+      else if (sign(-y1diff) != sign(dy)) {
+         draw_start = TRUE;
+      }
+      else {
+         /* do intersection test */
+         float yintersect = fracf(v1[0][1]) + x1diff * dydx;
+         draw_start = (yintersect < 1.0 && yintersect > 0.0);
+      }
+
+
+      /* 
+       * Diamond exit rule test for ending point 
+       */    
+      if (fabsf(x2diff) + fabsf(y2diff) < 0.5) {
+         draw_end = FALSE;
+      }
+      else if (sign(x2diff) != sign(-dx)) {
+         draw_end = FALSE;
+      }
+      else if (sign(-y2diff) == sign(dy)) {
+         draw_end = TRUE;
+      }
+      else {
+         /* do intersection test */
+         float yintersect = fracf(v2[0][1]) + x2diff * dydx;
+         draw_end = (yintersect < 1.0 && yintersect > 0.0);
+      }
+
+      /* Are we already drawing start/end?
+       */
+      will_draw_start = sign(-x1diff) != sign(dx);
+      will_draw_end = (sign(x2diff) == sign(-dx)) || x2diff==0;
+
+      if (dx < 0) {
+         /* if v2 is to the right of v1, swap pointers */
+         const float (*temp)[4] = v1;
+         v1 = v2;
+         v2 = temp;
+         dx = -dx;
+         dy = -dy;
+         /* Shift planes appropriately.  The endpoints were just swapped,
+          * so the start-point correction goes to the *_end offsets and
+          * the end-point correction to the plain offsets. */
+         if (will_draw_start != draw_start) {
+            x_offset_end = - x1diff - 0.5;
+            y_offset_end = x_offset_end * dydx;
+
+         }
+         if (will_draw_end != draw_end) {
+            x_offset = - x2diff - 0.5;
+            y_offset = x_offset * dydx;
+         }
+
+      }
+      else{
+         /* Otherwise shift planes appropriately */
+         if (will_draw_start != draw_start) {
+            x_offset = - x1diff + 0.5;
+            y_offset = x_offset * dydx;
+         }
+         if (will_draw_end != draw_end) {
+            x_offset_end = - x2diff + 0.5;
+            y_offset_end = x_offset_end * dydx;
+         }
+      }
+  
+      /* x/y positions in fixed point.  Corners 0 and 3 sit at v1,
+       * corners 1 and 2 at v2; the line is widened along y by
+       * fixed_width/2 on each side. */
+      x[0] = subpixel_snap(v1[0][0] + x_offset     - setup->pixel_offset);
+      x[1] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset);
+      x[2] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset);
+      x[3] = subpixel_snap(v1[0][0] + x_offset     - setup->pixel_offset);
+      
+      y[0] = subpixel_snap(v1[0][1] + y_offset     - setup->pixel_offset) - fixed_width/2;
+      y[1] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset) - fixed_width/2;
+      y[2] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset) + fixed_width/2;
+      y[3] = subpixel_snap(v1[0][1] + y_offset     - setup->pixel_offset) + fixed_width/2;
+      
+   }
+   else {
+      const float dxdy = dx / dy;
+
+      /* Y-MAJOR LINE: |dy| > |dx|.  Same per-endpoint offsets from the
+       * pixel centre as in the x-major case. */      
+      x1diff = v1[0][0] - (float) floor(v1[0][0]) - 0.5;
+      y1diff = v1[0][1] - (float) floor(v1[0][1]) - 0.5;
+      x2diff = v2[0][0] - (float) floor(v2[0][0]) - 0.5;
+      y2diff = v2[0][1] - (float) floor(v2[0][1]) - 0.5;
+
+      if (x2diff==-0.5 && dx<0) {   /* v2.x lies exactly on a pixel boundary */
+         x2diff = 0.5;
+      }
+
+      /* 
+       * Diamond exit rule test for starting point 
+       */    
+      if (fabsf(x1diff) + fabsf(y1diff) < 0.5) {
+         draw_start = TRUE;
+      }
+      else if (sign(-y1diff) == sign(dy)) {
+         draw_start = FALSE;
+      }
+      else if (sign(x1diff) != sign(-dx)) {
+         draw_start = TRUE;
+      }
+      else {
+         /* do intersection test */
+         float xintersect = fracf(v1[0][0]) + y1diff * dxdy;
+         draw_start = (xintersect < 1.0 && xintersect > 0.0);
+      }
+
+      /* 
+       * Diamond exit rule test for ending point 
+       */    
+      if (fabsf(x2diff) + fabsf(y2diff) < 0.5) {
+         draw_end = FALSE;
+      }
+      else if (sign(-y2diff) != sign(dy) ) {
+         draw_end = FALSE;
+      }
+      else if (sign(x2diff) == sign(-dx) ) {
+         draw_end = TRUE;
+      }
+      else {
+         /* do intersection test.
+          * NOTE(review): this test accepts xintersect == 0.0 while the
+          * other three intersection tests do not -- confirm intended. */
+         float xintersect = fracf(v2[0][0]) + y2diff * dxdy;
+         draw_end = (xintersect < 1.0 && xintersect >= 0.0);
+      }
+
+      /* Are we already drawing start/end?
+       */
+      will_draw_start = sign(y1diff) == sign(dy);
+      will_draw_end = (sign(-y2diff) == sign(dy)) || y2diff==0;
+
+      if (dy > 0) {
+         /* if v2 is on top of v1, swap pointers */
+         const float (*temp)[4] = v1;
+         v1 = v2;
+         v2 = temp; 
+         dx = -dx;
+         dy = -dy;
+
+         /* Shift planes appropriately.  The endpoints were just swapped,
+          * so the start-point correction goes to the *_end offsets and
+          * the end-point correction to the plain offsets. */
+         if (will_draw_start != draw_start) {
+            y_offset_end = - y1diff + 0.5;
+            x_offset_end = y_offset_end * dxdy;
+         }
+         if (will_draw_end != draw_end) {
+            y_offset = - y2diff + 0.5;
+            x_offset = y_offset * dxdy;
+         }
+      }
+      else {
+         /* Otherwise shift planes appropriately */
+         if (will_draw_start != draw_start) {
+            y_offset = - y1diff - 0.5;
+            x_offset = y_offset * dxdy;
+                     
+         }
+         if (will_draw_end != draw_end) {
+            y_offset_end = - y2diff - 0.5;
+            x_offset_end = y_offset_end * dxdy;
+         }
+      }
+
+      /* x/y positions in fixed point.  Corners 0 and 3 sit at v1,
+       * corners 1 and 2 at v2; the line is widened along x by
+       * fixed_width/2 on each side. */
+      x[0] = subpixel_snap(v1[0][0] + x_offset     - setup->pixel_offset) - fixed_width/2;
+      x[1] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset) - fixed_width/2;
+      x[2] = subpixel_snap(v2[0][0] + x_offset_end - setup->pixel_offset) + fixed_width/2;
+      x[3] = subpixel_snap(v1[0][0] + x_offset     - setup->pixel_offset) + fixed_width/2;
+     
+      y[0] = subpixel_snap(v1[0][1] + y_offset     - setup->pixel_offset); 
+      y[1] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset);
+      y[2] = subpixel_snap(v2[0][1] + y_offset_end - setup->pixel_offset);
+      y[3] = subpixel_snap(v1[0][1] + y_offset     - setup->pixel_offset);
+   }
+
+
+
+   LP_COUNT(nr_tris);
+
+   /* Bounding rectangle (in pixels) */
+   {
+      /* Yes this is necessary to accurately calculate bounding boxes
+       * with the two fill-conventions we support.  GL (normally) ends
+       * up needing a bottom-left fill convention, which requires
+       * slightly different rounding.
+       */
+      int adj = (setup->pixel_offset != 0) ? 1 : 0;
+
+      bbox.x0 = (MIN4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER;
+      bbox.x1 = (MAX4(x[0], x[1], x[2], x[3]) + (FIXED_ONE-1)) >> FIXED_ORDER;
+      bbox.y0 = (MIN4(y[0], y[1], y[2], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
+      bbox.y1 = (MAX4(y[0], y[1], y[2], y[3]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
+
+      /* Inclusive coordinates:
+       */
+      bbox.x1--;
+      bbox.y1--;
+   }
+
+   if (bbox.x1 < bbox.x0 ||
+       bbox.y1 < bbox.y0) {
+      if (0) debug_printf("empty bounding box\n");
+      LP_COUNT(nr_culled_tris);
+      return TRUE;
+   }
+
+   if (!u_rect_test_intersection(&setup->draw_region, &bbox)) {
+      if (0) debug_printf("offscreen\n");
+      LP_COUNT(nr_culled_tris);
+      return TRUE;
+   }
+
+   u_rect_find_intersection(&setup->draw_region, &bbox);
+
+   line = lp_setup_alloc_triangle(scene,
+                                  setup->fs.nr_inputs,
+                                  nr_planes,
+                                  &tri_bytes);
+   if (!line)
+      return FALSE;   /* caller flushes the scene and retries */
+
+#ifdef DEBUG
+   line->v[0][0] = v1[0][0];
+   line->v[1][0] = v2[0][0];   
+   line->v[0][1] = v1[0][1];
+   line->v[1][1] = v2[0][1];
+#endif
+
+   /* calculate the deltas between consecutive corners, one per edge plane */
+   line->plane[0].dcdy = x[0] - x[1];
+   line->plane[1].dcdy = x[1] - x[2];
+   line->plane[2].dcdy = x[2] - x[3];
+   line->plane[3].dcdy = x[3] - x[0];
+
+   line->plane[0].dcdx = y[0] - y[1];
+   line->plane[1].dcdx = y[1] - y[2];
+   line->plane[2].dcdx = y[2] - y[3];
+   line->plane[3].dcdx = y[3] - y[0];
+
+
+   /* Setup parameter interpolants:
+    */
+   setup_line_coefficients( setup, line, &info); 
+
+   line->inputs.facing = 1.0F;
+   line->inputs.disable = FALSE;
+   line->inputs.opaque = FALSE;
+
+   for (i = 0; i < 4; i++) {
+      struct lp_rast_plane *plane = &line->plane[i];
+
+      /* half-edge constants, will be iterated over the whole render
+       * target.
+       */
+      plane->c = plane->dcdx * x[i] - plane->dcdy * y[i];
+
+      
+      /* correct for top-left vs. bottom-left fill convention.  
+       *
+       * note that we're overloading gl_rasterization_rules to mean
+       * both (0.5,0.5) pixel centers *and* bottom-left filling
+       * convention.
+       *
+       * GL actually has a top-left filling convention, but GL's
+       * notion of "top" differs from gallium's...
+       *
+       * Also, sometimes (in FBO cases) GL will render upside down
+       * to its usual method, in which case it will probably want
+       * to use the opposite, top-left convention.
+       */         
+      if (plane->dcdx < 0) {
+         /* both fill conventions want this - adjust for left edges */
+         plane->c++;            
+      }
+      else if (plane->dcdx == 0) {
+         if (setup->pixel_offset == 0) {
+            /* correct for top-left fill convention:
+             */
+            if (plane->dcdy > 0) plane->c++;
+         }
+         else {
+            /* correct for bottom-left fill convention:
+             */
+            if (plane->dcdy < 0) plane->c++;
+         }
+      }
+
+      plane->dcdx *= FIXED_ONE;
+      plane->dcdy *= FIXED_ONE;
+
+      /* find trivial reject offsets for each edge for a single-pixel
+       * sized block.  These will be scaled up at each recursive level to
+       * match the active blocksize.  Scaling in this way works best if
+       * the blocks are square.
+       */
+      plane->eo = 0;
+      if (plane->dcdx < 0) plane->eo -= plane->dcdx;
+      if (plane->dcdy > 0) plane->eo += plane->dcdy;
+
+      /* Calculate trivial accept offsets from the above.
+       */
+      plane->ei = plane->dcdy - plane->dcdx - plane->eo;
+   }
+
+
+   /* 
+    * When rasterizing scissored tris, use the intersection of the
+    * triangle bounding box and the scissor rect to generate the
+    * scissor planes.
+    *
+    * This permits us to cut off the triangle "tails" that are present
+    * in the intermediate recursive levels caused when two of the
+    * triangles edges don't diverge quickly enough to trivially reject
+    * exterior blocks from the triangle.
+    *
+    * It's not really clear if it's worth worrying about these tails,
+    * but since we generate the planes for each scissored tri, it's
+    * free to trim them in this case.
+    * 
+    * Note that otherwise, the scissor planes only vary in 'C' value,
+    * and even then only on state-changes.  Could alternatively store
+    * these planes elsewhere.
+    */
+   if (nr_planes == 8) {
+      line->plane[4].dcdx = -1;
+      line->plane[4].dcdy = 0;
+      line->plane[4].c = 1-bbox.x0;
+      line->plane[4].ei = 0;
+      line->plane[4].eo = 1;
+
+      line->plane[5].dcdx = 1;
+      line->plane[5].dcdy = 0;
+      line->plane[5].c = bbox.x1+1;
+      line->plane[5].ei = -1;
+      line->plane[5].eo = 0;
+
+      line->plane[6].dcdx = 0;
+      line->plane[6].dcdy = 1;
+      line->plane[6].c = 1-bbox.y0;
+      line->plane[6].ei = 0;
+      line->plane[6].eo = 1;
+
+      line->plane[7].dcdx = 0;
+      line->plane[7].dcdy = -1;
+      line->plane[7].c = bbox.y1+1;
+      line->plane[7].ei = -1;
+      line->plane[7].eo = 0;
+   }
+
+   return lp_setup_bin_triangle(setup, line, &bbox, nr_planes);
+}
+
+/**
+ * Line function installed by lp_setup_choose_line().
+ *
+ * Tries to set up the line; if try_setup_line() returns FALSE, calls
+ * lp_setup_flush_and_restart() and tries exactly once more.  A failure of
+ * the flush or of the second attempt is not reported to the caller.
+ */
+static void lp_setup_line( struct lp_setup_context *setup,
+                           const float (*v0)[4],
+                           const float (*v1)[4] )
+{
+   if (!try_setup_line( setup, v0, v1 ))
+   {
+      if (!lp_setup_flush_and_restart(setup))
+         return;
+
+      if (!try_setup_line( setup, v0, v1 ))
+         return;
+   }
+}
+
+/**
+ * Install lp_setup_line() as the setup context's line function.
+ */
+void lp_setup_choose_line( struct lp_setup_context *setup ) 
+{ 
+   setup->line = lp_setup_line;
 }