include surface.offset in address calculations
[mesa.git] / src / mesa / pipe / softpipe / sp_prim_setup.c
index 29bb9365401704dbf94820ab1f1dac7bce86b224..7a3d011b7bd09772d81c9746509b03b43164e540 100644 (file)
  * 
  **************************************************************************/
 
-/* Authors:  Keith Whitwell <keith@tungstengraphics.com>
+/**
+ * \brief  Primitive rasterization/rendering (points, lines, triangles)
+ *
+ * \author  Keith Whitwell <keith@tungstengraphics.com>
+ * \author  Brian Paul
  */
 
+
 #include "imports.h"
 #include "macros.h"
 
 #include "sp_context.h"
-#include "sp_prim.h"
-#include "sp_tile.h"
-
-
-
-/**
- * Emit/render a quad.
- * This passes the quad to the first stage of per-fragment operations.
- */
-static INLINE void
-quad_emit(struct softpipe_context *sp, struct quad_header *quad)
-{
-   sp->quad.first->run(sp->quad.first, quad);
-}
+#include "sp_headers.h"
+#include "pipe/draw/draw_private.h"
+#include "sp_quad.h"
+#include "sp_prim_setup.h"
 
 
 /**
  * Triangle edge info
  */
 struct edge {
-   GLfloat dx;                 /* X(v1) - X(v0), used only during setup */
-   GLfloat dy;                 /* Y(v1) - Y(v0), used only during setup */
-   GLfloat dxdy;               /* dx/dy */
-   GLfloat sx;                 /* first sample point x coord */
-   GLfloat sy;
-   GLint lines;                        /* number of lines  on this edge */
+   GLfloat dx;                 /**< X(v1) - X(v0), used only during setup */
+   GLfloat dy;                 /**< Y(v1) - Y(v0), used only during setup */
+   GLfloat dxdy;               /**< dx/dy */
+   GLfloat sx, sy;             /**< first sample point coord */
+   GLint lines;                        /**< number of lines on this edge */
 };
 
 
 /**
- * Triangle setup info (derived from prim_stage).
+ * Triangle setup info (derived from draw_stage).
  * Also used for line drawing (taking some liberties).
  */
 struct setup_stage {
-   struct prim_stage stage; /**< This must be first */
+   struct draw_stage stage; /**< This must be first (base class) */
+
+   struct softpipe_context *softpipe;
 
    /* Vertices are just an array of floats making up each attribute in
     * turn.  Currently fixed at 4 floats, but should change in time.
@@ -100,40 +96,73 @@ struct setup_stage {
 /**
  * Basically a cast wrapper.
  */
-static inline struct setup_stage *setup_stage( struct prim_stage *stage )
+static INLINE struct setup_stage *setup_stage( struct draw_stage *stage )
 {
    return (struct setup_stage *)stage;
 }
 
 
 /**
- * Given an X or Y coordinate, return the block/quad coordinate that it
- * belongs to.
+ * Clip setup->quad against the scissor/surface bounds.
  */
-static inline GLint block( GLint x )
+static INLINE void
+quad_clip(struct setup_stage *setup)
 {
-   return x & ~1;
+   const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect;
+   if (setup->quad.x0 >= cliprect->maxx ||
+       setup->quad.y0 >= cliprect->maxy ||
+       setup->quad.x0 + 1 < cliprect->minx ||
+       setup->quad.y0 + 1 < cliprect->miny) {
+      /* totally clipped */
+      setup->quad.mask = 0x0;
+      return;
+   }
+   if (setup->quad.x0 < cliprect->minx)
+      setup->quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
+   if (setup->quad.y0 < cliprect->miny)
+      setup->quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
+   if (setup->quad.x0 == cliprect->maxx - 1)
+      setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
+   if (setup->quad.y0 == cliprect->maxy - 1)
+      setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
 }
 
 
-
-static void setup_begin( struct prim_stage *stage )
+/**
+ * Emit a quad (pass to next stage) with clipping.
+ */
+static INLINE void
+clip_emit_quad(struct setup_stage *setup)
 {
-   setup_stage(stage)->quad.nr_attrs = stage->softpipe->nr_frag_attrs;
+   quad_clip(setup);
+   if (setup->quad.mask) {
+      struct softpipe_context *sp = setup->softpipe;
+      sp->quad.first->run(sp->quad.first, &setup->quad);
+   }
 }
 
 
 /**
- * Run shader on a quad/block.
+ * Emit a quad (pass to next stage).  No clipping is done.
  */
-static void run_shader_block( struct setup_stage *setup, 
-                             GLint x, GLint y, GLuint mask )
+static INLINE void
+emit_quad( struct setup_stage *setup, GLint x, GLint y, GLuint mask )
 {
+   struct softpipe_context *sp = setup->softpipe;
    setup->quad.x0 = x;
    setup->quad.y0 = y;
    setup->quad.mask = mask;
+   sp->quad.first->run(sp->quad.first, &setup->quad);
+}
 
-   quad_emit(setup->stage.softpipe, &setup->quad);
+
+/**
+ * Given an X or Y coordinate, return the block/quad coordinate that it
+ * belongs to.
+ */
+static INLINE GLint block( GLint x )
+{
+   return x & ~1;
 }
 
 
@@ -196,9 +225,8 @@ static void flush_spans( struct setup_stage *setup )
 
    for (x = block(minleft); x <= block(maxright); )
    {
-      run_shader_block( setup, x,
-                       setup->span.y, 
-                       calculate_mask( setup, x ) );
+      emit_quad( setup, x, setup->span.y, 
+                 calculate_mask( setup, x ) );
       x += 2;
    }
 
@@ -208,6 +236,18 @@ static void flush_spans( struct setup_stage *setup )
    setup->span.right[1] = 0;
 }
 
+#if 0
+static void print_vertex(const struct setup_stage *setup,
+                         const struct vertex_header *v)
+{
+   int i;
+   printf("Vertex:\n");
+   for (i = 0; i < setup->softpipe->nr_attrs; i++) {
+      printf("  %d: %f %f %f\n",  i, 
+          v->data[i][0], v->data[i][1], v->data[i][2]);
+   }
+}
+#endif
 
 static GLboolean setup_sort_vertices( struct setup_stage *setup,
                                      const struct prim_header *prim )
@@ -216,6 +256,13 @@ static GLboolean setup_sort_vertices( struct setup_stage *setup,
    const struct vertex_header *v1 = prim->v[1];
    const struct vertex_header *v2 = prim->v[2];
 
+#if 0
+   printf("Triangle:\n");
+   print_vertex(setup, v0);
+   print_vertex(setup, v1);
+   print_vertex(setup, v2);
+#endif
+
    setup->vprovoke = v2;
 
    /* determine bottom to top order of vertices */
@@ -272,26 +319,32 @@ static GLboolean setup_sort_vertices( struct setup_stage *setup,
    setup->etop.dx = setup->vmax->data[0][0] - setup->vmid->data[0][0];
    setup->etop.dy = setup->vmax->data[0][1] - setup->vmid->data[0][1];
 
-   /* xxx: may need to adjust this sign according to the if-tree
-    * above:
+   /*
+    * Compute triangle's area.  Use 1/area to compute partial
+    * derivatives of attributes later.
+    *
+    * The area will be the same as prim->det, but the sign may be
+    * different depending on how the vertices get sorted above.
     *
-    * XXX: this is like 'det', but calculated from screen coords??
+    * To determine whether the primitive is front or back facing we
+    * use the prim->det value because its sign is correct.
     */
    {
       const GLfloat area = (setup->emaj.dx * setup->ebot.dy - 
                            setup->ebot.dx * setup->emaj.dy);
 
       setup->oneoverarea = 1.0 / area;
+      /*
+      _mesa_printf("%s one-over-area %f  area %f  det %f\n",
+                   __FUNCTION__, setup->oneoverarea, area, prim->det );
+      */
    }
 
-   /* XXX need to know if this is a front or back-facing triangle:
+   /* We need to know if this is a front or back-facing triangle for:
     *  - the GLSL gl_FrontFacing fragment attribute (bool)
     *  - two-sided stencil test
     */
-   setup->quad.facing = 0;
-
-   _mesa_printf("%s one-over-area %f\n", __FUNCTION__, setup->oneoverarea );
-
+   setup->quad.facing = (prim->det > 0.0) ^ (setup->softpipe->setup.front_winding == PIPE_WINDING_CW);
 
    return GL_TRUE;
 }
@@ -299,11 +352,18 @@ static GLboolean setup_sort_vertices( struct setup_stage *setup,
 
 /**
  * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
+ * The value comes from vertex->data[slot][i].
+ * The result will be put into setup->coef[slot].a0[i].
+ * \param slot  which attribute slot 
+ * \param i  which component of the slot (0..3)
  */
 static void const_coeff( struct setup_stage *setup,
                         GLuint slot,
                         GLuint i )
 {
+   assert(slot < FRAG_ATTRIB_MAX);
+   assert(i <= 3);
+
    setup->coef[slot].dadx[i] = 0;
    setup->coef[slot].dady[i] = 0;
 
@@ -326,6 +386,9 @@ static void tri_linear_coeff( struct setup_stage *setup,
    GLfloat a = setup->ebot.dy * majda - botda * setup->emaj.dy;
    GLfloat b = setup->emaj.dx * botda - majda * setup->ebot.dx;
    
+   assert(slot < FRAG_ATTRIB_MAX);
+   assert(i <= 3);
+
    setup->coef[slot].dadx[i] = a * setup->oneoverarea;
    setup->coef[slot].dady[i] = b * setup->oneoverarea;
 
@@ -345,11 +408,13 @@ static void tri_linear_coeff( struct setup_stage *setup,
                            (setup->coef[slot].dadx[i] * (setup->vmin->data[0][0] - 0.5) + 
                             setup->coef[slot].dady[i] * (setup->vmin->data[0][1] - 0.5)));
 
+   /*
    _mesa_printf("attr[%d].%c: %f dx:%f dy:%f\n",
                slot, "xyzw"[i], 
                setup->coef[slot].a0[i],
                setup->coef[slot].dadx[i],
                setup->coef[slot].dady[i]);
+   */
 }
 
 
@@ -372,6 +437,9 @@ static void tri_persp_coeff( struct setup_stage *setup,
    GLfloat a = setup->ebot.dy * majda - botda * setup->emaj.dy;
    GLfloat b = setup->emaj.dx * botda - majda * setup->ebot.dx;
       
+   assert(slot < FRAG_ATTRIB_MAX);
+   assert(i <= 3);
+
    setup->coef[slot].dadx[i] = a * setup->oneoverarea;
    setup->coef[slot].dady[i] = b * setup->oneoverarea;
    setup->coef[slot].a0[i] = (mina - 
@@ -380,14 +448,13 @@ static void tri_persp_coeff( struct setup_stage *setup,
 }
 
 
-
 /**
  * Compute the setup->coef[] array dadx, dady, a0 values.
  * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized.
  */
 static void setup_tri_coefficients( struct setup_stage *setup )
 {
-   const enum interp_mode *interp = setup->stage.softpipe->interp;
+   const enum interp_mode *interp = setup->softpipe->interp;
    GLuint slot, j;
 
    /* z and w are done by linear interpolation:
@@ -447,40 +514,35 @@ static void setup_tri_edges( struct setup_stage *setup )
 
 /**
  * Render the upper or lower half of a triangle.
- * Scissoring is applied here too.
+ * Scissoring/cliprect is applied here too.
  */
 static void subtriangle( struct setup_stage *setup,
                         struct edge *eleft,
                         struct edge *eright,
                         GLuint lines )
 {
+   const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect;
    GLint y, start_y, finish_y;
    GLint sy = (GLint)eleft->sy;
 
    assert((GLint)eleft->sy == (GLint) eright->sy);
-   assert((GLint)eleft->sy >= 0);      /* catch bug in x64? */
 
-   /* scissor y:
-    */
-   if (setup->stage.softpipe->setup.scissor) {
-      start_y = sy;
-      finish_y = start_y + lines;
+   /* clip top/bottom */
+   start_y = sy;
+   finish_y = sy + lines;
 
-      if (start_y < setup->stage.softpipe->scissor.miny) 
-        start_y = setup->stage.softpipe->scissor.miny;
+   if (start_y < cliprect->miny)
+      start_y = cliprect->miny;
 
-      if (finish_y > setup->stage.softpipe->scissor.maxy) 
-        finish_y = setup->stage.softpipe->scissor.maxy;
+   if (finish_y > cliprect->maxy)
+      finish_y = cliprect->maxy;
 
-      start_y -= sy;
-      finish_y -= sy;
-   }
-   else {
-      start_y = 0;
-      finish_y = lines;
-   }
+   start_y -= sy;
+   finish_y -= sy;
 
+   /*
    _mesa_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);  
+   */
 
    for (y = start_y; y < finish_y; y++) {
 
@@ -493,15 +555,11 @@ static void subtriangle( struct setup_stage *setup,
       GLint left = (GLint)(eleft->sx + y * eleft->dxdy);
       GLint right = (GLint)(eright->sx + y * eright->dxdy);
 
-      /* scissor x: 
-       */
-      if (setup->stage.softpipe->setup.scissor) {
-        if (left  < setup->stage.softpipe->scissor.minx) 
-           left  = setup->stage.softpipe->scissor.minx;
-
-        if (right > setup->stage.softpipe->scissor.maxx) 
-           right = setup->stage.softpipe->scissor.maxx;
-      }
+      /* clip left/right */
+      if (left < cliprect->minx)
+         left = cliprect->minx;
+      if (right > cliprect->maxx)
+         right = cliprect->maxx;
 
       if (left < right) {
         GLint _y = sy+y;
@@ -529,24 +587,28 @@ static void subtriangle( struct setup_stage *setup,
 /**
  * Do setup for triangle rasterization, then render the triangle.
  */
-static void setup_tri( struct prim_stage *stage,
+static void setup_tri( struct draw_stage *stage,
                       struct prim_header *prim )
 {
    struct setup_stage *setup = setup_stage( stage );
 
+   /*
    _mesa_printf("%s\n", __FUNCTION__ );
+   */
 
    setup_sort_vertices( setup, prim );
    setup_tri_coefficients( setup );
    setup_tri_edges( setup );
 
+   setup->quad.prim = PRIM_TRI;
+
    setup->span.y = 0;
    setup->span.y_flags = 0;
    setup->span.right[0] = 0;
    setup->span.right[1] = 0;
-//   setup->span.z_mode = tri_z_mode( setup->ctx );
+   /*   setup->span.z_mode = tri_z_mode( setup->ctx ); */
 
-//   init_constant_attribs( setup );
+   /*   init_constant_attribs( setup ); */
       
    if (setup->oneoverarea < 0.0) {
       /* emaj on left:
@@ -604,7 +666,7 @@ line_persp_coeff(struct setup_stage *setup, GLuint slot, GLuint i)
 static INLINE void
 setup_line_coefficients(struct setup_stage *setup, struct prim_header *prim)
 {
-   const enum interp_mode *interp = setup->stage.softpipe->interp;
+   const enum interp_mode *interp = setup->softpipe->interp;
    GLuint slot, j;
 
    /* use setup->vmin, vmax to point to vertices */
@@ -663,8 +725,8 @@ plot(struct setup_stage *setup, GLint x, GLint y)
    {
       /* flush prev quad, start new quad */
 
-      if (setup->quad.x0 != -1) 
-        quad_emit(setup->stage.softpipe, &setup->quad);
+      if (setup->quad.x0 != -1)
+         clip_emit_quad(setup);
 
       setup->quad.x0 = quadX;
       setup->quad.y0 = quadY;
@@ -675,18 +737,29 @@ plot(struct setup_stage *setup, GLint x, GLint y)
 }
 
 
+/**
+ * Determine whether or not to emit a line fragment by checking
+ * the line stipple pattern.
+ */
+static INLINE GLuint
+stipple_test(GLint counter, GLushort pattern, GLint factor)
+{
+   GLint b = (counter / factor) & 0xf;
+   return (1 << b) & pattern;
+}
+
 
 /**
  * Do setup for line rasterization, then render the line.
  * XXX single-pixel width, no stipple, etc
- * XXX no scissoring yet.
  */
 static void
-setup_line(struct prim_stage *stage, struct prim_header *prim)
+setup_line(struct draw_stage *stage, struct prim_header *prim)
 {
    const struct vertex_header *v0 = prim->v[0];
    const struct vertex_header *v1 = prim->v[1];
    struct setup_stage *setup = setup_stage( stage );
+   struct softpipe_context *sp = setup->softpipe;
 
    GLint x0 = (GLint) v0->data[0][0];
    GLint x1 = (GLint) v1->data[0][0];
@@ -722,6 +795,14 @@ setup_line(struct prim_stage *stage, struct prim_header *prim)
 
    setup->quad.x0 = setup->quad.y0 = -1;
    setup->quad.mask = 0x0;
+   setup->quad.prim = PRIM_LINE;
+   /* XXX temporary: set coverage to 1.0 so the line appears
+    * if AA mode happens to be enabled.
+    */
+   setup->quad.coverage[0] =
+   setup->quad.coverage[1] =
+   setup->quad.coverage[2] =
+   setup->quad.coverage[3] = 1.0;
 
    if (dx > dy) {
       /*** X-major line ***/
@@ -731,7 +812,12 @@ setup_line(struct prim_stage *stage, struct prim_header *prim)
       const GLint errorDec = error - dx;
 
       for (i = 0; i < dx; i++) {
-         plot(setup, x0, y0);
+         if (!sp->setup.line_stipple_enable ||
+             stipple_test(sp->line_stipple_counter,
+                          sp->setup.line_stipple_pattern,
+                          sp->setup.line_stipple_factor + 1)) {
+             plot(setup, x0, y0);
+         }
 
          x0 += xstep;
          if (error < 0) {
@@ -741,6 +827,8 @@ setup_line(struct prim_stage *stage, struct prim_header *prim)
             error += errorDec;
             y0 += ystep;
          }
+
+         sp->line_stipple_counter++;
       }
    }
    else {
@@ -751,7 +839,12 @@ setup_line(struct prim_stage *stage, struct prim_header *prim)
       const GLint errorDec = error - dy;
 
       for (i = 0; i < dy; i++) {
-         plot(setup, x0, y0);
+         if (!sp->setup.line_stipple_enable ||
+             stipple_test(sp->line_stipple_counter,
+                          sp->setup.line_stipple_pattern,
+                          sp->setup.line_stipple_factor + 1)) {
+            plot(setup, x0, y0);
+         }
 
          y0 += ystep;
 
@@ -762,12 +855,14 @@ setup_line(struct prim_stage *stage, struct prim_header *prim)
             error += errorDec;
             x0 += xstep;
          }
+
+         sp->line_stipple_counter++;
       }
    }
 
    /* draw final quad */
    if (setup->quad.mask) {
-      quad_emit(setup->stage.softpipe, &setup->quad);
+      clip_emit_quad(setup);
    }
 }
 
@@ -778,12 +873,12 @@ setup_line(struct prim_stage *stage, struct prim_header *prim)
  * XXX could optimize a lot for 1-pixel points.
  */
 static void
-setup_point(struct prim_stage *stage, struct prim_header *prim)
+setup_point(struct draw_stage *stage, struct prim_header *prim)
 {
    struct setup_stage *setup = setup_stage( stage );
    /*XXX this should be a vertex attrib! */
-   GLfloat halfSize = 0.5 * setup->stage.softpipe->point.size;
-   GLboolean round = setup->stage.softpipe->point.smooth;
+   const GLfloat halfSize = 0.5 * setup->softpipe->setup.point_size;
+   const GLboolean round = setup->softpipe->setup.point_smooth;
    const struct vertex_header *v0 = prim->v[0];
    const GLfloat x = v0->data[FRAG_ATTRIB_WPOS][0];
    const GLfloat y = v0->data[FRAG_ATTRIB_WPOS][1];
@@ -813,7 +908,7 @@ setup_point(struct prim_stage *stage, struct prim_header *prim)
          const_coeff(setup, slot, j);
    }
 
-   /* XXX need to clip against scissor bounds too */
+   setup->quad.prim = PRIM_POINT;
 
    if (halfSize <= 0.5 && !round) {
       /* special case for 1-pixel points */
@@ -822,67 +917,104 @@ setup_point(struct prim_stage *stage, struct prim_header *prim)
       setup->quad.x0 = x - ix;
       setup->quad.y0 = y - iy;
       setup->quad.mask = (1 << ix) << (2 * iy);
-      quad_emit(setup->stage.softpipe, &setup->quad);
+      clip_emit_quad(setup);
    }
    else {
       const GLint ixmin = block((GLint) (x - halfSize));
       const GLint ixmax = block((GLint) (x + halfSize));
       const GLint iymin = block((GLint) (y - halfSize));
       const GLint iymax = block((GLint) (y + halfSize));
-      GLfloat halfSizeSquared = halfSize * halfSize;
       GLint ix, iy;
 
-      for (iy = iymin; iy <= iymax; iy += 2) {
-         for (ix = ixmin; ix <= ixmax; ix += 2) {
+      if (round) {
+         /* rounded points */
+         const GLfloat rmin = halfSize - 0.7071F;  /* 0.7071 = sqrt(2)/2 */
+         const GLfloat rmax = halfSize + 0.7071F;
+         const GLfloat rmin2 = MAX2(0.0F, rmin * rmin);
+         const GLfloat rmax2 = rmax * rmax;
+         const GLfloat cscale = 1.0F / (rmax2 - rmin2);
 
-            if (round) {
-               /* rounded points */
-               /* XXX for GL_SMOOTH, need to compute per-fragment coverage too */
-               GLfloat dx, dy;
+         for (iy = iymin; iy <= iymax; iy += 2) {
+            for (ix = ixmin; ix <= ixmax; ix += 2) {
+               GLfloat dx, dy, dist2, cover;
 
                setup->quad.mask = 0x0;
 
                dx = (ix + 0.5) - x;
                dy = (iy + 0.5) - y;
-               if (dx * dx + dy * dy <= halfSizeSquared)
+               dist2 = dx * dx + dy * dy;
+               if (dist2 <= rmax2) {
+                  cover = 1.0F - (dist2 - rmin2) * cscale;
+                  setup->quad.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0);
                   setup->quad.mask |= MASK_BOTTOM_LEFT;
+               }
 
                dx = (ix + 1.5) - x;
                dy = (iy + 0.5) - y;
-               if (dx * dx + dy * dy <= halfSizeSquared)
+               dist2 = dx * dx + dy * dy;
+               if (dist2 <= rmax2) {
+                  cover = 1.0F - (dist2 - rmin2) * cscale;
+                  setup->quad.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0);
                   setup->quad.mask |= MASK_BOTTOM_RIGHT;
+               }
 
                dx = (ix + 0.5) - x;
                dy = (iy + 1.5) - y;
-               if (dx * dx + dy * dy <= halfSizeSquared)
+               dist2 = dx * dx + dy * dy;
+               if (dist2 <= rmax2) {
+                  cover = 1.0F - (dist2 - rmin2) * cscale;
+                  setup->quad.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0);
                   setup->quad.mask |= MASK_TOP_LEFT;
+               }
 
                dx = (ix + 1.5) - x;
                dy = (iy + 1.5) - y;
-               if (dx * dx + dy * dy <= halfSizeSquared)
+               dist2 = dx * dx + dy * dy;
+               if (dist2 <= rmax2) {
+                  cover = 1.0F - (dist2 - rmin2) * cscale;
+                  setup->quad.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0);
                   setup->quad.mask |= MASK_TOP_RIGHT;
-            }
-            else {
-               /* square points */
-               setup->quad.mask = 0xf;
-
-               if (ix + 0.5 < x - halfSize)
-                  setup->quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
+               }
 
-               if (ix + 1.5 > x + halfSize)
-                  setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
-
-               if (iy + 0.5 < y - halfSize)
-                  setup->quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
-
-               if (iy + 1.5 > y + halfSize)
-                  setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
+               if (setup->quad.mask) {
+                  setup->quad.x0 = ix;
+                  setup->quad.y0 = iy;
+                  clip_emit_quad(setup);
+               }
             }
+         }
+      }
+      else {
+         /* square points */
+         for (iy = iymin; iy <= iymax; iy += 2) {
+            for (ix = ixmin; ix <= ixmax; ix += 2) {
+               setup->quad.mask = 0xf;
 
-            if (setup->quad.mask) {
-               setup->quad.x0 = ix;
-               setup->quad.y0 = iy;
-               quad_emit( setup->stage.softpipe, &setup->quad );
+               if (ix + 0.5 < x - halfSize) {
+                  /* fragment is past left edge of point, turn off left bits */
+                  setup->quad.mask &= ~(MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
+               }
+
+               if (ix + 1.5 > x + halfSize) {
+                  /* past the right edge */
+                  setup->quad.mask &= ~(MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
+               }
+
+               if (iy + 0.5 < y - halfSize) {
+                  /* below the bottom edge */
+                  setup->quad.mask &= ~(MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
+               }
+
+               if (iy + 1.5 > y + halfSize) {
+                  /* above the top edge */
+                  setup->quad.mask &= ~(MASK_TOP_LEFT | MASK_TOP_RIGHT);
+               }
+
+               if (setup->quad.mask) {
+                  setup->quad.x0 = ix;
+                  setup->quad.y0 = iy;
+                  clip_emit_quad(setup);
+               }
             }
          }
       }
@@ -891,21 +1023,44 @@ setup_point(struct prim_stage *stage, struct prim_header *prim)
 
 
 
-static void setup_end( struct prim_stage *stage )
+static void setup_begin( struct draw_stage *stage )
 {
+   struct setup_stage *setup = setup_stage(stage);
+   struct softpipe_context *sp = setup->softpipe;
+
+   setup->quad.nr_attrs = setup->softpipe->nr_frag_attrs;
+
+   sp->quad.first->begin(sp->quad.first);
 }
 
 
-struct prim_stage *prim_setup( struct softpipe_context *softpipe )
+static void setup_end( struct draw_stage *stage )
+{
+}
+
+
+static void reset_stipple_counter( struct draw_stage *stage )
+{
+   struct setup_stage *setup = setup_stage(stage);
+   setup->softpipe->line_stipple_counter = 0;
+}
+
+
+/**
+ * Create a new primitive setup/render stage.
+ */
+struct draw_stage *sp_draw_render_stage( struct softpipe_context *softpipe )
 {
    struct setup_stage *setup = CALLOC_STRUCT(setup_stage);
 
-   setup->stage.softpipe = softpipe;
+   setup->softpipe = softpipe;
+   setup->stage.draw = softpipe->draw;
    setup->stage.begin = setup_begin;
    setup->stage.point = setup_point;
    setup->stage.line = setup_line;
    setup->stage.tri = setup_tri;
    setup->stage.end = setup_end;
+   setup->stage.reset_stipple_counter = reset_stipple_counter;
 
    setup->quad.coef = setup->coef;