llvmpipe: enable texcoord semantics
[mesa.git] / src / gallium / drivers / llvmpipe / lp_setup_tri.c
index 907129dbd1be59e1e7e961f489fa8015196c2834..d24a4b4afe74a88a26930357cd9e499224db8e6a 100644 (file)
 
 #if defined(PIPE_ARCH_SSE)
 #include <emmintrin.h>
-#elif defined(_ARCH_PWR8) && defined(PIPE_ARCH_LITTLE_ENDIAN)
+#elif defined(_ARCH_PWR8) && UTIL_ARCH_LITTLE_ENDIAN
 #include <altivec.h>
 #include "util/u_pwr8.h"
 #endif
 
+#if !defined(PIPE_ARCH_SSE)
+
 static inline int
 subpixel_snap(float a)
 {
    return util_iround(FIXED_ONE * a);
 }
 
-static inline float
-fixed_to_float(int a)
-{
-   return a * (1.0f / FIXED_ONE);
-}
-
+#endif
 
 /* Position and area in fixed point coordinates */
 struct fixed_position {
@@ -94,6 +91,8 @@ lp_setup_alloc_triangle(struct lp_scene *scene,
    unsigned plane_sz = nr_planes * sizeof(struct lp_rast_plane);
    struct lp_rast_triangle *tri;
 
+   STATIC_ASSERT(sizeof(struct lp_rast_plane) % 8 == 0);
+
    *tri_size = (sizeof(struct lp_rast_triangle) +
                 3 * input_array_sz +
                 plane_sz);
@@ -274,7 +273,9 @@ do_triangle_ccw(struct lp_setup_context *setup,
    const struct lp_setup_variant_key *key = &setup->setup.variant->key;
    struct lp_rast_triangle *tri;
    struct lp_rast_plane *plane;
-   struct u_rect bbox;
+   const struct u_rect *scissor;
+   struct u_rect bbox, bboxpos;
+   boolean s_planes[4];
    unsigned tri_bytes;
    int nr_planes = 3;
    unsigned viewport_index = 0;
@@ -302,13 +303,6 @@ do_triangle_ccw(struct lp_setup_context *setup,
       layer = MIN2(layer, scene->fb_max_layer);
    }
 
-   if (setup->scissor_test) {
-      nr_planes = 7;
-   }
-   else {
-      nr_planes = 3;
-   }
-
    /* Bounding rectangle (in pixels) */
    {
       /* Yes this is necessary to accurately calculate bounding boxes
@@ -340,12 +334,26 @@ do_triangle_ccw(struct lp_setup_context *setup,
       return TRUE;
    }
 
+   bboxpos = bbox;
+
    /* Can safely discard negative regions, but need to keep hold of
     * information about when the triangle extends past screen
     * boundaries.  See trimmed_box in lp_setup_bin_triangle().
     */
-   bbox.x0 = MAX2(bbox.x0, 0);
-   bbox.y0 = MAX2(bbox.y0, 0);
+   bboxpos.x0 = MAX2(bboxpos.x0, 0);
+   bboxpos.y0 = MAX2(bboxpos.y0, 0);
+
+   nr_planes = 3;
+   /*
+    * Determine how many scissor planes we need, i.e. drop a scissor
+    * edge if the bounding box of the tri is fully inside that edge.
+    */
+   if (setup->scissor_test) {
+      /* why not just use draw_regions? */
+      scissor = &setup->scissors[viewport_index];
+      scissor_planes_needed(s_planes, &bboxpos, scissor);
+      nr_planes += s_planes[0] + s_planes[1] + s_planes[2] + s_planes[3];
+   }
 
    tri = lp_setup_alloc_triangle(scene,
                                  key->num_inputs,
@@ -354,7 +362,7 @@ do_triangle_ccw(struct lp_setup_context *setup,
    if (!tri)
       return FALSE;
 
-#if 0
+#ifdef DEBUG
    tri->v[0][0] = v0[0][0];
    tri->v[1][0] = v1[0][0];
    tri->v[2][0] = v2[0][0];
@@ -367,13 +375,11 @@ do_triangle_ccw(struct lp_setup_context *setup,
 
    /* Setup parameter interpolants:
     */
-   setup->setup.variant->jit_function( v0,
-                                      v1,
-                                      v2,
-                                      frontfacing,
-                                      GET_A0(&tri->inputs),
-                                      GET_DADX(&tri->inputs),
-                                      GET_DADY(&tri->inputs) );
+   setup->setup.variant->jit_function(v0, v1, v2,
+                                      frontfacing,
+                                      GET_A0(&tri->inputs),
+                                      GET_DADX(&tri->inputs),
+                                      GET_DADY(&tri->inputs));
 
    tri->inputs.frontfacing = frontfacing;
    tri->inputs.disable = FALSE;
@@ -383,9 +389,9 @@ do_triangle_ccw(struct lp_setup_context *setup,
 
    if (0)
       lp_dump_setup_coef(&setup->setup.variant->key,
-                        (const float (*)[4])GET_A0(&tri->inputs),
-                        (const float (*)[4])GET_DADX(&tri->inputs),
-                        (const float (*)[4])GET_DADY(&tri->inputs));
+                         (const float (*)[4])GET_A0(&tri->inputs),
+                         (const float (*)[4])GET_DADX(&tri->inputs),
+                         (const float (*)[4])GET_DADY(&tri->inputs));
 
    plane = GET_PLANES(tri);
 
@@ -483,7 +489,7 @@ do_triangle_ccw(struct lp_setup_context *setup,
       eo = _mm_shuffle_epi32(eo, _MM_SHUFFLE(0,0,0,2));
       plane[2].eo = (uint32_t)_mm_cvtsi128_si32(eo);
    } else
-#elif defined(_ARCH_PWR8) && defined(PIPE_ARCH_LITTLE_ENDIAN)
+#elif defined(_ARCH_PWR8) && UTIL_ARCH_LITTLE_ENDIAN
    /*
     * XXX this code is effectively disabled for all practical purposes,
     * as the allowed fb size is tiny if FIXED_ORDER is 8.
@@ -507,7 +513,7 @@ do_triangle_ccw(struct lp_setup_context *setup,
       __m128i zero = vec_splats((unsigned char) 0);
       PIPE_ALIGN_VAR(16) int32_t temp_vec[4];
 
-#ifdef PIPE_ARCH_LITTLE_ENDIAN
+#if UTIL_ARCH_LITTLE_ENDIAN
       vshuf_mask.i[0] = 0x07060504;
       vshuf_mask.i[1] = 0x0B0A0908;
       vshuf_mask.i[2] = 0x03020100;
@@ -672,32 +678,46 @@ do_triangle_ccw(struct lp_setup_context *setup,
     * Note that otherwise, the scissor planes only vary in 'C' value,
     * and even then only on state-changes.  Could alternatively store
     * these planes elsewhere.
+    * (Or only store the c value together with a bit indicating which
+    * scissor edge this is, so rasterization would treat them differently
+    * (easier to evaluate) to ordinary planes.)
     */
-   if (nr_planes == 7) {
-      const struct u_rect *scissor = &setup->scissors[viewport_index];
-
-      plane[3].dcdx = -1 << 8;
-      plane[3].dcdy = 0;
-      plane[3].c = (1-scissor->x0) << 8;
-      plane[3].eo = 1 << 8;
-
-      plane[4].dcdx = 1 << 8;
-      plane[4].dcdy = 0;
-      plane[4].c = (scissor->x1+1) << 8;
-      plane[4].eo = 0;
-
-      plane[5].dcdx = 0;
-      plane[5].dcdy = 1 << 8;
-      plane[5].c = (1-scissor->y0) << 8;
-      plane[5].eo = 1 << 8;
-
-      plane[6].dcdx = 0;
-      plane[6].dcdy = -1 << 8;
-      plane[6].c = (scissor->y1+1) << 8;
-      plane[6].eo = 0;
+   if (nr_planes > 3) {
+      /* why not just use draw_regions? */
+      struct lp_rast_plane *plane_s = &plane[3];
+
+      if (s_planes[0]) {
+         plane_s->dcdx = ~0U << 8;
+         plane_s->dcdy = 0;
+         plane_s->c = (1-scissor->x0) << 8;
+         plane_s->eo = 1 << 8;
+         plane_s++;
+      }
+      if (s_planes[1]) {
+         plane_s->dcdx = 1 << 8;
+         plane_s->dcdy = 0;
+         plane_s->c = (scissor->x1+1) << 8;
+         plane_s->eo = 0 << 8;
+         plane_s++;
+      }
+      if (s_planes[2]) {
+         plane_s->dcdx = 0;
+         plane_s->dcdy = 1 << 8;
+         plane_s->c = (1-scissor->y0) << 8;
+         plane_s->eo = 1 << 8;
+         plane_s++;
+      }
+      if (s_planes[3]) {
+         plane_s->dcdx = 0;
+         plane_s->dcdy = ~0U << 8;
+         plane_s->c = (scissor->y1+1) << 8;
+         plane_s->eo = 0;
+         plane_s++;
+      }
+      assert(plane_s == &plane[nr_planes]);
    }
 
-   return lp_setup_bin_triangle(setup, tri, &bbox, nr_planes, viewport_index);
+   return lp_setup_bin_triangle(setup, tri, &bbox, &bboxpos, nr_planes, viewport_index);
 }
 
 /*
@@ -714,7 +734,8 @@ floor_pot(uint32_t n)
 
    __asm__("bsr %1,%0"
           : "=r" (n)
-          : "rm" (n));
+          : "rm" (n)
+          : "cc");
    return 1 << n;
 #else
    n |= (n >>  1);
@@ -728,11 +749,12 @@ floor_pot(uint32_t n)
 
 
 boolean
-lp_setup_bin_triangle( struct lp_setup_context *setup,
-                       struct lp_rast_triangle *tri,
-                       const struct u_rect *bbox,
-                       int nr_planes,
-                       unsigned viewport_index )
+lp_setup_bin_triangle(struct lp_setup_context *setup,
+                      struct lp_rast_triangle *tri,
+                      const struct u_rect *bboxorig,
+                      const struct u_rect *bbox,
+                      int nr_planes,
+                      unsigned viewport_index)
 {
    struct lp_scene *scene = setup->scene;
    struct u_rect trimmed_box = *bbox;   
@@ -748,7 +770,16 @@ lp_setup_bin_triangle( struct lp_setup_context *setup,
    int max_sz = ((bbox->x1 - (bbox->x0 & ~3)) |
                  (bbox->y1 - (bbox->y0 & ~3)));
    int sz = floor_pot(max_sz);
-   boolean use_32bits = max_sz <= MAX_FIXED_LENGTH32;
+
+   /*
+    * NOTE: It is important to use the original bounding box,
+    * which might contain negative values, here: whether the
+    * plane math can overflow in the 32bit rasterization
+    * functions depends on the original extent of the triangle.
+    */
+   int max_szorig = ((bboxorig->x1 - (bboxorig->x0 & ~3)) |
+                     (bboxorig->y1 - (bboxorig->y0 & ~3)));
+   boolean use_32bits = max_szorig <= MAX_FIXED_LENGTH32;
 
    /* Now apply scissor, etc to the bounding box.  Could do this
     * earlier, but it confuses the logic for tri-16 and would force
@@ -984,17 +1015,16 @@ calc_fixed_position(struct lp_setup_context *setup,
     * Both should be acceptable, I think.
     */
 #if defined(PIPE_ARCH_SSE)
-   __m128d v0r, v1r, v2r;
+   __m128 v0r, v1r;
    __m128 vxy0xy2, vxy1xy0;
    __m128i vxy0xy2i, vxy1xy0i;
    __m128i dxdy0120, x0x2y0y2, x1x0y1y0, x0120, y0120;
    __m128 pix_offset = _mm_set1_ps(setup->pixel_offset);
    __m128 fixed_one = _mm_set1_ps((float)FIXED_ONE);
-   v0r = _mm_load_sd((const double *)v0[0]);
-   v1r = _mm_load_sd((const double *)v1[0]);
-   v2r = _mm_load_sd((const double *)v2[0]);
-   vxy0xy2 = _mm_castpd_ps(_mm_unpacklo_pd(v0r, v2r));
-   vxy1xy0 = _mm_castpd_ps(_mm_unpacklo_pd(v1r, v0r));
+   v0r = _mm_castpd_ps(_mm_load_sd((double *)v0[0]));
+   vxy0xy2 = _mm_loadh_pi(v0r, (__m64 *)v2[0]);
+   v1r = _mm_castpd_ps(_mm_load_sd((double *)v1[0]));
+   vxy1xy0 = _mm_movelh_ps(v1r, vxy0xy2);
    vxy0xy2 = _mm_sub_ps(vxy0xy2, pix_offset);
    vxy1xy0 = _mm_sub_ps(vxy1xy0, pix_offset);
    vxy0xy2 = _mm_mul_ps(vxy0xy2, fixed_one);
@@ -1098,6 +1128,11 @@ static void triangle_cw(struct lp_setup_context *setup,
                         const float (*v2)[4])
 {
    PIPE_ALIGN_VAR(16) struct fixed_position position;
+   struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe;
+
+   if (lp_context->active_statistics_queries) {
+      lp_context->pipeline_statistics.c_primitives++;
+   }
 
    calc_fixed_position(setup, &position, v0, v1, v2);
 
@@ -1119,6 +1154,11 @@ static void triangle_ccw(struct lp_setup_context *setup,
                          const float (*v2)[4])
 {
    PIPE_ALIGN_VAR(16) struct fixed_position position;
+   struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe;
+
+   if (lp_context->active_statistics_queries) {
+      lp_context->pipeline_statistics.c_primitives++;
+   }
 
    calc_fixed_position(setup, &position, v0, v1, v2);
 
@@ -1137,8 +1177,7 @@ static void triangle_both(struct lp_setup_context *setup,
    PIPE_ALIGN_VAR(16) struct fixed_position position;
    struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe;
 
-   if (lp_context->active_statistics_queries &&
-       !llvmpipe_rasterization_disabled(lp_context)) {
+   if (lp_context->active_statistics_queries) {
       lp_context->pipeline_statistics.c_primitives++;
    }
 
@@ -1167,17 +1206,21 @@ static void triangle_both(struct lp_setup_context *setup,
 }
 
 
-static void triangle_nop( struct lp_setup_context *setup,
-                         const float (*v0)[4],
-                         const float (*v1)[4],
-                         const float (*v2)[4] )
+static void triangle_noop(struct lp_setup_context *setup,
+                          const float (*v0)[4],
+                          const float (*v1)[4],
+                          const float (*v2)[4])
 {
 }
 
 
 void 
-lp_setup_choose_triangle( struct lp_setup_context *setup )
+lp_setup_choose_triangle(struct lp_setup_context *setup)
 {
+   if (setup->rasterizer_discard) {
+      setup->triangle = triangle_noop;
+      return;
+   }
    switch (setup->cullmode) {
    case PIPE_FACE_NONE:
       setup->triangle = triangle_both;
@@ -1189,7 +1232,7 @@ lp_setup_choose_triangle( struct lp_setup_context *setup )
       setup->triangle = setup->ccw_is_frontface ? triangle_cw : triangle_ccw;
       break;
    default:
-      setup->triangle = triangle_nop;
+      setup->triangle = triangle_noop;
       break;
    }
 }