freedreno/ir3: fix infinite recursion in sched

[mesa.git] / src / gallium / drivers / softpipe / sp_tex_sample.c
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c

index 2c7f17f20bee79e1a5bf3d081a7f948e4538cf62..68dcf57240d32c5b42504e962068091d4628bf28 100644 (file)
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -1,6 +1,6 @@
  /**************************************************************************
   * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2007 VMware, Inc.
   * All Rights Reserved.
   * Copyright 2008-2010 VMware, Inc.  All rights reserved.
   *
@@ -19,7 +19,7 @@
   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
   * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
   * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
   * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
   * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -608,6 +608,48 @@ get_texel_2d(const struct sp_sampler_view *sp_sview,
     }
  }
  
+
+/*
+ * Here's the complete logic (HOLY CRAP) for finding the next face and doing
+ * the corresponding coord wrapping, implemented by get_next_face,
+ * get_next_xcoord, get_next_ycoord.
+ * Each row of the table reads as follows (taking the first row as example):
+ * If face is +x and the s coord is below zero ("s-"), then
+ * the new face is +z, new s is max, new t is old t
+ * (max is always cube size - 1).
+ *
+ * +x s- -> +z: s = max,   t = t
+ * +x s+ -> -z: s = 0,     t = t
+ * +x t- -> +y: s = max,   t = max-s
+ * +x t+ -> -y: s = max,   t = s
+ *
+ * -x s- -> -z: s = max,   t = t
+ * -x s+ -> +z: s = 0,     t = t
+ * -x t- -> +y: s = 0,     t = s
+ * -x t+ -> -y: s = 0,     t = max-s
+ *
+ * +y s- -> -x: s = t,     t = 0
+ * +y s+ -> +x: s = max-t, t = 0
+ * +y t- -> -z: s = max-s, t = 0
+ * +y t+ -> +z: s = s,     t = 0
+ *
+ * -y s- -> -x: s = max-t, t = max
+ * -y s+ -> +x: s = t,     t = max
+ * -y t- -> +z: s = s,     t = max
+ * -y t+ -> -z: s = max-s, t = max
+ *
+ * +z s- -> -x: s = max,   t = t
+ * +z s+ -> +x: s = 0,     t = t
+ * +z t- -> +y: s = s,     t = max
+ * +z t+ -> -y: s = s,     t = 0
+ *
+ * -z s- -> +x: s = max,   t = t
+ * -z s+ -> -x: s = 0,     t = t
+ * -z t- -> +y: s = max-s, t = 0
+ * -z t+ -> -y: s = max-s, t = max
+ */
+
+
  /*
   * seamless cubemap neighbour array.
   * this array is used to find the adjacent face in each of 4 directions,
@@ -617,49 +659,104 @@ static const unsigned face_array[PIPE_TEX_FACE_MAX][4] = {
     /* pos X first then neg X is Z different, Y the same */
     /* PIPE_TEX_FACE_POS_X,*/
     { PIPE_TEX_FACE_POS_Z, PIPE_TEX_FACE_NEG_Z,
-     PIPE_TEX_FACE_NEG_Y, PIPE_TEX_FACE_POS_Y },
+     PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y },
     /* PIPE_TEX_FACE_NEG_X */
     { PIPE_TEX_FACE_NEG_Z, PIPE_TEX_FACE_POS_Z,
-     PIPE_TEX_FACE_NEG_Y, PIPE_TEX_FACE_POS_Y },
+     PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y },
  
     /* pos Y first then neg Y is X different, X the same */
     /* PIPE_TEX_FACE_POS_Y */
     { PIPE_TEX_FACE_NEG_X, PIPE_TEX_FACE_POS_X,
-     PIPE_TEX_FACE_POS_Z, PIPE_TEX_FACE_NEG_Z },
+     PIPE_TEX_FACE_NEG_Z, PIPE_TEX_FACE_POS_Z },
  
     /* PIPE_TEX_FACE_NEG_Y */
     { PIPE_TEX_FACE_NEG_X, PIPE_TEX_FACE_POS_X,
-     PIPE_TEX_FACE_NEG_Z, PIPE_TEX_FACE_POS_Z },
+     PIPE_TEX_FACE_POS_Z, PIPE_TEX_FACE_NEG_Z },
  
     /* pos Z first then neg Y is X different, X the same */
     /* PIPE_TEX_FACE_POS_Z */
     { PIPE_TEX_FACE_NEG_X, PIPE_TEX_FACE_POS_X,
-     PIPE_TEX_FACE_NEG_Y, PIPE_TEX_FACE_POS_Y },
+     PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y },
  
     /* PIPE_TEX_FACE_NEG_Z */
     { PIPE_TEX_FACE_POS_X, PIPE_TEX_FACE_NEG_X,
-     PIPE_TEX_FACE_NEG_Y, PIPE_TEX_FACE_POS_Y }
+     PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y }
  };
  
  static INLINE unsigned
-get_next_face(unsigned face, int x, int y)
+get_next_face(unsigned face, int idx)
  {
-   int idx = 0;
+   return face_array[face][idx];
+}
  
-   if (x == 0 && y == 0)
-      return face;
-   if (x == -1)
-      idx = 0;
-   else if (x == 1)
-      idx = 1;
-   else if (y == -1)
-      idx = 2;
-   else if (y == 1)
-      idx = 3;
+/*
+ * return a new xcoord based on old face, old coords, max (cube size - 1)
+ * and fall_off_index (0 for x-, 1 for x+, 2 for y-, 3 for y+)
+ */
+static INLINE int
+get_next_xcoord(unsigned face, unsigned fall_off_index, int max, int xc, int yc)
+{
+   if ((face == 0 && fall_off_index != 1) ||
+       (face == 1 && fall_off_index == 0) ||
+       (face == 4 && fall_off_index == 0) ||
+       (face == 5 && fall_off_index == 0)) {
+      return max;
+   }
+   if ((face == 1 && fall_off_index != 0) ||
+       (face == 0 && fall_off_index == 1) ||
+       (face == 4 && fall_off_index == 1) ||
+       (face == 5 && fall_off_index == 1)) {
+      return 0;
+   }
+   if ((face == 4 && fall_off_index >= 2) ||
+       (face == 2 && fall_off_index == 3) ||
+       (face == 3 && fall_off_index == 2)) {
+      return xc;
+   }
+   if ((face == 5 && fall_off_index >= 2) ||
+       (face == 2 && fall_off_index == 2) ||
+       (face == 3 && fall_off_index == 3)) {
+      return max - xc;
+   }
+   if ((face == 2 && fall_off_index == 0) ||
+       (face == 3 && fall_off_index == 1)) {
+      return yc;
+   }
+   /* (face == 2 && fall_off_index == 1) ||
+      (face == 3 && fall_off_index == 0)) */
+   return max - yc;
+}
  
-   return face_array[face][idx];
+/*
+ * return a new ycoord based on old face, old coords, max (cube size - 1)
+ * and fall_off_index (0 for x-, 1 for x+, 2 for y-, 3 for y+)
+ */
+static INLINE int
+get_next_ycoord(unsigned face, unsigned fall_off_index, int max, int xc, int yc)
+{
+   if ((fall_off_index <= 1) && (face <= 1 || face >= 4)) {
+      return yc;
+   }
+   if (face == 2 ||
+       (face == 4 && fall_off_index == 3) ||
+       (face == 5 && fall_off_index == 2)) {
+      return 0;
+   }
+   if (face == 3 ||
+       (face == 4 && fall_off_index == 2) ||
+       (face == 5 && fall_off_index == 3)) {
+      return max;
+   }
+   if ((face == 0 && fall_off_index == 3) ||
+       (face == 1 && fall_off_index == 2)) {
+      return xc;
+   }
+   /* (face == 0 && fall_off_index == 2) ||
+      (face == 1 && fall_off_index == 3) */
+   return max - xc;
  }
  
+
  static INLINE const float *
  get_texel_cube_seamless(const struct sp_sampler_view *sp_sview,
                          union tex_tile_address addr, int x, int y,
@@ -668,44 +765,47 @@ get_texel_cube_seamless(const struct sp_sampler_view *sp_sview,
     const struct pipe_resource *texture = sp_sview->base.texture;
     unsigned level = addr.bits.level;
     unsigned face = addr.bits.face;
-   int new_x, new_y;
-   int max_x, max_y;
-   int c;
+   int new_x, new_y, max_x;
  
     max_x = (int) u_minify(texture->width0, level);
-   max_y = (int) u_minify(texture->height0, level);
+
+   assert(texture->width0 == texture->height0);
     new_x = x;
     new_y = y;
  
-   /* the corner case */
-   if ((x < 0 || x >= max_x) &&
-       (y < 0 || y >= max_y)) {
-      const float *c1, *c2, *c3;
-      int fx = x < 0 ? 0 : max_x - 1;
-      int fy = y < 0 ? 0 : max_y - 1;
-      c1 = get_texel_2d_no_border( sp_sview, addr, fx, fy);
-      addr.bits.face = get_next_face(face, (x < 0) ? -1 : 1, 0);
-      c2 = get_texel_2d_no_border( sp_sview, addr, (x < 0) ? max_x - 1 : 0, fy);
-      addr.bits.face = get_next_face(face, 0, (y < 0) ? -1 : 1);
-      c3 = get_texel_2d_no_border( sp_sview, addr, fx, (y < 0) ?  max_y - 1 : 0);
-      for (c = 0; c < TGSI_QUAD_SIZE; c++)
-         corner[c] = CLAMP((c1[c] + c2[c] + c3[c]), 0.0F, 1.0F) / 3;
-
-      return corner;
-   }
     /* change the face */
     if (x < 0) {
-      new_x = max_x - 1;
-      face = get_next_face(face, -1, 0);
+      /*
+       * Cheat with corners. They are difficult and I believe because we don't get
+       * per-pixel faces we can actually have multiple corner texels per pixel,
+       * which screws things up majorly in any case (as the per spec behavior is
+       * to average the 3 remaining texels, which we might not have).
+       * Hence just make sure that the 2nd coord is clamped, will simply pick the
+       * sample which would have fallen off the x coord, but not y coord.
+       * So the filter weight of the samples will be wrong, but at least this
+       * ensures that only valid texels near the corner are used.
+       */
+      if (y < 0 || y >= max_x) {
+         y = CLAMP(y, 0, max_x - 1);
+      }
+      new_x = get_next_xcoord(face, 0, max_x -1, x, y);
+      new_y = get_next_ycoord(face, 0, max_x -1, x, y);
+      face = get_next_face(face, 0);
     } else if (x >= max_x) {
-      new_x = 0;
-      face = get_next_face(face, 1, 0);
+      if (y < 0 || y >= max_x) {
+         y = CLAMP(y, 0, max_x - 1);
+      }
+      new_x = get_next_xcoord(face, 1, max_x -1, x, y);
+      new_y = get_next_ycoord(face, 1, max_x -1, x, y);
+      face = get_next_face(face, 1);
     } else if (y < 0) {
-      new_y = max_y - 1;
-      face = get_next_face(face, 0, -1);
-   } else if (y >= max_y) {
-      new_y = 0;
-      face = get_next_face(face, 0, 1);
+      new_x = get_next_xcoord(face, 2, max_x -1, x, y);
+      new_y = get_next_ycoord(face, 2, max_x -1, x, y);
+      face = get_next_face(face, 2);
+   } else if (y >= max_x) {
+      new_x = get_next_xcoord(face, 3, max_x -1, x, y);
+      new_y = get_next_ycoord(face, 3, max_x -1, x, y);
+      face = get_next_face(face, 3);
     }
  
     addr.bits.face = face;
@@ -917,8 +1017,8 @@ img_filter_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview,
  {
     unsigned xpot = pot_level_size(sp_sview->xpot, level);
     unsigned ypot = pot_level_size(sp_sview->ypot, level);
-   unsigned xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */
-   unsigned ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */
+   int xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */
+   int ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */
     union tex_tile_address addr;
     int c;
  
@@ -1028,13 +1128,13 @@ img_filter_2d_nearest_clamp_POT(struct sp_sampler_view *sp_sview,
     x0 = util_ifloor(u);
     if (x0 < 0) 
        x0 = 0;
-   else if (x0 > xpot - 1)
+   else if (x0 > (int) xpot - 1)
        x0 = xpot - 1;
  
     y0 = util_ifloor(v);
     if (y0 < 0) 
        y0 = 0;
-   else if (y0 > ypot - 1)
+   else if (y0 > (int) ypot - 1)
        y0 = ypot - 1;
     
     out = get_texel_2d_no_border(sp_sview, addr, x0, y0);
@@ -1241,6 +1341,7 @@ img_filter_cube_nearest(struct sp_sampler_view *sp_sview,
        wrap_nearest_clamp_to_edge(s, width, &x);
        wrap_nearest_clamp_to_edge(t, height, &y);
     } else {
+      /* Would probably make sense to ignore mode and just do edge clamp */
        sp_samp->nearest_texcoord_s(s, width, &x);
        sp_samp->nearest_texcoord_t(t, height, &y);
     }
@@ -1525,9 +1626,11 @@ img_filter_cube_linear(struct sp_sampler_view *sp_sview,
      * always apply wrap mode CLAMP_TO_BORDER.
      */
     if (sp_samp->base.seamless_cube_map) {
+      /* Note this is a bit overkill, actual clamping is not required */
        wrap_linear_clamp_to_border(s, width, &x0, &x1, &xw);
        wrap_linear_clamp_to_border(t, height, &y0, &y1, &yw);
     } else {
+      /* Would probably make sense to ignore mode and just do edge clamp */
        sp_samp->linear_texcoord_s(s, width,  &x0, &x1, &xw);
        sp_samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
     }
@@ -1757,22 +1860,22 @@ mip_filter_linear(struct sp_sampler_view *sp_sview,
                    enum tgsi_sampler_control control,
                    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
  {
-   const struct pipe_resource *texture = sp_sview->base.texture;
+   const struct pipe_sampler_view *psview = &sp_sview->base;
     int j;
     float lod[TGSI_QUAD_SIZE];
  
     compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, control, lod);
  
     for (j = 0; j < TGSI_QUAD_SIZE; j++) {
-      int level0 = sp_sview->base.u.tex.first_level + (int)lod[j];
+      int level0 = psview->u.tex.first_level + (int)lod[j];
  
        if (lod[j] < 0.0)
           mag_filter(sp_sview, sp_samp, s[j], t[j], p[j],
-                    sp_sview->base.u.tex.first_level,
+                    psview->u.tex.first_level,
                      sp_sview->faces[j], &rgba[0][j]);
  
-      else if (level0 >= texture->last_level)
-         min_filter(sp_sview, sp_samp, s[j], t[j], p[j], texture->last_level,
+      else if (level0 >= (int) psview->u.tex.last_level)
+         min_filter(sp_sview, sp_samp, s[j], t[j], p[j], psview->u.tex.last_level,
                      sp_sview->faces[j], &rgba[0][j]);
  
        else {
@@ -1815,7 +1918,7 @@ mip_filter_nearest(struct sp_sampler_view *sp_sview,
                     enum tgsi_sampler_control control,
                     float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
  {
-   const struct pipe_resource *texture = sp_sview->base.texture;
+   const struct pipe_sampler_view *psview = &sp_sview->base;
     float lod[TGSI_QUAD_SIZE];
     int j;
  
@@ -1824,11 +1927,11 @@ mip_filter_nearest(struct sp_sampler_view *sp_sview,
     for (j = 0; j < TGSI_QUAD_SIZE; j++) {
        if (lod[j] < 0.0)
           mag_filter(sp_sview, sp_samp, s[j], t[j], p[j],
-                    sp_sview->base.u.tex.first_level,
+                    psview->u.tex.first_level,
                      sp_sview->faces[j], &rgba[0][j]);
        else {
-         float level = sp_sview->base.u.tex.first_level + (int)(lod[j] + 0.5F) ;
-         level = MIN2(level, (int)texture->last_level);
+         int level = psview->u.tex.first_level + (int)(lod[j] + 0.5F);
+         level = MIN2(level, (int)psview->u.tex.last_level);
           min_filter(sp_sview, sp_samp, s[j], t[j], p[j],
                      level, sp_sview->faces[j], &rgba[0][j]);
        }
@@ -1962,12 +2065,12 @@ img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
     float F = A*C-B*B/4.0f;
  
     /* check if it is an ellipse */
-   /* ASSERT(F > 0.0); */
+   /* assert(F > 0.0); */
  
     /* Compute the ellipse's (u,v) bounding box in texture space */
     float d = -B*B+4.0f*C*A;
-   float box_u = 2.0f / d * sqrt(d*C*F); /* box_u -> half of bbox with   */
-   float box_v = 2.0f / d * sqrt(A*d*F); /* box_v -> half of bbox height */
+   float box_u = 2.0f / d * sqrtf(d*C*F); /* box_u -> half of bbox width  */
+   float box_v = 2.0f / d * sqrtf(A*d*F); /* box_v -> half of bbox height */
  
     float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
     float s_buffer[TGSI_QUAD_SIZE];
@@ -2127,12 +2230,13 @@ mip_filter_linear_aniso(struct sp_sampler_view *sp_sview,
                          float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
  {
     const struct pipe_resource *texture = sp_sview->base.texture;
+   const struct pipe_sampler_view *psview = &sp_sview->base;
     int level0;
     float lambda;
     float lod[TGSI_QUAD_SIZE];
  
-   float s_to_u = u_minify(texture->width0, sp_sview->base.u.tex.first_level);
-   float t_to_v = u_minify(texture->height0, sp_sview->base.u.tex.first_level);
+   float s_to_u = u_minify(texture->width0, psview->u.tex.first_level);
+   float t_to_v = u_minify(texture->height0, psview->u.tex.first_level);
     float dudx = (s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]) * s_to_u;
     float dudy = (s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]) * s_to_u;
     float dvdx = (t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]) * t_to_v;
@@ -2189,15 +2293,15 @@ mip_filter_linear_aniso(struct sp_sampler_view *sp_sview,
     /* XXX: Take into account all lod values.
      */
     lambda = lod[0];
-   level0 = sp_sview->base.u.tex.first_level + (int)lambda;
+   level0 = psview->u.tex.first_level + (int)lambda;
  
     /* If the ellipse covers the whole image, we can
      * simply return the average of the whole image.
      */
-   if (level0 >= (int) texture->last_level) {
+   if (level0 >= (int) psview->u.tex.last_level) {
        int j;
        for (j = 0; j < TGSI_QUAD_SIZE; j++)
-         min_filter(sp_sview, sp_samp, s[j], t[j], p[j], texture->last_level,
+         min_filter(sp_sview, sp_samp, s[j], t[j], p[j], psview->u.tex.last_level,
                      sp_sview->faces[j], &rgba[0][j]);
     }
     else {
@@ -2233,25 +2337,25 @@ mip_filter_linear_2d_linear_repeat_POT(
     enum tgsi_sampler_control control,
     float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
  {
-   const struct pipe_resource *texture = sp_sview->base.texture;
+   const struct pipe_sampler_view *psview = &sp_sview->base;
     int j;
     float lod[TGSI_QUAD_SIZE];
  
     compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, control, lod);
  
     for (j = 0; j < TGSI_QUAD_SIZE; j++) {
-      int level0 = sp_sview->base.u.tex.first_level + (int)lod[j];
+      int level0 = psview->u.tex.first_level + (int)lod[j];
  
        /* Catches both negative and large values of level0:
         */
-      if ((unsigned)level0 >= texture->last_level) { 
+      if ((unsigned)level0 >= psview->u.tex.last_level) {
           if (level0 < 0)
              img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, s[j], t[j], p[j],
-                                            sp_sview->base.u.tex.first_level,
+                                            psview->u.tex.first_level,
                                              sp_sview->faces[j], &rgba[0][j]);
           else
              img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, s[j], t[j], p[j],
-                                            sp_sview->base.texture->last_level,
+                                            psview->u.tex.last_level,
                                              sp_sview->faces[j], &rgba[0][j]);
  
        }
@@ -2291,9 +2395,11 @@ sample_compare(struct sp_sampler_view *sp_sview,
                 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
  {
     const struct pipe_sampler_state *sampler = &sp_samp->base;
-   int j, k0, k1, k2, k3;
-   float val;
-   float pc0, pc1, pc2, pc3;
+   int j;
+   int k[4];
+   float pc[4];
+   const struct util_format_description *format_desc;
+   unsigned chan_type;
  
     /**
      * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
@@ -2304,93 +2410,94 @@ sample_compare(struct sp_sampler_view *sp_sview,
  
     if (sp_sview->base.texture->target == PIPE_TEXTURE_2D_ARRAY ||
         sp_sview->base.texture->target == PIPE_TEXTURE_CUBE) {
-      pc0 = CLAMP(c0[0], 0.0F, 1.0F);
-      pc1 = CLAMP(c0[1], 0.0F, 1.0F);
-      pc2 = CLAMP(c0[2], 0.0F, 1.0F);
-      pc3 = CLAMP(c0[3], 0.0F, 1.0F);
+      pc[0] = c0[0];
+      pc[1] = c0[1];
+      pc[2] = c0[2];
+      pc[3] = c0[3];
     } else if (sp_sview->base.texture->target == PIPE_TEXTURE_CUBE_ARRAY) {
-      pc0 = CLAMP(c1[0], 0.0F, 1.0F);
-      pc1 = CLAMP(c1[1], 0.0F, 1.0F);
-      pc2 = CLAMP(c1[2], 0.0F, 1.0F);
-      pc3 = CLAMP(c1[3], 0.0F, 1.0F);
+      pc[0] = c1[0];
+      pc[1] = c1[1];
+      pc[2] = c1[2];
+      pc[3] = c1[3];
     } else {
-      pc0 = CLAMP(p[0], 0.0F, 1.0F);
-      pc1 = CLAMP(p[1], 0.0F, 1.0F);
-      pc2 = CLAMP(p[2], 0.0F, 1.0F);
-      pc3 = CLAMP(p[3], 0.0F, 1.0F);
+      pc[0] = p[0];
+      pc[1] = p[1];
+      pc[2] = p[2];
+      pc[3] = p[3];
+   }
+
+   format_desc = util_format_description(sp_sview->base.format);
+   /* not entirely sure we couldn't end up with non-valid swizzle here */
+   chan_type = format_desc->swizzle[0] <= UTIL_FORMAT_SWIZZLE_W ?
+                  format_desc->channel[format_desc->swizzle[0]].type :
+                  UTIL_FORMAT_TYPE_FLOAT;
+   if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
+      /*
+       * clamping is a result of conversion to texture format, hence
+       * doesn't happen with floats. Technically also should do comparison
+       * in texture format (quantization!).
+       */
+      pc[0] = CLAMP(pc[0], 0.0F, 1.0F);
+      pc[1] = CLAMP(pc[1], 0.0F, 1.0F);
+      pc[2] = CLAMP(pc[2], 0.0F, 1.0F);
+      pc[3] = CLAMP(pc[3], 0.0F, 1.0F);
     }
+
     /* compare four texcoords vs. four texture samples */
     switch (sampler->compare_func) {
     case PIPE_FUNC_LESS:
-      k0 = pc0 < rgba[0][0];
-      k1 = pc1 < rgba[0][1];
-      k2 = pc2 < rgba[0][2];
-      k3 = pc3 < rgba[0][3];
+      k[0] = pc[0] < rgba[0][0];
+      k[1] = pc[1] < rgba[0][1];
+      k[2] = pc[2] < rgba[0][2];
+      k[3] = pc[3] < rgba[0][3];
        break;
     case PIPE_FUNC_LEQUAL:
-      k0 = pc0 <= rgba[0][0];
-      k1 = pc1 <= rgba[0][1];
-      k2 = pc2 <= rgba[0][2];
-      k3 = pc3 <= rgba[0][3];
+      k[0] = pc[0] <= rgba[0][0];
+      k[1] = pc[1] <= rgba[0][1];
+      k[2] = pc[2] <= rgba[0][2];
+      k[3] = pc[3] <= rgba[0][3];
        break;
     case PIPE_FUNC_GREATER:
-      k0 = pc0 > rgba[0][0];
-      k1 = pc1 > rgba[0][1];
-      k2 = pc2 > rgba[0][2];
-      k3 = pc3 > rgba[0][3];
+      k[0] = pc[0] > rgba[0][0];
+      k[1] = pc[1] > rgba[0][1];
+      k[2] = pc[2] > rgba[0][2];
+      k[3] = pc[3] > rgba[0][3];
        break;
     case PIPE_FUNC_GEQUAL:
-      k0 = pc0 >= rgba[0][0];
-      k1 = pc1 >= rgba[0][1];
-      k2 = pc2 >= rgba[0][2];
-      k3 = pc3 >= rgba[0][3];
+      k[0] = pc[0] >= rgba[0][0];
+      k[1] = pc[1] >= rgba[0][1];
+      k[2] = pc[2] >= rgba[0][2];
+      k[3] = pc[3] >= rgba[0][3];
        break;
     case PIPE_FUNC_EQUAL:
-      k0 = pc0 == rgba[0][0];
-      k1 = pc1 == rgba[0][1];
-      k2 = pc2 == rgba[0][2];
-      k3 = pc3 == rgba[0][3];
+      k[0] = pc[0] == rgba[0][0];
+      k[1] = pc[1] == rgba[0][1];
+      k[2] = pc[2] == rgba[0][2];
+      k[3] = pc[3] == rgba[0][3];
        break;
     case PIPE_FUNC_NOTEQUAL:
-      k0 = pc0 != rgba[0][0];
-      k1 = pc1 != rgba[0][1];
-      k2 = pc2 != rgba[0][2];
-      k3 = pc3 != rgba[0][3];
+      k[0] = pc[0] != rgba[0][0];
+      k[1] = pc[1] != rgba[0][1];
+      k[2] = pc[2] != rgba[0][2];
+      k[3] = pc[3] != rgba[0][3];
        break;
     case PIPE_FUNC_ALWAYS:
-      k0 = k1 = k2 = k3 = 1;
+      k[0] = k[1] = k[2] = k[3] = 1;
        break;
     case PIPE_FUNC_NEVER:
-      k0 = k1 = k2 = k3 = 0;
+      k[0] = k[1] = k[2] = k[3] = 0;
        break;
     default:
-      k0 = k1 = k2 = k3 = 0;
+      k[0] = k[1] = k[2] = k[3] = 0;
        assert(0);
        break;
     }
  
-   if (sampler->mag_img_filter == PIPE_TEX_FILTER_LINEAR) {
-      /* convert four pass/fail values to an intensity in [0,1] */
-      /*
-       * XXX this doesn't actually make much sense.
-       * We just average the result of four _pixels_ and output the same
-       * value for all of the four pixels of the quad.
-       * This really needs to work on the _samples_ i.e. inside the img filter.
-       */
-      val = 0.25F * (k0 + k1 + k2 + k3);
-
-      /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
-      for (j = 0; j < 4; j++) {
-         rgba[0][j] = rgba[1][j] = rgba[2][j] = val;
-         rgba[3][j] = 1.0F;
-      }
-   } else {
-      for (j = 0; j < 4; j++) {
-         rgba[0][j] = k0;
-         rgba[1][j] = k1;
-         rgba[2][j] = k2;
-         rgba[3][j] = 1.0F;
-      }
+   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+      rgba[0][j] = k[j];
+      rgba[1][j] = k[j];
+      rgba[2][j] = k[j];
+      rgba[3][j] = 1.0F;
     }
  }
  
@@ -2479,7 +2586,7 @@ get_nearest_unorm_wrap(unsigned mode)
     case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
        return wrap_nearest_unorm_clamp_to_border;
     default:
-      assert(0);
+      debug_printf("illegal wrap mode %d with non-normalized coords\n", mode);
        return wrap_nearest_unorm_clamp;
     }
  }
@@ -2523,7 +2630,7 @@ get_linear_unorm_wrap(unsigned mode)
     case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
        return wrap_linear_unorm_clamp_to_border;
     default:
-      assert(0);
+      debug_printf("illegal wrap mode %d with non-normalized coords\n", mode);
        return wrap_linear_unorm_clamp;
     }
  }
@@ -2800,11 +2907,21 @@ sp_get_dims(struct sp_sampler_view *sp_sview, int level,
     const struct pipe_sampler_view *view = &sp_sview->base;
     const struct pipe_resource *texture = view->texture;
  
+   if (texture->target == PIPE_BUFFER) {
+      dims[0] = (view->u.buf.last_element - view->u.buf.first_element) + 1;
+      /* the other values are undefined, but let's avoid potential valgrind
+       * warnings.
+       */
+      dims[1] = dims[2] = dims[3] = 0;
+      return;
+   }
+
     /* undefined according to EXT_gpu_program */
     level += view->u.tex.first_level;
     if (level > view->u.tex.last_level)
        return;
  
+   dims[3] = view->u.tex.last_level - view->u.tex.first_level + 1;
     dims[0] = u_minify(texture->width0, level);
  
     switch(texture->target) {
@@ -2829,9 +2946,6 @@ sp_get_dims(struct sp_sampler_view *sp_sview, int level,
        dims[1] = u_minify(texture->height0, level);
        dims[2] = (view->u.tex.last_layer - view->u.tex.first_layer + 1) / 6;
        break;
-   case PIPE_BUFFER:
-      dims[0] /= util_format_get_blocksize(view->format);
-      return;
     default:
        assert(!"unexpected texture target in sp_get_dims()");
        return;
@@ -3092,7 +3206,11 @@ sp_tgsi_get_dims(struct tgsi_sampler *tgsi_sampler,
     struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
  
     assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
-   /* TODO should have defined behavior if no texture is bound. */
+   /* always have a view here but texture is NULL if no sampler view was set. */
+   if (!sp_samp->sp_sview[sview_index].base.texture) {
+      dims[0] = dims[1] = dims[2] = dims[3] = 0;
+      return;
+   }
     sp_get_dims(&sp_samp->sp_sview[sview_index], level, dims);
  }
  
@@ -3116,8 +3234,16 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler,
     assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
     assert(sampler_index < PIPE_MAX_SAMPLERS);
     assert(sp_samp->sp_sampler[sampler_index]);
-   /* FIXME should have defined behavior if no texture is bound. */
-   assert(sp_samp->sp_sview[sview_index].get_samples);
+   /* always have a view here but texture is NULL if no sampler view was set. */
+   if (!sp_samp->sp_sview[sview_index].base.texture) {
+      int i, j;
+      for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
+         for (i = 0; i < TGSI_QUAD_SIZE; i++) {
+            rgba[j][i] = 0.0f;
+         }
+      }
+      return;
+   }
     sp_samp->sp_sview[sview_index].get_samples(&sp_samp->sp_sview[sview_index],
                                                sp_samp->sp_sampler[sampler_index],
                                                s, t, p, c0, lod, control, rgba);
@@ -3135,8 +3261,16 @@ sp_tgsi_get_texel(struct tgsi_sampler *tgsi_sampler,
     struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
  
     assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
-   /* FIXME should have defined behavior if no texture is bound. */
-   assert(sp_samp->sp_sview[sview_index].base.texture);
+   /* always have a view here but texture is NULL if no sampler view was set. */
+   if (!sp_samp->sp_sview[sview_index].base.texture) {
+      int i, j;
+      for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
+         for (i = 0; i < TGSI_QUAD_SIZE; i++) {
+            rgba[j][i] = 0.0f;
+         }
+      }
+      return;
+   }
     sp_get_texels(&sp_samp->sp_sview[sview_index], i, j, k, lod, offset, rgba);
  }