softpipe: fix shadow sampling

[mesa.git] / src / gallium / drivers / softpipe / sp_tex_sample.c
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c

index 4121857b869ab7f51e84e40face5566560f2c3e7..274e56b40c71b8ad80de14368ebbe6321d208df2 100644 (file)
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -1,6 +1,6 @@
  /**************************************************************************
   * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2007 VMware, Inc.
   * All Rights Reserved.
   * Copyright 2008-2010 VMware, Inc.  All rights reserved.
   *
@@ -19,7 +19,7 @@
   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
   * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
   * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
   * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
   * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -608,6 +608,48 @@ get_texel_2d(const struct sp_sampler_view *sp_sview,
     }
  }
  
+
+/*
+ * Here's the complete logic (HOLY CRAP) for finding next face and doing the
+ * corresponding coord wrapping, implemented by get_next_face,
+ * get_next_xcoord, get_next_ycoord.
+ * Read each row like this (taking the first row as the example):
+ * if the face is +x and the s coord falls below zero, then the
+ * new face is +z, new s is max, and new t is the old t
+ * (max is always cube size - 1).
+ *
+ * +x s- -> +z: s = max,   t = t
+ * +x s+ -> -z: s = 0,     t = t
+ * +x t- -> +y: s = max,   t = max-s
+ * +x t+ -> -y: s = max,   t = s
+ *
+ * -x s- -> -z: s = max,   t = t
+ * -x s+ -> +z: s = 0,     t = t
+ * -x t- -> +y: s = 0,     t = s
+ * -x t+ -> -y: s = 0,     t = max-s
+ *
+ * +y s- -> -x: s = t,     t = 0
+ * +y s+ -> +x: s = max-t, t = 0
+ * +y t- -> -z: s = max-s, t = 0
+ * +y t+ -> +z: s = s,     t = 0
+ *
+ * -y s- -> -x: s = max-t, t = max
+ * -y s+ -> +x: s = t,     t = max
+ * -y t- -> +z: s = s,     t = max
+ * -y t+ -> -z: s = max-s, t = max
+ *
+ * +z s- -> -x: s = max,   t = t
+ * +z s+ -> +x: s = 0,     t = t
+ * +z t- -> +y: s = s,     t = max
+ * +z t+ -> -y: s = s,     t = 0
+ *
+ * -z s- -> +x: s = max,   t = t
+ * -z s+ -> -x: s = 0,     t = t
+ * -z t- -> +y: s = max-s, t = 0
+ * -z t+ -> -y: s = max-s, t = max
+ */
+
+
  /*
   * seamless cubemap neighbour array.
   * this array is used to find the adjacent face in each of 4 directions,
@@ -617,49 +659,104 @@ static const unsigned face_array[PIPE_TEX_FACE_MAX][4] = {
     /* pos X first then neg X is Z different, Y the same */
     /* PIPE_TEX_FACE_POS_X,*/
     { PIPE_TEX_FACE_POS_Z, PIPE_TEX_FACE_NEG_Z,
-     PIPE_TEX_FACE_NEG_Y, PIPE_TEX_FACE_POS_Y },
+     PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y },
     /* PIPE_TEX_FACE_NEG_X */
     { PIPE_TEX_FACE_NEG_Z, PIPE_TEX_FACE_POS_Z,
-     PIPE_TEX_FACE_NEG_Y, PIPE_TEX_FACE_POS_Y },
+     PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y },
  
     /* pos Y first then neg Y is X different, X the same */
     /* PIPE_TEX_FACE_POS_Y */
     { PIPE_TEX_FACE_NEG_X, PIPE_TEX_FACE_POS_X,
-     PIPE_TEX_FACE_POS_Z, PIPE_TEX_FACE_NEG_Z },
+     PIPE_TEX_FACE_NEG_Z, PIPE_TEX_FACE_POS_Z },
  
     /* PIPE_TEX_FACE_NEG_Y */
     { PIPE_TEX_FACE_NEG_X, PIPE_TEX_FACE_POS_X,
-     PIPE_TEX_FACE_NEG_Z, PIPE_TEX_FACE_POS_Z },
+     PIPE_TEX_FACE_POS_Z, PIPE_TEX_FACE_NEG_Z },
  
     /* pos Z first then neg Y is X different, X the same */
     /* PIPE_TEX_FACE_POS_Z */
     { PIPE_TEX_FACE_NEG_X, PIPE_TEX_FACE_POS_X,
-     PIPE_TEX_FACE_NEG_Y, PIPE_TEX_FACE_POS_Y },
+     PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y },
  
     /* PIPE_TEX_FACE_NEG_Z */
     { PIPE_TEX_FACE_POS_X, PIPE_TEX_FACE_NEG_X,
-     PIPE_TEX_FACE_NEG_Y, PIPE_TEX_FACE_POS_Y }
+     PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y }
  };
  
  static INLINE unsigned
-get_next_face(unsigned face, int x, int y)
+get_next_face(unsigned face, int idx)
  {
-   int idx = 0;
+   return face_array[face][idx];
+}
  
-   if (x == 0 && y == 0)
-      return face;
-   if (x == -1)
-      idx = 0;
-   else if (x == 1)
-      idx = 1;
-   else if (y == -1)
-      idx = 2;
-   else if (y == 1)
-      idx = 3;
+/*
+ * return a new xcoord based on old face, old coords, max (cube size - 1)
+ * and fall_off_index (0 for x-, 1 for x+, 2 for y-, 3 for y+)
+ */
+static INLINE int
+get_next_xcoord(unsigned face, unsigned fall_off_index, int max, int xc, int yc)
+{
+   if ((face == 0 && fall_off_index != 1) ||
+       (face == 1 && fall_off_index == 0) ||
+       (face == 4 && fall_off_index == 0) ||
+       (face == 5 && fall_off_index == 0)) {
+      return max;
+   }
+   if ((face == 1 && fall_off_index != 0) ||
+       (face == 0 && fall_off_index == 1) ||
+       (face == 4 && fall_off_index == 1) ||
+       (face == 5 && fall_off_index == 1)) {
+      return 0;
+   }
+   if ((face == 4 && fall_off_index >= 2) ||
+       (face == 2 && fall_off_index == 3) ||
+       (face == 3 && fall_off_index == 2)) {
+      return xc;
+   }
+   if ((face == 5 && fall_off_index >= 2) ||
+       (face == 2 && fall_off_index == 2) ||
+       (face == 3 && fall_off_index == 3)) {
+      return max - xc;
+   }
+   if ((face == 2 && fall_off_index == 0) ||
+       (face == 3 && fall_off_index == 1)) {
+      return yc;
+   }
+   /* (face == 2 && fall_off_index == 1) ||
+      (face == 3 && fall_off_index == 0)) */
+   return max - yc;
+}
  
-   return face_array[face][idx];
+/*
+ * return a new ycoord based on old face, old coords, max (cube size - 1)
+ * and fall_off_index (0 for x-, 1 for x+, 2 for y-, 3 for y+)
+ */
+static INLINE int
+get_next_ycoord(unsigned face, unsigned fall_off_index, int max, int xc, int yc)
+{
+   if ((fall_off_index <= 1) && (face <= 1 || face >= 4)) {
+      return yc;
+   }
+   if (face == 2 ||
+       (face == 4 && fall_off_index == 3) ||
+       (face == 5 && fall_off_index == 2)) {
+      return 0;
+   }
+   if (face == 3 ||
+       (face == 4 && fall_off_index == 2) ||
+       (face == 5 && fall_off_index == 3)) {
+      return max;
+   }
+   if ((face == 0 && fall_off_index == 3) ||
+       (face == 1 && fall_off_index == 2)) {
+      return xc;
+   }
+   /* (face == 0 && fall_off_index == 2) ||
+      (face == 1 && fall_off_index == 3) */
+   return max - xc;
  }
  
+
  static INLINE const float *
  get_texel_cube_seamless(const struct sp_sampler_view *sp_sview,
                          union tex_tile_address addr, int x, int y,
@@ -668,44 +765,47 @@ get_texel_cube_seamless(const struct sp_sampler_view *sp_sview,
     const struct pipe_resource *texture = sp_sview->base.texture;
     unsigned level = addr.bits.level;
     unsigned face = addr.bits.face;
-   int new_x, new_y;
-   int max_x, max_y;
-   int c;
+   int new_x, new_y, max_x;
  
     max_x = (int) u_minify(texture->width0, level);
-   max_y = (int) u_minify(texture->height0, level);
+
+   assert(texture->width0 == texture->height0);
     new_x = x;
     new_y = y;
  
-   /* the corner case */
-   if ((x < 0 || x >= max_x) &&
-       (y < 0 || y >= max_y)) {
-      const float *c1, *c2, *c3;
-      int fx = x < 0 ? 0 : max_x - 1;
-      int fy = y < 0 ? 0 : max_y - 1;
-      c1 = get_texel_2d_no_border( sp_sview, addr, fx, fy);
-      addr.bits.face = get_next_face(face, (x < 0) ? -1 : 1, 0);
-      c2 = get_texel_2d_no_border( sp_sview, addr, (x < 0) ? max_x - 1 : 0, fy);
-      addr.bits.face = get_next_face(face, 0, (y < 0) ? -1 : 1);
-      c3 = get_texel_2d_no_border( sp_sview, addr, fx, (y < 0) ?  max_y - 1 : 0);
-      for (c = 0; c < TGSI_QUAD_SIZE; c++)
-         corner[c] = CLAMP((c1[c] + c2[c] + c3[c]), 0.0F, 1.0F) / 3;
-
-      return corner;
-   }
     /* change the face */
     if (x < 0) {
-      new_x = max_x - 1;
-      face = get_next_face(face, -1, 0);
+      /*
+       * Cheat with corners. They are difficult, and I believe that because we
+       * don't get per-pixel faces we can actually have multiple corner texels
+       * per pixel, which screws things up majorly in any case (the per-spec
+       * behavior is to average the 3 remaining texels, which we might not have).
+       * Hence just make sure that the 2nd coord is clamped; this simply picks
+       * the sample which would have fallen off the x coord, but not the y coord.
+       * So the filter weight of the samples will be wrong, but at least this
+       * ensures that only valid texels near the corner are used.
+       */
+      if (y < 0 || y >= max_x) {
+         y = CLAMP(y, 0, max_x - 1);
+      }
+      new_x = get_next_xcoord(face, 0, max_x -1, x, y);
+      new_y = get_next_ycoord(face, 0, max_x -1, x, y);
+      face = get_next_face(face, 0);
     } else if (x >= max_x) {
-      new_x = 0;
-      face = get_next_face(face, 1, 0);
+      if (y < 0 || y >= max_x) {
+         y = CLAMP(y, 0, max_x - 1);
+      }
+      new_x = get_next_xcoord(face, 1, max_x -1, x, y);
+      new_y = get_next_ycoord(face, 1, max_x -1, x, y);
+      face = get_next_face(face, 1);
     } else if (y < 0) {
-      new_y = max_y - 1;
-      face = get_next_face(face, 0, -1);
-   } else if (y >= max_y) {
-      new_y = 0;
-      face = get_next_face(face, 0, 1);
+      new_x = get_next_xcoord(face, 2, max_x -1, x, y);
+      new_y = get_next_ycoord(face, 2, max_x -1, x, y);
+      face = get_next_face(face, 2);
+   } else if (y >= max_x) {
+      new_x = get_next_xcoord(face, 3, max_x -1, x, y);
+      new_y = get_next_ycoord(face, 3, max_x -1, x, y);
+      face = get_next_face(face, 3);
     }
  
     addr.bits.face = face;
@@ -1241,6 +1341,7 @@ img_filter_cube_nearest(struct sp_sampler_view *sp_sview,
        wrap_nearest_clamp_to_edge(s, width, &x);
        wrap_nearest_clamp_to_edge(t, height, &y);
     } else {
+      /* Would probably make sense to ignore mode and just do edge clamp */
        sp_samp->nearest_texcoord_s(s, width, &x);
        sp_samp->nearest_texcoord_t(t, height, &y);
     }
@@ -1525,9 +1626,11 @@ img_filter_cube_linear(struct sp_sampler_view *sp_sview,
      * always apply wrap mode CLAMP_TO_BORDER.
      */
     if (sp_samp->base.seamless_cube_map) {
+      /* Note this is a bit overkill, actual clamping is not required */
        wrap_linear_clamp_to_border(s, width, &x0, &x1, &xw);
        wrap_linear_clamp_to_border(t, height, &y0, &y1, &yw);
     } else {
+      /* Would probably make sense to ignore mode and just do edge clamp */
        sp_samp->linear_texcoord_s(s, width,  &x0, &x1, &xw);
        sp_samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
     }
@@ -2291,9 +2394,9 @@ sample_compare(struct sp_sampler_view *sp_sview,
                 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
  {
     const struct pipe_sampler_state *sampler = &sp_samp->base;
-   int j, k0, k1, k2, k3;
-   float val;
-   float pc0, pc1, pc2, pc3;
+   int j;
+   int k[4];
+   float pc[4];
     const struct util_format_description *format_desc;
     unsigned chan_type;
  
@@ -2306,20 +2409,20 @@ sample_compare(struct sp_sampler_view *sp_sview,
  
     if (sp_sview->base.texture->target == PIPE_TEXTURE_2D_ARRAY ||
         sp_sview->base.texture->target == PIPE_TEXTURE_CUBE) {
-      pc0 = c0[0];
-      pc1 = c0[1];
-      pc2 = c0[2];
-      pc3 = c0[3];
+      pc[0] = c0[0];
+      pc[1] = c0[1];
+      pc[2] = c0[2];
+      pc[3] = c0[3];
     } else if (sp_sview->base.texture->target == PIPE_TEXTURE_CUBE_ARRAY) {
-      pc0 = c1[0];
-      pc1 = c1[1];
-      pc2 = c1[2];
-      pc3 = c1[3];
+      pc[0] = c1[0];
+      pc[1] = c1[1];
+      pc[2] = c1[2];
+      pc[3] = c1[3];
     } else {
-      pc0 = p[0];
-      pc1 = p[1];
-      pc2 = p[2];
-      pc3 = p[3];
+      pc[0] = p[0];
+      pc[1] = p[1];
+      pc[2] = p[2];
+      pc[3] = p[3];
     }
  
     format_desc = util_format_description(sp_sview->base.format);
@@ -2333,84 +2436,67 @@ sample_compare(struct sp_sampler_view *sp_sview,
         * doesn't happen with floats. Technically also should do comparison
         * in texture format (quantization!).
         */
-      pc0 = CLAMP(pc0, 0.0F, 1.0F);
-      pc1 = CLAMP(pc1, 0.0F, 1.0F);
-      pc2 = CLAMP(pc2, 0.0F, 1.0F);
-      pc3 = CLAMP(pc3, 0.0F, 1.0F);
+      pc[0] = CLAMP(pc[0], 0.0F, 1.0F);
+      pc[1] = CLAMP(pc[1], 0.0F, 1.0F);
+      pc[2] = CLAMP(pc[2], 0.0F, 1.0F);
+      pc[3] = CLAMP(pc[3], 0.0F, 1.0F);
     }
  
     /* compare four texcoords vs. four texture samples */
     switch (sampler->compare_func) {
     case PIPE_FUNC_LESS:
-      k0 = pc0 < rgba[0][0];
-      k1 = pc1 < rgba[0][1];
-      k2 = pc2 < rgba[0][2];
-      k3 = pc3 < rgba[0][3];
+      k[0] = pc[0] < rgba[0][0];
+      k[1] = pc[1] < rgba[0][1];
+      k[2] = pc[2] < rgba[0][2];
+      k[3] = pc[3] < rgba[0][3];
        break;
     case PIPE_FUNC_LEQUAL:
-      k0 = pc0 <= rgba[0][0];
-      k1 = pc1 <= rgba[0][1];
-      k2 = pc2 <= rgba[0][2];
-      k3 = pc3 <= rgba[0][3];
+      k[0] = pc[0] <= rgba[0][0];
+      k[1] = pc[1] <= rgba[0][1];
+      k[2] = pc[2] <= rgba[0][2];
+      k[3] = pc[3] <= rgba[0][3];
        break;
     case PIPE_FUNC_GREATER:
-      k0 = pc0 > rgba[0][0];
-      k1 = pc1 > rgba[0][1];
-      k2 = pc2 > rgba[0][2];
-      k3 = pc3 > rgba[0][3];
+      k[0] = pc[0] > rgba[0][0];
+      k[1] = pc[1] > rgba[0][1];
+      k[2] = pc[2] > rgba[0][2];
+      k[3] = pc[3] > rgba[0][3];
        break;
     case PIPE_FUNC_GEQUAL:
-      k0 = pc0 >= rgba[0][0];
-      k1 = pc1 >= rgba[0][1];
-      k2 = pc2 >= rgba[0][2];
-      k3 = pc3 >= rgba[0][3];
+      k[0] = pc[0] >= rgba[0][0];
+      k[1] = pc[1] >= rgba[0][1];
+      k[2] = pc[2] >= rgba[0][2];
+      k[3] = pc[3] >= rgba[0][3];
        break;
     case PIPE_FUNC_EQUAL:
-      k0 = pc0 == rgba[0][0];
-      k1 = pc1 == rgba[0][1];
-      k2 = pc2 == rgba[0][2];
-      k3 = pc3 == rgba[0][3];
+      k[0] = pc[0] == rgba[0][0];
+      k[1] = pc[1] == rgba[0][1];
+      k[2] = pc[2] == rgba[0][2];
+      k[3] = pc[3] == rgba[0][3];
        break;
     case PIPE_FUNC_NOTEQUAL:
-      k0 = pc0 != rgba[0][0];
-      k1 = pc1 != rgba[0][1];
-      k2 = pc2 != rgba[0][2];
-      k3 = pc3 != rgba[0][3];
+      k[0] = pc[0] != rgba[0][0];
+      k[1] = pc[1] != rgba[0][1];
+      k[2] = pc[2] != rgba[0][2];
+      k[3] = pc[3] != rgba[0][3];
        break;
     case PIPE_FUNC_ALWAYS:
-      k0 = k1 = k2 = k3 = 1;
+      k[0] = k[1] = k[2] = k[3] = 1;
        break;
     case PIPE_FUNC_NEVER:
-      k0 = k1 = k2 = k3 = 0;
+      k[0] = k[1] = k[2] = k[3] = 0;
        break;
     default:
-      k0 = k1 = k2 = k3 = 0;
+      k[0] = k[1] = k[2] = k[3] = 0;
        assert(0);
        break;
     }
  
-   if (sampler->mag_img_filter == PIPE_TEX_FILTER_LINEAR) {
-      /* convert four pass/fail values to an intensity in [0,1] */
-      /*
-       * XXX this doesn't actually make much sense.
-       * We just average the result of four _pixels_ and output the same
-       * value for all of the four pixels of the quad.
-       * This really needs to work on the _samples_ i.e. inside the img filter.
-       */
-      val = 0.25F * (k0 + k1 + k2 + k3);
-
-      /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
-      for (j = 0; j < 4; j++) {
-         rgba[0][j] = rgba[1][j] = rgba[2][j] = val;
-         rgba[3][j] = 1.0F;
-      }
-   } else {
-      for (j = 0; j < 4; j++) {
-         rgba[0][j] = k0;
-         rgba[1][j] = k1;
-         rgba[2][j] = k2;
-         rgba[3][j] = 1.0F;
-      }
+   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
+      rgba[0][j] = k[j];
+      rgba[1][j] = k[j];
+      rgba[2][j] = k[j];
+      rgba[3][j] = 1.0F;
     }
  }
  
@@ -3112,7 +3198,11 @@ sp_tgsi_get_dims(struct tgsi_sampler *tgsi_sampler,
     struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
  
     assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
-   /* TODO should have defined behavior if no texture is bound. */
+   /* always have a view here but texture is NULL if no sampler view was set. */
+   if (!sp_samp->sp_sview[sview_index].base.texture) {
+      dims[0] = dims[1] = dims[2] = dims[3] = 0;
+      return;
+   }
     sp_get_dims(&sp_samp->sp_sview[sview_index], level, dims);
  }
  
@@ -3136,8 +3226,16 @@ sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler,
     assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
     assert(sampler_index < PIPE_MAX_SAMPLERS);
     assert(sp_samp->sp_sampler[sampler_index]);
-   /* FIXME should have defined behavior if no texture is bound. */
-   assert(sp_samp->sp_sview[sview_index].get_samples);
+   /* always have a view here but texture is NULL if no sampler view was set. */
+   if (!sp_samp->sp_sview[sview_index].base.texture) {
+      int i, j;
+      for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
+         for (i = 0; i < TGSI_QUAD_SIZE; i++) {
+            rgba[j][i] = 0.0f;
+         }
+      }
+      return;
+   }
     sp_samp->sp_sview[sview_index].get_samples(&sp_samp->sp_sview[sview_index],
                                                sp_samp->sp_sampler[sampler_index],
                                                s, t, p, c0, lod, control, rgba);
@@ -3155,8 +3253,16 @@ sp_tgsi_get_texel(struct tgsi_sampler *tgsi_sampler,
     struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
  
     assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
-   /* FIXME should have defined behavior if no texture is bound. */
-   assert(sp_samp->sp_sview[sview_index].base.texture);
+   /* always have a view here but texture is NULL if no sampler view was set. */
+   if (!sp_samp->sp_sview[sview_index].base.texture) {
+      int i, j;
+      for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
+         for (i = 0; i < TGSI_QUAD_SIZE; i++) {
+            rgba[j][i] = 0.0f;
+         }
+      }
+      return;
+   }
     sp_get_texels(&sp_samp->sp_sview[sview_index], i, j, k, lod, offset, rgba);
  }