Merge commit 'origin/7.8'
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_sample_soa.c
index 29845c65d033a15218114bc8496681aa40290908..395eaaba2692b585cff2036ff2d8109dae041a28 100644 (file)
@@ -211,7 +211,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
    const int dims = texture_dims(bld->static_state->target);
    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    LLVMValueRef offset;
-   LLVMValueRef packed;
+   LLVMValueRef i, j;
    LLVMValueRef use_border = NULL;
 
    /* use_border = x < 0 || x >= width || y < 0 || y >= height */
@@ -248,6 +248,43 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
       }
    }
 
+   /*
+    * Describe the coordinates in terms of pixel blocks.
+    *
+    * TODO: pixel blocks are power of two. LLVM should convert rem/div to
+    * bit arithmetic. Verify this.
+    */
+
+   if (bld->format_desc->block.width == 1) {
+      i = bld->uint_coord_bld.zero;
+   }
+   else {
+      LLVMValueRef block_width = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.width);
+      i = LLVMBuildURem(bld->builder, x, block_width, "");
+      x = LLVMBuildUDiv(bld->builder, x, block_width, "");
+   }
+
+   if (bld->format_desc->block.height == 1) {
+      j = bld->uint_coord_bld.zero;
+   }
+   else {
+      LLVMValueRef block_height = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.height);
+      j = LLVMBuildURem(bld->builder, y, block_height, "");
+      y = LLVMBuildUDiv(bld->builder, y, block_height, "");
+   }
+
+   /* convert x,y,z coords to linear offset from start of texture, in bytes */
+   offset = lp_build_sample_offset(&bld->uint_coord_bld,
+                                   bld->format_desc,
+                                   x, y, z, y_stride, z_stride);
+
+   lp_build_fetch_rgba_soa(bld->builder,
+                           bld->format_desc,
+                           bld->texel_type,
+                           data_ptr, offset,
+                           i, j,
+                           texel);
+
    /*
     * Note: if we find an app which frequently samples the texture border
     * we might want to implement a true conditional here to avoid sampling
@@ -263,36 +300,12 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
     * the texel color results with the border color.
     */
 
-   /* convert x,y,z coords to linear offset from start of texture, in bytes */
-   offset = lp_build_sample_offset(&bld->uint_coord_bld,
-                                   bld->format_desc,
-                                   x, y, z, y_stride, z_stride);
-
-   assert(bld->format_desc->block.width == 1);
-   assert(bld->format_desc->block.height == 1);
-   assert(bld->format_desc->block.bits <= bld->texel_type.width);
-
-   /* gather the texels from the texture */
-   packed = lp_build_gather(bld->builder,
-                            bld->texel_type.length,
-                            bld->format_desc->block.bits,
-                            bld->texel_type.width,
-                            data_ptr, offset);
-
-   texel[0] = texel[1] = texel[2] = texel[3] = NULL;
-
-   /* convert texels to float rgba */
-   lp_build_unpack_rgba_soa(bld->builder,
-                            bld->format_desc,
-                            bld->texel_type,
-                            packed, texel);
-
    if (use_border) {
       /* select texel color or border color depending on use_border */
       int chan;
       for (chan = 0; chan < 4; chan++) {
          LLVMValueRef border_chan =
-            lp_build_const_scalar(bld->texel_type,
+            lp_build_const_vec(bld->texel_type,
                                   bld->static_state->border_color[chan]);
          texel[chan] = lp_build_select(&bld->texel_bld, use_border,
                                        border_chan, texel[chan]);
@@ -457,8 +470,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
    struct lp_build_context *coord_bld = &bld->coord_bld;
    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
-   LLVMValueRef two = lp_build_const_scalar(coord_bld->type, 2.0);
-   LLVMValueRef half = lp_build_const_scalar(coord_bld->type, 0.5);
+   LLVMValueRef two = lp_build_const_vec(coord_bld->type, 2.0);
+   LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
    LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
    LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
    LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one);
@@ -512,7 +525,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
       else {
          LLVMValueRef min, max;
          /* clamp to [0.5, length - 0.5] */
-         min = lp_build_const_scalar(coord_bld->type, 0.5F);
+         min = lp_build_const_vec(coord_bld->type, 0.5F);
          max = lp_build_sub(coord_bld, length_f, min);
          coord = lp_build_clamp(coord_bld, coord, min, max);
       }
@@ -533,7 +546,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
          if (bld->static_state->normalized_coords) {
             /* min = -1.0 / (2 * length) = -0.5 / length */
             min = lp_build_mul(coord_bld,
-                               lp_build_const_scalar(coord_bld->type, -0.5F),
+                               lp_build_const_vec(coord_bld->type, -0.5F),
                                lp_build_rcp(coord_bld, length_f));
             /* max = 1.0 - min */
             max = lp_build_sub(coord_bld, coord_bld->one, min);
@@ -545,7 +558,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
          }
          else {
             /* clamp to [-0.5, length + 0.5] */
-            min = lp_build_const_scalar(coord_bld->type, -0.5F);
+            min = lp_build_const_vec(coord_bld->type, -0.5F);
             max = lp_build_sub(coord_bld, length_f, min);
             coord = lp_build_clamp(coord_bld, coord, min, max);
             coord = lp_build_sub(coord_bld, coord, half);
@@ -620,7 +633,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
          LLVMValueRef min, max;
          /* min = -1.0 / (2 * length) = -0.5 / length */
          min = lp_build_mul(coord_bld,
-                            lp_build_const_scalar(coord_bld->type, -0.5F),
+                            lp_build_const_vec(coord_bld->type, -0.5F),
                             lp_build_rcp(coord_bld, length_f));
          /* max = 1.0 - min */
          max = lp_build_sub(coord_bld, coord_bld->one, min);
@@ -665,7 +678,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
    struct lp_build_context *coord_bld = &bld->coord_bld;
    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
-   LLVMValueRef two = lp_build_const_scalar(coord_bld->type, 2.0);
+   LLVMValueRef two = lp_build_const_vec(coord_bld->type, 2.0);
    LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
    LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
    LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one);
@@ -708,7 +721,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
          }
          else {
             /* clamp to [0.5, length - 0.5] */
-            min = lp_build_const_scalar(coord_bld->type, 0.5F);
+            min = lp_build_const_vec(coord_bld->type, 0.5F);
             max = lp_build_sub(coord_bld, length_f, min);
          }
          /* coord = clamp(coord, min, max) */
@@ -724,7 +737,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
          if (bld->static_state->normalized_coords) {
             /* min = -1.0 / (2 * length) = -0.5 / length */
             min = lp_build_mul(coord_bld,
-                               lp_build_const_scalar(coord_bld->type, -0.5F),
+                               lp_build_const_vec(coord_bld->type, -0.5F),
                                lp_build_rcp(coord_bld, length_f));
             /* max = length - min */
             max = lp_build_sub(coord_bld, length_f, min);
@@ -733,7 +746,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
          }
          else {
             /* clamp to [-0.5, length + 0.5] */
-            min = lp_build_const_scalar(coord_bld->type, -0.5F);
+            min = lp_build_const_vec(coord_bld->type, -0.5F);
             max = lp_build_sub(coord_bld, length_f, min);
          }
          /* coord = clamp(coord, min, max) */
@@ -843,87 +856,98 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
                       LLVMValueRef depth)
 
 {
-   const int dims = texture_dims(bld->static_state->target);
-   struct lp_build_context *float_bld = &bld->float_bld;
-   LLVMValueRef lod_bias = LLVMConstReal(LLVMFloatType(), bld->static_state->lod_bias);
-   LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
-   LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(), bld->static_state->max_lod);
-
-   LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
-   LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
-   LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
-
-   LLVMValueRef s0, s1, s2;
-   LLVMValueRef t0, t1, t2;
-   LLVMValueRef r0, r1, r2;
-   LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
-   LLVMValueRef rho, lod;
-
-   /*
-    * dsdx = abs(s[1] - s[0]);
-    * dsdy = abs(s[2] - s[0]);
-    * dtdx = abs(t[1] - t[0]);
-    * dtdy = abs(t[2] - t[0]);
-    * drdx = abs(r[1] - r[0]);
-    * drdy = abs(r[2] - r[0]);
-    * XXX we're assuming a four-element quad in 2x2 layout here.
-    */
-   s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0");
-   s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1");
-   s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2");
-   dsdx = LLVMBuildSub(bld->builder, s1, s0, "");
-   dsdx = lp_build_abs(float_bld, dsdx);
-   dsdy = LLVMBuildSub(bld->builder, s2, s0, "");
-   dsdy = lp_build_abs(float_bld, dsdy);
-   if (dims > 1) {
-      t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0");
-      t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1");
-      t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2");
-      dtdx = LLVMBuildSub(bld->builder, t1, t0, "");
-      dtdx = lp_build_abs(float_bld, dtdx);
-      dtdy = LLVMBuildSub(bld->builder, t2, t0, "");
-      dtdy = lp_build_abs(float_bld, dtdy);
-      if (dims > 2) {
-         r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0");
-         r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1");
-         r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2");
-         drdx = LLVMBuildSub(bld->builder, r1, r0, "");
-         drdx = lp_build_abs(float_bld, drdx);
-         drdy = LLVMBuildSub(bld->builder, r2, r0, "");
-         drdy = lp_build_abs(float_bld, drdy);
-      }
+   if (bld->static_state->min_lod == bld->static_state->max_lod) {
+      /* User is forcing sampling from a particular mipmap level.
+       * This is hit during mipmap generation.
+       */
+      return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
    }
+   else {
+      const int dims = texture_dims(bld->static_state->target);
+      struct lp_build_context *float_bld = &bld->float_bld;
+      LLVMValueRef lod_bias = LLVMConstReal(LLVMFloatType(),
+                                            bld->static_state->lod_bias);
+      LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(),
+                                           bld->static_state->min_lod);
+      LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(),
+                                           bld->static_state->max_lod);
+
+      LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
+      LLVMValueRef index1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
+      LLVMValueRef index2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
+
+      LLVMValueRef s0, s1, s2;
+      LLVMValueRef t0, t1, t2;
+      LLVMValueRef r0, r1, r2;
+      LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
+      LLVMValueRef rho, lod;
+
+      /*
+       * dsdx = abs(s[1] - s[0]);
+       * dsdy = abs(s[2] - s[0]);
+       * dtdx = abs(t[1] - t[0]);
+       * dtdy = abs(t[2] - t[0]);
+       * drdx = abs(r[1] - r[0]);
+       * drdy = abs(r[2] - r[0]);
+       * XXX we're assuming a four-element quad in 2x2 layout here.
+       */
+      s0 = LLVMBuildExtractElement(bld->builder, s, index0, "s0");
+      s1 = LLVMBuildExtractElement(bld->builder, s, index1, "s1");
+      s2 = LLVMBuildExtractElement(bld->builder, s, index2, "s2");
+      dsdx = LLVMBuildSub(bld->builder, s1, s0, "");
+      dsdx = lp_build_abs(float_bld, dsdx);
+      dsdy = LLVMBuildSub(bld->builder, s2, s0, "");
+      dsdy = lp_build_abs(float_bld, dsdy);
+      if (dims > 1) {
+         t0 = LLVMBuildExtractElement(bld->builder, t, index0, "t0");
+         t1 = LLVMBuildExtractElement(bld->builder, t, index1, "t1");
+         t2 = LLVMBuildExtractElement(bld->builder, t, index2, "t2");
+         dtdx = LLVMBuildSub(bld->builder, t1, t0, "");
+         dtdx = lp_build_abs(float_bld, dtdx);
+         dtdy = LLVMBuildSub(bld->builder, t2, t0, "");
+         dtdy = lp_build_abs(float_bld, dtdy);
+         if (dims > 2) {
+            r0 = LLVMBuildExtractElement(bld->builder, r, index0, "r0");
+            r1 = LLVMBuildExtractElement(bld->builder, r, index1, "r1");
+            r2 = LLVMBuildExtractElement(bld->builder, r, index2, "r2");
+            drdx = LLVMBuildSub(bld->builder, r1, r0, "");
+            drdx = lp_build_abs(float_bld, drdx);
+            drdy = LLVMBuildSub(bld->builder, r2, r0, "");
+            drdy = lp_build_abs(float_bld, drdy);
+         }
+      }
 
-   /* Compute rho = max of all partial derivatives scaled by texture size.
-    * XXX this could be vectorized somewhat
-    */
-   rho = LLVMBuildMul(bld->builder,
-                      lp_build_max(float_bld, dsdx, dsdy),
-                      lp_build_int_to_float(float_bld, width), "");
-   if (dims > 1) {
-      LLVMValueRef max;
-      max = LLVMBuildMul(bld->builder,
-                         lp_build_max(float_bld, dtdx, dtdy),
-                         lp_build_int_to_float(float_bld, height), "");
-      rho = lp_build_max(float_bld, rho, max);
-      if (dims > 2) {
+      /* Compute rho = max of all partial derivatives scaled by texture size.
+       * XXX this could be vectorized somewhat
+       */
+      rho = LLVMBuildMul(bld->builder,
+                         lp_build_max(float_bld, dsdx, dsdy),
+                         lp_build_int_to_float(float_bld, width), "");
+      if (dims > 1) {
+         LLVMValueRef max;
          max = LLVMBuildMul(bld->builder,
-                            lp_build_max(float_bld, drdx, drdy),
-                            lp_build_int_to_float(float_bld, depth), "");
+                            lp_build_max(float_bld, dtdx, dtdy),
+                            lp_build_int_to_float(float_bld, height), "");
          rho = lp_build_max(float_bld, rho, max);
+         if (dims > 2) {
+            max = LLVMBuildMul(bld->builder,
+                               lp_build_max(float_bld, drdx, drdy),
+                               lp_build_int_to_float(float_bld, depth), "");
+            rho = lp_build_max(float_bld, rho, max);
+         }
       }
-   }
 
-   /* compute lod = log2(rho) */
-   lod = lp_build_log2(float_bld, rho);
+      /* compute lod = log2(rho) */
+      lod = lp_build_log2(float_bld, rho);
 
-   /* add lod bias */
-   lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "LOD bias");
+      /* add lod bias */
+      lod = LLVMBuildAdd(bld->builder, lod, lod_bias, "LOD bias");
 
-   /* clamp lod */
-   lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
+      /* clamp lod */
+      lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
 
-   return lod;
+      return lod;
+   }
 }
 
 
@@ -1215,7 +1239,7 @@ static LLVMValueRef
 lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord)
 {
    /* ima = -0.5 / abs(coord); */
-   LLVMValueRef negHalf = lp_build_const_scalar(coord_bld->type, -0.5);
+   LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5);
    LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
    LLVMValueRef ima = lp_build_mul(coord_bld, negHalf,
                                    lp_build_rcp(coord_bld, absCoord));
@@ -1235,7 +1259,7 @@ lp_build_cube_coord(struct lp_build_context *coord_bld,
                     LLVMValueRef coord, LLVMValueRef ima)
 {
    /* return negate(coord) * ima * sign + 0.5; */
-   LLVMValueRef half = lp_build_const_scalar(coord_bld->type, 0.5);
+   LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
    LLVMValueRef res;
 
    assert(negate_coord == +1 || negate_coord == -1);
@@ -1697,7 +1721,7 @@ lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
                           LLVMValueRef packed,
                           LLVMValueRef *rgba)
 {
-   LLVMValueRef mask = lp_build_int_const_scalar(dst_type, 0xff);
+   LLVMValueRef mask = lp_build_const_int_vec(dst_type, 0xff);
    unsigned chan;
 
    /* Decode the input vector components */
@@ -1709,7 +1733,7 @@ lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
       input = packed;
 
       if(start)
-         input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(dst_type, start), "");
+         input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(dst_type, start), "");
 
       if(stop < 32)
          input = LLVMBuildAnd(builder, input, mask, "");
@@ -1771,17 +1795,17 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
    t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
 
    /* subtract 0.5 (add -128) */
-   i32_c128 = lp_build_int_const_scalar(i32.type, -128);
+   i32_c128 = lp_build_const_int_vec(i32.type, -128);
    s = LLVMBuildAdd(builder, s, i32_c128, "");
    t = LLVMBuildAdd(builder, t, i32_c128, "");
 
    /* compute floor (shift right 8) */
-   i32_c8 = lp_build_int_const_scalar(i32.type, 8);
+   i32_c8 = lp_build_const_int_vec(i32.type, 8);
    s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
    t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
 
    /* compute fractional part (AND with 0xff) */
-   i32_c255 = lp_build_int_const_scalar(i32.type, 255);
+   i32_c255 = lp_build_const_int_vec(i32.type, 255);
    s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
    t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
 
@@ -1948,7 +1972,7 @@ lp_build_sample_compare(struct lp_build_sample_context *bld,
    }
 
    assert(res);
-   res = lp_build_mul(texel_bld, res, lp_build_const_scalar(texel_bld->type, 0.25));
+   res = lp_build_mul(texel_bld, res, lp_build_const_vec(texel_bld->type, 0.25));
 
    /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
    for(chan = 0; chan < 3; ++chan)
@@ -2026,7 +2050,7 @@ lp_build_sample_soa(LLVMBuilderRef builder,
    height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
    depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth);
 
-   if (lp_format_is_rgba8(bld.format_desc) &&
+   if (util_format_is_rgba8_variant(bld.format_desc) &&
        static_state->target == PIPE_TEXTURE_2D &&
        static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
        static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&