gallivm: Eliminate unsigned integer arithmetic from texture coordinates.
author José Fonseca <jfonseca@vmware.com>
Sun, 10 Oct 2010 22:55:24 +0000 (23:55 +0100)
committer José Fonseca <jfonseca@vmware.com>
Mon, 11 Oct 2010 07:14:09 +0000 (08:14 +0100)
SSE support for 32-bit and 16-bit unsigned arithmetic is not complete, and
using unsigned operations can easily result in inefficient code.

In most cases signed/unsigned doesn't make a difference, such as for
integer texture coordinates.

So remove uint_coord_type and uint_coord_bld to prevent inefficient
operations from sneaking in in the future.

src/gallium/auxiliary/gallivm/lp_bld_sample.h
src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c

index ce2285446acfc359d8c53b5a6306524b8ac9b88b..ffed27cee8302ee0b4902c434709fe102320e8b8 100644 (file)
@@ -197,10 +197,6 @@ struct lp_build_sample_context
    struct lp_type coord_type;
    struct lp_build_context coord_bld;
 
-   /** Unsigned integer coordinates */
-   struct lp_type uint_coord_type;
-   struct lp_build_context uint_coord_bld;
-
    /** Signed integer coordinates */
    struct lp_type int_coord_type;
    struct lp_build_context int_coord_bld;
index be5d9a261acf5a5cc812cdb2ce4b6aea184e646a..641d24b5b6db6c8b278bcdf53ac1a6c715b77464 100644 (file)
@@ -81,11 +81,10 @@ lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
                                  LLVMValueRef *out_offset,
                                  LLVMValueRef *out_i)
 {
-   struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    LLVMValueRef length_minus_one;
 
-   length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
+   length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
 
    switch(wrap_mode) {
    case PIPE_TEX_WRAP_REPEAT:
@@ -93,7 +92,7 @@ lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
          coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
       else {
          /* Add a bias to the texcoord to handle negative coords */
-         LLVMValueRef bias = lp_build_mul_imm(uint_coord_bld, length, 1024);
+         LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024);
          coord = LLVMBuildAdd(bld->builder, coord, bias, "");
          coord = LLVMBuildURem(bld->builder, coord, length, "");
       }
@@ -114,7 +113,7 @@ lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
       assert(0);
    }
 
-   lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride,
+   lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride,
                                   out_offset, out_i);
 }
 
@@ -147,7 +146,6 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
                                 LLVMValueRef *i0,
                                 LLVMValueRef *i1)
 {
-   struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    LLVMValueRef length_minus_one;
    LLVMValueRef lmask, umask, mask;
@@ -189,8 +187,8 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
     * multiplication.
     */
 
-   *i0 = uint_coord_bld->zero;
-   *i1 = uint_coord_bld->zero;
+   *i0 = int_coord_bld->zero;
+   *i1 = int_coord_bld->zero;
 
    length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
 
@@ -201,7 +199,7 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
       }
       else {
          /* Add a bias to the texcoord to handle negative coords */
-         LLVMValueRef bias = lp_build_mul_imm(uint_coord_bld, length, 1024);
+         LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024);
          coord0 = LLVMBuildAdd(bld->builder, coord0, bias, "");
          coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
       }
@@ -209,9 +207,9 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
       mask = lp_build_compare(bld->builder, int_coord_bld->type,
                               PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
 
-      *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
+      *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
       *offset1 = LLVMBuildAnd(bld->builder,
-                              lp_build_add(uint_coord_bld, *offset0, stride),
+                              lp_build_add(int_coord_bld, *offset0, stride),
                               mask, "");
       break;
 
@@ -226,8 +224,8 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
 
       mask = LLVMBuildAnd(bld->builder, lmask, umask, "");
 
-      *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
-      *offset1 = lp_build_add(uint_coord_bld,
+      *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
+      *offset1 = lp_build_add(int_coord_bld,
                               *offset0,
                               LLVMBuildAnd(bld->builder, stride, mask, ""));
       break;
@@ -240,8 +238,8 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
    default:
       assert(0);
-      *offset0 = uint_coord_bld->zero;
-      *offset1 = uint_coord_bld->zero;
+      *offset0 = int_coord_bld->zero;
+      *offset1 = int_coord_bld->zero;
       break;
    }
 }
@@ -327,7 +325,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
       r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
 
    /* get pixel, row, image strides */
-   x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
+   x_stride = lp_build_const_vec(bld->int_coord_bld.type,
                                  bld->format_desc->block.bits/8);
 
    /* Do texcoord wrapping, compute texel offset */
@@ -346,7 +344,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
                                        bld->static_state->pot_height,
                                        bld->static_state->wrap_t,
                                        &y_offset, &y_subcoord);
-      offset = lp_build_add(&bld->uint_coord_bld, offset, y_offset);
+      offset = lp_build_add(&bld->int_coord_bld, offset, y_offset);
       if (dims >= 3) {
          LLVMValueRef z_offset;
          lp_build_sample_wrap_nearest_int(bld,
@@ -355,13 +353,13 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
                                           bld->static_state->pot_height,
                                           bld->static_state->wrap_r,
                                           &z_offset, &z_subcoord);
-         offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset);
+         offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
       }
       else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
          LLVMValueRef z_offset;
          /* The r coord is the cube face in [0,5] */
-         z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec);
-         offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset);
+         z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
+         offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
       }
    }
 
@@ -522,7 +520,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
       r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");
 
    /* get pixel, row and image strides */
-   x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
+   x_stride = lp_build_const_vec(bld->int_coord_bld.type,
                                  bld->format_desc->block.bits/8);
    y_stride = row_stride_vec;
    z_stride = img_stride_vec;
@@ -553,9 +551,9 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
 
       for (z = 0; z < 2; z++) {
          for (x = 0; x < 2; x++) {
-            offset[z][0][x] = lp_build_add(&bld->uint_coord_bld,
+            offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
                                            offset[z][0][x], y_offset0);
-            offset[z][1][x] = lp_build_add(&bld->uint_coord_bld,
+            offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
                                            offset[z][1][x], y_offset1);
          }
       }
@@ -571,20 +569,20 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld,
                                       &z_subcoord[0], &z_subcoord[1]);
       for (y = 0; y < 2; y++) {
          for (x = 0; x < 2; x++) {
-            offset[0][y][x] = lp_build_add(&bld->uint_coord_bld,
+            offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
                                            offset[0][y][x], z_offset0);
-            offset[1][y][x] = lp_build_add(&bld->uint_coord_bld,
+            offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
                                            offset[1][y][x], z_offset1);
          }
       }
    }
    else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
       LLVMValueRef z_offset;
-      z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec);
+      z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
       for (y = 0; y < 2; y++) {
          for (x = 0; x < 2; x++) {
             /* The r coord is the cube face in [0,5] */
-            offset[0][y][x] = lp_build_add(&bld->uint_coord_bld,
+            offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
                                            offset[0][y][x], z_offset);
          }
       }
index 75130e1c54fc6cb736e95fce559abcba75727a3a..af3f4688ede9a62d120fa3aa34bf165f9a7104b8 100644 (file)
@@ -131,7 +131,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
    }
 
    /* convert x,y,z coords to linear offset from start of texture, in bytes */
-   lp_build_sample_offset(&bld->uint_coord_bld,
+   lp_build_sample_offset(&bld->int_coord_bld,
                           bld->format_desc,
                           x, y, z, y_stride, z_stride,
                           &offset, &i, &j);
@@ -145,7 +145,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
        * coords which are out of bounds to become zero.  Zero's guaranteed
        * to be inside the texture image.
        */
-      offset = lp_build_andnot(&bld->uint_coord_bld, offset, use_border);
+      offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border);
    }
 
    lp_build_fetch_rgba_soa(bld->builder,
@@ -239,9 +239,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
 {
    struct lp_build_context *coord_bld = &bld->coord_bld;
    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
-   struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
    LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
-   LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
+   LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
    LLVMValueRef coord0, coord1, weight;
 
    switch(wrap_mode) {
@@ -253,20 +252,20 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
       lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
       /* repeat wrap */
       if (is_pot) {
-         coord1 = lp_build_add(uint_coord_bld, coord0, uint_coord_bld->one);
+         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
          coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
          coord1 = LLVMBuildAnd(bld->builder, coord1, length_minus_one, "");
       }
       else {
          /* Add a bias to the texcoord to handle negative coords */
-         LLVMValueRef bias = lp_build_mul_imm(uint_coord_bld, length, 1024);
+         LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024);
          LLVMValueRef mask;
          coord0 = LLVMBuildAdd(bld->builder, coord0, bias, "");
          coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
          mask = lp_build_compare(bld->builder, int_coord_bld->type,
                                  PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
          coord1 = LLVMBuildAnd(bld->builder,
-                              lp_build_add(uint_coord_bld, coord0, uint_coord_bld->one),
+                              lp_build_add(int_coord_bld, coord0, int_coord_bld->one),
                               mask, "");
       }
       break;
@@ -448,8 +447,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
 {
    struct lp_build_context *coord_bld = &bld->coord_bld;
    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
-   struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
-   LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
+   LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
    LLVMValueRef icoord;
    
    switch(wrap_mode) {
@@ -460,7 +458,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
          icoord = LLVMBuildAnd(bld->builder, icoord, length_minus_one, "");
       else {
          /* Add a bias to the texcoord to handle negative coords */
-         LLVMValueRef bias = lp_build_mul_imm(uint_coord_bld, length, 1024);
+         LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024);
          icoord = LLVMBuildAdd(bld->builder, icoord, bias, "");
          icoord = LLVMBuildURem(bld->builder, icoord, length, "");
       }
@@ -1199,7 +1197,6 @@ lp_build_sample_soa(LLVMBuilderRef builder,
    bld.float_type = lp_type_float(32);
    bld.int_type = lp_type_int(32);
    bld.coord_type = type;
-   bld.uint_coord_type = lp_uint_type(type);
    bld.int_coord_type = lp_int_type(type);
    bld.float_size_type = lp_type_float(32);
    bld.float_size_type.length = dims > 1 ? 4 : 1;
@@ -1212,7 +1209,6 @@ lp_build_sample_soa(LLVMBuilderRef builder,
    lp_build_context_init(&bld.float_vec_bld, builder, float_vec_type);
    lp_build_context_init(&bld.int_bld, builder, bld.int_type);
    lp_build_context_init(&bld.coord_bld, builder, bld.coord_type);
-   lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type);
    lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type);
    lp_build_context_init(&bld.int_size_bld, builder, bld.int_size_type);
    lp_build_context_init(&bld.float_size_bld, builder, bld.float_size_type);