gallivm: (trivial) handle non-aligned fetch for lp_build_fetch_rgba_soa
author Roland Scheidegger <sroland@vmware.com>
Sun, 11 Dec 2016 22:37:30 +0000 (23:37 +0100)
committer Roland Scheidegger <sroland@vmware.com>
Wed, 21 Dec 2016 03:48:24 +0000 (04:48 +0100)
soa fetch so far always assumed that data was aligned. However, we want to
use this for vertex fetch, and data might not be aligned there, so handle
it in this path too (basically just pass the alignment flag through to other
functions). (It looks like it wouldn't work for cached s3tc but this is
no different than with AoS fetch.)

Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
src/gallium/auxiliary/gallivm/lp_bld_format.h
src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c

index 5c866f420bd886f29cdcaff3209156a94205fcd9..6540caaa293ee70f62c16701f1838362e187321f 100644 (file)
@@ -143,6 +143,7 @@ void
 lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
                         const struct util_format_description *format_desc,
                         struct lp_type type,
+                        boolean aligned,
                         LLVMValueRef base_ptr,
                         LLVMValueRef offsets,
                         LLVMValueRef i,
index 7444c518e424c1bcf4946892bcc45c6ccf0e9b70..389bfa05d4b5506d3c46e7a9845dc447fa55a5e5 100644 (file)
@@ -349,6 +349,7 @@ lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm,
  *
  * \param type  the desired return type for 'rgba'.  The vector length
  *              is the number of texels to fetch
+ * \param aligned if the offset is guaranteed to be aligned to element width
  *
  * \param base_ptr  points to the base of the texture mip tree.
  * \param offset    offset to start of the texture image block.  For non-
@@ -365,6 +366,7 @@ void
 lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
                         const struct util_format_description *format_desc,
                         struct lp_type type,
+                        boolean aligned,
                         LLVMValueRef base_ptr,
                         LLVMValueRef offset,
                         LLVMValueRef i,
@@ -402,7 +404,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
                                type.length,
                                format_desc->block.bits,
                                type.width,
-                               TRUE,
+                               aligned,
                                base_ptr, offset, FALSE);
 
       /*
@@ -428,7 +430,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
 
       packed = lp_build_gather(gallivm, type.length,
                                format_desc->block.bits,
-                               type.width, TRUE,
+                               type.width, aligned,
                                base_ptr, offset, FALSE);
       if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) {
          lp_build_r11g11b10_to_float(gallivm, packed, rgba_out);
@@ -456,14 +458,14 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
          LLVMValueRef s_offset = lp_build_const_int_vec(gallivm, type, 4);
          offset = LLVMBuildAdd(builder, offset, s_offset, "");
          packed = lp_build_gather(gallivm, type.length, 32, type.width,
-                                  TRUE, base_ptr, offset, FALSE);
+                                  aligned, base_ptr, offset, FALSE);
          packed = LLVMBuildAnd(builder, packed,
                                lp_build_const_int_vec(gallivm, type, mask), "");
       }
       else {
          assert (format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
          packed = lp_build_gather(gallivm, type.length, 32, type.width,
-                                  TRUE, base_ptr, offset, TRUE);
+                                  aligned, base_ptr, offset, TRUE);
          packed = LLVMBuildBitCast(builder, packed,
                                    lp_build_vec_type(gallivm, type), "");
       }
@@ -489,7 +491,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
       tmp_type.norm = TRUE;
 
       tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
-                                    TRUE, base_ptr, offset, i, j, cache);
+                                    aligned, base_ptr, offset, i, j, cache);
 
       lp_build_rgba8_to_fi32_soa(gallivm,
                                 type,
@@ -509,6 +511,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
       const struct util_format_description *flinear_desc;
       LLVMValueRef packed;
       flinear_desc = util_format_description(util_format_linear(format_desc->format));
+      /* This probably only works with aligned data */
       packed = lp_build_fetch_cached_texels(gallivm,
                                             flinear_desc,
                                             type.length,
@@ -572,7 +575,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
 
          /* Get a single float[4]={R,G,B,A} pixel */
          tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
-                                       TRUE, base_ptr, offset_elem,
+                                       aligned, base_ptr, offset_elem,
                                        i_elem, j_elem, cache);
 
          /*
index 939cd43ac48730a5843dc3a2fb61e7f51d74c2a4..161a03f35242ad2570aff11ce10c1fe919db86bf 100644 (file)
@@ -159,7 +159,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
 
    lp_build_fetch_rgba_soa(bld->gallivm,
                            bld->format_desc,
-                           bld->texel_type,
+                           bld->texel_type, TRUE,
                            data_ptr, offset,
                            i, j,
                            bld->cache,
@@ -2406,7 +2406,7 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld,
 
    lp_build_fetch_rgba_soa(bld->gallivm,
                            bld->format_desc,
-                           bld->texel_type,
+                           bld->texel_type, TRUE,
                            bld->base_ptr, offset,
                            i, j,
                            bld->cache,