gallivm: fix alignment issue for vertex data fetch

author Roland Scheidegger <sroland@vmware.com>
Tue, 18 Nov 2014 14:22:29 +0000 (15:22 +0100)
committer Roland Scheidegger <sroland@vmware.com>
Tue, 18 Nov 2014 14:26:59 +0000 (15:26 +0100)
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c

index 3a1b05796483d680bfc83bd793fe723c30b4f80c..1c2656036630aa557846d603e9d27c5408c4633d 100644 (file)
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -742,6 +742,7 @@ generate_fetch(struct gallivm_state *gallivm,
        val = lp_build_fetch_rgba_aos(gallivm,
                                      format_desc,
                                      lp_float32_vec4_type(),
+                                    FALSE,
                                      map_ptr,
                                      zero, zero, zero);
        LLVMBuildStore(builder, val, temp_ptr);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h

index 1177fb224ddf7a661618aec2434676b81a4afbe8..969f1f6cc94bc936e65fc61e974cb1d251028f6a 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_format.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h
@@ -62,6 +62,7 @@ LLVMValueRef
  lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
                          const struct util_format_description *format_desc,
                          struct lp_type type,
+                        boolean aligned,
                          LLVMValueRef base_ptr,
                          LLVMValueRef offset,
                          LLVMValueRef i,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c

index af755d460c129c8149b4885742a8dc87c43a9252..3c25c329edde4dcb5bcc4dbf3448442c5f314ff5 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
@@ -356,6 +356,7 @@ lp_build_pack_rgba_aos(struct gallivm_state *gallivm,
   * Fetch a pixel into a 4 float AoS.
   *
   * \param format_desc  describes format of the image we're fetching from
+ * \param aligned  whether the data is guaranteed to be aligned
   * \param ptr  address of the pixel block (or the texel if uncompressed)
   * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
   *              these will always be (0, 0).
@@ -365,6 +366,7 @@ LLVMValueRef
  lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
                          const struct util_format_description *format_desc,
                          struct lp_type type,
+                        boolean aligned,
                          LLVMValueRef base_ptr,
                          LLVMValueRef offset,
                          LLVMValueRef i,
@@ -400,7 +402,7 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
  
        packed = lp_build_gather(gallivm, type.length/4,
                                 format_desc->block.bits, type.width*4,
-                               base_ptr, offset, TRUE);
+                               aligned, base_ptr, offset, TRUE);
  
        assert(format_desc->block.bits <= vec_len);
  
@@ -437,7 +439,7 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
           LLVMValueRef packed;
  
           packed = lp_build_gather_elem(gallivm, num_pixels,
-                                       format_desc->block.bits, 32,
+                                       format_desc->block.bits, 32, aligned,
                                         base_ptr, offset, k, FALSE);
  
           tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c

index ff2887ee8fa6099602033593369354e8c51a25d4..afaabc0879090d47736b2e1e966f886a85e3bc75 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
@@ -386,6 +386,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
                                 type.length,
                                 format_desc->block.bits,
                                 type.width,
+                               TRUE,
                                 base_ptr, offset, FALSE);
  
        /*
@@ -411,8 +412,8 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
  
        packed = lp_build_gather(gallivm, type.length,
                                 format_desc->block.bits,
-                               type.width, base_ptr, offset,
-                               FALSE);
+                               type.width, TRUE,
+                               base_ptr, offset, FALSE);
        if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) {
           lp_build_r11g11b10_to_float(gallivm, packed, rgba_out);
        }
@@ -438,15 +439,15 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
           unsigned mask = (1 << 8) - 1;
           LLVMValueRef s_offset = lp_build_const_int_vec(gallivm, type, 4);
           offset = LLVMBuildAdd(builder, offset, s_offset, "");
-         packed = lp_build_gather(gallivm, type.length,
-                                  32, type.width, base_ptr, offset, FALSE);
+         packed = lp_build_gather(gallivm, type.length, 32, type.width,
+                                  TRUE, base_ptr, offset, FALSE);
           packed = LLVMBuildAnd(builder, packed,
                                 lp_build_const_int_vec(gallivm, type, mask), "");
        }
        else {
           assert (format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
-         packed = lp_build_gather(gallivm, type.length,
-                                  32, type.width, base_ptr, offset, TRUE);
+         packed = lp_build_gather(gallivm, type.length, 32, type.width,
+                                  TRUE, base_ptr, offset, TRUE);
           packed = LLVMBuildBitCast(builder, packed,
                                     lp_build_vec_type(gallivm, type), "");
        }
@@ -472,7 +473,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
        tmp_type.norm = TRUE;
  
        tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
-                                    base_ptr, offset, i, j);
+                                    TRUE, base_ptr, offset, i, j);
  
        lp_build_rgba8_to_fi32_soa(gallivm,
                                  type,
@@ -522,7 +523,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
  
           /* Get a single float[4]={R,G,B,A} pixel */
           tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
-                                       base_ptr, offset_elem,
+                                       TRUE, base_ptr, offset_elem,
                                         i_elem, j_elem);
  
           /*
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c

index 873f354c040ee62843808bd0f1d5715df016c264..4f5a45c6a3d06073c2d2d9696101fb362dc303f1 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
@@ -497,7 +497,7 @@ lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,
     assert(format_desc->block.width == 2);
     assert(format_desc->block.height == 1);
  
-   packed = lp_build_gather(gallivm, n, 32, 32, base_ptr, offset, FALSE);
+   packed = lp_build_gather(gallivm, n, 32, 32, TRUE, base_ptr, offset, FALSE);
  
     (void)j;
  
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_gather.c b/src/gallium/auxiliary/gallivm/lp_bld_gather.c

index 9155d811c06c41bda8c51b17f0ff35face501a5c..d02602041ce13c9722e9be2b7721d879ffe0fc57 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_gather.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_gather.c
@@ -76,6 +76,7 @@ lp_build_gather_elem(struct gallivm_state *gallivm,
                       unsigned length,
                       unsigned src_width,
                       unsigned dst_width,
+                     boolean aligned,
                       LLVMValueRef base_ptr,
                       LLVMValueRef offsets,
                       unsigned i,
@@ -93,6 +94,27 @@ lp_build_gather_elem(struct gallivm_state *gallivm,
     ptr = LLVMBuildBitCast(gallivm->builder, ptr, src_ptr_type, "");
     res = LLVMBuildLoad(gallivm->builder, ptr, "");
  
+   /* XXX
+    * On some archs we probably really want to avoid having to deal
+    * with alignments lower than 4 bytes (if fetch size is a power of
+    * two >= 32). On x86 it doesn't matter, however.
+    * We should be able to guarantee full alignment for any kind of texture
+    * fetch (except ARB_texture_buffer_range, oops), but not vertex fetch
+    * (there's PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY and friends
+    * but I don't think that's quite what we wanted).
+    * For ARB_texture_buffer_range, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT
+    * looks like a good fit, but it seems this cap bit (and OpenGL) aren't
+    * enforcing what we want (which is what d3d10 does, the offset needs to
+    * be aligned to element size, but GL has bytes regardless of element
+    * size which would only leave us with minimum alignment restriction of 16
+    * which doesn't make much sense if the type isn't 4x32bit). Due to
+    * translation of offsets to first_elem in sampler_views it actually seems
+    * gallium could not do anything else except 16 no matter what...
+    */
+   if (!aligned) {
+      lp_set_load_alignment(res, 1);
+   }
+
     assert(src_width <= dst_width);
     if (src_width > dst_width) {
        res = LLVMBuildTrunc(gallivm->builder, res, dst_elem_type, "");
@@ -126,6 +148,7 @@ lp_build_gather_elem(struct gallivm_state *gallivm,
   * @param length length of the offsets
   * @param src_width src element width in bits
   * @param dst_width result element width in bits (src will be expanded to fit)
+ * @param aligned whether the data is guaranteed to be aligned (to src_width)
   * @param base_ptr base pointer, should be a i8 pointer type.
   * @param offsets vector with offsets
   * @param vector_justify select vector rather than integer justification
@@ -135,6 +158,7 @@ lp_build_gather(struct gallivm_state *gallivm,
                  unsigned length,
                  unsigned src_width,
                  unsigned dst_width,
+                boolean aligned,
                  LLVMValueRef base_ptr,
                  LLVMValueRef offsets,
                  boolean vector_justify)
@@ -144,7 +168,7 @@ lp_build_gather(struct gallivm_state *gallivm,
     if (length == 1) {
        /* Scalar */
        return lp_build_gather_elem(gallivm, length,
-                                  src_width, dst_width,
+                                  src_width, dst_width, aligned,
                                    base_ptr, offsets, 0, vector_justify);
     } else {
        /* Vector */
@@ -158,7 +182,7 @@ lp_build_gather(struct gallivm_state *gallivm,
           LLVMValueRef index = lp_build_const_int32(gallivm, i);
           LLVMValueRef elem;
           elem = lp_build_gather_elem(gallivm, length,
-                                     src_width, dst_width,
+                                     src_width, dst_width, aligned,
                                       base_ptr, offsets, i, vector_justify);
           res = LLVMBuildInsertElement(gallivm->builder, res, elem, index, "");
        }
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_gather.h b/src/gallium/auxiliary/gallivm/lp_bld_gather.h

index ee694732d367a3f44be221a05cadad421c71f507..3ede4763a702393160e99ea889ee8bfde40a80b6 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_gather.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_gather.h
@@ -45,6 +45,7 @@ lp_build_gather_elem(struct gallivm_state *gallivm,
                       unsigned length,
                       unsigned src_width,
                       unsigned dst_width,
+                     boolean aligned,
                       LLVMValueRef base_ptr,
                       LLVMValueRef offsets,
                       unsigned i,
@@ -55,6 +56,7 @@ lp_build_gather(struct gallivm_state *gallivm,
                  unsigned length,
                  unsigned src_width,
                  unsigned dst_width,
+                boolean aligned,
                  LLVMValueRef base_ptr,
                  LLVMValueRef offsets,
                  boolean vector_justify);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c

index 394521d382a8acfc9cb6f5494948c5781eea4477..d7fde810a762426f3ceaeb8b145a7721d7af4993 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
@@ -581,6 +581,7 @@ lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld,
                                bld->texel_type.length,
                                bld->format_desc->block.bits,
                                bld->texel_type.width,
+                              TRUE,
                                data_ptr, offset, TRUE);
  
        rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
@@ -589,6 +590,7 @@ lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld,
        rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
                                        bld->format_desc,
                                        u8n.type,
+                                      TRUE,
                                        data_ptr, offset,
                                        x_subcoord,
                                        y_subcoord);
@@ -919,6 +921,7 @@ lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
                                         bld->texel_type.length,
                                         bld->format_desc->block.bits,
                                         bld->texel_type.width,
+                                       TRUE,
                                         data_ptr, offset[k][j][i], TRUE);
  
                 rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
@@ -927,6 +930,7 @@ lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
                 rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
                                                 bld->format_desc,
                                                 u8n.type,
+                                               TRUE,
                                                 data_ptr, offset[k][j][i],
                                                 x_subcoord[i],
                                                 y_subcoord[j]);
diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c

index 48bf06e3c451fe2519e0a3296018b896eb8b50d4..d9abd1ae37c52c910ec7f34729884ac90d90ff6d 100644 (file)
--- a/src/gallium/drivers/llvmpipe/lp_test_format.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
@@ -112,7 +112,7 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose,
     block = LLVMAppendBasicBlockInContext(context, func, "entry");
     LLVMPositionBuilderAtEnd(builder, block);
  
-   rgba = lp_build_fetch_rgba_aos(gallivm, desc, type,
+   rgba = lp_build_fetch_rgba_aos(gallivm, desc, type, TRUE,
                                    packed_ptr, offset, i, j);
  
     LLVMBuildStore(builder, rgba, rgba_ptr);
@@ -252,6 +252,7 @@ test_format_unorm8(unsigned verbose, FILE *fp,
           }
  
           /* To ensure it's 16-byte aligned */
+         /* Could skip this and use unaligned lp_build_fetch_rgba_aos */
           memcpy(packed, test->packed, sizeof packed);
  
           for (i = 0; i < desc->block.height; ++i) {
author	Roland Scheidegger <sroland@vmware.com>
	Tue, 18 Nov 2014 14:22:29 +0000 (15:22 +0100)
committer	Roland Scheidegger <sroland@vmware.com>
	Tue, 18 Nov 2014 14:26:59 +0000 (15:26 +0100)
src/gallium/auxiliary/draw/draw_llvm.c		patch \| blob \| history
src/gallium/auxiliary/gallivm/lp_bld_format.h		patch \| blob \| history
src/gallium/auxiliary/gallivm/lp_bld_format_aos.c		patch \| blob \| history
src/gallium/auxiliary/gallivm/lp_bld_format_soa.c		patch \| blob \| history
src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c		patch \| blob \| history
src/gallium/auxiliary/gallivm/lp_bld_gather.c		patch \| blob \| history
src/gallium/auxiliary/gallivm/lp_bld_gather.h		patch \| blob \| history
src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c		patch \| blob \| history
src/gallium/drivers/llvmpipe/lp_test_format.c		patch \| blob \| history