gallivm: Allow conversions to/from registers of different sizes.

author José Fonseca <jfonseca@vmware.com>

Thu, 1 Jul 2010 11:33:34 +0000 (12:33 +0100)

committer José Fonseca <jfonseca@vmware.com>

Thu, 1 Jul 2010 14:02:15 +0000 (15:02 +0100)
author José Fonseca <jfonseca@vmware.com>
Thu, 1 Jul 2010 11:33:34 +0000 (12:33 +0100)
committer José Fonseca <jfonseca@vmware.com>
Thu, 1 Jul 2010 14:02:15 +0000 (15:02 +0100)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c

index 3f7f2ebde9ce700b99e88e50ae0b8249b43ebb97..5e7260dc21407c868e6c815720b967f65086c38b 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
@@ -83,6 +83,9 @@
   *
   * Although the result values can be scaled to an arbitrary bit width specified
   * by dst_width, the actual result type will have the same width.
+ *
+ * Ex: src = { float, float, float, float }
+ * return { i32, i32, i32, i32 } where each value is in [0, 2^dst_width-1].
   */
  LLVMValueRef
  lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
@@ -152,6 +155,8 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
  
  /**
   * Inverse of lp_build_clamped_float_to_unsigned_norm above.
+ * Ex: src = { i32, i32, i32, i32 } with values in range [0, 2^src_width-1]
+ * return {float, float, float, float} with values in range [0, 1].
   */
  LLVMValueRef
  lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
@@ -219,14 +224,13 @@ lp_build_conv(LLVMBuilderRef builder,
     unsigned num_tmps;
     unsigned i;
  
-   /* Register width must remain constant */
-   assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
-
     /* We must not loose or gain channels. Only precision */
     assert(src_type.length * num_srcs == dst_type.length * num_dsts);
  
     assert(src_type.length <= LP_MAX_VECTOR_LENGTH);
     assert(dst_type.length <= LP_MAX_VECTOR_LENGTH);
+   assert(num_srcs <= LP_MAX_VECTOR_LENGTH);
+   assert(num_dsts <= LP_MAX_VECTOR_LENGTH);
  
     tmp_type = src_type;
     for(i = 0; i < num_srcs; ++i)
@@ -330,25 +334,11 @@ lp_build_conv(LLVMBuilderRef builder,
  
     assert(!tmp_type.floating || tmp_type.width == dst_type.width);
  
-   if(tmp_type.width > dst_type.width) {
-      assert(num_dsts == 1);
-      tmp[0] = lp_build_pack(builder, tmp_type, dst_type, TRUE, tmp, num_tmps);
-      tmp_type.width = dst_type.width;
-      tmp_type.length = dst_type.length;
-      num_tmps = 1;
-   }
-
-   if(tmp_type.width < dst_type.width) {
-      assert(num_tmps == 1);
-      lp_build_unpack(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts);
-      tmp_type.width = dst_type.width;
-      tmp_type.length = dst_type.length;
-      num_tmps = num_dsts;
-   }
+   lp_build_resize(builder, tmp_type, dst_type, tmp, num_srcs, tmp, num_dsts);
  
-   assert(tmp_type.width == dst_type.width);
-   assert(tmp_type.length == dst_type.length);
-   assert(num_tmps == num_dsts);
+   tmp_type.width  = dst_type.width;
+   tmp_type.length = dst_type.length;
+   num_tmps        = num_dsts;
  
     /*
      * Scale to the widest range
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c

index 186f8849b8dacb3a437764196349e58a0fca2ef7..dfe83b36c4212275ef416287f1eb97ca4022cd6f 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
@@ -427,3 +427,109 @@ lp_build_pack(LLVMBuilderRef builder,
  
     return tmp[0];
  }
+
+
+/**
+ * Truncate or expand the element bit width: src[] holds num_srcs vectors of
+ * src_type, dst[] receives num_dsts vectors of dst_type (M:1, 1:N or 1:1). */
+void
+lp_build_resize(LLVMBuilderRef builder,
+                struct lp_type src_type,
+                struct lp_type dst_type,
+                const LLVMValueRef *src, unsigned num_srcs,
+                LLVMValueRef *dst, unsigned num_dsts)
+{
+   LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
+   unsigned i;
+   /* Floating-point sources are only accepted when the width does not change */
+   assert(!src_type.floating || src_type.width == dst_type.width);
+
+   /* We must not lose or gain channels, only precision */
+   assert(src_type.length * num_srcs == dst_type.length * num_dsts);
+
+   /* We don't support M:N conversion, only 1:N, M:1, or 1:1 */
+   assert(num_srcs == 1 || num_dsts == 1);
+
+   assert(src_type.length <= LP_MAX_VECTOR_LENGTH);
+   assert(dst_type.length <= LP_MAX_VECTOR_LENGTH);
+   assert(num_srcs <= LP_MAX_VECTOR_LENGTH);
+   assert(num_dsts <= LP_MAX_VECTOR_LENGTH);
+
+   if (src_type.width > dst_type.width) {
+      /*
+       * Truncate bit width: everything lands in a single destination vector.
+       */
+
+      assert(num_dsts == 1);
+
+      if (src_type.width * src_type.length == dst_type.width * dst_type.length) {
+         /*
+          * Register width remains constant -- use vector packing intrinsics
+          */
+
+         tmp[0] = lp_build_pack(builder, src_type, dst_type, TRUE, src, num_srcs);
+      }
+      else {
+         /*
+          * Lengths match instead: extract, truncate and re-insert per element.
+          */
+
+         assert(src_type.length == dst_type.length);
+         tmp[0] = lp_build_undef(dst_type);
+         for (i = 0; i < dst_type.length; ++i) {
+            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+            LLVMValueRef val = LLVMBuildExtractElement(builder, src[0], index, "");
+            val = LLVMBuildTrunc(builder, val, lp_build_elem_type(dst_type), "");
+            tmp[0] = LLVMBuildInsertElement(builder, tmp[0], val, index, "");
+         }
+      }
+   }
+   else if (src_type.width < dst_type.width) {
+      /*
+       * Expand bit width: a single source vector feeds every destination.
+       */
+
+      assert(num_srcs == 1);
+
+      if (src_type.width * src_type.length == dst_type.width * dst_type.length) {
+         /*
+          * Register width remains constant -- use vector unpack intrinsics
+          */
+         lp_build_unpack(builder, src_type, dst_type, src[0], tmp, num_dsts);
+      }
+      else {
+         /*
+          * Lengths match instead: extract, extend and re-insert per element.
+          */
+
+         assert(src_type.length == dst_type.length);
+         tmp[0] = lp_build_undef(dst_type);
+         for (i = 0; i < dst_type.length; ++i) {
+            LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+            LLVMValueRef val = LLVMBuildExtractElement(builder, src[0], index, "");
+            /* Sign-extend only when both types are signed; otherwise zero-extend */
+            if (src_type.sign && dst_type.sign) {
+               val = LLVMBuildSExt(builder, val, lp_build_elem_type(dst_type), "");
+            } else {
+               val = LLVMBuildZExt(builder, val, lp_build_elem_type(dst_type), "");
+            }
+            tmp[0] = LLVMBuildInsertElement(builder, tmp[0], val, index, "");
+         }
+      }
+   }
+   else {
+      /*
+       * No-op: widths already match, pass the single vector through.
+       */
+
+      assert(num_srcs == 1);
+      assert(num_dsts == 1);
+
+      tmp[0] = src[0];
+   }
+   /* Results are copied out last; lp_build_conv() passes one array as src and dst */
+   for(i = 0; i < num_dsts; ++i)
+      dst[i] = tmp[i];
+}
+
+
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h b/src/gallium/auxiliary/gallivm/lp_bld_pack.h

index 41adeed220c7d8fd6cbb226ea963542dedc43857..e470082b97747ce9916af3c1b21a7d527d62e2a5 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
@@ -92,4 +92,12 @@ lp_build_pack(LLVMBuilderRef builder,
                const LLVMValueRef *src, unsigned num_srcs);
  
  
+void
+lp_build_resize(LLVMBuilderRef builder,
+                struct lp_type src_type,
+                struct lp_type dst_type,
+                const LLVMValueRef *src, unsigned num_srcs,
+                LLVMValueRef *dst, unsigned num_dsts);
+
+
  #endif /* !LP_BLD_PACK_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c

index 9b02f436c5b2e2946d9c87d293662b22cf346086..081f2d324b27150b1d8eb7402a64c26ab082f13f 100644 (file)
--- a/src/gallium/drivers/llvmpipe/lp_test_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c
@@ -167,19 +167,26 @@ test_one(unsigned verbose,
     unsigned i, j;
     void *code;
  
+   if (src_type.width * src_type.length != dst_type.width * dst_type.length ||
+       src_type.length != dst_type.length) {
+      return TRUE;
+   }
+
     if(verbose >= 1)
        dump_conv_types(stdout, src_type, dst_type);
  
-   if(src_type.length > dst_type.length) {
+   if (src_type.length > dst_type.length) {
        num_srcs = 1;
        num_dsts = src_type.length/dst_type.length;
     }
-   else  {
+   else if (src_type.length < dst_type.length) {
        num_dsts = 1;
        num_srcs = dst_type.length/src_type.length;
     }
-
-   assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
+   else  {
+      num_dsts = 1;
+      num_srcs = 1;
+   }
  
     /* We must not loose or gain channels. Only precision */
     assert(src_type.length * num_srcs == dst_type.length * num_dsts);
@@ -381,6 +388,11 @@ const struct lp_type conv_types[] = {
     {  FALSE, FALSE,  TRUE, FALSE,     8,  16 },
     {  FALSE, FALSE, FALSE,  TRUE,     8,  16 },
     {  FALSE, FALSE, FALSE, FALSE,     8,  16 },
+
+   {  FALSE, FALSE,  TRUE,  TRUE,     8,   4 },
+   {  FALSE, FALSE,  TRUE, FALSE,     8,   4 },
+   {  FALSE, FALSE, FALSE,  TRUE,     8,   4 },
+   {  FALSE, FALSE, FALSE, FALSE,     8,   4 },
  };
author	José Fonseca <jfonseca@vmware.com>
	Thu, 1 Jul 2010 11:33:34 +0000 (12:33 +0100)
committer	José Fonseca <jfonseca@vmware.com>
	Thu, 1 Jul 2010 14:02:15 +0000 (15:02 +0100)
src/gallium/auxiliary/gallivm/lp_bld_conv.c		patch \| blob \| history
src/gallium/auxiliary/gallivm/lp_bld_pack.c		patch \| blob \| history
src/gallium/auxiliary/gallivm/lp_bld_pack.h		patch \| blob \| history
src/gallium/drivers/llvmpipe/lp_test_conv.c		patch \| blob \| history