gallivm: fix float->SNORM conversion
author: Roland Scheidegger <sroland@vmware.com>
Sat, 27 Jul 2013 01:53:00 +0000 (03:53 +0200)
committer: Roland Scheidegger <sroland@vmware.com>
Sat, 27 Jul 2013 14:41:29 +0000 (16:41 +0200)
Just like the UNORM case, we need to use round-to-nearest, not truncation.
(There is also another problem: we're using the formula for SNORM->float,
which will produce a value below -1.0 for the most negative value; according
to both OpenGL and d3d10 this would need clamping. However, no actual
failures have been observed due to that, hence keep cheating on that.)

Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
src/gallium/auxiliary/gallivm/lp_bld_conv.c
src/gallium/auxiliary/gallivm/lp_bld_format_soa.c

index cbea966882aa85e45b4aae6b47b5081b60da1429..56c15818e7cc90cdd7faf2a7a50cd5f3b0544948 100644 (file)
@@ -257,6 +257,7 @@ lp_build_clamped_float_to_unsigned_norm(struct gallivm_state *gallivm,
       bias = (double)(1ULL << (mantissa - dst_width));
 
       res = LLVMBuildFMul(builder, src, lp_build_const_vec(gallivm, src_type, scale), "");
+      /* instead of fadd/and could (with sse2) just use lp_build_iround */
       res = LLVMBuildFAdd(builder, res, lp_build_const_vec(gallivm, src_type, bias), "");
       res = LLVMBuildBitCast(builder, res, int_vec_type, "");
       res = LLVMBuildAnd(builder, res,
@@ -742,7 +743,6 @@ lp_build_conv(struct gallivm_state *gallivm,
       }
       else {
          double dst_scale = lp_const_scale(dst_type);
-         LLVMTypeRef tmp_vec_type;
 
          if (dst_scale != 1.0) {
             LLVMValueRef scale = lp_build_const_vec(gallivm, tmp_type, dst_scale);
@@ -750,19 +750,37 @@ lp_build_conv(struct gallivm_state *gallivm,
                tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, "");
          }
 
-         /* Use an equally sized integer for intermediate computations */
-         tmp_type.floating = FALSE;
-         tmp_vec_type = lp_build_vec_type(gallivm, tmp_type);
-         for(i = 0; i < num_tmps; ++i) {
+         /*
+          * these functions will use fptosi in some form which won't work
+          * with 32bit uint dst.
+          */
+         assert(dst_type.sign || dst_type.width < 32);
+
+         if (dst_type.sign && dst_type.norm && !dst_type.fixed) {
+            struct lp_build_context bld;
+
+            lp_build_context_init(&bld, gallivm, tmp_type);
+            for(i = 0; i < num_tmps; ++i) {
+               tmp[i] = lp_build_iround(&bld, tmp[i]);
+            }
+            tmp_type.floating = FALSE;
+         }
+         else {
+            LLVMTypeRef tmp_vec_type;
+
+            tmp_type.floating = FALSE;
+            tmp_vec_type = lp_build_vec_type(gallivm, tmp_type);
+            for(i = 0; i < num_tmps; ++i) {
 #if 0
-            if(dst_type.sign)
-               tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, "");
-            else
-               tmp[i] = LLVMBuildFPToUI(builder, tmp[i], tmp_vec_type, "");
+               if(dst_type.sign)
+                  tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, "");
+               else
+                  tmp[i] = LLVMBuildFPToUI(builder, tmp[i], tmp_vec_type, "");
 #else
-           /* FIXME: there is no SSE counterpart for LLVMBuildFPToUI */
-            tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, "");
+              /* FIXME: there is no SSE counterpart for LLVMBuildFPToUI */
+               tmp[i] = LLVMBuildFPToSI(builder, tmp[i], tmp_vec_type, "");
 #endif
+            }
          }
       }
    }
@@ -860,6 +878,18 @@ lp_build_conv(struct gallivm_state *gallivm,
              for(i = 0; i < num_tmps; ++i)
                 tmp[i] = LLVMBuildFMul(builder, tmp[i], scale, "");
           }
+
+          /* the formula above will produce value below -1.0 for most negative
+           * value but everything seems happy with that hence disable for now */
+          if (0 && !src_type.fixed && src_type.norm && src_type.sign) {
+             struct lp_build_context bld;
+
+             lp_build_context_init(&bld, gallivm, dst_type);
+             for(i = 0; i < num_tmps; ++i) {
+                tmp[i] = lp_build_max(&bld, tmp[i],
+                                      lp_build_const_vec(gallivm, dst_type, -1.0f));
+             }
+          }
       }
     }
     else {
index 114ce03bbdc28b2a1dbf35ce04e5e2f53612c5b4..81cd2b0f7c68e0a3449c493abbc76e91f0d35db7 100644 (file)
@@ -39,6 +39,7 @@
 #include "lp_bld_gather.h"
 #include "lp_bld_debug.h"
 #include "lp_bld_format.h"
+#include "lp_bld_arit.h"
 
 
 void
@@ -221,6 +222,11 @@ lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
                double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
                LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
                input = LLVMBuildFMul(builder, input, scale_val, "");
+               /* the formula above will produce value below -1.0 for most negative
+                * value but everything seems happy with that hence disable for now */
+               if (0)
+                  input = lp_build_max(&bld, input,
+                                       lp_build_const_vec(gallivm, type, -1.0f));
             }
          }
          else if (format_desc->channel[chan].pure_integer) {