freedreno/ir3: fix register usage calculations

[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_conv.c
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c

index ba51ff794f657da5b15d568b2f385120238f197a..14244470c9050a2e3f3ef2d95af697ca19acc279 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
@@ -101,7 +101,7 @@ lp_build_half_to_float(struct gallivm_state *gallivm,
     LLVMTypeRef int_vec_type = lp_build_vec_type(gallivm, i32_type);
     LLVMValueRef h;
  
-   if (util_cpu_caps.has_f16c && HAVE_LLVM >= 0x0301 &&
+   if (util_cpu_caps.has_f16c &&
         (src_length == 4 || src_length == 8)) {
        const char *intrinsic = NULL;
        if (src_length == 4) {
@@ -143,7 +143,7 @@ lp_build_float_to_half(struct gallivm_state *gallivm,
     struct lp_type i16_type = lp_type_int_vec(16, 16 * length);
     LLVMValueRef result;
  
-   if (util_cpu_caps.has_f16c && HAVE_LLVM >= 0x0301 &&
+   if (util_cpu_caps.has_f16c &&
         (length == 4 || length == 8)) {
        struct lp_type i168_type = lp_type_int_vec(16, 16 * 8);
        unsigned mode = 3; /* same as LP_BUILD_ROUND_TRUNCATE */
@@ -266,17 +266,19 @@ lp_build_clamped_float_to_unsigned_norm(struct gallivm_state *gallivm,
     else if (dst_width == (mantissa + 1)) {
        /*
         * The destination width matches exactly what can be represented in
-       * floating point (i.e., mantissa + 1 bits). So do a straight
-       * multiplication followed by casting. No further rounding is necessary.
+       * floating point (i.e., mantissa + 1 bits). Even so correct rounding
+       * still needs to be applied (only for numbers in [0.5-1.0] would
+       * conversion using truncation after scaling be sufficient).
         */
-
        double scale;
+      struct lp_build_context uf32_bld;
  
+      lp_build_context_init(&uf32_bld, gallivm, src_type);
        scale = (double)((1ULL << dst_width) - 1);
  
        res = LLVMBuildFMul(builder, src,
                            lp_build_const_vec(gallivm, src_type, scale), "");
-      res = LLVMBuildFPToSI(builder, res, int_vec_type, "");
+      res = lp_build_iround(&uf32_bld, res);
     }
     else {
        /*
@@ -790,29 +792,23 @@ lp_build_conv(struct gallivm_state *gallivm,
        unsigned dst_shift = lp_const_shift(dst_type);
        unsigned src_offset = lp_const_offset(src_type);
        unsigned dst_offset = lp_const_offset(dst_type);
+      struct lp_build_context bld;
+      lp_build_context_init(&bld, gallivm, tmp_type);
  
        /* Compensate for different offsets */
-      if (dst_offset > src_offset && src_type.width > dst_type.width) {
+      /* sscaled -> unorm and similar would cause negative shift count, skip */
+      if (dst_offset > src_offset && src_type.width > dst_type.width && src_shift > 0) {
           for (i = 0; i < num_tmps; ++i) {
              LLVMValueRef shifted;
-            LLVMValueRef shift = lp_build_const_int_vec(gallivm, tmp_type, src_shift - 1);
-            if(src_type.sign)
-               shifted = LLVMBuildAShr(builder, tmp[i], shift, "");
-            else
-               shifted = LLVMBuildLShr(builder, tmp[i], shift, "");
  
+            shifted = lp_build_shr_imm(&bld, tmp[i], src_shift - 1);
              tmp[i] = LLVMBuildSub(builder, tmp[i], shifted, "");
           }
        }
  
        if(src_shift > dst_shift) {
-         LLVMValueRef shift = lp_build_const_int_vec(gallivm, tmp_type,
-                                                     src_shift - dst_shift);
           for(i = 0; i < num_tmps; ++i)
-            if(src_type.sign)
-               tmp[i] = LLVMBuildAShr(builder, tmp[i], shift, "");
-            else
-               tmp[i] = LLVMBuildLShr(builder, tmp[i], shift, "");
+            tmp[i] = lp_build_shr_imm(&bld, tmp[i], src_shift - dst_shift);
        }
     }
  
@@ -898,14 +894,27 @@ lp_build_conv(struct gallivm_state *gallivm,
         unsigned dst_shift = lp_const_shift(dst_type);
         unsigned src_offset = lp_const_offset(src_type);
         unsigned dst_offset = lp_const_offset(dst_type);
+       struct lp_build_context bld;
+       lp_build_context_init(&bld, gallivm, tmp_type);
  
         if (src_shift < dst_shift) {
            LLVMValueRef pre_shift[LP_MAX_VECTOR_LENGTH];
-          LLVMValueRef shift = lp_build_const_int_vec(gallivm, tmp_type, dst_shift - src_shift);
  
-          for (i = 0; i < num_tmps; ++i) {
-             pre_shift[i] = tmp[i];
-             tmp[i] = LLVMBuildShl(builder, tmp[i], shift, "");
+          if (dst_shift - src_shift < dst_type.width) {
+             for (i = 0; i < num_tmps; ++i) {
+                pre_shift[i] = tmp[i];
+                tmp[i] = lp_build_shl_imm(&bld, tmp[i], dst_shift - src_shift);
+             }
+          }
+          else {
+             /*
+              * This happens for things like sscaled -> unorm conversions. Shift
+              * counts equal to bit width cause undefined results, so hack around it.
+              */
+             for (i = 0; i < num_tmps; ++i) {
+                pre_shift[i] = tmp[i];
+                tmp[i] = lp_build_zero(gallivm, dst_type);
+             }
            }
  
            /* Compensate for different offsets */