gallivm: fix [IU]MUL_HI regression

author Nicolai Hähnle <nicolai.haehnle@amd.com>
Tue, 8 Nov 2016 09:14:00 +0000 (10:14 +0100)
committer Nicolai Hähnle <nicolai.haehnle@amd.com>
Tue, 8 Nov 2016 15:25:54 +0000 (16:25 +0100)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c

index 3de46287f705691580c9637a149bb3d5ec1b4a72..43ad238393ce662e755bbbaa8195026c367ada53 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1094,12 +1094,14 @@ lp_build_mul(struct lp_build_context *bld,
  /*
   * Widening mul, valid for 32x32 bit -> 64bit only.
   * Result is low 32bits, high bits returned in res_hi.
+ *
+ * Emits code that is meant to be compiled for the host CPU.
   */
  LLVMValueRef
-lp_build_mul_32_lohi(struct lp_build_context *bld,
-                     LLVMValueRef a,
-                     LLVMValueRef b,
-                     LLVMValueRef *res_hi)
+lp_build_mul_32_lohi_cpu(struct lp_build_context *bld,
+                         LLVMValueRef a,
+                         LLVMValueRef b,
+                         LLVMValueRef *res_hi)
  {
     struct gallivm_state *gallivm = bld->gallivm;
     LLVMBuilderRef builder = gallivm->builder;
@@ -1216,29 +1218,47 @@ lp_build_mul_32_lohi(struct lp_build_context *bld,
        return LLVMBuildShuffleVector(builder, muleven, mulodd, shuf_vec, "");
     }
     else {
-      LLVMValueRef tmp;
-      struct lp_type type_tmp;
-      LLVMTypeRef wide_type, cast_type;
-
-      type_tmp = bld->type;
-      type_tmp.width *= 2;
-      wide_type = lp_build_vec_type(gallivm, type_tmp);
-      type_tmp = bld->type;
-      type_tmp.length *= 2;
-      cast_type = lp_build_vec_type(gallivm, type_tmp);
-
-      if (bld->type.sign) {
-         a = LLVMBuildSExt(builder, a, wide_type, "");
-         b = LLVMBuildSExt(builder, b, wide_type, "");
-      } else {
-         a = LLVMBuildZExt(builder, a, wide_type, "");
-         b = LLVMBuildZExt(builder, b, wide_type, "");
-      }
-      tmp = LLVMBuildMul(builder, a, b, "");
-      tmp = LLVMBuildBitCast(builder, tmp, cast_type, "");
-      *res_hi = lp_build_uninterleave1(gallivm, bld->type.length * 2, tmp, 1);
-      return lp_build_uninterleave1(gallivm, bld->type.length * 2, tmp, 0);
+      return lp_build_mul_32_lohi(bld, a, b, res_hi);
+   }
+}
+
+
+/*
+ * Widening mul, valid for 32x32 bit -> 64bit only.
+ * Result is low 32bits, high bits returned in res_hi.
+ *
+ * Emits generic code.
+ */
+LLVMValueRef
+lp_build_mul_32_lohi(struct lp_build_context *bld,
+                     LLVMValueRef a,
+                     LLVMValueRef b,
+                     LLVMValueRef *res_hi)
+{
+   struct gallivm_state *gallivm = bld->gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef tmp;
+   struct lp_type type_tmp;
+   LLVMTypeRef wide_type, cast_type;
+
+   type_tmp = bld->type;
+   type_tmp.width *= 2;
+   wide_type = lp_build_vec_type(gallivm, type_tmp);
+   type_tmp = bld->type;
+   type_tmp.length *= 2;
+   cast_type = lp_build_vec_type(gallivm, type_tmp);
+
+   if (bld->type.sign) {
+      a = LLVMBuildSExt(builder, a, wide_type, "");
+      b = LLVMBuildSExt(builder, b, wide_type, "");
+   } else {
+      a = LLVMBuildZExt(builder, a, wide_type, "");
+      b = LLVMBuildZExt(builder, b, wide_type, "");
     }
+   tmp = LLVMBuildMul(builder, a, b, "");
+   tmp = LLVMBuildBitCast(builder, tmp, cast_type, "");
+   *res_hi = lp_build_uninterleave1(gallivm, bld->type.length * 2, tmp, 1);
+   return lp_build_uninterleave1(gallivm, bld->type.length * 2, tmp, 0);
  }
  
  
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h

index 5d48b1c837dd25729297ced9e5b80287bfdc6b75..2a4137a6780b226c6250fa17c1830f45b8cc400f 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
@@ -76,6 +76,12 @@ lp_build_mul(struct lp_build_context *bld,
               LLVMValueRef a,
               LLVMValueRef b);
  
+LLVMValueRef
+lp_build_mul_32_lohi_cpu(struct lp_build_context *bld,
+                         LLVMValueRef a,
+                         LLVMValueRef b,
+                         LLVMValueRef *res_hi);
+
  LLVMValueRef
  lp_build_mul_32_lohi(struct lp_build_context *bld,
                       LLVMValueRef a,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c

index 72d4579a042a1498d72863154053087419f3f316..9c6fc4b4ae61021bfac8ba4ed275421629a07d34 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -849,7 +849,24 @@ imul_hi_emit(
  
     /* low result bits are tossed away */
     lp_build_mul_32_lohi(int_bld, emit_data->args[0],
-                        emit_data->args[0], &hi_bits);
+                        emit_data->args[1], &hi_bits);
+   emit_data->output[emit_data->chan] = hi_bits;
+}
+
+static void
+imul_hi_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_context *int_bld = &bld_base->int_bld;
+   LLVMValueRef hi_bits;
+
+   assert(int_bld->type.width == 32);
+
+   /* low result bits are tossed away */
+   lp_build_mul_32_lohi_cpu(int_bld, emit_data->args[0],
+                            emit_data->args[1], &hi_bits);
     emit_data->output[emit_data->chan] = hi_bits;
  }
  
@@ -867,7 +884,24 @@ umul_hi_emit(
  
     /* low result bits are tossed away */
     lp_build_mul_32_lohi(uint_bld, emit_data->args[0],
-                        emit_data->args[0], &hi_bits);
+                        emit_data->args[1], &hi_bits);
+   emit_data->output[emit_data->chan] = hi_bits;
+}
+
+static void
+umul_hi_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_context *uint_bld = &bld_base->uint_bld;
+   LLVMValueRef hi_bits;
+
+   assert(uint_bld->type.width == 32);
+
+   /* low result bits are tossed away */
+   lp_build_mul_32_lohi_cpu(uint_bld, emit_data->args[0],
+                            emit_data->args[1], &hi_bits);
     emit_data->output[emit_data->chan] = hi_bits;
  }
  
@@ -2581,6 +2615,8 @@ lp_set_default_actions_cpu(
     bld_base->op_actions[TGSI_OPCODE_ISHR].emit = ishr_emit_cpu;
     bld_base->op_actions[TGSI_OPCODE_ISLT].emit = islt_emit_cpu;
     bld_base->op_actions[TGSI_OPCODE_ISSG].emit = issg_emit_cpu;
+   bld_base->op_actions[TGSI_OPCODE_IMUL_HI].emit = imul_hi_emit_cpu;
+   bld_base->op_actions[TGSI_OPCODE_UMUL_HI].emit = umul_hi_emit_cpu;
  
     bld_base->op_actions[TGSI_OPCODE_LG2].emit = lg2_emit_cpu;
     bld_base->op_actions[TGSI_OPCODE_LOG].emit = log_emit_cpu;
author	Nicolai Hähnle <nicolai.haehnle@amd.com>
	Tue, 8 Nov 2016 09:14:00 +0000 (10:14 +0100)
committer	Nicolai Hähnle <nicolai.haehnle@amd.com>
	Tue, 8 Nov 2016 15:25:54 +0000 (16:25 +0100)
src/gallium/auxiliary/gallivm/lp_bld_arit.c		patch \| blob \| history
src/gallium/auxiliary/gallivm/lp_bld_arit.h		patch \| blob \| history
src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c		patch \| blob \| history