gallivm: fix [IU]MUL_HI regression
author Nicolai Hähnle <nicolai.haehnle@amd.com>
Tue, 8 Nov 2016 09:14:00 +0000 (10:14 +0100)
committer Nicolai Hähnle <nicolai.haehnle@amd.com>
Tue, 8 Nov 2016 15:25:54 +0000 (16:25 +0100)
This patch does two things:

1. It separates the host-CPU code generation from the generic code
   generation. This guards against accidentally breaking things for
   radeonsi in the future.

2. It makes sure we actually use both arguments and don't just compute
   a square :-p

Fixes a regression introduced by commit 29279f44b3172ef3b84d470e70fc7684695ced4b

Cc: Roland Scheidegger <sroland@vmware.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
src/gallium/auxiliary/gallivm/lp_bld_arit.c
src/gallium/auxiliary/gallivm/lp_bld_arit.h
src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c

index 3de46287f705691580c9637a149bb3d5ec1b4a72..43ad238393ce662e755bbbaa8195026c367ada53 100644 (file)
@@ -1094,12 +1094,14 @@ lp_build_mul(struct lp_build_context *bld,
 /*
  * Widening mul, valid for 32x32 bit -> 64bit only.
  * Result is low 32bits, high bits returned in res_hi.
+ *
+ * Emits code that is meant to be compiled for the host CPU.
  */
 LLVMValueRef
-lp_build_mul_32_lohi(struct lp_build_context *bld,
-                     LLVMValueRef a,
-                     LLVMValueRef b,
-                     LLVMValueRef *res_hi)
+lp_build_mul_32_lohi_cpu(struct lp_build_context *bld,
+                         LLVMValueRef a,
+                         LLVMValueRef b,
+                         LLVMValueRef *res_hi)
 {
    struct gallivm_state *gallivm = bld->gallivm;
    LLVMBuilderRef builder = gallivm->builder;
@@ -1216,29 +1218,47 @@ lp_build_mul_32_lohi(struct lp_build_context *bld,
       return LLVMBuildShuffleVector(builder, muleven, mulodd, shuf_vec, "");
    }
    else {
-      LLVMValueRef tmp;
-      struct lp_type type_tmp;
-      LLVMTypeRef wide_type, cast_type;
-
-      type_tmp = bld->type;
-      type_tmp.width *= 2;
-      wide_type = lp_build_vec_type(gallivm, type_tmp);
-      type_tmp = bld->type;
-      type_tmp.length *= 2;
-      cast_type = lp_build_vec_type(gallivm, type_tmp);
-
-      if (bld->type.sign) {
-         a = LLVMBuildSExt(builder, a, wide_type, "");
-         b = LLVMBuildSExt(builder, b, wide_type, "");
-      } else {
-         a = LLVMBuildZExt(builder, a, wide_type, "");
-         b = LLVMBuildZExt(builder, b, wide_type, "");
-      }
-      tmp = LLVMBuildMul(builder, a, b, "");
-      tmp = LLVMBuildBitCast(builder, tmp, cast_type, "");
-      *res_hi = lp_build_uninterleave1(gallivm, bld->type.length * 2, tmp, 1);
-      return lp_build_uninterleave1(gallivm, bld->type.length * 2, tmp, 0);
+      return lp_build_mul_32_lohi(bld, a, b, res_hi);
+   }
+}
+
+
+/*
+ * Widening mul, valid for 32x32 bit -> 64bit only.
+ * Result is low 32bits, high bits returned in res_hi.
+ *
+ * Emits generic code.
+ */
+LLVMValueRef
+lp_build_mul_32_lohi(struct lp_build_context *bld,
+                     LLVMValueRef a,
+                     LLVMValueRef b,
+                     LLVMValueRef *res_hi)
+{
+   struct gallivm_state *gallivm = bld->gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef tmp;
+   struct lp_type type_tmp;
+   LLVMTypeRef wide_type, cast_type;
+
+   type_tmp = bld->type;
+   type_tmp.width *= 2;
+   wide_type = lp_build_vec_type(gallivm, type_tmp);
+   type_tmp = bld->type;
+   type_tmp.length *= 2;
+   cast_type = lp_build_vec_type(gallivm, type_tmp);
+
+   if (bld->type.sign) {
+      a = LLVMBuildSExt(builder, a, wide_type, "");
+      b = LLVMBuildSExt(builder, b, wide_type, "");
+   } else {
+      a = LLVMBuildZExt(builder, a, wide_type, "");
+      b = LLVMBuildZExt(builder, b, wide_type, "");
    }
+   tmp = LLVMBuildMul(builder, a, b, "");
+   tmp = LLVMBuildBitCast(builder, tmp, cast_type, "");
+   *res_hi = lp_build_uninterleave1(gallivm, bld->type.length * 2, tmp, 1);
+   return lp_build_uninterleave1(gallivm, bld->type.length * 2, tmp, 0);
 }
 
 
index 5d48b1c837dd25729297ced9e5b80287bfdc6b75..2a4137a6780b226c6250fa17c1830f45b8cc400f 100644 (file)
@@ -76,6 +76,12 @@ lp_build_mul(struct lp_build_context *bld,
              LLVMValueRef a,
              LLVMValueRef b);
 
+LLVMValueRef
+lp_build_mul_32_lohi_cpu(struct lp_build_context *bld,
+                         LLVMValueRef a,
+                         LLVMValueRef b,
+                         LLVMValueRef *res_hi);
+
 LLVMValueRef
 lp_build_mul_32_lohi(struct lp_build_context *bld,
                      LLVMValueRef a,
index 72d4579a042a1498d72863154053087419f3f316..9c6fc4b4ae61021bfac8ba4ed275421629a07d34 100644 (file)
@@ -849,7 +849,24 @@ imul_hi_emit(
 
    /* low result bits are tossed away */
    lp_build_mul_32_lohi(int_bld, emit_data->args[0],
-                        emit_data->args[0], &hi_bits);
+                        emit_data->args[1], &hi_bits);
+   emit_data->output[emit_data->chan] = hi_bits;
+}
+
+static void
+imul_hi_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_context *int_bld = &bld_base->int_bld;
+   LLVMValueRef hi_bits;
+
+   assert(int_bld->type.width == 32);
+
+   /* low result bits are tossed away */
+   lp_build_mul_32_lohi_cpu(int_bld, emit_data->args[0],
+                            emit_data->args[1], &hi_bits);
    emit_data->output[emit_data->chan] = hi_bits;
 }
 
@@ -867,7 +884,24 @@ umul_hi_emit(
 
    /* low result bits are tossed away */
    lp_build_mul_32_lohi(uint_bld, emit_data->args[0],
-                        emit_data->args[0], &hi_bits);
+                        emit_data->args[1], &hi_bits);
+   emit_data->output[emit_data->chan] = hi_bits;
+}
+
+static void
+umul_hi_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_context *uint_bld = &bld_base->uint_bld;
+   LLVMValueRef hi_bits;
+
+   assert(uint_bld->type.width == 32);
+
+   /* low result bits are tossed away */
+   lp_build_mul_32_lohi_cpu(uint_bld, emit_data->args[0],
+                            emit_data->args[1], &hi_bits);
    emit_data->output[emit_data->chan] = hi_bits;
 }
 
@@ -2581,6 +2615,8 @@ lp_set_default_actions_cpu(
    bld_base->op_actions[TGSI_OPCODE_ISHR].emit = ishr_emit_cpu;
    bld_base->op_actions[TGSI_OPCODE_ISLT].emit = islt_emit_cpu;
    bld_base->op_actions[TGSI_OPCODE_ISSG].emit = issg_emit_cpu;
+   bld_base->op_actions[TGSI_OPCODE_IMUL_HI].emit = imul_hi_emit_cpu;
+   bld_base->op_actions[TGSI_OPCODE_UMUL_HI].emit = umul_hi_emit_cpu;
 
    bld_base->op_actions[TGSI_OPCODE_LG2].emit = lg2_emit_cpu;
    bld_base->op_actions[TGSI_OPCODE_LOG].emit = log_emit_cpu;