gallivm: fix F2U opcode
authorRoland Scheidegger <sroland@vmware.com>
Tue, 4 Feb 2014 18:53:53 +0000 (19:53 +0100)
committerRoland Scheidegger <sroland@vmware.com>
Wed, 5 Feb 2014 16:45:31 +0000 (17:45 +0100)
Previously, we were really doing F2I. And also move it to generic section.
(Note that for llvmpipe the code generated is definitely bad, due to lack
of unsigned conversions with sse. I think though what llvm does (using scalar
conversions to 64bit signed either with x87 fpu (32bit) or sse (64bit)
including lots of domain changes is quite suboptimal, could do something like
is_large = arg >= 2^31
half_arg = 0.5 * arg
small_c = fptoint(arg)
large_c = fptoint(half_arg) << 1
res = select(is_large, large_c, small_c)
which should be much less instructions but that's something llvm should do
itself.)

This fixes piglit fs/vs-float-uint-conversion.shader_test (maybe more, needs
GL 3.0 version override to run.)

Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
Reviewed-by: Zack Rusin <zackr@vmware.com>
src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c

index caaeb01561fff5b78c7b81d5735b16da0cc5959c..b9546dbc661b28bca259882e3ef5c91f03d16cd3 100644 (file)
@@ -720,10 +720,23 @@ sub_emit(
    struct lp_build_tgsi_context * bld_base,
    struct lp_build_emit_data * emit_data)
 {
-       emit_data->output[emit_data->chan] = LLVMBuildFSub(
-                               bld_base->base.gallivm->builder,
-                               emit_data->args[0],
-                               emit_data->args[1], "");
+   emit_data->output[emit_data->chan] =
+      LLVMBuildFSub(bld_base->base.gallivm->builder,
+                    emit_data->args[0],
+                    emit_data->args[1], "");
+}
+
+/* TGSI_OPCODE_F2U */
+static void
+f2u_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   emit_data->output[emit_data->chan] =
+      LLVMBuildFPToUI(bld_base->base.gallivm->builder,
+                      emit_data->args[0],
+                      bld_base->base.int_vec_type, "");
 }
 
 /* TGSI_OPCODE_U2F */
@@ -733,9 +746,10 @@ u2f_emit(
    struct lp_build_tgsi_context * bld_base,
    struct lp_build_emit_data * emit_data)
 {
-   emit_data->output[emit_data->chan] = LLVMBuildUIToFP(bld_base->base.gallivm->builder,
-                                                       emit_data->args[0],
-                                                       bld_base->base.vec_type, "");
+   emit_data->output[emit_data->chan] =
+      LLVMBuildUIToFP(bld_base->base.gallivm->builder,
+                      emit_data->args[0],
+                      bld_base->base.vec_type, "");
 }
 
 static void
@@ -949,6 +963,7 @@ lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
    bld_base->op_actions[TGSI_OPCODE_SUB].emit = sub_emit;
 
    bld_base->op_actions[TGSI_OPCODE_UARL].emit = mov_emit;
+   bld_base->op_actions[TGSI_OPCODE_F2U].emit = f2u_emit;
    bld_base->op_actions[TGSI_OPCODE_U2F].emit = u2f_emit;
    bld_base->op_actions[TGSI_OPCODE_UMAD].emit = umad_emit;
    bld_base->op_actions[TGSI_OPCODE_UMUL].emit = umul_emit;
@@ -1128,18 +1143,6 @@ f2i_emit_cpu(
                                                         emit_data->args[0]);
 }
 
-/* TGSI_OPCODE_F2U (CPU Only) */
-static void
-f2u_emit_cpu(
-   const struct lp_build_tgsi_action * action,
-   struct lp_build_tgsi_context * bld_base,
-   struct lp_build_emit_data * emit_data)
-{
-   /* FIXME: implement and use lp_build_utrunc() */
-   emit_data->output[emit_data->chan] = lp_build_itrunc(&bld_base->base,
-                                                        emit_data->args[0]);
-}
-
 /* TGSI_OPCODE_FSET Helper (CPU Only) */
 static void
 fset_emit_cpu(
@@ -1832,7 +1835,6 @@ lp_set_default_actions_cpu(
    bld_base->op_actions[TGSI_OPCODE_DIV].emit = div_emit_cpu;
    bld_base->op_actions[TGSI_OPCODE_EX2].emit = ex2_emit_cpu;
    bld_base->op_actions[TGSI_OPCODE_F2I].emit = f2i_emit_cpu;
-   bld_base->op_actions[TGSI_OPCODE_F2U].emit = f2u_emit_cpu;
    bld_base->op_actions[TGSI_OPCODE_FLR].emit = flr_emit_cpu;
    bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = fseq_emit_cpu;
    bld_base->op_actions[TGSI_OPCODE_FSGE].emit = fsge_emit_cpu;