gallivm: Use a more compact approach for lp_build_broadcast_scalar().
author José Fonseca <jfonseca@vmware.com>
Sun, 9 May 2010 21:31:18 +0000 (22:31 +0100)
committer José Fonseca <jfonseca@vmware.com>
Wed, 12 May 2010 19:40:31 +0000 (20:40 +0100)
The final machine code is exactly the same, but this reduces the number
of LLVM IR instructions generated for a typical shader by 5%.

Also, return the scalar itself, unchanged, when the type length is 1.

src/gallium/auxiliary/gallivm/lp_bld_swizzle.c

index 6a3c8f3f3a4fd8048a059075d314fabce70464ff..f095a39cf522746cf469cad8a4a7c4f541443c45 100644 (file)
@@ -68,18 +68,20 @@ lp_build_broadcast_scalar(struct lp_build_context *bld,
                           LLVMValueRef scalar)
 {
    const struct lp_type type = bld->type;
-   LLVMValueRef res;
-   unsigned i;
 
    assert(lp_check_elem_type(type, LLVMTypeOf(scalar)));
 
-   res = bld->undef;
-   for(i = 0; i < type.length; ++i) {
-      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
-      res = LLVMBuildInsertElement(bld->builder, res, scalar, index, "");
+   if (type.length == 1) {
+      return scalar;
+   }
+   else {
+      LLVMValueRef res;
+      res = LLVMBuildInsertElement(bld->builder, bld->undef, scalar,
+                                   LLVMConstInt(LLVMInt32Type(), 0, 0), "");
+      res = LLVMBuildShuffleVector(bld->builder, res, bld->undef,
+                                   lp_build_const_int_vec(type, 0), "");
+      return res;
    }
-
-   return res;
 }