clover: Align kernel argument sizes to nearest power of 2
author Jan Vesely <jan.vesely@rutgers.edu>
Fri, 25 Apr 2014 18:24:55 +0000 (14:24 -0400)
committer Francisco Jerez <currojerez@riseup.net>
Tue, 29 Apr 2014 11:09:21 +0000 (13:09 +0200)
v2: use a new variable for aligned size
    add comment
    make both vars const
    only use the aligned value in argument constructors
    fix comment typo

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
src/gallium/state_trackers/clover/llvm/invocation.cpp

index a81bdf8aa73a858235ee8c6432b24c02f71b9ce9..82e262f2640f04da32dec0cd809ab2c0b1e5d961 100644 (file)
@@ -64,6 +64,7 @@
 
 #include "pipe/p_state.h"
 #include "util/u_memory.h"
+#include "util/u_math.h"
 
 #include <iostream>
 #include <iomanip>
@@ -308,11 +309,19 @@ namespace {
 #endif
 
             llvm::Type *arg_type = arg.getType();
-            unsigned arg_size = TD.getTypeStoreSize(arg_type);
+            const unsigned arg_store_size = TD.getTypeStoreSize(arg_type);
+
+            // OpenCL 1.2 specification, Ch. 6.1.5: "A built-in data
+            // type that is not a power of two bytes in size must be
+            // aligned to the next larger power of two".  We need this
+            // alignment for three element vectors, which have
+            // non-power-of-2 store size.
+            const unsigned arg_api_size =
+               util_next_power_of_two(arg_store_size);
 
             llvm::Type *target_type = arg_type->isIntegerTy() ?
-               TD.getSmallestLegalIntType(mod->getContext(), arg_size * 8) :
-               arg_type;
+               TD.getSmallestLegalIntType(mod->getContext(), arg_store_size * 8) :
+               arg_type;
             unsigned target_size = TD.getTypeStoreSize(target_type);
             unsigned target_align = TD.getABITypeAlignment(target_type);
 
@@ -326,19 +335,19 @@ namespace {
                if (address_space == address_spaces[clang::LangAS::opencl_local
                                                      - clang::LangAS::Offset]) {
                   args.push_back(module::argument(module::argument::local,
-                                                  arg_size, target_size,
+                                                  arg_api_size, target_size,
                                                   target_align,
                                                   module::argument::zero_ext));
                } else {
                   // XXX: Correctly handle constant address space.  There is no
                   // way for r600g to pass a handle for constant buffers back
                   // to clover like it can for global buffers, so
-                  // creating constant arguements will break r600g.  For now,
+                  // creating constant arguments will break r600g.  For now,
                   // continue treating constant buffers as global buffers
                   // until we can come up with a way to create handles for
                   // constant buffers.
                   args.push_back(module::argument(module::argument::global,
-                                                  arg_size, target_size,
+                                                  arg_api_size, target_size,
                                                   target_align,
                                                   module::argument::zero_ext));
               }
@@ -352,7 +361,7 @@ namespace {
                    module::argument::zero_ext);
 
                args.push_back(
-                  module::argument(module::argument::scalar, arg_size,
+                  module::argument(module::argument::scalar, arg_api_size,
                                    target_size, target_align, ext_type));
             }
          }