#include <pipe/p_compiler.h>
+/**
+ * Native SIMD register width, in bits.
+ *
+ * 128 on every architecture we currently care about.
+ */
+#define LP_NATIVE_VECTOR_WIDTH 128
+
/**
* Several functions can only cope with vectors of length up to this value.
* You may need to increase that value if you want to represent bigger vectors.
*/
#define LP_MAX_VECTOR_LENGTH 16
-#define LP_MAX_TYPE_WIDTH 64
-
/**
* The LLVM type system can't conveniently express all the things we care about
};
+/**
+ * Describe a vector of signed floating-point elements.
+ *
+ * All fields are zeroed first, then the floating and sign flags are set.
+ * The element count is LP_NATIVE_VECTOR_WIDTH / width (integer division),
+ * so width is expressed in the same unit as LP_NATIVE_VECTOR_WIDTH and
+ * must be non-zero.
+ */
+static INLINE struct lp_type
+lp_type_float(unsigned width)
+{
+   struct lp_type type;
+
+   /* Start from an all-zero descriptor so every unset field stays cleared. */
+   memset(&type, 0, sizeof(type));
+
+   type.width = width;
+   type.length = LP_NATIVE_VECTOR_WIDTH / width;
+   type.sign = TRUE;
+   type.floating = TRUE;
+
+   return type;
+}
+
+
+/**
+ * Describe a vector of signed integer elements.
+ *
+ * All fields are zeroed first; only the sign flag is then set.
+ * The element count is LP_NATIVE_VECTOR_WIDTH / width (integer division),
+ * so width is expressed in the same unit as LP_NATIVE_VECTOR_WIDTH and
+ * must be non-zero.
+ */
+static INLINE struct lp_type
+lp_type_int(unsigned width)
+{
+   struct lp_type type;
+
+   /* Start from an all-zero descriptor so every unset field stays cleared. */
+   memset(&type, 0, sizeof(type));
+
+   type.width = width;
+   type.length = LP_NATIVE_VECTOR_WIDTH / width;
+   type.sign = TRUE;
+
+   return type;
+}
+
+
+/**
+ * Describe a vector of unsigned integer elements.
+ *
+ * All fields are zeroed and no flag is set afterwards; only the element
+ * width and count are filled in.
+ * The element count is LP_NATIVE_VECTOR_WIDTH / width (integer division),
+ * so width is expressed in the same unit as LP_NATIVE_VECTOR_WIDTH and
+ * must be non-zero.
+ */
+static INLINE struct lp_type
+lp_type_uint(unsigned width)
+{
+   struct lp_type type;
+
+   /* Start from an all-zero descriptor so every flag stays cleared. */
+   memset(&type, 0, sizeof(type));
+
+   type.width = width;
+   type.length = LP_NATIVE_VECTOR_WIDTH / width;
+
+   return type;
+}
+
+
+/**
+ * Describe a vector of unsigned normalized elements.
+ *
+ * All fields are zeroed first; only the norm flag is then set.
+ * The element count is LP_NATIVE_VECTOR_WIDTH / width (integer division),
+ * so width is expressed in the same unit as LP_NATIVE_VECTOR_WIDTH and
+ * must be non-zero.
+ */
+static INLINE struct lp_type
+lp_type_unorm(unsigned width)
+{
+   struct lp_type type;
+
+   /* Start from an all-zero descriptor so every unset field stays cleared. */
+   memset(&type, 0, sizeof(type));
+
+   type.width = width;
+   type.length = LP_NATIVE_VECTOR_WIDTH / width;
+   type.norm = TRUE;
+
+   return type;
+}
+
+
+/**
+ * Describe a vector of signed fixed-point elements.
+ *
+ * All fields are zeroed first, then the fixed and sign flags are set.
+ * The element count is LP_NATIVE_VECTOR_WIDTH / width (integer division),
+ * so width is expressed in the same unit as LP_NATIVE_VECTOR_WIDTH and
+ * must be non-zero.
+ */
+static INLINE struct lp_type
+lp_type_fixed(unsigned width)
+{
+   struct lp_type type;
+
+   /* Start from an all-zero descriptor so every unset field stays cleared. */
+   memset(&type, 0, sizeof(type));
+
+   type.width = width;
+   type.length = LP_NATIVE_VECTOR_WIDTH / width;
+   type.fixed = TRUE;
+   type.sign = TRUE;
+
+   return type;
+}
+
+
+/**
+ * Describe a vector of unsigned fixed-point elements.
+ *
+ * All fields are zeroed first; only the fixed flag is then set.
+ * The element count is LP_NATIVE_VECTOR_WIDTH / width (integer division),
+ * so width is expressed in the same unit as LP_NATIVE_VECTOR_WIDTH and
+ * must be non-zero.
+ */
+static INLINE struct lp_type
+lp_type_ufixed(unsigned width)
+{
+   struct lp_type type;
+
+   /* Start from an all-zero descriptor so every unset field stays cleared. */
+   memset(&type, 0, sizeof(type));
+
+   type.width = width;
+   type.length = LP_NATIVE_VECTOR_WIDTH / width;
+   type.fixed = TRUE;
+
+   return type;
+}
+
+
LLVMTypeRef
lp_build_elem_type(struct lp_type type);
success = TRUE;
for(i = 0; i < n && success; ++i) {
if(mode == AoS) {
- uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t con[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t res[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+ uint8_t src[LP_NATIVE_VECTOR_WIDTH/8];
+ uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8];
+ uint8_t con[LP_NATIVE_VECTOR_WIDTH/8];
+ uint8_t res[LP_NATIVE_VECTOR_WIDTH/8];
+ uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8];
int64_t start_counter = 0;
int64_t end_counter = 0;
if(mode == SoA) {
const unsigned stride = type.length*type.width/8;
- uint8_t src[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t dst[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t con[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t res[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
- uint8_t ref[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+ uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8];
+ uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8];
+ uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8];
+ uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8];
+ uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8];
int64_t start_counter = 0;
int64_t end_counter = 0;
boolean mismatch;