unsigned lo_hi)
{
LLVMValueRef shuffle, elems[LP_MAX_VECTOR_LENGTH];
- unsigned i, j;
+ unsigned i;
assert(type.length <= LP_MAX_VECTOR_LENGTH);
assert(lo_hi < 2);
if (type.length * type.width == 256) {
- assert(type.length >= 4);
- for (i = 0, j = 0; i < type.length; ++i) {
- if (i == type.length / 4) {
- j = type.length;
- } else if (i == type.length / 2) {
- j = type.length / 2;
- } else if (i == 3 * type.length / 4) {
- j = 3 * type.length / 4;
- } else {
- j += 2;
- }
- elems[i] = lp_build_const_int32(gallivm, j + lo_hi);
+ assert(type.length == 8);
+ assert(type.width == 32);
+ static const unsigned shufvals[8] = {0, 2, 8, 10, 4, 6, 12, 14};
+ for (i = 0; i < type.length; ++i) {
+ elems[i] = lp_build_const_int32(gallivm, shufvals[i] + lo_hi);
}
} else {
for (i = 0; i < type.length; ++i) {
}
else {
LLVMValueRef tmp[4], cc01, cc23;
- struct lp_type lp_type32, lp_type64, lp_type32dxt;
+ struct lp_type lp_type32, lp_type64;
memset(&lp_type32, 0, sizeof lp_type32);
lp_type32.width = 32;
lp_type32.length = length;
lp_build_const_extend_shuffle(gallivm, 2, 4), "");
}
if (length == 8) {
+ struct lp_type lp_type32_4 = {0};
+ lp_type32_4.width = 32;
+ lp_type32_4.length = 4;
for (i = 0; i < 4; ++i) {
tmp[0] = elems[i];
tmp[1] = elems[i+4];
- elems[i] = lp_build_concat(gallivm, tmp, lp_type32, 2);
+ elems[i] = lp_build_concat(gallivm, tmp, lp_type32_4, 2);
}
}
cc01 = lp_build_interleave2_half(gallivm, lp_type32, elems[0], elems[1], 0);
tmp = lp_build_select(&bld, sel_mask, alpha_low, alpha_hi);
bit_pos = LLVMBuildAnd(builder, bit_pos,
lp_build_const_int_vec(gallivm, type, 0xffffffdf), "");
- /* Warning: slow shift with per element count */
+ /* Warning: slow shift with per element count (without avx2) */
/*
* Could do pshufb here as well - just use appropriate 2 bits in bit_pos
* to select the right byte with pshufb. Then for the remaining one bit
lp_build_const_int_vec(gallivm, type16, 8), "");
alpha = LLVMBuildBitCast(builder, alpha, i64t, "");
shuffle1 = lp_build_const_shuffle1(gallivm, 0, 8);
- /* XXX this shuffle broken with LLVM 2.8 */
alpha0 = LLVMBuildShuffleVector(builder, alpha0, alpha0, shuffle1, "");
alpha1 = LLVMBuildShuffleVector(builder, alpha1, alpha1, shuffle1, "");
return rgba;
}
+ /*
+ * Could use n > 8 here with avx2, but doesn't seem faster.
+ */
if (n > 4) {
unsigned count;
LLVMTypeRef i8_vectype = LLVMVectorType(i8t, 4 * n);