+
+
+/**
+ * Truncate or expand the bitwidth.
+ *
+ * NOTE: Getting the right sign flags is crucial here, as we employ some
+ * intrinsics that do saturation.
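+ *
+ * The total number of channels is preserved (src_type.length * num_srcs ==
+ * dst_type.length * num_dsts); only the per-channel bit width, and hence the
+ * number and width of the vectors, changes.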
+ */
+void
+lp_build_resize(struct gallivm_state *gallivm,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ const LLVMValueRef *src, unsigned num_srcs,
+ LLVMValueRef *dst, unsigned num_dsts)
+{
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
+ unsigned i;
+
+ /*
+ * We don't support float <-> int conversion here. That must be done
+ * before/after calling this function.
+ */
+ assert(src_type.floating == dst_type.floating);
+
+ /*
+ * We don't support double <-> float conversion yet, although it could be
+ * added with little effort.
+ */
+ assert((!src_type.floating && !dst_type.floating) ||
+ src_type.width == dst_type.width);
+
+ /* We must not lose or gain channels. Only the precision may change. */
+ assert(src_type.length * num_srcs == dst_type.length * num_dsts);
+
+ assert(src_type.length <= LP_MAX_VECTOR_LENGTH);
+ assert(dst_type.length <= LP_MAX_VECTOR_LENGTH);
+ assert(num_srcs <= LP_MAX_VECTOR_LENGTH);
+ assert(num_dsts <= LP_MAX_VECTOR_LENGTH);
+
+ if (src_type.width > dst_type.width) {
+ /*
+ * Truncate bit width.
+ */
+
+ /* Conversion must be M:1 */
+ assert(num_dsts == 1);
+
+ if (src_type.width * src_type.length == dst_type.width * dst_type.length) {
+ /*
+ * Register width remains constant -- use vector packing intrinsics
+ */
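+ /* e.g. two <4 x i32> srcs pack into a single <8 x i16> dst */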
+ tmp[0] = lp_build_pack(gallivm, src_type, dst_type, TRUE, src, num_srcs);
+ }
+ else {
+ if (src_type.width / dst_type.width > num_srcs) {
+ /*
+ * First change src vectors size (with shuffle) so they have the
+ * same size as the destination vector, then pack normally.
+ * Note: cannot use cast/extract because llvm generates atrocious code.
+ */
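+ /* e.g. a single <8 x i32> src is first split into two <4 x i32> halves, which then pack into one <8 x i16> dst */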
+ unsigned size_ratio = (src_type.width * src_type.length) /
+ (dst_type.length * dst_type.width);
+ unsigned new_length = src_type.length / size_ratio;
+
+ for (i = 0; i < size_ratio * num_srcs; i++) {
+ unsigned start_index = (i % size_ratio) * new_length;
+ tmp[i] = lp_build_extract_range(gallivm, src[i / size_ratio],
+ start_index, new_length);
+ }
+ num_srcs *= size_ratio;
+ src_type.length = new_length;
+ tmp[0] = lp_build_pack(gallivm, src_type, dst_type, TRUE, tmp, num_srcs);
+ }
+ else {
+ /*
+ * Truncate bit width but expand vector size - first pack,
+ * then expand, simply because this should be more AVX-friendly
+ * for the cases we probably hit.
+ */
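+ /* e.g. four <4 x i32> srcs pack pairwise into two <8 x i16> vectors, which are then concatenated into one <16 x i16> dst */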
+ unsigned size_ratio = (dst_type.width * dst_type.length) /
+ (src_type.length * src_type.width);
+ unsigned num_pack_srcs = num_srcs / size_ratio;
+ dst_type.length = dst_type.length / size_ratio;
+
+ for (i = 0; i < size_ratio; i++) {
+ tmp[i] = lp_build_pack(gallivm, src_type, dst_type, TRUE,
+ &src[i*num_pack_srcs], num_pack_srcs);
+ }
+ tmp[0] = lp_build_concat(gallivm, tmp, dst_type, size_ratio);
+ }
+ }
+ }
+ else if (src_type.width < dst_type.width) {
+ /*
+ * Expand bit width.
+ */
+
+ /* Conversion must be 1:N */
+ assert(num_srcs == 1);
+
+ if (src_type.width * src_type.length == dst_type.width * dst_type.length) {
+ /*
+ * Register width remains constant -- use vector unpack intrinsics
+ */
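+ /* e.g. a single <8 x i16> src unpacks into two <4 x i32> dsts */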
+ lp_build_unpack(gallivm, src_type, dst_type, src[0], tmp, num_dsts);
+ }
+ else {
+ /*
+ * Do it element-wise.
+ */
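+ /* e.g. a single <8 x i8> src expands element by element into two <4 x i32> dsts */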
+ assert(src_type.length * num_srcs == dst_type.length * num_dsts);
+
+ for (i = 0; i < num_dsts; i++) {
+ tmp[i] = lp_build_undef(gallivm, dst_type);
+ }
+
+ for (i = 0; i < src_type.length; ++i) {
+ unsigned j = i / dst_type.length;
+ LLVMValueRef srcindex = lp_build_const_int32(gallivm, i);
+ LLVMValueRef dstindex = lp_build_const_int32(gallivm, i % dst_type.length);
+ LLVMValueRef val = LLVMBuildExtractElement(builder, src[0], srcindex, "");
+
+ if (src_type.sign && dst_type.sign) {
+ val = LLVMBuildSExt(builder, val, lp_build_elem_type(gallivm, dst_type), "");
+ } else {
+ val = LLVMBuildZExt(builder, val, lp_build_elem_type(gallivm, dst_type), "");
+ }
+ tmp[j] = LLVMBuildInsertElement(builder, tmp[j], val, dstindex, "");
+ }
+ }
+ }
+ else {
+ /*
+ * No-op
+ */
+
+ /* "Conversion" must be N:N */
+ assert(num_srcs == num_dsts);
+
+ for(i = 0; i < num_dsts; ++i)
+ tmp[i] = src[i];
+ }
+
+ for(i = 0; i < num_dsts; ++i)
+ dst[i] = tmp[i];
+}
+
+
+/**
+ * Expands the src vector from its current length to dst_length, leaving the
+ * extra channels undefined.
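+ * E.g. a <4 x i32> src padded to dst_length 8 becomes an <8 x i32> whose
+ * last four channels are undef.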
+ */
+LLVMValueRef
+lp_build_pad_vector(struct gallivm_state *gallivm,
+ LLVMValueRef src,
+ unsigned dst_length)
+{
+ LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
+ LLVMValueRef undef;
+ LLVMTypeRef type;
+ unsigned i, src_length;
+
+ type = LLVMTypeOf(src);
+
+ if (LLVMGetTypeKind(type) != LLVMVectorTypeKind) {
+ /* Can't use ShuffleVector on non-vector type */
+ undef = LLVMGetUndef(LLVMVectorType(type, dst_length));
+ return LLVMBuildInsertElement(gallivm->builder, undef, src, lp_build_const_int32(gallivm, 0), "");
+ }
+
+ undef = LLVMGetUndef(type);
+ src_length = LLVMGetVectorSize(type);
+
+ assert(dst_length <= ARRAY_SIZE(elems));
+ assert(dst_length >= src_length);
+
+ if (src_length == dst_length)
+ return src;
+
+ /* All elements from src vector */
+ for (i = 0; i < src_length; ++i)
+ elems[i] = lp_build_const_int32(gallivm, i);
+
+ /* Fill the remaining space with undef elements */
+ for (i = src_length; i < dst_length; ++i)
+ elems[i] = lp_build_const_int32(gallivm, src_length);
+
+ /* Combine the two vectors */
+ return LLVMBuildShuffleVector(gallivm->builder, src, undef, LLVMConstVector(elems, dst_length), "");
+}