+
+#else
+ *dst_lo = lp_build_interleave2(gallivm, src_type, msb, src, 0);
+ *dst_hi = lp_build_interleave2(gallivm, src_type, msb, src, 1);
+#endif
+
+ /* Cast the result into the new type (twice as wide) */
+
+ dst_vec_type = lp_build_vec_type(gallivm, dst_type);
+
+ *dst_lo = LLVMBuildBitCast(builder, *dst_lo, dst_vec_type, "");
+ *dst_hi = LLVMBuildBitCast(builder, *dst_hi, dst_vec_type, "");
+}
+
+
+/**
+ * Double the bit width, with an order which fits the cpu nicely.
+ *
+ * This only changes the number of bits used to represent the values, not the
+ * values themselves.
+ *
+ * The order of the results is not guaranteed, except that it will match
+ * the corresponding lp_build_pack2_native call.
+ */
+void
+lp_build_unpack2_native(struct gallivm_state *gallivm,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ LLVMValueRef src,
+ LLVMValueRef *dst_lo,
+ LLVMValueRef *dst_hi)
+{
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef msb;
+ LLVMTypeRef dst_vec_type;
+
+ assert(!src_type.floating);
+ assert(!dst_type.floating);
+ assert(dst_type.width == src_type.width * 2);
+ assert(dst_type.length * 2 == src_type.length);
+
+ if(dst_type.sign && src_type.sign) {
+ /* Replicate the sign bit in the most significant bits */
+ msb = LLVMBuildAShr(builder, src,
+ lp_build_const_int_vec(gallivm, src_type, src_type.width - 1), "");
+ }
+ else
+ /* Most significant bits always zero */
+ msb = lp_build_zero(gallivm, src_type);
+
+ /* Interleave bits */
+#if UTIL_ARCH_LITTLE_ENDIAN
+ if (src_type.length * src_type.width == 256 && util_cpu_caps.has_avx2) {
+ *dst_lo = lp_build_interleave2_half(gallivm, src_type, src, msb, 0);
+ *dst_hi = lp_build_interleave2_half(gallivm, src_type, src, msb, 1);
+ } else {
+ *dst_lo = lp_build_interleave2(gallivm, src_type, src, msb, 0);
+ *dst_hi = lp_build_interleave2(gallivm, src_type, src, msb, 1);
+ }