#include "lp_bld_format.h"
#include "lp_bld_gather.h"
#include "lp_bld_init.h"
+#include "lp_bld_intr.h"
/**
unsigned length,
unsigned src_width,
unsigned dst_width,
+ boolean aligned,
LLVMValueRef base_ptr,
LLVMValueRef offsets,
- unsigned i)
+ unsigned i,
+ boolean vector_justify)
{
LLVMTypeRef src_type = LLVMIntTypeInContext(gallivm->context, src_width);
LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
ptr = LLVMBuildBitCast(gallivm->builder, ptr, src_ptr_type, "");
res = LLVMBuildLoad(gallivm->builder, ptr, "");
+ /* XXX
+ * On some archs we probably really want to avoid having to deal
+ * with alignments lower than 4 bytes (if fetch size is a power of
+ * two >= 32). On x86 it doesn't matter, however.
+ * We should be able to guarantee full alignment for any kind of texture
+ * fetch (except ARB_texture_buffer_range, oops), but not vertex fetch
+ * (there's PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY and friends
+ * but I don't think that's quite what we wanted).
+ * For ARB_texture_buffer_range, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT
+ * looks like a good fit, but it seems neither this cap bit nor OpenGL
+ * enforces what we want, which is what d3d10 does: the offset needs to
+ * be aligned to the element size. GL instead specifies the offset in
+ * bytes regardless of element size, which would only leave us with a
+ * minimum alignment restriction of 16, and that doesn't make much sense
+ * if the type isn't 4x32bit. Due to the translation of offsets to
+ * first_elem in sampler_views, it actually seems gallium could not do
+ * anything other than 16 no matter what...
+ */
+ if (!aligned) {
+ lp_set_load_alignment(res, 1);
+ }
+
assert(src_width <= dst_width);
- if (src_width > dst_width)
+ if (src_width > dst_width) {
res = LLVMBuildTrunc(gallivm->builder, res, dst_elem_type, "");
- if (src_width < dst_width)
+ } else if (src_width < dst_width) {
res = LLVMBuildZExt(gallivm->builder, res, dst_elem_type, "");
+ if (vector_justify) {
+#ifdef PIPE_ARCH_BIG_ENDIAN
+ res = LLVMBuildShl(gallivm->builder, res,
+ LLVMConstInt(dst_elem_type, dst_width - src_width, 0), "");
+#endif
+ }
+ }
return res;
}
* Use for fetching texels from a texture.
* For SSE, typical values are length=4, src_width=32, dst_width=32.
*
+ * When src_width < dst_width, the return value can be justified in
+ * one of two ways:
+ * "integer justification" is used when the caller treats the destination
+ * as a packed integer bitmask, as described by the channels' "shift" and
+ * "width" fields;
+ * "vector justification" is used when the caller casts the destination
+ * to a vector and needs channel X to be in vector element 0.
+ *
* @param length length of the offsets
* @param src_width src element width in bits
* @param dst_width result element width in bits (src will be expanded to fit)
+ * @param aligned whether the data is guaranteed to be aligned (to src_width)
* @param base_ptr base pointer, should be a i8 pointer type.
* @param offsets vector with offsets
+ * @param vector_justify select vector rather than integer justification
*/
LLVMValueRef
lp_build_gather(struct gallivm_state *gallivm,
unsigned length,
unsigned src_width,
unsigned dst_width,
+ boolean aligned,
LLVMValueRef base_ptr,
- LLVMValueRef offsets)
+ LLVMValueRef offsets,
+ boolean vector_justify)
{
LLVMValueRef res;
if (length == 1) {
/* Scalar */
return lp_build_gather_elem(gallivm, length,
- src_width, dst_width,
- base_ptr, offsets, 0);
+ src_width, dst_width, aligned,
+ base_ptr, offsets, 0, vector_justify);
} else {
/* Vector */
LLVMValueRef index = lp_build_const_int32(gallivm, i);
LLVMValueRef elem;
elem = lp_build_gather_elem(gallivm, length,
- src_width, dst_width,
- base_ptr, offsets, i);
+ src_width, dst_width, aligned,
+ base_ptr, offsets, i, vector_justify);
res = LLVMBuildInsertElement(gallivm->builder, res, elem, index, "");
}
}
return res;
}
+
+/**
+ * Pack an array of scalar LLVM values into a single vector value.
+ *
+ * Element i of the returned vector is values[i].  The vector's element
+ * type is taken from values[0]; LLVM's insertelement requires every
+ * inserted value to have that same type.
+ *
+ * @param gallivm      gallivm state providing the IR builder
+ * @param values       array of at least value_count values
+ * @param value_count  number of values to pack, must be non-zero
+ * @return a vector with value_count elements
+ */
+LLVMValueRef
+lp_build_gather_values(struct gallivm_state *gallivm,
+                       LLVMValueRef *values,
+                       unsigned value_count)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMTypeRef vec_type;
+   LLVMValueRef vec;
+   unsigned i;
+
+   /*
+    * values[0] supplies the element type and LLVM has no zero-length
+    * vector type, so an empty input cannot be represented.
+    */
+   assert(values);
+   assert(value_count > 0);
+
+   vec_type = LLVMVectorType(LLVMTypeOf(values[0]), value_count);
+   vec = LLVMGetUndef(vec_type);
+
+   for (i = 0; i < value_count; i++) {
+      LLVMValueRef index = lp_build_const_int32(gallivm, i);
+      vec = LLVMBuildInsertElement(builder, vec, values[i], index, "");
+   }
+   return vec;
+}