val = lp_build_fetch_rgba_aos(gallivm,
format_desc,
lp_float32_vec4_type(),
+ FALSE,
map_ptr,
zero, zero, zero);
LLVMBuildStore(builder, val, temp_ptr);
lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
const struct util_format_description *format_desc,
struct lp_type type,
+ boolean aligned,
LLVMValueRef base_ptr,
LLVMValueRef offset,
LLVMValueRef i,
* Fetch a pixel into a 4 float AoS.
*
* \param format_desc describes format of the image we're fetching from
+ * \param aligned whether the data is guaranteed to be aligned
* \param ptr address of the pixel block (or the texel if uncompressed)
* \param i, j the sub-block pixel coordinates. For non-compressed formats
* these will always be (0, 0).
lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
const struct util_format_description *format_desc,
struct lp_type type,
+ boolean aligned,
LLVMValueRef base_ptr,
LLVMValueRef offset,
LLVMValueRef i,
packed = lp_build_gather(gallivm, type.length/4,
format_desc->block.bits, type.width*4,
- base_ptr, offset, TRUE);
+ aligned, base_ptr, offset, TRUE);
assert(format_desc->block.bits <= vec_len);
LLVMValueRef packed;
packed = lp_build_gather_elem(gallivm, num_pixels,
- format_desc->block.bits, 32,
+ format_desc->block.bits, 32, aligned,
base_ptr, offset, k, FALSE);
tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm,
type.length,
format_desc->block.bits,
type.width,
+ TRUE,
base_ptr, offset, FALSE);
/*
packed = lp_build_gather(gallivm, type.length,
format_desc->block.bits,
- type.width, base_ptr, offset,
- FALSE);
+ type.width, TRUE,
+ base_ptr, offset, FALSE);
if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) {
lp_build_r11g11b10_to_float(gallivm, packed, rgba_out);
}
unsigned mask = (1 << 8) - 1;
LLVMValueRef s_offset = lp_build_const_int_vec(gallivm, type, 4);
offset = LLVMBuildAdd(builder, offset, s_offset, "");
- packed = lp_build_gather(gallivm, type.length,
- 32, type.width, base_ptr, offset, FALSE);
+ packed = lp_build_gather(gallivm, type.length, 32, type.width,
+ TRUE, base_ptr, offset, FALSE);
packed = LLVMBuildAnd(builder, packed,
lp_build_const_int_vec(gallivm, type, mask), "");
}
else {
assert (format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
- packed = lp_build_gather(gallivm, type.length,
- 32, type.width, base_ptr, offset, TRUE);
+ packed = lp_build_gather(gallivm, type.length, 32, type.width,
+ TRUE, base_ptr, offset, TRUE);
packed = LLVMBuildBitCast(builder, packed,
lp_build_vec_type(gallivm, type), "");
}
tmp_type.norm = TRUE;
tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
- base_ptr, offset, i, j);
+ TRUE, base_ptr, offset, i, j);
lp_build_rgba8_to_fi32_soa(gallivm,
type,
/* Get a single float[4]={R,G,B,A} pixel */
tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
- base_ptr, offset_elem,
+ TRUE, base_ptr, offset_elem,
i_elem, j_elem);
/*
assert(format_desc->block.width == 2);
assert(format_desc->block.height == 1);
- packed = lp_build_gather(gallivm, n, 32, 32, base_ptr, offset, FALSE);
+ packed = lp_build_gather(gallivm, n, 32, 32, TRUE, base_ptr, offset, FALSE);
(void)j;
unsigned length,
unsigned src_width,
unsigned dst_width,
+ boolean aligned,
LLVMValueRef base_ptr,
LLVMValueRef offsets,
unsigned i,
ptr = LLVMBuildBitCast(gallivm->builder, ptr, src_ptr_type, "");
res = LLVMBuildLoad(gallivm->builder, ptr, "");
+ /* XXX
+ * On some archs we probably really want to avoid having to deal
+ * with alignments lower than 4 bytes (if fetch size is a power of
+ * two >= 32). On x86 it doesn't matter, however.
+ * We should be able to guarantee full alignment for any kind of texture
+ * fetch (except ARB_texture_buffer_range, oops), but not for vertex fetch
+ * (there's PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY and friends,
+ * but I don't think that's quite what we wanted).
+ * For ARB_texture_buffer_range, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT
+ * looks like a good fit, but it seems neither this cap bit nor OpenGL
+ * enforces what we want. What we want is what d3d10 does: the offset
+ * needs to be aligned to the element size. GL, however, has a byte
+ * alignment regardless of element size, which would only leave us with
+ * a minimum alignment restriction of 16, and that doesn't make much
+ * sense if the type isn't 4x32bit. Due to the translation of offsets to
+ * first_elem in sampler_views, it actually seems gallium could not do
+ * anything else except 16 no matter what...
+ */
+ if (!aligned) {
+ lp_set_load_alignment(res, 1);
+ }
+
assert(src_width <= dst_width);
if (src_width > dst_width) {
res = LLVMBuildTrunc(gallivm->builder, res, dst_elem_type, "");
* @param length length of the offsets
* @param src_width src element width in bits
* @param dst_width result element width in bits (src will be expanded to fit)
+ * @param aligned whether the data is guaranteed to be aligned (to src_width)
* @param base_ptr base pointer, should be a i8 pointer type.
* @param offsets vector with offsets
* @param vector_justify select vector rather than integer justification
unsigned length,
unsigned src_width,
unsigned dst_width,
+ boolean aligned,
LLVMValueRef base_ptr,
LLVMValueRef offsets,
boolean vector_justify)
if (length == 1) {
/* Scalar */
return lp_build_gather_elem(gallivm, length,
- src_width, dst_width,
+ src_width, dst_width, aligned,
base_ptr, offsets, 0, vector_justify);
} else {
/* Vector */
LLVMValueRef index = lp_build_const_int32(gallivm, i);
LLVMValueRef elem;
elem = lp_build_gather_elem(gallivm, length,
- src_width, dst_width,
+ src_width, dst_width, aligned,
base_ptr, offsets, i, vector_justify);
res = LLVMBuildInsertElement(gallivm->builder, res, elem, index, "");
}
unsigned length,
unsigned src_width,
unsigned dst_width,
+ boolean aligned,
LLVMValueRef base_ptr,
LLVMValueRef offsets,
unsigned i,
unsigned length,
unsigned src_width,
unsigned dst_width,
+ boolean aligned,
LLVMValueRef base_ptr,
LLVMValueRef offsets,
boolean vector_justify);
bld->texel_type.length,
bld->format_desc->block.bits,
bld->texel_type.width,
+ TRUE,
data_ptr, offset, TRUE);
rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
bld->format_desc,
u8n.type,
+ TRUE,
data_ptr, offset,
x_subcoord,
y_subcoord);
bld->texel_type.length,
bld->format_desc->block.bits,
bld->texel_type.width,
+ TRUE,
data_ptr, offset[k][j][i], TRUE);
rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
bld->format_desc,
u8n.type,
+ TRUE,
data_ptr, offset[k][j][i],
x_subcoord[i],
y_subcoord[j]);
block = LLVMAppendBasicBlockInContext(context, func, "entry");
LLVMPositionBuilderAtEnd(builder, block);
- rgba = lp_build_fetch_rgba_aos(gallivm, desc, type,
+ rgba = lp_build_fetch_rgba_aos(gallivm, desc, type, TRUE,
packed_ptr, offset, i, j);
LLVMBuildStore(builder, rgba, rgba_ptr);
}
/* To ensure it's 16-byte aligned */
+ /* Could skip this and use unaligned lp_build_fetch_rgba_aos */
memcpy(packed, test->packed, sizeof packed);
for (i = 0; i < desc->block.height; ++i) {