const struct lp_derivatives *derivs)
{
struct gallivm_state *gallivm = bld->gallivm;
- struct lp_build_context *int_size_bld = &bld->int_size_bld;
- struct lp_build_context *float_size_bld = &bld->float_size_bld;
+ struct lp_build_context *int_size_bld = &bld->int_size_in_bld;
+ struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
struct lp_build_context *float_bld = &bld->float_bld;
struct lp_build_context *coord_bld = &bld->coord_bld;
struct lp_build_context *perquadf_bld = &bld->perquadf_bld;
}
}
rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
- perquadf_bld->type, rho);
+ perquadf_bld->type, rho, 0);
}
else {
if (dims <= 1) {
else {
if (explicit_lod) {
lod = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
- perquadf_bld->type, explicit_lod);
+ perquadf_bld->type, explicit_lod, 0);
}
else {
LLVMValueRef rho;
/* add shader lod bias */
if (lod_bias) {
lod_bias = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
- perquadf_bld->type, lod_bias);
+ perquadf_bld->type, lod_bias, 0);
lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias");
}
}
/**
* Return pointer to a single mipmap level.
- * \param data_array array of pointers to mipmap levels
* \param level integer mipmap level
*/
LLVMValueRef
return data_ptr;
}
+/**
+ * Return (per-pixel) offsets to mip levels.
+ * \param level integer mipmap level
+ */
+LLVMValueRef
+lp_build_get_mip_offsets(struct lp_build_sample_context *bld,
+ LLVMValueRef level)
+{
+ LLVMBuilderRef builder = bld->gallivm->builder;
+ LLVMValueRef indexes[2], offsets, offset1;
+
+ indexes[0] = lp_build_const_int32(bld->gallivm, 0);
+ if (bld->num_lods == 1) {
+ indexes[1] = level;
+ offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, "");
+ offset1 = LLVMBuildLoad(builder, offset1, "");
+ offsets = lp_build_broadcast_scalar(&bld->int_coord_bld, offset1);
+ }
+ else if (bld->num_lods == bld->coord_bld.type.length / 4) {
+ unsigned i;
+
+ offsets = bld->int_coord_bld.undef;
+ for (i = 0; i < bld->num_lods; i++) {
+ LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
+ LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
+ indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
+ offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, "");
+ offset1 = LLVMBuildLoad(builder, offset1, "");
+ offsets = LLVMBuildInsertElement(builder, offsets, offset1, indexo, "");
+ }
+ offsets = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, offsets, 0);
+ }
+ else {
+ unsigned i;
+
+ assert (bld->num_lods == bld->coord_bld.type.length);
+
+ offsets = bld->int_coord_bld.undef;
+ for (i = 0; i < bld->num_lods; i++) {
+ LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
+ indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
+ offset1 = LLVMBuildGEP(builder, bld->mip_offsets, indexes, 2, "");
+ offset1 = LLVMBuildLoad(builder, offset1, "");
+ offsets = LLVMBuildInsertElement(builder, offsets, offset1, indexi, "");
+ }
+ }
+ return offsets;
+}
+
/**
* Codegen equivalent for u_minify().
LLVMValueRef stride_array, LLVMValueRef level)
{
LLVMBuilderRef builder = bld->gallivm->builder;
- LLVMValueRef indexes[2], stride;
+ LLVMValueRef indexes[2], stride, stride1;
indexes[0] = lp_build_const_int32(bld->gallivm, 0);
- indexes[1] = level;
- stride = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
- stride = LLVMBuildLoad(builder, stride, "");
- stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride);
+ if (bld->num_lods == 1) {
+ indexes[1] = level;
+ stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
+ stride1 = LLVMBuildLoad(builder, stride1, "");
+ stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride1);
+ }
+ else if (bld->num_lods == bld->coord_bld.type.length / 4) {
+ LLVMValueRef stride1;
+ unsigned i;
+
+ stride = bld->int_coord_bld.undef;
+ for (i = 0; i < bld->num_lods; i++) {
+ LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
+ LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, i);
+ indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
+ stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
+ stride1 = LLVMBuildLoad(builder, stride1, "");
+ stride = LLVMBuildInsertElement(builder, stride, stride1, indexo, "");
+ }
+ stride = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, stride, 0);
+ }
+ else {
+ LLVMValueRef stride1;
+ unsigned i;
+
+ assert (bld->num_lods == bld->coord_bld.type.length);
+
+ stride = bld->int_coord_bld.undef;
+ for (i = 0; i < bld->coord_bld.type.length; i++) {
+ LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
+ indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
+ stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
+ stride1 = LLVMBuildLoad(builder, stride1, "");
+ stride = LLVMBuildInsertElement(builder, stride, stride1, indexi, "");
+ }
+ }
return stride;
}
const unsigned dims = bld->dims;
LLVMValueRef ilevel_vec;
- ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
-
/*
* Compute width, height, depth at mipmap level 'ilevel'
*/
- *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec);
+ if (bld->num_lods == 1) {
+ ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
+ *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec);
+ }
+ else {
+ LLVMValueRef int_size_vec;
+ LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
+ unsigned num_quads = bld->coord_bld.type.length / 4;
+ unsigned i;
+
+ if (bld->num_lods == num_quads) {
+ /*
+ * XXX: this should be #ifndef SANE_INSTRUCTION_SET.
+ * intel "forgot" the variable shift count instruction until avx2.
+ * A harmless 8x32 shift gets translated into 32 instructions
+ * (16 extracts, 8 scalar shifts, 8 inserts), llvm is apparently
+ * unable to recognize if there are really just 2 different shift
+ * count values. So do the shift 4-wide before expansion.
+ */
+ struct lp_build_context bld4;
+ struct lp_type type4;
+
+ type4 = bld->int_coord_bld.type;
+ type4.length = 4;
+
+ lp_build_context_init(&bld4, bld->gallivm, type4);
+
+ if (bld->dims == 1) {
+ assert(bld->int_size_in_bld.type.length == 1);
+ int_size_vec = lp_build_broadcast_scalar(&bld4,
+ bld->int_size);
+ }
+ else {
+ assert(bld->int_size_in_bld.type.length == 4);
+ int_size_vec = bld->int_size;
+ }
+
+ for (i = 0; i < num_quads; i++) {
+ LLVMValueRef ileveli;
+ LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
+
+ ileveli = lp_build_extract_broadcast(bld->gallivm,
+ bld->perquadi_bld.type,
+ bld4.type,
+ ilevel,
+ indexi);
+ tmp[i] = lp_build_minify(&bld4, int_size_vec, ileveli);
+ }
+ /*
+ * out_size is [w0, h0, d0, _, w1, h1, d1, _, ...] vector for dims > 1,
+ * [w0, w0, w0, w0, w1, w1, w1, w1, ...] otherwise.
+ */
+ *out_size = lp_build_concat(bld->gallivm,
+ tmp,
+ bld4.type,
+ num_quads);
+ }
+ else {
+ /* FIXME: this is terrible and results in _huge_ vector
+ * (for the dims > 1 case).
+ * Should refactor this (together with extract_image_sizes) and do
+ * something more useful. Could for instance if we have width,height
+ * with 4-wide vector pack all elements into a 8xi16 vector
+ * (on which we can still do useful math) instead of using a 16xi32
+ * vector.
+ * FIXME: some callers can't handle this yet.
+ * For dims == 1 this will create [w0, w1, w2, w3, ...] vector.
+ * For dims > 1 this will create [w0, h0, d0, _, w1, h1, d1, _, ...] vector.
+ */
+ assert(bld->num_lods == bld->coord_bld.type.length);
+ if (bld->dims == 1) {
+ assert(bld->int_size_bld.type.length == 1);
+ int_size_vec = lp_build_broadcast_scalar(&bld->int_coord_bld,
+ bld->int_size);
+ /* vector shift with variable shift count alert... */
+ *out_size = lp_build_minify(&bld->int_coord_bld, int_size_vec, ilevel);
+ }
+ else {
+ LLVMValueRef ilevel1;
+ for (i = 0; i < bld->num_lods; i++) {
+ LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
+ ilevel1 = lp_build_extract_broadcast(bld->gallivm, bld->int_coord_type,
+ bld->int_size_in_bld.type, ilevel, indexi);
+ tmp[i] = bld->int_size;
+ tmp[i] = lp_build_minify(&bld->int_size_in_bld, tmp[i], ilevel1);
+ }
+ int_size_vec = lp_build_concat(bld->gallivm,
+ tmp,
+ bld->int_size_in_bld.type,
+ bld->num_lods);
+ }
+ }
+ }
if (dims >= 2) {
*row_stride_vec = lp_build_get_level_stride_vec(bld,
*/
void
lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
- struct lp_type size_type,
+ struct lp_build_context *size_bld,
struct lp_type coord_type,
LLVMValueRef size,
LLVMValueRef *out_width,
{
const unsigned dims = bld->dims;
LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
+ struct lp_type size_type = size_bld->type;
+
+ if (bld->num_lods == 1) {
+ *out_width = lp_build_extract_broadcast(bld->gallivm,
+ size_type,
+ coord_type,
+ size,
+ LLVMConstInt(i32t, 0, 0));
+ if (dims >= 2) {
+ *out_height = lp_build_extract_broadcast(bld->gallivm,
+ size_type,
+ coord_type,
+ size,
+ LLVMConstInt(i32t, 1, 0));
+ if (dims == 3) {
+ *out_depth = lp_build_extract_broadcast(bld->gallivm,
+ size_type,
+ coord_type,
+ size,
+ LLVMConstInt(i32t, 2, 0));
+ }
+ }
+ }
+ else {
+ unsigned num_quads = bld->coord_bld.type.length / 4;
- *out_width = lp_build_extract_broadcast(bld->gallivm,
- size_type,
- coord_type,
- size,
- LLVMConstInt(i32t, 0, 0));
- if (dims >= 2) {
- *out_height = lp_build_extract_broadcast(bld->gallivm,
- size_type,
- coord_type,
- size,
- LLVMConstInt(i32t, 1, 0));
- if (dims == 3) {
- *out_depth = lp_build_extract_broadcast(bld->gallivm,
- size_type,
- coord_type,
- size,
- LLVMConstInt(i32t, 2, 0));
+ if (dims == 1) {
+ *out_width = size;
+ }
+ else if (bld->num_lods == num_quads) {
+ *out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0);
+ if (dims >= 2) {
+ *out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1);
+ if (dims == 3) {
+ *out_depth = lp_build_swizzle_scalar_aos(size_bld, size, 2);
+ }
+ }
+ }
+ else {
+ assert(bld->num_lods == bld->coord_type.length);
+ *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
+ coord_type, size, 0);
+ if (dims >= 2) {
+ *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
+ coord_type, size, 1);
+ if (dims == 3) {
+ *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
+ coord_type, size, 2);
+ }
+ }
}
}
}
LLVMValueRef depth;
lp_build_extract_image_sizes(bld,
- bld->float_size_type,
+ &bld->float_size_bld,
bld->coord_type,
flt_size,
&width,