draw_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base,
struct gallivm_state *gallivm,
struct lp_type type,
+ boolean is_fetch,
unsigned unit,
- unsigned num_coords,
const LLVMValueRef *coords,
+ const LLVMValueRef *offsets,
const struct lp_derivatives *derivs,
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
&sampler->dynamic_state.static_state[unit],
&sampler->dynamic_state.base,
type,
+ is_fetch,
unit,
- num_coords, coords,
+ coords,
+ offsets,
derivs,
lod_bias, explicit_lod,
texel);
lp_build_size_query_soa(gallivm,
&sampler->dynamic_state.static_state[unit],
&sampler->dynamic_state.base,
- type,
+ type,
unit,
explicit_lod,
sizes_out);
const struct lp_derivatives *derivs)
{
struct gallivm_state *gallivm = bld->gallivm;
- struct lp_build_context *int_size_bld = &bld->int_size_bld;
- struct lp_build_context *float_size_bld = &bld->float_size_bld;
+ struct lp_build_context *int_size_bld = &bld->int_size_in_bld;
+ struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
struct lp_build_context *float_bld = &bld->float_bld;
struct lp_build_context *coord_bld = &bld->coord_bld;
struct lp_build_context *perquadf_bld = &bld->perquadf_bld;
}
}
rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
- perquadf_bld->type, rho);
+ perquadf_bld->type, rho, 0);
}
else {
if (dims <= 1) {
else {
if (explicit_lod) {
lod = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
- perquadf_bld->type, explicit_lod);
+ perquadf_bld->type, explicit_lod, 0);
}
else {
LLVMValueRef rho;
/* add shader lod bias */
if (lod_bias) {
lod_bias = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
- perquadf_bld->type, lod_bias);
+ perquadf_bld->type, lod_bias, 0);
lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias");
}
}
/**
* Return pointer to a single mipmap level.
- * \param data_array array of pointers to mipmap levels
* \param level integer mipmap level
*/
LLVMValueRef
return data_ptr;
}
+/**
+ * Return (per-pixel) offsets to mip levels.
+ *
+ * One offset is loaded from bld->mip_offsets for every lod value and the
+ * loaded values are spread over a vector of int_coord_bld type.
+ *
+ * \param level integer mipmap level; used directly as an index when
+ * bld->num_lods is 1, otherwise a vector from which
+ * bld->num_lods levels are extracted
+ */
+LLVMValueRef
+lp_build_get_mip_offsets(struct lp_build_sample_context *bld,
+ LLVMValueRef level)
+{
+ LLVMBuilderRef builder = bld->gallivm->builder;
+ LLVMValueRef gep_idx[2], result, loaded;
+ unsigned lane_step, lod;
+
+ gep_idx[0] = lp_build_const_int32(bld->gallivm, 0);
+
+ if (bld->num_lods == 1) {
+ /* a single level for all lanes: load once, then broadcast */
+ gep_idx[1] = level;
+ loaded = LLVMBuildGEP(builder, bld->mip_offsets, gep_idx, 2, "");
+ loaded = LLVMBuildLoad(builder, loaded, "");
+ return lp_build_broadcast_scalar(&bld->int_coord_bld, loaded);
+ }
+
+ if (bld->num_lods == bld->coord_bld.type.length / 4) {
+ /* one level per group of four lanes: only lanes 0, 4, 8, ... are written */
+ lane_step = 4;
+ }
+ else {
+ /* one level per lane */
+ assert (bld->num_lods == bld->coord_bld.type.length);
+ lane_step = 1;
+ }
+
+ result = bld->int_coord_bld.undef;
+ for (lod = 0; lod < bld->num_lods; lod++) {
+ LLVMValueRef src_lane = lp_build_const_int32(bld->gallivm, lod);
+ LLVMValueRef dst_lane = lp_build_const_int32(bld->gallivm, lane_step * lod);
+ gep_idx[1] = LLVMBuildExtractElement(builder, level, src_lane, "");
+ loaded = LLVMBuildGEP(builder, bld->mip_offsets, gep_idx, 2, "");
+ loaded = LLVMBuildLoad(builder, loaded, "");
+ result = LLVMBuildInsertElement(builder, result, loaded, dst_lane, "");
+ }
+
+ if (lane_step == 4) {
+ /* presumably replicates channel 0 of every group of four over the group */
+ result = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, result, 0);
+ }
+ return result;
+}
+
/**
* Codegen equivalent for u_minify().
LLVMValueRef stride_array, LLVMValueRef level)
{
LLVMBuilderRef builder = bld->gallivm->builder;
- LLVMValueRef indexes[2], stride;
+ LLVMValueRef indexes[2], stride, stride1;
indexes[0] = lp_build_const_int32(bld->gallivm, 0);
- indexes[1] = level;
- stride = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
- stride = LLVMBuildLoad(builder, stride, "");
- stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride);
+ if (bld->num_lods == 1) {
+ /* a single level for all lanes: load once, then broadcast */
+ indexes[1] = level;
+ stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
+ stride1 = LLVMBuildLoad(builder, stride1, "");
+ stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride1);
+ }
+ else if (bld->num_lods == bld->coord_bld.type.length / 4) {
+ unsigned i;
+
+ /*
+ * One level per group of four lanes. The stride for group i has to
+ * land in lane 4 * i (same as lp_build_get_mip_offsets()), because the
+ * swizzle below replicates channel 0 of every group of four.
+ */
+ stride = bld->int_coord_bld.undef;
+ for (i = 0; i < bld->num_lods; i++) {
+ LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
+ LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
+ indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
+ stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
+ stride1 = LLVMBuildLoad(builder, stride1, "");
+ stride = LLVMBuildInsertElement(builder, stride, stride1, indexo, "");
+ }
+ stride = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, stride, 0);
+ }
+ else {
+ unsigned i;
+
+ /* one level per lane */
+ assert (bld->num_lods == bld->coord_bld.type.length);
+
+ stride = bld->int_coord_bld.undef;
+ for (i = 0; i < bld->coord_bld.type.length; i++) {
+ LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
+ indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
+ stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
+ stride1 = LLVMBuildLoad(builder, stride1, "");
+ stride = LLVMBuildInsertElement(builder, stride, stride1, indexi, "");
+ }
+ }
return stride;
}
const unsigned dims = bld->dims;
LLVMValueRef ilevel_vec;
- ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
-
/*
* Compute width, height, depth at mipmap level 'ilevel'
*/
- *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec);
+ if (bld->num_lods == 1) {
+ ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
+ *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size, ilevel_vec);
+ }
+ else {
+ LLVMValueRef int_size_vec;
+ LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
+ unsigned num_quads = bld->coord_bld.type.length / 4;
+ unsigned i;
+
+ if (bld->num_lods == num_quads) {
+ /*
+ * XXX: this should be #ifndef SANE_INSTRUCTION_SET.
+ * intel "forgot" the variable shift count instruction until avx2.
+ * A harmless 8x32 shift gets translated into 32 instructions
+ * (16 extracts, 8 scalar shifts, 8 inserts), llvm is apparently
+ * unable to recognize if there are really just 2 different shift
+ * count values. So do the shift 4-wide before expansion.
+ */
+ struct lp_build_context bld4;
+ struct lp_type type4;
+
+ type4 = bld->int_coord_bld.type;
+ type4.length = 4;
+
+ lp_build_context_init(&bld4, bld->gallivm, type4);
+
+ if (bld->dims == 1) {
+ assert(bld->int_size_in_bld.type.length == 1);
+ int_size_vec = lp_build_broadcast_scalar(&bld4,
+ bld->int_size);
+ }
+ else {
+ assert(bld->int_size_in_bld.type.length == 4);
+ int_size_vec = bld->int_size;
+ }
+
+ for (i = 0; i < num_quads; i++) {
+ LLVMValueRef ileveli;
+ LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
+
+ ileveli = lp_build_extract_broadcast(bld->gallivm,
+ bld->perquadi_bld.type,
+ bld4.type,
+ ilevel,
+ indexi);
+ tmp[i] = lp_build_minify(&bld4, int_size_vec, ileveli);
+ }
+ /*
+ * out_size is [w0, h0, d0, _, w1, h1, d1, _, ...] vector for dims > 1,
+ * [w0, w0, w0, w0, w1, w1, w1, w1, ...] otherwise.
+ */
+ *out_size = lp_build_concat(bld->gallivm,
+ tmp,
+ bld4.type,
+ num_quads);
+ }
+ else {
+ /* FIXME: this is terrible and results in _huge_ vector
+ * (for the dims > 1 case).
+ * Should refactor this (together with extract_image_sizes) and do
+ * something more useful. Could for instance if we have width,height
+ * with 4-wide vector pack all elements into a 8xi16 vector
+ * (on which we can still do useful math) instead of using a 16xi32
+ * vector.
+ * FIXME: some callers can't handle this yet.
+ * For dims == 1 this will create [w0, w1, w2, w3, ...] vector.
+ * For dims > 1 this will create [w0, h0, d0, _, w1, h1, d1, _, ...] vector.
+ */
+ assert(bld->num_lods == bld->coord_bld.type.length);
+ if (bld->dims == 1) {
+ /* bld->int_size is the incoming size here, so check the incoming type */
+ assert(bld->int_size_in_bld.type.length == 1);
+ int_size_vec = lp_build_broadcast_scalar(&bld->int_coord_bld,
+ bld->int_size);
+ /* vector shift with variable shift count alert... */
+ *out_size = lp_build_minify(&bld->int_coord_bld, int_size_vec, ilevel);
+ }
+ else {
+ LLVMValueRef ilevel1;
+ for (i = 0; i < bld->num_lods; i++) {
+ LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
+ ilevel1 = lp_build_extract_broadcast(bld->gallivm, bld->int_coord_type,
+ bld->int_size_in_bld.type, ilevel, indexi);
+ tmp[i] = bld->int_size;
+ tmp[i] = lp_build_minify(&bld->int_size_in_bld, tmp[i], ilevel1);
+ }
+ /* the result has to reach the caller through out_size, as in every other path */
+ *out_size = lp_build_concat(bld->gallivm,
+ tmp,
+ bld->int_size_in_bld.type,
+ bld->num_lods);
+ }
+ }
+ }
if (dims >= 2) {
*row_stride_vec = lp_build_get_level_stride_vec(bld,
*/
void
lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
- struct lp_type size_type,
+ struct lp_build_context *size_bld,
struct lp_type coord_type,
LLVMValueRef size,
LLVMValueRef *out_width,
{
const unsigned dims = bld->dims;
LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
+ struct lp_type size_type = size_bld->type;
+
+ if (bld->num_lods == 1) {
+ /* one size for all lanes: broadcast channel 0/1/2 of the size vector */
+ *out_width = lp_build_extract_broadcast(bld->gallivm,
+ size_type,
+ coord_type,
+ size,
+ LLVMConstInt(i32t, 0, 0));
+ if (dims >= 2) {
+ *out_height = lp_build_extract_broadcast(bld->gallivm,
+ size_type,
+ coord_type,
+ size,
+ LLVMConstInt(i32t, 1, 0));
+ if (dims == 3) {
+ *out_depth = lp_build_extract_broadcast(bld->gallivm,
+ size_type,
+ coord_type,
+ size,
+ LLVMConstInt(i32t, 2, 0));
+ }
+ }
+ }
+ else {
+ unsigned num_quads = bld->coord_bld.type.length / 4;
- *out_width = lp_build_extract_broadcast(bld->gallivm,
- size_type,
- coord_type,
- size,
- LLVMConstInt(i32t, 0, 0));
- if (dims >= 2) {
- *out_height = lp_build_extract_broadcast(bld->gallivm,
- size_type,
- coord_type,
- size,
- LLVMConstInt(i32t, 1, 0));
- if (dims == 3) {
- *out_depth = lp_build_extract_broadcast(bld->gallivm,
- size_type,
- coord_type,
- size,
- LLVMConstInt(i32t, 2, 0));
+ if (dims == 1) {
+ /* the size vector is used as the width vector as is */
+ *out_width = size;
+ }
+ else if (bld->num_lods == num_quads) {
+ *out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0);
+ if (dims >= 2) {
+ *out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1);
+ if (dims == 3) {
+ *out_depth = lp_build_swizzle_scalar_aos(size_bld, size, 2);
+ }
+ }
+ }
+ else {
+ assert(bld->num_lods == bld->coord_type.length);
+ /* pack channel 0 / 1 / 2 of every aos size entry into width / height / depth */
+ *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
+ coord_type, size, 0);
+ if (dims >= 2) {
+ *out_height = lp_build_pack_aos_scalars(bld->gallivm, size_type,
+ coord_type, size, 1);
+ if (dims == 3) {
+ *out_depth = lp_build_pack_aos_scalars(bld->gallivm, size_type,
+ coord_type, size, 2);
+ }
+ }
}
}
}
LLVMValueRef depth;
lp_build_extract_image_sizes(bld,
- bld->float_size_type,
+ &bld->float_size_bld,
bld->coord_type,
flt_size,
&width,
/** SIMD vector width */
unsigned vector_width;
+ /** Number of lod values (valid values are 1, length/4, length) */
+ unsigned num_lods;
+
/** regular scalar float type */
struct lp_type float_type;
struct lp_build_context float_bld;
struct lp_build_context int_coord_bld;
/** Unsigned integer texture size */
+ struct lp_type int_size_in_type;
+ struct lp_build_context int_size_in_bld;
+
+ /** Float incoming texture size */
+ struct lp_type float_size_in_type;
+ struct lp_build_context float_size_in_bld;
+
+ /** Unsigned integer texture size (might be per quad) */
struct lp_type int_size_type;
struct lp_build_context int_size_bld;
- /** Unsigned integer texture size */
+ /** Float texture size (might be per quad) */
struct lp_type float_size_type;
struct lp_build_context float_size_bld;
{
switch (tex) {
case PIPE_TEXTURE_1D:
+ case PIPE_BUFFER:
return 1;
case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_RECT:
LLVMValueRef level);
+LLVMValueRef
+lp_build_get_mip_offsets(struct lp_build_sample_context *bld,
+ LLVMValueRef level);
+
+
void
lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
LLVMValueRef ilevel,
void
lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
- struct lp_type size_type,
+ struct lp_build_context *size_bld,
struct lp_type coord_type,
LLVMValueRef size,
LLVMValueRef *out_width,
const struct lp_sampler_static_state *static_state,
struct lp_sampler_dynamic_state *dynamic_state,
struct lp_type fp_type,
+ boolean is_fetch,
unsigned unit,
- unsigned num_coords,
const LLVMValueRef *coords,
+ const LLVMValueRef *offsets,
const struct lp_derivatives *derivs,
LLVMValueRef lod_bias,
LLVMValueRef explicit_lod,
void
lp_build_sample_nop(struct gallivm_state *gallivm,
struct lp_type type,
- unsigned num_coords,
const LLVMValueRef *coords,
LLVMValueRef texel_out[4]);
i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);
lp_build_extract_image_sizes(bld,
- bld->int_size_type,
+ &bld->int_size_bld,
bld->int_coord_type,
int_size,
&width_vec,
flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);
lp_build_extract_image_sizes(bld,
- bld->float_size_type,
+ &bld->float_size_bld,
bld->coord_type,
flt_size,
&width_vec,
i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);
lp_build_extract_image_sizes(bld,
- bld->int_size_type,
+ &bld->int_size_bld,
bld->int_coord_type,
int_size,
&width_vec,
flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);
lp_build_extract_image_sizes(bld,
- bld->float_size_type,
+ &bld->float_size_bld,
bld->coord_type,
flt_size,
&width_vec,
LLVMValueRef x, y, z;
lp_build_extract_image_sizes(bld,
- bld->int_size_type,
+ &bld->int_size_bld,
bld->int_coord_type,
size,
&width_vec, &height_vec, &depth_vec);
flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
lp_build_extract_image_sizes(bld,
- bld->float_size_type,
+ &bld->float_size_bld,
bld->coord_type,
flt_size,
&flt_width_vec, &flt_height_vec, &flt_depth_vec);
int chan;
lp_build_extract_image_sizes(bld,
- bld->int_size_type,
+ &bld->int_size_bld,
bld->int_coord_type,
size,
&width_vec, &height_vec, &depth_vec);
flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
lp_build_extract_image_sizes(bld,
- bld->float_size_type,
+ &bld->float_size_bld,
bld->coord_type,
flt_size,
&flt_width_vec, &flt_height_vec, &flt_depth_vec);
}
+/**
+ * Texel fetch function.
+ * In contrast to general sampling there is no filtering, no coord minification,
+ * lod (if any) is always explicit uint, coords are uints (in terms of texel units)
+ * directly to be applied to the selected mip level (after adding texel offsets).
+ * This function handles texel fetch for all targets where texel fetch is supported
+ * (no cube maps, but 1d, 2d, 3d are supported, arrays and buffers should be too).
+ *
+ * \param bld sampling context; num_lods is overwritten here, and the
+ * per-lod size types/contexts are re-initialized when an
+ * explicit lod is used on a non-buffer target
+ * \param unit texture unit, passed on to lp_build_nearest_mip_level()
+ * \param coords integer texel coordinates; coords[0..2] are all read
+ * regardless of bld->dims
+ * \param explicit_lod optional mip level, ignored for PIPE_BUFFER targets
+ * \param offsets texel offsets, one entry per dimension; an entry may be
+ * NULL in which case nothing is added for that dimension
+ * \param colors_out receives the channels produced by lp_build_fetch_rgba_soa()
+ */
+static void
+lp_build_fetch_texel(struct lp_build_sample_context *bld,
+ unsigned unit,
+ const LLVMValueRef *coords,
+ LLVMValueRef explicit_lod,
+ const LLVMValueRef *offsets,
+ LLVMValueRef *colors_out)
+{
+ struct lp_build_context *perquadi_bld = &bld->perquadi_bld;
+ struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
+ unsigned dims = bld->dims, chan;
+ LLVMValueRef size, ilevel;
+ LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;
+ LLVMValueRef x = coords[0], y = coords[1], z = coords[2];
+ LLVMValueRef width, height, depth, i, j; /* height/depth are only read when dims >= 2 / >= 3 */
+ LLVMValueRef offset, out_of_bounds, out1;
+
+ /* XXX just like ordinary sampling, we don't handle per-pixel lod (yet). */
+ if (explicit_lod && bld->static_state->target != PIPE_BUFFER) {
+ /* could also avoid this if there are no mipmaps */
+ /* XXX temporary hack until ordinary sampling handles per-quad lod the same */
+ bld->num_lods = bld->coord_type.length / 4; /* one lod per group of four lanes */
+ bld->float_size_type = bld->float_size_in_type;
+ bld->float_size_type.length = bld->num_lods > 1 ? bld->coord_type.length :
+ bld->float_size_in_type.length;
+ bld->int_size_type = lp_int_type(bld->float_size_type);
+ lp_build_context_init(&bld->int_size_bld, bld->gallivm, bld->int_size_type);
+ lp_build_context_init(&bld->float_size_bld, bld->gallivm, bld->float_size_type);
+
+ ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
+ perquadi_bld->type, explicit_lod, 0); /* channel 0 of every group of four */
+ lp_build_nearest_mip_level(bld, unit, ilevel, &ilevel);
+ }
+ else {
+ bld->num_lods = 1;
+ ilevel = lp_build_const_int32(bld->gallivm, 0); /* no lod given (or buffer): level 0 */
+ }
+ lp_build_mipmap_level_sizes(bld, ilevel,
+ &size,
+ &row_stride_vec, &img_stride_vec);
+ lp_build_extract_image_sizes(bld, &bld->int_size_bld, int_coord_bld->type,
+ size, &width, &height, &depth);
+
+ /* This is a lot like border sampling */
+ if (offsets[0]) {
+ /* XXX coords are really unsigned, offsets are signed */
+ x = lp_build_add(int_coord_bld, x, offsets[0]);
+ }
+ out_of_bounds = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
+ out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
+ out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1); /* x < 0 || x >= width */
+
+ if (dims >= 2) {
+ if (offsets[1]) {
+ y = lp_build_add(int_coord_bld, y, offsets[1]);
+ }
+ out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
+ out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
+ out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
+ out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
+
+ if (dims >= 3) {
+ if (offsets[2]) {
+ z = lp_build_add(int_coord_bld, z, offsets[2]);
+ }
+ out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
+ out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
+ out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
+ out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
+ }
+ }
+
+ lp_build_sample_offset(int_coord_bld,
+ bld->format_desc,
+ x, y, z, row_stride_vec, img_stride_vec,
+ &offset, &i, &j);
+
+ if (bld->static_state->target != PIPE_BUFFER) {
+ offset = lp_build_add(int_coord_bld, offset,
+ lp_build_get_mip_offsets(bld, ilevel)); /* buffers get no mip offset */
+ }
+
+ offset = lp_build_andnot(int_coord_bld, offset, out_of_bounds); /* presumably forces offset 0 for out-of-bounds lanes - confirm mask is all-ones there */
+
+ lp_build_fetch_rgba_soa(bld->gallivm,
+ bld->format_desc,
+ bld->texel_type,
+ bld->base_ptr, offset,
+ i, j,
+ colors_out);
+
+ if (0) { /* deliberately disabled, see below */
+ /*
+ * Not needed except for ARB_robust_buffer_access_behavior.
+ * Could use min/max above instead of out-of-bounds comparisons
+ * (in fact cast to unsigned and min only is sufficient)
+ * if we don't care about the result returned for out-of-bounds.
+ */
+ for (chan = 0; chan < 4; chan++) {
+ colors_out[chan] = lp_build_select(&bld->texel_bld, out_of_bounds,
+ bld->texel_bld.zero, colors_out[chan]);
+ }
+ }
+}
+
+
/**
* Do shadow test/comparison.
* \param p the texcoord Z (aka R, aka P) component
void
lp_build_sample_nop(struct gallivm_state *gallivm,
struct lp_type type,
- unsigned num_coords,
const LLVMValueRef *coords,
LLVMValueRef texel_out[4])
{
* 'texel' will return a vector of four LLVMValueRefs corresponding to
* R, G, B, A.
* \param type vector float type to use for coords, etc.
+ * \param is_fetch if this is a texel fetch instruction.
* \param derivs partial derivatives of (s,t,r,q) with respect to x and y
*/
void
const struct lp_sampler_static_state *static_state,
struct lp_sampler_dynamic_state *dynamic_state,
struct lp_type type,
+ boolean is_fetch,
unsigned unit,
- unsigned num_coords,
const LLVMValueRef *coords,
+ const LLVMValueRef *offsets,
const struct lp_derivatives *derivs,
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
bld.int_type = lp_type_int(32);
bld.coord_type = type;
bld.int_coord_type = lp_int_type(type);
- bld.float_size_type = lp_type_float(32);
- bld.float_size_type.length = dims > 1 ? 4 : 1;
- bld.int_size_type = lp_int_type(bld.float_size_type);
+ bld.float_size_in_type = lp_type_float(32);
+ bld.float_size_in_type.length = dims > 1 ? 4 : 1;
+ bld.int_size_in_type = lp_int_type(bld.float_size_in_type);
bld.texel_type = type;
bld.perquadf_type = type;
/* we want native vector size to be able to use our intrinsics */
bld.perquadf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
bld.perquadi_type = lp_int_type(bld.perquadf_type);
+ bld.num_lods = 1;
+ bld.float_size_type = bld.float_size_in_type;
+ bld.float_size_type.length = bld.num_lods > 1 ? type.length :
+ bld.float_size_in_type.length;
+ bld.int_size_type = lp_int_type(bld.float_size_type);
+
lp_build_context_init(&bld.float_bld, gallivm, bld.float_type);
lp_build_context_init(&bld.float_vec_bld, gallivm, type);
lp_build_context_init(&bld.int_bld, gallivm, bld.int_type);
lp_build_context_init(&bld.coord_bld, gallivm, bld.coord_type);
lp_build_context_init(&bld.int_coord_bld, gallivm, bld.int_coord_type);
+ lp_build_context_init(&bld.int_size_in_bld, gallivm, bld.int_size_in_type);
+ lp_build_context_init(&bld.float_size_in_bld, gallivm, bld.float_size_in_type);
lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type);
lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type);
lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
bld.int_size = tex_width;
}
else {
- bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_bld.undef,
+ bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
tex_width, LLVMConstInt(i32t, 0, 0), "");
if (dims >= 2) {
bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
/* For debug: no-op texture sampling */
lp_build_sample_nop(gallivm,
bld.texel_type,
- num_coords,
coords,
texel_out);
}
static_state->wrap_t);
}
+ if (is_fetch) {
+ lp_build_fetch_texel(&bld, unit, coords,
+ explicit_lod, offsets,
+ texel_out);
+
+ if (static_state->target != PIPE_BUFFER) {
+ apply_sampler_swizzle(&bld, texel_out);
+ }
+
+ return;
+ }
+
lp_build_sample_common(&bld, unit,
&s, &t, &r,
derivs, lod_bias, explicit_lod,
bld4.int_type = lp_type_int(32);
bld4.coord_type = type4;
bld4.int_coord_type = lp_int_type(type4);
- bld4.float_size_type = lp_type_float(32);
- bld4.float_size_type.length = dims > 1 ? 4 : 1;
- bld4.int_size_type = lp_int_type(bld4.float_size_type);
+ bld4.float_size_in_type = lp_type_float(32);
+ bld4.float_size_in_type.length = dims > 1 ? 4 : 1;
+ bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
+ bld4.float_size_type = bld4.float_size_in_type;
+ bld4.int_size_type = bld4.int_size_in_type;
bld4.texel_type = type4;
bld4.perquadf_type = type4;
/* we want native vector size to be able to use our intrinsics */
bld4.perquadf_type.length = 1;
bld4.perquadi_type = lp_int_type(bld4.perquadf_type);
+ bld4.num_lods = 1;
lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type);
lp_build_context_init(&bld4.float_vec_bld, gallivm, type4);
lp_build_context_init(&bld4.int_bld, gallivm, bld4.int_type);
lp_build_context_init(&bld4.coord_bld, gallivm, bld4.coord_type);
lp_build_context_init(&bld4.int_coord_bld, gallivm, bld4.int_coord_type);
+ lp_build_context_init(&bld4.int_size_in_bld, gallivm, bld4.int_size_in_type);
+ lp_build_context_init(&bld4.float_size_in_bld, gallivm, bld4.float_size_in_type);
lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type);
lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type);
lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
/**
- * Pack first element of aos values,
+ * Pack n-th element of aos values,
* pad out to destination size.
- * i.e. x1 _ _ _ x2 _ _ _ will become x1 x2 _ _
+ * i.e. x1 y1 _ _ x2 y2 _ _ will become x1 x2 _ _
*/
LLVMValueRef
lp_build_pack_aos_scalars(struct gallivm_state *gallivm,
struct lp_type src_type,
struct lp_type dst_type,
- const LLVMValueRef src)
+ const LLVMValueRef src,
+ unsigned channel)
{
LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
LLVMValueRef undef = LLVMGetUndef(i32t);
assert(num_src <= num_dst);
for (i = 0; i < num_src; i++) {
- shuffles[i] = LLVMConstInt(i32t, i * 4, 0);
+ shuffles[i] = LLVMConstInt(i32t, i * 4 + channel, 0);
}
for (i = num_src; i < num_dst; i++) {
shuffles[i] = undef;
lp_build_pack_aos_scalars(struct gallivm_state *gallivm,
struct lp_type src_type,
struct lp_type dst_type,
- const LLVMValueRef src);
+ const LLVMValueRef src,
+ unsigned channel);
LLVMValueRef
}
+
+/**
+ * Fetch a texture offset operand of an instruction.
+ *
+ * The tgsi_texture_offset is copied into a temporary, zeroed
+ * tgsi_full_src_register so that the per-file emit_fetch_funcs can be used.
+ *
+ * \param bld_base TGSI build context
+ * \param inst instruction owning the offset
+ * \param tex_off_op index into inst->TexOffsets
+ * \param chan_index channel to fetch, or LP_CHAN_ALL for all channels
+ * \return the fetched value (fetched as TGSI_TYPE_SIGNED), or
+ * bld_base->base.undef if the swizzle selects a channel above 2
+ * or the register file has no fetch function
+ */
+LLVMValueRef
+lp_build_emit_fetch_texoffset(
+ struct lp_build_tgsi_context *bld_base,
+ const struct tgsi_full_instruction *inst,
+ unsigned tex_off_op,
+ const unsigned chan_index)
+{
+ const struct tgsi_texture_offset *off = &inst->TexOffsets[tex_off_op];
+ struct tgsi_full_src_register reg;
+ unsigned swizzle;
+ LLVMValueRef res;
+ enum tgsi_opcode_type stype = TGSI_TYPE_SIGNED;
+
+ /* convert offset "register" to ordinary register so can use normal emit funcs */
+ memset(&reg, 0, sizeof(reg));
+ reg.Register.File = off->File;
+ reg.Register.Index = off->Index;
+ reg.Register.SwizzleX = off->SwizzleX;
+ reg.Register.SwizzleY = off->SwizzleY;
+ reg.Register.SwizzleZ = off->SwizzleZ;
+
+ if (chan_index == LP_CHAN_ALL) {
+ swizzle = ~0;
+ } else {
+ swizzle = tgsi_util_get_src_register_swizzle(&reg.Register, chan_index);
+ /* offsets only have x, y and z */
+ if (swizzle > 2) {
+ assert(0 && "invalid swizzle in emit_fetch_texoffset()");
+ return bld_base->base.undef;
+ }
+ }
+
+ assert(off->Index <= bld_base->info->file_max[off->File]);
+
+ if (bld_base->emit_fetch_funcs[off->File]) {
+ res = bld_base->emit_fetch_funcs[off->File](bld_base, &reg, stype,
+ swizzle);
+ } else {
+ assert(0 && "invalid src register in emit_fetch_texoffset()");
+ return bld_base->base.undef;
+ }
+
+ /*
+ * Swizzle the argument
+ */
+
+ if (swizzle == ~0) {
+ res = bld_base->emit_swizzle(bld_base, res,
+ off->SwizzleX,
+ off->SwizzleY,
+ off->SwizzleZ,
+ /* there's no 4th channel */
+ off->SwizzleX);
+ }
+
+ return res;
+
+}
+
+
boolean
lp_build_tgsi_llvm(
struct lp_build_tgsi_context * bld_base,
(*emit_fetch_texel)( const struct lp_build_sampler_soa *sampler,
struct gallivm_state *gallivm,
struct lp_type type,
+ boolean is_fetch,
unsigned unit,
- unsigned num_coords,
const LLVMValueRef *coords,
+ const LLVMValueRef *offsets,
const struct lp_derivatives *derivs,
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
unsigned src_op,
const unsigned chan_index);
+
+LLVMValueRef
+lp_build_emit_fetch_texoffset(
+ struct lp_build_tgsi_context *bld_base,
+ const struct tgsi_full_instruction *inst,
+ unsigned tex_off_op,
+ const unsigned chan_index);
+
boolean
lp_build_tgsi_llvm(
struct lp_build_tgsi_context * bld_base,
unsigned unit;
LLVMValueRef lod_bias, explicit_lod;
LLVMValueRef oow = NULL;
- LLVMValueRef coords[3];
+ LLVMValueRef coords[4];
+ LLVMValueRef offsets[3] = { NULL };
struct lp_derivatives derivs;
unsigned num_coords;
unsigned dims;
if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
}
- for (i = num_coords; i < 3; i++) {
+ for (i = num_coords; i < 4; i++) {
coords[i] = bld->bld_base.base.undef;
}
unit = inst->Src[1].Register.Index;
}
+ /* some advanced gather instructions (txgo) would require 4 offsets */
+ if (inst->Texture.NumOffsets == 1) {
+ unsigned dim;
+ for (dim = 0; dim < dims; dim++) {
+ offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim );
+ }
+ }
+
bld->sampler->emit_fetch_texel(bld->sampler,
bld->bld_base.base.gallivm,
bld->bld_base.base.type,
- unit, num_coords, coords,
+ FALSE,
+ unit, coords,
+ offsets,
&derivs,
lod_bias, explicit_lod,
texel);
}
+/**
+ * Emit a texel fetch through the sampler generator with is_fetch set to TRUE.
+ *
+ * Coordinates are fetched from source operand 0. For every target except
+ * TGSI_TEXTURE_BUFFER the explicit lod is fetched from channel 3 of that
+ * operand. Texel offsets are fetched when the instruction carries exactly one.
+ * Derivatives are passed as undef and no lod bias is passed.
+ *
+ * \param bld soa build context; without a sampler a warning is printed
+ * and the four texel values are set to undef
+ * \param inst the instruction
+ * \param texel receives the fetched texel
+ */
+static void
+emit_txf( struct lp_build_tgsi_soa_context *bld,
+ const struct tgsi_full_instruction *inst,
+ LLVMValueRef *texel)
+{
+ LLVMValueRef undef_vec = LLVMGetUndef(bld->bld_base.base.int_vec_type);
+ LLVMValueRef lod = NULL;
+ LLVMValueRef texcoords[3];
+ LLVMValueRef texoffsets[3] = { NULL };
+ struct lp_derivatives derivs;
+ unsigned coord_count;
+ unsigned dim_count;
+ unsigned sampler_unit;
+ unsigned c;
+
+ if (!bld->sampler) {
+ _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
+ for (c = 0; c < 4; c++) {
+ texel[c] = undef_vec;
+ }
+ return;
+ }
+
+ /* number of coordinates to fetch / number of dimensions that take an offset */
+ switch (inst->Texture.Texture) {
+ case TGSI_TEXTURE_1D:
+ case TGSI_TEXTURE_BUFFER:
+ coord_count = 1;
+ dim_count = 1;
+ break;
+ case TGSI_TEXTURE_1D_ARRAY:
+ coord_count = 2;
+ dim_count = 1;
+ break;
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_RECT:
+ coord_count = 2;
+ dim_count = 2;
+ break;
+ case TGSI_TEXTURE_2D_ARRAY:
+ coord_count = 3;
+ dim_count = 2;
+ break;
+ case TGSI_TEXTURE_3D:
+ coord_count = 3;
+ dim_count = 3;
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ /* no derivatives for a fetch */
+ derivs.ddx_ddy[0] = undef_vec;
+ derivs.ddx_ddy[1] = undef_vec;
+
+ /* always have lod except for buffers ? */
+ if (inst->Texture.Texture != TGSI_TEXTURE_BUFFER) {
+ lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
+ }
+
+ /* fetch the used coordinates, pad the rest with undef */
+ for (c = 0; c < 3; c++) {
+ if (c < coord_count) {
+ texcoords[c] = lp_build_emit_fetch( &bld->bld_base, inst, 0, c );
+ }
+ else {
+ texcoords[c] = undef_vec;
+ }
+ }
+
+ sampler_unit = inst->Src[1].Register.Index;
+
+ if (inst->Texture.NumOffsets == 1) {
+ for (c = 0; c < dim_count; c++) {
+ texoffsets[c] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, c );
+ }
+ }
+
+ bld->sampler->emit_fetch_texel(bld->sampler,
+ bld->bld_base.base.gallivm,
+ bld->bld_base.base.type,
+ TRUE,
+ sampler_unit, texcoords,
+ texoffsets,
+ &derivs,
+ NULL, lod,
+ texel);
+}
+
static void
emit_txq( struct lp_build_tgsi_soa_context *bld,
const struct tgsi_full_instruction *inst,
emit_txq(bld, emit_data->inst, emit_data->output);
}
+/**
+ * Opcode action callback: forwards the instruction and output array of
+ * emit_data to emit_txf(). The action argument is not used.
+ */
+static void
+txf_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ emit_txf(lp_soa_context(bld_base), emit_data->inst, emit_data->output);
+}
+
static void
cal_emit(
const struct lp_build_tgsi_action * action,
bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base);
case TGSI_OPCODE_USHR:
case TGSI_OPCODE_SHL:
case TGSI_OPCODE_TXQ:
+ case TGSI_OPCODE_TXF:
return TGSI_TYPE_UNSIGNED;
case TGSI_OPCODE_MOD:
case TGSI_OPCODE_I2F:
return 0;
case PIPE_CAP_SCALED_RESOLVE:
return 0;
+ /* this is a lie; we could support arbitrarily large offsets */
case PIPE_CAP_MIN_TEXEL_OFFSET:
+ return -8;
case PIPE_CAP_MAX_TEXEL_OFFSET:
- return 0;
+ return 7;
case PIPE_CAP_CONDITIONAL_RENDER:
return 1;
case PIPE_CAP_TEXTURE_BARRIER:
lp_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base,
struct gallivm_state *gallivm,
struct lp_type type,
+ boolean is_fetch,
unsigned unit,
- unsigned num_coords,
const LLVMValueRef *coords,
+ const LLVMValueRef *offsets,
const struct lp_derivatives *derivs,
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
assert(unit < PIPE_MAX_SAMPLERS);
if (LP_PERF & PERF_NO_TEX) {
- lp_build_sample_nop(gallivm, type, num_coords, coords, texel);
+ lp_build_sample_nop(gallivm, type, coords, texel);
return;
}
&sampler->dynamic_state.static_state[unit],
&sampler->dynamic_state.base,
type,
+ is_fetch,
unit,
- num_coords, coords,
+ coords,
+ offsets,
derivs,
lod_bias, explicit_lod,
texel);