* Texture sampling -- SoA.
*
* @author Jose Fonseca <jfonseca@vmware.com>
+ * @author Brian Paul <brianp@vmware.com>
*/
#include "pipe/p_defines.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_format.h"
-#include "util/u_cpu_detect.h"
#include "lp_bld_debug.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_pack.h"
#include "lp_bld_flow.h"
+#include "lp_bld_gather.h"
#include "lp_bld_format.h"
#include "lp_bld_sample.h"
+#include "lp_bld_quad.h"
/**
case PIPE_TEXTURE_1D:
return 1;
case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_RECT:
case PIPE_TEXTURE_CUBE:
return 2;
case PIPE_TEXTURE_3D:
}
-static LLVMValueRef
-lp_build_swizzle_chan_soa(struct lp_type type,
- const LLVMValueRef *unswizzled,
- enum util_format_swizzle swizzle)
-{
- switch (swizzle) {
- case PIPE_SWIZZLE_RED:
- case PIPE_SWIZZLE_GREEN:
- case PIPE_SWIZZLE_BLUE:
- case PIPE_SWIZZLE_ALPHA:
- return unswizzled[swizzle];
- case PIPE_SWIZZLE_ZERO:
- return lp_build_zero(type);
- case PIPE_SWIZZLE_ONE:
- return lp_build_one(type);
- default:
- assert(0);
- return lp_build_undef(type);
- }
-}
-
-
static void
-lp_build_swizzle_soa(struct lp_build_sample_context *bld,
- LLVMValueRef *texel)
+apply_sampler_swizzle(struct lp_build_sample_context *bld,
+ LLVMValueRef *texel)
{
- LLVMValueRef unswizzled[4];
unsigned char swizzles[4];
- unsigned chan;
-
- for (chan = 0; chan < 4; ++chan) {
- unswizzled[chan] = texel[chan];
- }
swizzles[0] = bld->static_state->swizzle_r;
swizzles[1] = bld->static_state->swizzle_g;
swizzles[2] = bld->static_state->swizzle_b;
swizzles[3] = bld->static_state->swizzle_a;
- for (chan = 0; chan < 4; ++chan) {
- unsigned swizzle = swizzles[chan];
- texel[chan] = lp_build_swizzle_chan_soa(bld->texel_type,
- unswizzled, swizzle);
- }
+ lp_build_swizzle_soa_inplace(&bld->texel_bld, texel, swizzles);
}
LLVMValueRef y_stride,
LLVMValueRef z_stride,
LLVMValueRef data_ptr,
- LLVMValueRef *texel)
+ LLVMValueRef texel_out[4])
{
const int dims = texture_dims(bld->static_state->target);
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
}
}
- /*
- * Describe the coordinates in terms of pixel blocks.
- *
- * TODO: pixel blocks are power of two. LLVM should convert rem/div to
- * bit arithmetic. Verify this.
- */
-
- if (bld->format_desc->block.width == 1) {
- i = bld->uint_coord_bld.zero;
- }
- else {
- LLVMValueRef block_width = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.width);
- i = LLVMBuildURem(bld->builder, x, block_width, "");
- x = LLVMBuildUDiv(bld->builder, x, block_width, "");
- }
-
- if (bld->format_desc->block.height == 1) {
- j = bld->uint_coord_bld.zero;
- }
- else {
- LLVMValueRef block_height = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.height);
- j = LLVMBuildURem(bld->builder, y, block_height, "");
- y = LLVMBuildUDiv(bld->builder, y, block_height, "");
- }
-
/* convert x,y,z coords to linear offset from start of texture, in bytes */
- offset = lp_build_sample_offset(&bld->uint_coord_bld,
- bld->format_desc,
- x, y, z, y_stride, z_stride);
+ lp_build_sample_offset(&bld->uint_coord_bld,
+ bld->format_desc,
+ x, y, z, y_stride, z_stride,
+ &offset, &i, &j);
if (use_border) {
/* If we can sample the border color, it means that texcoords may
bld->texel_type,
data_ptr, offset,
i, j,
- texel);
+ texel_out);
- lp_build_swizzle_soa(bld, texel);
+ apply_sampler_swizzle(bld, texel_out);
/*
* Note: if we find an app which frequently samples the texture border
LLVMValueRef border_chan =
lp_build_const_vec(bld->texel_type,
bld->static_state->border_color[chan]);
- texel[chan] = lp_build_select(&bld->texel_bld, use_border,
- border_chan, texel[chan]);
+ texel_out[chan] = lp_build_select(&bld->texel_bld, use_border,
+ border_chan, texel_out[chan]);
}
}
}
-static LLVMValueRef
-lp_build_sample_packed(struct lp_build_sample_context *bld,
- LLVMValueRef x,
- LLVMValueRef y,
- LLVMValueRef y_stride,
- LLVMValueRef data_array)
-{
- LLVMValueRef offset;
- LLVMValueRef data_ptr;
-
- offset = lp_build_sample_offset(&bld->uint_coord_bld,
- bld->format_desc,
- x, y, NULL, y_stride, NULL);
-
- assert(bld->format_desc->block.width == 1);
- assert(bld->format_desc->block.height == 1);
- assert(bld->format_desc->block.bits <= bld->texel_type.width);
-
- /* get pointer to mipmap level 0 data */
- data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
-
- return lp_build_gather(bld->builder,
- bld->texel_type.length,
- bld->format_desc->block.bits,
- bld->texel_type.width,
- data_ptr, offset);
-}
-
-
/**
* Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
*/
/**
- * We only support a few wrap modes in lp_build_sample_wrap_int() at this time.
+ * We only support a few wrap modes in lp_build_sample_wrap_linear_int() at this time.
* Return whether the given mode is supported by that function.
*/
static boolean
{
switch (mode) {
case PIPE_TEX_WRAP_REPEAT:
- case PIPE_TEX_WRAP_CLAMP:
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
return TRUE;
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
default:
return FALSE;
}
* \param length the texture size along one dimension
* \param is_pot if TRUE, length is a power of two
* \param wrap_mode one of PIPE_TEX_WRAP_x
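+ * \param stride pixel stride along the coordinate axis
+ * \param block_length length of the pixel block along the coordinate axis
+ * \param out_offset resulting relative offset for coord
+ * \param out_i resulting sub-block pixel coordinate for coord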
*/
-static LLVMValueRef
-lp_build_sample_wrap_int(struct lp_build_sample_context *bld,
- LLVMValueRef coord,
- LLVMValueRef length,
- boolean is_pot,
- unsigned wrap_mode)
+static void
+lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
+ unsigned block_length,
+ LLVMValueRef coord,
+ LLVMValueRef length,
+ LLVMValueRef stride,
+ boolean is_pot,
+ unsigned wrap_mode,
+ LLVMValueRef *out_offset,
+ LLVMValueRef *out_i)
{
struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
coord = LLVMBuildURem(bld->builder, coord, length, "");
break;
- case PIPE_TEX_WRAP_CLAMP:
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
coord = lp_build_min(int_coord_bld, coord, length_minus_one);
break;
+ case PIPE_TEX_WRAP_CLAMP:
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
case PIPE_TEX_WRAP_MIRROR_REPEAT:
case PIPE_TEX_WRAP_MIRROR_CLAMP:
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
- /* FIXME */
- _debug_printf("llvmpipe: failed to translate texture wrap mode %s\n",
- util_dump_tex_wrap(wrap_mode, TRUE));
- coord = lp_build_max(uint_coord_bld, coord, uint_coord_bld->zero);
- coord = lp_build_min(uint_coord_bld, coord, length_minus_one);
+ default:
+ assert(0);
+ }
+
+ lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride,
+ out_offset, out_i);
+}
+
+
+/**
+ * Build LLVM code for texture wrap mode, for scaled integer texcoords.
+ * \param coord0 the incoming texcoord (s,t,r or q) scaled to the texture size
+ * \param length the texture size along one dimension
+ * \param stride pixel stride along the coordinate axis
+ * \param block_length is the length of the pixel block along the
+ * coordinate axis
+ * \param is_pot if TRUE, length is a power of two
+ * \param wrap_mode one of PIPE_TEX_WRAP_x
+ * \param offset0 resulting relative offset for coord0
+ * \param offset1 resulting relative offset for coord0 + 1
+ * \param i0 resulting sub-block pixel coordinate for coord0
+ * \param i1 resulting sub-block pixel coordinate for coord0 + 1
+ */
+static void
+lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
+ unsigned block_length,
+ LLVMValueRef coord0,
+ LLVMValueRef length,
+ LLVMValueRef stride,
+ boolean is_pot,
+ unsigned wrap_mode,
+ LLVMValueRef *offset0,
+ LLVMValueRef *offset1,
+ LLVMValueRef *i0,
+ LLVMValueRef *i1)
+{
+ struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
+ struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
+ LLVMValueRef length_minus_one;
+ LLVMValueRef lmask, umask, mask;
+
+ if (block_length != 1) {
+ /*
+ * If the pixel block covers more than one pixel then there is no easy
+ * way to calculate offset1 relative to offset0. Instead, compute them
+ * independently.
+ */
+
+ LLVMValueRef coord1;
+
+ lp_build_sample_wrap_nearest_int(bld,
+ block_length,
+ coord0,
+ length,
+ stride,
+ is_pot,
+ wrap_mode,
+ offset0, i0);
+
+ coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+
+ lp_build_sample_wrap_nearest_int(bld,
+ block_length,
+ coord1,
+ length,
+ stride,
+ is_pot,
+ wrap_mode,
+ offset1, i1);
+
+ return;
+ }
+
+ /*
+ * Scalar pixels -- try to compute offset0 and offset1 with a single stride
+ * multiplication.
+ */
+
+ *i0 = uint_coord_bld->zero;
+ *i1 = uint_coord_bld->zero;
+
+ length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
+
+ switch(wrap_mode) {
+ case PIPE_TEX_WRAP_REPEAT:
+ if (is_pot) {
+ coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
+ }
+ else {
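+ /* Signed remainder won't give the right results for negative
+ * dividends, so unsigned remainder is used instead.
+ * NOTE(review): a negative coord0 reinterpreted as unsigned only
+ * wraps correctly when 2^N (N = coord bit width) is a multiple of
+ * length, which does not hold for non-power-of-two lengths --
+ * confirm whether negative coords can reach this path. */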
+ coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
+ }
+
+ mask = lp_build_compare(bld->builder, int_coord_bld->type,
+ PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
+
+ *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
+ *offset1 = LLVMBuildAnd(bld->builder,
+ lp_build_add(uint_coord_bld, *offset0, stride),
+ mask, "");
+ break;
+
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ lmask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
+ PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
+ umask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
+ PIPE_FUNC_LESS, coord0, length_minus_one);
+
+ coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
+ coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);
+
+ mask = LLVMBuildAnd(bld->builder, lmask, umask, "");
+
+ *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
+ *offset1 = lp_build_add(uint_coord_bld,
+ *offset0,
+ LLVMBuildAnd(bld->builder, stride, mask, ""));
break;
+ case PIPE_TEX_WRAP_CLAMP:
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
default:
assert(0);
+ *offset0 = uint_coord_bld->zero;
+ *offset1 = uint_coord_bld->zero;
+ break;
}
-
- return coord;
}
struct lp_build_context *coord_bld = &bld->coord_bld;
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
- LLVMValueRef two = lp_build_const_vec(coord_bld->type, 2.0);
LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
- LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one);
LLVMValueRef coord0, coord1, weight;
switch(wrap_mode) {
case PIPE_TEX_WRAP_CLAMP:
if (bld->static_state->normalized_coords) {
+ /* scale coord to length */
coord = lp_build_mul(coord_bld, coord, length_f);
}
+
+ /* clamp to [0, length] */
+ coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f);
+
+ coord = lp_build_sub(coord_bld, coord, half);
+
weight = lp_build_fract(coord_bld, coord);
- coord0 = lp_build_clamp(coord_bld, coord, coord_bld->zero,
- length_f_minus_one);
- coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
- coord1 = lp_build_clamp(coord_bld, coord1, coord_bld->zero,
- length_f_minus_one);
- coord0 = lp_build_ifloor(coord_bld, coord0);
- coord1 = lp_build_ifloor(coord_bld, coord1);
+ coord0 = lp_build_ifloor(coord_bld, coord);
+ coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
break;
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
else {
LLVMValueRef min, max;
/* clamp to [0.5, length - 0.5] */
- min = lp_build_const_vec(coord_bld->type, 0.5F);
+ min = half;
max = lp_build_sub(coord_bld, length_f, min);
coord = lp_build_clamp(coord_bld, coord, min, max);
}
{
LLVMValueRef min, max;
if (bld->static_state->normalized_coords) {
- /* min = -1.0 / (2 * length) = -0.5 / length */
- min = lp_build_mul(coord_bld,
- lp_build_const_vec(coord_bld->type, -0.5F),
- lp_build_rcp(coord_bld, length_f));
- /* max = 1.0 - min */
- max = lp_build_sub(coord_bld, coord_bld->one, min);
- /* coord = clamp(coord, min, max) */
- coord = lp_build_clamp(coord_bld, coord, min, max);
- /* scale coord to length (and sub 0.5?) */
+ /* scale coord to length */
coord = lp_build_mul(coord_bld, coord, length_f);
- coord = lp_build_sub(coord_bld, coord, half);
- }
- else {
- /* clamp to [-0.5, length + 0.5] */
- min = lp_build_const_vec(coord_bld->type, -0.5F);
- max = lp_build_sub(coord_bld, length_f, min);
- coord = lp_build_clamp(coord_bld, coord, min, max);
- coord = lp_build_sub(coord_bld, coord, half);
}
+ /* clamp to [-0.5, length + 0.5] */
+ min = lp_build_const_vec(coord_bld->type, -0.5F);
+ max = lp_build_sub(coord_bld, length_f, min);
+ coord = lp_build_clamp(coord_bld, coord, min, max);
+ coord = lp_build_sub(coord_bld, coord, half);
/* compute lerp weight */
weight = lp_build_fract(coord_bld, coord);
/* convert to int */
break;
case PIPE_TEX_WRAP_MIRROR_CLAMP:
- {
- LLVMValueRef min, max;
- /* min = 1.0 / (2 * length) */
- min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
- /* max = 1.0 - min */
- max = lp_build_sub(coord_bld, coord_bld->one, min);
+ coord = lp_build_abs(coord_bld, coord);
- coord = lp_build_abs(coord_bld, coord);
- coord = lp_build_clamp(coord_bld, coord, min, max);
+ if (bld->static_state->normalized_coords) {
+ /* scale coord to length */
coord = lp_build_mul(coord_bld, coord, length_f);
- if(0)coord = lp_build_sub(coord_bld, coord, half);
- weight = lp_build_fract(coord_bld, coord);
- coord0 = lp_build_ifloor(coord_bld, coord);
- coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
}
+
+ /* clamp to [0, length] */
+ coord = lp_build_min(coord_bld, coord, length_f);
+
+ coord = lp_build_sub(coord_bld, coord, half);
+
+ weight = lp_build_fract(coord_bld, coord);
+ coord0 = lp_build_ifloor(coord_bld, coord);
+ coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
break;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
{
LLVMValueRef min, max;
- /* min = 1.0 / (2 * length) */
- min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
- /* max = 1.0 - min */
- max = lp_build_sub(coord_bld, coord_bld->one, min);
coord = lp_build_abs(coord_bld, coord);
+
+ if (bld->static_state->normalized_coords) {
+ /* scale coord to length */
+ coord = lp_build_mul(coord_bld, coord, length_f);
+ }
+
+ /* clamp to [0.5, length - 0.5] */
+ min = half;
+ max = lp_build_sub(coord_bld, length_f, min);
coord = lp_build_clamp(coord_bld, coord, min, max);
- coord = lp_build_mul(coord_bld, coord, length_f);
+
coord = lp_build_sub(coord_bld, coord, half);
+
weight = lp_build_fract(coord_bld, coord);
coord0 = lp_build_ifloor(coord_bld, coord);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
{
LLVMValueRef min, max;
- /* min = -1.0 / (2 * length) = -0.5 / length */
- min = lp_build_mul(coord_bld,
- lp_build_const_vec(coord_bld->type, -0.5F),
- lp_build_rcp(coord_bld, length_f));
- /* max = 1.0 - min */
- max = lp_build_sub(coord_bld, coord_bld->one, min);
coord = lp_build_abs(coord_bld, coord);
+
+ if (bld->static_state->normalized_coords) {
+ /* scale coord to length */
+ coord = lp_build_mul(coord_bld, coord, length_f);
+ }
+
+ /* clamp to [-0.5, length + 0.5] */
+ min = lp_build_negate(coord_bld, half);
+ max = lp_build_sub(coord_bld, length_f, min);
coord = lp_build_clamp(coord_bld, coord, min, max);
- coord = lp_build_mul(coord_bld, coord, length_f);
+
coord = lp_build_sub(coord_bld, coord, half);
+
weight = lp_build_fract(coord_bld, coord);
coord0 = lp_build_ifloor(coord_bld, coord);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
struct lp_build_context *coord_bld = &bld->coord_bld;
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
- LLVMValueRef two = lp_build_const_vec(coord_bld->type, 2.0);
LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
- LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one);
LLVMValueRef icoord;
switch(wrap_mode) {
break;
case PIPE_TEX_WRAP_CLAMP:
- /* mul by size */
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
if (bld->static_state->normalized_coords) {
+ /* scale coord to length */
coord = lp_build_mul(coord_bld, coord, length_f);
}
+
/* floor */
icoord = lp_build_ifloor(coord_bld, coord);
- /* clamp to [0, size-1]. Note: int coord builder type */
+
+ /* clamp to [0, length - 1]. */
icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
length_minus_one);
break;
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
- {
- LLVMValueRef min, max;
- if (bld->static_state->normalized_coords) {
- /* min = 1.0 / (2 * length) */
- min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
- /* max = length - min */
- max = lp_build_sub(coord_bld, length_f, min);
- /* scale coord to length */
- coord = lp_build_mul(coord_bld, coord, length_f);
- }
- else {
- /* clamp to [0.5, length - 0.5] */
- min = lp_build_const_vec(coord_bld->type, 0.5F);
- max = lp_build_sub(coord_bld, length_f, min);
- }
- /* coord = clamp(coord, min, max) */
- coord = lp_build_clamp(coord_bld, coord, min, max);
- icoord = lp_build_ifloor(coord_bld, coord);
- }
- break;
-
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
/* Note: this is the same as CLAMP_TO_EDGE, except min = -min */
{
LLVMValueRef min, max;
+
if (bld->static_state->normalized_coords) {
- /* min = -1.0 / (2 * length) = -0.5 / length */
- min = lp_build_mul(coord_bld,
- lp_build_const_vec(coord_bld->type, -0.5F),
- lp_build_rcp(coord_bld, length_f));
- /* max = length - min */
- max = lp_build_sub(coord_bld, length_f, min);
/* scale coord to length */
coord = lp_build_mul(coord_bld, coord, length_f);
}
- else {
- /* clamp to [-0.5, length + 0.5] */
- min = lp_build_const_vec(coord_bld->type, -0.5F);
- max = lp_build_sub(coord_bld, length_f, min);
- }
- /* coord = clamp(coord, min, max) */
- coord = lp_build_clamp(coord_bld, coord, min, max);
+
icoord = lp_build_ifloor(coord_bld, coord);
+
+ /* clamp to [-1, length] */
+ min = lp_build_negate(int_coord_bld, int_coord_bld->one);
+ max = length;
+ icoord = lp_build_clamp(int_coord_bld, icoord, min, max);
}
break;
case PIPE_TEX_WRAP_MIRROR_REPEAT:
- {
- LLVMValueRef min, max;
- /* min = 1.0 / (2 * length) */
- min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
- /* max = length - min */
- max = lp_build_sub(coord_bld, length_f, min);
+ /* compute mirror function */
+ coord = lp_build_coord_mirror(bld, coord);
- /* compute mirror function */
- coord = lp_build_coord_mirror(bld, coord);
+ /* scale coord to length */
+ assert(bld->static_state->normalized_coords);
+ coord = lp_build_mul(coord_bld, coord, length_f);
- /* scale coord to length */
- coord = lp_build_mul(coord_bld, coord, length_f);
+ icoord = lp_build_ifloor(coord_bld, coord);
- /* coord = clamp(coord, min, max) */
- coord = lp_build_clamp(coord_bld, coord, min, max);
- icoord = lp_build_ifloor(coord_bld, coord);
- }
+ /* clamp to [0, length - 1] */
+ icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
break;
case PIPE_TEX_WRAP_MIRROR_CLAMP:
- coord = lp_build_abs(coord_bld, coord);
- coord = lp_build_mul(coord_bld, coord, length_f);
- coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f_minus_one);
- icoord = lp_build_ifloor(coord_bld, coord);
- break;
-
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
- {
- LLVMValueRef min, max;
- /* min = 1.0 / (2 * length) */
- min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
- /* max = length - min */
- max = lp_build_sub(coord_bld, length_f, min);
+ coord = lp_build_abs(coord_bld, coord);
- coord = lp_build_abs(coord_bld, coord);
+ if (bld->static_state->normalized_coords) {
+ /* scale coord to length */
coord = lp_build_mul(coord_bld, coord, length_f);
- coord = lp_build_clamp(coord_bld, coord, min, max);
- icoord = lp_build_ifloor(coord_bld, coord);
}
+
+ icoord = lp_build_ifloor(coord_bld, coord);
+
+ /* clamp to [0, length - 1] */
+ icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
break;
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
- {
- LLVMValueRef min, max;
- /* min = 1.0 / (2 * length) */
- min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
- min = lp_build_negate(coord_bld, min);
- /* max = length - min */
- max = lp_build_sub(coord_bld, length_f, min);
+ coord = lp_build_abs(coord_bld, coord);
- coord = lp_build_abs(coord_bld, coord);
+ if (bld->static_state->normalized_coords) {
+ /* scale coord to length */
coord = lp_build_mul(coord_bld, coord, length_f);
- coord = lp_build_clamp(coord_bld, coord, min, max);
- icoord = lp_build_ifloor(coord_bld, coord);
}
+
+ icoord = lp_build_ifloor(coord_bld, coord);
+
+ /* clamp to [0, length] */
+ icoord = lp_build_min(int_coord_bld, icoord, length);
break;
default:
LLVMValueRef base_size,
LLVMValueRef level)
{
- LLVMValueRef size = LLVMBuildAShr(bld->builder, base_size, level, "minify");
+ LLVMValueRef size = LLVMBuildLShr(bld->builder, base_size, level, "minify");
size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one);
return size;
}
/**
* Generate code to compute texture level of detail (lambda).
- * \param s vector of texcoord s values
- * \param t vector of texcoord t values
- * \param r vector of texcoord r values
- * \param shader_lod_bias vector float with the shader lod bias,
+ * \param ddx partial derivatives of (s, t, r, q) with respect to X
+ * \param ddy partial derivatives of (s, t, r, q) with respect to Y
+ * \param lod_bias optional float vector with the shader lod bias
+ * \param explicit_lod optional float vector with the explicit lod
* \param width scalar int texture width
* \param height scalar int texture height
* \param depth scalar int texture depth
+ *
+ * XXX: The resulting lod is scalar, so only the first element of the
+ * derivatives, lod_bias and explicit_lod vectors passed by the shader is used.
*/
static LLVMValueRef
lp_build_lod_selector(struct lp_build_sample_context *bld,
- LLVMValueRef s,
- LLVMValueRef t,
- LLVMValueRef r,
- const LLVMValueRef *ddx,
- const LLVMValueRef *ddy,
- LLVMValueRef shader_lod_bias,
+ const LLVMValueRef ddx[4],
+ const LLVMValueRef ddy[4],
+ LLVMValueRef lod_bias, /* optional */
+ LLVMValueRef explicit_lod, /* optional */
LLVMValueRef width,
LLVMValueRef height,
LLVMValueRef depth)
return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
}
else {
- const int dims = texture_dims(bld->static_state->target);
struct lp_build_context *float_bld = &bld->float_bld;
LLVMValueRef sampler_lod_bias = LLVMConstReal(LLVMFloatType(),
bld->static_state->lod_bias);
bld->static_state->min_lod);
LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(),
bld->static_state->max_lod);
-
LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
- LLVMValueRef dsdx, dsdy, dtdx, dtdy, drdx, drdy;
- LLVMValueRef rho, lod;
+ LLVMValueRef lod;
- /*
- * dsdx = abs(s[1] - s[0]);
- * dsdy = abs(s[2] - s[0]);
- * dtdx = abs(t[1] - t[0]);
- * dtdy = abs(t[2] - t[0]);
- * drdx = abs(r[1] - r[0]);
- * drdy = abs(r[2] - r[0]);
- */
- dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
- dsdx = lp_build_abs(float_bld, dsdx);
- dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
- dsdy = lp_build_abs(float_bld, dsdy);
- if (dims > 1) {
- dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx");
- dtdx = lp_build_abs(float_bld, dtdx);
- dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy");
- dtdy = lp_build_abs(float_bld, dtdy);
- if (dims > 2) {
- drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx");
- drdx = lp_build_abs(float_bld, drdx);
- drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy");
- drdy = lp_build_abs(float_bld, drdy);
- }
+ if (explicit_lod) {
+ lod = LLVMBuildExtractElement(bld->builder, explicit_lod,
+ index0, "");
}
+ else {
+ const int dims = texture_dims(bld->static_state->target);
+ LLVMValueRef dsdx, dsdy;
+ LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL;
+ LLVMValueRef rho;
+
+ dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
+ dsdx = lp_build_abs(float_bld, dsdx);
+ dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
+ dsdy = lp_build_abs(float_bld, dsdy);
+ if (dims > 1) {
+ dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx");
+ dtdx = lp_build_abs(float_bld, dtdx);
+ dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy");
+ dtdy = lp_build_abs(float_bld, dtdy);
+ if (dims > 2) {
+ drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx");
+ drdx = lp_build_abs(float_bld, drdx);
+ drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy");
+ drdy = lp_build_abs(float_bld, drdy);
+ }
+ }
- /* Compute rho = max of all partial derivatives scaled by texture size.
- * XXX this could be vectorized somewhat
- */
- rho = LLVMBuildMul(bld->builder,
- lp_build_max(float_bld, dsdx, dsdy),
- lp_build_int_to_float(float_bld, width), "");
- if (dims > 1) {
- LLVMValueRef max;
- max = LLVMBuildMul(bld->builder,
- lp_build_max(float_bld, dtdx, dtdy),
- lp_build_int_to_float(float_bld, height), "");
- rho = lp_build_max(float_bld, rho, max);
- if (dims > 2) {
- max = LLVMBuildMul(bld->builder,
- lp_build_max(float_bld, drdx, drdy),
- lp_build_int_to_float(float_bld, depth), "");
+ /* Compute rho = max of all partial derivatives scaled by texture size.
+ * XXX this could be vectorized somewhat
+ */
+ rho = LLVMBuildFMul(bld->builder,
+ lp_build_max(float_bld, dsdx, dsdy),
+ lp_build_int_to_float(float_bld, width), "");
+ if (dims > 1) {
+ LLVMValueRef max;
+ max = LLVMBuildFMul(bld->builder,
+ lp_build_max(float_bld, dtdx, dtdy),
+ lp_build_int_to_float(float_bld, height), "");
rho = lp_build_max(float_bld, rho, max);
+ if (dims > 2) {
+ max = LLVMBuildFMul(bld->builder,
+ lp_build_max(float_bld, drdx, drdy),
+ lp_build_int_to_float(float_bld, depth), "");
+ rho = lp_build_max(float_bld, rho, max);
+ }
}
- }
- /* compute lod = log2(rho) */
- lod = lp_build_log2(float_bld, rho);
+ /* compute lod = log2(rho) */
+ lod = lp_build_log2(float_bld, rho);
- /* add sampler lod bias */
- lod = LLVMBuildAdd(bld->builder, lod, sampler_lod_bias, "sampler LOD bias");
+ /* add shader lod bias */
+ if (lod_bias) {
+ lod_bias = LLVMBuildExtractElement(bld->builder, lod_bias,
+ index0, "");
+ lod = LLVMBuildFAdd(bld->builder, lod, lod_bias, "shader_lod_bias");
+ }
+ }
- /* add shader lod bias */
- /* XXX for now we take only the first element since our lod is scalar */
- shader_lod_bias = LLVMBuildExtractElement(bld->builder, shader_lod_bias,
- LLVMConstInt(LLVMInt32Type(), 0, 0), "");
- lod = LLVMBuildAdd(bld->builder, lod, shader_lod_bias, "shader LOD bias");
+ /* add sampler lod bias */
+ lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias");
/* clamp lod */
lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
int_bld->zero,
last_level);
/* compute level 1 and clamp to legal range of levels */
- *level1_out = lp_build_add(int_bld, *level0_out, int_bld->one);
- *level1_out = lp_build_min(int_bld, *level1_out, last_level);
+ level = lp_build_add(int_bld, level, int_bld->one);
+ *level1_out = lp_build_clamp(int_bld, level,
+ int_bld->zero,
+ last_level);
*weight_out = lp_build_fract(float_bld, lod);
}
/* ima = -0.5 / abs(coord); */
LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5);
LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
- LLVMValueRef ima = lp_build_mul(coord_bld, negHalf,
- lp_build_rcp(coord_bld, absCoord));
+ LLVMValueRef ima = lp_build_div(coord_bld, negHalf, absCoord);
return ima;
}
/**
- * Generate code to do cube face selection and per-face texcoords.
+ * Generate code to do cube face selection and compute per-face texcoords.
*/
static void
lp_build_cube_lookup(struct lp_build_sample_context *bld,
lp_build_endif(&if_ctx2);
lp_build_flow_scope_end(flow_ctx2);
lp_build_flow_destroy(flow_ctx2);
-
*face_s = face_s2;
*face_t = face_t2;
*face = face2;
int chan;
if (img_filter == PIPE_TEX_FILTER_NEAREST) {
+ /* sample the first mipmap level */
lp_build_sample_image_nearest(bld,
width0_vec, height0_vec, depth0_vec,
row_stride0_vec, img_stride0_vec,
data_ptr0, s, t, r, colors0);
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- /* sample the second mipmap level, and interp */
+ /* sample the second mipmap level */
lp_build_sample_image_nearest(bld,
width1_vec, height1_vec, depth1_vec,
row_stride1_vec, img_stride1_vec,
else {
assert(img_filter == PIPE_TEX_FILTER_LINEAR);
+ /* sample the first mipmap level */
lp_build_sample_image_linear(bld,
width0_vec, height0_vec, depth0_vec,
row_stride0_vec, img_stride0_vec,
data_ptr0, s, t, r, colors0);
if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
- /* sample the second mipmap level, and interp */
+ /* sample the second mipmap level */
lp_build_sample_image_linear(bld,
width1_vec, height1_vec, depth1_vec,
row_stride1_vec, img_stride1_vec,
LLVMValueRef r,
const LLVMValueRef *ddx,
const LLVMValueRef *ddy,
- LLVMValueRef lodbias,
+ LLVMValueRef lod_bias, /* optional */
+ LLVMValueRef explicit_lod, /* optional */
LLVMValueRef width,
LLVMValueRef height,
LLVMValueRef depth,
LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL;
LLVMValueRef data_ptr0, data_ptr1 = NULL;
+ LLVMValueRef face_ddx[4], face_ddy[4];
/*
printf("%s mip %d min %d mag %d\n", __FUNCTION__,
mip_filter, min_filter, mag_filter);
*/
+ /*
+ * Choose cube face, recompute texcoords and derivatives for the chosen face.
+ */
+ if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
+ LLVMValueRef face, face_s, face_t;
+ lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
+ s = face_s; /* vec */
+ t = face_t; /* vec */
+ /* use 'r' to indicate cube face */
+ r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
+
+ /* recompute ddx, ddy using the new (s,t) face texcoords */
+ face_ddx[0] = lp_build_ddx(&bld->coord_bld, s);
+ face_ddx[1] = lp_build_ddx(&bld->coord_bld, t);
+ face_ddx[2] = NULL;
+ face_ddx[3] = NULL;
+ face_ddy[0] = lp_build_ddy(&bld->coord_bld, s);
+ face_ddy[1] = lp_build_ddy(&bld->coord_bld, t);
+ face_ddy[2] = NULL;
+ face_ddy[3] = NULL;
+ ddx = face_ddx;
+ ddy = face_ddy;
+ }
+
/*
* Compute the level of detail (float).
*/
/* Need to compute lod either to choose mipmap levels or to
* distinguish between minification/magnification with one mipmap level.
*/
- lod = lp_build_lod_selector(bld, s, t, r, ddx, ddy, lodbias,
+ lod = lp_build_lod_selector(bld, ddx, ddy,
+ lod_bias, explicit_lod,
width, height, depth);
}
*/
if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
/* always use mip level 0 */
- ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
+ /* XXX this is a work-around for an apparent bug in LLVM 2.7.
+ * We should be able to set ilevel0 = const(0) but that causes
+ * bad x86 code to be emitted.
+ */
+ lod = lp_build_const_elem(bld->coord_bld.type, 0.0);
+ lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
+ }
+ else {
+ ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
+ }
}
else {
+ assert(lod);
if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
}
}
}
- /*
- * Choose cube face, recompute per-face texcoords.
- */
- if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
- LLVMValueRef face, face_s, face_t;
- lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
- s = face_s; /* vec */
- t = face_t; /* vec */
- /* use 'r' to indicate cube face */
- r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
- }
-
/*
* Get pointer(s) to image data for mipmap level(s).
*/
-static void
-lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
- struct lp_type dst_type,
- LLVMValueRef packed,
- LLVMValueRef *rgba)
-{
- LLVMValueRef mask = lp_build_const_int_vec(dst_type, 0xff);
- unsigned chan;
-
- /* Decode the input vector components */
- for (chan = 0; chan < 4; ++chan) {
- unsigned start = chan*8;
- unsigned stop = start + 8;
- LLVMValueRef input;
-
- input = packed;
-
- if(start)
- input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(dst_type, start), "");
-
- if(stop < 32)
- input = LLVMBuildAnd(builder, input, mask, "");
-
- input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input);
-
- rgba[chan] = input;
- }
-}
-
-
static void
lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
LLVMValueRef s,
LLVMValueRef height,
LLVMValueRef stride_array,
LLVMValueRef data_array,
- LLVMValueRef *texel)
+ LLVMValueRef texel_out[4])
{
LLVMBuilderRef builder = bld->builder;
struct lp_build_context i32, h16, u8n;
LLVMValueRef i32_c8, i32_c128, i32_c255;
LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
- LLVMValueRef x0, x1;
- LLVMValueRef y0, y1;
- LLVMValueRef neighbors[2][2];
+ LLVMValueRef data_ptr;
+ LLVMValueRef x_stride, y_stride;
+ LLVMValueRef x_offset0, x_offset1;
+ LLVMValueRef y_offset0, y_offset1;
+ LLVMValueRef offset[2][2];
+ LLVMValueRef x_subcoord[2], y_subcoord[2];
LLVMValueRef neighbors_lo[2][2];
LLVMValueRef neighbors_hi[2][2];
LLVMValueRef packed, packed_lo, packed_hi;
LLVMValueRef unswizzled[4];
- LLVMValueRef stride;
+ const unsigned level = 0;
+ unsigned i, j;
- assert(bld->static_state->target == PIPE_TEXTURE_2D);
+ assert(bld->static_state->target == PIPE_TEXTURE_2D
+ || bld->static_state->target == PIPE_TEXTURE_RECT);
assert(bld->static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR);
assert(bld->static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR);
assert(bld->static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE);
s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
- x0 = s_ipart;
- y0 = t_ipart;
-
- x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
- y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
-
- x0 = lp_build_sample_wrap_int(bld, x0, width, bld->static_state->pot_width,
- bld->static_state->wrap_s);
- y0 = lp_build_sample_wrap_int(bld, y0, height, bld->static_state->pot_height,
- bld->static_state->wrap_t);
-
- x1 = lp_build_sample_wrap_int(bld, x1, width, bld->static_state->pot_width,
- bld->static_state->wrap_s);
- y1 = lp_build_sample_wrap_int(bld, y1, height, bld->static_state->pot_height,
- bld->static_state->wrap_t);
+ x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
+ bld->format_desc->block.bits/8);
+
+ y_stride = lp_build_get_const_level_stride_vec(bld, stride_array, level);
+
+ lp_build_sample_wrap_linear_int(bld,
+ bld->format_desc->block.width,
+ s_ipart, width, x_stride,
+ bld->static_state->pot_width,
+ bld->static_state->wrap_s,
+ &x_offset0, &x_offset1,
+ &x_subcoord[0], &x_subcoord[1]);
+ lp_build_sample_wrap_linear_int(bld,
+ bld->format_desc->block.height,
+ t_ipart, height, y_stride,
+ bld->static_state->pot_height,
+ bld->static_state->wrap_t,
+ &y_offset0, &y_offset1,
+ &y_subcoord[0], &y_subcoord[1]);
+
+ offset[0][0] = lp_build_add(&bld->uint_coord_bld, x_offset0, y_offset0);
+ offset[0][1] = lp_build_add(&bld->uint_coord_bld, x_offset1, y_offset0);
+ offset[1][0] = lp_build_add(&bld->uint_coord_bld, x_offset0, y_offset1);
+ offset[1][1] = lp_build_add(&bld->uint_coord_bld, x_offset1, y_offset1);
/*
* Transform 4 x i32 in
LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
LLVMValueRef shuffle_lo;
LLVMValueRef shuffle_hi;
- unsigned i, j;
for(j = 0; j < h16.type.length; j += 4) {
- unsigned subindex = util_cpu_caps.little_endian ? 0 : 1;
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
+ unsigned subindex = 0;
+#else
+ unsigned subindex = 1;
+#endif
LLVMValueRef index;
index = LLVMConstInt(elem_type, j/2 + subindex, 0);
t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, "");
}
- stride = lp_build_get_const_level_stride_vec(bld, stride_array, 0);
+ /*
+ * get pointer to mipmap level 0 data
+ */
+ data_ptr = lp_build_get_const_mipmap_level(bld, data_array, level);
/*
* Fetch the pixels as 4 x 32bit (rgba order might differ):
* The higher 8 bits of the resulting elements will be zero.
*/
- neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_array);
- neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_array);
- neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_array);
- neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_array);
+ for (j = 0; j < 2; ++j) {
+ for (i = 0; i < 2; ++i) {
+ LLVMValueRef rgba8;
- neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, "");
- neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, "");
- neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, "");
- neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, "");
+ if (util_format_is_rgba8_variant(bld->format_desc)) {
+ /*
+ * Given the format is an rgba8 variant, just read the pixels as is,
+ * without any swizzling. Swizzling will be done later.
+ */
+ rgba8 = lp_build_gather(bld->builder,
+ bld->texel_type.length,
+ bld->format_desc->block.bits,
+ bld->texel_type.width,
+ data_ptr, offset[j][i]);
- lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]);
- lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]);
- lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]);
- lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]);
+ rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
+
+ }
+ else {
+ rgba8 = lp_build_fetch_rgba_aos(bld->builder,
+ bld->format_desc,
+ u8n.type,
+ data_ptr, offset[j][i],
+ x_subcoord[i],
+ y_subcoord[j]);
+ }
+
+ lp_build_unpack2(builder, u8n.type, h16.type,
+ rgba8,
+ &neighbors_lo[j][i], &neighbors_hi[j][i]);
+ }
+ }
/*
* Linear interpolate with 8.8 fixed point.
* Convert to SoA and swizzle.
*/
- packed = LLVMBuildBitCast(builder, packed, i32_vec_type, "");
-
lp_build_rgba8_to_f32_soa(bld->builder,
bld->texel_type,
packed, unswizzled);
- lp_build_format_swizzle_soa(bld->format_desc,
- bld->texel_type, unswizzled,
- texel);
+ if (util_format_is_rgba8_variant(bld->format_desc)) {
+ lp_build_format_swizzle_soa(bld->format_desc,
+ &bld->texel_bld,
+ unswizzled, texel_out);
+ } else {
+ texel_out[0] = unswizzled[0];
+ texel_out[1] = unswizzled[1];
+ texel_out[2] = unswizzled[2];
+ texel_out[3] = unswizzled[3];
+ }
- lp_build_swizzle_soa(bld, texel);
+ apply_sampler_swizzle(bld, texel_out);
}
static void
lp_build_sample_compare(struct lp_build_sample_context *bld,
LLVMValueRef p,
- LLVMValueRef *texel)
+ LLVMValueRef texel[4])
{
struct lp_build_context *texel_bld = &bld->texel_bld;
LLVMValueRef res;
*/
static void
lp_build_sample_nop(struct lp_build_sample_context *bld,
- LLVMValueRef *texel)
+ LLVMValueRef texel_out[4])
{
struct lp_build_context *texel_bld = &bld->texel_bld;
unsigned chan;
for (chan = 0; chan < 4; chan++) {
/*lp_bld_mov(texel_bld, texel, texel_bld->one);*/
- texel[chan] = texel_bld->one;
+ texel_out[chan] = texel_bld->one;
}
}
* 'texel' will return a vector of four LLVMValueRefs corresponding to
* R, G, B, A.
* \param type vector float type to use for coords, etc.
+ * \param ddx partial derivatives of (s,t,r,q) with respect to x
+ * \param ddy partial derivatives of (s,t,r,q) with respect to y
*/
void
lp_build_sample_soa(LLVMBuilderRef builder,
unsigned unit,
unsigned num_coords,
const LLVMValueRef *coords,
- const LLVMValueRef *ddx,
- const LLVMValueRef *ddy,
- LLVMValueRef lodbias,
- LLVMValueRef *texel)
+ const LLVMValueRef ddx[4],
+ const LLVMValueRef ddy[4],
+ LLVMValueRef lod_bias, /* optional */
+ LLVMValueRef explicit_lod, /* optional */
+ LLVMValueRef texel_out[4])
{
struct lp_build_sample_context bld;
LLVMValueRef width, width_vec;
LLVMValueRef t;
LLVMValueRef r;
+ if (0) {
+ enum pipe_format fmt = static_state->format;
+ debug_printf("Sample from %s\n", util_format_name(fmt));
+ }
+
+ assert(type.floating);
+
/* Setup our build context */
memset(&bld, 0, sizeof bld);
bld.builder = builder;
if (0) {
/* For debug: no-op texture sampling */
- lp_build_sample_nop(&bld, texel);
+ lp_build_sample_nop(&bld, texel_out);
}
- else if (util_format_is_rgba8_variant(bld.format_desc) &&
- static_state->target == PIPE_TEXTURE_2D &&
+ else if (util_format_fits_8unorm(bld.format_desc) &&
+ (static_state->target == PIPE_TEXTURE_2D ||
+ static_state->target == PIPE_TEXTURE_RECT) &&
static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
is_simple_wrap_mode(static_state->wrap_t)) {
/* special case */
lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec,
- row_stride_array, data_array, texel);
+ row_stride_array, data_array, texel_out);
}
else {
- lp_build_sample_general(&bld, unit, s, t, r, ddx, ddy, lodbias,
+ if (gallivm_debug & GALLIVM_DEBUG_PERF &&
+ (static_state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
+ static_state->mag_img_filter != PIPE_TEX_FILTER_NEAREST ||
+ static_state->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) &&
+ util_format_fits_8unorm(bld.format_desc)) {
+ debug_printf("%s: using floating point linear filtering for %s\n",
+ __FUNCTION__, bld.format_desc->short_name);
+ }
+
+ lp_build_sample_general(&bld, unit, s, t, r, ddx, ddy,
+ lod_bias, explicit_lod,
width, height, depth,
width_vec, height_vec, depth_vec,
row_stride_array, img_stride_array,
data_array,
- texel);
+ texel_out);
}
- lp_build_sample_compare(&bld, r, texel);
+ lp_build_sample_compare(&bld, r, texel_out);
}