From b3b3b389fa7756869e7a8bc2755bce081c94945f Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Fri, 1 Mar 2013 02:25:13 +0100 Subject: [PATCH] gallivm: add support for texel offsets for ordinary texturing. This was previously only handled for texelFetch (much easier). Depending on the wrap mode this works slightly differently (for somewhat efficient implementation), hence we have to do that separately in all roughly 137 places - it is easy if we use fixed point coords for wrapping, however some wrapping modes are near impossible with fixed point (the repeat stuff) hence we have to normalize the offsets if we can't do the wrapping in unnormalized space (which is a division which is slow but should still be much better than the alternative, which would be integer modulo for wrapping which is just unusable). This should still give accurate results in all cases that really matter, though it might not be quite conformant behavior for some APIs (but we have much worse problems there anyway even without using offsets). (Untested, no piglit test.) Reviewed-by: Jose Fonseca --- .../auxiliary/gallivm/lp_bld_sample_aos.c | 130 +++++++++++++---- .../auxiliary/gallivm/lp_bld_sample_aos.h | 1 + .../auxiliary/gallivm/lp_bld_sample_soa.c | 132 ++++++++++++++---- 3 files changed, 210 insertions(+), 53 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c index bddff2c4a13..16d57189ed4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c @@ -64,9 +64,11 @@ * for scaled integer texcoords. 
* \param block_length is the length of the pixel block along the * coordinate axis - * \param coord the incoming texcoord (s,t,r or q) scaled to the texture size + * \param coord the incoming texcoord (s,t or r) scaled to the texture size + * \param coord_f the incoming texcoord (s,t or r) as float vec * \param length the texture size along one dimension * \param stride pixel stride along the coordinate axis (in bytes) + * \param offset the texel offset along the coord axis * \param is_pot if TRUE, length is a power of two * \param wrap_mode one of PIPE_TEX_WRAP_x * \param out_offset byte offset for the wrapped coordinate @@ -79,6 +81,7 @@ lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld, LLVMValueRef coord_f, LLVMValueRef length, LLVMValueRef stride, + LLVMValueRef offset, boolean is_pot, unsigned wrap_mode, LLVMValueRef *out_offset, @@ -97,6 +100,11 @@ lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld, else { struct lp_build_context *coord_bld = &bld->coord_bld; LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length); + if (offset) { + offset = lp_build_int_to_float(coord_bld, offset); + offset = lp_build_div(coord_bld, offset, length_f); + coord_f = lp_build_add(coord_bld, coord_f, offset); + } coord = lp_build_fract_safe(coord_bld, coord_f); coord = lp_build_mul(coord_bld, coord, length_f); coord = lp_build_itrunc(coord_bld, coord); @@ -126,8 +134,9 @@ lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld, /** * Build LLVM code for texture coord wrapping, for nearest filtering, * for float texcoords. 
- * \param coord the incoming texcoord (s,t,r or q) + * \param coord the incoming texcoord (s,t or r) * \param length the texture size along one dimension + * \param offset the texel offset along the coord axis * \param is_pot if TRUE, length is a power of two * \param wrap_mode one of PIPE_TEX_WRAP_x * \param icoord the texcoord after wrapping, as int @@ -136,6 +145,7 @@ static void lp_build_sample_wrap_nearest_float(struct lp_build_sample_context *bld, LLVMValueRef coord, LLVMValueRef length, + LLVMValueRef offset, boolean is_pot, unsigned wrap_mode, LLVMValueRef *icoord) @@ -145,6 +155,12 @@ lp_build_sample_wrap_nearest_float(struct lp_build_sample_context *bld, switch(wrap_mode) { case PIPE_TEX_WRAP_REPEAT: + if (offset) { + /* this is definitely not ideal for POT case */ + offset = lp_build_int_to_float(coord_bld, offset); + offset = lp_build_div(coord_bld, offset, length); + coord = lp_build_add(coord_bld, coord, offset); + } /* take fraction, unnormalize */ coord = lp_build_fract_safe(coord_bld, coord); coord = lp_build_mul(coord_bld, coord, length); @@ -156,6 +172,10 @@ lp_build_sample_wrap_nearest_float(struct lp_build_sample_context *bld, /* scale coord to length */ coord = lp_build_mul(coord_bld, coord, length); } + if (offset) { + offset = lp_build_int_to_float(coord_bld, offset); + coord = lp_build_add(coord_bld, coord, offset); + } coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_minus_one); *icoord = lp_build_itrunc(coord_bld, coord); @@ -178,9 +198,11 @@ lp_build_sample_wrap_nearest_float(struct lp_build_sample_context *bld, * for scaled integer texcoords. 
* \param block_length is the length of the pixel block along the * coordinate axis - * \param coord0 the incoming texcoord (s,t,r or q) scaled to the texture size + * \param coord0 the incoming texcoord (s,t or r) scaled to the texture size + * \param coord_f the incoming texcoord (s,t or r) as float vec * \param length the texture size along one dimension * \param stride pixel stride along the coordinate axis (in bytes) + * \param offset the texel offset along the coord axis * \param is_pot if TRUE, length is a power of two * \param wrap_mode one of PIPE_TEX_WRAP_x * \param offset0 resulting relative offset for coord0 @@ -196,6 +218,7 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, LLVMValueRef coord_f, LLVMValueRef length, LLVMValueRef stride, + LLVMValueRef offset, boolean is_pot, unsigned wrap_mode, LLVMValueRef *offset0, @@ -230,6 +253,11 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, LLVMValueRef mask; LLVMValueRef weight; LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length); + if (offset) { + offset = lp_build_int_to_float(&bld->coord_bld, offset); + offset = lp_build_div(&bld->coord_bld, offset, length_f); + coord_f = lp_build_add(&bld->coord_bld, coord_f, offset); + } lp_build_coord_repeat_npot_linear(bld, coord_f, length, length_f, &coord0, &weight); @@ -282,6 +310,11 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, else { LLVMValueRef weight; LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length); + if (offset) { + offset = lp_build_int_to_float(&bld->coord_bld, offset); + offset = lp_build_div(&bld->coord_bld, offset, length_f); + coord_f = lp_build_add(&bld->coord_bld, coord_f, offset); + } lp_build_coord_repeat_npot_linear(bld, coord_f, length, length_f, &coord0, &weight); @@ -340,8 +373,9 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, * for float texcoords. 
* \param block_length is the length of the pixel block along the * coordinate axis - * \param coord the incoming texcoord (s,t,r or q) + * \param coord the incoming texcoord (s,t or r) * \param length the texture size along one dimension + * \param offset the texel offset along the coord axis * \param is_pot if TRUE, length is a power of two * \param wrap_mode one of PIPE_TEX_WRAP_x * \param coord0 the first texcoord after wrapping, as int @@ -354,6 +388,7 @@ lp_build_sample_wrap_linear_float(struct lp_build_sample_context *bld, unsigned block_length, LLVMValueRef coord, LLVMValueRef length, + LLVMValueRef offset, boolean is_pot, unsigned wrap_mode, LLVMValueRef *coord0, @@ -372,6 +407,10 @@ lp_build_sample_wrap_linear_float(struct lp_build_sample_context *bld, if (is_pot) { /* mul by size and subtract 0.5 */ coord = lp_build_mul(coord_bld, coord, length); + if (offset) { + offset = lp_build_int_to_float(coord_bld, offset); + coord = lp_build_add(coord_bld, coord, offset); + } if (!force_nearest) coord = lp_build_sub(coord_bld, coord, half); *coord1 = lp_build_add(coord_bld, coord, coord_bld->one); @@ -385,6 +424,11 @@ lp_build_sample_wrap_linear_float(struct lp_build_sample_context *bld, } else { LLVMValueRef mask; + if (offset) { + offset = lp_build_int_to_float(coord_bld, offset); + offset = lp_build_div(coord_bld, offset, length); + coord = lp_build_add(coord_bld, coord, offset); + } /* wrap with normalized floats is just fract */ coord = lp_build_fract(coord_bld, coord); /* unnormalize */ @@ -411,6 +455,10 @@ lp_build_sample_wrap_linear_float(struct lp_build_sample_context *bld, /* mul by tex size */ coord = lp_build_mul(coord_bld, coord, length); } + if (offset) { + offset = lp_build_int_to_float(coord_bld, offset); + coord = lp_build_add(coord_bld, coord, offset); + } /* subtract 0.5 */ if (!force_nearest) { coord = lp_build_sub(coord_bld, coord, half); @@ -520,6 +568,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, LLVMValueRef s, 
LLVMValueRef t, LLVMValueRef r, + const LLVMValueRef *offsets, LLVMValueRef *colors_lo, LLVMValueRef *colors_hi) { @@ -584,6 +633,17 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, if (dims >= 3) r_ipart = LLVMBuildAShr(builder, r, i32_c8, ""); + /* add texel offsets */ + if (offsets[0]) { + s_ipart = lp_build_add(&i32, s_ipart, offsets[0]); + if (dims >= 2) { + t_ipart = lp_build_add(&i32, t_ipart, offsets[1]); + if (dims >= 3) { + r_ipart = lp_build_add(&i32, r_ipart, offsets[2]); + } + } + } + /* get pixel, row, image strides */ x_stride = lp_build_const_vec(bld->gallivm, bld->int_coord_bld.type, @@ -593,7 +653,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, lp_build_sample_wrap_nearest_int(bld, bld->format_desc->block.width, s_ipart, s_float, - width_vec, x_stride, + width_vec, x_stride, offsets[0], bld->static_texture_state->pot_width, bld->static_sampler_state->wrap_s, &x_offset, &x_subcoord); @@ -603,7 +663,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, lp_build_sample_wrap_nearest_int(bld, bld->format_desc->block.height, t_ipart, t_float, - height_vec, row_stride_vec, + height_vec, row_stride_vec, offsets[1], bld->static_texture_state->pot_height, bld->static_sampler_state->wrap_t, &y_offset, &y_subcoord); @@ -613,7 +673,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, lp_build_sample_wrap_nearest_int(bld, 1, /* block length (depth) */ r_ipart, r_float, - depth_vec, img_stride_vec, + depth_vec, img_stride_vec, offsets[2], bld->static_texture_state->pot_depth, bld->static_sampler_state->wrap_r, &z_offset, &z_subcoord); @@ -655,6 +715,7 @@ lp_build_sample_image_nearest_afloat(struct lp_build_sample_context *bld, LLVMValueRef s, LLVMValueRef t, LLVMValueRef r, + const LLVMValueRef *offsets, LLVMValueRef *colors_lo, LLVMValueRef *colors_hi) { @@ -677,21 +738,21 @@ lp_build_sample_image_nearest_afloat(struct lp_build_sample_context *bld, /* Do texcoord wrapping */ 
lp_build_sample_wrap_nearest_float(bld, - s, width_vec, + s, width_vec, offsets[0], bld->static_texture_state->pot_width, bld->static_sampler_state->wrap_s, &x_icoord); if (dims >= 2) { lp_build_sample_wrap_nearest_float(bld, - t, height_vec, + t, height_vec, offsets[1], bld->static_texture_state->pot_height, bld->static_sampler_state->wrap_t, &y_icoord); if (dims >= 3) { lp_build_sample_wrap_nearest_float(bld, - r, depth_vec, + r, depth_vec, offsets[2], bld->static_texture_state->pot_depth, bld->static_sampler_state->wrap_r, &z_icoord); @@ -982,6 +1043,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, LLVMValueRef s, LLVMValueRef t, LLVMValueRef r, + const LLVMValueRef *offsets, LLVMValueRef *colors_lo, LLVMValueRef *colors_hi) { @@ -1063,6 +1125,17 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, if (dims >= 3) r_ipart = LLVMBuildAShr(builder, r, i32_c8, ""); + /* add texel offsets */ + if (offsets[0]) { + s_ipart = lp_build_add(&i32, s_ipart, offsets[0]); + if (dims >= 2) { + t_ipart = lp_build_add(&i32, t_ipart, offsets[1]); + if (dims >= 3) { + r_ipart = lp_build_add(&i32, r_ipart, offsets[2]); + } + } + } + /* compute fractional part (AND with 0xff) */ i32_c255 = lp_build_const_int_vec(bld->gallivm, i32.type, 255); s_fpart = LLVMBuildAnd(builder, s, i32_c255, ""); @@ -1081,7 +1154,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, lp_build_sample_wrap_linear_int(bld, bld->format_desc->block.width, s_ipart, &s_fpart, s_float, - width_vec, x_stride, + width_vec, x_stride, offsets[0], bld->static_texture_state->pot_width, bld->static_sampler_state->wrap_s, &x_offset0, &x_offset1, @@ -1113,7 +1186,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, lp_build_sample_wrap_linear_int(bld, bld->format_desc->block.height, t_ipart, &t_fpart, t_float, - height_vec, y_stride, + height_vec, y_stride, offsets[1], bld->static_texture_state->pot_height, bld->static_sampler_state->wrap_t, 
&y_offset0, &y_offset1, @@ -1133,7 +1206,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, lp_build_sample_wrap_linear_int(bld, bld->format_desc->block.height, r_ipart, &r_fpart, r_float, - depth_vec, z_stride, + depth_vec, z_stride, offsets[2], bld->static_texture_state->pot_depth, bld->static_sampler_state->wrap_r, &z_offset0, &z_offset1, @@ -1171,6 +1244,7 @@ lp_build_sample_image_linear_afloat(struct lp_build_sample_context *bld, LLVMValueRef s, LLVMValueRef t, LLVMValueRef r, + const LLVMValueRef *offsets, LLVMValueRef *colors_lo, LLVMValueRef *colors_hi) { @@ -1204,7 +1278,7 @@ lp_build_sample_image_linear_afloat(struct lp_build_sample_context *bld, /* do texcoord wrapping and compute texel offsets */ lp_build_sample_wrap_linear_float(bld, bld->format_desc->block.width, - s, width_vec, + s, width_vec, offsets[0], bld->static_texture_state->pot_width, bld->static_sampler_state->wrap_s, &x_icoord0, &x_icoord1, @@ -1214,7 +1288,7 @@ lp_build_sample_image_linear_afloat(struct lp_build_sample_context *bld, if (dims >= 2) { lp_build_sample_wrap_linear_float(bld, bld->format_desc->block.height, - t, height_vec, + t, height_vec, offsets[1], bld->static_texture_state->pot_height, bld->static_sampler_state->wrap_t, &y_icoord0, &y_icoord1, @@ -1224,7 +1298,7 @@ lp_build_sample_image_linear_afloat(struct lp_build_sample_context *bld, if (dims >= 3) { lp_build_sample_wrap_linear_float(bld, bld->format_desc->block.height, - r, depth_vec, + r, depth_vec, offsets[2], bld->static_texture_state->pot_depth, bld->static_sampler_state->wrap_r, &z_icoord0, &z_icoord1, @@ -1339,6 +1413,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, LLVMValueRef s, LLVMValueRef t, LLVMValueRef r, + const LLVMValueRef *offsets, LLVMValueRef ilevel0, LLVMValueRef ilevel1, LLVMValueRef lod_fpart, @@ -1377,7 +1452,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, lp_build_sample_image_nearest_afloat(bld, size0, row_stride0_vec, img_stride0_vec, - 
data_ptr0, mipoff0, s, t, r, + data_ptr0, mipoff0, s, t, r, offsets, &colors0_lo, &colors0_hi); } else { @@ -1385,7 +1460,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, lp_build_sample_image_linear_afloat(bld, size0, row_stride0_vec, img_stride0_vec, - data_ptr0, mipoff0, s, t, r, + data_ptr0, mipoff0, s, t, r, offsets, &colors0_lo, &colors0_hi); } } @@ -1394,7 +1469,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, lp_build_sample_image_nearest(bld, size0, row_stride0_vec, img_stride0_vec, - data_ptr0, mipoff0, s, t, r, + data_ptr0, mipoff0, s, t, r, offsets, &colors0_lo, &colors0_hi); } else { @@ -1402,7 +1477,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, lp_build_sample_image_linear(bld, size0, row_stride0_vec, img_stride0_vec, - data_ptr0, mipoff0, s, t, r, + data_ptr0, mipoff0, s, t, r, offsets, &colors0_lo, &colors0_hi); } } @@ -1472,14 +1547,14 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, lp_build_sample_image_nearest_afloat(bld, size1, row_stride1_vec, img_stride1_vec, - data_ptr1, mipoff1, s, t, r, + data_ptr1, mipoff1, s, t, r, offsets, &colors1_lo, &colors1_hi); } else { lp_build_sample_image_linear_afloat(bld, size1, row_stride1_vec, img_stride1_vec, - data_ptr1, mipoff1, s, t, r, + data_ptr1, mipoff1, s, t, r, offsets, &colors1_lo, &colors1_hi); } } @@ -1488,14 +1563,14 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, lp_build_sample_image_nearest(bld, size1, row_stride1_vec, img_stride1_vec, - data_ptr1, mipoff1, s, t, r, + data_ptr1, mipoff1, s, t, r, offsets, &colors1_lo, &colors1_hi); } else { lp_build_sample_image_linear(bld, size1, row_stride1_vec, img_stride1_vec, - data_ptr1, mipoff1, s, t, r, + data_ptr1, mipoff1, s, t, r, offsets, &colors1_lo, &colors1_hi); } } @@ -1574,6 +1649,7 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, LLVMValueRef s, LLVMValueRef t, LLVMValueRef r, + const LLVMValueRef *offsets, LLVMValueRef lod_ipart, LLVMValueRef 
lod_fpart, LLVMValueRef ilevel0, @@ -1612,7 +1688,7 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, /* no need to distinguish between minification and magnification */ lp_build_sample_mipmap(bld, min_filter, mip_filter, - s, t, r, + s, t, r, offsets, ilevel0, ilevel1, lod_fpart, packed_lo, packed_hi); } @@ -1645,7 +1721,7 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, /* Use the minification filter */ lp_build_sample_mipmap(bld, min_filter, mip_filter, - s, t, r, + s, t, r, offsets, ilevel0, ilevel1, lod_fpart, packed_lo, packed_hi); } @@ -1654,7 +1730,7 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, /* Use the magnification filter */ lp_build_sample_mipmap(bld, mag_filter, PIPE_TEX_MIPFILTER_NONE, - s, t, r, + s, t, r, offsets, ilevel0, NULL, NULL, packed_lo, packed_hi); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h index 6590e8ce090..6fce9712a48 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.h @@ -46,6 +46,7 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, LLVMValueRef s, LLVMValueRef t, LLVMValueRef r, + const LLVMValueRef *offsets, LLVMValueRef lod_ipart, LLVMValueRef lod_fpart, LLVMValueRef ilevel0, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index 50ccd2a1741..8aa41662d67 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -277,6 +277,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, LLVMValueRef coord, LLVMValueRef length, LLVMValueRef length_f, + LLVMValueRef offset, boolean is_pot, unsigned wrap_mode, LLVMValueRef *x0_out, @@ -296,6 +297,10 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, /* mul by size and subtract 0.5 */ coord = lp_build_mul(coord_bld, coord, length_f); coord = 
lp_build_sub(coord_bld, coord, half); + if (offset) { + offset = lp_build_int_to_float(coord_bld, offset); + coord = lp_build_add(coord_bld, coord, offset); + } /* convert to int, compute lerp weight */ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); @@ -305,6 +310,11 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, } else { LLVMValueRef mask; + if (offset) { + offset = lp_build_int_to_float(coord_bld, offset); + offset = lp_build_div(coord_bld, offset, length_f); + coord = lp_build_add(coord_bld, coord, offset); + } lp_build_coord_repeat_npot_linear(bld, coord, length, length_f, &coord0, &weight); @@ -321,6 +331,10 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, /* scale coord to length */ coord = lp_build_mul(coord_bld, coord, length_f); } + if (offset) { + offset = lp_build_int_to_float(coord_bld, offset); + coord = lp_build_add(coord_bld, coord, offset); + } /* clamp to [0, length] */ coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f); @@ -341,6 +355,11 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, /* mul by tex size */ coord = lp_build_mul(coord_bld, coord, length_f); } + if (offset) { + offset = lp_build_int_to_float(coord_bld, offset); + coord = lp_build_add(coord_bld, coord, offset); + } + /* clamp to length max */ coord = lp_build_min(coord_bld, coord, length_f); /* subtract 0.5 */ @@ -360,6 +379,10 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, /* scale coord to length */ coord = lp_build_mul(coord_bld, coord, length_f); } + if (offset) { + offset = lp_build_int_to_float(coord_bld, offset); + coord = lp_build_add(coord_bld, coord, offset); + } /* was: clamp to [-0.5, length + 0.5], then sub 0.5 */ /* can skip clamp (though might not work for very large coord values */ coord = lp_build_sub(coord_bld, coord, half); @@ -375,6 +398,10 @@ lp_build_sample_wrap_linear(struct 
lp_build_sample_context *bld, /* scale coord to length */ coord = lp_build_mul(coord_bld, coord, length_f); coord = lp_build_sub(coord_bld, coord, half); + if (offset) { + offset = lp_build_int_to_float(coord_bld, offset); + coord = lp_build_add(coord_bld, coord, offset); + } /* convert to int, compute lerp weight */ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); @@ -387,12 +414,15 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, break; case PIPE_TEX_WRAP_MIRROR_CLAMP: - coord = lp_build_abs(coord_bld, coord); - if (bld->static_sampler_state->normalized_coords) { /* scale coord to length */ coord = lp_build_mul(coord_bld, coord, length_f); } + if (offset) { + offset = lp_build_int_to_float(coord_bld, offset); + coord = lp_build_add(coord_bld, coord, offset); + } + coord = lp_build_abs(coord_bld, coord); /* clamp to [0, length] */ coord = lp_build_min(coord_bld, coord, length_f); @@ -409,12 +439,16 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, LLVMValueRef min, max; struct lp_build_context abs_coord_bld = bld->coord_bld; abs_coord_bld.type.sign = FALSE; - coord = lp_build_abs(coord_bld, coord); if (bld->static_sampler_state->normalized_coords) { /* scale coord to length */ coord = lp_build_mul(coord_bld, coord, length_f); } + if (offset) { + offset = lp_build_int_to_float(coord_bld, offset); + coord = lp_build_add(coord_bld, coord, offset); + } + coord = lp_build_abs(coord_bld, coord); /* clamp to [0.5, length - 0.5] */ min = half; @@ -431,12 +465,15 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: { - coord = lp_build_abs(coord_bld, coord); - if (bld->static_sampler_state->normalized_coords) { /* scale coord to length */ coord = lp_build_mul(coord_bld, coord, length_f); } + if (offset) { + offset = lp_build_int_to_float(coord_bld, offset); + coord = lp_build_add(coord_bld, coord, offset); + } + coord = lp_build_abs(coord_bld, coord); /* was: clamp to 
[-0.5, length + 0.5] then sub 0.5 */ /* skip clamp - always positive, and other side @@ -466,6 +503,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, * Build LLVM code for texture wrap mode for nearest filtering. * \param coord the incoming texcoord (nominally in [0,1]) * \param length the texture size along one dimension, as int vector + * \param length_f the texture size along one dimension, as float vector + * \param offset texel offset along one dimension (as int vector) * \param is_pot if TRUE, length is a power of two * \param wrap_mode one of PIPE_TEX_WRAP_x */ @@ -474,6 +513,7 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, LLVMValueRef coord, LLVMValueRef length, LLVMValueRef length_f, + LLVMValueRef offset, boolean is_pot, unsigned wrap_mode) { @@ -488,9 +528,17 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, if (is_pot) { coord = lp_build_mul(coord_bld, coord, length_f); icoord = lp_build_ifloor(coord_bld, coord); + if (offset) { + icoord = lp_build_add(int_coord_bld, icoord, offset); + } icoord = LLVMBuildAnd(builder, icoord, length_minus_one, ""); } else { + if (offset) { + offset = lp_build_int_to_float(coord_bld, offset); + offset = lp_build_div(coord_bld, offset, length_f); + coord = lp_build_add(coord_bld, coord, offset); + } /* take fraction, unnormalize */ coord = lp_build_fract_safe(coord_bld, coord); coord = lp_build_mul(coord_bld, coord, length_f); @@ -508,6 +556,9 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, /* floor */ /* use itrunc instead since we clamp to 0 anyway */ icoord = lp_build_itrunc(coord_bld, coord); + if (offset) { + icoord = lp_build_add(int_coord_bld, icoord, offset); + } /* clamp to [0, length - 1]. 
*/ icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero, @@ -521,9 +572,17 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, } /* no clamp necessary, border masking will handle this */ icoord = lp_build_ifloor(coord_bld, coord); + if (offset) { + icoord = lp_build_add(int_coord_bld, icoord, offset); + } break; case PIPE_TEX_WRAP_MIRROR_REPEAT: + if (offset) { + offset = lp_build_int_to_float(coord_bld, offset); + offset = lp_build_div(coord_bld, offset, length_f); + coord = lp_build_add(coord_bld, coord, offset); + } /* compute mirror function */ coord = lp_build_coord_mirror(bld, coord); @@ -540,12 +599,15 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, case PIPE_TEX_WRAP_MIRROR_CLAMP: case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - coord = lp_build_abs(coord_bld, coord); - if (bld->static_sampler_state->normalized_coords) { /* scale coord to length */ coord = lp_build_mul(coord_bld, coord, length_f); } + if (offset) { + offset = lp_build_int_to_float(coord_bld, offset); + coord = lp_build_add(coord_bld, coord, offset); + } + coord = lp_build_abs(coord_bld, coord); /* itrunc == ifloor here */ icoord = lp_build_itrunc(coord_bld, coord); @@ -555,12 +617,15 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, break; case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: - coord = lp_build_abs(coord_bld, coord); - if (bld->static_sampler_state->normalized_coords) { /* scale coord to length */ coord = lp_build_mul(coord_bld, coord, length_f); } + if (offset) { + offset = lp_build_int_to_float(coord_bld, offset); + coord = lp_build_add(coord_bld, coord, offset); + } + coord = lp_build_abs(coord_bld, coord); /* itrunc == ifloor here */ icoord = lp_build_itrunc(coord_bld, coord); @@ -590,6 +655,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, LLVMValueRef s, LLVMValueRef t, LLVMValueRef r, + const LLVMValueRef *offsets, LLVMValueRef colors_out[4]) { const unsigned dims = bld->dims; @@ -619,19 
+685,19 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, /* * Compute integer texcoords. */ - x = lp_build_sample_wrap_nearest(bld, s, width_vec, flt_width_vec, + x = lp_build_sample_wrap_nearest(bld, s, width_vec, flt_width_vec, offsets[0], bld->static_texture_state->pot_width, bld->static_sampler_state->wrap_s); lp_build_name(x, "tex.x.wrapped"); if (dims >= 2) { - y = lp_build_sample_wrap_nearest(bld, t, height_vec, flt_height_vec, + y = lp_build_sample_wrap_nearest(bld, t, height_vec, flt_height_vec, offsets[1], bld->static_texture_state->pot_height, bld->static_sampler_state->wrap_t); lp_build_name(y, "tex.y.wrapped"); if (dims == 3) { - z = lp_build_sample_wrap_nearest(bld, r, depth_vec, flt_depth_vec, + z = lp_build_sample_wrap_nearest(bld, r, depth_vec, flt_depth_vec, offsets[2], bld->static_texture_state->pot_depth, bld->static_sampler_state->wrap_r); lp_build_name(z, "tex.z.wrapped"); @@ -670,6 +736,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, LLVMValueRef s, LLVMValueRef t, LLVMValueRef r, + const LLVMValueRef *offsets, LLVMValueRef colors_out[4]) { const unsigned dims = bld->dims; @@ -702,7 +769,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, /* * Compute integer texcoords. 
*/ - lp_build_sample_wrap_linear(bld, s, width_vec, flt_width_vec, + lp_build_sample_wrap_linear(bld, s, width_vec, flt_width_vec, offsets[0], bld->static_texture_state->pot_width, bld->static_sampler_state->wrap_s, &x0, &x1, &s_fpart); @@ -710,7 +777,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, lp_build_name(x1, "tex.x1.wrapped"); if (dims >= 2) { - lp_build_sample_wrap_linear(bld, t, height_vec, flt_height_vec, + lp_build_sample_wrap_linear(bld, t, height_vec, flt_height_vec, offsets[1], bld->static_texture_state->pot_height, bld->static_sampler_state->wrap_t, &y0, &y1, &t_fpart); @@ -718,7 +785,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, lp_build_name(y1, "tex.y1.wrapped"); if (dims == 3) { - lp_build_sample_wrap_linear(bld, r, depth_vec, flt_depth_vec, + lp_build_sample_wrap_linear(bld, r, depth_vec, flt_depth_vec, offsets[2], bld->static_texture_state->pot_depth, bld->static_sampler_state->wrap_r, &z0, &z1, &r_fpart); @@ -851,6 +918,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, LLVMValueRef s, LLVMValueRef t, LLVMValueRef r, + const LLVMValueRef *offsets, LLVMValueRef ilevel0, LLVMValueRef ilevel1, LLVMValueRef lod_fpart, @@ -886,7 +954,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, lp_build_sample_image_nearest(bld, sampler_unit, size0, row_stride0_vec, img_stride0_vec, - data_ptr0, mipoff0, s, t, r, + data_ptr0, mipoff0, s, t, r, offsets, colors0); } else { @@ -894,7 +962,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, lp_build_sample_image_linear(bld, sampler_unit, size0, row_stride0_vec, img_stride0_vec, - data_ptr0, mipoff0, s, t, r, + data_ptr0, mipoff0, s, t, r, offsets, colors0); } @@ -950,14 +1018,14 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, lp_build_sample_image_nearest(bld, sampler_unit, size1, row_stride1_vec, img_stride1_vec, - data_ptr1, mipoff1, s, t, r, + data_ptr1, mipoff1, s, t, r, offsets, colors1); } else { 
lp_build_sample_image_linear(bld, sampler_unit, size1, row_stride1_vec, img_stride1_vec, - data_ptr1, mipoff1, s, t, r, + data_ptr1, mipoff1, s, t, r, offsets, colors1); } @@ -1120,6 +1188,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld, LLVMValueRef s, LLVMValueRef t, LLVMValueRef r, + const LLVMValueRef *offsets, LLVMValueRef lod_ipart, LLVMValueRef lod_fpart, LLVMValueRef ilevel0, @@ -1147,7 +1216,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld, /* no need to distinguish between minification and magnification */ lp_build_sample_mipmap(bld, sampler_unit, min_filter, mip_filter, - s, t, r, + s, t, r, offsets, ilevel0, ilevel1, lod_fpart, texels); } @@ -1180,7 +1249,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld, /* Use the minification filter */ lp_build_sample_mipmap(bld, sampler_unit, min_filter, mip_filter, - s, t, r, + s, t, r, offsets, ilevel0, ilevel1, lod_fpart, texels); } @@ -1189,7 +1258,7 @@ lp_build_sample_general(struct lp_build_sample_context *bld, /* Use the magnification filter */ lp_build_sample_mipmap(bld, sampler_unit, mag_filter, PIPE_TEX_MIPFILTER_NONE, - s, t, r, + s, t, r, offsets, ilevel0, NULL, NULL, texels); } @@ -1605,7 +1674,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm, if (use_aos) { /* do sampling/filtering with fixed pt arithmetic */ lp_build_sample_aos(&bld, sampler_index, - s, t, r, + s, t, r, offsets, lod_ipart, lod_fpart, ilevel0, ilevel1, texel_out); @@ -1613,7 +1682,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm, else { lp_build_sample_general(&bld, sampler_index, - s, t, r, + s, t, r, offsets, lod_ipart, lod_fpart, ilevel0, ilevel1, texel_out); @@ -1681,10 +1750,21 @@ lp_build_sample_soa(struct gallivm_state *gallivm, LLVMValueRef lod_iparts, lod_fparts = NULL; LLVMValueRef ilevel0s, ilevel1s = NULL; LLVMValueRef indexi = lp_build_const_int32(gallivm, i); + LLVMValueRef offsets4[4] = { NULL }; s4 = lp_build_extract_range(gallivm, s, 4*i, 4); t4 = 
lp_build_extract_range(gallivm, t, 4*i, 4); r4 = lp_build_extract_range(gallivm, r, 4*i, 4); + + if (offsets[0]) { + offsets4[0] = lp_build_extract_range(gallivm, offsets[0], 4*i, 4); + if (dims > 1) { + offsets4[1] = lp_build_extract_range(gallivm, offsets[1], 4*i, 4); + if (dims > 2) { + offsets4[2] = lp_build_extract_range(gallivm, offsets[2], 4*i, 4); + } + } + } lod_iparts = LLVMBuildExtractElement(builder, lod_ipart, indexi, ""); ilevel0s = LLVMBuildExtractElement(builder, ilevel0, indexi, ""); if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { @@ -1695,7 +1775,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm, if (use_aos) { /* do sampling/filtering with fixed pt arithmetic */ lp_build_sample_aos(&bld4, sampler_index, - s4, t4, r4, + s4, t4, r4, offsets4, lod_iparts, lod_fparts, ilevel0s, ilevel1s, texelout4); @@ -1703,7 +1783,7 @@ lp_build_sample_soa(struct gallivm_state *gallivm, else { lp_build_sample_general(&bld4, sampler_index, - s4, t4, r4, + s4, t4, r4, offsets4, lod_iparts, lod_fparts, ilevel0s, ilevel1s, texelout4); -- 2.30.2