#include "util/u_dump.h"
#include "util/u_memory.h"
#include "util/u_math.h"
-#include "util/u_format.h"
+#include "util/format/u_format.h"
#include "util/u_cpu_detect.h"
#include "util/format_rgb9e5.h"
#include "lp_bld_debug.h"
#include "lp_bld_quad.h"
#include "lp_bld_pack.h"
#include "lp_bld_intr.h"
+#include "lp_bld_misc.h"
/**
for (chan = 0; chan < 4; chan++) {
unsigned chan_s;
/* reverse-map channel... */
- for (chan_s = 0; chan_s < 4; chan_s++) {
- if (chan_s == format_desc->swizzle[chan]) {
+ if (util_format_has_stencil(format_desc)) {
+ if (chan == 0)
+ chan_s = 0;
+ else
break;
+ }
+ else {
+ for (chan_s = 0; chan_s < 4; chan_s++) {
+ if (chan_s == format_desc->swizzle[chan]) {
+ break;
+ }
}
}
if (chan_s <= 3) {
/**
- * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
+ * Helper to compute the mirror function for the PIPE_WRAP_MIRROR_REPEAT mode.
+ * (Note that with pot sizes could do this much more easily post-scale
+ * with some bit arithmetic.)
*/
static LLVMValueRef
lp_build_coord_mirror(struct lp_build_sample_context *bld,
- LLVMValueRef coord)
+ LLVMValueRef coord, boolean posOnly)
{
struct lp_build_context *coord_bld = &bld->coord_bld;
- struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
- LLVMValueRef fract, flr, isOdd;
-
- lp_build_ifloor_fract(coord_bld, coord, &flr, &fract);
- /* kill off NaNs */
- /* XXX: not safe without arch rounding, fract can be anything. */
- fract = lp_build_max_ext(coord_bld, fract, coord_bld->zero,
- GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
-
- /* isOdd = flr & 1 */
- isOdd = LLVMBuildAnd(bld->gallivm->builder, flr, int_coord_bld->one, "");
+ LLVMValueRef fract;
+ LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
- /* make coord positive or negative depending on isOdd */
- /* XXX slight overkill masking out sign bit is unnecessary */
- coord = lp_build_set_sign(coord_bld, fract, isOdd);
+ /*
+ * We can just use 2*(x - round(0.5*x)) to do all the mirroring,
+ * it all works out. (The result is in range [-1, 1.0], negative if
+ * the coord is in the "odd" section, otherwise positive.)
+ */
- /* convert isOdd to float */
- isOdd = lp_build_int_to_float(coord_bld, isOdd);
+ coord = lp_build_mul(coord_bld, coord, half);
+ fract = lp_build_round(coord_bld, coord);
+ fract = lp_build_sub(coord_bld, coord, fract);
+ coord = lp_build_add(coord_bld, fract, fract);
- /* add isOdd to coord */
- coord = lp_build_add(coord_bld, coord, isOdd);
+ if (posOnly) {
+ /*
+ * Theoretically it's not quite 100% accurate because the spec says
+ * that ultimately a scaled coord of -x.0 should map to int coord
+ * -x + 1 with mirroring, not -x (this does not matter for bilinear
+ * filtering).
+ */
+ coord = lp_build_abs(coord_bld, coord);
+ /* kill off NaNs */
+ /* XXX: not safe without arch rounding, fract can be anything. */
+ coord = lp_build_max_ext(coord_bld, coord, coord_bld->zero,
+ GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
+ }
return coord;
}
*/
static void
lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
+ boolean is_gather,
LLVMValueRef coord,
LLVMValueRef length,
LLVMValueRef length_f,
coord = lp_build_add(coord_bld, coord, offset);
}
- /* clamp to [0, length] */
+ /*
+ * clamp to [0, length]
+ *
+ * Unlike some other wrap modes, this should be correct for gather
+ * too. GL_CLAMP explicitly does this clamp on the coord prior to
+ * actual wrapping (which is per sample).
+ */
coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f);
coord = lp_build_sub(coord_bld, coord, half);
/* clamp to length max */
coord = lp_build_min_ext(coord_bld, coord, length_f,
GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
- /* subtract 0.5 */
- coord = lp_build_sub(coord_bld, coord, half);
- /* clamp to [0, length - 0.5] */
- coord = lp_build_max(coord_bld, coord, coord_bld->zero);
- /* convert to int, compute lerp weight */
- lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
- coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+ if (!is_gather) {
+ /* subtract 0.5 */
+ coord = lp_build_sub(coord_bld, coord, half);
+ /* clamp to [0, length - 0.5] */
+ coord = lp_build_max(coord_bld, coord, coord_bld->zero);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
+ coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+ } else {
+ /*
+ * The non-gather path will end up with coords 0, 1 if coord was
+ * smaller than 0.5 (with corresponding weight 0.0 so it doesn't
+ * really matter what the second coord is). But for gather, we
+ * really need to end up with coords 0, 0.
+ */
+ coord = lp_build_max(coord_bld, coord, coord_bld->zero);
+ coord0 = lp_build_sub(coord_bld, coord, half);
+ coord1 = lp_build_add(coord_bld, coord, half);
+ /* Values range ([-0.5, length_f - 0.5], [0.5, length_f + 0.5]) */
+ coord0 = lp_build_itrunc(coord_bld, coord0);
+ coord1 = lp_build_itrunc(coord_bld, coord1);
+ weight = coord_bld->undef;
+ }
/* coord1 = min(coord1, length-1) */
coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
break;
offset = lp_build_int_to_float(coord_bld, offset);
coord = lp_build_add(coord_bld, coord, offset);
}
- /* was: clamp to [-0.5, length + 0.5], then sub 0.5 */
- /* can skip clamp (though might not work for very large coord values) */
+ /*
+ * We don't need any clamp. Technically, for very large (pos or neg)
+ * (or infinite) values, clamp against [-length, length] would be
+ * correct, but we don't need to guarantee any specific
+ * result for such coords (the ifloor will be undefined, but for modes
+ * requiring border all resulting coords are safe).
+ */
coord = lp_build_sub(coord_bld, coord, half);
/* convert to int, compute lerp weight */
lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
offset = lp_build_div(coord_bld, offset, length_f);
coord = lp_build_add(coord_bld, coord, offset);
}
- /* compute mirror function */
- coord = lp_build_coord_mirror(bld, coord);
+ if (!is_gather) {
+ /* compute mirror function */
+ coord = lp_build_coord_mirror(bld, coord, TRUE);
- /* scale coord to length */
- coord = lp_build_mul(coord_bld, coord, length_f);
- coord = lp_build_sub(coord_bld, coord, half);
+ /* scale coord to length */
+ coord = lp_build_mul(coord_bld, coord, length_f);
+ coord = lp_build_sub(coord_bld, coord, half);
- /* convert to int, compute lerp weight */
- lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
- coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
+ coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+
+ /* coord0 = max(coord0, 0) */
+ coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
+ /* coord1 = min(coord1, length-1) */
+ coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
+ } else {
+ /*
+ * This is pretty reasonable in the end, all that the tests care
+ * about is nasty edge cases (scaled coords x.5, so the individual
+ * coords are actually integers, which is REALLY tricky to get right
+ * due to this working differently both for negative numbers as well
+ * as for even/odd cases). But with enough magic it's not too complex
+ * after all.
+ * Maybe should try a bit arithmetic one though for POT textures...
+ */
+ LLVMValueRef isNeg;
+ /*
+ * Wrapping just once still works, even though it means we can
+ * get "wrong" sign due to performing mirror in the middle of the
+ * two coords (because this can only happen very near the odd/even
+ * edges, so both coords will actually end up as 0 or length - 1
+ * in the end).
+ * For GL4 gather with per-sample offsets we'd need to do the
+ * mirroring per coord too.
+ */
+ coord = lp_build_coord_mirror(bld, coord, FALSE);
+ coord = lp_build_mul(coord_bld, coord, length_f);
- /* coord0 = max(coord0, 0) */
- coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
- /* coord1 = min(coord1, length-1) */
- coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
+ /*
+ * NaNs should be safe here, we'll do away with them with
+ * the ones' complement plus min.
+ */
+ coord0 = lp_build_sub(coord_bld, coord, half);
+ coord0 = lp_build_ifloor(coord_bld, coord0);
+ coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+ /* ones complement for neg numbers (mirror(negX) = X - 1) */
+ isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS,
+ coord0, int_coord_bld->zero);
+ coord0 = lp_build_xor(int_coord_bld, coord0, isNeg);
+ isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS,
+ coord1, int_coord_bld->zero);
+ coord1 = lp_build_xor(int_coord_bld, coord1, isNeg);
+ coord0 = lp_build_min(int_coord_bld, coord0, length_minus_one);
+ coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
+
+ weight = coord_bld->undef;
+ }
break;
case PIPE_TEX_WRAP_MIRROR_CLAMP:
offset = lp_build_int_to_float(coord_bld, offset);
coord = lp_build_add(coord_bld, coord, offset);
}
+ /*
+ * XXX: probably not correct for gather, albeit I'm not
+ * entirely sure as it's poorly specified. The wrapping looks
+ * correct according to the spec which is against gl 1.2.1,
+ * however negative values will be swapped - gl re-specified
+ * wrapping with newer versions (no more pre-clamp except with
+ * GL_CLAMP).
+ */
coord = lp_build_abs(coord_bld, coord);
/* clamp to [0, length] */
- coord = lp_build_min(coord_bld, coord, length_f);
+ coord = lp_build_min_ext(coord_bld, coord, length_f,
+ GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
coord = lp_build_sub(coord_bld, coord, half);
offset = lp_build_int_to_float(coord_bld, offset);
coord = lp_build_add(coord_bld, coord, offset);
}
- coord = lp_build_abs(coord_bld, coord);
-
- /* clamp to length max */
- coord = lp_build_min_ext(coord_bld, coord, length_f,
- GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
- /* subtract 0.5 */
- coord = lp_build_sub(coord_bld, coord, half);
- /* clamp to [0, length - 0.5] */
- coord = lp_build_max(coord_bld, coord, coord_bld->zero);
+ if (!is_gather) {
+ coord = lp_build_abs(coord_bld, coord);
+
+ /* clamp to length max */
+ coord = lp_build_min_ext(coord_bld, coord, length_f,
+ GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
+ /* subtract 0.5 */
+ coord = lp_build_sub(coord_bld, coord, half);
+ /* clamp to [0, length - 0.5] */
+ coord = lp_build_max(coord_bld, coord, coord_bld->zero);
+
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
+ coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+ /* coord1 = min(coord1, length-1) */
+ coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
+ } else {
+ /*
+ * The non-gather path will swap coord0/1 if coord was negative,
+ * which is ok for filtering since the filter weight matches
+ * accordingly. Also, if coord is close to zero, coord0/1 will
+ * be 0 and 1, instead of 0 and 0 (again ok due to filter
+ * weight being 0.0). Both issues need to be fixed for gather.
+ */
+ LLVMValueRef isNeg;
- /* convert to int, compute lerp weight */
- lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
- coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
- /* coord1 = min(coord1, length-1) */
- coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
+ /*
+ * Actually wanted to cheat here and use:
+ * coord1 = lp_build_iround(coord_bld, coord);
+ * but it's not good enough for some tests (even piglit
+ * textureGather is set up in a way so the coords area always
+ * .5, that is right at the crossover points).
+ * So do ordinary sub/floor, then do ones' complement
+ * for negative numbers.
+ * (Note can't just do sub|add/abs/itrunc per coord either -
+ * because the spec demands that mirror(3.0) = 3 but
+ * mirror(-3.0) = 2.)
+ */
+ coord = lp_build_sub(coord_bld, coord, half);
+ coord0 = lp_build_ifloor(coord_bld, coord);
+ coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+ isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, coord0,
+ int_coord_bld->zero);
+ coord0 = lp_build_xor(int_coord_bld, isNeg, coord0);
+ coord0 = lp_build_min(int_coord_bld, coord0, length_minus_one);
+
+ isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, coord1,
+ int_coord_bld->zero);
+ coord1 = lp_build_xor(int_coord_bld, isNeg, coord1);
+ coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
+
+ weight = coord_bld->undef;
+ }
}
break;
offset = lp_build_int_to_float(coord_bld, offset);
coord = lp_build_add(coord_bld, coord, offset);
}
+ /*
+ * XXX: probably not correct for gather due to swapped
+ * order if coord is negative (same rationale as for
+ * MIRROR_CLAMP).
+ */
coord = lp_build_abs(coord_bld, coord);
- /* was: clamp to [-0.5, length + 0.5] then sub 0.5 */
- /* skip clamp - always positive, and other side
- only potentially matters for very large coords */
+ /*
+ * We don't need any clamp. Technically, for very large
+ * (or infinite) values, clamp against length would be
+ * correct, but we don't need to guarantee any specific
+ * result for such coords (the ifloor will be undefined, but
+ * for modes requiring border all resulting coords are safe).
+ */
coord = lp_build_sub(coord_bld, coord, half);
/* convert to int, compute lerp weight */
coord = lp_build_add(coord_bld, coord, offset);
}
/* compute mirror function */
- coord = lp_build_coord_mirror(bld, coord);
+ coord = lp_build_coord_mirror(bld, coord, TRUE);
/* scale coord to length */
assert(bld->static_sampler_state->normalized_coords);
LLVMValueRef img_stride_vec,
LLVMValueRef data_ptr,
LLVMValueRef mipoffsets,
- LLVMValueRef *coords,
+ const LLVMValueRef *coords,
const LLVMValueRef *offsets,
LLVMValueRef colors_out[4])
{
LLVMValueRef img_stride_vec,
LLVMValueRef data_ptr,
LLVMValueRef mipoffsets,
- LLVMValueRef *coords,
+ const LLVMValueRef *coords,
const LLVMValueRef *offsets,
LLVMValueRef colors_out[4])
{
LLVMValueRef neighbors[2][2][4];
int chan, texel_index;
boolean seamless_cube_filter, accurate_cube_corners;
+ unsigned chan_swiz = bld->static_texture_state->swizzle_r;
+
+ if (is_gather) {
+ switch (bld->gather_comp) {
+ case 0: chan_swiz = bld->static_texture_state->swizzle_r; break;
+ case 1: chan_swiz = bld->static_texture_state->swizzle_g; break;
+ case 2: chan_swiz = bld->static_texture_state->swizzle_b; break;
+ case 3: chan_swiz = bld->static_texture_state->swizzle_a; break;
+ default:
+ break;
+ }
+ }
seamless_cube_filter = (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
bld->static_sampler_state->seamless_cube_map;
+
/*
- * XXX I don't know how this is really supposed to work with gather. From GL
- * spec wording (not gather specific) it sounds like the 4th missing texel
- * should be an average of the other 3, hence for gather could return this.
- * This is however NOT how the code here works, which just fixes up the
- * weights used for filtering instead. And of course for gather there is
- * no filter to tweak...
+ * Disable accurate cube corners for integer textures, which should only
+ * get here in the gather path.
*/
accurate_cube_corners = ACCURATE_CUBE_CORNERS && seamless_cube_filter &&
- !is_gather;
+ !util_format_is_pure_integer(bld->static_texture_state->format);
lp_build_extract_image_sizes(bld,
&bld->int_size_bld,
*/
if (!seamless_cube_filter) {
- lp_build_sample_wrap_linear(bld, coords[0], width_vec,
+ lp_build_sample_wrap_linear(bld, is_gather, coords[0], width_vec,
flt_width_vec, offsets[0],
bld->static_texture_state->pot_width,
bld->static_sampler_state->wrap_s,
x11 = x01;
if (dims >= 2) {
- lp_build_sample_wrap_linear(bld, coords[1], height_vec,
+ lp_build_sample_wrap_linear(bld, is_gather, coords[1], height_vec,
flt_height_vec, offsets[1],
bld->static_texture_state->pot_height,
bld->static_sampler_state->wrap_t,
y11 = y10;
if (dims == 3) {
- lp_build_sample_wrap_linear(bld, coords[2], depth_vec,
+ lp_build_sample_wrap_linear(bld, is_gather, coords[2], depth_vec,
flt_depth_vec, offsets[2],
bld->static_texture_state->pot_depth,
bld->static_sampler_state->wrap_r,
struct lp_build_if_state edge_if;
LLVMTypeRef int1t;
LLVMValueRef new_faces[4], new_xcoords[4][2], new_ycoords[4][2];
- LLVMValueRef coord, have_edge, have_corner;
+ LLVMValueRef coord0, coord1, have_edge, have_corner;
LLVMValueRef fall_off_ym_notxm, fall_off_ym_notxp, fall_off_x, fall_off_y;
LLVMValueRef fall_off_yp_notxm, fall_off_yp_notxp;
LLVMValueRef x0, x1, y0, y1, y0_clamped, y1_clamped;
*/
/* should always have normalized coords, and offsets are undefined */
assert(bld->static_sampler_state->normalized_coords);
- coord = lp_build_mul(coord_bld, coords[0], flt_width_vec);
+ /*
+ * The coords should all be between [0,1] however we can have NaNs,
+ * which will wreak havoc. In particular the y1_clamped value below
+ * can be -INT_MAX (on x86) and be propagated right through (probably
+ * other values might be bogus in the end too).
+ * So kill off the NaNs here.
+ */
+ coord0 = lp_build_max_ext(coord_bld, coords[0], coord_bld->zero,
+ GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
+ coord0 = lp_build_mul(coord_bld, coord0, flt_width_vec);
/* instead of clamp, build mask if overflowed */
- coord = lp_build_sub(coord_bld, coord, half);
+ coord0 = lp_build_sub(coord_bld, coord0, half);
/* convert to int, compute lerp weight */
/* not ideal with AVX (and no AVX2) */
- lp_build_ifloor_fract(coord_bld, coord, &x0, &s_fpart);
+ lp_build_ifloor_fract(coord_bld, coord0, &x0, &s_fpart);
x1 = lp_build_add(ivec_bld, x0, ivec_bld->one);
- coord = lp_build_mul(coord_bld, coords[1], flt_height_vec);
- coord = lp_build_sub(coord_bld, coord, half);
- lp_build_ifloor_fract(coord_bld, coord, &y0, &t_fpart);
+ coord1 = lp_build_max_ext(coord_bld, coords[1], coord_bld->zero,
+ GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
+ coord1 = lp_build_mul(coord_bld, coord1, flt_height_vec);
+ coord1 = lp_build_sub(coord_bld, coord1, half);
+ lp_build_ifloor_fract(coord_bld, coord1, &y0, &t_fpart);
y1 = lp_build_add(ivec_bld, y0, ivec_bld->one);
fall_off[0] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, x0, ivec_bld->zero);
* as well) here.
*/
if (accurate_cube_corners) {
- LLVMValueRef w00, w01, w10, w11, wx0, wy0;
- LLVMValueRef c_weight, c00, c01, c10, c11;
- LLVMValueRef have_corner, one_third, tmp;
+ LLVMValueRef c00, c01, c10, c11, c00f, c01f, c10f, c11f;
+ LLVMValueRef have_corner, one_third;
- colorss[0] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
- colorss[1] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
- colorss[2] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
- colorss[3] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
+ colorss[0] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs0");
+ colorss[1] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs1");
+ colorss[2] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs2");
+ colorss[3] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs3");
have_corner = LLVMBuildLoad(builder, have_corners, "");
lp_build_if(&corner_if, bld->gallivm, have_corner);
- /*
- * we can't use standard 2d lerp as we need per-element weight
- * in case of corners, so just calculate bilinear result as
- * w00*s00 + w01*s01 + w10*s10 + w11*s11.
- * (This is actually less work than using 2d lerp, 7 vs. 9 instructions,
- * however calculating the weights needs another 6, so actually probably
- * not slower than 2d lerp only for 4 channels as weights only need
- * to be calculated once - of course fixing the weights has additional cost.)
- */
- wx0 = lp_build_sub(coord_bld, coord_bld->one, s_fpart);
- wy0 = lp_build_sub(coord_bld, coord_bld->one, t_fpart);
- w00 = lp_build_mul(coord_bld, wx0, wy0);
- w01 = lp_build_mul(coord_bld, s_fpart, wy0);
- w10 = lp_build_mul(coord_bld, wx0, t_fpart);
- w11 = lp_build_mul(coord_bld, s_fpart, t_fpart);
-
- /* find corner weight */
+ one_third = lp_build_const_vec(bld->gallivm, coord_bld->type,
+ 1.0f/3.0f);
+
+ /* find corner */
c00 = lp_build_and(ivec_bld, fall_off[0], fall_off[2]);
- c_weight = lp_build_select(coord_bld, c00, w00, coord_bld->zero);
+ c00f = LLVMBuildBitCast(builder, c00, coord_bld->vec_type, "");
c01 = lp_build_and(ivec_bld, fall_off[1], fall_off[2]);
- c_weight = lp_build_select(coord_bld, c01, w01, c_weight);
+ c01f = LLVMBuildBitCast(builder, c01, coord_bld->vec_type, "");
c10 = lp_build_and(ivec_bld, fall_off[0], fall_off[3]);
- c_weight = lp_build_select(coord_bld, c10, w10, c_weight);
+ c10f = LLVMBuildBitCast(builder, c10, coord_bld->vec_type, "");
c11 = lp_build_and(ivec_bld, fall_off[1], fall_off[3]);
- c_weight = lp_build_select(coord_bld, c11, w11, c_weight);
+ c11f = LLVMBuildBitCast(builder, c11, coord_bld->vec_type, "");
- /*
- * add 1/3 of the corner weight to each of the 3 other samples
- * and null out corner weight
- */
- one_third = lp_build_const_vec(bld->gallivm, coord_bld->type, 1.0f/3.0f);
- c_weight = lp_build_mul(coord_bld, c_weight, one_third);
- w00 = lp_build_add(coord_bld, w00, c_weight);
- c00 = LLVMBuildBitCast(builder, c00, coord_bld->vec_type, "");
- w00 = lp_build_andnot(coord_bld, w00, c00);
- w01 = lp_build_add(coord_bld, w01, c_weight);
- c01 = LLVMBuildBitCast(builder, c01, coord_bld->vec_type, "");
- w01 = lp_build_andnot(coord_bld, w01, c01);
- w10 = lp_build_add(coord_bld, w10, c_weight);
- c10 = LLVMBuildBitCast(builder, c10, coord_bld->vec_type, "");
- w10 = lp_build_andnot(coord_bld, w10, c10);
- w11 = lp_build_add(coord_bld, w11, c_weight);
- c11 = LLVMBuildBitCast(builder, c11, coord_bld->vec_type, "");
- w11 = lp_build_andnot(coord_bld, w11, c11);
+ if (!is_gather) {
+ /*
+ * we can't use standard 2d lerp as we need per-element weight
+ * in case of corners, so just calculate bilinear result as
+ * w00*s00 + w01*s01 + w10*s10 + w11*s11.
+ * (This is actually less work than using 2d lerp, 7 vs. 9
+ * instructions, however calculating the weights needs another 6,
+ * so actually probably not slower than 2d lerp only for 4 channels
+ * as weights only need to be calculated once - of course fixing
+ * the weights has additional cost.)
+ */
+ LLVMValueRef w00, w01, w10, w11, wx0, wy0, c_weight, tmp;
+ wx0 = lp_build_sub(coord_bld, coord_bld->one, s_fpart);
+ wy0 = lp_build_sub(coord_bld, coord_bld->one, t_fpart);
+ w00 = lp_build_mul(coord_bld, wx0, wy0);
+ w01 = lp_build_mul(coord_bld, s_fpart, wy0);
+ w10 = lp_build_mul(coord_bld, wx0, t_fpart);
+ w11 = lp_build_mul(coord_bld, s_fpart, t_fpart);
+
+ /* find corner weight */
+ c_weight = lp_build_select(coord_bld, c00, w00, coord_bld->zero);
+ c_weight = lp_build_select(coord_bld, c01, w01, c_weight);
+ c_weight = lp_build_select(coord_bld, c10, w10, c_weight);
+ c_weight = lp_build_select(coord_bld, c11, w11, c_weight);
- if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
- for (chan = 0; chan < 4; chan++) {
- colors0[chan] = lp_build_mul(coord_bld, w00, neighbors[0][0][chan]);
- tmp = lp_build_mul(coord_bld, w01, neighbors[0][1][chan]);
- colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
- tmp = lp_build_mul(coord_bld, w10, neighbors[1][0][chan]);
- colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
- tmp = lp_build_mul(coord_bld, w11, neighbors[1][1][chan]);
- colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
+ /*
+ * add 1/3 of the corner weight to the weight of the 3 other
+ * samples and null out corner weight.
+ */
+ c_weight = lp_build_mul(coord_bld, c_weight, one_third);
+ w00 = lp_build_add(coord_bld, w00, c_weight);
+ w00 = lp_build_andnot(coord_bld, w00, c00f);
+ w01 = lp_build_add(coord_bld, w01, c_weight);
+ w01 = lp_build_andnot(coord_bld, w01, c01f);
+ w10 = lp_build_add(coord_bld, w10, c_weight);
+ w10 = lp_build_andnot(coord_bld, w10, c10f);
+ w11 = lp_build_add(coord_bld, w11, c_weight);
+ w11 = lp_build_andnot(coord_bld, w11, c11f);
+
+ if (bld->static_sampler_state->compare_mode ==
+ PIPE_TEX_COMPARE_NONE) {
+ for (chan = 0; chan < 4; chan++) {
+ colors0[chan] = lp_build_mul(coord_bld, w00,
+ neighbors[0][0][chan]);
+ tmp = lp_build_mul(coord_bld, w01, neighbors[0][1][chan]);
+ colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
+ tmp = lp_build_mul(coord_bld, w10, neighbors[1][0][chan]);
+ colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
+ tmp = lp_build_mul(coord_bld, w11, neighbors[1][1][chan]);
+ colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
+ }
+ }
+ else {
+ LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
+ cmpval00 = lp_build_sample_comparefunc(bld, coords[4],
+ neighbors[0][0][0]);
+ cmpval01 = lp_build_sample_comparefunc(bld, coords[4],
+ neighbors[0][1][0]);
+ cmpval10 = lp_build_sample_comparefunc(bld, coords[4],
+ neighbors[1][0][0]);
+ cmpval11 = lp_build_sample_comparefunc(bld, coords[4],
+ neighbors[1][1][0]);
+ /*
+ * inputs to interpolation are just masks so just add
+ * masked weights together
+ */
+ cmpval00 = LLVMBuildBitCast(builder, cmpval00,
+ coord_bld->vec_type, "");
+ cmpval01 = LLVMBuildBitCast(builder, cmpval01,
+ coord_bld->vec_type, "");
+ cmpval10 = LLVMBuildBitCast(builder, cmpval10,
+ coord_bld->vec_type, "");
+ cmpval11 = LLVMBuildBitCast(builder, cmpval11,
+ coord_bld->vec_type, "");
+ colors0[0] = lp_build_and(coord_bld, w00, cmpval00);
+ tmp = lp_build_and(coord_bld, w01, cmpval01);
+ colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
+ tmp = lp_build_and(coord_bld, w10, cmpval10);
+ colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
+ tmp = lp_build_and(coord_bld, w11, cmpval11);
+ colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
+ colors0[1] = colors0[2] = colors0[3] = colors0[0];
}
}
else {
- LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
- cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
- cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
- cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
- cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
- /* inputs to interpolation are just masks so just add masked weights together */
- cmpval00 = LLVMBuildBitCast(builder, cmpval00, coord_bld->vec_type, "");
- cmpval01 = LLVMBuildBitCast(builder, cmpval01, coord_bld->vec_type, "");
- cmpval10 = LLVMBuildBitCast(builder, cmpval10, coord_bld->vec_type, "");
- cmpval11 = LLVMBuildBitCast(builder, cmpval11, coord_bld->vec_type, "");
- colors0[0] = lp_build_and(coord_bld, w00, cmpval00);
- tmp = lp_build_and(coord_bld, w01, cmpval01);
- colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
- tmp = lp_build_and(coord_bld, w10, cmpval10);
- colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
- tmp = lp_build_and(coord_bld, w11, cmpval11);
- colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
- colors0[1] = colors0[2] = colors0[3] = colors0[0];
+ /*
+ * We don't have any weights to adjust, so instead calculate
+ * the fourth texel as simply the average of the other 3.
+ * (This would work for non-gather too, however we'd have
+ * a boatload more of the select stuff due to there being
+ * 4 times as many colors as weights.)
+ */
+ LLVMValueRef col00, col01, col10, col11;
+ LLVMValueRef colc, colc0, colc1;
+ col10 = lp_build_swizzle_soa_channel(texel_bld,
+ neighbors[1][0], chan_swiz);
+ col11 = lp_build_swizzle_soa_channel(texel_bld,
+ neighbors[1][1], chan_swiz);
+ col01 = lp_build_swizzle_soa_channel(texel_bld,
+ neighbors[0][1], chan_swiz);
+ col00 = lp_build_swizzle_soa_channel(texel_bld,
+ neighbors[0][0], chan_swiz);
+
+ /*
+ * The spec says for comparison filtering, the comparison
+ * must happen before synthesizing the new value.
+ * This means all gathered values are always 0 or 1,
+ * except for the non-existing texel, which can be 0,1/3,2/3,1...
+ * Seems like we'd be allowed to just return 0 or 1 too, so we
+ * could simplify and pass down the compare mask values to the
+ * end (using int arithmetic/compare on the mask values to
+ * construct the fourth texel) and only there convert to floats
+ * but it's probably not worth it (it might be easier for the cpu
+ * but not for the code)...
+ */
+ if (bld->static_sampler_state->compare_mode !=
+ PIPE_TEX_COMPARE_NONE) {
+ LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
+ cmpval00 = lp_build_sample_comparefunc(bld, coords[4], col00);
+ cmpval01 = lp_build_sample_comparefunc(bld, coords[4], col01);
+ cmpval10 = lp_build_sample_comparefunc(bld, coords[4], col10);
+ cmpval11 = lp_build_sample_comparefunc(bld, coords[4], col11);
+ col00 = lp_build_select(texel_bld, cmpval00,
+ texel_bld->one, texel_bld->zero);
+ col01 = lp_build_select(texel_bld, cmpval01,
+ texel_bld->one, texel_bld->zero);
+ col10 = lp_build_select(texel_bld, cmpval10,
+ texel_bld->one, texel_bld->zero);
+ col11 = lp_build_select(texel_bld, cmpval11,
+ texel_bld->one, texel_bld->zero);
+ }
+
+ /*
+ * Null out corner color.
+ */
+ col00 = lp_build_andnot(coord_bld, col00, c00f);
+ col01 = lp_build_andnot(coord_bld, col01, c01f);
+ col10 = lp_build_andnot(coord_bld, col10, c10f);
+ col11 = lp_build_andnot(coord_bld, col11, c11f);
+
+ /*
+ * New corner texel color is all colors added / 3.
+ */
+ colc0 = lp_build_add(coord_bld, col00, col01);
+ colc1 = lp_build_add(coord_bld, col10, col11);
+ colc = lp_build_add(coord_bld, colc0, colc1);
+ colc = lp_build_mul(coord_bld, one_third, colc);
+
+ /*
+ * Replace the corner texel color with the new value.
+ */
+ col00 = lp_build_select(coord_bld, c00, colc, col00);
+ col01 = lp_build_select(coord_bld, c01, colc, col01);
+ col10 = lp_build_select(coord_bld, c10, colc, col10);
+ col11 = lp_build_select(coord_bld, c11, colc, col11);
+
+ colors0[0] = col10;
+ colors0[1] = col11;
+ colors0[2] = col01;
+ colors0[3] = col00;
}
LLVMBuildStore(builder, colors0[0], colorss[0]);
* end of sampling (much less values to swizzle), but this
* obviously cannot work when using gather.
*/
- unsigned chan_swiz = bld->static_texture_state->swizzle_r;
colors0[0] = lp_build_swizzle_soa_channel(texel_bld,
neighbors[1][0],
chan_swiz);
if (is_gather) {
/* more hacks for swizzling, should be X, ONE or ZERO... */
- unsigned chan_swiz = bld->static_texture_state->swizzle_r;
- if (chan_swiz <= PIPE_SWIZZLE_W) {
- colors0[0] = lp_build_select(texel_bld, cmpval10,
- texel_bld->one, texel_bld->zero);
- colors0[1] = lp_build_select(texel_bld, cmpval11,
- texel_bld->one, texel_bld->zero);
- colors0[2] = lp_build_select(texel_bld, cmpval01,
- texel_bld->one, texel_bld->zero);
- colors0[3] = lp_build_select(texel_bld, cmpval00,
- texel_bld->one, texel_bld->zero);
- }
- else if (chan_swiz == PIPE_SWIZZLE_0) {
- colors0[0] = colors0[1] = colors0[2] = colors0[3] =
- texel_bld->zero;
- }
- else {
- colors0[0] = colors0[1] = colors0[2] = colors0[3] =
- texel_bld->one;
- }
+ colors0[0] = lp_build_select(texel_bld, cmpval10,
+ texel_bld->one, texel_bld->zero);
+ colors0[1] = lp_build_select(texel_bld, cmpval11,
+ texel_bld->one, texel_bld->zero);
+ colors0[2] = lp_build_select(texel_bld, cmpval01,
+ texel_bld->one, texel_bld->zero);
+ colors0[3] = lp_build_select(texel_bld, cmpval00,
+ texel_bld->one, texel_bld->zero);
}
else {
colors0[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart,
}
}
}
+ if (is_gather) {
+ /*
+ * For gather, we can't do our usual channel swizzling done later,
+ * so do it here. It only really matters for 0/1 swizzles in case
+ * of comparison filtering, since in this case the results would be
+ * wrong, without comparison it should all work out alright but it
+ * can't hurt to do that here, since it will instantly drop all
+ * calculations above, though it's a rather stupid idea to do
+ * gather on a channel which will always return 0 or 1 in any case...
+ */
+ if (chan_swiz == PIPE_SWIZZLE_1) {
+ for (chan = 0; chan < 4; chan++) {
+ colors_out[chan] = texel_bld->one;
+ }
+ } else if (chan_swiz == PIPE_SWIZZLE_0) {
+ for (chan = 0; chan < 4; chan++) {
+ colors_out[chan] = texel_bld->zero;
+ }
+ }
+ }
}
unsigned img_filter,
unsigned mip_filter,
boolean is_gather,
- LLVMValueRef *coords,
+ const LLVMValueRef *coords,
const LLVMValueRef *offsets,
LLVMValueRef ilevel0,
LLVMValueRef ilevel1,
PIPE_FUNC_GREATER,
lod_fpart, bld->lodf_bld.zero);
need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, need_lerp);
+ lp_build_name(need_lerp, "need_lerp");
}
lp_build_if(&if_ctx, bld->gallivm, need_lerp);
lp_build_sample_mipmap_both(struct lp_build_sample_context *bld,
LLVMValueRef linear_mask,
unsigned mip_filter,
- LLVMValueRef *coords,
+ const LLVMValueRef *coords,
const LLVMValueRef *offsets,
LLVMValueRef ilevel0,
LLVMValueRef ilevel1,
* should be able to merge the branches in this case.
*/
need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_positive);
+ lp_build_name(need_lerp, "need_lerp");
lp_build_if(&if_ctx, bld->gallivm, need_lerp);
{
*/
static void
lp_build_sample_common(struct lp_build_sample_context *bld,
+ boolean is_lodq,
unsigned texture_index,
unsigned sampler_index,
LLVMValueRef *coords,
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
LLVMValueRef *lod_pos_or_zero,
+ LLVMValueRef *lod,
LLVMValueRef *lod_fpart,
LLVMValueRef *ilevel0,
LLVMValueRef *ilevel1)
* Compute the level of detail (float).
*/
if (min_filter != mag_filter ||
- mip_filter != PIPE_TEX_MIPFILTER_NONE) {
+ mip_filter != PIPE_TEX_MIPFILTER_NONE || is_lodq) {
/* Need to compute lod either to choose mipmap levels or to
* distinguish between minification/magnification with one mipmap level.
*/
- lp_build_lod_selector(bld, texture_index, sampler_index,
+ lp_build_lod_selector(bld, is_lodq, texture_index, sampler_index,
coords[0], coords[1], coords[2], cube_rho,
derivs, lod_bias, explicit_lod,
- mip_filter,
+ mip_filter, lod,
&lod_ipart, lod_fpart, lod_pos_or_zero);
+ if (is_lodq) {
+ LLVMValueRef last_level;
+ last_level = bld->dynamic_state->last_level(bld->dynamic_state,
+ bld->gallivm,
+ bld->context_ptr,
+ texture_index);
+ first_level = bld->dynamic_state->first_level(bld->dynamic_state,
+ bld->gallivm,
+ bld->context_ptr,
+ texture_index);
+ last_level = lp_build_sub(&bld->int_bld, last_level, first_level);
+ last_level = lp_build_int_to_float(&bld->float_bld, last_level);
+ last_level = lp_build_broadcast_scalar(&bld->lodf_bld, last_level);
+
+ switch (mip_filter) {
+ case PIPE_TEX_MIPFILTER_NONE:
+ *lod_fpart = bld->lodf_bld.zero;
+ break;
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ *lod_fpart = lp_build_round(&bld->lodf_bld, *lod_fpart);
+ /* fallthrough */
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ *lod_fpart = lp_build_clamp(&bld->lodf_bld, *lod_fpart,
+ bld->lodf_bld.zero, last_level);
+ break;
+ }
+ return;
+ }
+
} else {
lod_ipart = bld->lodi_bld.zero;
*lod_pos_or_zero = bld->lodi_bld.zero;
max_clamp = vec4_bld.one;
}
else if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC ||
- format_desc->layout == UTIL_FORMAT_LAYOUT_ETC) {
+ format_desc->layout == UTIL_FORMAT_LAYOUT_ETC ||
+ format_desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
switch (format_desc->format) {
case PIPE_FORMAT_RGTC1_UNORM:
case PIPE_FORMAT_RGTC2_UNORM:
case PIPE_FORMAT_LATC1_UNORM:
case PIPE_FORMAT_LATC2_UNORM:
case PIPE_FORMAT_ETC1_RGB8:
+ case PIPE_FORMAT_BPTC_RGBA_UNORM:
+ case PIPE_FORMAT_BPTC_SRGBA:
min_clamp = vec4_bld.zero;
max_clamp = vec4_bld.one;
break;
min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
max_clamp = vec4_bld.one;
break;
+ case PIPE_FORMAT_BPTC_RGB_FLOAT:
+ /* not sure if we should clamp to max half float? */
+ break;
+ case PIPE_FORMAT_BPTC_RGB_UFLOAT:
+ min_clamp = vec4_bld.zero;
+ break;
default:
assert(0);
break;
lp_build_sample_general(struct lp_build_sample_context *bld,
unsigned sampler_unit,
boolean is_gather,
- LLVMValueRef *coords,
+ const LLVMValueRef *coords,
const LLVMValueRef *offsets,
LLVMValueRef lod_positive,
LLVMValueRef lod_fpart,
struct lp_build_if_state if_ctx;
lod_positive = LLVMBuildTrunc(builder, lod_positive,
- LLVMInt1TypeInContext(bld->gallivm->context), "");
+ LLVMInt1TypeInContext(bld->gallivm->context),
+ "lod_pos");
lp_build_if(&if_ctx, bld->gallivm, lod_positive);
{
}
need_linear = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods,
linear_mask);
+ lp_build_name(need_linear, "need_linear");
if (bld->num_lods != bld->coord_type.length) {
linear_mask = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
static void
lp_build_fetch_texel(struct lp_build_sample_context *bld,
unsigned texture_unit,
+ LLVMValueRef ms_index,
const LLVMValueRef *coords,
LLVMValueRef explicit_lod,
const LLVMValueRef *offsets,
lp_build_get_mip_offsets(bld, ilevel));
}
+ if (bld->fetch_ms) {
+ LLVMValueRef num_samples;
+ num_samples = bld->dynamic_state->num_samples(bld->dynamic_state, bld->gallivm,
+ bld->context_ptr, texture_unit);
+ out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, ms_index, int_coord_bld->zero);
+ out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
+ out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, ms_index, lp_build_broadcast_scalar(int_coord_bld, num_samples));
+ out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
+ offset = lp_build_add(int_coord_bld, offset,
+ lp_build_mul(int_coord_bld, bld->sample_stride, ms_index));
+ }
+
offset = lp_build_andnot(int_coord_bld, offset, out_of_bounds);
lp_build_fetch_rgba_soa(bld->gallivm,
const LLVMValueRef *offsets,
const struct lp_derivatives *derivs, /* optional */
LLVMValueRef lod, /* optional */
+ LLVMValueRef ms_index, /* optional */
LLVMValueRef texel_out[4])
{
unsigned target = static_texture_state->target;
enum lp_sampler_op_type op_type;
LLVMValueRef lod_bias = NULL;
LLVMValueRef explicit_lod = NULL;
- boolean op_is_tex;
+ boolean op_is_tex, op_is_lodq, op_is_gather, fetch_ms;
if (0) {
enum pipe_format fmt = static_texture_state->format;
LP_SAMPLER_LOD_CONTROL_SHIFT;
op_type = (sample_key & LP_SAMPLER_OP_TYPE_MASK) >>
LP_SAMPLER_OP_TYPE_SHIFT;
+ fetch_ms = !!(sample_key & LP_SAMPLER_FETCH_MS);
op_is_tex = op_type == LP_SAMPLER_OP_TEXTURE;
+ op_is_lodq = op_type == LP_SAMPLER_OP_LODQ;
+ op_is_gather = op_type == LP_SAMPLER_OP_GATHER;
if (lod_control == LP_SAMPLER_LOD_BIAS) {
lod_bias = lod;
bld.format_desc = util_format_description(static_texture_state->format);
bld.dims = dims;
+ if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD || op_is_lodq) {
+ bld.no_quad_lod = TRUE;
+ }
+ if (gallivm_perf & GALLIVM_PERF_NO_RHO_APPROX || op_is_lodq) {
+ bld.no_rho_approx = TRUE;
+ }
+ if (gallivm_perf & GALLIVM_PERF_NO_BRILINEAR || op_is_lodq) {
+ bld.no_brilinear = TRUE;
+ }
+
bld.vector_width = lp_type_width(type);
bld.float_type = lp_type_float(32);
else if (util_format_has_stencil(bld.format_desc) &&
!util_format_has_depth(bld.format_desc)) {
/* for stencil only formats, sample stencil (uint) */
- bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
+ bld.texel_type = lp_type_uint_vec(type.width, type.width * type.length);
}
- if (!static_texture_state->level_zero_only) {
+ if (!static_texture_state->level_zero_only ||
+ !static_sampler_state->max_lod_pos || op_is_lodq) {
derived_sampler_state.min_mip_filter = static_sampler_state->min_mip_filter;
} else {
derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
}
- if (op_type == LP_SAMPLER_OP_GATHER) {
+ if (op_is_gather) {
/*
* gather4 is exactly like GL_LINEAR filtering but in the end skipping
* the actual filtering. Using mostly the same paths, so cube face
*/
bld.num_mips = bld.num_lods = 1;
- if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
- (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
- (static_texture_state->target == PIPE_TEXTURE_CUBE ||
- static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
- (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
+ if (bld.no_quad_lod && bld.no_rho_approx &&
+ ((mip_filter != PIPE_TEX_MIPFILTER_NONE && op_is_tex &&
+ (static_texture_state->target == PIPE_TEXTURE_CUBE ||
+ static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY)) ||
+ op_is_lodq)) {
/*
* special case for using per-pixel lod even for implicit lod,
* which is generally never required (ok by APIs) except to please
* can cause derivatives to be different for pixels outside the primitive
* due to the major axis division even if pre-project derivatives are
* looking normal).
+ * For lodq, we do it to simply avoid scalar pack / unpack (albeit for
+ * cube maps we do indeed get per-pixel lod values).
*/
bld.num_mips = type.length;
bld.num_lods = type.length;
bld.num_lods = num_quads;
}
-
+ bld.fetch_ms = fetch_ms;
+ if (op_is_gather)
+ bld.gather_comp = (sample_key & LP_SAMPLER_GATHER_COMP_MASK) >> LP_SAMPLER_GATHER_COMP_SHIFT;
bld.lodf_type = type;
/* we want native vector size to be able to use our intrinsics */
if (bld.num_lods != type.length) {
context_ptr, texture_index);
bld.mip_offsets = dynamic_state->mip_offsets(dynamic_state, gallivm,
context_ptr, texture_index);
+
+ if (fetch_ms)
+ bld.sample_stride = lp_build_broadcast_scalar(&bld.int_coord_bld, dynamic_state->sample_stride(dynamic_state, gallivm,
+ context_ptr, texture_index));
/* Note that mip_offsets is an array[level] of offsets to texture images */
if (dynamic_state->cache_ptr && thread_data_ptr) {
newcoords[i] = coords[i];
}
+ if (util_format_is_pure_integer(static_texture_state->format) &&
+ !util_format_has_depth(bld.format_desc) && op_is_tex &&
+ (static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR ||
+ static_sampler_state->min_img_filter == PIPE_TEX_FILTER_LINEAR ||
+ static_sampler_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR)) {
+ /*
+ * Bail if impossible filtering is specified (the awkward additional
+ * depth check is because it is legal in gallium to have things like S8Z24
+ * here which would say it's pure int despite such formats should sample
+ * the depth component).
+ * In GL such filters make the texture incomplete, this makes it robust
+ * against gallium frontends which set this up regardless (we'd crash in the
+ * lerp later otherwise).
+ * At least in some apis it may be legal to use such filters with lod
+ * queries and/or gather (at least for gather d3d10 says only the wrap
+ * bits are really used hence filter bits are likely simply ignored).
+ * For fetch, we don't get valid samplers either way here.
+ */
+ unsigned chan;
+ LLVMValueRef zero = lp_build_zero(gallivm, type);
+ for (chan = 0; chan < 4; chan++) {
+ texel_out[chan] = zero;
+ }
+ return;
+ }
+
if (0) {
/* For debug: no-op texture sampling */
lp_build_sample_nop(gallivm,
}
else if (op_type == LP_SAMPLER_OP_FETCH) {
- lp_build_fetch_texel(&bld, texture_index, newcoords,
+ lp_build_fetch_texel(&bld, texture_index, ms_index, newcoords,
lod, offsets,
texel_out);
}
else {
LLVMValueRef lod_fpart = NULL, lod_positive = NULL;
- LLVMValueRef ilevel0 = NULL, ilevel1 = NULL;
+ LLVMValueRef ilevel0 = NULL, ilevel1 = NULL, lod = NULL;
boolean use_aos;
- if (util_format_is_pure_integer(static_texture_state->format) &&
- !util_format_has_depth(bld.format_desc) &&
- (static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR ||
- static_sampler_state->min_img_filter == PIPE_TEX_FILTER_LINEAR ||
- static_sampler_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR)) {
- /*
- * Bail if impossible filtering is specified (the awkard additional
- * depth check is because it is legal in gallium to have things like S8Z24
- * here which would say it's pure int despite such formats should sample
- * the depth component).
- * In GL such filters make the texture incomplete, this makes it robust
- * against state trackers which set this up regardless (we'd crash in the
- * lerp later (except for gather)).
- * Must do this after fetch_texel code since with GL state tracker we'll
- * get some junk sampler for buffer textures.
- */
- unsigned chan;
- LLVMValueRef zero = lp_build_zero(gallivm, type);
- for (chan = 0; chan < 4; chan++) {
- texel_out[chan] = zero;
- }
- return;
- }
-
use_aos = util_format_fits_8unorm(bld.format_desc) &&
op_is_tex &&
/* not sure this is strictly needed or simply impossible */
use_aos &= bld.num_lods <= num_quads ||
derived_sampler_state.min_img_filter ==
derived_sampler_state.mag_img_filter;
+
+ if(gallivm_perf & GALLIVM_PERF_NO_AOS_SAMPLING) {
+ use_aos = 0;
+ }
+
if (dims > 1) {
use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_t);
if (dims > 2) {
derived_sampler_state.wrap_r);
}
- lp_build_sample_common(&bld, texture_index, sampler_index,
+ lp_build_sample_common(&bld, op_is_lodq, texture_index, sampler_index,
newcoords,
derivs, lod_bias, explicit_lod,
- &lod_positive, &lod_fpart,
+ &lod_positive, &lod, &lod_fpart,
&ilevel0, &ilevel1);
+ if (op_is_lodq) {
+ texel_out[0] = lod_fpart;
+ texel_out[1] = lod;
+ texel_out[2] = texel_out[3] = bld.coord_bld.zero;
+ return;
+ }
+
if (use_aos && static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
/* The aos path doesn't do seamless filtering so simply add cube layer
* to face now.
/* Setup our build context */
memset(&bld4, 0, sizeof bld4);
+ bld4.no_quad_lod = bld.no_quad_lod;
+ bld4.no_rho_approx = bld.no_rho_approx;
+ bld4.no_brilinear = bld.no_brilinear;
bld4.gallivm = bld.gallivm;
bld4.context_ptr = bld.context_ptr;
bld4.static_texture_state = bld.static_texture_state;
bld4.texel_type.length = 4;
bld4.num_mips = bld4.num_lods = 1;
- if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
- (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
+ if (bld4.no_quad_lod && bld4.no_rho_approx &&
(static_texture_state->target == PIPE_TEXTURE_CUBE ||
static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
(op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
LLVMValueRef coords[5];
LLVMValueRef offsets[3] = { NULL };
LLVMValueRef lod = NULL;
+ LLVMValueRef ms_index = NULL;
LLVMValueRef context_ptr;
LLVMValueRef thread_data_ptr = NULL;
LLVMValueRef texel_out[4];
if (sample_key & LP_SAMPLER_SHADOW) {
coords[4] = LLVMGetParam(function, num_param++);
}
+ if (sample_key & LP_SAMPLER_FETCH_MS) {
+ ms_index = LLVMGetParam(function, num_param++);
+ }
if (sample_key & LP_SAMPLER_OFFSETS) {
for (i = 0; i < num_offsets; i++) {
offsets[i] = LLVMGetParam(function, num_param++);
offsets,
deriv_ptr,
lod,
+ ms_index,
texel_out);
LLVMBuildAggregateRet(gallivm->builder, texel_out, 4);
const struct lp_static_texture_state *static_texture_state,
const struct lp_static_sampler_state *static_sampler_state,
struct lp_sampler_dynamic_state *dynamic_state,
- const struct lp_sampler_params *params)
+ const struct lp_sampler_params *params,
+ int texture_index, int sampler_index,
+ LLVMValueRef *tex_ret)
{
LLVMBuilderRef builder = gallivm->builder;
LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(
LLVMValueRef function, inst;
LLVMValueRef args[LP_MAX_TEX_FUNC_ARGS];
LLVMBasicBlockRef bb;
- LLVMValueRef tex_ret;
unsigned num_args = 0;
char func_name[64];
unsigned i, num_coords, num_derivs, num_offsets, layer;
- unsigned texture_index = params->texture_index;
- unsigned sampler_index = params->sampler_index;
unsigned sample_key = params->sample_key;
const LLVMValueRef *coords = params->coords;
const LLVMValueRef *offsets = params->offsets;
const struct util_format_description *format_desc;
format_desc = util_format_description(static_texture_state->format);
if (format_desc && format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
- /*
- * This is not 100% correct, if we have cache but the
- * util_format_s3tc_prefer is true the cache won't get used
- * regardless (could hook up the block decode there...) */
need_cache = TRUE;
}
}
* Additionally lod_property has to be included too.
*/
- util_snprintf(func_name, sizeof(func_name), "texfunc_res_%d_sam_%d_%x",
- texture_index, sampler_index, sample_key);
+ snprintf(func_name, sizeof(func_name), "texfunc_res_%d_sam_%d_%x",
+ texture_index, sampler_index, sample_key);
function = LLVMGetNamedFunction(module, func_name);
if (sample_key & LP_SAMPLER_SHADOW) {
arg_types[num_param++] = LLVMTypeOf(coords[0]);
}
+ if (sample_key & LP_SAMPLER_FETCH_MS) {
+ arg_types[num_param++] = LLVMTypeOf(params->ms_index);
+ }
if (sample_key & LP_SAMPLER_OFFSETS) {
for (i = 0; i < num_offsets; i++) {
arg_types[num_param++] = LLVMTypeOf(offsets[0]);
if (sample_key & LP_SAMPLER_SHADOW) {
args[num_args++] = coords[4];
}
+ if (sample_key & LP_SAMPLER_FETCH_MS) {
+ args[num_args++] = params->ms_index;
+ }
if (sample_key & LP_SAMPLER_OFFSETS) {
for (i = 0; i < num_offsets; i++) {
args[num_args++] = offsets[i];
assert(num_args <= LP_MAX_TEX_FUNC_ARGS);
- tex_ret = LLVMBuildCall(builder, function, args, num_args, "");
+ *tex_ret = LLVMBuildCall(builder, function, args, num_args, "");
bb = LLVMGetInsertBlock(builder);
inst = LLVMGetLastInstruction(bb);
LLVMSetInstructionCallConv(inst, LLVMFastCallConv);
- for (i = 0; i < 4; i++) {
- params->texel[i] = LLVMBuildExtractValue(gallivm->builder, tex_ret, i, "");
- }
}
}
if (use_tex_func) {
+ LLVMValueRef tex_ret;
lp_build_sample_soa_func(gallivm,
static_texture_state,
static_sampler_state,
dynamic_state,
- params);
+ params, params->texture_index, params->sampler_index, &tex_ret);
+
+ for (unsigned i = 0; i < 4; i++) {
+ params->texel[i] = LLVMBuildExtractValue(gallivm->builder, tex_ret, i, "");
+ }
}
else {
lp_build_sample_soa_code(gallivm,
params->offsets,
params->derivs,
params->lod,
+ params->ms_index,
params->texel);
}
}
lp_build_context_init(&bld_int_vec4, gallivm, lp_type_int_vec(32, 128));
+ if (params->samples_only) {
+ params->sizes_out[0] = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, params->int_type),
+ dynamic_state->num_samples(dynamic_state, gallivm,
+ context_ptr, texture_unit));
+ return;
+ }
if (params->explicit_lod) {
/* FIXME: this needs to honor per-element lod */
lod = LLVMBuildExtractElement(gallivm->builder, params->explicit_lod,
num_levels);
}
}
+
+static void
+lp_build_do_atomic_soa(struct gallivm_state *gallivm,
+ const struct util_format_description *format_desc,
+ struct lp_type type,
+ LLVMValueRef exec_mask,
+ LLVMValueRef base_ptr,
+ LLVMValueRef offset,
+ LLVMValueRef out_of_bounds,
+ unsigned img_op,
+ LLVMAtomicRMWBinOp op,
+ const LLVMValueRef rgba_in[4],
+ const LLVMValueRef rgba2_in[4],
+ LLVMValueRef atomic_result[4])
+{
+ enum pipe_format format = format_desc->format;
+
+ if (format != PIPE_FORMAT_R32_UINT && format != PIPE_FORMAT_R32_SINT && format != PIPE_FORMAT_R32_FLOAT) {
+ atomic_result[0] = lp_build_zero(gallivm, type);
+ return;
+ }
+
+ LLVMValueRef atom_res = lp_build_alloca(gallivm,
+ LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), type.length), "");
+
+ offset = LLVMBuildGEP(gallivm->builder, base_ptr, &offset, 1, "");
+ struct lp_build_loop_state loop_state;
+ lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
+ struct lp_build_if_state ifthen;
+ LLVMValueRef cond;
+ LLVMValueRef packed = rgba_in[0], packed2 = rgba2_in[0];
+
+ LLVMValueRef should_store_mask = LLVMBuildAnd(gallivm->builder, exec_mask, LLVMBuildNot(gallivm->builder, out_of_bounds, ""), "store_mask");
+ assert(exec_mask);
+
+ cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, should_store_mask, lp_build_const_int_vec(gallivm, type, 0), "");
+ cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
+ lp_build_if(&ifthen, gallivm, cond);
+
+ LLVMValueRef data = LLVMBuildExtractElement(gallivm->builder, packed, loop_state.counter, "");
+ LLVMValueRef cast_base_ptr = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, "");
+ cast_base_ptr = LLVMBuildBitCast(gallivm->builder, cast_base_ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), "");
+ data = LLVMBuildBitCast(gallivm->builder, data, LLVMInt32TypeInContext(gallivm->context), "");
+
+ if (img_op == LP_IMG_ATOMIC_CAS) {
+ LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, packed2, loop_state.counter, "");
+ LLVMValueRef cas_src = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, LLVMInt32TypeInContext(gallivm->context), "");
+ data = LLVMBuildAtomicCmpXchg(gallivm->builder, cast_base_ptr, data,
+ cas_src,
+ LLVMAtomicOrderingSequentiallyConsistent,
+ LLVMAtomicOrderingSequentiallyConsistent,
+ false);
+ data = LLVMBuildExtractValue(gallivm->builder, data, 0, "");
+ } else {
+ data = LLVMBuildAtomicRMW(gallivm->builder, op,
+ cast_base_ptr, data,
+ LLVMAtomicOrderingSequentiallyConsistent,
+ false);
+ }
+
+ LLVMValueRef temp_res = LLVMBuildLoad(gallivm->builder, atom_res, "");
+ temp_res = LLVMBuildInsertElement(gallivm->builder, temp_res, data, loop_state.counter, "");
+ LLVMBuildStore(gallivm->builder, temp_res, atom_res);
+
+ lp_build_endif(&ifthen);
+ lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, type.length),
+ NULL, LLVMIntUGE);
+ atomic_result[0] = LLVMBuildLoad(gallivm->builder, atom_res, "");
+}
+
+/**
+ * Generate code for a shader image operation (load, store or atomic) on
+ * an SoA vector of texel coordinates.
+ *
+ * Per-lane bounds checking is accumulated into a mask: out-of-bounds
+ * lanes (including an out-of-range sample index for multisampled images)
+ * read back zero on loads and are masked off for stores/atomics.
+ * An unbound image (PIPE_FORMAT_NONE) returns all zero for loads, a
+ * zero result for atomics, and is a no-op for stores.
+ */
+void
+lp_build_img_op_soa(const struct lp_static_texture_state *static_texture_state,
+ struct lp_sampler_dynamic_state *dynamic_state,
+ struct gallivm_state *gallivm,
+ const struct lp_img_params *params)
+{
+ unsigned target = params->target;
+ unsigned dims = texture_dims(target);
+ /** regular scalar int type */
+ struct lp_type int_type, int_coord_type;
+ struct lp_build_context int_bld, int_coord_bld;
+ const struct util_format_description *format_desc = util_format_description(static_texture_state->format);
+ LLVMValueRef x = params->coords[0], y = params->coords[1], z = params->coords[2];
+ LLVMValueRef ms_index = params->ms_index;
+ LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;
+ int_type = lp_type_int(32);
+ int_coord_type = lp_int_type(params->type);
+ lp_build_context_init(&int_bld, gallivm, int_type);
+ lp_build_context_init(&int_coord_bld, gallivm, int_coord_type);
+
+ LLVMValueRef offset, i, j;
+
+ /* Fetch per-image dynamic state (dimensions, strides, base pointer). */
+ LLVMValueRef row_stride = dynamic_state->row_stride(dynamic_state, gallivm,
+ params->context_ptr, params->image_index);
+ LLVMValueRef img_stride = dynamic_state->img_stride(dynamic_state, gallivm,
+ params->context_ptr, params->image_index);
+ LLVMValueRef base_ptr = dynamic_state->base_ptr(dynamic_state, gallivm,
+ params->context_ptr, params->image_index);
+ LLVMValueRef width = dynamic_state->width(dynamic_state, gallivm,
+ params->context_ptr, params->image_index);
+ LLVMValueRef height = dynamic_state->height(dynamic_state, gallivm,
+ params->context_ptr, params->image_index);
+ LLVMValueRef depth = dynamic_state->depth(dynamic_state, gallivm,
+ params->context_ptr, params->image_index);
+ LLVMValueRef num_samples = NULL, sample_stride = NULL;
+ /* Multisampled access: also need the sample count and stride. */
+ if (ms_index) {
+ num_samples = dynamic_state->num_samples(dynamic_state, gallivm,
+ params->context_ptr, params->image_index);
+ sample_stride = dynamic_state->sample_stride(dynamic_state, gallivm,
+ params->context_ptr, params->image_index);
+ }
+
+ boolean layer_coord = has_layer_coord(target);
+
+ /* Broadcast scalar state to coord-vector width as dimensionality needs. */
+ width = lp_build_broadcast_scalar(&int_coord_bld, width);
+ if (dims >= 2) {
+ height = lp_build_broadcast_scalar(&int_coord_bld, height);
+ row_stride_vec = lp_build_broadcast_scalar(&int_coord_bld, row_stride);
+ }
+ if (dims >= 3 || layer_coord) {
+ depth = lp_build_broadcast_scalar(&int_coord_bld, depth);
+ img_stride_vec = lp_build_broadcast_scalar(&int_coord_bld, img_stride);
+ }
+
+ /*
+ * Accumulate per-lane out-of-bounds mask over each used coordinate.
+ * NOTE(review): only upper bounds are checked here — presumably the
+ * coords are unsigned (or negative values wrap past the GEQUAL
+ * compare); verify against callers.
+ */
+ LLVMValueRef out_of_bounds = int_coord_bld.zero;
+ LLVMValueRef out1;
+ out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
+ out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);
+
+ if (dims >= 2) {
+ out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
+ out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);
+ }
+ if (dims >= 3) {
+ out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
+ out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);
+ }
+ /* Compute the per-lane byte offset (and block coords i,j) of the texel. */
+ lp_build_sample_offset(&int_coord_bld,
+ format_desc,
+ x, y, z, row_stride_vec, img_stride_vec,
+ &offset, &i, &j);
+
+ if (ms_index) {
+ /* Sample index out of range also counts as out of bounds. */
+ out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, ms_index, lp_build_broadcast_scalar(&int_coord_bld, num_samples));
+ out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);
+
+ offset = lp_build_add(&int_coord_bld, offset,
+ lp_build_mul(&int_coord_bld, lp_build_broadcast_scalar(&int_coord_bld, sample_stride),
+ ms_index));
+ }
+ if (params->img_op == LP_IMG_LOAD) {
+ /*
+ * Pure-integer color formats fetch as int/uint vectors of matching
+ * width; everything else keeps the caller's type.
+ */
+ struct lp_type texel_type = params->type;
+ if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
+ format_desc->channel[0].pure_integer) {
+ if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
+ texel_type = lp_type_int_vec(params->type.width, params->type.width * params->type.length);
+ } else if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
+ texel_type = lp_type_uint_vec(params->type.width, params->type.width * params->type.length);
+ }
+ }
+
+ if (static_texture_state->format == PIPE_FORMAT_NONE) {
+ /*
+ * If there's nothing bound, format is NONE, and we must return
+ * all zero as mandated by d3d10 in this case.
+ */
+ unsigned chan;
+ LLVMValueRef zero = lp_build_zero(gallivm, params->type);
+ for (chan = 0; chan < 4; chan++) {
+ params->outdata[chan] = zero;
+ }
+ return;
+ }
+
+ /* Clamp OOB lanes' offsets to 0 so the fetch stays in the buffer. */
+ offset = lp_build_andnot(&int_coord_bld, offset, out_of_bounds);
+ struct lp_build_context texel_bld;
+ lp_build_context_init(&texel_bld, gallivm, texel_type);
+ lp_build_fetch_rgba_soa(gallivm,
+ format_desc,
+ texel_type, TRUE,
+ base_ptr, offset,
+ i, j,
+ NULL,
+ params->outdata);
+
+ /* Replace whatever was fetched for OOB lanes with zero. */
+ for (unsigned chan = 0; chan < 4; chan++) {
+ params->outdata[chan] = lp_build_select(&texel_bld, out_of_bounds,
+ texel_bld.zero, params->outdata[chan]);
+ }
+ } else if (params->img_op == LP_IMG_STORE) {
+ /* Stores to an unbound image are silently dropped. */
+ if (static_texture_state->format == PIPE_FORMAT_NONE)
+ return;
+ lp_build_store_rgba_soa(gallivm, format_desc, params->type, params->exec_mask, base_ptr, offset, out_of_bounds,
+ params->indata);
+ } else {
+ if (static_texture_state->format == PIPE_FORMAT_NONE) {
+ /*
+ * For atomic operation just return 0 in the unbound case to avoid a crash.
+ */
+ LLVMValueRef zero = lp_build_zero(gallivm, params->type);
+ params->outdata[0] = zero;
+ return;
+ }
+ /* Atomic ops: masking of inactive/OOB lanes happens inside. */
+ lp_build_do_atomic_soa(gallivm, format_desc, params->type, params->exec_mask, base_ptr, offset, out_of_bounds,
+ params->img_op, params->op, params->indata, params->indata2, params->outdata);
+ }
+}