+ /*
+ * To avoid duplicating the linear_mask / fetch code, use another
+ * branch here (conditional on corners, though an edge condition
+ * would work as well).
+ */
+ if (accurate_cube_corners) {
+ LLVMValueRef c00, c01, c10, c11, c00f, c01f, c10f, c11f;
+ LLVMValueRef have_corner, one_third;
+
+ colorss[0] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs0");
+ colorss[1] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs1");
+ colorss[2] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs2");
+ colorss[3] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs3");
+
+ have_corner = LLVMBuildLoad(builder, have_corners, "");
+
+ lp_build_if(&corner_if, bld->gallivm, have_corner);
+
+ one_third = lp_build_const_vec(bld->gallivm, coord_bld->type,
+ 1.0f/3.0f);
+
+ /* find corner */
+ c00 = lp_build_and(ivec_bld, fall_off[0], fall_off[2]);
+ c00f = LLVMBuildBitCast(builder, c00, coord_bld->vec_type, "");
+ c01 = lp_build_and(ivec_bld, fall_off[1], fall_off[2]);
+ c01f = LLVMBuildBitCast(builder, c01, coord_bld->vec_type, "");
+ c10 = lp_build_and(ivec_bld, fall_off[0], fall_off[3]);
+ c10f = LLVMBuildBitCast(builder, c10, coord_bld->vec_type, "");
+ c11 = lp_build_and(ivec_bld, fall_off[1], fall_off[3]);
+ c11f = LLVMBuildBitCast(builder, c11, coord_bld->vec_type, "");
+
+ if (!is_gather) {
+ /*
+ * We can't use the standard 2d lerp since corners need per-element
+ * weights, so just calculate the bilinear result as
+ * w00*s00 + w01*s01 + w10*s10 + w11*s11.
+ * (The weighted sum itself is less work than a 2d lerp, 7 vs. 9
+ * instructions, but calculating the weights needs another 6.
+ * Since the weights only need to be calculated once, this is
+ * probably not slower than 2d lerp only when all 4 channels are
+ * computed - and of course fixing up the weights has additional
+ * cost.)
+ */
+ LLVMValueRef w00, w01, w10, w11, wx0, wy0, c_weight, tmp;
+ wx0 = lp_build_sub(coord_bld, coord_bld->one, s_fpart);
+ wy0 = lp_build_sub(coord_bld, coord_bld->one, t_fpart);
+ w00 = lp_build_mul(coord_bld, wx0, wy0);
+ w01 = lp_build_mul(coord_bld, s_fpart, wy0);
+ w10 = lp_build_mul(coord_bld, wx0, t_fpart);
+ w11 = lp_build_mul(coord_bld, s_fpart, t_fpart);
+
+ /* find corner weight */
+ c_weight = lp_build_select(coord_bld, c00, w00, coord_bld->zero);
+ c_weight = lp_build_select(coord_bld, c01, w01, c_weight);
+ c_weight = lp_build_select(coord_bld, c10, w10, c_weight);
+ c_weight = lp_build_select(coord_bld, c11, w11, c_weight);
+
+ /*
+ * add 1/3 of the corner weight to the weight of the 3 other
+ * samples and null out corner weight.
+ */
+ c_weight = lp_build_mul(coord_bld, c_weight, one_third);
+ w00 = lp_build_add(coord_bld, w00, c_weight);
+ w00 = lp_build_andnot(coord_bld, w00, c00f);
+ w01 = lp_build_add(coord_bld, w01, c_weight);
+ w01 = lp_build_andnot(coord_bld, w01, c01f);
+ w10 = lp_build_add(coord_bld, w10, c_weight);
+ w10 = lp_build_andnot(coord_bld, w10, c10f);
+ w11 = lp_build_add(coord_bld, w11, c_weight);
+ w11 = lp_build_andnot(coord_bld, w11, c11f);
+
+ if (bld->static_sampler_state->compare_mode ==
+ PIPE_TEX_COMPARE_NONE) {
+ for (chan = 0; chan < 4; chan++) {
+ colors0[chan] = lp_build_mul(coord_bld, w00,
+ neighbors[0][0][chan]);
+ tmp = lp_build_mul(coord_bld, w01, neighbors[0][1][chan]);
+ colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
+ tmp = lp_build_mul(coord_bld, w10, neighbors[1][0][chan]);
+ colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
+ tmp = lp_build_mul(coord_bld, w11, neighbors[1][1][chan]);
+ colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
+ }
+ }
+ else {
+ LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
+ cmpval00 = lp_build_sample_comparefunc(bld, coords[4],
+ neighbors[0][0][0]);
+ cmpval01 = lp_build_sample_comparefunc(bld, coords[4],
+ neighbors[0][1][0]);
+ cmpval10 = lp_build_sample_comparefunc(bld, coords[4],
+ neighbors[1][0][0]);
+ cmpval11 = lp_build_sample_comparefunc(bld, coords[4],
+ neighbors[1][1][0]);
+ /*
+ * inputs to interpolation are just masks so just add
+ * masked weights together
+ */
+ cmpval00 = LLVMBuildBitCast(builder, cmpval00,
+ coord_bld->vec_type, "");
+ cmpval01 = LLVMBuildBitCast(builder, cmpval01,
+ coord_bld->vec_type, "");
+ cmpval10 = LLVMBuildBitCast(builder, cmpval10,
+ coord_bld->vec_type, "");
+ cmpval11 = LLVMBuildBitCast(builder, cmpval11,
+ coord_bld->vec_type, "");
+ colors0[0] = lp_build_and(coord_bld, w00, cmpval00);
+ tmp = lp_build_and(coord_bld, w01, cmpval01);
+ colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
+ tmp = lp_build_and(coord_bld, w10, cmpval10);
+ colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
+ tmp = lp_build_and(coord_bld, w11, cmpval11);
+ colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
+ colors0[1] = colors0[2] = colors0[3] = colors0[0];
+ }
+ }
+ else {
+ /*
+ * We don't have any weights to adjust, so instead calculate
+ * the fourth texel as simply the average of the other 3.
+ * (This would work for non-gather too, however we'd have
+ * a boatload more of the select stuff due to there being
+ * 4 times as many colors as weights.)
+ */
+ LLVMValueRef col00, col01, col10, col11;
+ LLVMValueRef colc, colc0, colc1;
+ col10 = lp_build_swizzle_soa_channel(texel_bld,
+ neighbors[1][0], chan_swiz);
+ col11 = lp_build_swizzle_soa_channel(texel_bld,
+ neighbors[1][1], chan_swiz);
+ col01 = lp_build_swizzle_soa_channel(texel_bld,
+ neighbors[0][1], chan_swiz);
+ col00 = lp_build_swizzle_soa_channel(texel_bld,
+ neighbors[0][0], chan_swiz);
+
+ /*
+ * The spec says for comparison filtering, the comparison
+ * must happen before synthesizing the new value.
+ * This means all gathered values are always 0 or 1,
+ * except for the non-existing texel, which can be 0,1/3,2/3,1...
+ * Seems like we'd be allowed to just return 0 or 1 too, so we
+ * could simplify and pass down the compare mask values to the
+ * end (using int arithmetic/compare on the mask values to
+ * construct the fourth texel) and only there convert to floats
+ * but it's probably not worth it (it might be easier for the cpu
+ * but not for the code)...
+ */
+ if (bld->static_sampler_state->compare_mode !=
+ PIPE_TEX_COMPARE_NONE) {
+ LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
+ cmpval00 = lp_build_sample_comparefunc(bld, coords[4], col00);
+ cmpval01 = lp_build_sample_comparefunc(bld, coords[4], col01);
+ cmpval10 = lp_build_sample_comparefunc(bld, coords[4], col10);
+ cmpval11 = lp_build_sample_comparefunc(bld, coords[4], col11);
+ col00 = lp_build_select(texel_bld, cmpval00,
+ texel_bld->one, texel_bld->zero);
+ col01 = lp_build_select(texel_bld, cmpval01,
+ texel_bld->one, texel_bld->zero);
+ col10 = lp_build_select(texel_bld, cmpval10,
+ texel_bld->one, texel_bld->zero);
+ col11 = lp_build_select(texel_bld, cmpval11,
+ texel_bld->one, texel_bld->zero);
+ }
+
+ /*
+ * Null out corner color.
+ */
+ col00 = lp_build_andnot(coord_bld, col00, c00f);
+ col01 = lp_build_andnot(coord_bld, col01, c01f);
+ col10 = lp_build_andnot(coord_bld, col10, c10f);
+ col11 = lp_build_andnot(coord_bld, col11, c11f);
+
+ /*
+ * New corner texel color is all colors added / 3.
+ */
+ colc0 = lp_build_add(coord_bld, col00, col01);
+ colc1 = lp_build_add(coord_bld, col10, col11);
+ colc = lp_build_add(coord_bld, colc0, colc1);
+ colc = lp_build_mul(coord_bld, one_third, colc);
+
+ /*
+ * Replace the corner texel color with the new value.
+ */
+ col00 = lp_build_select(coord_bld, c00, colc, col00);
+ col01 = lp_build_select(coord_bld, c01, colc, col01);
+ col10 = lp_build_select(coord_bld, c10, colc, col10);
+ col11 = lp_build_select(coord_bld, c11, colc, col11);
+
+ colors0[0] = col10;
+ colors0[1] = col11;
+ colors0[2] = col01;
+ colors0[3] = col00;
+ }
+
+ LLVMBuildStore(builder, colors0[0], colorss[0]);
+ LLVMBuildStore(builder, colors0[1], colorss[1]);
+ LLVMBuildStore(builder, colors0[2], colorss[2]);
+ LLVMBuildStore(builder, colors0[3], colorss[3]);
+
+ lp_build_else(&corner_if);
+ }
+
+ if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
+ if (is_gather) {
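+ /*
+ * Just assign the single channel picked via chan_swiz.
+ * NOTE(review): this used to read "the red channel (no component
+ * selection yet)", which no longer matches the chan_swiz argument
+ * below - confirm chan_swiz carries the gather component.
+ * This is a bit hackish, we usually do the swizzle at the
+ * end of sampling (far fewer values to swizzle), but this
+ * obviously cannot work when using gather.
+ */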
+ colors0[0] = lp_build_swizzle_soa_channel(texel_bld,
+ neighbors[1][0],
+ chan_swiz);
+ colors0[1] = lp_build_swizzle_soa_channel(texel_bld,
+ neighbors[1][1],
+ chan_swiz);
+ colors0[2] = lp_build_swizzle_soa_channel(texel_bld,
+ neighbors[0][1],
+ chan_swiz);
+ colors0[3] = lp_build_swizzle_soa_channel(texel_bld,
+ neighbors[0][0],
+ chan_swiz);
+ }
+ else {
+ /* Bilinear interpolate the four samples from the 2D image / 3D slice */
+ for (chan = 0; chan < 4; chan++) {
+ colors0[chan] = lp_build_lerp_2d(texel_bld,
+ s_fpart, t_fpart,
+ neighbors[0][0][chan],
+ neighbors[0][1][chan],
+ neighbors[1][0][chan],
+ neighbors[1][1][chan],
+ 0);
+ }
+ }
+ }
+ else {
+ LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
+ cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
+ cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
+ cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
+ cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
+
+ if (is_gather) {
+ /* more hacks for swizzling, should be X, ONE or ZERO... */
+ colors0[0] = lp_build_select(texel_bld, cmpval10,
+ texel_bld->one, texel_bld->zero);
+ colors0[1] = lp_build_select(texel_bld, cmpval11,
+ texel_bld->one, texel_bld->zero);
+ colors0[2] = lp_build_select(texel_bld, cmpval01,
+ texel_bld->one, texel_bld->zero);
+ colors0[3] = lp_build_select(texel_bld, cmpval00,
+ texel_bld->one, texel_bld->zero);
+ }
+ else {
+ colors0[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart,
+ cmpval00, cmpval01, cmpval10, cmpval11);
+ colors0[1] = colors0[2] = colors0[3] = colors0[0];
+ }
+ }
+
+ if (accurate_cube_corners) {
+ LLVMBuildStore(builder, colors0[0], colorss[0]);
+ LLVMBuildStore(builder, colors0[1], colorss[1]);
+ LLVMBuildStore(builder, colors0[2], colorss[2]);
+ LLVMBuildStore(builder, colors0[3], colorss[3]);
+
+ lp_build_endif(&corner_if);
+
+ colors0[0] = LLVMBuildLoad(builder, colorss[0], "");
+ colors0[1] = LLVMBuildLoad(builder, colorss[1], "");
+ colors0[2] = LLVMBuildLoad(builder, colorss[2], "");
+ colors0[3] = LLVMBuildLoad(builder, colorss[3], "");