+ /*
+ * To avoid having to duplicate the linear_mask / fetch code, use
+ * another branch here (branching on the corner condition, though
+ * branching on the edge condition would work as well).
+ */
+ if (accurate_cube_corners) { /* emit the corner-aware bilinear weighting branch */
+ LLVMValueRef w00, w01, w10, w11, wx0, wy0;
+ LLVMValueRef c_weight, c00, c01, c10, c11;
+ LLVMValueRef have_corner, one_third, tmp;
+
+ colorss[0] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs"); /* one result slot per channel */
+ colorss[1] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
+ colorss[2] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
+ colorss[3] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
+
+ have_corner = LLVMBuildLoad(builder, have_corners, ""); /* condition for the branch opened below */
+
+ lp_build_if(&corner_if, bld->gallivm, have_corner); /* "then" side: corner handling */
+
+ /*
+ * we can't use standard 2d lerp as we need per-element weight
+ * in case of corners, so just calculate bilinear result as
+ * w00*s00 + w01*s01 + w10*s10 + w11*s11.
+ * (This is actually less work than using 2d lerp, 7 vs. 9 instructions,
+ * however calculating the weights needs another 6, so actually probably
+ * not slower than 2d lerp only for 4 channels as weights only need
+ * to be calculated once - of course fixing the weights has additional cost.)
+ *
+ * wx0 / wy0 are (1 - s_fpart) / (1 - t_fpart). In wYX the first digit
+ * picks the t weight (0: wy0, 1: t_fpart) and the second digit picks
+ * the s weight (0: wx0, 1: s_fpart); wYX is later paired with
+ * neighbors[Y][X].
+ */
+ wx0 = lp_build_sub(coord_bld, coord_bld->one, s_fpart);
+ wy0 = lp_build_sub(coord_bld, coord_bld->one, t_fpart);
+ w00 = lp_build_mul(coord_bld, wx0, wy0);
+ w01 = lp_build_mul(coord_bld, s_fpart, wy0);
+ w10 = lp_build_mul(coord_bld, wx0, t_fpart);
+ w11 = lp_build_mul(coord_bld, s_fpart, t_fpart);
+
+ /*
+ * find corner weight: each cYX is the AND of two fall_off masks
+ * (fall_off[0] or [1] combined with fall_off[2] or [3]), and the
+ * matching wYX is selected into c_weight for elements where cYX is set.
+ * The chain starts from zero, and a later select overrides an earlier one.
+ * NOTE(review): presumably at most one cYX is set per element - confirm
+ * against the code computing fall_off.
+ */
+ c00 = lp_build_and(ivec_bld, fall_off[0], fall_off[2]);
+ c_weight = lp_build_select(coord_bld, c00, w00, coord_bld->zero);
+ c01 = lp_build_and(ivec_bld, fall_off[1], fall_off[2]);
+ c_weight = lp_build_select(coord_bld, c01, w01, c_weight);
+ c10 = lp_build_and(ivec_bld, fall_off[0], fall_off[3]);
+ c_weight = lp_build_select(coord_bld, c10, w10, c_weight);
+ c11 = lp_build_and(ivec_bld, fall_off[1], fall_off[3]);
+ c_weight = lp_build_select(coord_bld, c11, w11, c_weight);
+
+ /*
+ * add 1/3 of the corner weight to each of the 3 other samples
+ * and null out corner weight
+ * (c_weight is scaled by 1/3 once and added to all four weights; the
+ * andnot against the cYX mask, bitcast to the float vector type, is what
+ * clears the weight of the corner sample itself)
+ */
+ one_third = lp_build_const_vec(bld->gallivm, coord_bld->type, 1.0f/3.0f);
+ c_weight = lp_build_mul(coord_bld, c_weight, one_third);
+ w00 = lp_build_add(coord_bld, w00, c_weight);
+ c00 = LLVMBuildBitCast(builder, c00, coord_bld->vec_type, "");
+ w00 = lp_build_andnot(coord_bld, w00, c00);
+ w01 = lp_build_add(coord_bld, w01, c_weight);
+ c01 = LLVMBuildBitCast(builder, c01, coord_bld->vec_type, "");
+ w01 = lp_build_andnot(coord_bld, w01, c01);
+ w10 = lp_build_add(coord_bld, w10, c_weight);
+ c10 = LLVMBuildBitCast(builder, c10, coord_bld->vec_type, "");
+ w10 = lp_build_andnot(coord_bld, w10, c10);
+ w11 = lp_build_add(coord_bld, w11, c_weight);
+ c11 = LLVMBuildBitCast(builder, c11, coord_bld->vec_type, "");
+ w11 = lp_build_andnot(coord_bld, w11, c11);
+
+ if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) { /* no compare: weighted sum of the 4 neighbors per channel */
+ for (chan = 0; chan < 4; chan++) {
+ colors0[chan] = lp_build_mul(coord_bld, w00, neighbors[0][0][chan]);
+ tmp = lp_build_mul(coord_bld, w01, neighbors[0][1][chan]);
+ colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
+ tmp = lp_build_mul(coord_bld, w10, neighbors[1][0][chan]);
+ colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
+ tmp = lp_build_mul(coord_bld, w11, neighbors[1][1][chan]);
+ colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
+ }
+ }
+ else { /* compare mode set: only channel 0 of each neighbor is compared, against coords[4] */
+ LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
+ cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
+ cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
+ cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
+ cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
+ /*
+ * inputs to interpolation are just masks so just add masked weights together
+ * (the compare results are bitcast to the float vector type so they can
+ * be ANDed with the weights; each passing sample contributes its weight)
+ */
+ cmpval00 = LLVMBuildBitCast(builder, cmpval00, coord_bld->vec_type, "");
+ cmpval01 = LLVMBuildBitCast(builder, cmpval01, coord_bld->vec_type, "");
+ cmpval10 = LLVMBuildBitCast(builder, cmpval10, coord_bld->vec_type, "");
+ cmpval11 = LLVMBuildBitCast(builder, cmpval11, coord_bld->vec_type, "");
+ colors0[0] = lp_build_and(coord_bld, w00, cmpval00);
+ tmp = lp_build_and(coord_bld, w01, cmpval01);
+ colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
+ tmp = lp_build_and(coord_bld, w10, cmpval10);
+ colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
+ tmp = lp_build_and(coord_bld, w11, cmpval11);
+ colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
+ colors0[1] = colors0[2] = colors0[3] = colors0[0]; /* replicate the single result to all four channels */
+ }
+
+ LLVMBuildStore(builder, colors0[0], colorss[0]); /* write this branch's results into the per-channel slots */
+ LLVMBuildStore(builder, colors0[1], colorss[1]);
+ LLVMBuildStore(builder, colors0[2], colorss[2]);
+ LLVMBuildStore(builder, colors0[3], colorss[3]);
+
+ lp_build_else(&corner_if); /* switch to the else side; its contents and the closing of corner_if are not emitted in this block */
+ }
+