}
+/**
+ * Combined ifloor() & fract().
+ *
+ * Preferred to calling the functions separately, as it will ensure that the
+ * stratergy (floor() vs ifloor()) that results in less redundant work is used.
+ */
+void
+lp_build_ifloor_fract(struct lp_build_context *bld,
+ LLVMValueRef a,
+ LLVMValueRef *out_ipart,
+ LLVMValueRef *out_fpart)
+{
+
+
+ const struct lp_type type = bld->type;
+ LLVMValueRef ipart;
+
+ assert(type.floating);
+ assert(lp_check_value(type, a));
+
+ if (util_cpu_caps.has_sse4_1 &&
+ (type.length == 1 || type.width*type.length == 128)) {
+ /*
+ * floor() is easier.
+ */
+
+ ipart = lp_build_floor(bld, a);
+ *out_fpart = LLVMBuildFSub(bld->builder, a, ipart, "fpart");
+ *out_ipart = LLVMBuildFPToSI(bld->builder, ipart, bld->int_vec_type, "ipart");
+ }
+ else {
+ /*
+ * ifloor() is easier.
+ */
+
+ *out_ipart = lp_build_ifloor(bld, a);
+ ipart = LLVMBuildSIToFP(bld->builder, *out_ipart, bld->vec_type, "ipart");
+ *out_fpart = LLVMBuildFSub(bld->builder, a, ipart, "fpart");
+ }
+}
+
+
LLVMValueRef
lp_build_sqrt(struct lp_build_context *bld,
LLVMValueRef a)
/* mul by size and subtract 0.5 */
coord = lp_build_mul(coord_bld, coord, length_f);
coord = lp_build_sub(coord_bld, coord, half);
- /* convert to int */
- coord0 = lp_build_ifloor(coord_bld, coord);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(uint_coord_bld, coord0, uint_coord_bld->one);
- /* compute lerp weight */
- weight = lp_build_fract(coord_bld, coord);
/* repeat wrap */
if (is_pot) {
coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
coord = lp_build_sub(coord_bld, coord, half);
- weight = lp_build_fract(coord_bld, coord);
- coord0 = lp_build_ifloor(coord_bld, coord);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
break;
max = lp_build_sub(coord_bld, length_f, min);
coord = lp_build_clamp(coord_bld, coord, min, max);
}
- /* compute lerp weight */
- weight = lp_build_fract(coord_bld, coord);
- /* coord0 = floor(coord); */
- coord0 = lp_build_ifloor(coord_bld, coord);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
/* coord0 = max(coord0, 0) */
coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
max = lp_build_sub(coord_bld, length_f, min);
coord = lp_build_clamp(coord_bld, coord, min, max);
coord = lp_build_sub(coord_bld, coord, half);
- /* compute lerp weight */
- weight = lp_build_fract(coord_bld, coord);
- /* convert to int */
- coord0 = lp_build_ifloor(coord_bld, coord);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
}
break;
coord = lp_build_mul(coord_bld, coord, length_f);
coord = lp_build_sub(coord_bld, coord, half);
- /* compute lerp weight */
- weight = lp_build_fract(coord_bld, coord);
-
- /* convert to int coords */
- coord0 = lp_build_ifloor(coord_bld, coord);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
/* coord0 = max(coord0, 0) */
coord = lp_build_sub(coord_bld, coord, half);
- weight = lp_build_fract(coord_bld, coord);
- coord0 = lp_build_ifloor(coord_bld, coord);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
break;
coord = lp_build_sub(coord_bld, coord, half);
- weight = lp_build_fract(coord_bld, coord);
- coord0 = lp_build_ifloor(coord_bld, coord);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
}
break;
coord = lp_build_sub(coord_bld, coord, half);
- weight = lp_build_fract(coord_bld, coord);
- coord0 = lp_build_ifloor(coord_bld, coord);
+ /* convert to int, compute lerp weight */
+ lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
}
break;