+ * XXX: The resulting rho is scalar, so we ignore all but the first element of
+ * derivatives that are passed by the shader.
+ */
+static LLVMValueRef
+lp_build_rho(struct lp_build_sample_context *bld,
+             unsigned unit,
+             const LLVMValueRef ddx[4],
+             const LLVMValueRef ddy[4])
+{
+   /*
+    * Emits code for
+    *
+    *    rho = max over used coords c of ( max(|dc/dx|, |dc/dy|) * size[c] )
+    *
+    * where size[] is bld->int_size run through lp_build_minify() with the
+    * first_level of sampler 'unit'.  ddx[]/ddy[] carry the s, t, r
+    * derivatives in entries 0, 1, 2; only the first bld->dims of them are
+    * read.  The value returned is built with float_bld.
+    */
+   struct lp_build_context *int_size_bld = &bld->int_size_bld;
+   struct lp_build_context *float_size_bld = &bld->float_size_bld;
+   struct lp_build_context *float_bld = &bld->float_bld;
+   const unsigned dims = bld->dims;
+   LLVMBuilderRef builder = bld->gallivm->builder;
+   LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
+   LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
+   LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
+   LLVMValueRef index2 = LLVMConstInt(i32t, 2, 0);
+   LLVMValueRef rho_x, rho_y;
+   LLVMValueRef rho_vec;
+   LLVMValueRef int_size, float_size;
+   LLVMValueRef rho;
+   LLVMValueRef first_level, first_level_vec;
+
+   /*
+    * Gather the per-coordinate derivatives.  With a single dimension the
+    * s derivative is used as is; otherwise s, t (and r for 3 dimensions)
+    * are packed into elements 0, 1 (and 2) of a float_size_bld value.
+    */
+   if (dims <= 1) {
+      rho_x = ddx[0];
+      rho_y = ddy[0];
+   }
+   else {
+      rho_x = float_size_bld->undef;
+      rho_y = float_size_bld->undef;
+
+      rho_x = LLVMBuildInsertElement(builder, rho_x, ddx[0], index0, "");
+      rho_y = LLVMBuildInsertElement(builder, rho_y, ddy[0], index0, "");
+
+      rho_x = LLVMBuildInsertElement(builder, rho_x, ddx[1], index1, "");
+      rho_y = LLVMBuildInsertElement(builder, rho_y, ddy[1], index1, "");
+
+      if (dims >= 3) {
+         rho_x = LLVMBuildInsertElement(builder, rho_x, ddx[2], index2, "");
+         rho_y = LLVMBuildInsertElement(builder, rho_y, ddy[2], index2, "");
+      }
+   }
+
+   /* max(|d/dx|, |d/dy|) for every coordinate at once */
+   rho_x = lp_build_abs(float_size_bld, rho_x);
+   rho_y = lp_build_abs(float_size_bld, rho_y);
+
+   rho_vec = lp_build_max(float_size_bld, rho_x, rho_y);
+
+   /* scale by the texture size, minified by the sampler's first level */
+   first_level = bld->dynamic_state->first_level(bld->dynamic_state,
+                                                 bld->gallivm, unit);
+   first_level_vec = lp_build_broadcast_scalar(int_size_bld, first_level);
+   int_size = lp_build_minify(int_size_bld, bld->int_size, first_level_vec);
+   float_size = lp_build_int_to_float(float_size_bld, int_size);
+
+   rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size);
+
+   /* reduce the per-coordinate values to a single maximum */
+   if (dims <= 1) {
+      rho = rho_vec;
+   }
+   else {
+      LLVMValueRef rho_s, rho_t;
+
+      rho_s = LLVMBuildExtractElement(builder, rho_vec, index0, "");
+      rho_t = LLVMBuildExtractElement(builder, rho_vec, index1, "");
+
+      rho = lp_build_max(float_bld, rho_s, rho_t);
+
+      if (dims >= 3) {
+         /*
+          * The r component was inserted at element 2 above, so it must be
+          * read back from element 2 (reading element 0 would merely repeat
+          * rho_s and drop the r contribution).
+          */
+         LLVMValueRef rho_r =
+            LLVMBuildExtractElement(builder, rho_vec, index2, "");
+
+         rho = lp_build_max(float_bld, rho, rho_r);
+      }
+   }
+
+   return rho;
+}
+
+
+/*
+ * Bri-linear lod computation
+ *
+ * Use a piece-wise linear approximation of log2 such that:
+ * - round to nearest, for values in the neighborhood of -1, 0, 1, 2, etc.
+ * - linear approximation for values in the neighborhood of 0.5, 1.5, etc.,
+ *   with the steepness specified in 'factor'
+ * - exact result for 0.5, 1.5, etc.
+ *
+ *
+ *   1.0 -              /----*
+ *                     /
+ *                    /
+ *                   /
+ *   0.5 -          *
+ *                 /
+ *                /
+ *               /
+ *   0.0 - *----/
+ *
+ *         |                 |
+ *        2^0               2^1
+ *
+ * This is a technique also commonly used in hardware:
+ * - http://ixbtlabs.com/articles2/gffx/nv40-rx800-3.html
+ *
+ * The integer part is stored to *out_lod_ipart and the (unclamped) weight
+ * to *out_lod_fpart.
+ *
+ * TODO: For correctness, this should only be applied when texture is known to
+ * have regular mipmaps, i.e., mipmaps derived from the base level.
+ *
+ * TODO: This could be done in fixed point, where applicable.
+ */
+static void
+lp_build_brilinear_lod(struct lp_build_context *bld,
+                       LLVMValueRef lod,
+                       double factor,
+                       LLVMValueRef *out_lod_ipart,
+                       LLVMValueRef *out_lod_fpart)
+{
+   const double pre_offset = (factor - 0.5)/factor - 0.5;
+   const double post_offset = 1 - factor;
+   LLVMValueRef shift_in, slope, shift_out;
+   LLVMValueRef weight;
+
+   if (0) {
+      lp_build_printf(bld->gallivm, "lod = %f\n", lod);
+   }
+
+   /* shift the lod before it is split into integer and fractional parts */
+   shift_in = lp_build_const_vec(bld->gallivm, bld->type, pre_offset);
+   lod = lp_build_add(bld, lod, shift_in);
+
+   lp_build_ifloor_fract(bld, lod, out_lod_ipart, &weight);
+
+   /* weight = weight * factor + post_offset */
+   slope = lp_build_const_vec(bld->gallivm, bld->type, factor);
+   weight = lp_build_mul(bld, weight, slope);
+
+   shift_out = lp_build_const_vec(bld->gallivm, bld->type, post_offset);
+   weight = lp_build_add(bld, weight, shift_out);
+
+   /*
+    * The weight is deliberately left unclamped:
+    * - the expression above will never produce numbers greater than one.
+    * - the mip filtering branch is only taken if the weight is positive
+    */
+   *out_lod_fpart = weight;
+
+   if (0) {
+      lp_build_printf(bld->gallivm, "lod_ipart = %i\n", *out_lod_ipart);
+      lp_build_printf(bld->gallivm, "lod_fpart = %f\n\n", *out_lod_fpart);
+   }
+}
+
+
+/*
+ * Combined log2 and brilinear lod computation.
+ *
+ * It is in all respects identical to calling lp_build_fast_log2() and
+ * lp_build_brilinear_lod() above, but by combining them we can compute the
+ * integer and fractional parts independently.
+ *
+ * 'bld' must be a floating point context.  The integer part is stored to
+ * *out_lod_ipart and the (unclamped) weight to *out_lod_fpart.
+ */
+static void
+lp_build_brilinear_rho(struct lp_build_context *bld,
+                       LLVMValueRef rho,
+                       double factor,
+                       LLVMValueRef *out_lod_ipart,
+                       LLVMValueRef *out_lod_fpart)
+{
+   const double pre_factor = (2*factor - 0.5)/(M_SQRT2*factor);
+   const double post_offset = 1 - 2*factor;
+   LLVMValueRef scale_in, slope, shift_out;
+   LLVMValueRef level;
+   LLVMValueRef weight;
+
+   assert(bld->type.floating);
+
+   assert(lp_check_value(bld->type, rho));
+
+   /*
+    * The pre factor will make the intersections with the exact powers of two
+    * happen precisely where we want them to be, which means that the integer
+    * part will not need any post adjustments.
+    */
+   scale_in = lp_build_const_vec(bld->gallivm, bld->type, pre_factor);
+   rho = lp_build_mul(bld, rho, scale_in);
+
+   /* ipart = ifloor(log2(rho)) */
+   level = lp_build_extract_exponent(bld, rho, 0);
+
+   /* fpart = rho / 2**ipart */
+   weight = lp_build_extract_mantissa(bld, rho);
+
+   /* weight = weight * factor + post_offset */
+   slope = lp_build_const_vec(bld->gallivm, bld->type, factor);
+   weight = lp_build_mul(bld, weight, slope);
+
+   shift_out = lp_build_const_vec(bld->gallivm, bld->type, post_offset);
+   weight = lp_build_add(bld, weight, shift_out);
+
+   /*
+    * Like lp_build_brilinear_lod, the weight is left unclamped since:
+    * - the expression above will never produce numbers greater than one.
+    * - the mip filtering branch is only taken if the weight is positive
+    */
+
+   *out_lod_ipart = level;
+   *out_lod_fpart = weight;
+}
+
+
+/**
+ * Generate code to compute texture level of detail (lambda).
+ * \param unit           sampler unit whose dynamic state (lod bias, min lod,
+ *                       max lod) is queried
+ * \param ddx            partial derivatives of (s, t, r, q) with respect to X
+ * \param ddy            partial derivatives of (s, t, r, q) with respect to Y
+ * \param lod_bias       optional float vector with the shader lod bias
+ * \param explicit_lod   optional float vector with the explicit lod; when
+ *                       given, the derivatives are not used
+ * \param mip_filter     one of PIPE_TEX_MIPFILTER_x
+ * \param out_lod_ipart  returns the integer part of the lod
+ * \param out_lod_fpart  returns the fractional part of the lod; it is only
+ *                       computed for PIPE_TEX_MIPFILTER_LINEAR and is zero
+ *                       otherwise
+ *
+ * When the static state says min and max lod are equal, the lod is simply
+ * the sampler's min lod and neither the derivatives, the biases nor the
+ * clamps are applied.
+ *
+ * XXX: The resulting lod is scalar, so ignore all but the first element of
+ * derivatives, lod_bias, etc that are passed by the shader.
+ */
+void
+lp_build_lod_selector(struct lp_build_sample_context *bld,
+                      unsigned unit,
+                      const LLVMValueRef ddx[4],
+                      const LLVMValueRef ddy[4],
+                      LLVMValueRef lod_bias, /* optional */
+                      LLVMValueRef explicit_lod, /* optional */
+                      unsigned mip_filter,
+                      LLVMValueRef *out_lod_ipart,
+                      LLVMValueRef *out_lod_fpart)
+
+{
+   LLVMBuilderRef builder = bld->gallivm->builder;
+   struct lp_build_context *float_bld = &bld->float_bld;
+   const int use_brilinear = !(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR);
+   LLVMValueRef lod;
+
+   /* defaults, for the paths that do not produce one of the outputs */
+   *out_lod_ipart = bld->int_bld.zero;
+   *out_lod_fpart = float_bld->zero;
+
+   if (bld->static_state->min_max_lod_equal) {
+      /* User is forcing sampling from a particular mipmap level.
+       * This is hit during mipmap generation.
+       */
+      lod = bld->dynamic_state->min_lod(bld->dynamic_state,
+                                        bld->gallivm, unit);
+   }
+   else {
+      LLVMValueRef sampler_lod_bias =
+         bld->dynamic_state->lod_bias(bld->dynamic_state, bld->gallivm, unit);
+      LLVMValueRef index0 = lp_build_const_int32(bld->gallivm, 0);
+
+      if (explicit_lod) {
+         lod = LLVMBuildExtractElement(builder, explicit_lod,
+                                       index0, "");
+      }
+      else {
+         /* non-zero if anything has to be applied to log2(rho) afterwards */
+         const int has_post_log2_adjust =
+            lod_bias ||
+            bld->static_state->lod_bias_non_zero ||
+            bld->static_state->apply_max_lod ||
+            bld->static_state->apply_min_lod;
+         LLVMValueRef rho = lp_build_rho(bld, unit, ddx, ddy);
+
+         /*
+          * Compute lod = log2(rho)
+          */
+
+         if (!has_post_log2_adjust) {
+            /*
+             * Special case when there are no post-log2 adjustments, which
+             * saves instructions by keeping the integer and fractional lod
+             * computations separate from the start.
+             */
+
+            if (mip_filter == PIPE_TEX_MIPFILTER_NONE ||
+                mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
+               *out_lod_ipart = lp_build_ilog2(float_bld, rho);
+               *out_lod_fpart = float_bld->zero;
+               return;
+            }
+            else if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
+                     use_brilinear) {
+               lp_build_brilinear_rho(float_bld, rho, BRILINEAR_FACTOR,
+                                      out_lod_ipart, out_lod_fpart);
+               return;
+            }
+         }
+
+         /* compile-time toggle between the two log2 implementations */
+         if (0) {
+            lod = lp_build_log2(float_bld, rho);
+         }
+         else {
+            lod = lp_build_fast_log2(float_bld, rho);
+         }
+
+         /* add shader lod bias (first element only) */
+         if (lod_bias) {
+            LLVMValueRef shader_lod_bias =
+               LLVMBuildExtractElement(builder, lod_bias, index0, "");
+
+            lod = LLVMBuildFAdd(builder, lod, shader_lod_bias,
+                                "shader_lod_bias");
+         }
+      }
+
+      /* add sampler lod bias */
+      if (bld->static_state->lod_bias_non_zero) {
+         lod = LLVMBuildFAdd(builder, lod, sampler_lod_bias,
+                             "sampler_lod_bias");
+      }
+
+      /* clamp lod: first against the maximum, then against the minimum */
+      if (bld->static_state->apply_max_lod) {
+         LLVMValueRef max_lod =
+            bld->dynamic_state->max_lod(bld->dynamic_state,
+                                        bld->gallivm, unit);
+
+         lod = lp_build_min(float_bld, lod, max_lod);
+      }
+      if (bld->static_state->apply_min_lod) {
+         LLVMValueRef min_lod =
+            bld->dynamic_state->min_lod(bld->dynamic_state,
+                                        bld->gallivm, unit);
+
+         lod = lp_build_max(float_bld, lod, min_lod);
+      }
+   }
+
+   /* split the float lod into the parts the mip filter needs */
+   if (mip_filter != PIPE_TEX_MIPFILTER_LINEAR) {
+      *out_lod_ipart = lp_build_iround(float_bld, lod);
+   }
+   else {
+      if (use_brilinear) {
+         lp_build_brilinear_lod(float_bld, lod, BRILINEAR_FACTOR,
+                                out_lod_ipart, out_lod_fpart);
+      }
+      else {
+         lp_build_ifloor_fract(float_bld, lod, out_lod_ipart, out_lod_fpart);
+      }
+
+      lp_build_name(*out_lod_fpart, "lod_fpart");
+   }
+
+   lp_build_name(*out_lod_ipart, "lod_ipart");
+}
+
+
+/**
+ * For PIPE_TEX_MIPFILTER_NEAREST, convert the integer LOD to an integer
+ * mipmap level index.
+ * Note: this is all scalar code.
+ * \param unit       sampler unit whose first/last level are queried
+ * \param lod_ipart  integer part of the texture level of detail
+ * \param level_out  returns first_level + lod_ipart, clamped to
+ *                   [first_level, last_level]
+ */
+void
+lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
+                           unsigned unit,
+                           LLVMValueRef lod_ipart,
+                           LLVMValueRef *level_out)
+{
+   struct lp_build_context *int_bld = &bld->int_bld;
+   LLVMValueRef lowest =
+      bld->dynamic_state->first_level(bld->dynamic_state, bld->gallivm, unit);
+   LLVMValueRef highest =
+      bld->dynamic_state->last_level(bld->dynamic_state, bld->gallivm, unit);
+   LLVMValueRef unclamped;
+
+   /* the lod is relative to the first level */
+   unclamped = lp_build_add(int_bld, lod_ipart, lowest);
+
+   /* keep the result inside the legal range of levels */
+   *level_out = lp_build_clamp(int_bld, unclamped, lowest, highest);
+}
+
+
+/**
+ * For PIPE_TEX_MIPFILTER_LINEAR, convert the integer LOD to two (adjacent)
+ * mipmap level indexes.  Later, we'll sample from those two mipmap levels
+ * and interpolate between them.
+ * \param unit             sampler unit whose first/last level are queried
+ * \param lod_ipart        integer part of the texture level of detail
+ * \param lod_fpart_inout  interpolation weight; replaced by zero when the
+ *                         levels get clamped at either end
+ * \param level0_out       returns the first mipmap level index
+ * \param level1_out       returns the second mipmap level index
+ */
+void
+lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
+                           unsigned unit,
+                           LLVMValueRef lod_ipart,
+                           LLVMValueRef *lod_fpart_inout,
+                           LLVMValueRef *level0_out,
+                           LLVMValueRef *level1_out)
+{
+   LLVMBuilderRef builder = bld->gallivm->builder;
+   struct lp_build_context *int_bld = &bld->int_bld;
+   struct lp_build_context *float_bld = &bld->float_bld;
+   LLVMValueRef lowest, highest;
+   LLVMValueRef lvl0, lvl1;
+   LLVMValueRef weight = *lod_fpart_inout;
+   LLVMValueRef below_lowest;
+   LLVMValueRef reached_highest;
+
+   lowest = bld->dynamic_state->first_level(bld->dynamic_state,
+                                            bld->gallivm, unit);
+
+   /* unclamped pair: lowest + lod_ipart, and the level right after it */
+   lvl0 = lp_build_add(int_bld, lod_ipart, lowest);
+   lvl1 = lp_build_add(int_bld, lvl0, int_bld->one);
+
+   highest = bld->dynamic_state->last_level(bld->dynamic_state,
+                                            bld->gallivm, unit);
+
+   /*
+    * Clamp both levels to [lowest, highest] with the minimum number of
+    * comparisons, zeroing the weight at the extreme ends in the process.
+    */
+
+   /* lvl0 < lowest: both levels collapse onto the lowest one */
+   below_lowest = LLVMBuildICmp(builder, LLVMIntSLT,
+                                lvl0, lowest,
+                                "clamp_lod_to_first");
+
+   lvl0 = LLVMBuildSelect(builder, below_lowest, lowest, lvl0, "");
+   lvl1 = LLVMBuildSelect(builder, below_lowest, lowest, lvl1, "");
+   weight = LLVMBuildSelect(builder, below_lowest,
+                            float_bld->zero, weight, "");
+
+   /* lvl0 >= highest: both levels collapse onto the highest one */
+   reached_highest = LLVMBuildICmp(builder, LLVMIntSGE,
+                                   lvl0, highest,
+                                   "clamp_lod_to_last");
+
+   lvl0 = LLVMBuildSelect(builder, reached_highest, highest, lvl0, "");
+   lvl1 = LLVMBuildSelect(builder, reached_highest, highest, lvl1, "");
+   weight = LLVMBuildSelect(builder, reached_highest,
+                            float_bld->zero, weight, "");
+
+   lp_build_name(lvl0, "sampler%u_miplevel0", unit);
+   lp_build_name(lvl1, "sampler%u_miplevel1", unit);
+   lp_build_name(weight, "sampler%u_mipweight", unit);
+
+   *level0_out = lvl0;
+   *level1_out = lvl1;
+   *lod_fpart_inout = weight;
+}
+
+
+/**
+ * Return pointer to a single mipmap level.
+ * \param data_array array of pointers to mipmap levels
+ * \param level integer mipmap level