X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fauxiliary%2Fgallivm%2Flp_bld_sample_aos.c;h=74858bc9718b19c5c2ca440fb1ae1d863466a0e0;hb=ba9c1773d77afc71adc7bad3d8c326b104c5e094;hp=000d4938a039ee3fb5408fdff7ad253f27be44ab;hpb=61b7da074e2faebf03d3dfc30e910ee1367bcd5a;p=mesa.git diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c index 000d4938a03..74858bc9718 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c @@ -45,12 +45,14 @@ #include "lp_bld_const.h" #include "lp_bld_conv.h" #include "lp_bld_arit.h" +#include "lp_bld_bitarit.h" #include "lp_bld_logic.h" #include "lp_bld_swizzle.h" #include "lp_bld_pack.h" #include "lp_bld_flow.h" #include "lp_bld_gather.h" #include "lp_bld_format.h" +#include "lp_bld_init.h" #include "lp_bld_sample.h" #include "lp_bld_sample_aos.h" #include "lp_bld_quad.h" @@ -80,20 +82,22 @@ lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld, LLVMValueRef *out_offset, LLVMValueRef *out_i) { - struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; struct lp_build_context *int_coord_bld = &bld->int_coord_bld; + LLVMBuilderRef builder = bld->gallivm->builder; LLVMValueRef length_minus_one; - length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one); + length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); switch(wrap_mode) { case PIPE_TEX_WRAP_REPEAT: if(is_pot) - coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, ""); - else - /* Signed remainder won't give the right results for negative - * dividends but unsigned remainder does.*/ - coord = LLVMBuildURem(bld->builder, coord, length, ""); + coord = LLVMBuildAnd(builder, coord, length_minus_one, ""); + else { + /* Add a bias to the texcoord to handle negative coords */ + LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024); + coord = LLVMBuildAdd(builder, coord, bias, ""); + coord = LLVMBuildURem(builder, coord, length, ""); + } break; case PIPE_TEX_WRAP_CLAMP_TO_EDGE: @@ -111,7 +115,7 @@ lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld, assert(0); } - lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride, + lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride, out_offset, out_i); } @@ -144,8 +148,8 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, LLVMValueRef *i0, LLVMValueRef *i1) { - struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld; struct lp_build_context *int_coord_bld = &bld->int_coord_bld; + LLVMBuilderRef builder = bld->gallivm->builder; LLVMValueRef length_minus_one; LLVMValueRef lmask, umask, mask; @@ -186,46 +190,47 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, * multiplication. */ - *i0 = uint_coord_bld->zero; - *i1 = uint_coord_bld->zero; + *i0 = int_coord_bld->zero; + *i1 = int_coord_bld->zero; length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); switch(wrap_mode) { case PIPE_TEX_WRAP_REPEAT: if (is_pot) { - coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, ""); + coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, ""); } else { - /* Signed remainder won't give the right results for negative - * dividends but unsigned remainder does.*/ - coord0 = LLVMBuildURem(bld->builder, coord0, length, ""); + /* Add a bias to the texcoord to handle negative coords */ + LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024); + coord0 = LLVMBuildAdd(builder, coord0, bias, ""); + coord0 = LLVMBuildURem(builder, coord0, length, ""); } - mask = lp_build_compare(bld->builder, int_coord_bld->type, + mask = lp_build_compare(bld->gallivm, int_coord_bld->type, PIPE_FUNC_NOTEQUAL, coord0, length_minus_one); - *offset0 = lp_build_mul(uint_coord_bld, coord0, stride); - *offset1 = LLVMBuildAnd(bld->builder, - lp_build_add(uint_coord_bld, *offset0, stride), + *offset0 = lp_build_mul(int_coord_bld, coord0, stride); + *offset1 = LLVMBuildAnd(builder, + lp_build_add(int_coord_bld, *offset0, stride), mask, ""); break; case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - lmask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type, + lmask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type, PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero); - umask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type, + umask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type, PIPE_FUNC_LESS, coord0, length_minus_one); coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero); coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one); - mask = LLVMBuildAnd(bld->builder, lmask, umask, ""); + mask = LLVMBuildAnd(builder, lmask, umask, ""); - *offset0 = lp_build_mul(uint_coord_bld, coord0, stride); - *offset1 = lp_build_add(uint_coord_bld, + *offset0 = lp_build_mul(int_coord_bld, coord0, stride); + *offset1 = lp_build_add(int_coord_bld, *offset0, - LLVMBuildAnd(bld->builder, stride, mask, "")); + LLVMBuildAnd(builder, stride, mask, "")); break; case PIPE_TEX_WRAP_CLAMP: @@ -236,8 +241,8 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: default: assert(0); - *offset0 = uint_coord_bld->zero; - *offset1 = uint_coord_bld->zero; + *offset0 = int_coord_bld->zero; + *offset1 = int_coord_bld->zero; break; } } @@ -250,9 +255,7 @@ lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, */ static void lp_build_sample_image_nearest(struct lp_build_sample_context *bld, - LLVMValueRef width_vec, - LLVMValueRef height_vec, - LLVMValueRef depth_vec, + LLVMValueRef int_size, LLVMValueRef row_stride_vec, LLVMValueRef img_stride_vec, LLVMValueRef data_ptr, @@ -262,48 +265,51 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, LLVMValueRef *colors_lo, LLVMValueRef *colors_hi) { - const int dims = texture_dims(bld->static_state->target); - LLVMBuilderRef builder = bld->builder; + const unsigned dims = bld->dims; + LLVMBuilderRef builder = bld->gallivm->builder; struct lp_build_context i32, h16, u8n; - LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type; + LLVMTypeRef i32_vec_type, u8n_vec_type; LLVMValueRef i32_c8; - LLVMValueRef s_ipart, t_ipart, r_ipart; + LLVMValueRef width_vec, height_vec, depth_vec; + LLVMValueRef s_ipart, t_ipart = NULL, r_ipart = NULL; LLVMValueRef x_stride; LLVMValueRef x_offset, offset; LLVMValueRef x_subcoord, y_subcoord, z_subcoord; - lp_build_context_init(&i32, builder, lp_type_int_vec(32)); - lp_build_context_init(&h16, builder, lp_type_ufixed(16)); - lp_build_context_init(&u8n, builder, lp_type_unorm(8)); + lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32)); + lp_build_context_init(&h16, bld->gallivm, lp_type_ufixed(16)); + lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8)); + + i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type); + u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type); - i32_vec_type = lp_build_vec_type(i32.type); - h16_vec_type = lp_build_vec_type(h16.type); - u8n_vec_type = lp_build_vec_type(u8n.type); + lp_build_extract_image_sizes(bld, + bld->int_size_type, + bld->int_coord_type, + int_size, + &width_vec, + &height_vec, + &depth_vec); if (bld->static_state->normalized_coords) { - /* s = s * width, t = t * height */ - LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type); - LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec, - coord_vec_type, ""); - s = lp_build_mul(&bld->coord_bld, s, fp_width); - if (dims >= 2) { - LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec, - coord_vec_type, ""); - t = lp_build_mul(&bld->coord_bld, t, fp_height); - if (dims >= 3) { - LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec, - coord_vec_type, ""); - r = lp_build_mul(&bld->coord_bld, r, fp_depth); - } - } - } + LLVMValueRef scaled_size; + LLVMValueRef flt_size; - /* scale coords by 256 (8 fractional bits) */ - s = lp_build_mul_imm(&bld->coord_bld, s, 256); - if (dims >= 2) - t = lp_build_mul_imm(&bld->coord_bld, t, 256); - if (dims >= 3) - r = lp_build_mul_imm(&bld->coord_bld, r, 256); + /* scale size by 256 (8 fractional bits) */ + scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8); + + flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size); + + lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r); + } + else { + /* scale coords by 256 (8 fractional bits) */ + s = lp_build_mul_imm(&bld->coord_bld, s, 256); + if (dims >= 2) + t = lp_build_mul_imm(&bld->coord_bld, t, 256); + if (dims >= 3) + r = lp_build_mul_imm(&bld->coord_bld, r, 256); + } /* convert float to int */ s = LLVMBuildFPToSI(builder, s, i32_vec_type, ""); @@ -313,7 +319,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, r = LLVMBuildFPToSI(builder, r, i32_vec_type, ""); /* compute floor (shift right 8) */ - i32_c8 = lp_build_const_int_vec(i32.type, 8); + i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8); s_ipart = LLVMBuildAShr(builder, s, i32_c8, ""); if (dims >= 2) t_ipart = LLVMBuildAShr(builder, t, i32_c8, ""); @@ -321,7 +327,8 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, r_ipart = LLVMBuildAShr(builder, r, i32_c8, ""); /* get pixel, row, image strides */ - x_stride = lp_build_const_vec(bld->uint_coord_bld.type, + x_stride = lp_build_const_vec(bld->gallivm, + bld->int_coord_bld.type, bld->format_desc->block.bits/8); /* Do texcoord wrapping, compute texel offset */ @@ -340,22 +347,22 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, bld->static_state->pot_height, bld->static_state->wrap_t, &y_offset, &y_subcoord); - offset = lp_build_add(&bld->uint_coord_bld, offset, y_offset); + offset = lp_build_add(&bld->int_coord_bld, offset, y_offset); if (dims >= 3) { LLVMValueRef z_offset; lp_build_sample_wrap_nearest_int(bld, 1, /* block length (depth) */ r_ipart, depth_vec, img_stride_vec, - bld->static_state->pot_height, + bld->static_state->pot_depth, bld->static_state->wrap_r, &z_offset, &z_subcoord); - offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset); + offset = lp_build_add(&bld->int_coord_bld, offset, z_offset); } else if (bld->static_state->target == PIPE_TEXTURE_CUBE) { LLVMValueRef z_offset; /* The r coord is the cube face in [0,5] */ - z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec); - offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset); + z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec); + offset = lp_build_add(&bld->int_coord_bld, offset, z_offset); } } @@ -383,7 +390,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, * Given the format is a rgba8, just read the pixels as is, * without any swizzling. Swizzling will be done later. */ - rgba8 = lp_build_gather(bld->builder, + rgba8 = lp_build_gather(bld->gallivm, bld->texel_type.length, bld->format_desc->block.bits, bld->texel_type.width, @@ -392,7 +399,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, ""); } else { - rgba8 = lp_build_fetch_rgba_aos(bld->builder, + rgba8 = lp_build_fetch_rgba_aos(bld->gallivm, bld->format_desc, u8n.type, data_ptr, offset, @@ -401,7 +408,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, } /* Expand one 4*rgba8 to two 2*rgba16 */ - lp_build_unpack2(builder, u8n.type, h16.type, + lp_build_unpack2(bld->gallivm, u8n.type, h16.type, rgba8, colors_lo, colors_hi); } @@ -414,9 +421,7 @@ lp_build_sample_image_nearest(struct lp_build_sample_context *bld, */ static void lp_build_sample_image_linear(struct lp_build_sample_context *bld, - LLVMValueRef width_vec, - LLVMValueRef height_vec, - LLVMValueRef depth_vec, + LLVMValueRef int_size, LLVMValueRef row_stride_vec, LLVMValueRef img_stride_vec, LLVMValueRef data_ptr, @@ -426,14 +431,15 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, LLVMValueRef *colors_lo, LLVMValueRef *colors_hi) { - const int dims = texture_dims(bld->static_state->target); - LLVMBuilderRef builder = bld->builder; + const unsigned dims = bld->dims; + LLVMBuilderRef builder = bld->gallivm->builder; struct lp_build_context i32, h16, u8n; LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type; LLVMValueRef i32_c8, i32_c128, i32_c255; + LLVMValueRef width_vec, height_vec, depth_vec; LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi; - LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi; - LLVMValueRef r_ipart, r_fpart, r_fpart_lo, r_fpart_hi; + LLVMValueRef t_ipart = NULL, t_fpart = NULL, t_fpart_lo = NULL, t_fpart_hi = NULL; + LLVMValueRef r_ipart = NULL, r_fpart = NULL, r_fpart_lo = NULL, r_fpart_hi = NULL; LLVMValueRef x_stride, y_stride, z_stride; LLVMValueRef x_offset0, x_offset1; LLVMValueRef y_offset0, y_offset1; @@ -447,38 +453,41 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, unsigned i, j, k; unsigned numj, numk; - lp_build_context_init(&i32, builder, lp_type_int_vec(32)); - lp_build_context_init(&h16, builder, lp_type_ufixed(16)); - lp_build_context_init(&u8n, builder, lp_type_unorm(8)); + lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32)); + lp_build_context_init(&h16, bld->gallivm, lp_type_ufixed(16)); + lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8)); - i32_vec_type = lp_build_vec_type(i32.type); - h16_vec_type = lp_build_vec_type(h16.type); - u8n_vec_type = lp_build_vec_type(u8n.type); + i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type); + h16_vec_type = lp_build_vec_type(bld->gallivm, h16.type); + u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type); + + lp_build_extract_image_sizes(bld, + bld->int_size_type, + bld->int_coord_type, + int_size, + &width_vec, + &height_vec, + &depth_vec); if (bld->static_state->normalized_coords) { - /* s = s * width, t = t * height */ - LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type); - LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec, - coord_vec_type, ""); - s = lp_build_mul(&bld->coord_bld, s, fp_width); - if (dims >= 2) { - LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec, - coord_vec_type, ""); - t = lp_build_mul(&bld->coord_bld, t, fp_height); - } - if (dims >= 3) { - LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec, - coord_vec_type, ""); - r = lp_build_mul(&bld->coord_bld, r, fp_depth); - } - } + LLVMValueRef scaled_size; + LLVMValueRef flt_size; - /* scale coords by 256 (8 fractional bits) */ - s = lp_build_mul_imm(&bld->coord_bld, s, 256); - if (dims >= 2) - t = lp_build_mul_imm(&bld->coord_bld, t, 256); - if (dims >= 3) - r = lp_build_mul_imm(&bld->coord_bld, r, 256); + /* scale size by 256 (8 fractional bits) */ + scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8); + + flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size); + + lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r); + } + else { + /* scale coords by 256 (8 fractional bits) */ + s = lp_build_mul_imm(&bld->coord_bld, s, 256); + if (dims >= 2) + t = lp_build_mul_imm(&bld->coord_bld, t, 256); + if (dims >= 3) + r = lp_build_mul_imm(&bld->coord_bld, r, 256); + } /* convert float to int */ s = LLVMBuildFPToSI(builder, s, i32_vec_type, ""); @@ -488,7 +497,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, r = LLVMBuildFPToSI(builder, r, i32_vec_type, ""); /* subtract 0.5 (add -128) */ - i32_c128 = lp_build_const_int_vec(i32.type, -128); + i32_c128 = lp_build_const_int_vec(bld->gallivm, i32.type, -128); if (!bld->static_state->force_nearest_s) { s = LLVMBuildAdd(builder, s, i32_c128, ""); } @@ -500,7 +509,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, } /* compute floor (shift right 8) */ - i32_c8 = lp_build_const_int_vec(i32.type, 8); + i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8); s_ipart = LLVMBuildAShr(builder, s, i32_c8, ""); if (dims >= 2) t_ipart = LLVMBuildAShr(builder, t, i32_c8, ""); @@ -508,7 +517,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, r_ipart = LLVMBuildAShr(builder, r, i32_c8, ""); /* compute fractional part (AND with 0xff) */ - i32_c255 = lp_build_const_int_vec(i32.type, 255); + i32_c255 = lp_build_const_int_vec(bld->gallivm, i32.type, 255); s_fpart = LLVMBuildAnd(builder, s, i32_c255, ""); if (dims >= 2) t_fpart = LLVMBuildAnd(builder, t, i32_c255, ""); @@ -516,7 +525,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, r_fpart = LLVMBuildAnd(builder, r, i32_c255, ""); /* get pixel, row and image strides */ - x_stride = lp_build_const_vec(bld->uint_coord_bld.type, + x_stride = lp_build_const_vec(bld->gallivm, bld->int_coord_bld.type, bld->format_desc->block.bits/8); y_stride = row_stride_vec; z_stride = img_stride_vec; @@ -547,9 +556,9 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, for (z = 0; z < 2; z++) { for (x = 0; x < 2; x++) { - offset[z][0][x] = lp_build_add(&bld->uint_coord_bld, + offset[z][0][x] = lp_build_add(&bld->int_coord_bld, offset[z][0][x], y_offset0); - offset[z][1][x] = lp_build_add(&bld->uint_coord_bld, + offset[z][1][x] = lp_build_add(&bld->int_coord_bld, offset[z][1][x], y_offset1); } } @@ -565,20 +574,20 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, &z_subcoord[0], &z_subcoord[1]); for (y = 0; y < 2; y++) { for (x = 0; x < 2; x++) { - offset[0][y][x] = lp_build_add(&bld->uint_coord_bld, + offset[0][y][x] = lp_build_add(&bld->int_coord_bld, offset[0][y][x], z_offset0); - offset[1][y][x] = lp_build_add(&bld->uint_coord_bld, + offset[1][y][x] = lp_build_add(&bld->int_coord_bld, offset[1][y][x], z_offset1); } } } else if (bld->static_state->target == PIPE_TEXTURE_CUBE) { LLVMValueRef z_offset; - z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec); + z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec); for (y = 0; y < 2; y++) { for (x = 0; x < 2; x++) { /* The r coord is the cube face in [0,5] */ - offset[0][y][x] = lp_build_add(&bld->uint_coord_bld, + offset[0][y][x] = lp_build_add(&bld->int_coord_bld, offset[0][y][x], z_offset); } } @@ -608,7 +617,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, r_fpart = LLVMBuildBitCast(builder, r_fpart, h16_vec_type, ""); { - LLVMTypeRef elem_type = LLVMInt32Type(); + LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context); LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH]; LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH]; LLVMValueRef shuffle_lo; @@ -681,7 +690,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, * Given the format is a rgba8, just read the pixels as is, * without any swizzling. Swizzling will be done later. */ - rgba8 = lp_build_gather(bld->builder, + rgba8 = lp_build_gather(bld->gallivm, bld->texel_type.length, bld->format_desc->block.bits, bld->texel_type.width, @@ -690,7 +699,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, ""); } else { - rgba8 = lp_build_fetch_rgba_aos(bld->builder, + rgba8 = lp_build_fetch_rgba_aos(bld->gallivm, bld->format_desc, u8n.type, data_ptr, offset[k][j][i], @@ -699,7 +708,7 @@ lp_build_sample_image_linear(struct lp_build_sample_context *bld, } /* Expand one 4*rgba8 to two 2*rgba16 */ - lp_build_unpack2(builder, u8n.type, h16.type, + lp_build_unpack2(bld->gallivm, u8n.type, h16.type, rgba8, &neighbors_lo[k][j][i], &neighbors_hi[k][j][i]); } @@ -806,76 +815,123 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld, LLVMValueRef s, LLVMValueRef t, LLVMValueRef r, + LLVMValueRef ilevel0, + LLVMValueRef ilevel1, LLVMValueRef lod_fpart, - LLVMValueRef width0_vec, - LLVMValueRef width1_vec, - LLVMValueRef height0_vec, - LLVMValueRef height1_vec, - LLVMValueRef depth0_vec, - LLVMValueRef depth1_vec, - LLVMValueRef row_stride0_vec, - LLVMValueRef row_stride1_vec, - LLVMValueRef img_stride0_vec, - LLVMValueRef img_stride1_vec, - LLVMValueRef data_ptr0, - LLVMValueRef data_ptr1, - LLVMValueRef *colors_lo, - LLVMValueRef *colors_hi) + LLVMValueRef colors_lo_var, + LLVMValueRef colors_hi_var) { + LLVMBuilderRef builder = bld->gallivm->builder; + LLVMValueRef size0; + LLVMValueRef size1; + LLVMValueRef row_stride0_vec; + LLVMValueRef row_stride1_vec; + LLVMValueRef img_stride0_vec; + LLVMValueRef img_stride1_vec; + LLVMValueRef data_ptr0; + LLVMValueRef data_ptr1; LLVMValueRef colors0_lo, colors0_hi; LLVMValueRef colors1_lo, colors1_hi; + /* sample the first mipmap level */ + lp_build_mipmap_level_sizes(bld, ilevel0, + &size0, + &row_stride0_vec, &img_stride0_vec); + data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0); if (img_filter == PIPE_TEX_FILTER_NEAREST) { - /* sample the first mipmap level */ lp_build_sample_image_nearest(bld, - width0_vec, height0_vec, depth0_vec, + size0, row_stride0_vec, img_stride0_vec, data_ptr0, s, t, r, &colors0_lo, &colors0_hi); - - if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* sample the second mipmap level */ - lp_build_sample_image_nearest(bld, - width1_vec, height1_vec, depth1_vec, - row_stride1_vec, img_stride1_vec, - data_ptr1, s, t, r, - &colors1_lo, &colors1_hi); - } } else { assert(img_filter == PIPE_TEX_FILTER_LINEAR); - - /* sample the first mipmap level */ lp_build_sample_image_linear(bld, - width0_vec, height0_vec, depth0_vec, + size0, row_stride0_vec, img_stride0_vec, data_ptr0, s, t, r, &colors0_lo, &colors0_hi); - - if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* sample the second mipmap level */ - lp_build_sample_image_linear(bld, - width1_vec, height1_vec, depth1_vec, - row_stride1_vec, img_stride1_vec, - data_ptr1, s, t, r, - &colors1_lo, &colors1_hi); - } } + /* Store the first level's colors in the output variables */ + LLVMBuildStore(builder, colors0_lo, colors_lo_var); + LLVMBuildStore(builder, colors0_hi, colors_hi_var); + if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - /* interpolate samples from the two mipmap levels */ - struct lp_build_context h16; - lp_build_context_init(&h16, bld->builder, lp_type_ufixed(16)); - - *colors_lo = lp_build_lerp(&h16, lod_fpart, - colors0_lo, colors1_lo); - *colors_hi = lp_build_lerp(&h16, lod_fpart, - colors0_hi, colors1_hi); - } - else { - /* use first/only level's colors */ - *colors_lo = colors0_lo; - *colors_hi = colors0_hi; + LLVMValueRef h16_scale = lp_build_const_float(bld->gallivm, 256.0); + LLVMTypeRef i32_type = LLVMIntTypeInContext(bld->gallivm->context, 32); + struct lp_build_if_state if_ctx; + LLVMValueRef need_lerp; + + lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16_scale, ""); + lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32_type, "lod_fpart.fixed16"); + + /* need_lerp = lod_fpart > 0 */ + need_lerp = LLVMBuildICmp(builder, LLVMIntSGT, + lod_fpart, LLVMConstNull(i32_type), + "need_lerp"); + + lp_build_if(&if_ctx, bld->gallivm, need_lerp); + { + struct lp_build_context h16_bld; + + lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16)); + + /* sample the second mipmap level */ + lp_build_mipmap_level_sizes(bld, ilevel1, + &size1, + &row_stride1_vec, &img_stride1_vec); + data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1); + if (img_filter == PIPE_TEX_FILTER_NEAREST) { + lp_build_sample_image_nearest(bld, + size1, + row_stride1_vec, img_stride1_vec, + data_ptr1, s, t, r, + &colors1_lo, &colors1_hi); + } + else { + lp_build_sample_image_linear(bld, + size1, + row_stride1_vec, img_stride1_vec, + data_ptr1, s, t, r, + &colors1_lo, &colors1_hi); + } + + /* interpolate samples from the two mipmap levels */ + + lod_fpart = LLVMBuildTrunc(builder, lod_fpart, h16_bld.elem_type, ""); + lod_fpart = lp_build_broadcast_scalar(&h16_bld, lod_fpart); + +#if HAVE_LLVM == 0x208 + /* This is a work-around for a bug in LLVM 2.8. + * Evidently, something goes wrong in the construction of the + * lod_fpart short[8] vector. Adding this no-effect shuffle seems + * to force the vector to be properly constructed. + * Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f). + */ + { + LLVMValueRef shuffles[8], shuffle; + int i; + assert(h16_bld.type.length <= Elements(shuffles)); + for (i = 0; i < h16_bld.type.length; i++) + shuffles[i] = lp_build_const_int32(bld->gallivm, 2 * (i & 1)); + shuffle = LLVMConstVector(shuffles, h16_bld.type.length); + lod_fpart = LLVMBuildShuffleVector(builder, + lod_fpart, lod_fpart, + shuffle, ""); + } +#endif + + colors0_lo = lp_build_lerp(&h16_bld, lod_fpart, + colors0_lo, colors1_lo); + colors0_hi = lp_build_lerp(&h16_bld, lod_fpart, + colors0_hi, colors1_hi); + + LLVMBuildStore(builder, colors0_lo, colors_lo_var); + LLVMBuildStore(builder, colors0_hi, colors_hi_var); + } + lp_build_endif(&if_ctx); } } @@ -896,35 +952,22 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, const LLVMValueRef *ddy, LLVMValueRef lod_bias, /* optional */ LLVMValueRef explicit_lod, /* optional */ - LLVMValueRef width, - LLVMValueRef height, - LLVMValueRef depth, - LLVMValueRef width_vec, - LLVMValueRef height_vec, - LLVMValueRef depth_vec, - LLVMValueRef row_stride_array, - LLVMValueRef img_stride_array, - LLVMValueRef data_array, LLVMValueRef texel_out[4]) { - struct lp_build_context *float_bld = &bld->float_bld; - LLVMBuilderRef builder = bld->builder; + struct lp_build_context *int_bld = &bld->int_bld; + LLVMBuilderRef builder = bld->gallivm->builder; const unsigned mip_filter = bld->static_state->min_mip_filter; const unsigned min_filter = bld->static_state->min_img_filter; const unsigned mag_filter = bld->static_state->mag_img_filter; - const int dims = texture_dims(bld->static_state->target); - LLVMValueRef lod = NULL, lod_fpart = NULL; + const unsigned dims = bld->dims; + LLVMValueRef lod_ipart = NULL, lod_fpart = NULL; LLVMValueRef ilevel0, ilevel1 = NULL; - LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL; - LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL; - LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL; - LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL; - LLVMValueRef data_ptr0, data_ptr1 = NULL; LLVMValueRef packed, packed_lo, packed_hi; LLVMValueRef unswizzled[4]; LLVMValueRef face_ddx[4], face_ddy[4]; - struct lp_build_context h16; - LLVMTypeRef h16_vec_type; + struct lp_build_context h16_bld; + LLVMValueRef first_level; + LLVMValueRef i32t_zero = lp_build_const_int32(bld->gallivm, 0); /* we only support the common/simple wrap modes at this time */ assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s)); @@ -935,9 +978,7 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, /* make 16-bit fixed-pt builder context */ - lp_build_context_init(&h16, builder, lp_type_ufixed(16)); - h16_vec_type = lp_build_vec_type(h16.type); - + lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16)); /* cube face selection, compute pre-face coords, etc. */ if (bld->static_state->target == PIPE_TEXTURE_CUBE) { @@ -949,19 +990,18 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */ /* recompute ddx, ddy using the new (s,t) face texcoords */ - face_ddx[0] = lp_build_ddx(&bld->coord_bld, s); - face_ddx[1] = lp_build_ddx(&bld->coord_bld, t); + face_ddx[0] = lp_build_scalar_ddx(&bld->coord_bld, s); + face_ddx[1] = lp_build_scalar_ddx(&bld->coord_bld, t); face_ddx[2] = NULL; face_ddx[3] = NULL; - face_ddy[0] = lp_build_ddy(&bld->coord_bld, s); - face_ddy[1] = lp_build_ddy(&bld->coord_bld, t); + face_ddy[0] = lp_build_scalar_ddy(&bld->coord_bld, s); + face_ddy[1] = lp_build_scalar_ddy(&bld->coord_bld, t); face_ddy[2] = NULL; face_ddy[3] = NULL; ddx = face_ddx; ddy = face_ddy; } - /* * Compute the level of detail (float). */ @@ -970,15 +1010,16 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, /* Need to compute lod either to choose mipmap levels or to * distinguish between minification/magnification with one mipmap level. */ - lod = lp_build_lod_selector(bld, unit, ddx, ddy, - lod_bias, explicit_lod, - width, height, depth); + lp_build_lod_selector(bld, unit, ddx, ddy, + lod_bias, explicit_lod, + mip_filter, + &lod_ipart, &lod_fpart); + } else { + lod_ipart = i32t_zero; } /* * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1 - * If mipfilter=linear, also compute the weight between the two - * mipmap levels: lod_fpart */ switch (mip_filter) { default: @@ -991,140 +1032,88 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, * We should be able to set ilevel0 = const(0) but that causes * bad x86 code to be emitted. */ - lod = lp_build_const_elem(bld->coord_bld.type, 0.0); - lp_build_nearest_mip_level(bld, unit, lod, &ilevel0); + assert(lod_ipart); + lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0); } else { - ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0); + first_level = bld->dynamic_state->first_level(bld->dynamic_state, + bld->gallivm, unit); + ilevel0 = first_level; } break; case PIPE_TEX_MIPFILTER_NEAREST: - assert(lod); - lp_build_nearest_mip_level(bld, unit, lod, &ilevel0); + assert(lod_ipart); + lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0); break; case PIPE_TEX_MIPFILTER_LINEAR: - { - LLVMValueRef f256 = LLVMConstReal(LLVMFloatType(), 256.0); - LLVMValueRef i255 = lp_build_const_int32(255); - LLVMTypeRef i16_type = LLVMIntType(16); - - assert(lod); - - lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1, - &lod_fpart); - lod_fpart = LLVMBuildFMul(builder, lod_fpart, f256, ""); - lod_fpart = lp_build_ifloor(&bld->float_bld, lod_fpart); - lod_fpart = LLVMBuildAnd(builder, lod_fpart, i255, ""); - lod_fpart = LLVMBuildTrunc(builder, lod_fpart, i16_type, ""); - lod_fpart = lp_build_broadcast_scalar(&h16, lod_fpart); - - /* the lod_fpart values will be fixed pt values in [0,1) */ - } + assert(lod_ipart); + assert(lod_fpart); + lp_build_linear_mip_levels(bld, unit, + lod_ipart, &lod_fpart, + &ilevel0, &ilevel1); break; } - /* compute image size(s) of source mipmap level(s) */ - lp_build_mipmap_level_sizes(bld, dims, width_vec, height_vec, depth_vec, - ilevel0, ilevel1, - row_stride_array, img_stride_array, - &width0_vec, &width1_vec, - &height0_vec, &height1_vec, - &depth0_vec, &depth1_vec, - &row_stride0_vec, &row_stride1_vec, - &img_stride0_vec, &img_stride1_vec); - /* - * Get pointer(s) to image data for mipmap level(s). + * Get/interpolate texture colors. */ - data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0); - if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { - data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1); - } + packed_lo = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_lo"); + packed_hi = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_hi"); - /* - * Get/interpolate texture colors. - */ if (min_filter == mag_filter) { /* no need to distinquish between minification and magnification */ - lp_build_sample_mipmap(bld, min_filter, mip_filter, - s, t, r, lod_fpart, - width0_vec, width1_vec, - height0_vec, height1_vec, - depth0_vec, depth1_vec, - row_stride0_vec, row_stride1_vec, - img_stride0_vec, img_stride1_vec, - data_ptr0, data_ptr1, - &packed_lo, &packed_hi); + lp_build_sample_mipmap(bld, + min_filter, mip_filter, + s, t, r, + ilevel0, ilevel1, lod_fpart, + packed_lo, packed_hi); } else { /* Emit conditional to choose min image filter or mag image filter * depending on the lod being > 0 or <= 0, respectively. */ - struct lp_build_flow_context *flow_ctx; struct lp_build_if_state if_ctx; LLVMValueRef minify; - flow_ctx = lp_build_flow_create(builder); - lp_build_flow_scope_begin(flow_ctx); - - packed_lo = LLVMGetUndef(h16_vec_type); - packed_hi = LLVMGetUndef(h16_vec_type); + /* minify = lod >= 0.0 */ + minify = LLVMBuildICmp(builder, LLVMIntSGE, + lod_ipart, int_bld->zero, ""); - lp_build_flow_scope_declare(flow_ctx, &packed_lo); - lp_build_flow_scope_declare(flow_ctx, &packed_hi); - - /* minify = lod > 0.0 */ - minify = LLVMBuildFCmp(builder, LLVMRealUGE, - lod, float_bld->zero, ""); - - lp_build_if(&if_ctx, flow_ctx, builder, minify); + lp_build_if(&if_ctx, bld->gallivm, minify); { /* Use the minification filter */ - lp_build_sample_mipmap(bld, min_filter, mip_filter, - s, t, r, lod_fpart, - width0_vec, width1_vec, - height0_vec, height1_vec, - depth0_vec, depth1_vec, - row_stride0_vec, row_stride1_vec, - img_stride0_vec, img_stride1_vec, - data_ptr0, data_ptr1, - &packed_lo, &packed_hi); + lp_build_sample_mipmap(bld, + min_filter, mip_filter, + s, t, r, + ilevel0, ilevel1, lod_fpart, + packed_lo, packed_hi); } lp_build_else(&if_ctx); { /* Use the magnification filter */ - lp_build_sample_mipmap(bld, mag_filter, mip_filter, - s, t, r, lod_fpart, - width0_vec, width1_vec, - height0_vec, height1_vec, - depth0_vec, depth1_vec, - row_stride0_vec, row_stride1_vec, - img_stride0_vec, img_stride1_vec, - data_ptr0, data_ptr1, - &packed_lo, &packed_hi); + lp_build_sample_mipmap(bld, + mag_filter, PIPE_TEX_MIPFILTER_NONE, + s, t, r, + ilevel0, NULL, NULL, + packed_lo, packed_hi); } lp_build_endif(&if_ctx); - - lp_build_flow_scope_end(flow_ctx); - lp_build_flow_destroy(flow_ctx); } - /* combine 'packed_lo', 'packed_hi' into 'packed' */ - { - struct lp_build_context h16, u8n; - - lp_build_context_init(&h16, builder, lp_type_ufixed(16)); - lp_build_context_init(&u8n, builder, lp_type_unorm(8)); - - packed = lp_build_pack2(builder, h16.type, u8n.type, - packed_lo, packed_hi); - } + /* + * combine the values stored in 'packed_lo' and 'packed_hi' variables + * into 'packed' + */ + packed = lp_build_pack2(bld->gallivm, + h16_bld.type, lp_type_unorm(8), + LLVMBuildLoad(builder, packed_lo, ""), + LLVMBuildLoad(builder, packed_hi, "")); /* * Convert to SoA and swizzle. */ - lp_build_rgba8_to_f32_soa(builder, + lp_build_rgba8_to_f32_soa(bld->gallivm, bld->texel_type, packed, unswizzled); @@ -1139,6 +1128,4 @@ lp_build_sample_aos(struct lp_build_sample_context *bld, texel_out[2] = unswizzled[2]; texel_out[3] = unswizzled[3]; } - - apply_sampler_swizzle(bld, texel_out); }