gallivm: Do not do mipfiltering when magnifying.
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_sample_aos.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
 30  * Texture sampling -- AoS.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 * @author Brian Paul <brianp@vmware.com>
34 */
35
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "util/u_debug.h"
39 #include "util/u_dump.h"
40 #include "util/u_memory.h"
41 #include "util/u_math.h"
42 #include "util/u_format.h"
43 #include "lp_bld_debug.h"
44 #include "lp_bld_type.h"
45 #include "lp_bld_const.h"
46 #include "lp_bld_conv.h"
47 #include "lp_bld_arit.h"
48 #include "lp_bld_logic.h"
49 #include "lp_bld_swizzle.h"
50 #include "lp_bld_pack.h"
51 #include "lp_bld_flow.h"
52 #include "lp_bld_gather.h"
53 #include "lp_bld_format.h"
54 #include "lp_bld_sample.h"
55 #include "lp_bld_sample_aos.h"
56 #include "lp_bld_quad.h"
57
58
59 /**
60 * Build LLVM code for texture coord wrapping, for nearest filtering,
61 * for scaled integer texcoords.
62 * \param block_length is the length of the pixel block along the
63 * coordinate axis
64 * \param coord the incoming texcoord (s,t,r or q) scaled to the texture size
65 * \param length the texture size along one dimension
66 * \param stride pixel stride along the coordinate axis (in bytes)
67 * \param is_pot if TRUE, length is a power of two
68 * \param wrap_mode one of PIPE_TEX_WRAP_x
69 * \param out_offset byte offset for the wrapped coordinate
70 * \param out_i resulting sub-block pixel coordinate for coord0
71 */
static void
lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
                                 unsigned block_length,
                                 LLVMValueRef coord,
                                 LLVMValueRef length,
                                 LLVMValueRef stride,
                                 boolean is_pot,
                                 unsigned wrap_mode,
                                 LLVMValueRef *out_offset,
                                 LLVMValueRef *out_i)
{
   struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMValueRef length_minus_one;

   length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if(is_pot)
         /* POT size: repeat-wrap is just a bitwise AND with (length - 1) */
         coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
      else {
         /* Add a bias to the texcoord to handle negative coords */
         /* NOTE(review): the 1024*length bias only covers coords down to
          * -1024*length; presumably that range is sufficient here. */
         LLVMValueRef bias = lp_build_mul_imm(uint_coord_bld, length, 1024);
         coord = LLVMBuildAdd(bld->builder, coord, bias, "");
         coord = LLVMBuildURem(bld->builder, coord, length, "");
      }
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      /* clamp coord to [0, length - 1] */
      coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
      coord = lp_build_min(int_coord_bld, coord, length_minus_one);
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
   default:
      /* only the simple wrap modes are supported by the AoS sampling path */
      assert(0);
   }

   /* split the wrapped coord into a byte offset plus the sub-block pixel
    * coordinate along this axis
    */
   lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride,
                                  out_offset, out_i);
}
119
120
121 /**
122 * Build LLVM code for texture coord wrapping, for linear filtering,
123 * for scaled integer texcoords.
124 * \param block_length is the length of the pixel block along the
125 * coordinate axis
126 * \param coord0 the incoming texcoord (s,t,r or q) scaled to the texture size
127 * \param length the texture size along one dimension
128 * \param stride pixel stride along the coordinate axis (in bytes)
129 * \param is_pot if TRUE, length is a power of two
130 * \param wrap_mode one of PIPE_TEX_WRAP_x
131 * \param offset0 resulting relative offset for coord0
132 * \param offset1 resulting relative offset for coord0 + 1
133 * \param i0 resulting sub-block pixel coordinate for coord0
134 * \param i1 resulting sub-block pixel coordinate for coord0 + 1
135 */
static void
lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
                                unsigned block_length,
                                LLVMValueRef coord0,
                                LLVMValueRef length,
                                LLVMValueRef stride,
                                boolean is_pot,
                                unsigned wrap_mode,
                                LLVMValueRef *offset0,
                                LLVMValueRef *offset1,
                                LLVMValueRef *i0,
                                LLVMValueRef *i1)
{
   struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMValueRef length_minus_one;
   LLVMValueRef lmask, umask, mask;

   if (block_length != 1) {
      /*
       * If the pixel block covers more than one pixel then there is no easy
       * way to calculate offset1 relative to offset0. Instead, compute them
       * independently.
       */

      LLVMValueRef coord1;

      /* wrap coord0 on its own... */
      lp_build_sample_wrap_nearest_int(bld,
                                       block_length,
                                       coord0,
                                       length,
                                       stride,
                                       is_pot,
                                       wrap_mode,
                                       offset0, i0);

      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);

      /* ...and coord0 + 1 on its own */
      lp_build_sample_wrap_nearest_int(bld,
                                       block_length,
                                       coord1,
                                       length,
                                       stride,
                                       is_pot,
                                       wrap_mode,
                                       offset1, i1);

      return;
   }

   /*
    * Scalar pixels -- try to compute offset0 and offset1 with a single stride
    * multiplication.
    */

   /* block_length == 1, so the sub-block coords are always zero */
   *i0 = uint_coord_bld->zero;
   *i1 = uint_coord_bld->zero;

   length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
      }
      else {
         /* Add a bias to the texcoord to handle negative coords */
         LLVMValueRef bias = lp_build_mul_imm(uint_coord_bld, length, 1024);
         coord0 = LLVMBuildAdd(bld->builder, coord0, bias, "");
         coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
      }

      /* mask is all-ones where coord0 != length-1, all-zeros at the last
       * texel; ANDing offset1 with it wraps the second sample back to
       * offset 0 (the first texel of the row).
       */
      mask = lp_build_compare(bld->builder, int_coord_bld->type,
                              PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);

      *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
      *offset1 = LLVMBuildAnd(bld->builder,
                              lp_build_add(uint_coord_bld, *offset0, stride),
                              mask, "");
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      /* lmask: coord0 >= 0; umask: coord0 < length-1 (i.e. coord0+1 is
       * still in range)
       */
      lmask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
                               PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
      umask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
                               PIPE_FUNC_LESS, coord0, length_minus_one);

      /* clamp coord0 to [0, length-1] via selects */
      coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
      coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);

      mask = LLVMBuildAnd(bld->builder, lmask, umask, "");

      /* offset1 = offset0 + stride only when both coord0 and coord0+1 are
       * in range; otherwise offset1 == offset0 (edge clamp).
       */
      *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
      *offset1 = lp_build_add(uint_coord_bld,
                              *offset0,
                              LLVMBuildAnd(bld->builder, stride, mask, ""));
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
   default:
      /* unsupported wrap mode in the AoS path */
      assert(0);
      *offset0 = uint_coord_bld->zero;
      *offset1 = uint_coord_bld->zero;
      break;
   }
}
247
248
249 /**
250 * Sample a single texture image with nearest sampling.
251 * If sampling a cube texture, r = cube face in [0,5].
252 * Return filtered color as two vectors of 16-bit fixed point values.
253 */
254 static void
255 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
256 LLVMValueRef width_vec,
257 LLVMValueRef height_vec,
258 LLVMValueRef depth_vec,
259 LLVMValueRef row_stride_vec,
260 LLVMValueRef img_stride_vec,
261 LLVMValueRef data_ptr,
262 LLVMValueRef s,
263 LLVMValueRef t,
264 LLVMValueRef r,
265 LLVMValueRef *colors_lo,
266 LLVMValueRef *colors_hi)
267 {
268 const int dims = texture_dims(bld->static_state->target);
269 LLVMBuilderRef builder = bld->builder;
270 struct lp_build_context i32, h16, u8n;
271 LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
272 LLVMValueRef i32_c8;
273 LLVMValueRef s_ipart, t_ipart, r_ipart;
274 LLVMValueRef x_stride;
275 LLVMValueRef x_offset, offset;
276 LLVMValueRef x_subcoord, y_subcoord, z_subcoord;
277
278 lp_build_context_init(&i32, builder, lp_type_int_vec(32));
279 lp_build_context_init(&h16, builder, lp_type_ufixed(16));
280 lp_build_context_init(&u8n, builder, lp_type_unorm(8));
281
282 i32_vec_type = lp_build_vec_type(i32.type);
283 h16_vec_type = lp_build_vec_type(h16.type);
284 u8n_vec_type = lp_build_vec_type(u8n.type);
285
286 if (bld->static_state->normalized_coords) {
287 /* s = s * width, t = t * height */
288 LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
289 LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec,
290 coord_vec_type, "");
291 s = lp_build_mul(&bld->coord_bld, s, fp_width);
292 if (dims >= 2) {
293 LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec,
294 coord_vec_type, "");
295 t = lp_build_mul(&bld->coord_bld, t, fp_height);
296 if (dims >= 3) {
297 LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec,
298 coord_vec_type, "");
299 r = lp_build_mul(&bld->coord_bld, r, fp_depth);
300 }
301 }
302 }
303
304 /* scale coords by 256 (8 fractional bits) */
305 s = lp_build_mul_imm(&bld->coord_bld, s, 256);
306 if (dims >= 2)
307 t = lp_build_mul_imm(&bld->coord_bld, t, 256);
308 if (dims >= 3)
309 r = lp_build_mul_imm(&bld->coord_bld, r, 256);
310
311 /* convert float to int */
312 s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
313 if (dims >= 2)
314 t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
315 if (dims >= 3)
316 r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");
317
318 /* compute floor (shift right 8) */
319 i32_c8 = lp_build_const_int_vec(i32.type, 8);
320 s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
321 if (dims >= 2)
322 t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
323 if (dims >= 3)
324 r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
325
326 /* get pixel, row, image strides */
327 x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
328 bld->format_desc->block.bits/8);
329
330 /* Do texcoord wrapping, compute texel offset */
331 lp_build_sample_wrap_nearest_int(bld,
332 bld->format_desc->block.width,
333 s_ipart, width_vec, x_stride,
334 bld->static_state->pot_width,
335 bld->static_state->wrap_s,
336 &x_offset, &x_subcoord);
337 offset = x_offset;
338 if (dims >= 2) {
339 LLVMValueRef y_offset;
340 lp_build_sample_wrap_nearest_int(bld,
341 bld->format_desc->block.height,
342 t_ipart, height_vec, row_stride_vec,
343 bld->static_state->pot_height,
344 bld->static_state->wrap_t,
345 &y_offset, &y_subcoord);
346 offset = lp_build_add(&bld->uint_coord_bld, offset, y_offset);
347 if (dims >= 3) {
348 LLVMValueRef z_offset;
349 lp_build_sample_wrap_nearest_int(bld,
350 1, /* block length (depth) */
351 r_ipart, depth_vec, img_stride_vec,
352 bld->static_state->pot_height,
353 bld->static_state->wrap_r,
354 &z_offset, &z_subcoord);
355 offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset);
356 }
357 else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
358 LLVMValueRef z_offset;
359 /* The r coord is the cube face in [0,5] */
360 z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec);
361 offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset);
362 }
363 }
364
365 /*
366 * Fetch the pixels as 4 x 32bit (rgba order might differ):
367 *
368 * rgba0 rgba1 rgba2 rgba3
369 *
370 * bit cast them into 16 x u8
371 *
372 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
373 *
374 * unpack them into two 8 x i16:
375 *
376 * r0 g0 b0 a0 r1 g1 b1 a1
377 * r2 g2 b2 a2 r3 g3 b3 a3
378 *
379 * The higher 8 bits of the resulting elements will be zero.
380 */
381 {
382 LLVMValueRef rgba8;
383
384 if (util_format_is_rgba8_variant(bld->format_desc)) {
385 /*
386 * Given the format is a rgba8, just read the pixels as is,
387 * without any swizzling. Swizzling will be done later.
388 */
389 rgba8 = lp_build_gather(bld->builder,
390 bld->texel_type.length,
391 bld->format_desc->block.bits,
392 bld->texel_type.width,
393 data_ptr, offset);
394
395 rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
396 }
397 else {
398 rgba8 = lp_build_fetch_rgba_aos(bld->builder,
399 bld->format_desc,
400 u8n.type,
401 data_ptr, offset,
402 x_subcoord,
403 y_subcoord);
404 }
405
406 /* Expand one 4*rgba8 to two 2*rgba16 */
407 lp_build_unpack2(builder, u8n.type, h16.type,
408 rgba8,
409 colors_lo, colors_hi);
410 }
411 }
412
413
414 /**
415 * Sample a single texture image with (bi-)(tri-)linear sampling.
416 * Return filtered color as two vectors of 16-bit fixed point values.
417 */
418 static void
419 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
420 LLVMValueRef width_vec,
421 LLVMValueRef height_vec,
422 LLVMValueRef depth_vec,
423 LLVMValueRef row_stride_vec,
424 LLVMValueRef img_stride_vec,
425 LLVMValueRef data_ptr,
426 LLVMValueRef s,
427 LLVMValueRef t,
428 LLVMValueRef r,
429 LLVMValueRef *colors_lo,
430 LLVMValueRef *colors_hi)
431 {
432 const int dims = texture_dims(bld->static_state->target);
433 LLVMBuilderRef builder = bld->builder;
434 struct lp_build_context i32, h16, u8n;
435 LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
436 LLVMValueRef i32_c8, i32_c128, i32_c255;
437 LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
438 LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
439 LLVMValueRef r_ipart, r_fpart, r_fpart_lo, r_fpart_hi;
440 LLVMValueRef x_stride, y_stride, z_stride;
441 LLVMValueRef x_offset0, x_offset1;
442 LLVMValueRef y_offset0, y_offset1;
443 LLVMValueRef z_offset0, z_offset1;
444 LLVMValueRef offset[2][2][2]; /* [z][y][x] */
445 LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2];
446 LLVMValueRef neighbors_lo[2][2][2]; /* [z][y][x] */
447 LLVMValueRef neighbors_hi[2][2][2]; /* [z][y][x] */
448 LLVMValueRef packed_lo, packed_hi;
449 unsigned x, y, z;
450 unsigned i, j, k;
451 unsigned numj, numk;
452
453 lp_build_context_init(&i32, builder, lp_type_int_vec(32));
454 lp_build_context_init(&h16, builder, lp_type_ufixed(16));
455 lp_build_context_init(&u8n, builder, lp_type_unorm(8));
456
457 i32_vec_type = lp_build_vec_type(i32.type);
458 h16_vec_type = lp_build_vec_type(h16.type);
459 u8n_vec_type = lp_build_vec_type(u8n.type);
460
461 if (bld->static_state->normalized_coords) {
462 /* s = s * width, t = t * height */
463 LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
464 LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec,
465 coord_vec_type, "");
466 s = lp_build_mul(&bld->coord_bld, s, fp_width);
467 if (dims >= 2) {
468 LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec,
469 coord_vec_type, "");
470 t = lp_build_mul(&bld->coord_bld, t, fp_height);
471 }
472 if (dims >= 3) {
473 LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec,
474 coord_vec_type, "");
475 r = lp_build_mul(&bld->coord_bld, r, fp_depth);
476 }
477 }
478
479 /* scale coords by 256 (8 fractional bits) */
480 s = lp_build_mul_imm(&bld->coord_bld, s, 256);
481 if (dims >= 2)
482 t = lp_build_mul_imm(&bld->coord_bld, t, 256);
483 if (dims >= 3)
484 r = lp_build_mul_imm(&bld->coord_bld, r, 256);
485
486 /* convert float to int */
487 s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
488 if (dims >= 2)
489 t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
490 if (dims >= 3)
491 r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");
492
493 /* subtract 0.5 (add -128) */
494 i32_c128 = lp_build_const_int_vec(i32.type, -128);
495 s = LLVMBuildAdd(builder, s, i32_c128, "");
496 if (dims >= 2) {
497 t = LLVMBuildAdd(builder, t, i32_c128, "");
498 }
499 if (dims >= 3) {
500 r = LLVMBuildAdd(builder, r, i32_c128, "");
501 }
502
503 /* compute floor (shift right 8) */
504 i32_c8 = lp_build_const_int_vec(i32.type, 8);
505 s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
506 if (dims >= 2)
507 t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
508 if (dims >= 3)
509 r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
510
511 /* compute fractional part (AND with 0xff) */
512 i32_c255 = lp_build_const_int_vec(i32.type, 255);
513 s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
514 if (dims >= 2)
515 t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
516 if (dims >= 3)
517 r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");
518
519 /* get pixel, row and image strides */
520 x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
521 bld->format_desc->block.bits/8);
522 y_stride = row_stride_vec;
523 z_stride = img_stride_vec;
524
525 /* do texcoord wrapping and compute texel offsets */
526 lp_build_sample_wrap_linear_int(bld,
527 bld->format_desc->block.width,
528 s_ipart, width_vec, x_stride,
529 bld->static_state->pot_width,
530 bld->static_state->wrap_s,
531 &x_offset0, &x_offset1,
532 &x_subcoord[0], &x_subcoord[1]);
533 for (z = 0; z < 2; z++) {
534 for (y = 0; y < 2; y++) {
535 offset[z][y][0] = x_offset0;
536 offset[z][y][1] = x_offset1;
537 }
538 }
539
540 if (dims >= 2) {
541 lp_build_sample_wrap_linear_int(bld,
542 bld->format_desc->block.height,
543 t_ipart, height_vec, y_stride,
544 bld->static_state->pot_height,
545 bld->static_state->wrap_t,
546 &y_offset0, &y_offset1,
547 &y_subcoord[0], &y_subcoord[1]);
548
549 for (z = 0; z < 2; z++) {
550 for (x = 0; x < 2; x++) {
551 offset[z][0][x] = lp_build_add(&bld->uint_coord_bld,
552 offset[z][0][x], y_offset0);
553 offset[z][1][x] = lp_build_add(&bld->uint_coord_bld,
554 offset[z][1][x], y_offset1);
555 }
556 }
557 }
558
559 if (dims >= 3) {
560 lp_build_sample_wrap_linear_int(bld,
561 bld->format_desc->block.height,
562 r_ipart, depth_vec, z_stride,
563 bld->static_state->pot_depth,
564 bld->static_state->wrap_r,
565 &z_offset0, &z_offset1,
566 &z_subcoord[0], &z_subcoord[1]);
567 for (y = 0; y < 2; y++) {
568 for (x = 0; x < 2; x++) {
569 offset[0][y][x] = lp_build_add(&bld->uint_coord_bld,
570 offset[0][y][x], z_offset0);
571 offset[1][y][x] = lp_build_add(&bld->uint_coord_bld,
572 offset[1][y][x], z_offset1);
573 }
574 }
575 }
576 else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
577 LLVMValueRef z_offset;
578 z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec);
579 for (y = 0; y < 2; y++) {
580 for (x = 0; x < 2; x++) {
581 /* The r coord is the cube face in [0,5] */
582 offset[0][y][x] = lp_build_add(&bld->uint_coord_bld,
583 offset[0][y][x], z_offset);
584 }
585 }
586 }
587
588 /*
589 * Transform 4 x i32 in
590 *
591 * s_fpart = {s0, s1, s2, s3}
592 *
593 * into 8 x i16
594 *
595 * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
596 *
597 * into two 8 x i16
598 *
599 * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
600 * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
601 *
602 * and likewise for t_fpart. There is no risk of loosing precision here
603 * since the fractional parts only use the lower 8bits.
604 */
605 s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
606 if (dims >= 2)
607 t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
608 if (dims >= 3)
609 r_fpart = LLVMBuildBitCast(builder, r_fpart, h16_vec_type, "");
610
611 {
612 LLVMTypeRef elem_type = LLVMInt32Type();
613 LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
614 LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
615 LLVMValueRef shuffle_lo;
616 LLVMValueRef shuffle_hi;
617
618 for (j = 0; j < h16.type.length; j += 4) {
619 #ifdef PIPE_ARCH_LITTLE_ENDIAN
620 unsigned subindex = 0;
621 #else
622 unsigned subindex = 1;
623 #endif
624 LLVMValueRef index;
625
626 index = LLVMConstInt(elem_type, j/2 + subindex, 0);
627 for (i = 0; i < 4; ++i)
628 shuffles_lo[j + i] = index;
629
630 index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
631 for (i = 0; i < 4; ++i)
632 shuffles_hi[j + i] = index;
633 }
634
635 shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
636 shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
637
638 s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
639 shuffle_lo, "");
640 s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
641 shuffle_hi, "");
642 if (dims >= 2) {
643 t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
644 shuffle_lo, "");
645 t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
646 shuffle_hi, "");
647 }
648 if (dims >= 3) {
649 r_fpart_lo = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
650 shuffle_lo, "");
651 r_fpart_hi = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
652 shuffle_hi, "");
653 }
654 }
655
656 /*
657 * Fetch the pixels as 4 x 32bit (rgba order might differ):
658 *
659 * rgba0 rgba1 rgba2 rgba3
660 *
661 * bit cast them into 16 x u8
662 *
663 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
664 *
665 * unpack them into two 8 x i16:
666 *
667 * r0 g0 b0 a0 r1 g1 b1 a1
668 * r2 g2 b2 a2 r3 g3 b3 a3
669 *
670 * The higher 8 bits of the resulting elements will be zero.
671 */
672 numj = 1 + (dims >= 2);
673 numk = 1 + (dims >= 3);
674
675 for (k = 0; k < numk; k++) {
676 for (j = 0; j < numj; j++) {
677 for (i = 0; i < 2; i++) {
678 LLVMValueRef rgba8;
679
680 if (util_format_is_rgba8_variant(bld->format_desc)) {
681 /*
682 * Given the format is a rgba8, just read the pixels as is,
683 * without any swizzling. Swizzling will be done later.
684 */
685 rgba8 = lp_build_gather(bld->builder,
686 bld->texel_type.length,
687 bld->format_desc->block.bits,
688 bld->texel_type.width,
689 data_ptr, offset[k][j][i]);
690
691 rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
692 }
693 else {
694 rgba8 = lp_build_fetch_rgba_aos(bld->builder,
695 bld->format_desc,
696 u8n.type,
697 data_ptr, offset[k][j][i],
698 x_subcoord[i],
699 y_subcoord[j]);
700 }
701
702 /* Expand one 4*rgba8 to two 2*rgba16 */
703 lp_build_unpack2(builder, u8n.type, h16.type,
704 rgba8,
705 &neighbors_lo[k][j][i], &neighbors_hi[k][j][i]);
706 }
707 }
708 }
709
710 /*
711 * Linear interpolation with 8.8 fixed point.
712 */
713 if (dims == 1) {
714 /* 1-D lerp */
715 packed_lo = lp_build_lerp(&h16,
716 s_fpart_lo,
717 neighbors_lo[0][0][0],
718 neighbors_lo[0][0][1]);
719
720 packed_hi = lp_build_lerp(&h16,
721 s_fpart_hi,
722 neighbors_hi[0][0][0],
723 neighbors_hi[0][0][1]);
724 }
725 else {
726 /* 2-D lerp */
727 packed_lo = lp_build_lerp_2d(&h16,
728 s_fpart_lo, t_fpart_lo,
729 neighbors_lo[0][0][0],
730 neighbors_lo[0][0][1],
731 neighbors_lo[0][1][0],
732 neighbors_lo[0][1][1]);
733
734 packed_hi = lp_build_lerp_2d(&h16,
735 s_fpart_hi, t_fpart_hi,
736 neighbors_hi[0][0][0],
737 neighbors_hi[0][0][1],
738 neighbors_hi[0][1][0],
739 neighbors_hi[0][1][1]);
740
741 if (dims >= 3) {
742 LLVMValueRef packed_lo2, packed_hi2;
743
744 /* lerp in the second z slice */
745 packed_lo2 = lp_build_lerp_2d(&h16,
746 s_fpart_lo, t_fpart_lo,
747 neighbors_lo[1][0][0],
748 neighbors_lo[1][0][1],
749 neighbors_lo[1][1][0],
750 neighbors_lo[1][1][1]);
751
752 packed_hi2 = lp_build_lerp_2d(&h16,
753 s_fpart_hi, t_fpart_hi,
754 neighbors_hi[1][0][0],
755 neighbors_hi[1][0][1],
756 neighbors_hi[1][1][0],
757 neighbors_hi[1][1][1]);
758 /* interp between two z slices */
759 packed_lo = lp_build_lerp(&h16, r_fpart_lo,
760 packed_lo, packed_lo2);
761 packed_hi = lp_build_lerp(&h16, r_fpart_hi,
762 packed_hi, packed_hi2);
763 }
764 }
765
766 *colors_lo = packed_lo;
767 *colors_hi = packed_hi;
768 }
769
770
771 /**
772 * Sample the texture/mipmap using given image filter and mip filter.
773 * data0_ptr and data1_ptr point to the two mipmap levels to sample
774 * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
775 * If we're using nearest miplevel sampling the '1' values will be null/unused.
776 */
777 static void
778 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
779 unsigned img_filter,
780 unsigned mip_filter,
781 LLVMValueRef s,
782 LLVMValueRef t,
783 LLVMValueRef r,
784 LLVMValueRef lod_fpart,
785 LLVMValueRef width0_vec,
786 LLVMValueRef width1_vec,
787 LLVMValueRef height0_vec,
788 LLVMValueRef height1_vec,
789 LLVMValueRef depth0_vec,
790 LLVMValueRef depth1_vec,
791 LLVMValueRef row_stride0_vec,
792 LLVMValueRef row_stride1_vec,
793 LLVMValueRef img_stride0_vec,
794 LLVMValueRef img_stride1_vec,
795 LLVMValueRef data_ptr0,
796 LLVMValueRef data_ptr1,
797 LLVMValueRef *colors_lo,
798 LLVMValueRef *colors_hi)
799 {
800 LLVMValueRef colors0_lo, colors0_hi;
801 LLVMValueRef colors1_lo, colors1_hi;
802
803 if (img_filter == PIPE_TEX_FILTER_NEAREST) {
804 /* sample the first mipmap level */
805 lp_build_sample_image_nearest(bld,
806 width0_vec, height0_vec, depth0_vec,
807 row_stride0_vec, img_stride0_vec,
808 data_ptr0, s, t, r,
809 &colors0_lo, &colors0_hi);
810
811 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
812 /* sample the second mipmap level */
813 lp_build_sample_image_nearest(bld,
814 width1_vec, height1_vec, depth1_vec,
815 row_stride1_vec, img_stride1_vec,
816 data_ptr1, s, t, r,
817 &colors1_lo, &colors1_hi);
818 }
819 }
820 else {
821 assert(img_filter == PIPE_TEX_FILTER_LINEAR);
822
823 /* sample the first mipmap level */
824 lp_build_sample_image_linear(bld,
825 width0_vec, height0_vec, depth0_vec,
826 row_stride0_vec, img_stride0_vec,
827 data_ptr0, s, t, r,
828 &colors0_lo, &colors0_hi);
829
830 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
831 /* sample the second mipmap level */
832 lp_build_sample_image_linear(bld,
833 width1_vec, height1_vec, depth1_vec,
834 row_stride1_vec, img_stride1_vec,
835 data_ptr1, s, t, r,
836 &colors1_lo, &colors1_hi);
837 }
838 }
839
840 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
841 /* interpolate samples from the two mipmap levels */
842 struct lp_build_context h16;
843 lp_build_context_init(&h16, bld->builder, lp_type_ufixed(16));
844
845 *colors_lo = lp_build_lerp(&h16, lod_fpart,
846 colors0_lo, colors1_lo);
847 *colors_hi = lp_build_lerp(&h16, lod_fpart,
848 colors0_hi, colors1_hi);
849 }
850 else {
851 /* use first/only level's colors */
852 *colors_lo = colors0_lo;
853 *colors_hi = colors0_hi;
854 }
855 }
856
857
858
859 /**
860 * Texture sampling in AoS format. Used when sampling common 32-bit/texel
861 * formats. 1D/2D/3D/cube texture supported. All mipmap sampling modes
862 * but only limited texture coord wrap modes.
863 */
void
lp_build_sample_aos(struct lp_build_sample_context *bld,
                    unsigned unit,
                    LLVMValueRef s,
                    LLVMValueRef t,
                    LLVMValueRef r,
                    const LLVMValueRef *ddx,
                    const LLVMValueRef *ddy,
                    LLVMValueRef lod_bias, /* optional */
                    LLVMValueRef explicit_lod, /* optional */
                    LLVMValueRef width,
                    LLVMValueRef height,
                    LLVMValueRef depth,
                    LLVMValueRef width_vec,
                    LLVMValueRef height_vec,
                    LLVMValueRef depth_vec,
                    LLVMValueRef row_stride_array,
                    LLVMValueRef img_stride_array,
                    LLVMValueRef data_array,
                    LLVMValueRef texel_out[4])
{
   struct lp_build_context *int_bld = &bld->int_bld;
   LLVMBuilderRef builder = bld->builder;
   const unsigned mip_filter = bld->static_state->min_mip_filter;
   const unsigned min_filter = bld->static_state->min_img_filter;
   const unsigned mag_filter = bld->static_state->mag_img_filter;
   const int dims = texture_dims(bld->static_state->target);
   LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
   LLVMValueRef ilevel0, ilevel1 = NULL;
   LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
   LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
   LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
   LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL;
   LLVMValueRef data_ptr0, data_ptr1 = NULL;
   LLVMValueRef packed, packed_lo, packed_hi;
   LLVMValueRef unswizzled[4];
   LLVMValueRef face_ddx[4], face_ddy[4];
   struct lp_build_context h16;
   LLVMTypeRef h16_vec_type;

   /* we only support the common/simple wrap modes at this time */
   assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s));
   if (dims >= 2)
      assert(lp_is_simple_wrap_mode(bld->static_state->wrap_t));
   if (dims >= 3)
      assert(lp_is_simple_wrap_mode(bld->static_state->wrap_r));


   /* make 16-bit fixed-pt builder context */
   lp_build_context_init(&h16, builder, lp_type_ufixed(16));
   h16_vec_type = lp_build_vec_type(h16.type);


   /* cube face selection, compute pre-face coords, etc. */
   if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
      LLVMValueRef face, face_s, face_t;
      lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
      s = face_s; /* vec */
      t = face_t; /* vec */
      /* use 'r' to indicate cube face */
      r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */

      /* recompute ddx, ddy using the new (s,t) face texcoords */
      face_ddx[0] = lp_build_ddx(&bld->coord_bld, s);
      face_ddx[1] = lp_build_ddx(&bld->coord_bld, t);
      face_ddx[2] = NULL;
      face_ddx[3] = NULL;
      face_ddy[0] = lp_build_ddy(&bld->coord_bld, s);
      face_ddy[1] = lp_build_ddy(&bld->coord_bld, t);
      face_ddy[2] = NULL;
      face_ddy[3] = NULL;
      ddx = face_ddx;
      ddy = face_ddy;
   }

   /*
    * Compute the level of detail (float).
    */
   if (min_filter != mag_filter ||
       mip_filter != PIPE_TEX_MIPFILTER_NONE) {
      /* Need to compute lod either to choose mipmap levels or to
       * distinguish between minification/magnification with one mipmap level.
       */
      lp_build_lod_selector(bld, unit, ddx, ddy,
                            lod_bias, explicit_lod,
                            width, height, depth,
                            mip_filter,
                            &lod_ipart, &lod_fpart);
   } else {
      /* filters match and no mipmapping: lod is irrelevant, use level 0 */
      lod_ipart = LLVMConstInt(LLVMInt32Type(), 0, 0);
   }

   /*
    * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
    * If mipfilter=linear, also compute the weight between the two
    * mipmap levels: lod_fpart
    */
   switch (mip_filter) {
   default:
      assert(0 && "bad mip_filter value in lp_build_sample_aos()");
      /* fall-through */
   case PIPE_TEX_MIPFILTER_NONE:
      /* always use mip level 0 */
      if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
         /* XXX this is a work-around for an apparent bug in LLVM 2.7.
          * We should be able to set ilevel0 = const(0) but that causes
          * bad x86 code to be emitted.
          */
         assert(lod_ipart);
         lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
      }
      else {
         ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
      }
      break;
   case PIPE_TEX_MIPFILTER_NEAREST:
      assert(lod_ipart);
      lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
      break;
   case PIPE_TEX_MIPFILTER_LINEAR:
      {
         LLVMValueRef f256 = LLVMConstReal(LLVMFloatType(), 256.0);
         LLVMTypeRef i32_type = LLVMIntType(32);
         LLVMTypeRef i16_type = LLVMIntType(16);

         assert(lod_fpart);

         lp_build_linear_mip_levels(bld, unit, lod_ipart, &ilevel0, &ilevel1);

         /* convert the float lod weight to 8-bit fixed point and broadcast
          * it into a 16-bit lane vector for the h16 lerps below
          */
         lod_fpart = LLVMBuildFMul(builder, lod_fpart, f256, "");
         lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32_type, "");
         lod_fpart = LLVMBuildTrunc(builder, lod_fpart, i16_type, "");
         lod_fpart = lp_build_broadcast_scalar(&h16, lod_fpart);

         /* the lod_fpart values will be fixed pt values in [0,1) */
      }
      break;
   }

   /* compute image size(s) of source mipmap level(s) */
   lp_build_mipmap_level_sizes(bld, dims, width_vec, height_vec, depth_vec,
                               ilevel0, ilevel1,
                               row_stride_array, img_stride_array,
                               &width0_vec, &width1_vec,
                               &height0_vec, &height1_vec,
                               &depth0_vec, &depth1_vec,
                               &row_stride0_vec, &row_stride1_vec,
                               &img_stride0_vec, &img_stride1_vec);

   /*
    * Get pointer(s) to image data for mipmap level(s).
    */
   data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0);
   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1);
   }


   /*
    * Get/interpolate texture colors.
    */
   if (min_filter == mag_filter) {
      /* no need to distinquish between minification and magnification */
      lp_build_sample_mipmap(bld, min_filter, mip_filter,
                             s, t, r, lod_fpart,
                             width0_vec, width1_vec,
                             height0_vec, height1_vec,
                             depth0_vec, depth1_vec,
                             row_stride0_vec, row_stride1_vec,
                             img_stride0_vec, img_stride1_vec,
                             data_ptr0, data_ptr1,
                             &packed_lo, &packed_hi);
   }
   else {
      /* Emit conditional to choose min image filter or mag image filter
       * depending on the lod being > 0 or <= 0, respectively.
       */
      struct lp_build_flow_context *flow_ctx;
      struct lp_build_if_state if_ctx;
      LLVMValueRef minify;

      flow_ctx = lp_build_flow_create(builder);
      lp_build_flow_scope_begin(flow_ctx);

      /* declare the phi-like outputs of the if/else before entering it */
      packed_lo = LLVMGetUndef(h16_vec_type);
      packed_hi = LLVMGetUndef(h16_vec_type);

      lp_build_flow_scope_declare(flow_ctx, &packed_lo);
      lp_build_flow_scope_declare(flow_ctx, &packed_hi);

      /* minify = lod >= 0.0 */
      minify = LLVMBuildICmp(builder, LLVMIntSGE,
                             lod_ipart, int_bld->zero, "");

      lp_build_if(&if_ctx, flow_ctx, builder, minify);
      {
         /* Use the minification filter */
         lp_build_sample_mipmap(bld, min_filter, mip_filter,
                                s, t, r, lod_fpart,
                                width0_vec, width1_vec,
                                height0_vec, height1_vec,
                                depth0_vec, depth1_vec,
                                row_stride0_vec, row_stride1_vec,
                                img_stride0_vec, img_stride1_vec,
                                data_ptr0, data_ptr1,
                                &packed_lo, &packed_hi);
      }
      lp_build_else(&if_ctx);
      {
         /* Use the magnification filter: no mip filtering, level 0 only
          * (hence the NULL second-level arguments)
          */
         lp_build_sample_mipmap(bld, mag_filter, PIPE_TEX_MIPFILTER_NONE,
                                s, t, r, NULL,
                                width_vec, NULL,
                                height_vec, NULL,
                                depth_vec, NULL,
                                row_stride0_vec, NULL,
                                img_stride0_vec, NULL,
                                data_ptr0, NULL,
                                &packed_lo, &packed_hi);
      }
      lp_build_endif(&if_ctx);

      lp_build_flow_scope_end(flow_ctx);
      lp_build_flow_destroy(flow_ctx);
   }

   /* combine 'packed_lo', 'packed_hi' into 'packed' */
   {
      struct lp_build_context h16, u8n;

      lp_build_context_init(&h16, builder, lp_type_ufixed(16));
      lp_build_context_init(&u8n, builder, lp_type_unorm(8));

      packed = lp_build_pack2(builder, h16.type, u8n.type,
                              packed_lo, packed_hi);
   }

   /*
    * Convert to SoA and swizzle.
    */
   lp_build_rgba8_to_f32_soa(builder,
                             bld->texel_type,
                             packed, unswizzled);

   if (util_format_is_rgba8_variant(bld->format_desc)) {
      lp_build_format_swizzle_soa(bld->format_desc,
                                  &bld->texel_bld,
                                  unswizzled, texel_out);
   }
   else {
      texel_out[0] = unswizzled[0];
      texel_out[1] = unswizzled[1];
      texel_out[2] = unswizzled[2];
      texel_out[3] = unswizzled[3];
   }

   apply_sampler_swizzle(bld, texel_out);
}