gallivm: Do size computations simultaneously for all dimensions (AoS).
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_sample_aos.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * Texture sampling -- AoS.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 * @author Brian Paul <brianp@vmware.com>
34 */
35
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "util/u_debug.h"
39 #include "util/u_dump.h"
40 #include "util/u_memory.h"
41 #include "util/u_math.h"
42 #include "util/u_format.h"
43 #include "lp_bld_debug.h"
44 #include "lp_bld_type.h"
45 #include "lp_bld_const.h"
46 #include "lp_bld_conv.h"
47 #include "lp_bld_arit.h"
48 #include "lp_bld_bitarit.h"
49 #include "lp_bld_logic.h"
50 #include "lp_bld_swizzle.h"
51 #include "lp_bld_pack.h"
52 #include "lp_bld_flow.h"
53 #include "lp_bld_gather.h"
54 #include "lp_bld_format.h"
55 #include "lp_bld_sample.h"
56 #include "lp_bld_sample_aos.h"
57 #include "lp_bld_quad.h"
58
59
60 /**
61 * Build LLVM code for texture coord wrapping, for nearest filtering,
62 * for scaled integer texcoords.
63 * \param block_length is the length of the pixel block along the
64 * coordinate axis
65 * \param coord the incoming texcoord (s,t,r or q) scaled to the texture size
66 * \param length the texture size along one dimension
67 * \param stride pixel stride along the coordinate axis (in bytes)
68 * \param is_pot if TRUE, length is a power of two
69 * \param wrap_mode one of PIPE_TEX_WRAP_x
70 * \param out_offset byte offset for the wrapped coordinate
71 * \param out_i resulting sub-block pixel coordinate for coord0
72 */
73 static void
74 lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
75 unsigned block_length,
76 LLVMValueRef coord,
77 LLVMValueRef length,
78 LLVMValueRef stride,
79 boolean is_pot,
80 unsigned wrap_mode,
81 LLVMValueRef *out_offset,
82 LLVMValueRef *out_i)
83 {
84 struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
85 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
86 LLVMValueRef length_minus_one;
87
88 length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
89
90 switch(wrap_mode) {
91 case PIPE_TEX_WRAP_REPEAT:
92 if(is_pot)
93 coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
94 else {
95 /* Add a bias to the texcoord to handle negative coords */
96 LLVMValueRef bias = lp_build_mul_imm(uint_coord_bld, length, 1024);
97 coord = LLVMBuildAdd(bld->builder, coord, bias, "");
98 coord = LLVMBuildURem(bld->builder, coord, length, "");
99 }
100 break;
101
102 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
103 coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
104 coord = lp_build_min(int_coord_bld, coord, length_minus_one);
105 break;
106
107 case PIPE_TEX_WRAP_CLAMP:
108 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
109 case PIPE_TEX_WRAP_MIRROR_REPEAT:
110 case PIPE_TEX_WRAP_MIRROR_CLAMP:
111 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
112 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
113 default:
114 assert(0);
115 }
116
117 lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride,
118 out_offset, out_i);
119 }
120
121
122 /**
123 * Build LLVM code for texture coord wrapping, for linear filtering,
124 * for scaled integer texcoords.
125 * \param block_length is the length of the pixel block along the
126 * coordinate axis
127 * \param coord0 the incoming texcoord (s,t,r or q) scaled to the texture size
128 * \param length the texture size along one dimension
129 * \param stride pixel stride along the coordinate axis (in bytes)
130 * \param is_pot if TRUE, length is a power of two
131 * \param wrap_mode one of PIPE_TEX_WRAP_x
132 * \param offset0 resulting relative offset for coord0
133 * \param offset1 resulting relative offset for coord0 + 1
134 * \param i0 resulting sub-block pixel coordinate for coord0
135 * \param i1 resulting sub-block pixel coordinate for coord0 + 1
136 */
137 static void
138 lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
139 unsigned block_length,
140 LLVMValueRef coord0,
141 LLVMValueRef length,
142 LLVMValueRef stride,
143 boolean is_pot,
144 unsigned wrap_mode,
145 LLVMValueRef *offset0,
146 LLVMValueRef *offset1,
147 LLVMValueRef *i0,
148 LLVMValueRef *i1)
149 {
150 struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
151 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
152 LLVMValueRef length_minus_one;
153 LLVMValueRef lmask, umask, mask;
154
155 if (block_length != 1) {
156 /*
157 * If the pixel block covers more than one pixel then there is no easy
158 * way to calculate offset1 relative to offset0. Instead, compute them
159 * independently.
160 */
161
162 LLVMValueRef coord1;
163
164 lp_build_sample_wrap_nearest_int(bld,
165 block_length,
166 coord0,
167 length,
168 stride,
169 is_pot,
170 wrap_mode,
171 offset0, i0);
172
173 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
174
175 lp_build_sample_wrap_nearest_int(bld,
176 block_length,
177 coord1,
178 length,
179 stride,
180 is_pot,
181 wrap_mode,
182 offset1, i1);
183
184 return;
185 }
186
187 /*
188 * Scalar pixels -- try to compute offset0 and offset1 with a single stride
189 * multiplication.
190 */
191
192 *i0 = uint_coord_bld->zero;
193 *i1 = uint_coord_bld->zero;
194
195 length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
196
197 switch(wrap_mode) {
198 case PIPE_TEX_WRAP_REPEAT:
199 if (is_pot) {
200 coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
201 }
202 else {
203 /* Add a bias to the texcoord to handle negative coords */
204 LLVMValueRef bias = lp_build_mul_imm(uint_coord_bld, length, 1024);
205 coord0 = LLVMBuildAdd(bld->builder, coord0, bias, "");
206 coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
207 }
208
209 mask = lp_build_compare(bld->builder, int_coord_bld->type,
210 PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
211
212 *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
213 *offset1 = LLVMBuildAnd(bld->builder,
214 lp_build_add(uint_coord_bld, *offset0, stride),
215 mask, "");
216 break;
217
218 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
219 lmask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
220 PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
221 umask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
222 PIPE_FUNC_LESS, coord0, length_minus_one);
223
224 coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
225 coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);
226
227 mask = LLVMBuildAnd(bld->builder, lmask, umask, "");
228
229 *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
230 *offset1 = lp_build_add(uint_coord_bld,
231 *offset0,
232 LLVMBuildAnd(bld->builder, stride, mask, ""));
233 break;
234
235 case PIPE_TEX_WRAP_CLAMP:
236 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
237 case PIPE_TEX_WRAP_MIRROR_REPEAT:
238 case PIPE_TEX_WRAP_MIRROR_CLAMP:
239 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
240 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
241 default:
242 assert(0);
243 *offset0 = uint_coord_bld->zero;
244 *offset1 = uint_coord_bld->zero;
245 break;
246 }
247 }
248
249
250 /**
251 * Sample a single texture image with nearest sampling.
252 * If sampling a cube texture, r = cube face in [0,5].
253 * Return filtered color as two vectors of 16-bit fixed point values.
254 */
255 static void
256 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
257 LLVMValueRef int_size,
258 LLVMValueRef row_stride_vec,
259 LLVMValueRef img_stride_vec,
260 LLVMValueRef data_ptr,
261 LLVMValueRef s,
262 LLVMValueRef t,
263 LLVMValueRef r,
264 LLVMValueRef *colors_lo,
265 LLVMValueRef *colors_hi)
266 {
267 const unsigned dims = bld->dims;
268 LLVMBuilderRef builder = bld->builder;
269 struct lp_build_context i32, h16, u8n;
270 LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
271 LLVMValueRef i32_c8;
272 LLVMValueRef width_vec, height_vec, depth_vec;
273 LLVMValueRef s_ipart, t_ipart, r_ipart;
274 LLVMValueRef x_stride;
275 LLVMValueRef x_offset, offset;
276 LLVMValueRef x_subcoord, y_subcoord, z_subcoord;
277
278 lp_build_context_init(&i32, builder, lp_type_int_vec(32));
279 lp_build_context_init(&h16, builder, lp_type_ufixed(16));
280 lp_build_context_init(&u8n, builder, lp_type_unorm(8));
281
282 i32_vec_type = lp_build_vec_type(i32.type);
283 h16_vec_type = lp_build_vec_type(h16.type);
284 u8n_vec_type = lp_build_vec_type(u8n.type);
285
286 lp_build_extract_image_sizes(bld,
287 bld->int_size_type,
288 bld->int_coord_type,
289 int_size,
290 &width_vec,
291 &height_vec,
292 &depth_vec);
293
294 if (bld->static_state->normalized_coords) {
295 LLVMValueRef scaled_size;
296 LLVMValueRef flt_size;
297
298 /* scale size by 256 (8 fractional bits) */
299 scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);
300
301 flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);
302
303 lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
304 }
305 else {
306 /* scale coords by 256 (8 fractional bits) */
307 s = lp_build_mul_imm(&bld->coord_bld, s, 256);
308 if (dims >= 2)
309 t = lp_build_mul_imm(&bld->coord_bld, t, 256);
310 if (dims >= 3)
311 r = lp_build_mul_imm(&bld->coord_bld, r, 256);
312 }
313
314 /* convert float to int */
315 s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
316 if (dims >= 2)
317 t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
318 if (dims >= 3)
319 r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");
320
321 /* compute floor (shift right 8) */
322 i32_c8 = lp_build_const_int_vec(i32.type, 8);
323 s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
324 if (dims >= 2)
325 t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
326 if (dims >= 3)
327 r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
328
329 /* get pixel, row, image strides */
330 x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
331 bld->format_desc->block.bits/8);
332
333 /* Do texcoord wrapping, compute texel offset */
334 lp_build_sample_wrap_nearest_int(bld,
335 bld->format_desc->block.width,
336 s_ipart, width_vec, x_stride,
337 bld->static_state->pot_width,
338 bld->static_state->wrap_s,
339 &x_offset, &x_subcoord);
340 offset = x_offset;
341 if (dims >= 2) {
342 LLVMValueRef y_offset;
343 lp_build_sample_wrap_nearest_int(bld,
344 bld->format_desc->block.height,
345 t_ipart, height_vec, row_stride_vec,
346 bld->static_state->pot_height,
347 bld->static_state->wrap_t,
348 &y_offset, &y_subcoord);
349 offset = lp_build_add(&bld->uint_coord_bld, offset, y_offset);
350 if (dims >= 3) {
351 LLVMValueRef z_offset;
352 lp_build_sample_wrap_nearest_int(bld,
353 1, /* block length (depth) */
354 r_ipart, depth_vec, img_stride_vec,
355 bld->static_state->pot_height,
356 bld->static_state->wrap_r,
357 &z_offset, &z_subcoord);
358 offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset);
359 }
360 else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
361 LLVMValueRef z_offset;
362 /* The r coord is the cube face in [0,5] */
363 z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec);
364 offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset);
365 }
366 }
367
368 /*
369 * Fetch the pixels as 4 x 32bit (rgba order might differ):
370 *
371 * rgba0 rgba1 rgba2 rgba3
372 *
373 * bit cast them into 16 x u8
374 *
375 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
376 *
377 * unpack them into two 8 x i16:
378 *
379 * r0 g0 b0 a0 r1 g1 b1 a1
380 * r2 g2 b2 a2 r3 g3 b3 a3
381 *
382 * The higher 8 bits of the resulting elements will be zero.
383 */
384 {
385 LLVMValueRef rgba8;
386
387 if (util_format_is_rgba8_variant(bld->format_desc)) {
388 /*
389 * Given the format is a rgba8, just read the pixels as is,
390 * without any swizzling. Swizzling will be done later.
391 */
392 rgba8 = lp_build_gather(bld->builder,
393 bld->texel_type.length,
394 bld->format_desc->block.bits,
395 bld->texel_type.width,
396 data_ptr, offset);
397
398 rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
399 }
400 else {
401 rgba8 = lp_build_fetch_rgba_aos(bld->builder,
402 bld->format_desc,
403 u8n.type,
404 data_ptr, offset,
405 x_subcoord,
406 y_subcoord);
407 }
408
409 /* Expand one 4*rgba8 to two 2*rgba16 */
410 lp_build_unpack2(builder, u8n.type, h16.type,
411 rgba8,
412 colors_lo, colors_hi);
413 }
414 }
415
416
417 /**
418 * Sample a single texture image with (bi-)(tri-)linear sampling.
419 * Return filtered color as two vectors of 16-bit fixed point values.
420 */
421 static void
422 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
423 LLVMValueRef int_size,
424 LLVMValueRef row_stride_vec,
425 LLVMValueRef img_stride_vec,
426 LLVMValueRef data_ptr,
427 LLVMValueRef s,
428 LLVMValueRef t,
429 LLVMValueRef r,
430 LLVMValueRef *colors_lo,
431 LLVMValueRef *colors_hi)
432 {
433 const unsigned dims = bld->dims;
434 LLVMBuilderRef builder = bld->builder;
435 struct lp_build_context i32, h16, u8n;
436 LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
437 LLVMValueRef i32_c8, i32_c128, i32_c255;
438 LLVMValueRef width_vec, height_vec, depth_vec;
439 LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
440 LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
441 LLVMValueRef r_ipart, r_fpart, r_fpart_lo, r_fpart_hi;
442 LLVMValueRef x_stride, y_stride, z_stride;
443 LLVMValueRef x_offset0, x_offset1;
444 LLVMValueRef y_offset0, y_offset1;
445 LLVMValueRef z_offset0, z_offset1;
446 LLVMValueRef offset[2][2][2]; /* [z][y][x] */
447 LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2];
448 LLVMValueRef neighbors_lo[2][2][2]; /* [z][y][x] */
449 LLVMValueRef neighbors_hi[2][2][2]; /* [z][y][x] */
450 LLVMValueRef packed_lo, packed_hi;
451 unsigned x, y, z;
452 unsigned i, j, k;
453 unsigned numj, numk;
454
455 lp_build_context_init(&i32, builder, lp_type_int_vec(32));
456 lp_build_context_init(&h16, builder, lp_type_ufixed(16));
457 lp_build_context_init(&u8n, builder, lp_type_unorm(8));
458
459 i32_vec_type = lp_build_vec_type(i32.type);
460 h16_vec_type = lp_build_vec_type(h16.type);
461 u8n_vec_type = lp_build_vec_type(u8n.type);
462
463 lp_build_extract_image_sizes(bld,
464 bld->int_size_type,
465 bld->int_coord_type,
466 int_size,
467 &width_vec,
468 &height_vec,
469 &depth_vec);
470
471 if (bld->static_state->normalized_coords) {
472 LLVMValueRef scaled_size;
473 LLVMValueRef flt_size;
474
475 /* scale size by 256 (8 fractional bits) */
476 scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);
477
478 flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);
479
480 lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
481 }
482 else {
483 /* scale coords by 256 (8 fractional bits) */
484 s = lp_build_mul_imm(&bld->coord_bld, s, 256);
485 if (dims >= 2)
486 t = lp_build_mul_imm(&bld->coord_bld, t, 256);
487 if (dims >= 3)
488 r = lp_build_mul_imm(&bld->coord_bld, r, 256);
489 }
490
491 /* convert float to int */
492 s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
493 if (dims >= 2)
494 t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
495 if (dims >= 3)
496 r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");
497
498 /* subtract 0.5 (add -128) */
499 i32_c128 = lp_build_const_int_vec(i32.type, -128);
500 s = LLVMBuildAdd(builder, s, i32_c128, "");
501 if (dims >= 2) {
502 t = LLVMBuildAdd(builder, t, i32_c128, "");
503 }
504 if (dims >= 3) {
505 r = LLVMBuildAdd(builder, r, i32_c128, "");
506 }
507
508 /* compute floor (shift right 8) */
509 i32_c8 = lp_build_const_int_vec(i32.type, 8);
510 s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
511 if (dims >= 2)
512 t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
513 if (dims >= 3)
514 r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
515
516 /* compute fractional part (AND with 0xff) */
517 i32_c255 = lp_build_const_int_vec(i32.type, 255);
518 s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
519 if (dims >= 2)
520 t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
521 if (dims >= 3)
522 r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");
523
524 /* get pixel, row and image strides */
525 x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
526 bld->format_desc->block.bits/8);
527 y_stride = row_stride_vec;
528 z_stride = img_stride_vec;
529
530 /* do texcoord wrapping and compute texel offsets */
531 lp_build_sample_wrap_linear_int(bld,
532 bld->format_desc->block.width,
533 s_ipart, width_vec, x_stride,
534 bld->static_state->pot_width,
535 bld->static_state->wrap_s,
536 &x_offset0, &x_offset1,
537 &x_subcoord[0], &x_subcoord[1]);
538 for (z = 0; z < 2; z++) {
539 for (y = 0; y < 2; y++) {
540 offset[z][y][0] = x_offset0;
541 offset[z][y][1] = x_offset1;
542 }
543 }
544
545 if (dims >= 2) {
546 lp_build_sample_wrap_linear_int(bld,
547 bld->format_desc->block.height,
548 t_ipart, height_vec, y_stride,
549 bld->static_state->pot_height,
550 bld->static_state->wrap_t,
551 &y_offset0, &y_offset1,
552 &y_subcoord[0], &y_subcoord[1]);
553
554 for (z = 0; z < 2; z++) {
555 for (x = 0; x < 2; x++) {
556 offset[z][0][x] = lp_build_add(&bld->uint_coord_bld,
557 offset[z][0][x], y_offset0);
558 offset[z][1][x] = lp_build_add(&bld->uint_coord_bld,
559 offset[z][1][x], y_offset1);
560 }
561 }
562 }
563
564 if (dims >= 3) {
565 lp_build_sample_wrap_linear_int(bld,
566 bld->format_desc->block.height,
567 r_ipart, depth_vec, z_stride,
568 bld->static_state->pot_depth,
569 bld->static_state->wrap_r,
570 &z_offset0, &z_offset1,
571 &z_subcoord[0], &z_subcoord[1]);
572 for (y = 0; y < 2; y++) {
573 for (x = 0; x < 2; x++) {
574 offset[0][y][x] = lp_build_add(&bld->uint_coord_bld,
575 offset[0][y][x], z_offset0);
576 offset[1][y][x] = lp_build_add(&bld->uint_coord_bld,
577 offset[1][y][x], z_offset1);
578 }
579 }
580 }
581 else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
582 LLVMValueRef z_offset;
583 z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec);
584 for (y = 0; y < 2; y++) {
585 for (x = 0; x < 2; x++) {
586 /* The r coord is the cube face in [0,5] */
587 offset[0][y][x] = lp_build_add(&bld->uint_coord_bld,
588 offset[0][y][x], z_offset);
589 }
590 }
591 }
592
593 /*
594 * Transform 4 x i32 in
595 *
596 * s_fpart = {s0, s1, s2, s3}
597 *
598 * into 8 x i16
599 *
600 * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
601 *
602 * into two 8 x i16
603 *
604 * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
605 * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
606 *
607 * and likewise for t_fpart. There is no risk of loosing precision here
608 * since the fractional parts only use the lower 8bits.
609 */
610 s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
611 if (dims >= 2)
612 t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
613 if (dims >= 3)
614 r_fpart = LLVMBuildBitCast(builder, r_fpart, h16_vec_type, "");
615
616 {
617 LLVMTypeRef elem_type = LLVMInt32Type();
618 LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
619 LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
620 LLVMValueRef shuffle_lo;
621 LLVMValueRef shuffle_hi;
622
623 for (j = 0; j < h16.type.length; j += 4) {
624 #ifdef PIPE_ARCH_LITTLE_ENDIAN
625 unsigned subindex = 0;
626 #else
627 unsigned subindex = 1;
628 #endif
629 LLVMValueRef index;
630
631 index = LLVMConstInt(elem_type, j/2 + subindex, 0);
632 for (i = 0; i < 4; ++i)
633 shuffles_lo[j + i] = index;
634
635 index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
636 for (i = 0; i < 4; ++i)
637 shuffles_hi[j + i] = index;
638 }
639
640 shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
641 shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
642
643 s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
644 shuffle_lo, "");
645 s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
646 shuffle_hi, "");
647 if (dims >= 2) {
648 t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
649 shuffle_lo, "");
650 t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
651 shuffle_hi, "");
652 }
653 if (dims >= 3) {
654 r_fpart_lo = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
655 shuffle_lo, "");
656 r_fpart_hi = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
657 shuffle_hi, "");
658 }
659 }
660
661 /*
662 * Fetch the pixels as 4 x 32bit (rgba order might differ):
663 *
664 * rgba0 rgba1 rgba2 rgba3
665 *
666 * bit cast them into 16 x u8
667 *
668 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
669 *
670 * unpack them into two 8 x i16:
671 *
672 * r0 g0 b0 a0 r1 g1 b1 a1
673 * r2 g2 b2 a2 r3 g3 b3 a3
674 *
675 * The higher 8 bits of the resulting elements will be zero.
676 */
677 numj = 1 + (dims >= 2);
678 numk = 1 + (dims >= 3);
679
680 for (k = 0; k < numk; k++) {
681 for (j = 0; j < numj; j++) {
682 for (i = 0; i < 2; i++) {
683 LLVMValueRef rgba8;
684
685 if (util_format_is_rgba8_variant(bld->format_desc)) {
686 /*
687 * Given the format is a rgba8, just read the pixels as is,
688 * without any swizzling. Swizzling will be done later.
689 */
690 rgba8 = lp_build_gather(bld->builder,
691 bld->texel_type.length,
692 bld->format_desc->block.bits,
693 bld->texel_type.width,
694 data_ptr, offset[k][j][i]);
695
696 rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
697 }
698 else {
699 rgba8 = lp_build_fetch_rgba_aos(bld->builder,
700 bld->format_desc,
701 u8n.type,
702 data_ptr, offset[k][j][i],
703 x_subcoord[i],
704 y_subcoord[j]);
705 }
706
707 /* Expand one 4*rgba8 to two 2*rgba16 */
708 lp_build_unpack2(builder, u8n.type, h16.type,
709 rgba8,
710 &neighbors_lo[k][j][i], &neighbors_hi[k][j][i]);
711 }
712 }
713 }
714
715 /*
716 * Linear interpolation with 8.8 fixed point.
717 */
718 if (dims == 1) {
719 /* 1-D lerp */
720 packed_lo = lp_build_lerp(&h16,
721 s_fpart_lo,
722 neighbors_lo[0][0][0],
723 neighbors_lo[0][0][1]);
724
725 packed_hi = lp_build_lerp(&h16,
726 s_fpart_hi,
727 neighbors_hi[0][0][0],
728 neighbors_hi[0][0][1]);
729 }
730 else {
731 /* 2-D lerp */
732 packed_lo = lp_build_lerp_2d(&h16,
733 s_fpart_lo, t_fpart_lo,
734 neighbors_lo[0][0][0],
735 neighbors_lo[0][0][1],
736 neighbors_lo[0][1][0],
737 neighbors_lo[0][1][1]);
738
739 packed_hi = lp_build_lerp_2d(&h16,
740 s_fpart_hi, t_fpart_hi,
741 neighbors_hi[0][0][0],
742 neighbors_hi[0][0][1],
743 neighbors_hi[0][1][0],
744 neighbors_hi[0][1][1]);
745
746 if (dims >= 3) {
747 LLVMValueRef packed_lo2, packed_hi2;
748
749 /* lerp in the second z slice */
750 packed_lo2 = lp_build_lerp_2d(&h16,
751 s_fpart_lo, t_fpart_lo,
752 neighbors_lo[1][0][0],
753 neighbors_lo[1][0][1],
754 neighbors_lo[1][1][0],
755 neighbors_lo[1][1][1]);
756
757 packed_hi2 = lp_build_lerp_2d(&h16,
758 s_fpart_hi, t_fpart_hi,
759 neighbors_hi[1][0][0],
760 neighbors_hi[1][0][1],
761 neighbors_hi[1][1][0],
762 neighbors_hi[1][1][1]);
763 /* interp between two z slices */
764 packed_lo = lp_build_lerp(&h16, r_fpart_lo,
765 packed_lo, packed_lo2);
766 packed_hi = lp_build_lerp(&h16, r_fpart_hi,
767 packed_hi, packed_hi2);
768 }
769 }
770
771 *colors_lo = packed_lo;
772 *colors_hi = packed_hi;
773 }
774
775
776 /**
777 * Sample the texture/mipmap using given image filter and mip filter.
778 * data0_ptr and data1_ptr point to the two mipmap levels to sample
779 * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
780 * If we're using nearest miplevel sampling the '1' values will be null/unused.
781 */
782 static void
783 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
784 unsigned img_filter,
785 unsigned mip_filter,
786 LLVMValueRef s,
787 LLVMValueRef t,
788 LLVMValueRef r,
789 LLVMValueRef ilevel0,
790 LLVMValueRef ilevel1,
791 LLVMValueRef lod_fpart,
792 LLVMValueRef colors_lo_var,
793 LLVMValueRef colors_hi_var)
794 {
795 LLVMBuilderRef builder = bld->builder;
796 LLVMValueRef size0;
797 LLVMValueRef size1;
798 LLVMValueRef row_stride0_vec;
799 LLVMValueRef row_stride1_vec;
800 LLVMValueRef img_stride0_vec;
801 LLVMValueRef img_stride1_vec;
802 LLVMValueRef data_ptr0;
803 LLVMValueRef data_ptr1;
804 LLVMValueRef colors0_lo, colors0_hi;
805 LLVMValueRef colors1_lo, colors1_hi;
806
807
808 /* sample the first mipmap level */
809 lp_build_mipmap_level_sizes(bld, ilevel0,
810 &size0,
811 &row_stride0_vec, &img_stride0_vec);
812 data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
813 if (img_filter == PIPE_TEX_FILTER_NEAREST) {
814 lp_build_sample_image_nearest(bld,
815 size0,
816 row_stride0_vec, img_stride0_vec,
817 data_ptr0, s, t, r,
818 &colors0_lo, &colors0_hi);
819 }
820 else {
821 assert(img_filter == PIPE_TEX_FILTER_LINEAR);
822 lp_build_sample_image_linear(bld,
823 size0,
824 row_stride0_vec, img_stride0_vec,
825 data_ptr0, s, t, r,
826 &colors0_lo, &colors0_hi);
827 }
828
829 /* Store the first level's colors in the output variables */
830 LLVMBuildStore(builder, colors0_lo, colors_lo_var);
831 LLVMBuildStore(builder, colors0_hi, colors_hi_var);
832
833 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
834 LLVMValueRef h16_scale = LLVMConstReal(LLVMFloatType(), 256.0);
835 LLVMTypeRef i32_type = LLVMIntType(32);
836 struct lp_build_flow_context *flow_ctx;
837 struct lp_build_if_state if_ctx;
838 LLVMValueRef need_lerp;
839
840 flow_ctx = lp_build_flow_create(builder);
841
842 lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16_scale, "");
843 lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32_type, "lod_fpart.fixed16");
844
845 /* need_lerp = lod_fpart > 0 */
846 need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
847 lod_fpart, LLVMConstNull(i32_type),
848 "need_lerp");
849
850 lp_build_if(&if_ctx, flow_ctx, builder, need_lerp);
851 {
852 struct lp_build_context h16_bld;
853
854 lp_build_context_init(&h16_bld, builder, lp_type_ufixed(16));
855
856 /* sample the second mipmap level */
857 lp_build_mipmap_level_sizes(bld, ilevel1,
858 &size1,
859 &row_stride1_vec, &img_stride1_vec);
860 data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
861 if (img_filter == PIPE_TEX_FILTER_NEAREST) {
862 lp_build_sample_image_nearest(bld,
863 size1,
864 row_stride1_vec, img_stride1_vec,
865 data_ptr1, s, t, r,
866 &colors1_lo, &colors1_hi);
867 }
868 else {
869 lp_build_sample_image_linear(bld,
870 size1,
871 row_stride1_vec, img_stride1_vec,
872 data_ptr1, s, t, r,
873 &colors1_lo, &colors1_hi);
874 }
875
876 /* interpolate samples from the two mipmap levels */
877
878 lod_fpart = LLVMBuildTrunc(builder, lod_fpart, h16_bld.elem_type, "");
879 lod_fpart = lp_build_broadcast_scalar(&h16_bld, lod_fpart);
880
881 colors0_lo = lp_build_lerp(&h16_bld, lod_fpart,
882 colors0_lo, colors1_lo);
883 colors0_hi = lp_build_lerp(&h16_bld, lod_fpart,
884 colors0_hi, colors1_hi);
885
886 LLVMBuildStore(builder, colors0_lo, colors_lo_var);
887 LLVMBuildStore(builder, colors0_hi, colors_hi_var);
888 }
889 lp_build_endif(&if_ctx);
890
891 lp_build_flow_destroy(flow_ctx);
892 }
893 }
894
895
896
897 /**
898 * Texture sampling in AoS format. Used when sampling common 32-bit/texel
899 * formats. 1D/2D/3D/cube texture supported. All mipmap sampling modes
900 * but only limited texture coord wrap modes.
901 */
902 void
903 lp_build_sample_aos(struct lp_build_sample_context *bld,
904 unsigned unit,
905 LLVMValueRef s,
906 LLVMValueRef t,
907 LLVMValueRef r,
908 const LLVMValueRef *ddx,
909 const LLVMValueRef *ddy,
910 LLVMValueRef lod_bias, /* optional */
911 LLVMValueRef explicit_lod, /* optional */
912 LLVMValueRef texel_out[4])
913 {
914 struct lp_build_context *int_bld = &bld->int_bld;
915 LLVMBuilderRef builder = bld->builder;
916 const unsigned mip_filter = bld->static_state->min_mip_filter;
917 const unsigned min_filter = bld->static_state->min_img_filter;
918 const unsigned mag_filter = bld->static_state->mag_img_filter;
919 const unsigned dims = bld->dims;
920 LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
921 LLVMValueRef ilevel0, ilevel1 = NULL;
922 LLVMValueRef packed, packed_lo, packed_hi;
923 LLVMValueRef unswizzled[4];
924 LLVMValueRef face_ddx[4], face_ddy[4];
925 struct lp_build_context h16_bld;
926 LLVMTypeRef i32t = LLVMInt32Type();
927 LLVMValueRef i32t_zero = LLVMConstInt(i32t, 0, 0);
928
929 /* we only support the common/simple wrap modes at this time */
930 assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s));
931 if (dims >= 2)
932 assert(lp_is_simple_wrap_mode(bld->static_state->wrap_t));
933 if (dims >= 3)
934 assert(lp_is_simple_wrap_mode(bld->static_state->wrap_r));
935
936
937 /* make 16-bit fixed-pt builder context */
938 lp_build_context_init(&h16_bld, builder, lp_type_ufixed(16));
939
940 /* cube face selection, compute pre-face coords, etc. */
941 if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
942 LLVMValueRef face, face_s, face_t;
943 lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
944 s = face_s; /* vec */
945 t = face_t; /* vec */
946 /* use 'r' to indicate cube face */
947 r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
948
949 /* recompute ddx, ddy using the new (s,t) face texcoords */
950 face_ddx[0] = lp_build_ddx(&bld->coord_bld, s);
951 face_ddx[1] = lp_build_ddx(&bld->coord_bld, t);
952 face_ddx[2] = NULL;
953 face_ddx[3] = NULL;
954 face_ddy[0] = lp_build_ddy(&bld->coord_bld, s);
955 face_ddy[1] = lp_build_ddy(&bld->coord_bld, t);
956 face_ddy[2] = NULL;
957 face_ddy[3] = NULL;
958 ddx = face_ddx;
959 ddy = face_ddy;
960 }
961
962 /*
963 * Compute the level of detail (float).
964 */
965 if (min_filter != mag_filter ||
966 mip_filter != PIPE_TEX_MIPFILTER_NONE) {
967 /* Need to compute lod either to choose mipmap levels or to
968 * distinguish between minification/magnification with one mipmap level.
969 */
970 lp_build_lod_selector(bld, unit, ddx, ddy,
971 lod_bias, explicit_lod,
972 mip_filter,
973 &lod_ipart, &lod_fpart);
974 } else {
975 lod_ipart = i32t_zero;
976 }
977
978 /*
979 * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
980 */
981 switch (mip_filter) {
982 default:
983 assert(0 && "bad mip_filter value in lp_build_sample_aos()");
984 /* fall-through */
985 case PIPE_TEX_MIPFILTER_NONE:
986 /* always use mip level 0 */
987 if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
988 /* XXX this is a work-around for an apparent bug in LLVM 2.7.
989 * We should be able to set ilevel0 = const(0) but that causes
990 * bad x86 code to be emitted.
991 */
992 assert(lod_ipart);
993 lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
994 }
995 else {
996 ilevel0 = i32t_zero;
997 }
998 break;
999 case PIPE_TEX_MIPFILTER_NEAREST:
1000 assert(lod_ipart);
1001 lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
1002 break;
1003 case PIPE_TEX_MIPFILTER_LINEAR:
1004 assert(lod_ipart);
1005 assert(lod_fpart);
1006 lp_build_linear_mip_levels(bld, unit,
1007 lod_ipart, &lod_fpart,
1008 &ilevel0, &ilevel1);
1009 break;
1010 }
1011
1012 /*
1013 * Get/interpolate texture colors.
1014 */
1015
1016 packed_lo = lp_build_alloca(builder, h16_bld.vec_type, "packed_lo");
1017 packed_hi = lp_build_alloca(builder, h16_bld.vec_type, "packed_hi");
1018
1019 if (min_filter == mag_filter) {
1020 /* no need to distinquish between minification and magnification */
1021 lp_build_sample_mipmap(bld,
1022 min_filter, mip_filter,
1023 s, t, r,
1024 ilevel0, ilevel1, lod_fpart,
1025 packed_lo, packed_hi);
1026 }
1027 else {
1028 /* Emit conditional to choose min image filter or mag image filter
1029 * depending on the lod being > 0 or <= 0, respectively.
1030 */
1031 struct lp_build_flow_context *flow_ctx;
1032 struct lp_build_if_state if_ctx;
1033 LLVMValueRef minify;
1034
1035 flow_ctx = lp_build_flow_create(builder);
1036
1037 /* minify = lod >= 0.0 */
1038 minify = LLVMBuildICmp(builder, LLVMIntSGE,
1039 lod_ipart, int_bld->zero, "");
1040
1041 lp_build_if(&if_ctx, flow_ctx, builder, minify);
1042 {
1043 /* Use the minification filter */
1044 lp_build_sample_mipmap(bld,
1045 min_filter, mip_filter,
1046 s, t, r,
1047 ilevel0, ilevel1, lod_fpart,
1048 packed_lo, packed_hi);
1049 }
1050 lp_build_else(&if_ctx);
1051 {
1052 /* Use the magnification filter */
1053 lp_build_sample_mipmap(bld,
1054 mag_filter, PIPE_TEX_MIPFILTER_NONE,
1055 s, t, r,
1056 i32t_zero, NULL, NULL,
1057 packed_lo, packed_hi);
1058 }
1059 lp_build_endif(&if_ctx);
1060
1061 lp_build_flow_destroy(flow_ctx);
1062 }
1063
1064 /*
1065 * combine the values stored in 'packed_lo' and 'packed_hi' variables
1066 * into 'packed'
1067 */
1068 packed = lp_build_pack2(builder,
1069 h16_bld.type, lp_type_unorm(8),
1070 LLVMBuildLoad(builder, packed_lo, ""),
1071 LLVMBuildLoad(builder, packed_hi, ""));
1072
1073 /*
1074 * Convert to SoA and swizzle.
1075 */
1076 lp_build_rgba8_to_f32_soa(builder,
1077 bld->texel_type,
1078 packed, unswizzled);
1079
1080 if (util_format_is_rgba8_variant(bld->format_desc)) {
1081 lp_build_format_swizzle_soa(bld->format_desc,
1082 &bld->texel_bld,
1083 unswizzled, texel_out);
1084 }
1085 else {
1086 texel_out[0] = unswizzled[0];
1087 texel_out[1] = unswizzled[1];
1088 texel_out[2] = unswizzled[2];
1089 texel_out[3] = unswizzled[3];
1090 }
1091
1092 apply_sampler_swizzle(bld, texel_out);
1093 }