/**************************************************************************
 *
 * Copyright 2010 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * Texture sampling -- AoS.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 * @author Brian Paul <brianp@vmware.com>
 */

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/u_debug.h"
#include "util/u_dump.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_format.h"
#include "lp_bld_debug.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_conv.h"
#include "lp_bld_arit.h"
#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_pack.h"
#include "lp_bld_flow.h"
#include "lp_bld_gather.h"
#include "lp_bld_format.h"
#include "lp_bld_sample.h"
#include "lp_bld_sample_aos.h"
#include "lp_bld_quad.h"


/**
 * Build LLVM code for texture coord wrapping, for nearest filtering,
 * for scaled integer texcoords.
 * \param block_length is the length of the pixel block along the
 *                     coordinate axis
 * \param coord the incoming texcoord (s,t,r or q) scaled to the texture size
 * \param length the texture size along one dimension
 * \param stride pixel stride along the coordinate axis (in bytes)
 * \param is_pot if TRUE, length is a power of two
 * \param wrap_mode one of PIPE_TEX_WRAP_x
 * \param out_offset byte offset for the wrapped coordinate
 * \param out_i resulting sub-block pixel coordinate for coord
 */
static void
lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
                                 unsigned block_length,
                                 LLVMValueRef coord,
                                 LLVMValueRef length,
                                 LLVMValueRef stride,
                                 boolean is_pot,
                                 unsigned wrap_mode,
                                 LLVMValueRef *out_offset,
                                 LLVMValueRef *out_i)
{
   struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMValueRef length_minus_one;

   length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if(is_pot)
         coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
      else {
         /* Add a bias to the texcoord to handle negative coords */
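         /*
          * Note: the bias simply shifts the texcoord into non-negative range
          * before the unsigned modulo below; e.g. with length 7 and coord -3,
          * (-3 + 7*1024) % 7 == 4, which is -3 mod 7.  This assumes incoming
          * coords never fall below -1024*length.
          */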
         LLVMValueRef bias = lp_build_mul_imm(uint_coord_bld, length, 1024);
         coord = LLVMBuildAdd(bld->builder, coord, bias, "");
         coord = LLVMBuildURem(bld->builder, coord, length, "");
      }
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
      coord = lp_build_min(int_coord_bld, coord, length_minus_one);
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
   default:
      assert(0);
   }

   lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride,
                                  out_offset, out_i);
}


/**
 * Build LLVM code for texture coord wrapping, for linear filtering,
 * for scaled integer texcoords.
 * \param block_length is the length of the pixel block along the
 *                     coordinate axis
 * \param coord0 the incoming texcoord (s,t,r or q) scaled to the texture size
 * \param length the texture size along one dimension
 * \param stride pixel stride along the coordinate axis (in bytes)
 * \param is_pot if TRUE, length is a power of two
 * \param wrap_mode one of PIPE_TEX_WRAP_x
 * \param offset0 resulting relative offset for coord0
 * \param offset1 resulting relative offset for coord0 + 1
 * \param i0 resulting sub-block pixel coordinate for coord0
 * \param i1 resulting sub-block pixel coordinate for coord0 + 1
 */
static void
lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
                                unsigned block_length,
                                LLVMValueRef coord0,
                                LLVMValueRef length,
                                LLVMValueRef stride,
                                boolean is_pot,
                                unsigned wrap_mode,
                                LLVMValueRef *offset0,
                                LLVMValueRef *offset1,
                                LLVMValueRef *i0,
                                LLVMValueRef *i1)
{
   struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMValueRef length_minus_one;
   LLVMValueRef lmask, umask, mask;

   if (block_length != 1) {
      /*
       * If the pixel block covers more than one pixel then there is no easy
       * way to calculate offset1 relative to offset0. Instead, compute them
       * independently.
       */

      LLVMValueRef coord1;

      lp_build_sample_wrap_nearest_int(bld,
                                       block_length,
                                       coord0,
                                       length,
                                       stride,
                                       is_pot,
                                       wrap_mode,
                                       offset0, i0);

      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);

      lp_build_sample_wrap_nearest_int(bld,
                                       block_length,
                                       coord1,
                                       length,
                                       stride,
                                       is_pot,
                                       wrap_mode,
                                       offset1, i1);

      return;
   }

   /*
    * Scalar pixels -- try to compute offset0 and offset1 with a single stride
    * multiplication.
    */

   *i0 = uint_coord_bld->zero;
   *i1 = uint_coord_bld->zero;

   length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
      }
      else {
         /* Add a bias to the texcoord to handle negative coords */
         LLVMValueRef bias = lp_build_mul_imm(uint_coord_bld, length, 1024);
         coord0 = LLVMBuildAdd(bld->builder, coord0, bias, "");
         coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
      }

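      /*
       * Sketch of the trick below: offset1 normally points one texel past
       * offset0, but when coord0 == length-1 the neighbour wraps around to
       * texel 0.  Rather than recomputing the wrap, build an all-ones /
       * all-zeros mask from (coord0 != length-1) and AND it into
       * offset0 + stride, which forces the wrapped offset to 0.
       */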
      mask = lp_build_compare(bld->builder, int_coord_bld->type,
                              PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);

      *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
      *offset1 = LLVMBuildAnd(bld->builder,
                              lp_build_add(uint_coord_bld, *offset0, stride),
                              mask, "");
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      lmask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
                               PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
      umask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
                               PIPE_FUNC_LESS, coord0, length_minus_one);

      coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
      coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);

      mask = LLVMBuildAnd(bld->builder, lmask, umask, "");

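      /*
       * mask is all ones only while coord0 lies strictly inside
       * [0, length-1); at the upper edge (or for out-of-range coords) the
       * stride is masked to zero so offset1 == offset0 and both filter taps
       * read the same edge texel.
       */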
      *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
      *offset1 = lp_build_add(uint_coord_bld,
                              *offset0,
                              LLVMBuildAnd(bld->builder, stride, mask, ""));
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
   default:
      assert(0);
      *offset0 = uint_coord_bld->zero;
      *offset1 = uint_coord_bld->zero;
      break;
   }
}


/**
 * Sample a single texture image with nearest sampling.
 * If sampling a cube texture, r = cube face in [0,5].
 * Return filtered color as two vectors of 16-bit fixed point values.
 */
static void
lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
                              LLVMValueRef width_vec,
                              LLVMValueRef height_vec,
                              LLVMValueRef depth_vec,
                              LLVMValueRef row_stride_vec,
                              LLVMValueRef img_stride_vec,
                              LLVMValueRef data_ptr,
                              LLVMValueRef s,
                              LLVMValueRef t,
                              LLVMValueRef r,
                              LLVMValueRef *colors_lo,
                              LLVMValueRef *colors_hi)
{
   const unsigned dims = bld->dims;
   LLVMBuilderRef builder = bld->builder;
   struct lp_build_context i32, h16, u8n;
   LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
   LLVMValueRef i32_c8;
   LLVMValueRef s_ipart, t_ipart, r_ipart;
   LLVMValueRef x_stride;
   LLVMValueRef x_offset, offset;
   LLVMValueRef x_subcoord, y_subcoord, z_subcoord;

   lp_build_context_init(&i32, builder, lp_type_int_vec(32));
   lp_build_context_init(&h16, builder, lp_type_ufixed(16));
   lp_build_context_init(&u8n, builder, lp_type_unorm(8));

   i32_vec_type = lp_build_vec_type(i32.type);
   h16_vec_type = lp_build_vec_type(h16.type);
   u8n_vec_type = lp_build_vec_type(u8n.type);

   if (bld->static_state->normalized_coords) {
      /* s = s * width, t = t * height */
      LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
      LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec,
                                              coord_vec_type, "");
      s = lp_build_mul(&bld->coord_bld, s, fp_width);
      if (dims >= 2) {
         LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec,
                                                  coord_vec_type, "");
         t = lp_build_mul(&bld->coord_bld, t, fp_height);
         if (dims >= 3) {
            LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec,
                                                    coord_vec_type, "");
            r = lp_build_mul(&bld->coord_bld, r, fp_depth);
         }
      }
   }

   /* scale coords by 256 (8 fractional bits) */
   s = lp_build_mul_imm(&bld->coord_bld, s, 256);
   if (dims >= 2)
      t = lp_build_mul_imm(&bld->coord_bld, t, 256);
   if (dims >= 3)
      r = lp_build_mul_imm(&bld->coord_bld, r, 256);

   /* convert float to int */
   s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
   if (dims >= 2)
      t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
   if (dims >= 3)
      r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");

   /* compute floor (shift right 8) */
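   /*
    * At this point the coords are 24.8 fixed point, so an arithmetic shift
    * by 8 is floor(): e.g. 2.5 becomes 640 and 640 >> 8 == 2, while -0.5
    * becomes -128 and -128 >> 8 == -1 (rounding towards -inf, as wanted).
    */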
   i32_c8 = lp_build_const_int_vec(i32.type, 8);
   s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
   if (dims >= 2)
      t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
   if (dims >= 3)
      r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");

   /* get pixel, row, image strides */
   x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
                                 bld->format_desc->block.bits/8);

   /* Do texcoord wrapping, compute texel offset */
   lp_build_sample_wrap_nearest_int(bld,
                                    bld->format_desc->block.width,
                                    s_ipart, width_vec, x_stride,
                                    bld->static_state->pot_width,
                                    bld->static_state->wrap_s,
                                    &x_offset, &x_subcoord);
   offset = x_offset;
   if (dims >= 2) {
      LLVMValueRef y_offset;
      lp_build_sample_wrap_nearest_int(bld,
                                       bld->format_desc->block.height,
                                       t_ipart, height_vec, row_stride_vec,
                                       bld->static_state->pot_height,
                                       bld->static_state->wrap_t,
                                       &y_offset, &y_subcoord);
      offset = lp_build_add(&bld->uint_coord_bld, offset, y_offset);
      if (dims >= 3) {
         LLVMValueRef z_offset;
         lp_build_sample_wrap_nearest_int(bld,
                                          1, /* block length (depth) */
                                          r_ipart, depth_vec, img_stride_vec,
                                          bld->static_state->pot_depth,
                                          bld->static_state->wrap_r,
                                          &z_offset, &z_subcoord);
         offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset);
      }
      else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
         LLVMValueRef z_offset;
         /* The r coord is the cube face in [0,5] */
         z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec);
         offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset);
      }
   }

   /*
    * Fetch the pixels as 4 x 32bit (rgba order might differ):
    *
    *   rgba0 rgba1 rgba2 rgba3
    *
    * bit cast them into 16 x u8
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
    *
    * unpack them into two 8 x i16:
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1
    *   r2 g2 b2 a2 r3 g3 b3 a3
    *
    * The higher 8 bits of the resulting elements will be zero.
    */
   {
      LLVMValueRef rgba8;

      if (util_format_is_rgba8_variant(bld->format_desc)) {
         /*
          * Given the format is a rgba8, just read the pixels as is,
          * without any swizzling. Swizzling will be done later.
          */
         rgba8 = lp_build_gather(bld->builder,
                                 bld->texel_type.length,
                                 bld->format_desc->block.bits,
                                 bld->texel_type.width,
                                 data_ptr, offset);

         rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
      }
      else {
         rgba8 = lp_build_fetch_rgba_aos(bld->builder,
                                         bld->format_desc,
                                         u8n.type,
                                         data_ptr, offset,
                                         x_subcoord,
                                         y_subcoord);
      }

      /* Expand one 4*rgba8 to two 2*rgba16 */
      lp_build_unpack2(builder, u8n.type, h16.type,
                       rgba8,
                       colors_lo, colors_hi);
   }
}


/**
 * Sample a single texture image with (bi-)(tri-)linear sampling.
 * Return filtered color as two vectors of 16-bit fixed point values.
 */
static void
lp_build_sample_image_linear(struct lp_build_sample_context *bld,
                             LLVMValueRef width_vec,
                             LLVMValueRef height_vec,
                             LLVMValueRef depth_vec,
                             LLVMValueRef row_stride_vec,
                             LLVMValueRef img_stride_vec,
                             LLVMValueRef data_ptr,
                             LLVMValueRef s,
                             LLVMValueRef t,
                             LLVMValueRef r,
                             LLVMValueRef *colors_lo,
                             LLVMValueRef *colors_hi)
{
   const unsigned dims = bld->dims;
   LLVMBuilderRef builder = bld->builder;
   struct lp_build_context i32, h16, u8n;
   LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
   LLVMValueRef i32_c8, i32_c128, i32_c255;
   LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
   LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
   LLVMValueRef r_ipart, r_fpart, r_fpart_lo, r_fpart_hi;
   LLVMValueRef x_stride, y_stride, z_stride;
   LLVMValueRef x_offset0, x_offset1;
   LLVMValueRef y_offset0, y_offset1;
   LLVMValueRef z_offset0, z_offset1;
   LLVMValueRef offset[2][2][2]; /* [z][y][x] */
   LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2];
   LLVMValueRef neighbors_lo[2][2][2]; /* [z][y][x] */
   LLVMValueRef neighbors_hi[2][2][2]; /* [z][y][x] */
   LLVMValueRef packed_lo, packed_hi;
   unsigned x, y, z;
   unsigned i, j, k;
   unsigned numj, numk;

   lp_build_context_init(&i32, builder, lp_type_int_vec(32));
   lp_build_context_init(&h16, builder, lp_type_ufixed(16));
   lp_build_context_init(&u8n, builder, lp_type_unorm(8));

   i32_vec_type = lp_build_vec_type(i32.type);
   h16_vec_type = lp_build_vec_type(h16.type);
   u8n_vec_type = lp_build_vec_type(u8n.type);

   if (bld->static_state->normalized_coords) {
      /* s = s * width, t = t * height */
      LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
      LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec,
                                              coord_vec_type, "");
      s = lp_build_mul(&bld->coord_bld, s, fp_width);
      if (dims >= 2) {
         LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec,
                                                  coord_vec_type, "");
         t = lp_build_mul(&bld->coord_bld, t, fp_height);
      }
      if (dims >= 3) {
         LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec,
                                                 coord_vec_type, "");
         r = lp_build_mul(&bld->coord_bld, r, fp_depth);
      }
   }

   /* scale coords by 256 (8 fractional bits) */
   s = lp_build_mul_imm(&bld->coord_bld, s, 256);
   if (dims >= 2)
      t = lp_build_mul_imm(&bld->coord_bld, t, 256);
   if (dims >= 3)
      r = lp_build_mul_imm(&bld->coord_bld, r, 256);

   /* convert float to int */
   s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
   if (dims >= 2)
      t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
   if (dims >= 3)
      r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");

   /* subtract 0.5 (add -128) */
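   /*
    * Subtracting half a texel here means the integer part below selects the
    * texel whose center lies at or below the sample point, and the
    * fractional part becomes the linear blend weight towards the next texel.
    */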
   i32_c128 = lp_build_const_int_vec(i32.type, -128);
   s = LLVMBuildAdd(builder, s, i32_c128, "");
   if (dims >= 2) {
      t = LLVMBuildAdd(builder, t, i32_c128, "");
   }
   if (dims >= 3) {
      r = LLVMBuildAdd(builder, r, i32_c128, "");
   }

   /* compute floor (shift right 8) */
   i32_c8 = lp_build_const_int_vec(i32.type, 8);
   s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
   if (dims >= 2)
      t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
   if (dims >= 3)
      r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");

   /* compute fractional part (AND with 0xff) */
   i32_c255 = lp_build_const_int_vec(i32.type, 255);
   s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
   if (dims >= 2)
      t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
   if (dims >= 3)
      r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");

   /* get pixel, row and image strides */
   x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
                                 bld->format_desc->block.bits/8);
   y_stride = row_stride_vec;
   z_stride = img_stride_vec;

   /* do texcoord wrapping and compute texel offsets */
   lp_build_sample_wrap_linear_int(bld,
                                   bld->format_desc->block.width,
                                   s_ipart, width_vec, x_stride,
                                   bld->static_state->pot_width,
                                   bld->static_state->wrap_s,
                                   &x_offset0, &x_offset1,
                                   &x_subcoord[0], &x_subcoord[1]);
   for (z = 0; z < 2; z++) {
      for (y = 0; y < 2; y++) {
         offset[z][y][0] = x_offset0;
         offset[z][y][1] = x_offset1;
      }
   }

   if (dims >= 2) {
      lp_build_sample_wrap_linear_int(bld,
                                      bld->format_desc->block.height,
                                      t_ipart, height_vec, y_stride,
                                      bld->static_state->pot_height,
                                      bld->static_state->wrap_t,
                                      &y_offset0, &y_offset1,
                                      &y_subcoord[0], &y_subcoord[1]);

      for (z = 0; z < 2; z++) {
         for (x = 0; x < 2; x++) {
            offset[z][0][x] = lp_build_add(&bld->uint_coord_bld,
                                           offset[z][0][x], y_offset0);
            offset[z][1][x] = lp_build_add(&bld->uint_coord_bld,
                                           offset[z][1][x], y_offset1);
         }
      }
   }

   if (dims >= 3) {
      lp_build_sample_wrap_linear_int(bld,
                                      bld->format_desc->block.height,
                                      r_ipart, depth_vec, z_stride,
                                      bld->static_state->pot_depth,
                                      bld->static_state->wrap_r,
                                      &z_offset0, &z_offset1,
                                      &z_subcoord[0], &z_subcoord[1]);
      for (y = 0; y < 2; y++) {
         for (x = 0; x < 2; x++) {
            offset[0][y][x] = lp_build_add(&bld->uint_coord_bld,
                                           offset[0][y][x], z_offset0);
            offset[1][y][x] = lp_build_add(&bld->uint_coord_bld,
                                           offset[1][y][x], z_offset1);
         }
      }
   }
   else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
      LLVMValueRef z_offset;
      z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec);
      for (y = 0; y < 2; y++) {
         for (x = 0; x < 2; x++) {
            /* The r coord is the cube face in [0,5] */
            offset[0][y][x] = lp_build_add(&bld->uint_coord_bld,
                                           offset[0][y][x], z_offset);
         }
      }
   }

   /*
    * Transform 4 x i32 in
    *
    *   s_fpart = {s0, s1, s2, s3}
    *
    * into 8 x i16
    *
    *   s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
    *
    * into two 8 x i16
    *
    *   s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
    *   s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
    *
    * and likewise for t_fpart. There is no risk of losing precision here
    * since the fractional parts only use the lower 8bits.
    */
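   /*
    * For the usual 4-wide float case (8 x i16 here, little endian) the
    * shuffle vectors built below are {0,0,0,0, 2,2,2,2} and
    * {4,4,4,4, 6,6,6,6}: they pick the low 16-bit halves holding s0..s3 and
    * replicate each one four times, once per color channel.
    */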
   s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
   if (dims >= 2)
      t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
   if (dims >= 3)
      r_fpart = LLVMBuildBitCast(builder, r_fpart, h16_vec_type, "");

   {
      LLVMTypeRef elem_type = LLVMInt32Type();
      LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
      LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
      LLVMValueRef shuffle_lo;
      LLVMValueRef shuffle_hi;

      for (j = 0; j < h16.type.length; j += 4) {
#ifdef PIPE_ARCH_LITTLE_ENDIAN
         unsigned subindex = 0;
#else
         unsigned subindex = 1;
#endif
         LLVMValueRef index;

         index = LLVMConstInt(elem_type, j/2 + subindex, 0);
         for (i = 0; i < 4; ++i)
            shuffles_lo[j + i] = index;

         index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
         for (i = 0; i < 4; ++i)
            shuffles_hi[j + i] = index;
      }

      shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
      shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);

      s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
                                          shuffle_lo, "");
      s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
                                          shuffle_hi, "");
      if (dims >= 2) {
         t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
                                             shuffle_lo, "");
         t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
                                             shuffle_hi, "");
      }
      if (dims >= 3) {
         r_fpart_lo = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
                                             shuffle_lo, "");
         r_fpart_hi = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
                                             shuffle_hi, "");
      }
   }

   /*
    * Fetch the pixels as 4 x 32bit (rgba order might differ):
    *
    *   rgba0 rgba1 rgba2 rgba3
    *
    * bit cast them into 16 x u8
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
    *
    * unpack them into two 8 x i16:
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1
    *   r2 g2 b2 a2 r3 g3 b3 a3
    *
    * The higher 8 bits of the resulting elements will be zero.
    */
   numj = 1 + (dims >= 2);
   numk = 1 + (dims >= 3);

   for (k = 0; k < numk; k++) {
      for (j = 0; j < numj; j++) {
         for (i = 0; i < 2; i++) {
            LLVMValueRef rgba8;

            if (util_format_is_rgba8_variant(bld->format_desc)) {
               /*
                * Given the format is a rgba8, just read the pixels as is,
                * without any swizzling. Swizzling will be done later.
                */
               rgba8 = lp_build_gather(bld->builder,
                                       bld->texel_type.length,
                                       bld->format_desc->block.bits,
                                       bld->texel_type.width,
                                       data_ptr, offset[k][j][i]);

               rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
            }
            else {
               rgba8 = lp_build_fetch_rgba_aos(bld->builder,
                                               bld->format_desc,
                                               u8n.type,
                                               data_ptr, offset[k][j][i],
                                               x_subcoord[i],
                                               y_subcoord[j]);
            }

            /* Expand one 4*rgba8 to two 2*rgba16 */
            lp_build_unpack2(builder, u8n.type, h16.type,
                             rgba8,
                             &neighbors_lo[k][j][i], &neighbors_hi[k][j][i]);
         }
      }
   }

   /*
    * Linear interpolation with 8.8 fixed point.
    */
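   /*
    * Roughly: each 16-bit lane holds an 8-bit quantity, and the weights
    * (s_fpart_lo/hi etc.) are 0..255 standing for the fraction w/256, so
    * each lerp below is conceptually v0 + w/256 * (v1 - v0) evaluated in
    * 8.8 fixed point.
    */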
   if (dims == 1) {
      /* 1-D lerp */
      packed_lo = lp_build_lerp(&h16,
                                s_fpart_lo,
                                neighbors_lo[0][0][0],
                                neighbors_lo[0][0][1]);

      packed_hi = lp_build_lerp(&h16,
                                s_fpart_hi,
                                neighbors_hi[0][0][0],
                                neighbors_hi[0][0][1]);
   }
   else {
      /* 2-D lerp */
      packed_lo = lp_build_lerp_2d(&h16,
                                   s_fpart_lo, t_fpart_lo,
                                   neighbors_lo[0][0][0],
                                   neighbors_lo[0][0][1],
                                   neighbors_lo[0][1][0],
                                   neighbors_lo[0][1][1]);

      packed_hi = lp_build_lerp_2d(&h16,
                                   s_fpart_hi, t_fpart_hi,
                                   neighbors_hi[0][0][0],
                                   neighbors_hi[0][0][1],
                                   neighbors_hi[0][1][0],
                                   neighbors_hi[0][1][1]);

      if (dims >= 3) {
         LLVMValueRef packed_lo2, packed_hi2;

         /* lerp in the second z slice */
         packed_lo2 = lp_build_lerp_2d(&h16,
                                       s_fpart_lo, t_fpart_lo,
                                       neighbors_lo[1][0][0],
                                       neighbors_lo[1][0][1],
                                       neighbors_lo[1][1][0],
                                       neighbors_lo[1][1][1]);

         packed_hi2 = lp_build_lerp_2d(&h16,
                                       s_fpart_hi, t_fpart_hi,
                                       neighbors_hi[1][0][0],
                                       neighbors_hi[1][0][1],
                                       neighbors_hi[1][1][0],
                                       neighbors_hi[1][1][1]);
         /* interp between two z slices */
         packed_lo = lp_build_lerp(&h16, r_fpart_lo,
                                   packed_lo, packed_lo2);
         packed_hi = lp_build_lerp(&h16, r_fpart_hi,
                                   packed_hi, packed_hi2);
      }
   }

   *colors_lo = packed_lo;
   *colors_hi = packed_hi;
}


/**
 * Sample the texture/mipmap using given image filter and mip filter.
 * ilevel0 and ilevel1 indicate the two mipmap levels to sample from;
 * the corresponding sizes, strides and data pointers are looked up here.
 * If we're using nearest miplevel sampling, ilevel1 will be NULL/unused.
 */
static void
lp_build_sample_mipmap(struct lp_build_sample_context *bld,
                       unsigned img_filter,
                       unsigned mip_filter,
                       LLVMValueRef s,
                       LLVMValueRef t,
                       LLVMValueRef r,
                       LLVMValueRef ilevel0,
                       LLVMValueRef ilevel1,
                       LLVMValueRef lod_fpart,
                       LLVMValueRef colors_lo_var,
                       LLVMValueRef colors_hi_var)
{
   LLVMBuilderRef builder = bld->builder;
   LLVMValueRef width0_vec;
   LLVMValueRef width1_vec;
   LLVMValueRef height0_vec;
   LLVMValueRef height1_vec;
   LLVMValueRef depth0_vec;
   LLVMValueRef depth1_vec;
   LLVMValueRef row_stride0_vec;
   LLVMValueRef row_stride1_vec;
   LLVMValueRef img_stride0_vec;
   LLVMValueRef img_stride1_vec;
   LLVMValueRef data_ptr0;
   LLVMValueRef data_ptr1;
   LLVMValueRef colors0_lo, colors0_hi;
   LLVMValueRef colors1_lo, colors1_hi;


   /* sample the first mipmap level */
   lp_build_mipmap_level_sizes(bld, ilevel0,
                               &width0_vec, &height0_vec, &depth0_vec,
                               &row_stride0_vec, &img_stride0_vec);
   data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
   if (img_filter == PIPE_TEX_FILTER_NEAREST) {
      lp_build_sample_image_nearest(bld,
                                    width0_vec, height0_vec, depth0_vec,
                                    row_stride0_vec, img_stride0_vec,
                                    data_ptr0, s, t, r,
                                    &colors0_lo, &colors0_hi);
   }
   else {
      assert(img_filter == PIPE_TEX_FILTER_LINEAR);
      lp_build_sample_image_linear(bld,
                                   width0_vec, height0_vec, depth0_vec,
                                   row_stride0_vec, img_stride0_vec,
                                   data_ptr0, s, t, r,
                                   &colors0_lo, &colors0_hi);
   }

   /* Store the first level's colors in the output variables */
   LLVMBuildStore(builder, colors0_lo, colors_lo_var);
   LLVMBuildStore(builder, colors0_hi, colors_hi_var);

   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      LLVMValueRef h16_scale = LLVMConstReal(LLVMFloatType(), 256.0);
      LLVMTypeRef i32_type = LLVMIntType(32);
      struct lp_build_flow_context *flow_ctx;
      struct lp_build_if_state if_ctx;
      LLVMValueRef need_lerp;

      flow_ctx = lp_build_flow_create(builder);

      lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16_scale, "");
      lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32_type, "lod_fpart.fixed16");
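      /* lod_fpart is now in the same 8.8 fixed-point form as the filter
       * weights: an integer in [0,255] standing for the [0,1) fraction
       * between the two mipmap levels.
       */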

      /* need_lerp = lod_fpart > 0 */
      need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
                                lod_fpart, LLVMConstNull(i32_type),
                                "need_lerp");

      lp_build_if(&if_ctx, flow_ctx, builder, need_lerp);
      {
         struct lp_build_context h16_bld;

         lp_build_context_init(&h16_bld, builder, lp_type_ufixed(16));

         /* sample the second mipmap level */
         lp_build_mipmap_level_sizes(bld, ilevel1,
                                     &width1_vec, &height1_vec, &depth1_vec,
                                     &row_stride1_vec, &img_stride1_vec);
         data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
         if (img_filter == PIPE_TEX_FILTER_NEAREST) {
            lp_build_sample_image_nearest(bld,
                                          width1_vec, height1_vec, depth1_vec,
                                          row_stride1_vec, img_stride1_vec,
                                          data_ptr1, s, t, r,
                                          &colors1_lo, &colors1_hi);
         }
         else {
            lp_build_sample_image_linear(bld,
                                         width1_vec, height1_vec, depth1_vec,
                                         row_stride1_vec, img_stride1_vec,
                                         data_ptr1, s, t, r,
                                         &colors1_lo, &colors1_hi);
         }

         /* interpolate samples from the two mipmap levels */

         lod_fpart = LLVMBuildTrunc(builder, lod_fpart, h16_bld.elem_type, "");
         lod_fpart = lp_build_broadcast_scalar(&h16_bld, lod_fpart);

         colors0_lo = lp_build_lerp(&h16_bld, lod_fpart,
                                    colors0_lo, colors1_lo);
         colors0_hi = lp_build_lerp(&h16_bld, lod_fpart,
                                    colors0_hi, colors1_hi);

         LLVMBuildStore(builder, colors0_lo, colors_lo_var);
         LLVMBuildStore(builder, colors0_hi, colors_hi_var);
      }
      lp_build_endif(&if_ctx);

      lp_build_flow_destroy(flow_ctx);
   }
}



/**
 * Texture sampling in AoS format. Used when sampling common 32-bit/texel
 * formats. 1D/2D/3D/cube textures are supported, with all mipmap sampling
 * modes, but only the simple texture coord wrap modes.
 */
void
lp_build_sample_aos(struct lp_build_sample_context *bld,
                    unsigned unit,
                    LLVMValueRef s,
                    LLVMValueRef t,
                    LLVMValueRef r,
                    const LLVMValueRef *ddx,
                    const LLVMValueRef *ddy,
                    LLVMValueRef lod_bias, /* optional */
                    LLVMValueRef explicit_lod, /* optional */
                    LLVMValueRef texel_out[4])
{
   struct lp_build_context *int_bld = &bld->int_bld;
   LLVMBuilderRef builder = bld->builder;
   const unsigned mip_filter = bld->static_state->min_mip_filter;
   const unsigned min_filter = bld->static_state->min_img_filter;
   const unsigned mag_filter = bld->static_state->mag_img_filter;
   const unsigned dims = bld->dims;
   LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
   LLVMValueRef ilevel0, ilevel1 = NULL;
   LLVMValueRef packed, packed_lo, packed_hi;
   LLVMValueRef unswizzled[4];
   LLVMValueRef face_ddx[4], face_ddy[4];
   struct lp_build_context h16_bld;
   LLVMTypeRef i32t = LLVMInt32Type();
   LLVMValueRef i32t_zero = LLVMConstInt(i32t, 0, 0);

   /* we only support the common/simple wrap modes at this time */
   assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s));
   if (dims >= 2)
      assert(lp_is_simple_wrap_mode(bld->static_state->wrap_t));
   if (dims >= 3)
      assert(lp_is_simple_wrap_mode(bld->static_state->wrap_r));


   /* make 16-bit fixed-pt builder context */
   lp_build_context_init(&h16_bld, builder, lp_type_ufixed(16));

   /* cube face selection, compute pre-face coords, etc. */
   if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
      LLVMValueRef face, face_s, face_t;
      lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
      s = face_s; /* vec */
      t = face_t; /* vec */
      /* use 'r' to indicate cube face */
      r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */

      /* recompute ddx, ddy using the new (s,t) face texcoords */
      face_ddx[0] = lp_build_ddx(&bld->coord_bld, s);
      face_ddx[1] = lp_build_ddx(&bld->coord_bld, t);
      face_ddx[2] = NULL;
      face_ddx[3] = NULL;
      face_ddy[0] = lp_build_ddy(&bld->coord_bld, s);
      face_ddy[1] = lp_build_ddy(&bld->coord_bld, t);
      face_ddy[2] = NULL;
      face_ddy[3] = NULL;
      ddx = face_ddx;
      ddy = face_ddy;
   }

   /*
    * Compute the level of detail (float).
    */
   if (min_filter != mag_filter ||
       mip_filter != PIPE_TEX_MIPFILTER_NONE) {
      /* Need to compute lod either to choose mipmap levels or to
       * distinguish between minification/magnification with one mipmap level.
       */
      lp_build_lod_selector(bld, unit, ddx, ddy,
                            lod_bias, explicit_lod,
                            mip_filter,
                            &lod_ipart, &lod_fpart);
   } else {
      lod_ipart = i32t_zero;
   }

   /*
    * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
    */
   switch (mip_filter) {
   default:
      assert(0 && "bad mip_filter value in lp_build_sample_aos()");
      /* fall-through */
   case PIPE_TEX_MIPFILTER_NONE:
      /* always use mip level 0 */
      if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
         /* XXX this is a work-around for an apparent bug in LLVM 2.7.
          * We should be able to set ilevel0 = const(0) but that causes
          * bad x86 code to be emitted.
          */
         assert(lod_ipart);
         lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
      }
      else {
         ilevel0 = i32t_zero;
      }
      break;
   case PIPE_TEX_MIPFILTER_NEAREST:
      assert(lod_ipart);
      lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
      break;
   case PIPE_TEX_MIPFILTER_LINEAR:
      assert(lod_ipart);
      assert(lod_fpart);
      lp_build_linear_mip_levels(bld, unit,
                                 lod_ipart, &lod_fpart,
                                 &ilevel0, &ilevel1);
      break;
   }

   /*
    * Get/interpolate texture colors.
    */

   packed_lo = lp_build_alloca(builder, h16_bld.vec_type, "packed_lo");
   packed_hi = lp_build_alloca(builder, h16_bld.vec_type, "packed_hi");
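   /*
    * The results come back through these allocas because
    * lp_build_sample_mipmap() may store them from within generated if/else
    * branches; LLVM's mem2reg pass can promote them back to SSA values.
    */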

   if (min_filter == mag_filter) {
      /* no need to distinguish between minification and magnification */
      lp_build_sample_mipmap(bld,
                             min_filter, mip_filter,
                             s, t, r,
                             ilevel0, ilevel1, lod_fpart,
                             packed_lo, packed_hi);
   }
   else {
      /* Emit conditional to choose min image filter or mag image filter
       * depending on the lod being >= 0 or < 0, respectively.
       */
      struct lp_build_flow_context *flow_ctx;
      struct lp_build_if_state if_ctx;
      LLVMValueRef minify;

      flow_ctx = lp_build_flow_create(builder);

      /* minify = lod_ipart >= 0 */
      minify = LLVMBuildICmp(builder, LLVMIntSGE,
                             lod_ipart, int_bld->zero, "");

      lp_build_if(&if_ctx, flow_ctx, builder, minify);
      {
         /* Use the minification filter */
         lp_build_sample_mipmap(bld,
                                min_filter, mip_filter,
                                s, t, r,
                                ilevel0, ilevel1, lod_fpart,
                                packed_lo, packed_hi);
      }
      lp_build_else(&if_ctx);
      {
         /* Use the magnification filter */
         lp_build_sample_mipmap(bld,
                                mag_filter, PIPE_TEX_MIPFILTER_NONE,
                                s, t, r,
                                i32t_zero, NULL, NULL,
                                packed_lo, packed_hi);
      }
      lp_build_endif(&if_ctx);

      lp_build_flow_destroy(flow_ctx);
   }

   /*
    * combine the values stored in 'packed_lo' and 'packed_hi' variables
    * into 'packed'
    */
   packed = lp_build_pack2(builder,
                           h16_bld.type, lp_type_unorm(8),
                           LLVMBuildLoad(builder, packed_lo, ""),
                           LLVMBuildLoad(builder, packed_hi, ""));
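   /* packed now holds all four pixels as 16 x u8: the two 8 x i16 halves
    * (values 0..255) are narrowed back to bytes and concatenated.
    */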

   /*
    * Convert to SoA and swizzle.
    */
   lp_build_rgba8_to_f32_soa(builder,
                             bld->texel_type,
                             packed, unswizzled);

   if (util_format_is_rgba8_variant(bld->format_desc)) {
      lp_build_format_swizzle_soa(bld->format_desc,
                                  &bld->texel_bld,
                                  unswizzled, texel_out);
   }
   else {
      texel_out[0] = unswizzled[0];
      texel_out[1] = unswizzled[1];
      texel_out[2] = unswizzled[2];
      texel_out[3] = unswizzled[3];
   }

   apply_sampler_swizzle(bld, texel_out);
}