gallivm: Remove dead experimental code.
[mesa.git] src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * Texture sampling -- AoS.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 * @author Brian Paul <brianp@vmware.com>
34 */
35
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "util/u_debug.h"
39 #include "util/u_dump.h"
40 #include "util/u_memory.h"
41 #include "util/u_math.h"
42 #include "util/u_format.h"
43 #include "lp_bld_debug.h"
44 #include "lp_bld_type.h"
45 #include "lp_bld_const.h"
46 #include "lp_bld_conv.h"
47 #include "lp_bld_arit.h"
48 #include "lp_bld_logic.h"
49 #include "lp_bld_swizzle.h"
50 #include "lp_bld_pack.h"
51 #include "lp_bld_flow.h"
52 #include "lp_bld_gather.h"
53 #include "lp_bld_format.h"
54 #include "lp_bld_sample.h"
55 #include "lp_bld_sample_aos.h"
56 #include "lp_bld_quad.h"
57
58
59 /**
60 * Build LLVM code for texture coord wrapping, for nearest filtering,
61 * for scaled integer texcoords.
62 * \param block_length is the length of the pixel block along the
63 * coordinate axis
64 * \param coord the incoming texcoord (s,t,r or q) scaled to the texture size
65 * \param length the texture size along one dimension
66 * \param stride pixel stride along the coordinate axis (in bytes)
67 * \param is_pot if TRUE, length is a power of two
68 * \param wrap_mode one of PIPE_TEX_WRAP_x
69 * \param out_offset byte offset for the wrapped coordinate
70 * \param out_i resulting sub-block pixel coordinate for coord0
71 */
72 static void
73 lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
74 unsigned block_length,
75 LLVMValueRef coord,
76 LLVMValueRef length,
77 LLVMValueRef stride,
78 boolean is_pot,
79 unsigned wrap_mode,
80 LLVMValueRef *out_offset,
81 LLVMValueRef *out_i)
82 {
83 struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
84 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
85 LLVMValueRef length_minus_one;
86
87 length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
88
89 switch(wrap_mode) {
90 case PIPE_TEX_WRAP_REPEAT:
91 if(is_pot)
92 coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
93 else
94 /* Signed remainder won't give the right results for negative
95 * dividends, but unsigned remainder does. */
96 coord = LLVMBuildURem(bld->builder, coord, length, "");
97 break;
98
99 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
100 coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
101 coord = lp_build_min(int_coord_bld, coord, length_minus_one);
102 break;
103
104 case PIPE_TEX_WRAP_CLAMP:
105 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
106 case PIPE_TEX_WRAP_MIRROR_REPEAT:
107 case PIPE_TEX_WRAP_MIRROR_CLAMP:
108 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
109 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
110 default:
111 assert(0);
112 }
113
114 lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride,
115 out_offset, out_i);
116 }
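/*
 * Illustrative sketch only (not generated code): the per-lane arithmetic
 * the REPEAT and CLAMP_TO_EDGE cases above boil down to, for a
 * power-of-two texture with length = 8 (length_minus_one = 7):
 *
 *    REPEAT:          11 & 7 -> 3,  and -3 & 7 -> 5, so negative scaled
 *                     coords wrap correctly as well
 *    CLAMP_TO_EDGE:   min(max(11, 0), 7) -> 7
 *
 * The wrapped coord is then conceptually split by
 * lp_build_sample_partial_offset() into a byte offset
 * (coord / block_length * stride) and the sub-block pixel index
 * (coord % block_length).
 */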
117
118
119 /**
120 * Build LLVM code for texture coord wrapping, for linear filtering,
121 * for scaled integer texcoords.
122 * \param block_length is the length of the pixel block along the
123 * coordinate axis
124 * \param coord0 the incoming texcoord (s,t,r or q) scaled to the texture size
125 * \param length the texture size along one dimension
126 * \param stride pixel stride along the coordinate axis (in bytes)
127 * \param is_pot if TRUE, length is a power of two
128 * \param wrap_mode one of PIPE_TEX_WRAP_x
129 * \param offset0 resulting relative offset for coord0
130 * \param offset1 resulting relative offset for coord0 + 1
131 * \param i0 resulting sub-block pixel coordinate for coord0
132 * \param i1 resulting sub-block pixel coordinate for coord0 + 1
133 */
134 static void
135 lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
136 unsigned block_length,
137 LLVMValueRef coord0,
138 LLVMValueRef length,
139 LLVMValueRef stride,
140 boolean is_pot,
141 unsigned wrap_mode,
142 LLVMValueRef *offset0,
143 LLVMValueRef *offset1,
144 LLVMValueRef *i0,
145 LLVMValueRef *i1)
146 {
147 struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
148 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
149 LLVMValueRef length_minus_one;
150 LLVMValueRef lmask, umask, mask;
151
152 if (block_length != 1) {
153 /*
154 * If the pixel block covers more than one pixel then there is no easy
155 * way to calculate offset1 relative to offset0. Instead, compute them
156 * independently.
157 */
158
159 LLVMValueRef coord1;
160
161 lp_build_sample_wrap_nearest_int(bld,
162 block_length,
163 coord0,
164 length,
165 stride,
166 is_pot,
167 wrap_mode,
168 offset0, i0);
169
170 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
171
172 lp_build_sample_wrap_nearest_int(bld,
173 block_length,
174 coord1,
175 length,
176 stride,
177 is_pot,
178 wrap_mode,
179 offset1, i1);
180
181 return;
182 }
183
184 /*
185 * Scalar pixels -- try to compute offset0 and offset1 with a single stride
186 * multiplication.
187 */
188
189 *i0 = uint_coord_bld->zero;
190 *i1 = uint_coord_bld->zero;
191
192 length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
193
194 switch(wrap_mode) {
195 case PIPE_TEX_WRAP_REPEAT:
196 if (is_pot) {
197 coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
198 }
199 else {
200 /* Signed remainder won't give the right results for negative
201 * dividends, but unsigned remainder does. */
202 coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
203 }
204
205 mask = lp_build_compare(bld->builder, int_coord_bld->type,
206 PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
207
208 *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
209 *offset1 = LLVMBuildAnd(bld->builder,
210 lp_build_add(uint_coord_bld, *offset0, stride),
211 mask, "");
212 break;
213
214 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
215 lmask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
216 PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
217 umask = lp_build_compare(int_coord_bld->builder, int_coord_bld->type,
218 PIPE_FUNC_LESS, coord0, length_minus_one);
219
220 coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
221 coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);
222
223 mask = LLVMBuildAnd(bld->builder, lmask, umask, "");
224
225 *offset0 = lp_build_mul(uint_coord_bld, coord0, stride);
226 *offset1 = lp_build_add(uint_coord_bld,
227 *offset0,
228 LLVMBuildAnd(bld->builder, stride, mask, ""));
229 break;
230
231 case PIPE_TEX_WRAP_CLAMP:
232 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
233 case PIPE_TEX_WRAP_MIRROR_REPEAT:
234 case PIPE_TEX_WRAP_MIRROR_CLAMP:
235 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
236 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
237 default:
238 assert(0);
239 *offset0 = uint_coord_bld->zero;
240 *offset1 = uint_coord_bld->zero;
241 break;
242 }
243 }
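/*
 * Illustrative sketch only: how the single-stride trick above behaves at
 * the texture edge for the scalar-pixel REPEAT case, with length = 8 and
 * stride = 4 bytes:
 *
 *    coord0 = 3:  mask = ~0 (3 != 7),  offset0 = 12, offset1 = 16
 *    coord0 = 7:  mask =  0 (7 == 7),  offset0 = 28, offset1 = (28+4) & 0 = 0
 *
 * i.e. the right neighbor of the last texel wraps back to texel 0 without
 * a second wrap computation.  CLAMP_TO_EDGE instead ANDs the mask with the
 * stride, so offset1 collapses onto offset0 at the edges and the edge
 * texel is effectively sampled twice.
 */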
244
245
246 /**
247 * Sample a single texture image with nearest sampling.
248 * If sampling a cube texture, r = cube face in [0,5].
249 * Return filtered color as two vectors of 16-bit fixed point values.
250 */
251 static void
252 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
253 LLVMValueRef width_vec,
254 LLVMValueRef height_vec,
255 LLVMValueRef depth_vec,
256 LLVMValueRef row_stride_vec,
257 LLVMValueRef img_stride_vec,
258 LLVMValueRef data_ptr,
259 LLVMValueRef s,
260 LLVMValueRef t,
261 LLVMValueRef r,
262 LLVMValueRef *colors_lo,
263 LLVMValueRef *colors_hi)
264 {
265 const int dims = texture_dims(bld->static_state->target);
266 LLVMBuilderRef builder = bld->builder;
267 struct lp_build_context i32, h16, u8n;
268 LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
269 LLVMValueRef i32_c8;
270 LLVMValueRef s_ipart, t_ipart, r_ipart;
271 LLVMValueRef x_stride;
272 LLVMValueRef x_offset, offset;
273 LLVMValueRef x_subcoord, y_subcoord, z_subcoord;
274
275 lp_build_context_init(&i32, builder, lp_type_int_vec(32));
276 lp_build_context_init(&h16, builder, lp_type_ufixed(16));
277 lp_build_context_init(&u8n, builder, lp_type_unorm(8));
278
279 i32_vec_type = lp_build_vec_type(i32.type);
280 h16_vec_type = lp_build_vec_type(h16.type);
281 u8n_vec_type = lp_build_vec_type(u8n.type);
282
283 if (bld->static_state->normalized_coords) {
284 /* s = s * width, t = t * height */
285 LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
286 LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec,
287 coord_vec_type, "");
288 s = lp_build_mul(&bld->coord_bld, s, fp_width);
289 if (dims >= 2) {
290 LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec,
291 coord_vec_type, "");
292 t = lp_build_mul(&bld->coord_bld, t, fp_height);
293 if (dims >= 3) {
294 LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec,
295 coord_vec_type, "");
296 r = lp_build_mul(&bld->coord_bld, r, fp_depth);
297 }
298 }
299 }
300
301 /* scale coords by 256 (8 fractional bits) */
302 s = lp_build_mul_imm(&bld->coord_bld, s, 256);
303 if (dims >= 2)
304 t = lp_build_mul_imm(&bld->coord_bld, t, 256);
305 if (dims >= 3)
306 r = lp_build_mul_imm(&bld->coord_bld, r, 256);
307
308 /* convert float to int */
309 s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
310 if (dims >= 2)
311 t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
312 if (dims >= 3)
313 r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");
314
315 /* compute floor (shift right 8) */
316 i32_c8 = lp_build_const_int_vec(i32.type, 8);
317 s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
318 if (dims >= 2)
319 t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
320 if (dims >= 3)
321 r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
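/*
 * Worked example of the 8.8 fixed-point conversion above (illustrative
 * only): s = 2.75 texels scales to 2.75 * 256 = 704, converts to the
 * integer 704, and 704 >> 8 = 2.  The arithmetic (sign-preserving) shift
 * matters: s = -0.5 scales to -128 and -128 >> 8 = -1, i.e. floor(-0.5),
 * which the texcoord wrapping below relies on for negative coords.
 */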
322
323 /* get pixel, row, image strides */
324 x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
325 bld->format_desc->block.bits/8);
326
327 /* Do texcoord wrapping, compute texel offset */
328 lp_build_sample_wrap_nearest_int(bld,
329 bld->format_desc->block.width,
330 s_ipart, width_vec, x_stride,
331 bld->static_state->pot_width,
332 bld->static_state->wrap_s,
333 &x_offset, &x_subcoord);
334 offset = x_offset;
335 if (dims >= 2) {
336 LLVMValueRef y_offset;
337 lp_build_sample_wrap_nearest_int(bld,
338 bld->format_desc->block.height,
339 t_ipart, height_vec, row_stride_vec,
340 bld->static_state->pot_height,
341 bld->static_state->wrap_t,
342 &y_offset, &y_subcoord);
343 offset = lp_build_add(&bld->uint_coord_bld, offset, y_offset);
344 if (dims >= 3) {
345 LLVMValueRef z_offset;
346 lp_build_sample_wrap_nearest_int(bld,
347 1, /* block length (depth) */
348 r_ipart, depth_vec, img_stride_vec,
349 bld->static_state->pot_depth,
350 bld->static_state->wrap_r,
351 &z_offset, &z_subcoord);
352 offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset);
353 }
354 else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
355 LLVMValueRef z_offset;
356 /* The r coord is the cube face in [0,5] */
357 z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec);
358 offset = lp_build_add(&bld->uint_coord_bld, offset, z_offset);
359 }
360 }
361
362 /*
363 * Fetch the pixels as 4 x 32bit (rgba order might differ):
364 *
365 * rgba0 rgba1 rgba2 rgba3
366 *
367 * bit cast them into 16 x u8
368 *
369 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
370 *
371 * unpack them into two 8 x i16:
372 *
373 * r0 g0 b0 a0 r1 g1 b1 a1
374 * r2 g2 b2 a2 r3 g3 b3 a3
375 *
376 * The higher 8 bits of the resulting elements will be zero.
377 */
378 {
379 LLVMValueRef rgba8;
380
381 if (util_format_is_rgba8_variant(bld->format_desc)) {
382 /*
383 * Given the format is a rgba8, just read the pixels as is,
384 * without any swizzling. Swizzling will be done later.
385 */
386 rgba8 = lp_build_gather(bld->builder,
387 bld->texel_type.length,
388 bld->format_desc->block.bits,
389 bld->texel_type.width,
390 data_ptr, offset);
391
392 rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
393 }
394 else {
395 rgba8 = lp_build_fetch_rgba_aos(bld->builder,
396 bld->format_desc,
397 u8n.type,
398 data_ptr, offset,
399 x_subcoord,
400 y_subcoord);
401 }
402
403 /* Expand one 4*rgba8 to two 2*rgba16 */
404 lp_build_unpack2(builder, u8n.type, h16.type,
405 rgba8,
406 colors_lo, colors_hi);
407 }
408 }
409
410
411 /**
412 * Sample a single texture image with (bi-)(tri-)linear sampling.
413 * Return filtered color as two vectors of 16-bit fixed point values.
414 */
415 static void
416 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
417 LLVMValueRef width_vec,
418 LLVMValueRef height_vec,
419 LLVMValueRef depth_vec,
420 LLVMValueRef row_stride_vec,
421 LLVMValueRef img_stride_vec,
422 LLVMValueRef data_ptr,
423 LLVMValueRef s,
424 LLVMValueRef t,
425 LLVMValueRef r,
426 LLVMValueRef *colors_lo,
427 LLVMValueRef *colors_hi)
428 {
429 const int dims = texture_dims(bld->static_state->target);
430 LLVMBuilderRef builder = bld->builder;
431 struct lp_build_context i32, h16, u8n;
432 LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
433 LLVMValueRef i32_c8, i32_c128, i32_c255;
434 LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
435 LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
436 LLVMValueRef r_ipart, r_fpart, r_fpart_lo, r_fpart_hi;
437 LLVMValueRef x_stride, y_stride, z_stride;
438 LLVMValueRef x_offset0, x_offset1;
439 LLVMValueRef y_offset0, y_offset1;
440 LLVMValueRef z_offset0, z_offset1;
441 LLVMValueRef offset[2][2][2]; /* [z][y][x] */
442 LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2];
443 LLVMValueRef neighbors_lo[2][2][2]; /* [z][y][x] */
444 LLVMValueRef neighbors_hi[2][2][2]; /* [z][y][x] */
445 LLVMValueRef packed_lo, packed_hi;
446 unsigned x, y, z;
447 unsigned i, j, k;
448 unsigned numj, numk;
449
450 lp_build_context_init(&i32, builder, lp_type_int_vec(32));
451 lp_build_context_init(&h16, builder, lp_type_ufixed(16));
452 lp_build_context_init(&u8n, builder, lp_type_unorm(8));
453
454 i32_vec_type = lp_build_vec_type(i32.type);
455 h16_vec_type = lp_build_vec_type(h16.type);
456 u8n_vec_type = lp_build_vec_type(u8n.type);
457
458 if (bld->static_state->normalized_coords) {
459 /* s = s * width, t = t * height */
460 LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
461 LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width_vec,
462 coord_vec_type, "");
463 s = lp_build_mul(&bld->coord_bld, s, fp_width);
464 if (dims >= 2) {
465 LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height_vec,
466 coord_vec_type, "");
467 t = lp_build_mul(&bld->coord_bld, t, fp_height);
468 }
469 if (dims >= 3) {
470 LLVMValueRef fp_depth = LLVMBuildSIToFP(bld->builder, depth_vec,
471 coord_vec_type, "");
472 r = lp_build_mul(&bld->coord_bld, r, fp_depth);
473 }
474 }
475
476 /* scale coords by 256 (8 fractional bits) */
477 s = lp_build_mul_imm(&bld->coord_bld, s, 256);
478 if (dims >= 2)
479 t = lp_build_mul_imm(&bld->coord_bld, t, 256);
480 if (dims >= 3)
481 r = lp_build_mul_imm(&bld->coord_bld, r, 256);
482
483 /* convert float to int */
484 s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
485 if (dims >= 2)
486 t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
487 if (dims >= 3)
488 r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");
489
490 /* subtract 0.5 (add -128) */
491 i32_c128 = lp_build_const_int_vec(i32.type, -128);
492 s = LLVMBuildAdd(builder, s, i32_c128, "");
493 if (dims >= 2) {
494 t = LLVMBuildAdd(builder, t, i32_c128, "");
495 }
496 if (dims >= 3) {
497 r = LLVMBuildAdd(builder, r, i32_c128, "");
498 }
499
500 /* compute floor (shift right 8) */
501 i32_c8 = lp_build_const_int_vec(i32.type, 8);
502 s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
503 if (dims >= 2)
504 t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
505 if (dims >= 3)
506 r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
507
508 /* compute fractional part (AND with 0xff) */
509 i32_c255 = lp_build_const_int_vec(i32.type, 255);
510 s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
511 if (dims >= 2)
512 t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
513 if (dims >= 3)
514 r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");
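/*
 * Worked example of the decomposition above (illustrative only): for
 * s = 2.25 texels the scaled value is 576; adding -128 (the -0.5
 * texel-center adjustment) gives 448, so
 *
 *    s_ipart = 448 >> 8   = 1     (left neighbor texel)
 *    s_fpart = 448 & 0xff = 192   (0.75 in 8.8 -- the lerp weight)
 *
 * which matches linear filtering between texels 1 and 2 with weight 0.75.
 */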
515
516 /* get pixel, row and image strides */
517 x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
518 bld->format_desc->block.bits/8);
519 y_stride = row_stride_vec;
520 z_stride = img_stride_vec;
521
522 /* do texcoord wrapping and compute texel offsets */
523 lp_build_sample_wrap_linear_int(bld,
524 bld->format_desc->block.width,
525 s_ipart, width_vec, x_stride,
526 bld->static_state->pot_width,
527 bld->static_state->wrap_s,
528 &x_offset0, &x_offset1,
529 &x_subcoord[0], &x_subcoord[1]);
530 for (z = 0; z < 2; z++) {
531 for (y = 0; y < 2; y++) {
532 offset[z][y][0] = x_offset0;
533 offset[z][y][1] = x_offset1;
534 }
535 }
536
537 if (dims >= 2) {
538 lp_build_sample_wrap_linear_int(bld,
539 bld->format_desc->block.height,
540 t_ipart, height_vec, y_stride,
541 bld->static_state->pot_height,
542 bld->static_state->wrap_t,
543 &y_offset0, &y_offset1,
544 &y_subcoord[0], &y_subcoord[1]);
545
546 for (z = 0; z < 2; z++) {
547 for (x = 0; x < 2; x++) {
548 offset[z][0][x] = lp_build_add(&bld->uint_coord_bld,
549 offset[z][0][x], y_offset0);
550 offset[z][1][x] = lp_build_add(&bld->uint_coord_bld,
551 offset[z][1][x], y_offset1);
552 }
553 }
554 }
555
556 if (dims >= 3) {
557 lp_build_sample_wrap_linear_int(bld,
558 1, /* block length (depth) */
559 r_ipart, depth_vec, z_stride,
560 bld->static_state->pot_depth,
561 bld->static_state->wrap_r,
562 &z_offset0, &z_offset1,
563 &z_subcoord[0], &z_subcoord[1]);
564 for (y = 0; y < 2; y++) {
565 for (x = 0; x < 2; x++) {
566 offset[0][y][x] = lp_build_add(&bld->uint_coord_bld,
567 offset[0][y][x], z_offset0);
568 offset[1][y][x] = lp_build_add(&bld->uint_coord_bld,
569 offset[1][y][x], z_offset1);
570 }
571 }
572 }
573 else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
574 LLVMValueRef z_offset;
575 z_offset = lp_build_mul(&bld->uint_coord_bld, r, img_stride_vec);
576 for (y = 0; y < 2; y++) {
577 for (x = 0; x < 2; x++) {
578 /* The r coord is the cube face in [0,5] */
579 offset[0][y][x] = lp_build_add(&bld->uint_coord_bld,
580 offset[0][y][x], z_offset);
581 }
582 }
583 }
584
585 /*
586 * Transform 4 x i32 in
587 *
588 * s_fpart = {s0, s1, s2, s3}
589 *
590 * into 8 x i16
591 *
592 * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
593 *
594 * into two 8 x i16
595 *
596 * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
597 * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
598 *
599 * and likewise for t_fpart. There is no risk of losing precision here
600 * since the fractional parts only use the lower 8 bits.
601 */
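/*
 * Concretely, for a 128-bit vector (h16.type.length == 8) on a little
 * endian target the loop below builds the constant shuffle masks
 *
 *    shuffle_lo = {0, 0, 0, 0, 2, 2, 2, 2}
 *    shuffle_hi = {4, 4, 4, 4, 6, 6, 6, 6}
 *
 * which pick the low 16-bit half of each original i32 lane (the half
 * holding the 8-bit fraction) and replicate it across the four channels
 * of the corresponding pixel.
 */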
602 s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
603 if (dims >= 2)
604 t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
605 if (dims >= 3)
606 r_fpart = LLVMBuildBitCast(builder, r_fpart, h16_vec_type, "");
607
608 {
609 LLVMTypeRef elem_type = LLVMInt32Type();
610 LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
611 LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
612 LLVMValueRef shuffle_lo;
613 LLVMValueRef shuffle_hi;
614
615 for (j = 0; j < h16.type.length; j += 4) {
616 #ifdef PIPE_ARCH_LITTLE_ENDIAN
617 unsigned subindex = 0;
618 #else
619 unsigned subindex = 1;
620 #endif
621 LLVMValueRef index;
622
623 index = LLVMConstInt(elem_type, j/2 + subindex, 0);
624 for (i = 0; i < 4; ++i)
625 shuffles_lo[j + i] = index;
626
627 index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
628 for (i = 0; i < 4; ++i)
629 shuffles_hi[j + i] = index;
630 }
631
632 shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
633 shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
634
635 s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
636 shuffle_lo, "");
637 s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
638 shuffle_hi, "");
639 if (dims >= 2) {
640 t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
641 shuffle_lo, "");
642 t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
643 shuffle_hi, "");
644 }
645 if (dims >= 3) {
646 r_fpart_lo = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
647 shuffle_lo, "");
648 r_fpart_hi = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
649 shuffle_hi, "");
650 }
651 }
652
653 /*
654 * Fetch the pixels as 4 x 32bit (rgba order might differ):
655 *
656 * rgba0 rgba1 rgba2 rgba3
657 *
658 * bit cast them into 16 x u8
659 *
660 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
661 *
662 * unpack them into two 8 x i16:
663 *
664 * r0 g0 b0 a0 r1 g1 b1 a1
665 * r2 g2 b2 a2 r3 g3 b3 a3
666 *
667 * The higher 8 bits of the resulting elements will be zero.
668 */
669 numj = 1 + (dims >= 2);
670 numk = 1 + (dims >= 3);
671
672 for (k = 0; k < numk; k++) {
673 for (j = 0; j < numj; j++) {
674 for (i = 0; i < 2; i++) {
675 LLVMValueRef rgba8;
676
677 if (util_format_is_rgba8_variant(bld->format_desc)) {
678 /*
679 * Given the format is a rgba8, just read the pixels as is,
680 * without any swizzling. Swizzling will be done later.
681 */
682 rgba8 = lp_build_gather(bld->builder,
683 bld->texel_type.length,
684 bld->format_desc->block.bits,
685 bld->texel_type.width,
686 data_ptr, offset[k][j][i]);
687
688 rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
689 }
690 else {
691 rgba8 = lp_build_fetch_rgba_aos(bld->builder,
692 bld->format_desc,
693 u8n.type,
694 data_ptr, offset[k][j][i],
695 x_subcoord[i],
696 y_subcoord[j]);
697 }
698
699 /* Expand one 4*rgba8 to two 2*rgba16 */
700 lp_build_unpack2(builder, u8n.type, h16.type,
701 rgba8,
702 &neighbors_lo[k][j][i], &neighbors_hi[k][j][i]);
703 }
704 }
705 }
706
707 /*
708 * Linear interpolation with 8.8 fixed point.
709 */
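/*
 * Each lp_build_lerp() below computes, per 16-bit channel and with the
 * weight in [0,256) (8.8 fixed point), roughly
 *
 *    lerp(w, a, b) = a + ((w * (b - a)) >> 8)
 *
 * and lp_build_lerp_2d() is the bilinear composition
 * lerp(t, lerp(s, tl, tr), lerp(s, bl, br)).  This is a sketch only; the
 * exact rounding is whatever lp_bld_arit.c implements.
 */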
710 if (dims == 1) {
711 /* 1-D lerp */
712 packed_lo = lp_build_lerp(&h16,
713 s_fpart_lo,
714 neighbors_lo[0][0][0],
715 neighbors_lo[0][0][1]);
716
717 packed_hi = lp_build_lerp(&h16,
718 s_fpart_hi,
719 neighbors_hi[0][0][0],
720 neighbors_hi[0][0][1]);
721 }
722 else {
723 /* 2-D lerp */
724 packed_lo = lp_build_lerp_2d(&h16,
725 s_fpart_lo, t_fpart_lo,
726 neighbors_lo[0][0][0],
727 neighbors_lo[0][0][1],
728 neighbors_lo[0][1][0],
729 neighbors_lo[0][1][1]);
730
731 packed_hi = lp_build_lerp_2d(&h16,
732 s_fpart_hi, t_fpart_hi,
733 neighbors_hi[0][0][0],
734 neighbors_hi[0][0][1],
735 neighbors_hi[0][1][0],
736 neighbors_hi[0][1][1]);
737
738 if (dims >= 3) {
739 LLVMValueRef packed_lo2, packed_hi2;
740
741 /* lerp in the second z slice */
742 packed_lo2 = lp_build_lerp_2d(&h16,
743 s_fpart_lo, t_fpart_lo,
744 neighbors_lo[1][0][0],
745 neighbors_lo[1][0][1],
746 neighbors_lo[1][1][0],
747 neighbors_lo[1][1][1]);
748
749 packed_hi2 = lp_build_lerp_2d(&h16,
750 s_fpart_hi, t_fpart_hi,
751 neighbors_hi[1][0][0],
752 neighbors_hi[1][0][1],
753 neighbors_hi[1][1][0],
754 neighbors_hi[1][1][1]);
755 /* interp between two z slices */
756 packed_lo = lp_build_lerp(&h16, r_fpart_lo,
757 packed_lo, packed_lo2);
758 packed_hi = lp_build_lerp(&h16, r_fpart_hi,
759 packed_hi, packed_hi2);
760 }
761 }
762
763 *colors_lo = packed_lo;
764 *colors_hi = packed_hi;
765 }
766
767
768 /**
769 * Sample the texture/mipmap using given image filter and mip filter.
770 * data_ptr0 and data_ptr1 point to the two mipmap levels to sample
771 * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
772 * If we're using nearest miplevel sampling, the '1' values will be null/unused.
773 */
774 static void
775 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
776 unsigned img_filter,
777 unsigned mip_filter,
778 LLVMValueRef s,
779 LLVMValueRef t,
780 LLVMValueRef r,
781 LLVMValueRef lod_fpart,
782 LLVMValueRef width0_vec,
783 LLVMValueRef width1_vec,
784 LLVMValueRef height0_vec,
785 LLVMValueRef height1_vec,
786 LLVMValueRef depth0_vec,
787 LLVMValueRef depth1_vec,
788 LLVMValueRef row_stride0_vec,
789 LLVMValueRef row_stride1_vec,
790 LLVMValueRef img_stride0_vec,
791 LLVMValueRef img_stride1_vec,
792 LLVMValueRef data_ptr0,
793 LLVMValueRef data_ptr1,
794 LLVMValueRef *colors_lo,
795 LLVMValueRef *colors_hi)
796 {
797 LLVMValueRef colors0_lo, colors0_hi;
798 LLVMValueRef colors1_lo, colors1_hi;
799
800 if (img_filter == PIPE_TEX_FILTER_NEAREST) {
801 /* sample the first mipmap level */
802 lp_build_sample_image_nearest(bld,
803 width0_vec, height0_vec, depth0_vec,
804 row_stride0_vec, img_stride0_vec,
805 data_ptr0, s, t, r,
806 &colors0_lo, &colors0_hi);
807
808 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
809 /* sample the second mipmap level */
810 lp_build_sample_image_nearest(bld,
811 width1_vec, height1_vec, depth1_vec,
812 row_stride1_vec, img_stride1_vec,
813 data_ptr1, s, t, r,
814 &colors1_lo, &colors1_hi);
815 }
816 }
817 else {
818 assert(img_filter == PIPE_TEX_FILTER_LINEAR);
819
820 /* sample the first mipmap level */
821 lp_build_sample_image_linear(bld,
822 width0_vec, height0_vec, depth0_vec,
823 row_stride0_vec, img_stride0_vec,
824 data_ptr0, s, t, r,
825 &colors0_lo, &colors0_hi);
826
827 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
828 /* sample the second mipmap level */
829 lp_build_sample_image_linear(bld,
830 width1_vec, height1_vec, depth1_vec,
831 row_stride1_vec, img_stride1_vec,
832 data_ptr1, s, t, r,
833 &colors1_lo, &colors1_hi);
834 }
835 }
836
837 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
838 /* interpolate samples from the two mipmap levels */
839 struct lp_build_context h16;
840 lp_build_context_init(&h16, bld->builder, lp_type_ufixed(16));
841
842 *colors_lo = lp_build_lerp(&h16, lod_fpart,
843 colors0_lo, colors1_lo);
844 *colors_hi = lp_build_lerp(&h16, lod_fpart,
845 colors0_hi, colors1_hi);
846 }
847 else {
848 /* use first/only level's colors */
849 *colors_lo = colors0_lo;
850 *colors_hi = colors0_hi;
851 }
852 }
853
854
855
856 /**
857 * Texture sampling in AoS format. Used when sampling common 32-bit/texel
858 * formats. 1D/2D/3D/cube textures are supported, as are all mipmap
859 * sampling modes, but only a limited set of texture coord wrap modes.
860 */
861 void
862 lp_build_sample_aos(struct lp_build_sample_context *bld,
863 unsigned unit,
864 LLVMValueRef s,
865 LLVMValueRef t,
866 LLVMValueRef r,
867 const LLVMValueRef *ddx,
868 const LLVMValueRef *ddy,
869 LLVMValueRef lod_bias, /* optional */
870 LLVMValueRef explicit_lod, /* optional */
871 LLVMValueRef width,
872 LLVMValueRef height,
873 LLVMValueRef depth,
874 LLVMValueRef width_vec,
875 LLVMValueRef height_vec,
876 LLVMValueRef depth_vec,
877 LLVMValueRef row_stride_array,
878 LLVMValueRef img_stride_array,
879 LLVMValueRef data_array,
880 LLVMValueRef texel_out[4])
881 {
882 struct lp_build_context *float_bld = &bld->float_bld;
883 LLVMBuilderRef builder = bld->builder;
884 const unsigned mip_filter = bld->static_state->min_mip_filter;
885 const unsigned min_filter = bld->static_state->min_img_filter;
886 const unsigned mag_filter = bld->static_state->mag_img_filter;
887 const int dims = texture_dims(bld->static_state->target);
888 LLVMValueRef lod = NULL, lod_fpart = NULL;
889 LLVMValueRef ilevel0, ilevel1 = NULL;
890 LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
891 LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
892 LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
893 LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL;
894 LLVMValueRef data_ptr0, data_ptr1 = NULL;
895 LLVMValueRef packed, packed_lo, packed_hi;
896 LLVMValueRef unswizzled[4];
897 LLVMValueRef face_ddx[4], face_ddy[4];
898 struct lp_build_context h16;
899 LLVMTypeRef h16_vec_type;
900
901 /* we only support the common/simple wrap modes at this time */
902 assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s));
903 if (dims >= 2)
904 assert(lp_is_simple_wrap_mode(bld->static_state->wrap_t));
905 if (dims >= 3)
906 assert(lp_is_simple_wrap_mode(bld->static_state->wrap_r));
907
908
909 /* make 16-bit fixed-pt builder context */
910 lp_build_context_init(&h16, builder, lp_type_ufixed(16));
911 h16_vec_type = lp_build_vec_type(h16.type);
912
913
914 /* cube face selection, compute pre-face coords, etc. */
915 if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
916 LLVMValueRef face, face_s, face_t;
917 lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
918 s = face_s; /* vec */
919 t = face_t; /* vec */
920 /* use 'r' to indicate cube face */
921 r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
922
923 /* recompute ddx, ddy using the new (s,t) face texcoords */
924 face_ddx[0] = lp_build_ddx(&bld->coord_bld, s);
925 face_ddx[1] = lp_build_ddx(&bld->coord_bld, t);
926 face_ddx[2] = NULL;
927 face_ddx[3] = NULL;
928 face_ddy[0] = lp_build_ddy(&bld->coord_bld, s);
929 face_ddy[1] = lp_build_ddy(&bld->coord_bld, t);
930 face_ddy[2] = NULL;
931 face_ddy[3] = NULL;
932 ddx = face_ddx;
933 ddy = face_ddy;
934 }
935
936
937 /*
938 * Compute the level of detail (float).
939 */
940 if (min_filter != mag_filter ||
941 mip_filter != PIPE_TEX_MIPFILTER_NONE) {
942 /* Need to compute lod either to choose mipmap levels or to
943 * distinguish between minification/magnification with one mipmap level.
944 */
945 lod = lp_build_lod_selector(bld, unit, ddx, ddy,
946 lod_bias, explicit_lod,
947 width, height, depth);
948 }
949
950 /*
951 * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
952 * If mipfilter=linear, also compute the weight between the two
953 * mipmap levels: lod_fpart
954 */
955 switch (mip_filter) {
956 default:
957 assert(0 && "bad mip_filter value in lp_build_sample_aos()");
958 /* fall-through */
959 case PIPE_TEX_MIPFILTER_NONE:
960 /* always use mip level 0 */
961 if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
962 /* XXX this is a work-around for an apparent bug in LLVM 2.7.
963 * We should be able to set ilevel0 = const(0) but that causes
964 * bad x86 code to be emitted.
965 */
966 lod = lp_build_const_elem(bld->coord_bld.type, 0.0);
967 lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
968 }
969 else {
970 ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
971 }
972 break;
973 case PIPE_TEX_MIPFILTER_NEAREST:
974 assert(lod);
975 lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
976 break;
977 case PIPE_TEX_MIPFILTER_LINEAR:
978 {
979 LLVMValueRef f256 = LLVMConstReal(LLVMFloatType(), 256.0);
980 LLVMValueRef i255 = lp_build_const_int32(255);
981 LLVMTypeRef i16_type = LLVMIntType(16);
982
983 assert(lod);
984
985 lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1,
986 &lod_fpart);
987 lod_fpart = LLVMBuildFMul(builder, lod_fpart, f256, "");
988 lod_fpart = lp_build_ifloor(&bld->float_bld, lod_fpart);
989 lod_fpart = LLVMBuildAnd(builder, lod_fpart, i255, "");
990 lod_fpart = LLVMBuildTrunc(builder, lod_fpart, i16_type, "");
991 lod_fpart = lp_build_broadcast_scalar(&h16, lod_fpart);
992
993 /* the lod_fpart values will be fixed pt values in [0,1) */
994 }
995 break;
996 }
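/*
 * Worked example for the PIPE_TEX_MIPFILTER_LINEAR case above
 * (illustrative only): a fractional lod of 0.25 scales to 64.0, floors to
 * 64, is masked and truncated to a 16-bit 64 and broadcast, so the mip
 * lerp further below blends the two levels with weight 64/256 = 0.25,
 * using the same 8.8 fixed-point lerp as the texel filtering.
 */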
997
998 /* compute image size(s) of source mipmap level(s) */
999 lp_build_mipmap_level_sizes(bld, dims, width_vec, height_vec, depth_vec,
1000 ilevel0, ilevel1,
1001 row_stride_array, img_stride_array,
1002 &width0_vec, &width1_vec,
1003 &height0_vec, &height1_vec,
1004 &depth0_vec, &depth1_vec,
1005 &row_stride0_vec, &row_stride1_vec,
1006 &img_stride0_vec, &img_stride1_vec);
1007
1008 /*
1009 * Get pointer(s) to image data for mipmap level(s).
1010 */
1011 data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0);
1012 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1013 data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1);
1014 }
1015
1016
1017 /*
1018 * Get/interpolate texture colors.
1019 */
1020 if (min_filter == mag_filter) {
1021 /* no need to distinguish between minification and magnification */
1022 lp_build_sample_mipmap(bld, min_filter, mip_filter,
1023 s, t, r, lod_fpart,
1024 width0_vec, width1_vec,
1025 height0_vec, height1_vec,
1026 depth0_vec, depth1_vec,
1027 row_stride0_vec, row_stride1_vec,
1028 img_stride0_vec, img_stride1_vec,
1029 data_ptr0, data_ptr1,
1030 &packed_lo, &packed_hi);
1031 }
1032 else {
1033 /* Emit conditional to choose min image filter or mag image filter
1034 * depending on the lod being >= 0 or < 0, respectively.
1035 */
1036 struct lp_build_flow_context *flow_ctx;
1037 struct lp_build_if_state if_ctx;
1038 LLVMValueRef minify;
1039
1040 flow_ctx = lp_build_flow_create(builder);
1041 lp_build_flow_scope_begin(flow_ctx);
1042
1043 packed_lo = LLVMGetUndef(h16_vec_type);
1044 packed_hi = LLVMGetUndef(h16_vec_type);
1045
1046 lp_build_flow_scope_declare(flow_ctx, &packed_lo);
1047 lp_build_flow_scope_declare(flow_ctx, &packed_hi);
1048
1049 /* minify = lod >= 0.0 */
1050 minify = LLVMBuildFCmp(builder, LLVMRealUGE,
1051 lod, float_bld->zero, "");
1052
1053 lp_build_if(&if_ctx, flow_ctx, builder, minify);
1054 {
1055 /* Use the minification filter */
1056 lp_build_sample_mipmap(bld, min_filter, mip_filter,
1057 s, t, r, lod_fpart,
1058 width0_vec, width1_vec,
1059 height0_vec, height1_vec,
1060 depth0_vec, depth1_vec,
1061 row_stride0_vec, row_stride1_vec,
1062 img_stride0_vec, img_stride1_vec,
1063 data_ptr0, data_ptr1,
1064 &packed_lo, &packed_hi);
1065 }
1066 lp_build_else(&if_ctx);
1067 {
1068 /* Use the magnification filter */
1069 lp_build_sample_mipmap(bld, mag_filter, mip_filter,
1070 s, t, r, lod_fpart,
1071 width0_vec, width1_vec,
1072 height0_vec, height1_vec,
1073 depth0_vec, depth1_vec,
1074 row_stride0_vec, row_stride1_vec,
1075 img_stride0_vec, img_stride1_vec,
1076 data_ptr0, data_ptr1,
1077 &packed_lo, &packed_hi);
1078 }
1079 lp_build_endif(&if_ctx);
1080
1081 lp_build_flow_scope_end(flow_ctx);
1082 lp_build_flow_destroy(flow_ctx);
1083 }
1084
1085 /* combine 'packed_lo', 'packed_hi' into 'packed' */
1086 {
1087 struct lp_build_context h16, u8n;
1088
1089 lp_build_context_init(&h16, builder, lp_type_ufixed(16));
1090 lp_build_context_init(&u8n, builder, lp_type_unorm(8));
1091
1092 packed = lp_build_pack2(builder, h16.type, u8n.type,
1093 packed_lo, packed_hi);
1094 }
1095
1096 /*
1097 * Convert to SoA and swizzle.
1098 */
1099 lp_build_rgba8_to_f32_soa(builder,
1100 bld->texel_type,
1101 packed, unswizzled);
1102
1103 if (util_format_is_rgba8_variant(bld->format_desc)) {
1104 lp_build_format_swizzle_soa(bld->format_desc,
1105 &bld->texel_bld,
1106 unswizzled, texel_out);
1107 }
1108 else {
1109 texel_out[0] = unswizzled[0];
1110 texel_out[1] = unswizzled[1];
1111 texel_out[2] = unswizzled[2];
1112 texel_out[3] = unswizzled[3];
1113 }
1114
1115 apply_sampler_swizzle(bld, texel_out);
1116 }