util: add mutex lock in u_debug_memory.c code
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_sample_aos.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * Texture sampling -- AoS.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 * @author Brian Paul <brianp@vmware.com>
34 */
35
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "util/u_debug.h"
39 #include "util/u_dump.h"
40 #include "util/u_memory.h"
41 #include "util/u_math.h"
42 #include "util/u_format.h"
43 #include "lp_bld_debug.h"
44 #include "lp_bld_type.h"
45 #include "lp_bld_const.h"
46 #include "lp_bld_conv.h"
47 #include "lp_bld_arit.h"
48 #include "lp_bld_bitarit.h"
49 #include "lp_bld_logic.h"
50 #include "lp_bld_swizzle.h"
51 #include "lp_bld_pack.h"
52 #include "lp_bld_flow.h"
53 #include "lp_bld_gather.h"
54 #include "lp_bld_format.h"
55 #include "lp_bld_init.h"
56 #include "lp_bld_sample.h"
57 #include "lp_bld_sample_aos.h"
58 #include "lp_bld_quad.h"
59
60
61 /**
62 * Build LLVM code for texture coord wrapping, for nearest filtering,
63 * for scaled integer texcoords.
64 * \param block_length is the length of the pixel block along the
65 * coordinate axis
66 * \param coord the incoming texcoord (s,t,r or q) scaled to the texture size
67 * \param length the texture size along one dimension
68 * \param stride pixel stride along the coordinate axis (in bytes)
69 * \param is_pot if TRUE, length is a power of two
70 * \param wrap_mode one of PIPE_TEX_WRAP_x
71 * \param out_offset byte offset for the wrapped coordinate
72 * \param out_i resulting sub-block pixel coordinate for coord0
73 */
74 static void
75 lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
76 unsigned block_length,
77 LLVMValueRef coord,
78 LLVMValueRef length,
79 LLVMValueRef stride,
80 boolean is_pot,
81 unsigned wrap_mode,
82 LLVMValueRef *out_offset,
83 LLVMValueRef *out_i)
84 {
85 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
86 LLVMBuilderRef builder = bld->gallivm->builder;
87 LLVMValueRef length_minus_one;
88
89 length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
90
91 switch(wrap_mode) {
92 case PIPE_TEX_WRAP_REPEAT:
93 if(is_pot)
94 coord = LLVMBuildAnd(builder, coord, length_minus_one, "");
95 else {
96 /* Add a bias to the texcoord to handle negative coords */
97 LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024);
98 coord = LLVMBuildAdd(builder, coord, bias, "");
99 coord = LLVMBuildURem(builder, coord, length, "");
100 }
101 break;
102
103 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
104 coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
105 coord = lp_build_min(int_coord_bld, coord, length_minus_one);
106 break;
107
108 case PIPE_TEX_WRAP_CLAMP:
109 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
110 case PIPE_TEX_WRAP_MIRROR_REPEAT:
111 case PIPE_TEX_WRAP_MIRROR_CLAMP:
112 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
113 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
114 default:
115 assert(0);
116 }
117
118 lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride,
119 out_offset, out_i);
120 }
121
122
123 /**
124 * Build LLVM code for texture coord wrapping, for linear filtering,
125 * for scaled integer texcoords.
126 * \param block_length is the length of the pixel block along the
127 * coordinate axis
128 * \param coord0 the incoming texcoord (s,t,r or q) scaled to the texture size
129 * \param length the texture size along one dimension
130 * \param stride pixel stride along the coordinate axis (in bytes)
131 * \param is_pot if TRUE, length is a power of two
132 * \param wrap_mode one of PIPE_TEX_WRAP_x
133 * \param offset0 resulting relative offset for coord0
134 * \param offset1 resulting relative offset for coord0 + 1
135 * \param i0 resulting sub-block pixel coordinate for coord0
136 * \param i1 resulting sub-block pixel coordinate for coord0 + 1
137 */
138 static void
139 lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
140 unsigned block_length,
141 LLVMValueRef coord0,
142 LLVMValueRef length,
143 LLVMValueRef stride,
144 boolean is_pot,
145 unsigned wrap_mode,
146 LLVMValueRef *offset0,
147 LLVMValueRef *offset1,
148 LLVMValueRef *i0,
149 LLVMValueRef *i1)
150 {
151 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
152 LLVMBuilderRef builder = bld->gallivm->builder;
153 LLVMValueRef length_minus_one;
154 LLVMValueRef lmask, umask, mask;
155
156 if (block_length != 1) {
157 /*
158 * If the pixel block covers more than one pixel then there is no easy
159 * way to calculate offset1 relative to offset0. Instead, compute them
160 * independently.
161 */
162
163 LLVMValueRef coord1;
164
165 lp_build_sample_wrap_nearest_int(bld,
166 block_length,
167 coord0,
168 length,
169 stride,
170 is_pot,
171 wrap_mode,
172 offset0, i0);
173
174 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
175
176 lp_build_sample_wrap_nearest_int(bld,
177 block_length,
178 coord1,
179 length,
180 stride,
181 is_pot,
182 wrap_mode,
183 offset1, i1);
184
185 return;
186 }
187
188 /*
189 * Scalar pixels -- try to compute offset0 and offset1 with a single stride
190 * multiplication.
191 */
192
193 *i0 = int_coord_bld->zero;
194 *i1 = int_coord_bld->zero;
195
196 length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
197
198 switch(wrap_mode) {
199 case PIPE_TEX_WRAP_REPEAT:
200 if (is_pot) {
201 coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
202 }
203 else {
204 /* Add a bias to the texcoord to handle negative coords */
205 LLVMValueRef bias = lp_build_mul_imm(int_coord_bld, length, 1024);
206 coord0 = LLVMBuildAdd(builder, coord0, bias, "");
207 coord0 = LLVMBuildURem(builder, coord0, length, "");
208 }
209
210 mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
211 PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
212
213 *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
214 *offset1 = LLVMBuildAnd(builder,
215 lp_build_add(int_coord_bld, *offset0, stride),
216 mask, "");
217 break;
218
219 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
220 lmask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
221 PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
222 umask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
223 PIPE_FUNC_LESS, coord0, length_minus_one);
224
225 coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
226 coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);
227
228 mask = LLVMBuildAnd(builder, lmask, umask, "");
229
230 *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
231 *offset1 = lp_build_add(int_coord_bld,
232 *offset0,
233 LLVMBuildAnd(builder, stride, mask, ""));
234 break;
235
236 case PIPE_TEX_WRAP_CLAMP:
237 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
238 case PIPE_TEX_WRAP_MIRROR_REPEAT:
239 case PIPE_TEX_WRAP_MIRROR_CLAMP:
240 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
241 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
242 default:
243 assert(0);
244 *offset0 = int_coord_bld->zero;
245 *offset1 = int_coord_bld->zero;
246 break;
247 }
248 }
249
250
251 /**
252 * Sample a single texture image with nearest sampling.
253 * If sampling a cube texture, r = cube face in [0,5].
254 * Return filtered color as two vectors of 16-bit fixed point values.
255 */
256 static void
257 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
258 LLVMValueRef int_size,
259 LLVMValueRef row_stride_vec,
260 LLVMValueRef img_stride_vec,
261 LLVMValueRef data_ptr,
262 LLVMValueRef s,
263 LLVMValueRef t,
264 LLVMValueRef r,
265 LLVMValueRef *colors_lo,
266 LLVMValueRef *colors_hi)
267 {
268 const unsigned dims = bld->dims;
269 LLVMBuilderRef builder = bld->gallivm->builder;
270 struct lp_build_context i32, h16, u8n;
271 LLVMTypeRef i32_vec_type, u8n_vec_type;
272 LLVMValueRef i32_c8;
273 LLVMValueRef width_vec, height_vec, depth_vec;
274 LLVMValueRef s_ipart, t_ipart = NULL, r_ipart = NULL;
275 LLVMValueRef x_stride;
276 LLVMValueRef x_offset, offset;
277 LLVMValueRef x_subcoord, y_subcoord, z_subcoord;
278
279 lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32));
280 lp_build_context_init(&h16, bld->gallivm, lp_type_ufixed(16));
281 lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8));
282
283 i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);
284 u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);
285
286 lp_build_extract_image_sizes(bld,
287 bld->int_size_type,
288 bld->int_coord_type,
289 int_size,
290 &width_vec,
291 &height_vec,
292 &depth_vec);
293
294 if (bld->static_state->normalized_coords) {
295 LLVMValueRef scaled_size;
296 LLVMValueRef flt_size;
297
298 /* scale size by 256 (8 fractional bits) */
299 scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);
300
301 flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);
302
303 lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
304 }
305 else {
306 /* scale coords by 256 (8 fractional bits) */
307 s = lp_build_mul_imm(&bld->coord_bld, s, 256);
308 if (dims >= 2)
309 t = lp_build_mul_imm(&bld->coord_bld, t, 256);
310 if (dims >= 3)
311 r = lp_build_mul_imm(&bld->coord_bld, r, 256);
312 }
313
314 /* convert float to int */
315 s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
316 if (dims >= 2)
317 t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
318 if (dims >= 3)
319 r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");
320
321 /* compute floor (shift right 8) */
322 i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
323 s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
324 if (dims >= 2)
325 t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
326 if (dims >= 3)
327 r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
328
329 /* get pixel, row, image strides */
330 x_stride = lp_build_const_vec(bld->gallivm,
331 bld->int_coord_bld.type,
332 bld->format_desc->block.bits/8);
333
334 /* Do texcoord wrapping, compute texel offset */
335 lp_build_sample_wrap_nearest_int(bld,
336 bld->format_desc->block.width,
337 s_ipart, width_vec, x_stride,
338 bld->static_state->pot_width,
339 bld->static_state->wrap_s,
340 &x_offset, &x_subcoord);
341 offset = x_offset;
342 if (dims >= 2) {
343 LLVMValueRef y_offset;
344 lp_build_sample_wrap_nearest_int(bld,
345 bld->format_desc->block.height,
346 t_ipart, height_vec, row_stride_vec,
347 bld->static_state->pot_height,
348 bld->static_state->wrap_t,
349 &y_offset, &y_subcoord);
350 offset = lp_build_add(&bld->int_coord_bld, offset, y_offset);
351 if (dims >= 3) {
352 LLVMValueRef z_offset;
353 lp_build_sample_wrap_nearest_int(bld,
354 1, /* block length (depth) */
355 r_ipart, depth_vec, img_stride_vec,
356 bld->static_state->pot_depth,
357 bld->static_state->wrap_r,
358 &z_offset, &z_subcoord);
359 offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
360 }
361 else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
362 LLVMValueRef z_offset;
363 /* The r coord is the cube face in [0,5] */
364 z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
365 offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
366 }
367 }
368
369 /*
370 * Fetch the pixels as 4 x 32bit (rgba order might differ):
371 *
372 * rgba0 rgba1 rgba2 rgba3
373 *
374 * bit cast them into 16 x u8
375 *
376 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
377 *
378 * unpack them into two 8 x i16:
379 *
380 * r0 g0 b0 a0 r1 g1 b1 a1
381 * r2 g2 b2 a2 r3 g3 b3 a3
382 *
383 * The higher 8 bits of the resulting elements will be zero.
384 */
385 {
386 LLVMValueRef rgba8;
387
388 if (util_format_is_rgba8_variant(bld->format_desc)) {
389 /*
390 * Given the format is a rgba8, just read the pixels as is,
391 * without any swizzling. Swizzling will be done later.
392 */
393 rgba8 = lp_build_gather(bld->gallivm,
394 bld->texel_type.length,
395 bld->format_desc->block.bits,
396 bld->texel_type.width,
397 data_ptr, offset);
398
399 rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
400 }
401 else {
402 rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
403 bld->format_desc,
404 u8n.type,
405 data_ptr, offset,
406 x_subcoord,
407 y_subcoord);
408 }
409
410 /* Expand one 4*rgba8 to two 2*rgba16 */
411 lp_build_unpack2(bld->gallivm, u8n.type, h16.type,
412 rgba8,
413 colors_lo, colors_hi);
414 }
415 }
416
417
418 /**
419 * Sample a single texture image with (bi-)(tri-)linear sampling.
420 * Return filtered color as two vectors of 16-bit fixed point values.
421 */
422 static void
423 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
424 LLVMValueRef int_size,
425 LLVMValueRef row_stride_vec,
426 LLVMValueRef img_stride_vec,
427 LLVMValueRef data_ptr,
428 LLVMValueRef s,
429 LLVMValueRef t,
430 LLVMValueRef r,
431 LLVMValueRef *colors_lo,
432 LLVMValueRef *colors_hi)
433 {
434 const unsigned dims = bld->dims;
435 LLVMBuilderRef builder = bld->gallivm->builder;
436 struct lp_build_context i32, h16, u8n;
437 LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
438 LLVMValueRef i32_c8, i32_c128, i32_c255;
439 LLVMValueRef width_vec, height_vec, depth_vec;
440 LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
441 LLVMValueRef t_ipart = NULL, t_fpart = NULL, t_fpart_lo = NULL, t_fpart_hi = NULL;
442 LLVMValueRef r_ipart = NULL, r_fpart = NULL, r_fpart_lo = NULL, r_fpart_hi = NULL;
443 LLVMValueRef x_stride, y_stride, z_stride;
444 LLVMValueRef x_offset0, x_offset1;
445 LLVMValueRef y_offset0, y_offset1;
446 LLVMValueRef z_offset0, z_offset1;
447 LLVMValueRef offset[2][2][2]; /* [z][y][x] */
448 LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2];
449 LLVMValueRef neighbors_lo[2][2][2]; /* [z][y][x] */
450 LLVMValueRef neighbors_hi[2][2][2]; /* [z][y][x] */
451 LLVMValueRef packed_lo, packed_hi;
452 unsigned x, y, z;
453 unsigned i, j, k;
454 unsigned numj, numk;
455
456 lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32));
457 lp_build_context_init(&h16, bld->gallivm, lp_type_ufixed(16));
458 lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8));
459
460 i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);
461 h16_vec_type = lp_build_vec_type(bld->gallivm, h16.type);
462 u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);
463
464 lp_build_extract_image_sizes(bld,
465 bld->int_size_type,
466 bld->int_coord_type,
467 int_size,
468 &width_vec,
469 &height_vec,
470 &depth_vec);
471
472 if (bld->static_state->normalized_coords) {
473 LLVMValueRef scaled_size;
474 LLVMValueRef flt_size;
475
476 /* scale size by 256 (8 fractional bits) */
477 scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);
478
479 flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);
480
481 lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
482 }
483 else {
484 /* scale coords by 256 (8 fractional bits) */
485 s = lp_build_mul_imm(&bld->coord_bld, s, 256);
486 if (dims >= 2)
487 t = lp_build_mul_imm(&bld->coord_bld, t, 256);
488 if (dims >= 3)
489 r = lp_build_mul_imm(&bld->coord_bld, r, 256);
490 }
491
492 /* convert float to int */
493 s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
494 if (dims >= 2)
495 t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
496 if (dims >= 3)
497 r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");
498
499 /* subtract 0.5 (add -128) */
500 i32_c128 = lp_build_const_int_vec(bld->gallivm, i32.type, -128);
501 s = LLVMBuildAdd(builder, s, i32_c128, "");
502 if (dims >= 2) {
503 t = LLVMBuildAdd(builder, t, i32_c128, "");
504 }
505 if (dims >= 3) {
506 r = LLVMBuildAdd(builder, r, i32_c128, "");
507 }
508
509 /* compute floor (shift right 8) */
510 i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
511 s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
512 if (dims >= 2)
513 t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
514 if (dims >= 3)
515 r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");
516
517 /* compute fractional part (AND with 0xff) */
518 i32_c255 = lp_build_const_int_vec(bld->gallivm, i32.type, 255);
519 s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
520 if (dims >= 2)
521 t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
522 if (dims >= 3)
523 r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");
524
525 /* get pixel, row and image strides */
526 x_stride = lp_build_const_vec(bld->gallivm, bld->int_coord_bld.type,
527 bld->format_desc->block.bits/8);
528 y_stride = row_stride_vec;
529 z_stride = img_stride_vec;
530
531 /* do texcoord wrapping and compute texel offsets */
532 lp_build_sample_wrap_linear_int(bld,
533 bld->format_desc->block.width,
534 s_ipart, width_vec, x_stride,
535 bld->static_state->pot_width,
536 bld->static_state->wrap_s,
537 &x_offset0, &x_offset1,
538 &x_subcoord[0], &x_subcoord[1]);
539 for (z = 0; z < 2; z++) {
540 for (y = 0; y < 2; y++) {
541 offset[z][y][0] = x_offset0;
542 offset[z][y][1] = x_offset1;
543 }
544 }
545
546 if (dims >= 2) {
547 lp_build_sample_wrap_linear_int(bld,
548 bld->format_desc->block.height,
549 t_ipart, height_vec, y_stride,
550 bld->static_state->pot_height,
551 bld->static_state->wrap_t,
552 &y_offset0, &y_offset1,
553 &y_subcoord[0], &y_subcoord[1]);
554
555 for (z = 0; z < 2; z++) {
556 for (x = 0; x < 2; x++) {
557 offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
558 offset[z][0][x], y_offset0);
559 offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
560 offset[z][1][x], y_offset1);
561 }
562 }
563 }
564
565 if (dims >= 3) {
566 lp_build_sample_wrap_linear_int(bld,
567 bld->format_desc->block.height,
568 r_ipart, depth_vec, z_stride,
569 bld->static_state->pot_depth,
570 bld->static_state->wrap_r,
571 &z_offset0, &z_offset1,
572 &z_subcoord[0], &z_subcoord[1]);
573 for (y = 0; y < 2; y++) {
574 for (x = 0; x < 2; x++) {
575 offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
576 offset[0][y][x], z_offset0);
577 offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
578 offset[1][y][x], z_offset1);
579 }
580 }
581 }
582 else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
583 LLVMValueRef z_offset;
584 z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
585 for (y = 0; y < 2; y++) {
586 for (x = 0; x < 2; x++) {
587 /* The r coord is the cube face in [0,5] */
588 offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
589 offset[0][y][x], z_offset);
590 }
591 }
592 }
593
594 /*
595 * Transform 4 x i32 in
596 *
597 * s_fpart = {s0, s1, s2, s3}
598 *
599 * into 8 x i16
600 *
601 * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
602 *
603 * into two 8 x i16
604 *
605 * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
606 * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
607 *
608 * and likewise for t_fpart. There is no risk of loosing precision here
609 * since the fractional parts only use the lower 8bits.
610 */
611 s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
612 if (dims >= 2)
613 t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
614 if (dims >= 3)
615 r_fpart = LLVMBuildBitCast(builder, r_fpart, h16_vec_type, "");
616
617 {
618 LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
619 LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
620 LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
621 LLVMValueRef shuffle_lo;
622 LLVMValueRef shuffle_hi;
623
624 for (j = 0; j < h16.type.length; j += 4) {
625 #ifdef PIPE_ARCH_LITTLE_ENDIAN
626 unsigned subindex = 0;
627 #else
628 unsigned subindex = 1;
629 #endif
630 LLVMValueRef index;
631
632 index = LLVMConstInt(elem_type, j/2 + subindex, 0);
633 for (i = 0; i < 4; ++i)
634 shuffles_lo[j + i] = index;
635
636 index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
637 for (i = 0; i < 4; ++i)
638 shuffles_hi[j + i] = index;
639 }
640
641 shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
642 shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
643
644 s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
645 shuffle_lo, "");
646 s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef,
647 shuffle_hi, "");
648 if (dims >= 2) {
649 t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
650 shuffle_lo, "");
651 t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef,
652 shuffle_hi, "");
653 }
654 if (dims >= 3) {
655 r_fpart_lo = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
656 shuffle_lo, "");
657 r_fpart_hi = LLVMBuildShuffleVector(builder, r_fpart, h16.undef,
658 shuffle_hi, "");
659 }
660 }
661
662 /*
663 * Fetch the pixels as 4 x 32bit (rgba order might differ):
664 *
665 * rgba0 rgba1 rgba2 rgba3
666 *
667 * bit cast them into 16 x u8
668 *
669 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
670 *
671 * unpack them into two 8 x i16:
672 *
673 * r0 g0 b0 a0 r1 g1 b1 a1
674 * r2 g2 b2 a2 r3 g3 b3 a3
675 *
676 * The higher 8 bits of the resulting elements will be zero.
677 */
678 numj = 1 + (dims >= 2);
679 numk = 1 + (dims >= 3);
680
681 for (k = 0; k < numk; k++) {
682 for (j = 0; j < numj; j++) {
683 for (i = 0; i < 2; i++) {
684 LLVMValueRef rgba8;
685
686 if (util_format_is_rgba8_variant(bld->format_desc)) {
687 /*
688 * Given the format is a rgba8, just read the pixels as is,
689 * without any swizzling. Swizzling will be done later.
690 */
691 rgba8 = lp_build_gather(bld->gallivm,
692 bld->texel_type.length,
693 bld->format_desc->block.bits,
694 bld->texel_type.width,
695 data_ptr, offset[k][j][i]);
696
697 rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
698 }
699 else {
700 rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
701 bld->format_desc,
702 u8n.type,
703 data_ptr, offset[k][j][i],
704 x_subcoord[i],
705 y_subcoord[j]);
706 }
707
708 /* Expand one 4*rgba8 to two 2*rgba16 */
709 lp_build_unpack2(bld->gallivm, u8n.type, h16.type,
710 rgba8,
711 &neighbors_lo[k][j][i], &neighbors_hi[k][j][i]);
712 }
713 }
714 }
715
716 /*
717 * Linear interpolation with 8.8 fixed point.
718 */
719 if (dims == 1) {
720 /* 1-D lerp */
721 packed_lo = lp_build_lerp(&h16,
722 s_fpart_lo,
723 neighbors_lo[0][0][0],
724 neighbors_lo[0][0][1]);
725
726 packed_hi = lp_build_lerp(&h16,
727 s_fpart_hi,
728 neighbors_hi[0][0][0],
729 neighbors_hi[0][0][1]);
730 }
731 else {
732 /* 2-D lerp */
733 packed_lo = lp_build_lerp_2d(&h16,
734 s_fpart_lo, t_fpart_lo,
735 neighbors_lo[0][0][0],
736 neighbors_lo[0][0][1],
737 neighbors_lo[0][1][0],
738 neighbors_lo[0][1][1]);
739
740 packed_hi = lp_build_lerp_2d(&h16,
741 s_fpart_hi, t_fpart_hi,
742 neighbors_hi[0][0][0],
743 neighbors_hi[0][0][1],
744 neighbors_hi[0][1][0],
745 neighbors_hi[0][1][1]);
746
747 if (dims >= 3) {
748 LLVMValueRef packed_lo2, packed_hi2;
749
750 /* lerp in the second z slice */
751 packed_lo2 = lp_build_lerp_2d(&h16,
752 s_fpart_lo, t_fpart_lo,
753 neighbors_lo[1][0][0],
754 neighbors_lo[1][0][1],
755 neighbors_lo[1][1][0],
756 neighbors_lo[1][1][1]);
757
758 packed_hi2 = lp_build_lerp_2d(&h16,
759 s_fpart_hi, t_fpart_hi,
760 neighbors_hi[1][0][0],
761 neighbors_hi[1][0][1],
762 neighbors_hi[1][1][0],
763 neighbors_hi[1][1][1]);
764 /* interp between two z slices */
765 packed_lo = lp_build_lerp(&h16, r_fpart_lo,
766 packed_lo, packed_lo2);
767 packed_hi = lp_build_lerp(&h16, r_fpart_hi,
768 packed_hi, packed_hi2);
769 }
770 }
771
772 *colors_lo = packed_lo;
773 *colors_hi = packed_hi;
774 }
775
776
777 /**
778 * Sample the texture/mipmap using given image filter and mip filter.
779 * data0_ptr and data1_ptr point to the two mipmap levels to sample
780 * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
781 * If we're using nearest miplevel sampling the '1' values will be null/unused.
782 */
783 static void
784 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
785 unsigned img_filter,
786 unsigned mip_filter,
787 LLVMValueRef s,
788 LLVMValueRef t,
789 LLVMValueRef r,
790 LLVMValueRef ilevel0,
791 LLVMValueRef ilevel1,
792 LLVMValueRef lod_fpart,
793 LLVMValueRef colors_lo_var,
794 LLVMValueRef colors_hi_var)
795 {
796 LLVMBuilderRef builder = bld->gallivm->builder;
797 LLVMValueRef size0;
798 LLVMValueRef size1;
799 LLVMValueRef row_stride0_vec;
800 LLVMValueRef row_stride1_vec;
801 LLVMValueRef img_stride0_vec;
802 LLVMValueRef img_stride1_vec;
803 LLVMValueRef data_ptr0;
804 LLVMValueRef data_ptr1;
805 LLVMValueRef colors0_lo, colors0_hi;
806 LLVMValueRef colors1_lo, colors1_hi;
807
808 /* sample the first mipmap level */
809 lp_build_mipmap_level_sizes(bld, ilevel0,
810 &size0,
811 &row_stride0_vec, &img_stride0_vec);
812 data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
813 if (img_filter == PIPE_TEX_FILTER_NEAREST) {
814 lp_build_sample_image_nearest(bld,
815 size0,
816 row_stride0_vec, img_stride0_vec,
817 data_ptr0, s, t, r,
818 &colors0_lo, &colors0_hi);
819 }
820 else {
821 assert(img_filter == PIPE_TEX_FILTER_LINEAR);
822 lp_build_sample_image_linear(bld,
823 size0,
824 row_stride0_vec, img_stride0_vec,
825 data_ptr0, s, t, r,
826 &colors0_lo, &colors0_hi);
827 }
828
829 /* Store the first level's colors in the output variables */
830 LLVMBuildStore(builder, colors0_lo, colors_lo_var);
831 LLVMBuildStore(builder, colors0_hi, colors_hi_var);
832
833 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
834 LLVMValueRef h16_scale = lp_build_const_float(bld->gallivm, 256.0);
835 LLVMTypeRef i32_type = LLVMIntTypeInContext(bld->gallivm->context, 32);
836 struct lp_build_if_state if_ctx;
837 LLVMValueRef need_lerp;
838
839 lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16_scale, "");
840 lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32_type, "lod_fpart.fixed16");
841
842 /* need_lerp = lod_fpart > 0 */
843 need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
844 lod_fpart, LLVMConstNull(i32_type),
845 "need_lerp");
846
847 lp_build_if(&if_ctx, bld->gallivm, need_lerp);
848 {
849 struct lp_build_context h16_bld;
850
851 lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16));
852
853 /* sample the second mipmap level */
854 lp_build_mipmap_level_sizes(bld, ilevel1,
855 &size1,
856 &row_stride1_vec, &img_stride1_vec);
857 data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
858 if (img_filter == PIPE_TEX_FILTER_NEAREST) {
859 lp_build_sample_image_nearest(bld,
860 size1,
861 row_stride1_vec, img_stride1_vec,
862 data_ptr1, s, t, r,
863 &colors1_lo, &colors1_hi);
864 }
865 else {
866 lp_build_sample_image_linear(bld,
867 size1,
868 row_stride1_vec, img_stride1_vec,
869 data_ptr1, s, t, r,
870 &colors1_lo, &colors1_hi);
871 }
872
873 /* interpolate samples from the two mipmap levels */
874
875 lod_fpart = LLVMBuildTrunc(builder, lod_fpart, h16_bld.elem_type, "");
876 lod_fpart = lp_build_broadcast_scalar(&h16_bld, lod_fpart);
877
878 #if HAVE_LLVM == 0x208
879 /* This is a work-around for a bug in LLVM 2.8.
880 * Evidently, something goes wrong in the construction of the
881 * lod_fpart short[8] vector. Adding this no-effect shuffle seems
882 * to force the vector to be properly constructed.
883 * Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f).
884 */
885 {
886 LLVMValueRef shuffles[8], shuffle;
887 int i;
888 assert(h16_bld.type.length <= Elements(shuffles));
889 for (i = 0; i < h16_bld.type.length; i++)
890 shuffles[i] = lp_build_const_int32(bld->gallivm, 2 * (i & 1));
891 shuffle = LLVMConstVector(shuffles, h16_bld.type.length);
892 lod_fpart = LLVMBuildShuffleVector(builder,
893 lod_fpart, lod_fpart,
894 shuffle, "");
895 }
896 #endif
897
898 colors0_lo = lp_build_lerp(&h16_bld, lod_fpart,
899 colors0_lo, colors1_lo);
900 colors0_hi = lp_build_lerp(&h16_bld, lod_fpart,
901 colors0_hi, colors1_hi);
902
903 LLVMBuildStore(builder, colors0_lo, colors_lo_var);
904 LLVMBuildStore(builder, colors0_hi, colors_hi_var);
905 }
906 lp_build_endif(&if_ctx);
907 }
908 }
909
910
911
912 /**
913 * Texture sampling in AoS format. Used when sampling common 32-bit/texel
914 * formats. 1D/2D/3D/cube texture supported. All mipmap sampling modes
915 * but only limited texture coord wrap modes.
916 */
917 void
918 lp_build_sample_aos(struct lp_build_sample_context *bld,
919 unsigned unit,
920 LLVMValueRef s,
921 LLVMValueRef t,
922 LLVMValueRef r,
923 const LLVMValueRef *ddx,
924 const LLVMValueRef *ddy,
925 LLVMValueRef lod_bias, /* optional */
926 LLVMValueRef explicit_lod, /* optional */
927 LLVMValueRef texel_out[4])
928 {
929 struct lp_build_context *int_bld = &bld->int_bld;
930 LLVMBuilderRef builder = bld->gallivm->builder;
931 const unsigned mip_filter = bld->static_state->min_mip_filter;
932 const unsigned min_filter = bld->static_state->min_img_filter;
933 const unsigned mag_filter = bld->static_state->mag_img_filter;
934 const unsigned dims = bld->dims;
935 LLVMValueRef lod_ipart = NULL, lod_fpart = NULL;
936 LLVMValueRef ilevel0, ilevel1 = NULL;
937 LLVMValueRef packed, packed_lo, packed_hi;
938 LLVMValueRef unswizzled[4];
939 LLVMValueRef face_ddx[4], face_ddy[4];
940 struct lp_build_context h16_bld;
941 LLVMValueRef first_level;
942 LLVMValueRef i32t_zero = lp_build_const_int32(bld->gallivm, 0);
943
944 /* we only support the common/simple wrap modes at this time */
945 assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s));
946 if (dims >= 2)
947 assert(lp_is_simple_wrap_mode(bld->static_state->wrap_t));
948 if (dims >= 3)
949 assert(lp_is_simple_wrap_mode(bld->static_state->wrap_r));
950
951
952 /* make 16-bit fixed-pt builder context */
953 lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16));
954
955 /* cube face selection, compute pre-face coords, etc. */
956 if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
957 LLVMValueRef face, face_s, face_t;
958 lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
959 s = face_s; /* vec */
960 t = face_t; /* vec */
961 /* use 'r' to indicate cube face */
962 r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
963
964 /* recompute ddx, ddy using the new (s,t) face texcoords */
965 face_ddx[0] = lp_build_scalar_ddx(&bld->coord_bld, s);
966 face_ddx[1] = lp_build_scalar_ddx(&bld->coord_bld, t);
967 face_ddx[2] = NULL;
968 face_ddx[3] = NULL;
969 face_ddy[0] = lp_build_scalar_ddy(&bld->coord_bld, s);
970 face_ddy[1] = lp_build_scalar_ddy(&bld->coord_bld, t);
971 face_ddy[2] = NULL;
972 face_ddy[3] = NULL;
973 ddx = face_ddx;
974 ddy = face_ddy;
975 }
976
977 /*
978 * Compute the level of detail (float).
979 */
980 if (min_filter != mag_filter ||
981 mip_filter != PIPE_TEX_MIPFILTER_NONE) {
982 /* Need to compute lod either to choose mipmap levels or to
983 * distinguish between minification/magnification with one mipmap level.
984 */
985 lp_build_lod_selector(bld, unit, ddx, ddy,
986 lod_bias, explicit_lod,
987 mip_filter,
988 &lod_ipart, &lod_fpart);
989 } else {
990 lod_ipart = i32t_zero;
991 }
992
993 /*
994 * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
995 */
996 switch (mip_filter) {
997 default:
998 assert(0 && "bad mip_filter value in lp_build_sample_aos()");
999 /* fall-through */
1000 case PIPE_TEX_MIPFILTER_NONE:
1001 /* always use mip level 0 */
1002 if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1003 /* XXX this is a work-around for an apparent bug in LLVM 2.7.
1004 * We should be able to set ilevel0 = const(0) but that causes
1005 * bad x86 code to be emitted.
1006 */
1007 assert(lod_ipart);
1008 lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
1009 }
1010 else {
1011 first_level = bld->dynamic_state->first_level(bld->dynamic_state,
1012 bld->gallivm, unit);
1013 ilevel0 = first_level;
1014 }
1015 break;
1016 case PIPE_TEX_MIPFILTER_NEAREST:
1017 assert(lod_ipart);
1018 lp_build_nearest_mip_level(bld, unit, lod_ipart, &ilevel0);
1019 break;
1020 case PIPE_TEX_MIPFILTER_LINEAR:
1021 assert(lod_ipart);
1022 assert(lod_fpart);
1023 lp_build_linear_mip_levels(bld, unit,
1024 lod_ipart, &lod_fpart,
1025 &ilevel0, &ilevel1);
1026 break;
1027 }
1028
1029 /*
1030 * Get/interpolate texture colors.
1031 */
1032
1033 packed_lo = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_lo");
1034 packed_hi = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_hi");
1035
1036 if (min_filter == mag_filter) {
1037 /* no need to distinquish between minification and magnification */
1038 lp_build_sample_mipmap(bld,
1039 min_filter, mip_filter,
1040 s, t, r,
1041 ilevel0, ilevel1, lod_fpart,
1042 packed_lo, packed_hi);
1043 }
1044 else {
1045 /* Emit conditional to choose min image filter or mag image filter
1046 * depending on the lod being > 0 or <= 0, respectively.
1047 */
1048 struct lp_build_if_state if_ctx;
1049 LLVMValueRef minify;
1050
1051 /* minify = lod >= 0.0 */
1052 minify = LLVMBuildICmp(builder, LLVMIntSGE,
1053 lod_ipart, int_bld->zero, "");
1054
1055 lp_build_if(&if_ctx, bld->gallivm, minify);
1056 {
1057 /* Use the minification filter */
1058 lp_build_sample_mipmap(bld,
1059 min_filter, mip_filter,
1060 s, t, r,
1061 ilevel0, ilevel1, lod_fpart,
1062 packed_lo, packed_hi);
1063 }
1064 lp_build_else(&if_ctx);
1065 {
1066 /* Use the magnification filter */
1067 lp_build_sample_mipmap(bld,
1068 mag_filter, PIPE_TEX_MIPFILTER_NONE,
1069 s, t, r,
1070 ilevel0, NULL, NULL,
1071 packed_lo, packed_hi);
1072 }
1073 lp_build_endif(&if_ctx);
1074 }
1075
1076 /*
1077 * combine the values stored in 'packed_lo' and 'packed_hi' variables
1078 * into 'packed'
1079 */
1080 packed = lp_build_pack2(bld->gallivm,
1081 h16_bld.type, lp_type_unorm(8),
1082 LLVMBuildLoad(builder, packed_lo, ""),
1083 LLVMBuildLoad(builder, packed_hi, ""));
1084
1085 /*
1086 * Convert to SoA and swizzle.
1087 */
1088 lp_build_rgba8_to_f32_soa(bld->gallivm,
1089 bld->texel_type,
1090 packed, unswizzled);
1091
1092 if (util_format_is_rgba8_variant(bld->format_desc)) {
1093 lp_build_format_swizzle_soa(bld->format_desc,
1094 &bld->texel_bld,
1095 unswizzled, texel_out);
1096 }
1097 else {
1098 texel_out[0] = unswizzled[0];
1099 texel_out[1] = unswizzled[1];
1100 texel_out[2] = unswizzled[2];
1101 texel_out[3] = unswizzled[3];
1102 }
1103 }