Merge branch 'glsl2'
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_sample_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * Texture sampling -- SoA.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 * @author Brian Paul <brianp@vmware.com>
34 */
35
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "util/u_debug.h"
39 #include "util/u_dump.h"
40 #include "util/u_memory.h"
41 #include "util/u_math.h"
42 #include "util/u_format.h"
43 #include "lp_bld_debug.h"
44 #include "lp_bld_type.h"
45 #include "lp_bld_const.h"
46 #include "lp_bld_conv.h"
47 #include "lp_bld_arit.h"
48 #include "lp_bld_logic.h"
49 #include "lp_bld_swizzle.h"
50 #include "lp_bld_pack.h"
51 #include "lp_bld_flow.h"
52 #include "lp_bld_gather.h"
53 #include "lp_bld_format.h"
54 #include "lp_bld_sample.h"
55 #include "lp_bld_quad.h"
56
57
58 /**
59 * Keep all information for sampling code generation in a single place.
60 */
61 struct lp_build_sample_context
62 {
63 LLVMBuilderRef builder;
64
65 const struct lp_sampler_static_state *static_state;
66
67 struct lp_sampler_dynamic_state *dynamic_state;
68
69 const struct util_format_description *format_desc;
70
71 /** regular scalar float type */
72 struct lp_type float_type;
73 struct lp_build_context float_bld;
74
75 /** regular scalar float type */
76 struct lp_type int_type;
77 struct lp_build_context int_bld;
78
79 /** Incoming coordinates type and build context */
80 struct lp_type coord_type;
81 struct lp_build_context coord_bld;
82
83 /** Unsigned integer coordinates */
84 struct lp_type uint_coord_type;
85 struct lp_build_context uint_coord_bld;
86
87 /** Signed integer coordinates */
88 struct lp_type int_coord_type;
89 struct lp_build_context int_coord_bld;
90
91 /** Output texels type and build context */
92 struct lp_type texel_type;
93 struct lp_build_context texel_bld;
94 };
95
96
97 /**
98 * Does the given texture wrap mode allow sampling the texture border color?
99 * XXX maybe move this into gallium util code.
100 */
101 static boolean
102 wrap_mode_uses_border_color(unsigned mode)
103 {
104 switch (mode) {
105 case PIPE_TEX_WRAP_REPEAT:
106 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
107 case PIPE_TEX_WRAP_MIRROR_REPEAT:
108 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
109 return FALSE;
110 case PIPE_TEX_WRAP_CLAMP:
111 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
112 case PIPE_TEX_WRAP_MIRROR_CLAMP:
113 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
114 return TRUE;
115 default:
116 assert(0 && "unexpected wrap mode");
117 return FALSE;
118 }
119 }
120
121
122 static LLVMValueRef
123 lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
124 LLVMValueRef data_array, LLVMValueRef level)
125 {
126 LLVMValueRef indexes[2], data_ptr;
127 indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
128 indexes[1] = level;
129 data_ptr = LLVMBuildGEP(bld->builder, data_array, indexes, 2, "");
130 data_ptr = LLVMBuildLoad(bld->builder, data_ptr, "");
131 return data_ptr;
132 }
133
134
135 static LLVMValueRef
136 lp_build_get_const_mipmap_level(struct lp_build_sample_context *bld,
137 LLVMValueRef data_array, int level)
138 {
139 LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
140 return lp_build_get_mipmap_level(bld, data_array, lvl);
141 }
142
143
144 /**
145 * Dereference stride_array[mipmap_level] array to get a stride.
146 * Return stride as a vector.
147 */
148 static LLVMValueRef
149 lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
150 LLVMValueRef stride_array, LLVMValueRef level)
151 {
152 LLVMValueRef indexes[2], stride;
153 indexes[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
154 indexes[1] = level;
155 stride = LLVMBuildGEP(bld->builder, stride_array, indexes, 2, "");
156 stride = LLVMBuildLoad(bld->builder, stride, "");
157 stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride);
158 return stride;
159 }
160
161
162 /** Dereference stride_array[0] array to get a stride (as vector). */
163 static LLVMValueRef
164 lp_build_get_const_level_stride_vec(struct lp_build_sample_context *bld,
165 LLVMValueRef stride_array, int level)
166 {
167 LLVMValueRef lvl = LLVMConstInt(LLVMInt32Type(), level, 0);
168 return lp_build_get_level_stride_vec(bld, stride_array, lvl);
169 }
170
171
172 static int
173 texture_dims(enum pipe_texture_target tex)
174 {
175 switch (tex) {
176 case PIPE_TEXTURE_1D:
177 return 1;
178 case PIPE_TEXTURE_2D:
179 case PIPE_TEXTURE_CUBE:
180 return 2;
181 case PIPE_TEXTURE_3D:
182 return 3;
183 default:
184 assert(0 && "bad texture target in texture_dims()");
185 return 2;
186 }
187 }
188
189
190 static void
191 apply_sampler_swizzle(struct lp_build_sample_context *bld,
192 LLVMValueRef *texel)
193 {
194 unsigned char swizzles[4];
195
196 swizzles[0] = bld->static_state->swizzle_r;
197 swizzles[1] = bld->static_state->swizzle_g;
198 swizzles[2] = bld->static_state->swizzle_b;
199 swizzles[3] = bld->static_state->swizzle_a;
200
201 lp_build_swizzle_soa_inplace(&bld->texel_bld, texel, swizzles);
202 }
203
204
205
206 /**
207 * Generate code to fetch a texel from a texture at int coords (x, y, z).
208 * The computation depends on whether the texture is 1D, 2D or 3D.
209 * The result, texel, will be:
210 * texel[0] = red values
211 * texel[1] = green values
212 * texel[2] = blue values
213 * texel[3] = alpha values
214 */
215 static void
216 lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
217 LLVMValueRef width,
218 LLVMValueRef height,
219 LLVMValueRef depth,
220 LLVMValueRef x,
221 LLVMValueRef y,
222 LLVMValueRef z,
223 LLVMValueRef y_stride,
224 LLVMValueRef z_stride,
225 LLVMValueRef data_ptr,
226 LLVMValueRef texel_out[4])
227 {
228 const int dims = texture_dims(bld->static_state->target);
229 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
230 LLVMValueRef offset;
231 LLVMValueRef i, j;
232 LLVMValueRef use_border = NULL;
233
234 /* use_border = x < 0 || x >= width || y < 0 || y >= height */
235 if (wrap_mode_uses_border_color(bld->static_state->wrap_s)) {
236 LLVMValueRef b1, b2;
237 b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
238 b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
239 use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
240 }
241
242 if (dims >= 2 && wrap_mode_uses_border_color(bld->static_state->wrap_t)) {
243 LLVMValueRef b1, b2;
244 b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
245 b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
246 if (use_border) {
247 use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1");
248 use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2");
249 }
250 else {
251 use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
252 }
253 }
254
255 if (dims == 3 && wrap_mode_uses_border_color(bld->static_state->wrap_r)) {
256 LLVMValueRef b1, b2;
257 b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
258 b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
259 if (use_border) {
260 use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1");
261 use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2");
262 }
263 else {
264 use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
265 }
266 }
267
268 /* convert x,y,z coords to linear offset from start of texture, in bytes */
269 lp_build_sample_offset(&bld->uint_coord_bld,
270 bld->format_desc,
271 x, y, z, y_stride, z_stride,
272 &offset, &i, &j);
273
274 if (use_border) {
275 /* If we can sample the border color, it means that texcoords may
276 * lie outside the bounds of the texture image. We need to do
277 * something to prevent reading out of bounds and causing a segfault.
278 *
279 * Simply AND the texture coords with !use_border. This will cause
280 * coords which are out of bounds to become zero. Zero's guaranteed
281 * to be inside the texture image.
282 */
283 offset = lp_build_andc(&bld->uint_coord_bld, offset, use_border);
284 }
285
286 lp_build_fetch_rgba_soa(bld->builder,
287 bld->format_desc,
288 bld->texel_type,
289 data_ptr, offset,
290 i, j,
291 texel_out);
292
293 apply_sampler_swizzle(bld, texel_out);
294
295 /*
296 * Note: if we find an app which frequently samples the texture border
297 * we might want to implement a true conditional here to avoid sampling
298 * the texture whenever possible (since that's quite a bit of code).
299 * Ex:
300 * if (use_border) {
301 * texel = border_color;
302 * }
303 * else {
304 * texel = sample_texture(coord);
305 * }
306 * As it is now, we always sample the texture, then selectively replace
307 * the texel color results with the border color.
308 */
309
310 if (use_border) {
311 /* select texel color or border color depending on use_border */
312 int chan;
313 for (chan = 0; chan < 4; chan++) {
314 LLVMValueRef border_chan =
315 lp_build_const_vec(bld->texel_type,
316 bld->static_state->border_color[chan]);
317 texel_out[chan] = lp_build_select(&bld->texel_bld, use_border,
318 border_chan, texel_out[chan]);
319 }
320 }
321 }
322
323
324 /**
325 * Fetch the texels as <4n x i8> in AoS form.
326 */
327 static LLVMValueRef
328 lp_build_sample_packed(struct lp_build_sample_context *bld,
329 LLVMValueRef x,
330 LLVMValueRef y,
331 LLVMValueRef y_stride,
332 LLVMValueRef data_array)
333 {
334 LLVMValueRef offset, i, j;
335 LLVMValueRef data_ptr;
336 LLVMValueRef res;
337
338 /* convert x,y,z coords to linear offset from start of texture, in bytes */
339 lp_build_sample_offset(&bld->uint_coord_bld,
340 bld->format_desc,
341 x, y, NULL, y_stride, NULL,
342 &offset, &i, &j);
343
344 /* get pointer to mipmap level 0 data */
345 data_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);
346
347 if (util_format_is_rgba8_variant(bld->format_desc)) {
348 /* Just fetch the data directly without swizzling */
349 assert(bld->format_desc->block.width == 1);
350 assert(bld->format_desc->block.height == 1);
351 assert(bld->format_desc->block.bits <= bld->texel_type.width);
352
353 res = lp_build_gather(bld->builder,
354 bld->texel_type.length,
355 bld->format_desc->block.bits,
356 bld->texel_type.width,
357 data_ptr, offset);
358 }
359 else {
360 struct lp_type type;
361
362 assert(bld->texel_type.width == 32);
363
364 memset(&type, 0, sizeof type);
365 type.width = 8;
366 type.length = bld->texel_type.length*4;
367 type.norm = TRUE;
368
369 res = lp_build_fetch_rgba_aos(bld->builder, bld->format_desc, type,
370 data_ptr, offset, i, j);
371 }
372
373 return res;
374 }
375
376
377 /**
378 * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
379 */
380 static LLVMValueRef
381 lp_build_coord_mirror(struct lp_build_sample_context *bld,
382 LLVMValueRef coord)
383 {
384 struct lp_build_context *coord_bld = &bld->coord_bld;
385 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
386 LLVMValueRef fract, flr, isOdd;
387
388 /* fract = coord - floor(coord) */
389 fract = lp_build_sub(coord_bld, coord, lp_build_floor(coord_bld, coord));
390
391 /* flr = ifloor(coord); */
392 flr = lp_build_ifloor(coord_bld, coord);
393
394 /* isOdd = flr & 1 */
395 isOdd = LLVMBuildAnd(bld->builder, flr, int_coord_bld->one, "");
396
397 /* make coord positive or negative depending on isOdd */
398 coord = lp_build_set_sign(coord_bld, fract, isOdd);
399
400 /* convert isOdd to float */
401 isOdd = lp_build_int_to_float(coord_bld, isOdd);
402
403 /* add isOdd to coord */
404 coord = lp_build_add(coord_bld, coord, isOdd);
405
406 return coord;
407 }
408
409
410 /**
411 * We only support a few wrap modes in lp_build_sample_wrap_int() at this time.
412 * Return whether the given mode is supported by that function.
413 */
414 static boolean
415 is_simple_wrap_mode(unsigned mode)
416 {
417 switch (mode) {
418 case PIPE_TEX_WRAP_REPEAT:
419 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
420 return TRUE;
421 default:
422 return FALSE;
423 }
424 }
425
426
427 /**
428 * Build LLVM code for texture wrap mode, for scaled integer texcoords.
429 * \param coord the incoming texcoord (s,t,r or q) scaled to the texture size
430 * \param length the texture size along one dimension
431 * \param is_pot if TRUE, length is a power of two
432 * \param wrap_mode one of PIPE_TEX_WRAP_x
433 */
434 static LLVMValueRef
435 lp_build_sample_wrap_int(struct lp_build_sample_context *bld,
436 LLVMValueRef coord,
437 LLVMValueRef length,
438 boolean is_pot,
439 unsigned wrap_mode)
440 {
441 struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
442 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
443 LLVMValueRef length_minus_one;
444
445 length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
446
447 switch(wrap_mode) {
448 case PIPE_TEX_WRAP_REPEAT:
449 if(is_pot)
450 coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
451 else
452 /* Signed remainder won't give the right results for negative
453 * dividends but unsigned remainder does.*/
454 coord = LLVMBuildURem(bld->builder, coord, length, "");
455 break;
456
457 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
458 coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
459 coord = lp_build_min(int_coord_bld, coord, length_minus_one);
460 break;
461
462 case PIPE_TEX_WRAP_CLAMP:
463 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
464 case PIPE_TEX_WRAP_MIRROR_REPEAT:
465 case PIPE_TEX_WRAP_MIRROR_CLAMP:
466 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
467 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
468 default:
469 assert(0);
470 }
471
472 return coord;
473 }
474
475
476 /**
477 * Build LLVM code for texture wrap mode for linear filtering.
478 * \param x0_out returns first integer texcoord
479 * \param x1_out returns second integer texcoord
480 * \param weight_out returns linear interpolation weight
481 */
482 static void
483 lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
484 LLVMValueRef coord,
485 LLVMValueRef length,
486 boolean is_pot,
487 unsigned wrap_mode,
488 LLVMValueRef *x0_out,
489 LLVMValueRef *x1_out,
490 LLVMValueRef *weight_out)
491 {
492 struct lp_build_context *coord_bld = &bld->coord_bld;
493 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
494 struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
495 LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
496 LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
497 LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
498 LLVMValueRef coord0, coord1, weight;
499
500 switch(wrap_mode) {
501 case PIPE_TEX_WRAP_REPEAT:
502 /* mul by size and subtract 0.5 */
503 coord = lp_build_mul(coord_bld, coord, length_f);
504 coord = lp_build_sub(coord_bld, coord, half);
505 /* convert to int */
506 coord0 = lp_build_ifloor(coord_bld, coord);
507 coord1 = lp_build_add(uint_coord_bld, coord0, uint_coord_bld->one);
508 /* compute lerp weight */
509 weight = lp_build_fract(coord_bld, coord);
510 /* repeat wrap */
511 if (is_pot) {
512 coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
513 coord1 = LLVMBuildAnd(bld->builder, coord1, length_minus_one, "");
514 }
515 else {
516 /* Signed remainder won't give the right results for negative
517 * dividends but unsigned remainder does.*/
518 coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
519 coord1 = LLVMBuildURem(bld->builder, coord1, length, "");
520 }
521 break;
522
523 case PIPE_TEX_WRAP_CLAMP:
524 if (bld->static_state->normalized_coords) {
525 /* scale coord to length */
526 coord = lp_build_mul(coord_bld, coord, length_f);
527 }
528
529 /* clamp to [0, length] */
530 coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f);
531
532 coord = lp_build_sub(coord_bld, coord, half);
533
534 weight = lp_build_fract(coord_bld, coord);
535 coord0 = lp_build_ifloor(coord_bld, coord);
536 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
537 break;
538
539 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
540 if (bld->static_state->normalized_coords) {
541 /* clamp to [0,1] */
542 coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, coord_bld->one);
543 /* mul by tex size and subtract 0.5 */
544 coord = lp_build_mul(coord_bld, coord, length_f);
545 coord = lp_build_sub(coord_bld, coord, half);
546 }
547 else {
548 LLVMValueRef min, max;
549 /* clamp to [0.5, length - 0.5] */
550 min = half;
551 max = lp_build_sub(coord_bld, length_f, min);
552 coord = lp_build_clamp(coord_bld, coord, min, max);
553 }
554 /* compute lerp weight */
555 weight = lp_build_fract(coord_bld, coord);
556 /* coord0 = floor(coord); */
557 coord0 = lp_build_ifloor(coord_bld, coord);
558 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
559 /* coord0 = max(coord0, 0) */
560 coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
561 /* coord1 = min(coord1, length-1) */
562 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
563 break;
564
565 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
566 {
567 LLVMValueRef min, max;
568 if (bld->static_state->normalized_coords) {
569 /* scale coord to length */
570 coord = lp_build_mul(coord_bld, coord, length_f);
571 }
572 /* clamp to [-0.5, length + 0.5] */
573 min = lp_build_const_vec(coord_bld->type, -0.5F);
574 max = lp_build_sub(coord_bld, length_f, min);
575 coord = lp_build_clamp(coord_bld, coord, min, max);
576 coord = lp_build_sub(coord_bld, coord, half);
577 /* compute lerp weight */
578 weight = lp_build_fract(coord_bld, coord);
579 /* convert to int */
580 coord0 = lp_build_ifloor(coord_bld, coord);
581 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
582 }
583 break;
584
585 case PIPE_TEX_WRAP_MIRROR_REPEAT:
586 /* compute mirror function */
587 coord = lp_build_coord_mirror(bld, coord);
588
589 /* scale coord to length */
590 coord = lp_build_mul(coord_bld, coord, length_f);
591 coord = lp_build_sub(coord_bld, coord, half);
592
593 /* compute lerp weight */
594 weight = lp_build_fract(coord_bld, coord);
595
596 /* convert to int coords */
597 coord0 = lp_build_ifloor(coord_bld, coord);
598 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
599
600 /* coord0 = max(coord0, 0) */
601 coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
602 /* coord1 = min(coord1, length-1) */
603 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
604 break;
605
606 case PIPE_TEX_WRAP_MIRROR_CLAMP:
607 coord = lp_build_abs(coord_bld, coord);
608
609 if (bld->static_state->normalized_coords) {
610 /* scale coord to length */
611 coord = lp_build_mul(coord_bld, coord, length_f);
612 }
613
614 /* clamp to [0, length] */
615 coord = lp_build_min(coord_bld, coord, length_f);
616
617 coord = lp_build_sub(coord_bld, coord, half);
618
619 weight = lp_build_fract(coord_bld, coord);
620 coord0 = lp_build_ifloor(coord_bld, coord);
621 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
622 break;
623
624 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
625 {
626 LLVMValueRef min, max;
627
628 coord = lp_build_abs(coord_bld, coord);
629
630 if (bld->static_state->normalized_coords) {
631 /* scale coord to length */
632 coord = lp_build_mul(coord_bld, coord, length_f);
633 }
634
635 /* clamp to [0.5, length - 0.5] */
636 min = half;
637 max = lp_build_sub(coord_bld, length_f, min);
638 coord = lp_build_clamp(coord_bld, coord, min, max);
639
640 coord = lp_build_sub(coord_bld, coord, half);
641
642 weight = lp_build_fract(coord_bld, coord);
643 coord0 = lp_build_ifloor(coord_bld, coord);
644 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
645 }
646 break;
647
648 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
649 {
650 LLVMValueRef min, max;
651
652 coord = lp_build_abs(coord_bld, coord);
653
654 if (bld->static_state->normalized_coords) {
655 /* scale coord to length */
656 coord = lp_build_mul(coord_bld, coord, length_f);
657 }
658
659 /* clamp to [-0.5, length + 0.5] */
660 min = lp_build_negate(coord_bld, half);
661 max = lp_build_sub(coord_bld, length_f, min);
662 coord = lp_build_clamp(coord_bld, coord, min, max);
663
664 coord = lp_build_sub(coord_bld, coord, half);
665
666 weight = lp_build_fract(coord_bld, coord);
667 coord0 = lp_build_ifloor(coord_bld, coord);
668 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
669 }
670 break;
671
672 default:
673 assert(0);
674 coord0 = NULL;
675 coord1 = NULL;
676 weight = NULL;
677 }
678
679 *x0_out = coord0;
680 *x1_out = coord1;
681 *weight_out = weight;
682 }
683
684
685 /**
686 * Build LLVM code for texture wrap mode for nearest filtering.
687 * \param coord the incoming texcoord (nominally in [0,1])
688 * \param length the texture size along one dimension, as int
689 * \param is_pot if TRUE, length is a power of two
690 * \param wrap_mode one of PIPE_TEX_WRAP_x
691 */
692 static LLVMValueRef
693 lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
694 LLVMValueRef coord,
695 LLVMValueRef length,
696 boolean is_pot,
697 unsigned wrap_mode)
698 {
699 struct lp_build_context *coord_bld = &bld->coord_bld;
700 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
701 struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
702 LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
703 LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
704 LLVMValueRef icoord;
705
706 switch(wrap_mode) {
707 case PIPE_TEX_WRAP_REPEAT:
708 coord = lp_build_mul(coord_bld, coord, length_f);
709 icoord = lp_build_ifloor(coord_bld, coord);
710 if (is_pot)
711 icoord = LLVMBuildAnd(bld->builder, icoord, length_minus_one, "");
712 else
713 /* Signed remainder won't give the right results for negative
714 * dividends but unsigned remainder does.*/
715 icoord = LLVMBuildURem(bld->builder, icoord, length, "");
716 break;
717
718 case PIPE_TEX_WRAP_CLAMP:
719 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
720 if (bld->static_state->normalized_coords) {
721 /* scale coord to length */
722 coord = lp_build_mul(coord_bld, coord, length_f);
723 }
724
725 /* floor */
726 icoord = lp_build_ifloor(coord_bld, coord);
727
728 /* clamp to [0, length - 1]. */
729 icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
730 length_minus_one);
731 break;
732
733 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
734 /* Note: this is the same as CLAMP_TO_EDGE, except min = -min */
735 {
736 LLVMValueRef min, max;
737
738 if (bld->static_state->normalized_coords) {
739 /* scale coord to length */
740 coord = lp_build_mul(coord_bld, coord, length_f);
741 }
742
743 icoord = lp_build_ifloor(coord_bld, coord);
744
745 /* clamp to [-1, length] */
746 min = lp_build_negate(int_coord_bld, int_coord_bld->one);
747 max = length;
748 icoord = lp_build_clamp(int_coord_bld, icoord, min, max);
749 }
750 break;
751
752 case PIPE_TEX_WRAP_MIRROR_REPEAT:
753 /* compute mirror function */
754 coord = lp_build_coord_mirror(bld, coord);
755
756 /* scale coord to length */
757 assert(bld->static_state->normalized_coords);
758 coord = lp_build_mul(coord_bld, coord, length_f);
759
760 icoord = lp_build_ifloor(coord_bld, coord);
761
762 /* clamp to [0, length - 1] */
763 icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
764 break;
765
766 case PIPE_TEX_WRAP_MIRROR_CLAMP:
767 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
768 coord = lp_build_abs(coord_bld, coord);
769
770 if (bld->static_state->normalized_coords) {
771 /* scale coord to length */
772 coord = lp_build_mul(coord_bld, coord, length_f);
773 }
774
775 icoord = lp_build_ifloor(coord_bld, coord);
776
777 /* clamp to [0, length - 1] */
778 icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
779 break;
780
781 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
782 coord = lp_build_abs(coord_bld, coord);
783
784 if (bld->static_state->normalized_coords) {
785 /* scale coord to length */
786 coord = lp_build_mul(coord_bld, coord, length_f);
787 }
788
789 icoord = lp_build_ifloor(coord_bld, coord);
790
791 /* clamp to [0, length] */
792 icoord = lp_build_min(int_coord_bld, icoord, length);
793 break;
794
795 default:
796 assert(0);
797 icoord = NULL;
798 }
799
800 return icoord;
801 }
802
803
804 /**
805 * Codegen equivalent for u_minify().
806 * Return max(1, base_size >> level);
807 */
808 static LLVMValueRef
809 lp_build_minify(struct lp_build_sample_context *bld,
810 LLVMValueRef base_size,
811 LLVMValueRef level)
812 {
813 LLVMValueRef size = LLVMBuildLShr(bld->builder, base_size, level, "minify");
814 size = lp_build_max(&bld->int_coord_bld, size, bld->int_coord_bld.one);
815 return size;
816 }
817
818
819 /**
820 * Generate code to compute texture level of detail (lambda).
821 * \param ddx partial derivatives of (s, t, r, q) with respect to X
822 * \param ddy partial derivatives of (s, t, r, q) with respect to Y
823 * \param lod_bias optional float vector with the shader lod bias
824 * \param explicit_lod optional float vector with the explicit lod
825 * \param width scalar int texture width
826 * \param height scalar int texture height
827 * \param depth scalar int texture depth
828 *
829 * XXX: The resulting lod is scalar, so ignore all but the first element of
830 * derivatives, lod_bias, etc that are passed by the shader.
831 */
832 static LLVMValueRef
833 lp_build_lod_selector(struct lp_build_sample_context *bld,
834 const LLVMValueRef ddx[4],
835 const LLVMValueRef ddy[4],
836 LLVMValueRef lod_bias, /* optional */
837 LLVMValueRef explicit_lod, /* optional */
838 LLVMValueRef width,
839 LLVMValueRef height,
840 LLVMValueRef depth)
841
842 {
843 if (bld->static_state->min_lod == bld->static_state->max_lod) {
844 /* User is forcing sampling from a particular mipmap level.
845 * This is hit during mipmap generation.
846 */
847 return LLVMConstReal(LLVMFloatType(), bld->static_state->min_lod);
848 }
849 else {
850 struct lp_build_context *float_bld = &bld->float_bld;
851 LLVMValueRef sampler_lod_bias = LLVMConstReal(LLVMFloatType(),
852 bld->static_state->lod_bias);
853 LLVMValueRef min_lod = LLVMConstReal(LLVMFloatType(),
854 bld->static_state->min_lod);
855 LLVMValueRef max_lod = LLVMConstReal(LLVMFloatType(),
856 bld->static_state->max_lod);
857 LLVMValueRef index0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
858 LLVMValueRef lod;
859
860 if (explicit_lod) {
861 lod = LLVMBuildExtractElement(bld->builder, explicit_lod,
862 index0, "");
863 }
864 else {
865 const int dims = texture_dims(bld->static_state->target);
866 LLVMValueRef dsdx, dsdy;
867 LLVMValueRef dtdx = NULL, dtdy = NULL, drdx = NULL, drdy = NULL;
868 LLVMValueRef rho;
869
870 dsdx = LLVMBuildExtractElement(bld->builder, ddx[0], index0, "dsdx");
871 dsdx = lp_build_abs(float_bld, dsdx);
872 dsdy = LLVMBuildExtractElement(bld->builder, ddy[0], index0, "dsdy");
873 dsdy = lp_build_abs(float_bld, dsdy);
874 if (dims > 1) {
875 dtdx = LLVMBuildExtractElement(bld->builder, ddx[1], index0, "dtdx");
876 dtdx = lp_build_abs(float_bld, dtdx);
877 dtdy = LLVMBuildExtractElement(bld->builder, ddy[1], index0, "dtdy");
878 dtdy = lp_build_abs(float_bld, dtdy);
879 if (dims > 2) {
880 drdx = LLVMBuildExtractElement(bld->builder, ddx[2], index0, "drdx");
881 drdx = lp_build_abs(float_bld, drdx);
882 drdy = LLVMBuildExtractElement(bld->builder, ddy[2], index0, "drdy");
883 drdy = lp_build_abs(float_bld, drdy);
884 }
885 }
886
887 /* Compute rho = max of all partial derivatives scaled by texture size.
888 * XXX this could be vectorized somewhat
889 */
890 rho = LLVMBuildFMul(bld->builder,
891 lp_build_max(float_bld, dsdx, dsdy),
892 lp_build_int_to_float(float_bld, width), "");
893 if (dims > 1) {
894 LLVMValueRef max;
895 max = LLVMBuildFMul(bld->builder,
896 lp_build_max(float_bld, dtdx, dtdy),
897 lp_build_int_to_float(float_bld, height), "");
898 rho = lp_build_max(float_bld, rho, max);
899 if (dims > 2) {
900 max = LLVMBuildFMul(bld->builder,
901 lp_build_max(float_bld, drdx, drdy),
902 lp_build_int_to_float(float_bld, depth), "");
903 rho = lp_build_max(float_bld, rho, max);
904 }
905 }
906
907 /* compute lod = log2(rho) */
908 lod = lp_build_log2(float_bld, rho);
909
910 /* add shader lod bias */
911 if (lod_bias) {
912 lod_bias = LLVMBuildExtractElement(bld->builder, lod_bias,
913 index0, "");
914 lod = LLVMBuildFAdd(bld->builder, lod, lod_bias, "shader_lod_bias");
915 }
916 }
917
918 /* add sampler lod bias */
919 lod = LLVMBuildFAdd(bld->builder, lod, sampler_lod_bias, "sampler_lod_bias");
920
921 /* clamp lod */
922 lod = lp_build_clamp(float_bld, lod, min_lod, max_lod);
923
924 return lod;
925 }
926 }
927
928
929 /**
930 * For PIPE_TEX_MIPFILTER_NEAREST, convert float LOD to integer
931 * mipmap level index.
932 * Note: this is all scalar code.
933 * \param lod scalar float texture level of detail
934 * \param level_out returns integer
935 */
936 static void
937 lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
938 unsigned unit,
939 LLVMValueRef lod,
940 LLVMValueRef *level_out)
941 {
942 struct lp_build_context *float_bld = &bld->float_bld;
943 struct lp_build_context *int_bld = &bld->int_bld;
944 LLVMValueRef last_level, level;
945
946 LLVMValueRef zero = LLVMConstInt(LLVMInt32Type(), 0, 0);
947
948 last_level = bld->dynamic_state->last_level(bld->dynamic_state,
949 bld->builder, unit);
950
951 /* convert float lod to integer */
952 level = lp_build_iround(float_bld, lod);
953
954 /* clamp level to legal range of levels */
955 *level_out = lp_build_clamp(int_bld, level, zero, last_level);
956 }
957
958
959 /**
960 * For PIPE_TEX_MIPFILTER_LINEAR, convert float LOD to integer to
961 * two (adjacent) mipmap level indexes. Later, we'll sample from those
962 * two mipmap levels and interpolate between them.
963 */
964 static void
965 lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
966 unsigned unit,
967 LLVMValueRef lod,
968 LLVMValueRef *level0_out,
969 LLVMValueRef *level1_out,
970 LLVMValueRef *weight_out)
971 {
972 struct lp_build_context *float_bld = &bld->float_bld;
973 struct lp_build_context *int_bld = &bld->int_bld;
974 LLVMValueRef last_level, level;
975
976 last_level = bld->dynamic_state->last_level(bld->dynamic_state,
977 bld->builder, unit);
978
979 /* convert float lod to integer */
980 level = lp_build_ifloor(float_bld, lod);
981
982 /* compute level 0 and clamp to legal range of levels */
983 *level0_out = lp_build_clamp(int_bld, level,
984 int_bld->zero,
985 last_level);
986 /* compute level 1 and clamp to legal range of levels */
987 level = lp_build_add(int_bld, level, int_bld->one);
988 *level1_out = lp_build_clamp(int_bld, level,
989 int_bld->zero,
990 last_level);
991
992 *weight_out = lp_build_fract(float_bld, lod);
993 }
994
995
996 /**
997 * Generate code to sample a mipmap level with nearest filtering.
998 * If sampling a cube texture, r = cube face in [0,5].
999 */
1000 static void
1001 lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
1002 LLVMValueRef width_vec,
1003 LLVMValueRef height_vec,
1004 LLVMValueRef depth_vec,
1005 LLVMValueRef row_stride_vec,
1006 LLVMValueRef img_stride_vec,
1007 LLVMValueRef data_ptr,
1008 LLVMValueRef s,
1009 LLVMValueRef t,
1010 LLVMValueRef r,
1011 LLVMValueRef colors_out[4])
1012 {
1013 const int dims = texture_dims(bld->static_state->target);
1014 LLVMValueRef x, y, z;
1015
1016 /*
1017 * Compute integer texcoords.
1018 */
1019 x = lp_build_sample_wrap_nearest(bld, s, width_vec,
1020 bld->static_state->pot_width,
1021 bld->static_state->wrap_s);
1022 lp_build_name(x, "tex.x.wrapped");
1023
1024 if (dims >= 2) {
1025 y = lp_build_sample_wrap_nearest(bld, t, height_vec,
1026 bld->static_state->pot_height,
1027 bld->static_state->wrap_t);
1028 lp_build_name(y, "tex.y.wrapped");
1029
1030 if (dims == 3) {
1031 z = lp_build_sample_wrap_nearest(bld, r, depth_vec,
1032 bld->static_state->pot_height,
1033 bld->static_state->wrap_r);
1034 lp_build_name(z, "tex.z.wrapped");
1035 }
1036 else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1037 z = r;
1038 }
1039 else {
1040 z = NULL;
1041 }
1042 }
1043 else {
1044 y = z = NULL;
1045 }
1046
1047 /*
1048 * Get texture colors.
1049 */
1050 lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1051 x, y, z,
1052 row_stride_vec, img_stride_vec,
1053 data_ptr, colors_out);
1054 }
1055
1056
1057 /**
1058 * Generate code to sample a mipmap level with linear filtering.
1059 * If sampling a cube texture, r = cube face in [0,5].
1060 */
1061 static void
1062 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
1063 LLVMValueRef width_vec,
1064 LLVMValueRef height_vec,
1065 LLVMValueRef depth_vec,
1066 LLVMValueRef row_stride_vec,
1067 LLVMValueRef img_stride_vec,
1068 LLVMValueRef data_ptr,
1069 LLVMValueRef s,
1070 LLVMValueRef t,
1071 LLVMValueRef r,
1072 LLVMValueRef colors_out[4])
1073 {
1074 const int dims = texture_dims(bld->static_state->target);
1075 LLVMValueRef x0, y0, z0, x1, y1, z1;
1076 LLVMValueRef s_fpart, t_fpart, r_fpart;
1077 LLVMValueRef neighbors[2][2][4];
1078 int chan;
1079
1080 /*
1081 * Compute integer texcoords.
1082 */
1083 lp_build_sample_wrap_linear(bld, s, width_vec,
1084 bld->static_state->pot_width,
1085 bld->static_state->wrap_s,
1086 &x0, &x1, &s_fpart);
1087 lp_build_name(x0, "tex.x0.wrapped");
1088 lp_build_name(x1, "tex.x1.wrapped");
1089
1090 if (dims >= 2) {
1091 lp_build_sample_wrap_linear(bld, t, height_vec,
1092 bld->static_state->pot_height,
1093 bld->static_state->wrap_t,
1094 &y0, &y1, &t_fpart);
1095 lp_build_name(y0, "tex.y0.wrapped");
1096 lp_build_name(y1, "tex.y1.wrapped");
1097
1098 if (dims == 3) {
1099 lp_build_sample_wrap_linear(bld, r, depth_vec,
1100 bld->static_state->pot_depth,
1101 bld->static_state->wrap_r,
1102 &z0, &z1, &r_fpart);
1103 lp_build_name(z0, "tex.z0.wrapped");
1104 lp_build_name(z1, "tex.z1.wrapped");
1105 }
1106 else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1107 z0 = z1 = r; /* cube face */
1108 r_fpart = NULL;
1109 }
1110 else {
1111 z0 = z1 = NULL;
1112 r_fpart = NULL;
1113 }
1114 }
1115 else {
1116 y0 = y1 = t_fpart = NULL;
1117 z0 = z1 = r_fpart = NULL;
1118 }
1119
1120 /*
1121 * Get texture colors.
1122 */
1123 /* get x0/x1 texels */
1124 lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1125 x0, y0, z0,
1126 row_stride_vec, img_stride_vec,
1127 data_ptr, neighbors[0][0]);
1128 lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1129 x1, y0, z0,
1130 row_stride_vec, img_stride_vec,
1131 data_ptr, neighbors[0][1]);
1132
1133 if (dims == 1) {
1134 /* Interpolate two samples from 1D image to produce one color */
1135 for (chan = 0; chan < 4; chan++) {
1136 colors_out[chan] = lp_build_lerp(&bld->texel_bld, s_fpart,
1137 neighbors[0][0][chan],
1138 neighbors[0][1][chan]);
1139 }
1140 }
1141 else {
1142 /* 2D/3D texture */
1143 LLVMValueRef colors0[4];
1144
1145 /* get x0/x1 texels at y1 */
1146 lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1147 x0, y1, z0,
1148 row_stride_vec, img_stride_vec,
1149 data_ptr, neighbors[1][0]);
1150 lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1151 x1, y1, z0,
1152 row_stride_vec, img_stride_vec,
1153 data_ptr, neighbors[1][1]);
1154
1155 /* Bilinear interpolate the four samples from the 2D image / 3D slice */
1156 for (chan = 0; chan < 4; chan++) {
1157 colors0[chan] = lp_build_lerp_2d(&bld->texel_bld,
1158 s_fpart, t_fpart,
1159 neighbors[0][0][chan],
1160 neighbors[0][1][chan],
1161 neighbors[1][0][chan],
1162 neighbors[1][1][chan]);
1163 }
1164
1165 if (dims == 3) {
1166 LLVMValueRef neighbors1[2][2][4];
1167 LLVMValueRef colors1[4];
1168
1169 /* get x0/x1/y0/y1 texels at z1 */
1170 lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1171 x0, y0, z1,
1172 row_stride_vec, img_stride_vec,
1173 data_ptr, neighbors1[0][0]);
1174 lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1175 x1, y0, z1,
1176 row_stride_vec, img_stride_vec,
1177 data_ptr, neighbors1[0][1]);
1178 lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1179 x0, y1, z1,
1180 row_stride_vec, img_stride_vec,
1181 data_ptr, neighbors1[1][0]);
1182 lp_build_sample_texel_soa(bld, width_vec, height_vec, depth_vec,
1183 x1, y1, z1,
1184 row_stride_vec, img_stride_vec,
1185 data_ptr, neighbors1[1][1]);
1186
1187 /* Bilinear interpolate the four samples from the second Z slice */
1188 for (chan = 0; chan < 4; chan++) {
1189 colors1[chan] = lp_build_lerp_2d(&bld->texel_bld,
1190 s_fpart, t_fpart,
1191 neighbors1[0][0][chan],
1192 neighbors1[0][1][chan],
1193 neighbors1[1][0][chan],
1194 neighbors1[1][1][chan]);
1195 }
1196
1197 /* Linearly interpolate the two samples from the two 3D slices */
1198 for (chan = 0; chan < 4; chan++) {
1199 colors_out[chan] = lp_build_lerp(&bld->texel_bld,
1200 r_fpart,
1201 colors0[chan], colors1[chan]);
1202 }
1203 }
1204 else {
1205 /* 2D tex */
1206 for (chan = 0; chan < 4; chan++) {
1207 colors_out[chan] = colors0[chan];
1208 }
1209 }
1210 }
1211 }
1212
1213
1214 /** Helper used by lp_build_cube_lookup() */
1215 static LLVMValueRef
1216 lp_build_cube_ima(struct lp_build_context *coord_bld, LLVMValueRef coord)
1217 {
1218 /* ima = -0.5 / abs(coord); */
1219 LLVMValueRef negHalf = lp_build_const_vec(coord_bld->type, -0.5);
1220 LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
1221 LLVMValueRef ima = lp_build_div(coord_bld, negHalf, absCoord);
1222 return ima;
1223 }
1224
1225
1226 /**
1227 * Helper used by lp_build_cube_lookup()
1228 * \param sign scalar +1 or -1
1229 * \param coord float vector
1230 * \param ima float vector
1231 */
1232 static LLVMValueRef
1233 lp_build_cube_coord(struct lp_build_context *coord_bld,
1234 LLVMValueRef sign, int negate_coord,
1235 LLVMValueRef coord, LLVMValueRef ima)
1236 {
1237 /* return negate(coord) * ima * sign + 0.5; */
1238 LLVMValueRef half = lp_build_const_vec(coord_bld->type, 0.5);
1239 LLVMValueRef res;
1240
1241 assert(negate_coord == +1 || negate_coord == -1);
1242
1243 if (negate_coord == -1) {
1244 coord = lp_build_negate(coord_bld, coord);
1245 }
1246
1247 res = lp_build_mul(coord_bld, coord, ima);
1248 if (sign) {
1249 sign = lp_build_broadcast_scalar(coord_bld, sign);
1250 res = lp_build_mul(coord_bld, res, sign);
1251 }
1252 res = lp_build_add(coord_bld, res, half);
1253
1254 return res;
1255 }
1256
1257
1258 /** Helper used by lp_build_cube_lookup()
1259 * Return (major_coord >= 0) ? pos_face : neg_face;
1260 */
1261 static LLVMValueRef
1262 lp_build_cube_face(struct lp_build_sample_context *bld,
1263 LLVMValueRef major_coord,
1264 unsigned pos_face, unsigned neg_face)
1265 {
1266 LLVMValueRef cmp = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
1267 major_coord,
1268 bld->float_bld.zero, "");
1269 LLVMValueRef pos = LLVMConstInt(LLVMInt32Type(), pos_face, 0);
1270 LLVMValueRef neg = LLVMConstInt(LLVMInt32Type(), neg_face, 0);
1271 LLVMValueRef res = LLVMBuildSelect(bld->builder, cmp, pos, neg, "");
1272 return res;
1273 }
1274
1275
1276
1277 /**
1278 * Generate code to do cube face selection and compute per-face texcoords.
1279 */
1280 static void
1281 lp_build_cube_lookup(struct lp_build_sample_context *bld,
1282 LLVMValueRef s,
1283 LLVMValueRef t,
1284 LLVMValueRef r,
1285 LLVMValueRef *face,
1286 LLVMValueRef *face_s,
1287 LLVMValueRef *face_t)
1288 {
1289 struct lp_build_context *float_bld = &bld->float_bld;
1290 struct lp_build_context *coord_bld = &bld->coord_bld;
1291 LLVMValueRef rx, ry, rz;
1292 LLVMValueRef arx, ary, arz;
1293 LLVMValueRef c25 = LLVMConstReal(LLVMFloatType(), 0.25);
1294 LLVMValueRef arx_ge_ary, arx_ge_arz;
1295 LLVMValueRef ary_ge_arx, ary_ge_arz;
1296 LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz;
1297 LLVMValueRef rx_pos, ry_pos, rz_pos;
1298
1299 assert(bld->coord_bld.type.length == 4);
1300
1301 /*
1302 * Use the average of the four pixel's texcoords to choose the face.
1303 */
1304 rx = lp_build_mul(float_bld, c25,
1305 lp_build_sum_vector(&bld->coord_bld, s));
1306 ry = lp_build_mul(float_bld, c25,
1307 lp_build_sum_vector(&bld->coord_bld, t));
1308 rz = lp_build_mul(float_bld, c25,
1309 lp_build_sum_vector(&bld->coord_bld, r));
1310
1311 arx = lp_build_abs(float_bld, rx);
1312 ary = lp_build_abs(float_bld, ry);
1313 arz = lp_build_abs(float_bld, rz);
1314
1315 /*
1316 * Compare sign/magnitude of rx,ry,rz to determine face
1317 */
1318 arx_ge_ary = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, ary, "");
1319 arx_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, arx, arz, "");
1320 ary_ge_arx = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arx, "");
1321 ary_ge_arz = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ary, arz, "");
1322
1323 arx_ge_ary_arz = LLVMBuildAnd(bld->builder, arx_ge_ary, arx_ge_arz, "");
1324 ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
1325
1326 rx_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rx, float_bld->zero, "");
1327 ry_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, ry, float_bld->zero, "");
1328 rz_pos = LLVMBuildFCmp(bld->builder, LLVMRealUGE, rz, float_bld->zero, "");
1329
1330 {
1331 struct lp_build_flow_context *flow_ctx;
1332 struct lp_build_if_state if_ctx;
1333
1334 flow_ctx = lp_build_flow_create(bld->builder);
1335 lp_build_flow_scope_begin(flow_ctx);
1336
1337 *face_s = bld->coord_bld.undef;
1338 *face_t = bld->coord_bld.undef;
1339 *face = bld->int_bld.undef;
1340
1341 lp_build_name(*face_s, "face_s");
1342 lp_build_name(*face_t, "face_t");
1343 lp_build_name(*face, "face");
1344
1345 lp_build_flow_scope_declare(flow_ctx, face_s);
1346 lp_build_flow_scope_declare(flow_ctx, face_t);
1347 lp_build_flow_scope_declare(flow_ctx, face);
1348
1349 lp_build_if(&if_ctx, flow_ctx, bld->builder, arx_ge_ary_arz);
1350 {
1351 /* +/- X face */
1352 LLVMValueRef sign = lp_build_sgn(float_bld, rx);
1353 LLVMValueRef ima = lp_build_cube_ima(coord_bld, s);
1354 *face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima);
1355 *face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
1356 *face = lp_build_cube_face(bld, rx,
1357 PIPE_TEX_FACE_POS_X,
1358 PIPE_TEX_FACE_NEG_X);
1359 }
1360 lp_build_else(&if_ctx);
1361 {
1362 struct lp_build_flow_context *flow_ctx2;
1363 struct lp_build_if_state if_ctx2;
1364
1365 LLVMValueRef face_s2 = bld->coord_bld.undef;
1366 LLVMValueRef face_t2 = bld->coord_bld.undef;
1367 LLVMValueRef face2 = bld->int_bld.undef;
1368
1369 flow_ctx2 = lp_build_flow_create(bld->builder);
1370 lp_build_flow_scope_begin(flow_ctx2);
1371 lp_build_flow_scope_declare(flow_ctx2, &face_s2);
1372 lp_build_flow_scope_declare(flow_ctx2, &face_t2);
1373 lp_build_flow_scope_declare(flow_ctx2, &face2);
1374
1375 ary_ge_arx_arz = LLVMBuildAnd(bld->builder, ary_ge_arx, ary_ge_arz, "");
1376
1377 lp_build_if(&if_ctx2, flow_ctx2, bld->builder, ary_ge_arx_arz);
1378 {
1379 /* +/- Y face */
1380 LLVMValueRef sign = lp_build_sgn(float_bld, ry);
1381 LLVMValueRef ima = lp_build_cube_ima(coord_bld, t);
1382 face_s2 = lp_build_cube_coord(coord_bld, NULL, -1, s, ima);
1383 face_t2 = lp_build_cube_coord(coord_bld, sign, -1, r, ima);
1384 face2 = lp_build_cube_face(bld, ry,
1385 PIPE_TEX_FACE_POS_Y,
1386 PIPE_TEX_FACE_NEG_Y);
1387 }
1388 lp_build_else(&if_ctx2);
1389 {
1390 /* +/- Z face */
1391 LLVMValueRef sign = lp_build_sgn(float_bld, rz);
1392 LLVMValueRef ima = lp_build_cube_ima(coord_bld, r);
1393 face_s2 = lp_build_cube_coord(coord_bld, sign, -1, s, ima);
1394 face_t2 = lp_build_cube_coord(coord_bld, NULL, +1, t, ima);
1395 face2 = lp_build_cube_face(bld, rz,
1396 PIPE_TEX_FACE_POS_Z,
1397 PIPE_TEX_FACE_NEG_Z);
1398 }
1399 lp_build_endif(&if_ctx2);
1400 lp_build_flow_scope_end(flow_ctx2);
1401 lp_build_flow_destroy(flow_ctx2);
1402 *face_s = face_s2;
1403 *face_t = face_t2;
1404 *face = face2;
1405 }
1406
1407 lp_build_endif(&if_ctx);
1408 lp_build_flow_scope_end(flow_ctx);
1409 lp_build_flow_destroy(flow_ctx);
1410 }
1411 }
1412
1413
1414
1415 /**
1416 * Sample the texture/mipmap using given image filter and mip filter.
1417 * data0_ptr and data1_ptr point to the two mipmap levels to sample
1418 * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
1419 * If we're using nearest miplevel sampling the '1' values will be null/unused.
1420 */
1421 static void
1422 lp_build_sample_mipmap(struct lp_build_sample_context *bld,
1423 unsigned img_filter,
1424 unsigned mip_filter,
1425 LLVMValueRef s,
1426 LLVMValueRef t,
1427 LLVMValueRef r,
1428 LLVMValueRef lod_fpart,
1429 LLVMValueRef width0_vec,
1430 LLVMValueRef width1_vec,
1431 LLVMValueRef height0_vec,
1432 LLVMValueRef height1_vec,
1433 LLVMValueRef depth0_vec,
1434 LLVMValueRef depth1_vec,
1435 LLVMValueRef row_stride0_vec,
1436 LLVMValueRef row_stride1_vec,
1437 LLVMValueRef img_stride0_vec,
1438 LLVMValueRef img_stride1_vec,
1439 LLVMValueRef data_ptr0,
1440 LLVMValueRef data_ptr1,
1441 LLVMValueRef *colors_out)
1442 {
1443 LLVMValueRef colors0[4], colors1[4];
1444 int chan;
1445
1446 if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1447 /* sample the first mipmap level */
1448 lp_build_sample_image_nearest(bld,
1449 width0_vec, height0_vec, depth0_vec,
1450 row_stride0_vec, img_stride0_vec,
1451 data_ptr0, s, t, r, colors0);
1452
1453 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1454 /* sample the second mipmap level */
1455 lp_build_sample_image_nearest(bld,
1456 width1_vec, height1_vec, depth1_vec,
1457 row_stride1_vec, img_stride1_vec,
1458 data_ptr1, s, t, r, colors1);
1459 }
1460 }
1461 else {
1462 assert(img_filter == PIPE_TEX_FILTER_LINEAR);
1463
1464 /* sample the first mipmap level */
1465 lp_build_sample_image_linear(bld,
1466 width0_vec, height0_vec, depth0_vec,
1467 row_stride0_vec, img_stride0_vec,
1468 data_ptr0, s, t, r, colors0);
1469
1470 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1471 /* sample the second mipmap level */
1472 lp_build_sample_image_linear(bld,
1473 width1_vec, height1_vec, depth1_vec,
1474 row_stride1_vec, img_stride1_vec,
1475 data_ptr1, s, t, r, colors1);
1476 }
1477 }
1478
1479 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1480 /* interpolate samples from the two mipmap levels */
1481 for (chan = 0; chan < 4; chan++) {
1482 colors_out[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
1483 colors0[chan], colors1[chan]);
1484 }
1485 }
1486 else {
1487 /* use first/only level's colors */
1488 for (chan = 0; chan < 4; chan++) {
1489 colors_out[chan] = colors0[chan];
1490 }
1491 }
1492 }
1493
1494
1495
1496 /**
1497 * General texture sampling codegen.
1498 * This function handles texture sampling for all texture targets (1D,
1499 * 2D, 3D, cube) and all filtering modes.
1500 */
1501 static void
1502 lp_build_sample_general(struct lp_build_sample_context *bld,
1503 unsigned unit,
1504 LLVMValueRef s,
1505 LLVMValueRef t,
1506 LLVMValueRef r,
1507 const LLVMValueRef *ddx,
1508 const LLVMValueRef *ddy,
1509 LLVMValueRef lod_bias, /* optional */
1510 LLVMValueRef explicit_lod, /* optional */
1511 LLVMValueRef width,
1512 LLVMValueRef height,
1513 LLVMValueRef depth,
1514 LLVMValueRef width_vec,
1515 LLVMValueRef height_vec,
1516 LLVMValueRef depth_vec,
1517 LLVMValueRef row_stride_array,
1518 LLVMValueRef img_stride_array,
1519 LLVMValueRef data_array,
1520 LLVMValueRef *colors_out)
1521 {
1522 struct lp_build_context *float_bld = &bld->float_bld;
1523 const unsigned mip_filter = bld->static_state->min_mip_filter;
1524 const unsigned min_filter = bld->static_state->min_img_filter;
1525 const unsigned mag_filter = bld->static_state->mag_img_filter;
1526 const int dims = texture_dims(bld->static_state->target);
1527 LLVMValueRef lod = NULL, lod_fpart = NULL;
1528 LLVMValueRef ilevel0, ilevel1 = NULL, ilevel0_vec, ilevel1_vec = NULL;
1529 LLVMValueRef width0_vec = NULL, height0_vec = NULL, depth0_vec = NULL;
1530 LLVMValueRef width1_vec = NULL, height1_vec = NULL, depth1_vec = NULL;
1531 LLVMValueRef row_stride0_vec = NULL, row_stride1_vec = NULL;
1532 LLVMValueRef img_stride0_vec = NULL, img_stride1_vec = NULL;
1533 LLVMValueRef data_ptr0, data_ptr1 = NULL;
1534 LLVMValueRef face_ddx[4], face_ddy[4];
1535
1536 /*
1537 printf("%s mip %d min %d mag %d\n", __FUNCTION__,
1538 mip_filter, min_filter, mag_filter);
1539 */
1540
1541 /*
1542 * Choose cube face, recompute texcoords and derivatives for the chosen face.
1543 */
1544 if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1545 LLVMValueRef face, face_s, face_t;
1546 lp_build_cube_lookup(bld, s, t, r, &face, &face_s, &face_t);
1547 s = face_s; /* vec */
1548 t = face_t; /* vec */
1549 /* use 'r' to indicate cube face */
1550 r = lp_build_broadcast_scalar(&bld->int_coord_bld, face); /* vec */
1551
1552 /* recompute ddx, ddy using the new (s,t) face texcoords */
1553 face_ddx[0] = lp_build_ddx(&bld->coord_bld, s);
1554 face_ddx[1] = lp_build_ddx(&bld->coord_bld, t);
1555 face_ddx[2] = NULL;
1556 face_ddx[3] = NULL;
1557 face_ddy[0] = lp_build_ddy(&bld->coord_bld, s);
1558 face_ddy[1] = lp_build_ddy(&bld->coord_bld, t);
1559 face_ddy[2] = NULL;
1560 face_ddy[3] = NULL;
1561 ddx = face_ddx;
1562 ddy = face_ddy;
1563 }
1564
1565 /*
1566 * Compute the level of detail (float).
1567 */
1568 if (min_filter != mag_filter ||
1569 mip_filter != PIPE_TEX_MIPFILTER_NONE) {
1570 /* Need to compute lod either to choose mipmap levels or to
1571 * distinguish between minification/magnification with one mipmap level.
1572 */
1573 lod = lp_build_lod_selector(bld, ddx, ddy,
1574 lod_bias, explicit_lod,
1575 width, height, depth);
1576 }
1577
1578 /*
1579 * Compute integer mipmap level(s) to fetch texels from.
1580 */
1581 if (mip_filter == PIPE_TEX_MIPFILTER_NONE) {
1582 /* always use mip level 0 */
1583 if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
1584 /* XXX this is a work-around for an apparent bug in LLVM 2.7.
1585 * We should be able to set ilevel0 = const(0) but that causes
1586 * bad x86 code to be emitted.
1587 */
1588 lod = lp_build_const_elem(bld->coord_bld.type, 0.0);
1589 lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
1590 }
1591 else {
1592 ilevel0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
1593 }
1594 }
1595 else {
1596 assert(lod);
1597 if (mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
1598 lp_build_nearest_mip_level(bld, unit, lod, &ilevel0);
1599 }
1600 else {
1601 assert(mip_filter == PIPE_TEX_MIPFILTER_LINEAR);
1602 lp_build_linear_mip_levels(bld, unit, lod, &ilevel0, &ilevel1,
1603 &lod_fpart);
1604 lod_fpart = lp_build_broadcast_scalar(&bld->coord_bld, lod_fpart);
1605 }
1606 }
1607
1608 /*
1609 * Convert scalar integer mipmap levels into vectors.
1610 */
1611 ilevel0_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel0);
1612 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR)
1613 ilevel1_vec = lp_build_broadcast_scalar(&bld->int_coord_bld, ilevel1);
1614
1615 /*
1616 * Compute width, height at mipmap level 'ilevel0'
1617 */
1618 width0_vec = lp_build_minify(bld, width_vec, ilevel0_vec);
1619 if (dims >= 2) {
1620 height0_vec = lp_build_minify(bld, height_vec, ilevel0_vec);
1621 row_stride0_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
1622 ilevel0);
1623 if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
1624 img_stride0_vec = lp_build_get_level_stride_vec(bld,
1625 img_stride_array,
1626 ilevel0);
1627 if (dims == 3) {
1628 depth0_vec = lp_build_minify(bld, depth_vec, ilevel0_vec);
1629 }
1630 }
1631 }
1632 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1633 /* compute width, height, depth for second mipmap level at 'ilevel1' */
1634 width1_vec = lp_build_minify(bld, width_vec, ilevel1_vec);
1635 if (dims >= 2) {
1636 height1_vec = lp_build_minify(bld, height_vec, ilevel1_vec);
1637 row_stride1_vec = lp_build_get_level_stride_vec(bld, row_stride_array,
1638 ilevel1);
1639 if (dims == 3 || bld->static_state->target == PIPE_TEXTURE_CUBE) {
1640 img_stride1_vec = lp_build_get_level_stride_vec(bld,
1641 img_stride_array,
1642 ilevel1);
1643 if (dims ==3) {
1644 depth1_vec = lp_build_minify(bld, depth_vec, ilevel1_vec);
1645 }
1646 }
1647 }
1648 }
1649
1650 /*
1651 * Get pointer(s) to image data for mipmap level(s).
1652 */
1653 data_ptr0 = lp_build_get_mipmap_level(bld, data_array, ilevel0);
1654 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1655 data_ptr1 = lp_build_get_mipmap_level(bld, data_array, ilevel1);
1656 }
1657
1658 /*
1659 * Get/interpolate texture colors.
1660 */
1661 if (min_filter == mag_filter) {
1662 /* no need to distinquish between minification and magnification */
1663 lp_build_sample_mipmap(bld, min_filter, mip_filter, s, t, r, lod_fpart,
1664 width0_vec, width1_vec,
1665 height0_vec, height1_vec,
1666 depth0_vec, depth1_vec,
1667 row_stride0_vec, row_stride1_vec,
1668 img_stride0_vec, img_stride1_vec,
1669 data_ptr0, data_ptr1,
1670 colors_out);
1671 }
1672 else {
1673 /* Emit conditional to choose min image filter or mag image filter
1674 * depending on the lod being >0 or <= 0, respectively.
1675 */
1676 struct lp_build_flow_context *flow_ctx;
1677 struct lp_build_if_state if_ctx;
1678 LLVMValueRef minify;
1679
1680 flow_ctx = lp_build_flow_create(bld->builder);
1681 lp_build_flow_scope_begin(flow_ctx);
1682
1683 lp_build_flow_scope_declare(flow_ctx, &colors_out[0]);
1684 lp_build_flow_scope_declare(flow_ctx, &colors_out[1]);
1685 lp_build_flow_scope_declare(flow_ctx, &colors_out[2]);
1686 lp_build_flow_scope_declare(flow_ctx, &colors_out[3]);
1687
1688 /* minify = lod > 0.0 */
1689 minify = LLVMBuildFCmp(bld->builder, LLVMRealUGE,
1690 lod, float_bld->zero, "");
1691
1692 lp_build_if(&if_ctx, flow_ctx, bld->builder, minify);
1693 {
1694 /* Use the minification filter */
1695 lp_build_sample_mipmap(bld, min_filter, mip_filter,
1696 s, t, r, lod_fpart,
1697 width0_vec, width1_vec,
1698 height0_vec, height1_vec,
1699 depth0_vec, depth1_vec,
1700 row_stride0_vec, row_stride1_vec,
1701 img_stride0_vec, img_stride1_vec,
1702 data_ptr0, data_ptr1,
1703 colors_out);
1704 }
1705 lp_build_else(&if_ctx);
1706 {
1707 /* Use the magnification filter */
1708 lp_build_sample_mipmap(bld, mag_filter, mip_filter,
1709 s, t, r, lod_fpart,
1710 width0_vec, width1_vec,
1711 height0_vec, height1_vec,
1712 depth0_vec, depth1_vec,
1713 row_stride0_vec, row_stride1_vec,
1714 img_stride0_vec, img_stride1_vec,
1715 data_ptr0, data_ptr1,
1716 colors_out);
1717 }
1718 lp_build_endif(&if_ctx);
1719
1720 lp_build_flow_scope_end(flow_ctx);
1721 lp_build_flow_destroy(flow_ctx);
1722 }
1723 }
1724
1725
1726
1727 static void
1728 lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
1729 LLVMValueRef s,
1730 LLVMValueRef t,
1731 LLVMValueRef width,
1732 LLVMValueRef height,
1733 LLVMValueRef stride_array,
1734 LLVMValueRef data_array,
1735 LLVMValueRef texel_out[4])
1736 {
1737 LLVMBuilderRef builder = bld->builder;
1738 struct lp_build_context i32, h16, u8n;
1739 LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
1740 LLVMValueRef i32_c8, i32_c128, i32_c255;
1741 LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
1742 LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
1743 LLVMValueRef x0, x1;
1744 LLVMValueRef y0, y1;
1745 LLVMValueRef neighbors[2][2];
1746 LLVMValueRef neighbors_lo[2][2];
1747 LLVMValueRef neighbors_hi[2][2];
1748 LLVMValueRef packed, packed_lo, packed_hi;
1749 LLVMValueRef unswizzled[4];
1750 LLVMValueRef stride;
1751
1752 assert(bld->static_state->target == PIPE_TEXTURE_2D);
1753 assert(bld->static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR);
1754 assert(bld->static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR);
1755 assert(bld->static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE);
1756
1757 lp_build_context_init(&i32, builder, lp_type_int_vec(32));
1758 lp_build_context_init(&h16, builder, lp_type_ufixed(16));
1759 lp_build_context_init(&u8n, builder, lp_type_unorm(8));
1760
1761 i32_vec_type = lp_build_vec_type(i32.type);
1762 h16_vec_type = lp_build_vec_type(h16.type);
1763 u8n_vec_type = lp_build_vec_type(u8n.type);
1764
1765 if (bld->static_state->normalized_coords) {
1766 LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
1767 LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width, coord_vec_type, "");
1768 LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height, coord_vec_type, "");
1769 s = lp_build_mul(&bld->coord_bld, s, fp_width);
1770 t = lp_build_mul(&bld->coord_bld, t, fp_height);
1771 }
1772
1773 /* scale coords by 256 (8 fractional bits) */
1774 s = lp_build_mul_imm(&bld->coord_bld, s, 256);
1775 t = lp_build_mul_imm(&bld->coord_bld, t, 256);
1776
1777 /* convert float to int */
1778 s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
1779 t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
1780
1781 /* subtract 0.5 (add -128) */
1782 i32_c128 = lp_build_const_int_vec(i32.type, -128);
1783 s = LLVMBuildAdd(builder, s, i32_c128, "");
1784 t = LLVMBuildAdd(builder, t, i32_c128, "");
1785
1786 /* compute floor (shift right 8) */
1787 i32_c8 = lp_build_const_int_vec(i32.type, 8);
1788 s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
1789 t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
1790
1791 /* compute fractional part (AND with 0xff) */
1792 i32_c255 = lp_build_const_int_vec(i32.type, 255);
1793 s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
1794 t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
1795
1796 x0 = s_ipart;
1797 y0 = t_ipart;
1798
1799 x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
1800 y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
1801
1802 x0 = lp_build_sample_wrap_int(bld, x0, width, bld->static_state->pot_width,
1803 bld->static_state->wrap_s);
1804 y0 = lp_build_sample_wrap_int(bld, y0, height, bld->static_state->pot_height,
1805 bld->static_state->wrap_t);
1806
1807 x1 = lp_build_sample_wrap_int(bld, x1, width, bld->static_state->pot_width,
1808 bld->static_state->wrap_s);
1809 y1 = lp_build_sample_wrap_int(bld, y1, height, bld->static_state->pot_height,
1810 bld->static_state->wrap_t);
1811
1812 /*
1813 * Transform 4 x i32 in
1814 *
1815 * s_fpart = {s0, s1, s2, s3}
1816 *
1817 * into 8 x i16
1818 *
1819 * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
1820 *
1821 * into two 8 x i16
1822 *
1823 * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
1824 * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
1825 *
1826 * and likewise for t_fpart. There is no risk of loosing precision here
1827 * since the fractional parts only use the lower 8bits.
1828 */
1829
1830 s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
1831 t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
1832
1833 {
1834 LLVMTypeRef elem_type = LLVMInt32Type();
1835 LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
1836 LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
1837 LLVMValueRef shuffle_lo;
1838 LLVMValueRef shuffle_hi;
1839 unsigned i, j;
1840
1841 for(j = 0; j < h16.type.length; j += 4) {
1842 #ifdef PIPE_ARCH_LITTLE_ENDIAN
1843 unsigned subindex = 0;
1844 #else
1845 unsigned subindex = 1;
1846 #endif
1847 LLVMValueRef index;
1848
1849 index = LLVMConstInt(elem_type, j/2 + subindex, 0);
1850 for(i = 0; i < 4; ++i)
1851 shuffles_lo[j + i] = index;
1852
1853 index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
1854 for(i = 0; i < 4; ++i)
1855 shuffles_hi[j + i] = index;
1856 }
1857
1858 shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
1859 shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
1860
1861 s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, "");
1862 t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, "");
1863 s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, "");
1864 t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, "");
1865 }
1866
1867 stride = lp_build_get_const_level_stride_vec(bld, stride_array, 0);
1868
1869 /*
1870 * Fetch the pixels as 4 x 32bit (rgba order might differ):
1871 *
1872 * rgba0 rgba1 rgba2 rgba3
1873 *
1874 * bit cast them into 16 x u8
1875 *
1876 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
1877 *
1878 * unpack them into two 8 x i16:
1879 *
1880 * r0 g0 b0 a0 r1 g1 b1 a1
1881 * r2 g2 b2 a2 r3 g3 b3 a3
1882 *
1883 * The higher 8 bits of the resulting elements will be zero.
1884 */
1885
1886 neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_array);
1887 neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_array);
1888 neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_array);
1889 neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_array);
1890
1891 neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, "");
1892 neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, "");
1893 neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, "");
1894 neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, "");
1895
1896 lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]);
1897 lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]);
1898 lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]);
1899 lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]);
1900
1901 /*
1902 * Linear interpolate with 8.8 fixed point.
1903 */
1904
1905 packed_lo = lp_build_lerp_2d(&h16,
1906 s_fpart_lo, t_fpart_lo,
1907 neighbors_lo[0][0],
1908 neighbors_lo[0][1],
1909 neighbors_lo[1][0],
1910 neighbors_lo[1][1]);
1911
1912 packed_hi = lp_build_lerp_2d(&h16,
1913 s_fpart_hi, t_fpart_hi,
1914 neighbors_hi[0][0],
1915 neighbors_hi[0][1],
1916 neighbors_hi[1][0],
1917 neighbors_hi[1][1]);
1918
1919 packed = lp_build_pack2(builder, h16.type, u8n.type, packed_lo, packed_hi);
1920
1921 /*
1922 * Convert to SoA and swizzle.
1923 */
1924
1925 lp_build_rgba8_to_f32_soa(bld->builder,
1926 bld->texel_type,
1927 packed, unswizzled);
1928
1929 if (util_format_is_rgba8_variant(bld->format_desc)) {
1930 lp_build_format_swizzle_soa(bld->format_desc,
1931 &bld->texel_bld,
1932 unswizzled, texel_out);
1933 } else {
1934 texel_out[0] = unswizzled[0];
1935 texel_out[1] = unswizzled[1];
1936 texel_out[2] = unswizzled[2];
1937 texel_out[3] = unswizzled[3];
1938 }
1939
1940 apply_sampler_swizzle(bld, texel_out);
1941 }
1942
1943
1944 static void
1945 lp_build_sample_compare(struct lp_build_sample_context *bld,
1946 LLVMValueRef p,
1947 LLVMValueRef texel[4])
1948 {
1949 struct lp_build_context *texel_bld = &bld->texel_bld;
1950 LLVMValueRef res;
1951 unsigned chan;
1952
1953 if(bld->static_state->compare_mode == PIPE_TEX_COMPARE_NONE)
1954 return;
1955
1956 /* TODO: Compare before swizzling, to avoid redundant computations */
1957 res = NULL;
1958 for(chan = 0; chan < 4; ++chan) {
1959 LLVMValueRef cmp;
1960 cmp = lp_build_cmp(texel_bld, bld->static_state->compare_func, p, texel[chan]);
1961 cmp = lp_build_select(texel_bld, cmp, texel_bld->one, texel_bld->zero);
1962
1963 if(res)
1964 res = lp_build_add(texel_bld, res, cmp);
1965 else
1966 res = cmp;
1967 }
1968
1969 assert(res);
1970 res = lp_build_mul(texel_bld, res, lp_build_const_vec(texel_bld->type, 0.25));
1971
1972 /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
1973 for(chan = 0; chan < 3; ++chan)
1974 texel[chan] = res;
1975 texel[3] = texel_bld->one;
1976 }
1977
1978
1979 /**
1980 * Just set texels to white instead of actually sampling the texture.
1981 * For debugging.
1982 */
1983 static void
1984 lp_build_sample_nop(struct lp_build_sample_context *bld,
1985 LLVMValueRef texel_out[4])
1986 {
1987 struct lp_build_context *texel_bld = &bld->texel_bld;
1988 unsigned chan;
1989
1990 for (chan = 0; chan < 4; chan++) {
1991 /*lp_bld_mov(texel_bld, texel, texel_bld->one);*/
1992 texel_out[chan] = texel_bld->one;
1993 }
1994 }
1995
1996
1997 /**
1998 * Build texture sampling code.
1999 * 'texel' will return a vector of four LLVMValueRefs corresponding to
2000 * R, G, B, A.
2001 * \param type vector float type to use for coords, etc.
2002 * \param ddx partial derivatives of (s,t,r,q) with respect to x
2003 * \param ddy partial derivatives of (s,t,r,q) with respect to y
2004 */
2005 void
2006 lp_build_sample_soa(LLVMBuilderRef builder,
2007 const struct lp_sampler_static_state *static_state,
2008 struct lp_sampler_dynamic_state *dynamic_state,
2009 struct lp_type type,
2010 unsigned unit,
2011 unsigned num_coords,
2012 const LLVMValueRef *coords,
2013 const LLVMValueRef ddx[4],
2014 const LLVMValueRef ddy[4],
2015 LLVMValueRef lod_bias, /* optional */
2016 LLVMValueRef explicit_lod, /* optional */
2017 LLVMValueRef texel_out[4])
2018 {
2019 struct lp_build_sample_context bld;
2020 LLVMValueRef width, width_vec;
2021 LLVMValueRef height, height_vec;
2022 LLVMValueRef depth, depth_vec;
2023 LLVMValueRef row_stride_array, img_stride_array;
2024 LLVMValueRef data_array;
2025 LLVMValueRef s;
2026 LLVMValueRef t;
2027 LLVMValueRef r;
2028
2029 if (0) {
2030 enum pipe_format fmt = static_state->format;
2031 debug_printf("Sample from %s\n", util_format_name(fmt));
2032 }
2033
2034 assert(type.floating);
2035
2036 /* Setup our build context */
2037 memset(&bld, 0, sizeof bld);
2038 bld.builder = builder;
2039 bld.static_state = static_state;
2040 bld.dynamic_state = dynamic_state;
2041 bld.format_desc = util_format_description(static_state->format);
2042
2043 bld.float_type = lp_type_float(32);
2044 bld.int_type = lp_type_int(32);
2045 bld.coord_type = type;
2046 bld.uint_coord_type = lp_uint_type(type);
2047 bld.int_coord_type = lp_int_type(type);
2048 bld.texel_type = type;
2049
2050 lp_build_context_init(&bld.float_bld, builder, bld.float_type);
2051 lp_build_context_init(&bld.int_bld, builder, bld.int_type);
2052 lp_build_context_init(&bld.coord_bld, builder, bld.coord_type);
2053 lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type);
2054 lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type);
2055 lp_build_context_init(&bld.texel_bld, builder, bld.texel_type);
2056
2057 /* Get the dynamic state */
2058 width = dynamic_state->width(dynamic_state, builder, unit);
2059 height = dynamic_state->height(dynamic_state, builder, unit);
2060 depth = dynamic_state->depth(dynamic_state, builder, unit);
2061 row_stride_array = dynamic_state->row_stride(dynamic_state, builder, unit);
2062 img_stride_array = dynamic_state->img_stride(dynamic_state, builder, unit);
2063 data_array = dynamic_state->data_ptr(dynamic_state, builder, unit);
2064 /* Note that data_array is an array[level] of pointers to texture images */
2065
2066 s = coords[0];
2067 t = coords[1];
2068 r = coords[2];
2069
2070 width_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
2071 height_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
2072 depth_vec = lp_build_broadcast_scalar(&bld.uint_coord_bld, depth);
2073
2074 if (0) {
2075 /* For debug: no-op texture sampling */
2076 lp_build_sample_nop(&bld, texel_out);
2077 }
2078 else if (util_format_fits_8unorm(bld.format_desc) &&
2079 bld.format_desc->nr_channels > 1 &&
2080 static_state->target == PIPE_TEXTURE_2D &&
2081 static_state->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
2082 static_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR &&
2083 static_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE &&
2084 is_simple_wrap_mode(static_state->wrap_s) &&
2085 is_simple_wrap_mode(static_state->wrap_t)) {
2086 /* special case */
2087 lp_build_sample_2d_linear_aos(&bld, s, t, width_vec, height_vec,
2088 row_stride_array, data_array, texel_out);
2089 }
2090 else {
2091 lp_build_sample_general(&bld, unit, s, t, r, ddx, ddy,
2092 lod_bias, explicit_lod,
2093 width, height, depth,
2094 width_vec, height_vec, depth_vec,
2095 row_stride_array, img_stride_array,
2096 data_array,
2097 texel_out);
2098 }
2099
2100 lp_build_sample_compare(&bld, r, texel_out);
2101 }