Merge branch 'object-purgeable'
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_sample_soa.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * Texture sampling -- SoA.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 */
34
35 #include "pipe/p_defines.h"
36 #include "pipe/p_state.h"
37 #include "util/u_debug.h"
38 #include "util/u_dump.h"
39 #include "util/u_memory.h"
40 #include "util/u_math.h"
41 #include "util/u_format.h"
42 #include "util/u_cpu_detect.h"
43 #include "lp_bld_debug.h"
44 #include "lp_bld_type.h"
45 #include "lp_bld_const.h"
46 #include "lp_bld_conv.h"
47 #include "lp_bld_arit.h"
48 #include "lp_bld_logic.h"
49 #include "lp_bld_swizzle.h"
50 #include "lp_bld_pack.h"
51 #include "lp_bld_format.h"
52 #include "lp_bld_sample.h"
53
54
55 /**
56 * Keep all information for sampling code generation in a single place.
57 */
58 struct lp_build_sample_context
59 {
60 LLVMBuilderRef builder;
61
62 const struct lp_sampler_static_state *static_state;
63
64 struct lp_sampler_dynamic_state *dynamic_state;
65
66 const struct util_format_description *format_desc;
67
68 /** Incoming coordinates type and build context */
69 struct lp_type coord_type;
70 struct lp_build_context coord_bld;
71
72 /** Unsigned integer coordinates */
73 struct lp_type uint_coord_type;
74 struct lp_build_context uint_coord_bld;
75
76 /** Signed integer coordinates */
77 struct lp_type int_coord_type;
78 struct lp_build_context int_coord_bld;
79
80 /** Output texels type and build context */
81 struct lp_type texel_type;
82 struct lp_build_context texel_bld;
83 };
84
85
86 /**
87 * Does the given texture wrap mode allow sampling the texture border color?
88 * XXX maybe move this into gallium util code.
89 */
90 static boolean
91 wrap_mode_uses_border_color(unsigned mode)
92 {
93 switch (mode) {
94 case PIPE_TEX_WRAP_REPEAT:
95 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
96 case PIPE_TEX_WRAP_MIRROR_REPEAT:
97 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
98 return FALSE;
99 case PIPE_TEX_WRAP_CLAMP:
100 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
101 case PIPE_TEX_WRAP_MIRROR_CLAMP:
102 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
103 return TRUE;
104 default:
105 assert(0 && "unexpected wrap mode");
106 return FALSE;
107 }
108 }
109
110
111
112 /**
113 * Gen code to fetch a texel from a texture at int coords (x, y).
114 * The result, texel, will be:
115 * texel[0] = red values
116 * texel[1] = green values
117 * texel[2] = blue values
118 * texel[3] = alpha values
119 */
120 static void
121 lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
122 LLVMValueRef width,
123 LLVMValueRef height,
124 LLVMValueRef x,
125 LLVMValueRef y,
126 LLVMValueRef y_stride,
127 LLVMValueRef data_ptr,
128 LLVMValueRef *texel)
129 {
130 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
131 LLVMValueRef offset;
132 LLVMValueRef packed;
133 LLVMValueRef use_border = NULL;
134
135 /* use_border = x < 0 || x >= width || y < 0 || y >= height */
136 if (wrap_mode_uses_border_color(bld->static_state->wrap_s)) {
137 LLVMValueRef b1, b2;
138 b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
139 b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
140 use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
141 }
142
143 if (wrap_mode_uses_border_color(bld->static_state->wrap_t)) {
144 LLVMValueRef b1, b2;
145 b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
146 b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
147 if (use_border) {
148 use_border = LLVMBuildOr(bld->builder, use_border, b1, "ub_or_b1");
149 use_border = LLVMBuildOr(bld->builder, use_border, b2, "ub_or_b2");
150 }
151 else {
152 use_border = LLVMBuildOr(bld->builder, b1, b2, "b1_or_b2");
153 }
154 }
155
156 /*
157 * Note: if we find an app which frequently samples the texture border
158 * we might want to implement a true conditional here to avoid sampling
159 * the texture whenever possible (since that's quite a bit of code).
160 * Ex:
161 * if (use_border) {
162 * texel = border_color;
163 * }
164 * else {
165 * texel = sample_texture(coord);
166 * }
167 * As it is now, we always sample the texture, then selectively replace
168 * the texel color results with the border color.
169 */
170
171 /* convert x,y coords to linear offset from start of texture, in bytes */
172 offset = lp_build_sample_offset(&bld->uint_coord_bld,
173 bld->format_desc,
174 x, y, y_stride,
175 data_ptr);
176
177 assert(bld->format_desc->block.width == 1);
178 assert(bld->format_desc->block.height == 1);
179 assert(bld->format_desc->block.bits <= bld->texel_type.width);
180
181 /* gather the texels from the texture */
182 packed = lp_build_gather(bld->builder,
183 bld->texel_type.length,
184 bld->format_desc->block.bits,
185 bld->texel_type.width,
186 data_ptr, offset);
187
188 /* convert texels to float rgba */
189 lp_build_unpack_rgba_soa(bld->builder,
190 bld->format_desc,
191 bld->texel_type,
192 packed, texel);
193
194 if (use_border) {
195 /* select texel color or border color depending on use_border */
196 int chan;
197 for (chan = 0; chan < 4; chan++) {
198 LLVMValueRef border_chan =
199 lp_build_const_scalar(bld->texel_type,
200 bld->static_state->border_color[chan]);
201 texel[chan] = lp_build_select(&bld->texel_bld, use_border,
202 border_chan, texel[chan]);
203 }
204 }
205 }
206
207
208 static LLVMValueRef
209 lp_build_sample_packed(struct lp_build_sample_context *bld,
210 LLVMValueRef x,
211 LLVMValueRef y,
212 LLVMValueRef y_stride,
213 LLVMValueRef data_ptr)
214 {
215 LLVMValueRef offset;
216
217 offset = lp_build_sample_offset(&bld->uint_coord_bld,
218 bld->format_desc,
219 x, y, y_stride,
220 data_ptr);
221
222 assert(bld->format_desc->block.width == 1);
223 assert(bld->format_desc->block.height == 1);
224 assert(bld->format_desc->block.bits <= bld->texel_type.width);
225
226 return lp_build_gather(bld->builder,
227 bld->texel_type.length,
228 bld->format_desc->block.bits,
229 bld->texel_type.width,
230 data_ptr, offset);
231 }
232
233
234 /**
235 * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
236 */
237 static LLVMValueRef
238 lp_build_coord_mirror(struct lp_build_sample_context *bld,
239 LLVMValueRef coord)
240 {
241 struct lp_build_context *coord_bld = &bld->coord_bld;
242 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
243 LLVMValueRef fract, flr, isOdd;
244
245 /* fract = coord - floor(coord) */
246 fract = lp_build_sub(coord_bld, coord, lp_build_floor(coord_bld, coord));
247
248 /* flr = ifloor(coord); */
249 flr = lp_build_ifloor(coord_bld, coord);
250
251 /* isOdd = flr & 1 */
252 isOdd = LLVMBuildAnd(bld->builder, flr, int_coord_bld->one, "");
253
254 /* make coord positive or negative depending on isOdd */
255 coord = lp_build_set_sign(coord_bld, fract, isOdd);
256
257 /* convert isOdd to float */
258 isOdd = lp_build_int_to_float(coord_bld, isOdd);
259
260 /* add isOdd to coord */
261 coord = lp_build_add(coord_bld, coord, isOdd);
262
263 return coord;
264 }
265
266
267 /**
268 * We only support a few wrap modes in lp_build_sample_wrap_int() at this time.
269 * Return whether the given mode is supported by that function.
270 */
271 static boolean
272 is_simple_wrap_mode(unsigned mode)
273 {
274 switch (mode) {
275 case PIPE_TEX_WRAP_REPEAT:
276 case PIPE_TEX_WRAP_CLAMP:
277 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
278 return TRUE;
279 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
280 default:
281 return FALSE;
282 }
283 }
284
285
286 /**
287 * Build LLVM code for texture wrap mode, for scaled integer texcoords.
288 * \param coord the incoming texcoord (s,t,r or q) scaled to the texture size
289 * \param length the texture size along one dimension
290 * \param is_pot if TRUE, length is a power of two
291 * \param wrap_mode one of PIPE_TEX_WRAP_x
292 */
293 static LLVMValueRef
294 lp_build_sample_wrap_int(struct lp_build_sample_context *bld,
295 LLVMValueRef coord,
296 LLVMValueRef length,
297 boolean is_pot,
298 unsigned wrap_mode)
299 {
300 struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
301 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
302 LLVMValueRef length_minus_one;
303
304 length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
305
306 switch(wrap_mode) {
307 case PIPE_TEX_WRAP_REPEAT:
308 if(is_pot)
309 coord = LLVMBuildAnd(bld->builder, coord, length_minus_one, "");
310 else
311 /* Signed remainder won't give the right results for negative
312 * dividends but unsigned remainder does.*/
313 coord = LLVMBuildURem(bld->builder, coord, length, "");
314 break;
315
316 case PIPE_TEX_WRAP_CLAMP:
317 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
318 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
319 coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
320 coord = lp_build_min(int_coord_bld, coord, length_minus_one);
321 break;
322
323 case PIPE_TEX_WRAP_MIRROR_REPEAT:
324 case PIPE_TEX_WRAP_MIRROR_CLAMP:
325 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
326 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
327 /* FIXME */
328 _debug_printf("llvmpipe: failed to translate texture wrap mode %s\n",
329 util_dump_tex_wrap(wrap_mode, TRUE));
330 coord = lp_build_max(uint_coord_bld, coord, uint_coord_bld->zero);
331 coord = lp_build_min(uint_coord_bld, coord, length_minus_one);
332 break;
333
334 default:
335 assert(0);
336 }
337
338 return coord;
339 }
340
341
342 /**
343 * Build LLVM code for texture wrap mode for linear filtering.
344 * \param x0_out returns first integer texcoord
345 * \param x1_out returns second integer texcoord
346 * \param weight_out returns linear interpolation weight
347 */
348 static void
349 lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
350 LLVMValueRef coord,
351 LLVMValueRef length,
352 boolean is_pot,
353 unsigned wrap_mode,
354 LLVMValueRef *x0_out,
355 LLVMValueRef *x1_out,
356 LLVMValueRef *weight_out)
357 {
358 struct lp_build_context *coord_bld = &bld->coord_bld;
359 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
360 struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
361 LLVMValueRef two = lp_build_const_scalar(coord_bld->type, 2.0);
362 LLVMValueRef half = lp_build_const_scalar(coord_bld->type, 0.5);
363 LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
364 LLVMValueRef length_minus_one;
365 LLVMValueRef length_f_minus_one;
366 LLVMValueRef coord0, coord1, weight;
367
368 /* XXX check for normalized vs. unnormalized coords */
369
370 length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
371 length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one);
372
373 switch(wrap_mode) {
374 case PIPE_TEX_WRAP_REPEAT:
375 /* mul by size and subtract 0.5 */
376 coord = lp_build_mul(coord_bld, coord, length_f);
377 coord = lp_build_sub(coord_bld, coord, half);
378 /* convert to int */
379 coord0 = lp_build_ifloor(coord_bld, coord);
380 coord1 = lp_build_add(uint_coord_bld, coord0, uint_coord_bld->one);
381 /* compute lerp weight */
382 weight = lp_build_fract(coord_bld, coord);
383 /* repeat wrap */
384 if (is_pot) {
385 coord0 = LLVMBuildAnd(bld->builder, coord0, length_minus_one, "");
386 coord1 = LLVMBuildAnd(bld->builder, coord1, length_minus_one, "");
387 }
388 else {
389 /* Signed remainder won't give the right results for negative
390 * dividends but unsigned remainder does.*/
391 coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
392 coord1 = LLVMBuildURem(bld->builder, coord1, length, "");
393 }
394 break;
395
396 case PIPE_TEX_WRAP_CLAMP:
397 coord = lp_build_mul(coord_bld, coord, length_f);
398 weight = lp_build_fract(coord_bld, coord);
399 coord0 = lp_build_clamp(coord_bld, coord, coord_bld->zero,
400 length_f_minus_one);
401 coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
402 coord1 = lp_build_clamp(coord_bld, coord1, coord_bld->zero,
403 length_f_minus_one);
404 coord0 = lp_build_ifloor(coord_bld, coord0);
405 coord1 = lp_build_ifloor(coord_bld, coord1);
406 break;
407
408 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
409 /* clamp to [0,1] */
410 coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, coord_bld->one);
411 /* mul by tex size and subtract 0.5 */
412 coord = lp_build_mul(coord_bld, coord, length_f);
413 coord = lp_build_sub(coord_bld, coord, half);
414 /* compute lerp weight */
415 weight = lp_build_fract(coord_bld, coord);
416 /* coord0 = floor(coord); */
417 coord0 = lp_build_ifloor(coord_bld, coord);
418 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
419 /* coord0 = max(coord0, 0) */
420 coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
421 /* coord1 = min(coord1, length-1) */
422 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
423 break;
424
425 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
426 {
427 LLVMValueRef min, max;
428 /* min = -1.0 / (2 * length) */
429 min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
430 min = lp_build_negate(coord_bld, min);
431 /* max = 1.0 - min */
432 max = lp_build_sub(coord_bld, coord_bld->one, min);
433 /* coord = clamp(coord, min, max) */
434 coord = lp_build_clamp(coord_bld, coord, min, max);
435 /* scale coord to length (and sub 0.5?) */
436 coord = lp_build_mul(coord_bld, coord, length_f);
437 coord = lp_build_sub(coord_bld, coord, half);
438 /* compute lerp weight */
439 weight = lp_build_fract(coord_bld, coord);
440 /* convert to int */
441 coord0 = lp_build_ifloor(coord_bld, coord);
442 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
443 }
444 break;
445
446 case PIPE_TEX_WRAP_MIRROR_REPEAT:
447 /* compute mirror function */
448 coord = lp_build_coord_mirror(bld, coord);
449
450 /* scale coord to length */
451 coord = lp_build_mul(coord_bld, coord, length_f);
452 coord = lp_build_sub(coord_bld, coord, half);
453
454 /* compute lerp weight */
455 weight = lp_build_fract(coord_bld, coord);
456
457 /* convert to int coords */
458 coord0 = lp_build_ifloor(coord_bld, coord);
459 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
460
461 /* coord0 = max(coord0, 0) */
462 coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
463 /* coord1 = min(coord1, length-1) */
464 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
465 break;
466
467 case PIPE_TEX_WRAP_MIRROR_CLAMP:
468 {
469 LLVMValueRef min, max;
470 /* min = 1.0 / (2 * length) */
471 min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
472 /* max = 1.0 - min */
473 max = lp_build_sub(coord_bld, coord_bld->one, min);
474
475 coord = lp_build_abs(coord_bld, coord);
476 coord = lp_build_clamp(coord_bld, coord, min, max);
477 coord = lp_build_mul(coord_bld, coord, length_f);
478 if(0)coord = lp_build_sub(coord_bld, coord, half);
479 weight = lp_build_fract(coord_bld, coord);
480 coord0 = lp_build_ifloor(coord_bld, coord);
481 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
482 }
483 break;
484
485 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
486 {
487 LLVMValueRef min, max;
488 /* min = 1.0 / (2 * length) */
489 min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
490 /* max = 1.0 - min */
491 max = lp_build_sub(coord_bld, coord_bld->one, min);
492
493 coord = lp_build_abs(coord_bld, coord);
494 coord = lp_build_clamp(coord_bld, coord, min, max);
495 coord = lp_build_mul(coord_bld, coord, length_f);
496 coord = lp_build_sub(coord_bld, coord, half);
497 weight = lp_build_fract(coord_bld, coord);
498 coord0 = lp_build_ifloor(coord_bld, coord);
499 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
500 }
501 break;
502
503 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
504 {
505 LLVMValueRef min, max;
506 /* min = -1.0 / (2 * length) */
507 min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
508 min = lp_build_negate(coord_bld, min);
509 /* max = 1.0 - min */
510 max = lp_build_sub(coord_bld, coord_bld->one, min);
511
512 coord = lp_build_abs(coord_bld, coord);
513 coord = lp_build_clamp(coord_bld, coord, min, max);
514 coord = lp_build_mul(coord_bld, coord, length_f);
515 coord = lp_build_sub(coord_bld, coord, half);
516 weight = lp_build_fract(coord_bld, coord);
517 coord0 = lp_build_ifloor(coord_bld, coord);
518 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
519 }
520 break;
521
522 default:
523 assert(0);
524 }
525
526 *x0_out = coord0;
527 *x1_out = coord1;
528 *weight_out = weight;
529 }
530
531
532 /**
533 * Build LLVM code for texture wrap mode for nearest filtering.
534 * \param coord the incoming texcoord (nominally in [0,1])
535 * \param length the texture size along one dimension, as int
536 * \param is_pot if TRUE, length is a power of two
537 * \param wrap_mode one of PIPE_TEX_WRAP_x
538 */
539 static LLVMValueRef
540 lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
541 LLVMValueRef coord,
542 LLVMValueRef length,
543 boolean is_pot,
544 unsigned wrap_mode)
545 {
546 struct lp_build_context *coord_bld = &bld->coord_bld;
547 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
548 struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
549 LLVMValueRef two = lp_build_const_scalar(coord_bld->type, 2.0);
550 LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
551 LLVMValueRef length_minus_one = lp_build_sub(uint_coord_bld, length, uint_coord_bld->one);
552 LLVMValueRef length_f_minus_one = lp_build_sub(coord_bld, length_f, coord_bld->one);
553 LLVMValueRef icoord;
554
555 /* XXX check for normalized vs. unnormalized coords */
556
557 switch(wrap_mode) {
558 case PIPE_TEX_WRAP_REPEAT:
559 coord = lp_build_mul(coord_bld, coord, length_f);
560 icoord = lp_build_ifloor(coord_bld, coord);
561 if (is_pot)
562 icoord = LLVMBuildAnd(bld->builder, icoord, length_minus_one, "");
563 else
564 /* Signed remainder won't give the right results for negative
565 * dividends but unsigned remainder does.*/
566 icoord = LLVMBuildURem(bld->builder, icoord, length, "");
567 break;
568
569 case PIPE_TEX_WRAP_CLAMP:
570 /* mul by size */
571 coord = lp_build_mul(coord_bld, coord, length_f);
572 /* floor */
573 icoord = lp_build_ifloor(coord_bld, coord);
574 /* clamp to [0, size-1]. Note: int coord builder type */
575 icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
576 length_minus_one);
577 break;
578
579 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
580 {
581 LLVMValueRef min, max;
582 /* min = 1.0 / (2 * length) */
583 min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
584 /* max = length - min */
585 max = lp_build_sub(coord_bld, length_f, min);
586 /* scale coord to length */
587 coord = lp_build_mul(coord_bld, coord, length_f);
588 /* coord = clamp(coord, min, max) */
589 coord = lp_build_clamp(coord_bld, coord, min, max);
590 icoord = lp_build_ifloor(coord_bld, coord);
591 }
592 break;
593
594 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
595 /* Note: this is the same as CLAMP_TO_EDGE, except min = -min */
596 {
597 LLVMValueRef min, max;
598 /* min = -1.0 / (2 * length) */
599 min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
600 min = lp_build_negate(coord_bld, min);
601 /* max = length - min */
602 max = lp_build_sub(coord_bld, length_f, min);
603 /* scale coord to length */
604 coord = lp_build_mul(coord_bld, coord, length_f);
605 /* coord = clamp(coord, min, max) */
606 coord = lp_build_clamp(coord_bld, coord, min, max);
607 icoord = lp_build_ifloor(coord_bld, coord);
608 }
609 break;
610
611 case PIPE_TEX_WRAP_MIRROR_REPEAT:
612 {
613 LLVMValueRef min, max;
614 /* min = 1.0 / (2 * length) */
615 min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
616 /* max = length - min */
617 max = lp_build_sub(coord_bld, length_f, min);
618
619 /* compute mirror function */
620 coord = lp_build_coord_mirror(bld, coord);
621
622 /* scale coord to length */
623 coord = lp_build_mul(coord_bld, coord, length_f);
624
625 /* coord = clamp(coord, min, max) */
626 coord = lp_build_clamp(coord_bld, coord, min, max);
627 icoord = lp_build_ifloor(coord_bld, coord);
628 }
629 break;
630
631 case PIPE_TEX_WRAP_MIRROR_CLAMP:
632 coord = lp_build_abs(coord_bld, coord);
633 coord = lp_build_mul(coord_bld, coord, length_f);
634 coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f_minus_one);
635 icoord = lp_build_ifloor(coord_bld, coord);
636 break;
637
638 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
639 {
640 LLVMValueRef min, max;
641 /* min = 1.0 / (2 * length) */
642 min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
643 /* max = length - min */
644 max = lp_build_sub(coord_bld, length_f, min);
645
646 coord = lp_build_abs(coord_bld, coord);
647 coord = lp_build_mul(coord_bld, coord, length_f);
648 coord = lp_build_clamp(coord_bld, coord, min, max);
649 icoord = lp_build_ifloor(coord_bld, coord);
650 }
651 break;
652
653 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
654 {
655 LLVMValueRef min, max;
656 /* min = 1.0 / (2 * length) */
657 min = lp_build_rcp(coord_bld, lp_build_mul(coord_bld, two, length_f));
658 min = lp_build_negate(coord_bld, min);
659 /* max = length - min */
660 max = lp_build_sub(coord_bld, length_f, min);
661
662 coord = lp_build_abs(coord_bld, coord);
663 coord = lp_build_mul(coord_bld, coord, length_f);
664 coord = lp_build_clamp(coord_bld, coord, min, max);
665 icoord = lp_build_ifloor(coord_bld, coord);
666 }
667 break;
668
669 default:
670 assert(0);
671 }
672
673 return icoord;
674 }
675
676
677 /**
678 * Sample 2D texture with nearest filtering.
679 */
680 static void
681 lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld,
682 LLVMValueRef s,
683 LLVMValueRef t,
684 LLVMValueRef width,
685 LLVMValueRef height,
686 LLVMValueRef stride,
687 LLVMValueRef data_ptr,
688 LLVMValueRef *texel)
689 {
690 LLVMValueRef x, y;
691
692 x = lp_build_sample_wrap_nearest(bld, s, width,
693 bld->static_state->pot_width,
694 bld->static_state->wrap_s);
695 y = lp_build_sample_wrap_nearest(bld, t, height,
696 bld->static_state->pot_height,
697 bld->static_state->wrap_t);
698
699 lp_build_name(x, "tex.x.wrapped");
700 lp_build_name(y, "tex.y.wrapped");
701
702 lp_build_sample_texel_soa(bld, width, height, x, y, stride, data_ptr, texel);
703 }
704
705
706 /**
707 * Sample 2D texture with bilinear filtering.
708 */
709 static void
710 lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
711 LLVMValueRef s,
712 LLVMValueRef t,
713 LLVMValueRef width,
714 LLVMValueRef height,
715 LLVMValueRef stride,
716 LLVMValueRef data_ptr,
717 LLVMValueRef *texel)
718 {
719 LLVMValueRef s_fpart;
720 LLVMValueRef t_fpart;
721 LLVMValueRef x0, x1;
722 LLVMValueRef y0, y1;
723 LLVMValueRef neighbors[2][2][4];
724 unsigned chan;
725
726 lp_build_sample_wrap_linear(bld, s, width, bld->static_state->pot_width,
727 bld->static_state->wrap_s, &x0, &x1, &s_fpart);
728 lp_build_sample_wrap_linear(bld, t, height, bld->static_state->pot_height,
729 bld->static_state->wrap_t, &y0, &y1, &t_fpart);
730
731 lp_build_sample_texel_soa(bld, width, height, x0, y0, stride, data_ptr, neighbors[0][0]);
732 lp_build_sample_texel_soa(bld, width, height, x1, y0, stride, data_ptr, neighbors[0][1]);
733 lp_build_sample_texel_soa(bld, width, height, x0, y1, stride, data_ptr, neighbors[1][0]);
734 lp_build_sample_texel_soa(bld, width, height, x1, y1, stride, data_ptr, neighbors[1][1]);
735
736 /* TODO: Don't interpolate missing channels */
737 for(chan = 0; chan < 4; ++chan) {
738 texel[chan] = lp_build_lerp_2d(&bld->texel_bld,
739 s_fpart, t_fpart,
740 neighbors[0][0][chan],
741 neighbors[0][1][chan],
742 neighbors[1][0][chan],
743 neighbors[1][1][chan]);
744 }
745 }
746
747
748 static void
749 lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
750 struct lp_type dst_type,
751 LLVMValueRef packed,
752 LLVMValueRef *rgba)
753 {
754 LLVMValueRef mask = lp_build_int_const_scalar(dst_type, 0xff);
755 unsigned chan;
756
757 /* Decode the input vector components */
758 for (chan = 0; chan < 4; ++chan) {
759 unsigned start = chan*8;
760 unsigned stop = start + 8;
761 LLVMValueRef input;
762
763 input = packed;
764
765 if(start)
766 input = LLVMBuildLShr(builder, input, lp_build_int_const_scalar(dst_type, start), "");
767
768 if(stop < 32)
769 input = LLVMBuildAnd(builder, input, mask, "");
770
771 input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input);
772
773 rgba[chan] = input;
774 }
775 }
776
777
778 static void
779 lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
780 LLVMValueRef s,
781 LLVMValueRef t,
782 LLVMValueRef width,
783 LLVMValueRef height,
784 LLVMValueRef stride,
785 LLVMValueRef data_ptr,
786 LLVMValueRef *texel)
787 {
788 LLVMBuilderRef builder = bld->builder;
789 struct lp_build_context i32, h16, u8n;
790 LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
791 LLVMValueRef i32_c8, i32_c128, i32_c255;
792 LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
793 LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
794 LLVMValueRef x0, x1;
795 LLVMValueRef y0, y1;
796 LLVMValueRef neighbors[2][2];
797 LLVMValueRef neighbors_lo[2][2];
798 LLVMValueRef neighbors_hi[2][2];
799 LLVMValueRef packed, packed_lo, packed_hi;
800 LLVMValueRef unswizzled[4];
801
802 lp_build_context_init(&i32, builder, lp_type_int(32));
803 lp_build_context_init(&h16, builder, lp_type_ufixed(16));
804 lp_build_context_init(&u8n, builder, lp_type_unorm(8));
805
806 i32_vec_type = lp_build_vec_type(i32.type);
807 h16_vec_type = lp_build_vec_type(h16.type);
808 u8n_vec_type = lp_build_vec_type(u8n.type);
809
810 if (bld->static_state->normalized_coords) {
811 LLVMTypeRef coord_vec_type = lp_build_vec_type(bld->coord_type);
812 LLVMValueRef fp_width = LLVMBuildSIToFP(bld->builder, width, coord_vec_type, "");
813 LLVMValueRef fp_height = LLVMBuildSIToFP(bld->builder, height, coord_vec_type, "");
814 s = lp_build_mul(&bld->coord_bld, s, fp_width);
815 t = lp_build_mul(&bld->coord_bld, t, fp_height);
816 }
817
818 /* scale coords by 256 (8 fractional bits) */
819 s = lp_build_mul_imm(&bld->coord_bld, s, 256);
820 t = lp_build_mul_imm(&bld->coord_bld, t, 256);
821
822 /* convert float to int */
823 s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
824 t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
825
826 /* subtract 0.5 (add -128) */
827 i32_c128 = lp_build_int_const_scalar(i32.type, -128);
828 s = LLVMBuildAdd(builder, s, i32_c128, "");
829 t = LLVMBuildAdd(builder, t, i32_c128, "");
830
831 /* compute floor (shift right 8) */
832 i32_c8 = lp_build_int_const_scalar(i32.type, 8);
833 s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
834 t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
835
836 /* compute fractional part (AND with 0xff) */
837 i32_c255 = lp_build_int_const_scalar(i32.type, 255);
838 s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
839 t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
840
841 x0 = s_ipart;
842 y0 = t_ipart;
843
844 x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
845 y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
846
847 x0 = lp_build_sample_wrap_int(bld, x0, width, bld->static_state->pot_width,
848 bld->static_state->wrap_s);
849 y0 = lp_build_sample_wrap_int(bld, y0, height, bld->static_state->pot_height,
850 bld->static_state->wrap_t);
851
852 x1 = lp_build_sample_wrap_int(bld, x1, width, bld->static_state->pot_width,
853 bld->static_state->wrap_s);
854 y1 = lp_build_sample_wrap_int(bld, y1, height, bld->static_state->pot_height,
855 bld->static_state->wrap_t);
856
857 /*
858 * Transform 4 x i32 in
859 *
860 * s_fpart = {s0, s1, s2, s3}
861 *
862 * into 8 x i16
863 *
864 * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
865 *
866 * into two 8 x i16
867 *
868 * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
869 * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
870 *
871 * and likewise for t_fpart. There is no risk of loosing precision here
872 * since the fractional parts only use the lower 8bits.
873 */
874
875 s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
876 t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
877
878 {
879 LLVMTypeRef elem_type = LLVMInt32Type();
880 LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
881 LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
882 LLVMValueRef shuffle_lo;
883 LLVMValueRef shuffle_hi;
884 unsigned i, j;
885
886 for(j = 0; j < h16.type.length; j += 4) {
887 unsigned subindex = util_cpu_caps.little_endian ? 0 : 1;
888 LLVMValueRef index;
889
890 index = LLVMConstInt(elem_type, j/2 + subindex, 0);
891 for(i = 0; i < 4; ++i)
892 shuffles_lo[j + i] = index;
893
894 index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
895 for(i = 0; i < 4; ++i)
896 shuffles_hi[j + i] = index;
897 }
898
899 shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
900 shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
901
902 s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, "");
903 t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, "");
904 s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, "");
905 t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, "");
906 }
907
908 /*
909 * Fetch the pixels as 4 x 32bit (rgba order might differ):
910 *
911 * rgba0 rgba1 rgba2 rgba3
912 *
913 * bit cast them into 16 x u8
914 *
915 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
916 *
917 * unpack them into two 8 x i16:
918 *
919 * r0 g0 b0 a0 r1 g1 b1 a1
920 * r2 g2 b2 a2 r3 g3 b3 a3
921 *
922 * The higher 8 bits of the resulting elements will be zero.
923 */
924
925 neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_ptr);
926 neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_ptr);
927 neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_ptr);
928 neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_ptr);
929
930 neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, "");
931 neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, "");
932 neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, "");
933 neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, "");
934
935 lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]);
936 lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]);
937 lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]);
938 lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]);
939
940 /*
941 * Linear interpolate with 8.8 fixed point.
942 */
943
944 packed_lo = lp_build_lerp_2d(&h16,
945 s_fpart_lo, t_fpart_lo,
946 neighbors_lo[0][0],
947 neighbors_lo[0][1],
948 neighbors_lo[1][0],
949 neighbors_lo[1][1]);
950
951 packed_hi = lp_build_lerp_2d(&h16,
952 s_fpart_hi, t_fpart_hi,
953 neighbors_hi[0][0],
954 neighbors_hi[0][1],
955 neighbors_hi[1][0],
956 neighbors_hi[1][1]);
957
958 packed = lp_build_pack2(builder, h16.type, u8n.type, packed_lo, packed_hi);
959
960 /*
961 * Convert to SoA and swizzle.
962 */
963
964 packed = LLVMBuildBitCast(builder, packed, i32_vec_type, "");
965
966 lp_build_rgba8_to_f32_soa(bld->builder,
967 bld->texel_type,
968 packed, unswizzled);
969
970 lp_build_format_swizzle_soa(bld->format_desc,
971 bld->texel_type, unswizzled,
972 texel);
973 }
974
975
976 static void
977 lp_build_sample_compare(struct lp_build_sample_context *bld,
978 LLVMValueRef p,
979 LLVMValueRef *texel)
980 {
981 struct lp_build_context *texel_bld = &bld->texel_bld;
982 LLVMValueRef res;
983 unsigned chan;
984
985 if(bld->static_state->compare_mode == PIPE_TEX_COMPARE_NONE)
986 return;
987
988 /* TODO: Compare before swizzling, to avoid redundant computations */
989 res = NULL;
990 for(chan = 0; chan < 4; ++chan) {
991 LLVMValueRef cmp;
992 cmp = lp_build_cmp(texel_bld, bld->static_state->compare_func, p, texel[chan]);
993 cmp = lp_build_select(texel_bld, cmp, texel_bld->one, texel_bld->zero);
994
995 if(res)
996 res = lp_build_add(texel_bld, res, cmp);
997 else
998 res = cmp;
999 }
1000
1001 assert(res);
1002 res = lp_build_mul(texel_bld, res, lp_build_const_scalar(texel_bld->type, 0.25));
1003
1004 /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
1005 for(chan = 0; chan < 3; ++chan)
1006 texel[chan] = res;
1007 texel[3] = texel_bld->one;
1008 }
1009
1010
1011 /**
1012 * Build texture sampling code.
1013 * 'texel' will return a vector of four LLVMValueRefs corresponding to
1014 * R, G, B, A.
1015 */
1016 void
1017 lp_build_sample_soa(LLVMBuilderRef builder,
1018 const struct lp_sampler_static_state *static_state,
1019 struct lp_sampler_dynamic_state *dynamic_state,
1020 struct lp_type type,
1021 unsigned unit,
1022 unsigned num_coords,
1023 const LLVMValueRef *coords,
1024 LLVMValueRef lodbias,
1025 LLVMValueRef *texel)
1026 {
1027 struct lp_build_sample_context bld;
1028 LLVMValueRef width;
1029 LLVMValueRef height;
1030 LLVMValueRef stride;
1031 LLVMValueRef data_ptr;
1032 LLVMValueRef s;
1033 LLVMValueRef t;
1034 LLVMValueRef p;
1035
1036 /* Setup our build context */
1037 memset(&bld, 0, sizeof bld);
1038 bld.builder = builder;
1039 bld.static_state = static_state;
1040 bld.dynamic_state = dynamic_state;
1041 bld.format_desc = util_format_description(static_state->format);
1042 bld.coord_type = type;
1043 bld.uint_coord_type = lp_uint_type(type);
1044 bld.int_coord_type = lp_int_type(type);
1045 bld.texel_type = type;
1046 lp_build_context_init(&bld.coord_bld, builder, bld.coord_type);
1047 lp_build_context_init(&bld.uint_coord_bld, builder, bld.uint_coord_type);
1048 lp_build_context_init(&bld.int_coord_bld, builder, bld.int_coord_type);
1049 lp_build_context_init(&bld.texel_bld, builder, bld.texel_type);
1050
1051 /* Get the dynamic state */
1052 width = dynamic_state->width(dynamic_state, builder, unit);
1053 height = dynamic_state->height(dynamic_state, builder, unit);
1054 stride = dynamic_state->stride(dynamic_state, builder, unit);
1055 data_ptr = dynamic_state->data_ptr(dynamic_state, builder, unit);
1056
1057 s = coords[0];
1058 t = coords[1];
1059 p = coords[2];
1060
1061 width = lp_build_broadcast_scalar(&bld.uint_coord_bld, width);
1062 height = lp_build_broadcast_scalar(&bld.uint_coord_bld, height);
1063 stride = lp_build_broadcast_scalar(&bld.uint_coord_bld, stride);
1064
1065 if(static_state->target == PIPE_TEXTURE_1D)
1066 t = bld.coord_bld.zero;
1067
1068 switch (static_state->min_img_filter) {
1069 case PIPE_TEX_FILTER_NEAREST:
1070 lp_build_sample_2d_nearest_soa(&bld, s, t, width, height,
1071 stride, data_ptr, texel);
1072 break;
1073 case PIPE_TEX_FILTER_LINEAR:
1074 if(lp_format_is_rgba8(bld.format_desc) &&
1075 is_simple_wrap_mode(static_state->wrap_s) &&
1076 is_simple_wrap_mode(static_state->wrap_t))
1077 lp_build_sample_2d_linear_aos(&bld, s, t, width, height,
1078 stride, data_ptr, texel);
1079 else
1080 lp_build_sample_2d_linear_soa(&bld, s, t, width, height,
1081 stride, data_ptr, texel);
1082 break;
1083 default:
1084 assert(0);
1085 }
1086
1087 /* FIXME: respect static_state->min_mip_filter */;
1088 /* FIXME: respect static_state->mag_img_filter */;
1089
1090 lp_build_sample_compare(&bld, p, texel);
1091 }