1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Texture sampling -- SoA.
32 * @author Jose Fonseca <jfonseca@vmware.com>
35 #include "pipe/p_defines.h"
36 #include "pipe/p_state.h"
37 #include "util/u_debug.h"
38 #include "util/u_debug_dump.h"
39 #include "util/u_memory.h"
40 #include "util/u_math.h"
41 #include "util/u_format.h"
42 #include "util/u_cpu_detect.h"
43 #include "lp_bld_debug.h"
44 #include "lp_bld_type.h"
45 #include "lp_bld_const.h"
46 #include "lp_bld_conv.h"
47 #include "lp_bld_arit.h"
48 #include "lp_bld_logic.h"
49 #include "lp_bld_swizzle.h"
50 #include "lp_bld_pack.h"
51 #include "lp_bld_format.h"
52 #include "lp_bld_sample.h"
56 * Keep all information for sampling code generation in a single place.
/*
 * Shared state for one texture-sampling code-generation pass.
 *
 * Bundles the LLVM builder, the sampler static and dynamic state, the
 * texture format description, and a (type, build context) pair for each of
 * the incoming coordinates, the integer coordinates and the output texels.
 * lp_build_sample_soa() zero-fills and populates one of these and hands it
 * to the helper functions in this file.
 */
58 struct lp_build_sample_context
/* LLVM IR builder handed to every LLVMBuild and lp_build call in this file. */
60 LLVMBuilderRef builder
;
/*
 * Sampler state known at code-generation time.  This file reads its format,
 * target, normalized_coords, pot_width, pot_height, wrap_s, wrap_t,
 * min_img_filter, compare_mode and compare_func members.
 */
62 const struct lp_sampler_static_state
*static_state
;
/*
 * Callbacks taking (dynamic_state, builder, unit) that yield the width,
 * height, stride and data pointer values of a texture unit.
 */
64 struct lp_sampler_dynamic_state
*dynamic_state
;
/* Result of util_format_description() for static_state->format. */
66 const struct util_format_description
*format_desc
;
68 /** Incoming coordinates type and build context */
69 struct lp_type coord_type
;
70 struct lp_build_context coord_bld
;
72 /** Integer coordinates */
/* Set by lp_build_sample_soa() to lp_int_type() of the coordinate type. */
73 struct lp_type int_coord_type
;
74 struct lp_build_context int_coord_bld
;
76 /** Output texels type and build context */
77 struct lp_type texel_type
;
78 struct lp_build_context texel_bld
;
/*
 * Fetch the texels at the given integer coordinates and unpack them into
 * per-channel (SoA) values.
 *
 * Visible call sites pass (bld, x, y, stride, data_ptr, texel), where the
 * last argument is an array of channel values to fill.  Part of the
 * parameter list and the local declarations are not visible in this listing.
 *
 * Only formats whose block is 1x1 and whose block bit size does not exceed
 * the element width of bld->texel_type are accepted; this is asserted.
 */
83 lp_build_sample_texel_soa(struct lp_build_sample_context
*bld
,
86 LLVMValueRef y_stride
,
87 LLVMValueRef data_ptr
,
/*
 * Computed with the integer coordinate build context.  Presumably the
 * offset of each texel derived from x, y and y_stride; the remaining
 * arguments are not visible here.
 */
93 offset
= lp_build_sample_offset(&bld
->int_coord_bld
,
/* Block-compressed or multi-pixel block formats are not handled. */
98 assert(bld
->format_desc
->block
.width
== 1);
99 assert(bld
->format_desc
->block
.height
== 1);
/* A whole block must fit in one element of the texel vector. */
100 assert(bld
->format_desc
->block
.bits
<= bld
->texel_type
.width
);
/*
 * Gather call parameterized by the texel vector length, the format block
 * size in bits and the texel element width.  The trailing arguments are not
 * visible; presumably data_ptr and offset.
 */
102 packed
= lp_build_gather(bld
->builder
,
103 bld
->texel_type
.length
,
104 bld
->format_desc
->block
.bits
,
105 bld
->texel_type
.width
,
/* Split the packed texels into separate channel values (arguments not fully visible). */
108 lp_build_unpack_rgba_soa(bld
->builder
,
/*
 * Fetch the texels at the given integer coordinates and return them still
 * packed, i.e. the raw result of lp_build_gather() without unpacking.
 *
 * Visible call sites pass (bld, x, y, stride, data_ptr).  Part of the
 * parameter list and the local declarations are not visible in this listing.
 *
 * The same format restrictions as lp_build_sample_texel_soa() are asserted:
 * 1x1 blocks no wider in bits than a bld->texel_type element.
 */
116 lp_build_sample_packed(struct lp_build_sample_context
*bld
,
119 LLVMValueRef y_stride
,
120 LLVMValueRef data_ptr
)
/*
 * Computed with the integer coordinate build context.  Presumably the
 * offset of each texel; the remaining arguments are not visible here.
 */
124 offset
= lp_build_sample_offset(&bld
->int_coord_bld
,
/* Only single-pixel blocks are supported. */
129 assert(bld
->format_desc
->block
.width
== 1);
130 assert(bld
->format_desc
->block
.height
== 1);
/* A whole block must fit in one element of the texel vector. */
131 assert(bld
->format_desc
->block
.bits
<= bld
->texel_type
.width
);
/*
 * Gather call parameterized by the texel vector length, the format block
 * size in bits and the texel element width.  The trailing arguments are not
 * visible in this listing.
 */
133 return lp_build_gather(bld
->builder
,
134 bld
->texel_type
.length
,
135 bld
->format_desc
->block
.bits
,
136 bld
->texel_type
.width
,
/*
 * Apply a texture wrap mode to integer texel coordinates.
 *
 * Visible call sites pass (bld, coord, length, pot flag, wrap mode), e.g.
 * (bld, x, width, static_state->pot_width, static_state->wrap_s), and assign
 * the result back to the coordinate.  The rest of the signature, the switch
 * header, the break statements and the return are not visible in this
 * listing.
 *
 * Visible behaviour per mode:
 *  - PIPE_TEX_WRAP_REPEAT: either coord AND (length - 1), or unsigned
 *    remainder of coord by length.  Presumably the AND form is selected for
 *    power-of-two lengths; the selecting condition is not visible.
 *  - PIPE_TEX_WRAP_CLAMP: clamp coord to the range 0 .. length - 1.
 *  - every other listed mode: print a warning through _debug_printf() and
 *    clamp to 0 .. length - 1 as well.
 */
142 lp_build_sample_wrap(struct lp_build_sample_context
*bld
,
148 struct lp_build_context
*int_coord_bld
= &bld
->int_coord_bld
;
149 LLVMValueRef length_minus_one
;
/* Largest valid coordinate; also used as the bit mask in the REPEAT case. */
151 length_minus_one
= lp_build_sub(int_coord_bld
, length
, int_coord_bld
->one
);
154 case PIPE_TEX_WRAP_REPEAT
:
/* Masking equals a modulo only when length is a power of two. */
156 coord
= LLVMBuildAnd(bld
->builder
, coord
, length_minus_one
, "");
158 /* Signed remainder won't give the right results for negative
159 * dividends but unsigned remainder does.*/
160 coord
= LLVMBuildURem(bld
->builder
, coord
, length
, "");
163 case PIPE_TEX_WRAP_CLAMP
:
/* Clamp into 0 .. length - 1. */
164 coord
= lp_build_max(int_coord_bld
, coord
, int_coord_bld
->zero
);
165 coord
= lp_build_min(int_coord_bld
, coord
, length_minus_one
);
/* Modes without a dedicated translation yet share the warning path below. */
168 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
:
169 case PIPE_TEX_WRAP_CLAMP_TO_BORDER
:
170 case PIPE_TEX_WRAP_MIRROR_REPEAT
:
171 case PIPE_TEX_WRAP_MIRROR_CLAMP
:
172 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
:
173 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
:
175 _debug_printf("warning: failed to translate texture wrap mode %s\n",
176 debug_dump_tex_wrap(wrap_mode
, TRUE
));
/* Fall back to the same clamp as PIPE_TEX_WRAP_CLAMP. */
177 coord
= lp_build_max(int_coord_bld
, coord
, int_coord_bld
->zero
);
178 coord
= lp_build_min(int_coord_bld
, coord
, length_minus_one
);
/*
 * Nearest-texel 2D sampling.
 *
 * The visible call site passes (bld, s, t, width, height, stride, data_ptr,
 * texel).  The coordinates s and t are floored to integers, wrapped with the
 * sampler wrap modes, and the texel at the resulting position is fetched
 * into texel.  Part of the parameter list and the local declarations are not
 * visible in this listing.
 */
190 lp_build_sample_2d_nearest_soa(struct lp_build_sample_context
*bld
,
196 LLVMValueRef data_ptr
,
/* Integer texel position: floor of each coordinate. */
202 x
= lp_build_ifloor(&bld
->coord_bld
, s
);
203 y
= lp_build_ifloor(&bld
->coord_bld
, t
);
/* Wrap x against width with wrap_s, and y against height with wrap_t. */
205 x
= lp_build_sample_wrap(bld
, x
, width
, bld
->static_state
->pot_width
, bld
->static_state
->wrap_s
);
206 y
= lp_build_sample_wrap(bld
, y
, height
, bld
->static_state
->pot_height
, bld
->static_state
->wrap_t
);
/* Fetch and unpack the selected texel into the output channels. */
208 lp_build_sample_texel_soa(bld
, x
, y
, stride
, data_ptr
, texel
);
/*
 * Bilinear 2D sampling on unpacked (SoA) channels.
 *
 * The visible call site passes (bld, s, t, width, height, stride, data_ptr,
 * texel).  The coordinates are shifted by half a texel and split into
 * integer and fractional parts; the four neighbouring texels are fetched
 * and each of the four channels is interpolated with lp_build_lerp_2d().
 * Part of the parameter list and of the local declarations is not visible
 * in this listing.
 */
213 lp_build_sample_2d_linear_soa(struct lp_build_sample_context
*bld
,
219 LLVMValueRef data_ptr
,
223 LLVMValueRef s_ipart
;
224 LLVMValueRef t_ipart
;
225 LLVMValueRef s_fpart
;
226 LLVMValueRef t_fpart
;
/* Indexed as neighbors[row][column][channel] by the fetches below. */
229 LLVMValueRef neighbors
[2][2][4];
/* Subtract 0.5 from both coordinates before taking the floor. */
232 half
= lp_build_const_scalar(bld
->coord_type
, 0.5);
233 s
= lp_build_sub(&bld
->coord_bld
, s
, half
);
234 t
= lp_build_sub(&bld
->coord_bld
, t
, half
);
/* Integer parts, still in the coordinate type. */
236 s_ipart
= lp_build_floor(&bld
->coord_bld
, s
);
237 t_ipart
= lp_build_floor(&bld
->coord_bld
, t
);
/* Fractional parts: coordinate minus its floor. */
239 s_fpart
= lp_build_sub(&bld
->coord_bld
, s
, s_ipart
);
240 t_fpart
= lp_build_sub(&bld
->coord_bld
, t
, t_ipart
);
/* Convert the already floored values to integer texel positions. */
242 x0
= lp_build_itrunc(&bld
->coord_bld
, s_ipart
);
243 y0
= lp_build_itrunc(&bld
->coord_bld
, t_ipart
);
245 x0
= lp_build_sample_wrap(bld
, x0
, width
, bld
->static_state
->pot_width
, bld
->static_state
->wrap_s
);
246 y0
= lp_build_sample_wrap(bld
, y0
, height
, bld
->static_state
->pot_height
, bld
->static_state
->wrap_t
);
/* The second sample position is one past the wrapped first one, wrapped again. */
248 x1
= lp_build_add(&bld
->int_coord_bld
, x0
, bld
->int_coord_bld
.one
);
249 y1
= lp_build_add(&bld
->int_coord_bld
, y0
, bld
->int_coord_bld
.one
);
251 x1
= lp_build_sample_wrap(bld
, x1
, width
, bld
->static_state
->pot_width
, bld
->static_state
->wrap_s
);
252 y1
= lp_build_sample_wrap(bld
, y1
, height
, bld
->static_state
->pot_height
, bld
->static_state
->wrap_t
);
/* Fetch the 2x2 footprint: first index selects y0 or y1, second x0 or x1. */
254 lp_build_sample_texel_soa(bld
, x0
, y0
, stride
, data_ptr
, neighbors
[0][0]);
255 lp_build_sample_texel_soa(bld
, x1
, y0
, stride
, data_ptr
, neighbors
[0][1]);
256 lp_build_sample_texel_soa(bld
, x0
, y1
, stride
, data_ptr
, neighbors
[1][0]);
257 lp_build_sample_texel_soa(bld
, x1
, y1
, stride
, data_ptr
, neighbors
[1][1]);
259 /* TODO: Don't interpolate missing channels */
260 for(chan
= 0; chan
< 4; ++chan
) {
/*
 * Interpolate one channel across the four neighbours.  The weight
 * arguments are not visible in this listing; presumably s_fpart and
 * t_fpart.
 */
261 texel
[chan
] = lp_build_lerp_2d(&bld
->texel_bld
,
263 neighbors
[0][0][chan
],
264 neighbors
[0][1][chan
],
265 neighbors
[1][0][chan
],
266 neighbors
[1][1][chan
]);
/*
 * Split packed 4 x 8-bit texels into four per-channel values and convert
 * each from an 8-bit unsigned normalized integer to dst_type.
 *
 * Channel chan occupies bits chan*8 .. chan*8+7 of the packed value.  Part
 * of the parameter list, the local declarations, any conditions guarding the
 * shift and mask, and the store of each result are not visible in this
 * listing.
 */
272 lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder
,
273 struct lp_type dst_type
,
/* Keeps the low 8 bits of each element. */
277 LLVMValueRef mask
= lp_build_int_const_scalar(dst_type
, 0xff);
280 /* Decode the input vector components */
281 for (chan
= 0; chan
< 4; ++chan
) {
/* Bit range of this channel inside the packed value. */
282 unsigned start
= chan
*8;
283 unsigned stop
= start
+ 8;
/* Move the channel down to bit 0. */
289 input
= LLVMBuildLShr(builder
, input
, lp_build_int_const_scalar(dst_type
, start
), "");
/* Drop the bits belonging to higher channels. */
292 input
= LLVMBuildAnd(builder
, input
, mask
, "");
/* Convert the 8-bit unsigned normalized value to dst_type. */
294 input
= lp_build_unsigned_norm_to_float(builder
, 8, dst_type
, input
);
/*
 * Bilinear 2D sampling performed on packed texels with fixed-point weights.
 *
 * lp_build_sample_soa() selects this path when lp_format_is_rgba8() accepts
 * the format, passing (bld, s, t, width, height, stride, data_ptr, texel).
 * The coordinates are converted to integers with 8 fractional bits, the
 * four neighbouring texels are fetched packed, widened from 8-bit to 16-bit
 * lanes, interpolated in 16-bit fixed point, narrowed back to 8 bits, and
 * finally converted to per-channel values and swizzled.
 *
 * Part of the parameter list, some declarations and some statements
 * (including the first assignment of x0 and y0) are not visible in this
 * listing.
 */
302 lp_build_sample_2d_linear_aos(struct lp_build_sample_context
*bld
,
308 LLVMValueRef data_ptr
,
311 LLVMBuilderRef builder
= bld
->builder
;
/* Local build contexts: 32-bit int, 16-bit unsigned fixed, 8-bit unorm. */
312 struct lp_build_context i32
, h16
, u8n
;
313 LLVMTypeRef i32_vec_type
, h16_vec_type
, u8n_vec_type
;
314 LLVMValueRef i32_c8
, i32_c128
, i32_c255
;
315 LLVMValueRef s_ipart
, s_fpart
, s_fpart_lo
, s_fpart_hi
;
316 LLVMValueRef t_ipart
, t_fpart
, t_fpart_lo
, t_fpart_hi
;
/* Packed texels of the 2x2 footprint, then their low and high widened halves. */
319 LLVMValueRef neighbors
[2][2];
320 LLVMValueRef neighbors_lo
[2][2];
321 LLVMValueRef neighbors_hi
[2][2];
322 LLVMValueRef packed
, packed_lo
, packed_hi
;
323 LLVMValueRef unswizzled
[4];
325 lp_build_context_init(&i32
, builder
, lp_type_int(32));
326 lp_build_context_init(&h16
, builder
, lp_type_ufixed(16));
327 lp_build_context_init(&u8n
, builder
, lp_type_unorm(8));
329 i32_vec_type
= lp_build_vec_type(i32
.type
);
330 h16_vec_type
= lp_build_vec_type(h16
.type
);
331 u8n_vec_type
= lp_build_vec_type(u8n
.type
);
/* Scale by 256 so that the later integer conversion keeps 8 fractional bits. */
333 s
= lp_build_mul_imm(&bld
->coord_bld
, s
, 256);
334 t
= lp_build_mul_imm(&bld
->coord_bld
, t
, 256);
336 s
= LLVMBuildFPToSI(builder
, s
, i32_vec_type
, "");
337 t
= LLVMBuildFPToSI(builder
, t
, i32_vec_type
, "");
/*
 * Adding -128 subtracts 0.5 in the 8-fractional-bit representation.  Note
 * that the constant named i32_c128 actually holds -128.
 */
339 i32_c128
= lp_build_int_const_scalar(i32
.type
, -128);
340 s
= LLVMBuildAdd(builder
, s
, i32_c128
, "");
341 t
= LLVMBuildAdd(builder
, t
, i32_c128
, "");
/* The arithmetic shift right by 8 keeps the sign and yields the integer part. */
343 i32_c8
= lp_build_int_const_scalar(i32
.type
, 8);
344 s_ipart
= LLVMBuildAShr(builder
, s
, i32_c8
, "");
345 t_ipart
= LLVMBuildAShr(builder
, t
, i32_c8
, "");
/* The low 8 bits are the fractional part, in the range 0 .. 255. */
347 i32_c255
= lp_build_int_const_scalar(i32
.type
, 255);
348 s_fpart
= LLVMBuildAnd(builder
, s
, i32_c255
, "");
349 t_fpart
= LLVMBuildAnd(builder
, t
, i32_c255
, "");
/*
 * The initial assignment of x0 and y0 is not visible in this listing;
 * presumably they come from s_ipart and t_ipart.
 */
354 x0
= lp_build_sample_wrap(bld
, x0
, width
, bld
->static_state
->pot_width
, bld
->static_state
->wrap_s
);
355 y0
= lp_build_sample_wrap(bld
, y0
, height
, bld
->static_state
->pot_height
, bld
->static_state
->wrap_t
);
/* The second sample position is one past the wrapped first one, wrapped again. */
357 x1
= lp_build_add(&bld
->int_coord_bld
, x0
, bld
->int_coord_bld
.one
);
358 y1
= lp_build_add(&bld
->int_coord_bld
, y0
, bld
->int_coord_bld
.one
);
360 x1
= lp_build_sample_wrap(bld
, x1
, width
, bld
->static_state
->pot_width
, bld
->static_state
->wrap_s
);
361 y1
= lp_build_sample_wrap(bld
, y1
, height
, bld
->static_state
->pot_height
, bld
->static_state
->wrap_t
);
364 * Transform 4 x i32 in
366 * s_fpart = {s0, s1, s2, s3}
370 * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
374 * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
375 * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
377 * and likewise for t_fpart. There is no risk of loosing precision here
378 * since the fractional parts only use the lower 8bits.
/* Reinterpret the 32-bit fractions as twice as many 16-bit lanes. */
381 s_fpart
= LLVMBuildBitCast(builder
, s_fpart
, h16_vec_type
, "");
382 t_fpart
= LLVMBuildBitCast(builder
, t_fpart
, h16_vec_type
, "");
385 LLVMTypeRef elem_type
= LLVMInt32Type();
386 LLVMValueRef shuffles_lo
[LP_MAX_VECTOR_LENGTH
];
387 LLVMValueRef shuffles_hi
[LP_MAX_VECTOR_LENGTH
];
388 LLVMValueRef shuffle_lo
;
389 LLVMValueRef shuffle_hi
;
/*
 * Build the two shuffle masks four output lanes at a time: every group of
 * four lanes replicates one source 16-bit lane.
 */
392 for(j
= 0; j
< h16
.type
.length
; j
+= 4) {
/*
 * Of the two 16-bit lanes forming one 32-bit fraction, select the one
 * holding the low-order bits: the first lane on little-endian hosts and
 * the second otherwise.
 */
393 unsigned subindex
= util_cpu_caps
.little_endian
? 0 : 1;
/* Low mask: source lanes taken from the first half of the vector. */
396 index
= LLVMConstInt(elem_type
, j
/2 + subindex
, 0);
397 for(i
= 0; i
< 4; ++i
)
398 shuffles_lo
[j
+ i
] = index
;
/* High mask: same pattern, offset into the second half of the vector. */
400 index
= LLVMConstInt(elem_type
, h16
.type
.length
/2 + j
/2 + subindex
, 0);
401 for(i
= 0; i
< 4; ++i
)
402 shuffles_hi
[j
+ i
] = index
;
405 shuffle_lo
= LLVMConstVector(shuffles_lo
, h16
.type
.length
);
406 shuffle_hi
= LLVMConstVector(shuffles_hi
, h16
.type
.length
);
/* Broadcast each fraction across the four lanes of its pixel. */
408 s_fpart_lo
= LLVMBuildShuffleVector(builder
, s_fpart
, h16
.undef
, shuffle_lo
, "");
409 t_fpart_lo
= LLVMBuildShuffleVector(builder
, t_fpart
, h16
.undef
, shuffle_lo
, "");
410 s_fpart_hi
= LLVMBuildShuffleVector(builder
, s_fpart
, h16
.undef
, shuffle_hi
, "");
411 t_fpart_hi
= LLVMBuildShuffleVector(builder
, t_fpart
, h16
.undef
, shuffle_hi
, "");
415 * Fetch the pixels as 4 x 32bit (rgba order might differ):
417 * rgba0 rgba1 rgba2 rgba3
419 * bit cast them into 16 x u8
421 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
423 * unpack them into two 8 x i16:
425 * r0 g0 b0 a0 r1 g1 b1 a1
426 * r2 g2 b2 a2 r3 g3 b3 a3
428 * The higher 8 bits of the resulting elements will be zero.
/* Packed fetch of the 2x2 footprint: first index selects y0 or y1, second x0 or x1. */
431 neighbors
[0][0] = lp_build_sample_packed(bld
, x0
, y0
, stride
, data_ptr
);
432 neighbors
[0][1] = lp_build_sample_packed(bld
, x1
, y0
, stride
, data_ptr
);
433 neighbors
[1][0] = lp_build_sample_packed(bld
, x0
, y1
, stride
, data_ptr
);
434 neighbors
[1][1] = lp_build_sample_packed(bld
, x1
, y1
, stride
, data_ptr
);
/* View each packed vector as 8-bit unsigned normalized lanes. */
436 neighbors
[0][0] = LLVMBuildBitCast(builder
, neighbors
[0][0], u8n_vec_type
, "");
437 neighbors
[0][1] = LLVMBuildBitCast(builder
, neighbors
[0][1], u8n_vec_type
, "");
438 neighbors
[1][0] = LLVMBuildBitCast(builder
, neighbors
[1][0], u8n_vec_type
, "");
439 neighbors
[1][1] = LLVMBuildBitCast(builder
, neighbors
[1][1], u8n_vec_type
, "");
/* Widen every 8-bit vector into a low and a high vector of 16-bit lanes. */
441 lp_build_unpack2(builder
, u8n
.type
, h16
.type
, neighbors
[0][0], &neighbors_lo
[0][0], &neighbors_hi
[0][0]);
442 lp_build_unpack2(builder
, u8n
.type
, h16
.type
, neighbors
[0][1], &neighbors_lo
[0][1], &neighbors_hi
[0][1]);
443 lp_build_unpack2(builder
, u8n
.type
, h16
.type
, neighbors
[1][0], &neighbors_lo
[1][0], &neighbors_hi
[1][0]);
444 lp_build_unpack2(builder
, u8n
.type
, h16
.type
, neighbors
[1][1], &neighbors_lo
[1][1], &neighbors_hi
[1][1]);
447 * Linear interpolate with 8.8 fixed point.
/*
 * One interpolation per widened half.  The neighbour arguments are not
 * visible in this listing; presumably the matching neighbors_lo and
 * neighbors_hi entries.
 */
450 packed_lo
= lp_build_lerp_2d(&h16
,
451 s_fpart_lo
, t_fpart_lo
,
457 packed_hi
= lp_build_lerp_2d(&h16
,
458 s_fpart_hi
, t_fpart_hi
,
/* Narrow the two 16-bit results back into one vector of 8-bit lanes. */
464 packed
= lp_build_pack2(builder
, h16
.type
, u8n
.type
, packed_lo
, packed_hi
);
467 * Convert to SoA and swizzle.
/* Back to one 32-bit element per pixel before splitting into channels. */
470 packed
= LLVMBuildBitCast(builder
, packed
, i32_vec_type
, "");
/* Arguments not fully visible; presumably decodes packed into unswizzled. */
472 lp_build_rgba8_to_f32_soa(bld
->builder
,
/* Reorder the decoded channels according to the texture format description. */
476 lp_build_format_swizzle_soa(bld
->format_desc
,
477 bld
->texel_type
, unswizzled
,
/*
 * Apply the sampler comparison function to the fetched texel.
 *
 * The visible call site passes (bld, p, texel).  When
 * static_state->compare_mode is zero the guarded statement runs; it is not
 * visible in this listing and is presumably an early return.  Otherwise each
 * of the four texel channels is compared against p with
 * static_state->compare_func, the outcomes (one or zero) are summed and the
 * sum is multiplied by 0.25.  texel[3] is set to one.
 *
 * The initialisation of res and the body of the final loop are not visible
 * in this listing; presumably the loop stores res in the first three
 * channels.
 */
483 lp_build_sample_compare(struct lp_build_sample_context
*bld
,
487 struct lp_build_context
*texel_bld
= &bld
->texel_bld
;
/* Skip the comparison when no compare mode is set. */
491 if(!bld
->static_state
->compare_mode
)
494 /* TODO: Compare before swizzling, to avoid redundant computations */
496 for(chan
= 0; chan
< 4; ++chan
) {
/* Compare p against this channel and turn the outcome into one or zero. */
498 cmp
= lp_build_cmp(texel_bld
, bld
->static_state
->compare_func
, p
, texel
[chan
]);
499 cmp
= lp_build_select(texel_bld
, cmp
, texel_bld
->one
, texel_bld
->zero
);
/* Accumulate the per-channel outcomes. */
502 res
= lp_build_add(texel_bld
, res
, cmp
);
/* Average of the four outcomes. */
508 res
= lp_build_mul(texel_bld
, res
, lp_build_const_scalar(texel_bld
->type
, 0.25));
510 /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
511 for(chan
= 0; chan
< 3; ++chan
)
513 texel
[3] = texel_bld
->one
;
/*
 * Entry point: build the code that samples one texture unit.
 *
 * Visible parameters are builder, static_state, dynamic_state, coords and
 * lodbias; the body also uses type, unit and texel, whose declarations are
 * not visible in this listing, as are the assignments of s, t and p.
 *
 * Steps visible here:
 *  1. zero-fill and populate a struct lp_build_sample_context;
 *  2. query width, height, stride and data pointer through the dynamic_state
 *     callbacks, then broadcast width, height and stride with the integer
 *     coordinate build context;
 *  3. force t to zero for PIPE_TEXTURE_1D targets;
 *  4. for normalized coordinates, multiply s and t by width and height;
 *  5. dispatch on static_state->min_img_filter to the nearest or linear
 *     sampling helper;
 *  6. apply the comparison step through lp_build_sample_compare().
 *
 * As the FIXME notes near the end record, min_mip_filter, mag_img_filter
 * and prefilter are not honoured.
 */
518 lp_build_sample_soa(LLVMBuilderRef builder
,
519 const struct lp_sampler_static_state
*static_state
,
520 struct lp_sampler_dynamic_state
*dynamic_state
,
524 const LLVMValueRef
*coords
,
525 LLVMValueRef lodbias
,
528 struct lp_build_sample_context bld
;
532 LLVMValueRef data_ptr
;
537 /* Setup our build context */
538 memset(&bld
, 0, sizeof bld
);
539 bld
.builder
= builder
;
540 bld
.static_state
= static_state
;
541 bld
.dynamic_state
= dynamic_state
;
542 bld
.format_desc
= util_format_description(static_state
->format
);
/* Coordinates and texels share the caller type; integer coordinates use its int variant. */
543 bld
.coord_type
= type
;
544 bld
.int_coord_type
= lp_int_type(type
);
545 bld
.texel_type
= type
;
546 lp_build_context_init(&bld
.coord_bld
, builder
, bld
.coord_type
);
547 lp_build_context_init(&bld
.int_coord_bld
, builder
, bld
.int_coord_type
);
548 lp_build_context_init(&bld
.texel_bld
, builder
, bld
.texel_type
);
550 /* Get the dynamic state */
551 width
= dynamic_state
->width(dynamic_state
, builder
, unit
);
552 height
= dynamic_state
->height(dynamic_state
, builder
, unit
);
553 stride
= dynamic_state
->stride(dynamic_state
, builder
, unit
);
554 data_ptr
= dynamic_state
->data_ptr(dynamic_state
, builder
, unit
);
/* Broadcast the queried values with the integer coordinate build context. */
560 width
= lp_build_broadcast_scalar(&bld
.int_coord_bld
, width
);
561 height
= lp_build_broadcast_scalar(&bld
.int_coord_bld
, height
);
562 stride
= lp_build_broadcast_scalar(&bld
.int_coord_bld
, stride
);
/* 1D textures are sampled through the 2D helpers with t fixed at zero. */
564 if(static_state
->target
== PIPE_TEXTURE_1D
)
565 t
= bld
.coord_bld
.zero
;
/* Scale normalized coordinates by the texture size. */
567 if(static_state
->normalized_coords
) {
568 LLVMTypeRef coord_vec_type
= lp_build_vec_type(bld
.coord_type
);
569 LLVMValueRef fp_width
= LLVMBuildSIToFP(builder
, width
, coord_vec_type
, "");
570 LLVMValueRef fp_height
= LLVMBuildSIToFP(builder
, height
, coord_vec_type
, "");
571 s
= lp_build_mul(&bld
.coord_bld
, s
, fp_width
);
572 t
= lp_build_mul(&bld
.coord_bld
, t
, fp_height
);
/* Only the minification image filter selects the sampling routine. */
575 switch (static_state
->min_img_filter
) {
576 case PIPE_TEX_FILTER_NEAREST
:
577 lp_build_sample_2d_nearest_soa(&bld
, s
, t
, width
, height
, stride
, data_ptr
, texel
);
/* Anisotropic filtering shares the linear path. */
579 case PIPE_TEX_FILTER_LINEAR
:
580 case PIPE_TEX_FILTER_ANISO
:
/*
 * Formats accepted by lp_format_is_rgba8() take the packed fixed-point path.
 * The else keyword before the second call is not visible in this listing.
 */
581 if(lp_format_is_rgba8(bld
.format_desc
))
582 lp_build_sample_2d_linear_aos(&bld
, s
, t
, width
, height
, stride
, data_ptr
, texel
);
584 lp_build_sample_2d_linear_soa(&bld
, s
, t
, width
, height
, stride
, data_ptr
, texel
);
590 /* FIXME: respect static_state->min_mip_filter */;
591 /* FIXME: respect static_state->mag_img_filter */;
592 /* FIXME: respect static_state->prefilter */;
/* Comparison step; its visible guard is static_state->compare_mode. */
594 lp_build_sample_compare(&bld
, p
, texel
);