1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Texture sampling -- SoA.
32 * @author Jose Fonseca <jfonseca@vmware.com>
35 #include "pipe/p_defines.h"
36 #include "pipe/p_state.h"
37 #include "util/u_debug.h"
38 #include "util/u_dump.h"
39 #include "util/u_memory.h"
40 #include "util/u_math.h"
41 #include "util/u_format.h"
42 #include "util/u_cpu_detect.h"
43 #include "lp_bld_debug.h"
44 #include "lp_bld_type.h"
45 #include "lp_bld_const.h"
46 #include "lp_bld_conv.h"
47 #include "lp_bld_arit.h"
48 #include "lp_bld_logic.h"
49 #include "lp_bld_swizzle.h"
50 #include "lp_bld_pack.h"
51 #include "lp_bld_format.h"
52 #include "lp_bld_sample.h"
56 * Keep all information for sampling code generation in a single place.
/* Bundles the LLVM builder, the sampler state, the format description and
 * the per-type build contexts shared by the sampling code generators. */
58 struct lp_build_sample_context
/* LLVM builder handle; handed to the LLVMBuild* and lp_build_* calls. */
60 LLVMBuilderRef builder
;
/* Sampler state, split into a const static part and a dynamic part. */
62 const struct lp_sampler_static_state
*static_state
;
64 struct lp_sampler_dynamic_state
*dynamic_state
;
/* Description of the texture format being sampled. */
66 const struct util_format_description
*format_desc
;
68 /** Incoming coordinates type and build context */
69 struct lp_type coord_type
;
70 struct lp_build_context coord_bld
;
72 /** Integer coordinates */
73 struct lp_type int_coord_type
;
74 struct lp_build_context int_coord_bld
;
76 /** Output texels type and build context */
77 struct lp_type texel_type
;
78 struct lp_build_context texel_bld
;
/* Fetch texels and unpack them into separate channel values: an offset is
 * built with lp_build_sample_offset, the packed texels are gathered, and the
 * result is handed to lp_build_unpack_rgba_soa.
 * NOTE(review): several parameter and argument lines are not visible in this
 * view; callers pass (bld, x, y, stride, data_ptr, texel). */
83 lp_build_sample_texel_soa(struct lp_build_sample_context
*bld
,
86 LLVMValueRef y_stride
,
87 LLVMValueRef data_ptr
,
/* Offset computed in the integer coordinate build context. */
93 offset
= lp_build_sample_offset(&bld
->int_coord_bld
,
/* Only 1x1-block formats whose block bits fit in one texel_type element
 * are accepted here. */
98 assert(bld
->format_desc
->block
.width
== 1);
99 assert(bld
->format_desc
->block
.height
== 1);
100 assert(bld
->format_desc
->block
.bits
<= bld
->texel_type
.width
);
/* Gather the packed texels; the visible arguments are the vector length,
 * the format's block bits and the texel element width. */
102 packed
= lp_build_gather(bld
->builder
,
103 bld
->texel_type
.length
,
104 bld
->format_desc
->block
.bits
,
105 bld
->texel_type
.width
,
/* Split the packed values into per-channel results. */
108 lp_build_unpack_rgba_soa(bld
->builder
,
/* Fetch texels in their packed form: builds an offset with
 * lp_build_sample_offset and returns the result of lp_build_gather without
 * unpacking the channels.
 * NOTE(review): some parameter and argument lines are not visible in this
 * view; callers pass (bld, x, y, stride, data_ptr). */
116 lp_build_sample_packed(struct lp_build_sample_context
*bld
,
119 LLVMValueRef y_stride
,
120 LLVMValueRef data_ptr
)
/* Offset computed in the integer coordinate build context. */
124 offset
= lp_build_sample_offset(&bld
->int_coord_bld
,
/* Only 1x1-block formats whose block bits fit in one texel_type element
 * are accepted here. */
129 assert(bld
->format_desc
->block
.width
== 1);
130 assert(bld
->format_desc
->block
.height
== 1);
131 assert(bld
->format_desc
->block
.bits
<= bld
->texel_type
.width
);
/* The gathered (still packed) texels are the return value. */
133 return lp_build_gather(bld
->builder
,
134 bld
->texel_type
.length
,
135 bld
->format_desc
->block
.bits
,
136 bld
->texel_type
.width
,
/* Apply a texture wrap mode to integer texel coordinates.
 * Callers pass (bld, coord, length, pot flag, wrap mode) and use the result
 * as the wrapped coordinate.
 * NOTE(review): the remaining parameters, the switch header and the
 * break/return statements are not visible in this view. */
142 lp_build_sample_wrap(struct lp_build_sample_context
*bld
,
148 struct lp_build_context
*int_coord_bld
= &bld
->int_coord_bld
;
149 LLVMValueRef length_minus_one
;
/* length - 1 is both the AND mask for REPEAT and the upper clamp bound. */
151 length_minus_one
= lp_build_sub(int_coord_bld
, length
, int_coord_bld
->one
);
154 case PIPE_TEX_WRAP_REPEAT
:
/* Masking with length - 1 equals a modulo only for power-of-two lengths.
 * NOTE(review): the condition choosing between this and the remainder
 * below is not visible; presumably it is the pot flag callers pass. */
156 coord
= LLVMBuildAnd(bld
->builder
, coord
, length_minus_one
, "");
158 /* Signed remainder won't give the right results for negative
159 * dividends but unsigned remainder does.*/
160 coord
= LLVMBuildURem(bld
->builder
, coord
, length
, "");
163 case PIPE_TEX_WRAP_CLAMP
:
/* Clamp to the range [0, length - 1]. */
164 coord
= lp_build_max(int_coord_bld
, coord
, int_coord_bld
->zero
);
165 coord
= lp_build_min(int_coord_bld
, coord
, length_minus_one
);
/* The remaining modes are not translated: a debug message is printed and
 * the coordinate is clamped to [0, length - 1] just like CLAMP. */
168 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
:
169 case PIPE_TEX_WRAP_CLAMP_TO_BORDER
:
170 case PIPE_TEX_WRAP_MIRROR_REPEAT
:
171 case PIPE_TEX_WRAP_MIRROR_CLAMP
:
172 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
:
173 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
:
175 _debug_printf("llvmpipe: failed to translate texture wrap mode %s\n",
176 util_dump_tex_wrap(wrap_mode
, TRUE
));
177 coord
= lp_build_max(int_coord_bld
, coord
, int_coord_bld
->zero
);
178 coord
= lp_build_min(int_coord_bld
, coord
, length_minus_one
);
/* Nearest-neighbour 2D sampling: floor the coordinates to integers, wrap
 * them, and fetch a single texel per element.
 * NOTE(review): most parameter lines are not visible in this view; the
 * caller passes (bld, s, t, width, height, stride, data_ptr, texel). */
190 lp_build_sample_2d_nearest_soa(struct lp_build_sample_context
*bld
,
196 LLVMValueRef data_ptr
,
/* Convert the s/t coordinates to integer texel indices by flooring. */
202 x
= lp_build_ifloor(&bld
->coord_bld
, s
);
203 y
= lp_build_ifloor(&bld
->coord_bld
, t
);
/* Name the intermediate values. */
204 lp_build_name(x
, "tex.x.floor");
205 lp_build_name(y
, "tex.y.floor");
/* Wrap each axis using its own size, power-of-two flag and wrap mode. */
207 x
= lp_build_sample_wrap(bld
, x
, width
, bld
->static_state
->pot_width
, bld
->static_state
->wrap_s
);
208 y
= lp_build_sample_wrap(bld
, y
, height
, bld
->static_state
->pot_height
, bld
->static_state
->wrap_t
);
209 lp_build_name(x
, "tex.x.wrapped");
210 lp_build_name(y
, "tex.y.wrapped");
/* Fetch the texel at the wrapped coordinates into texel. */
212 lp_build_sample_texel_soa(bld
, x
, y
, stride
, data_ptr
, texel
);
/* Bilinear 2D sampling on separate channel values: splits the coordinates
 * into integer and fractional parts, fetches the 2x2 neighbourhood and
 * interpolates each of the four channels with lp_build_lerp_2d.
 * NOTE(review): most parameter lines and some declarations are not visible
 * in this view; the caller passes
 * (bld, s, t, width, height, stride, data_ptr, texel). */
217 lp_build_sample_2d_linear_soa(struct lp_build_sample_context
*bld
,
223 LLVMValueRef data_ptr
,
227 LLVMValueRef s_ipart
;
228 LLVMValueRef t_ipart
;
229 LLVMValueRef s_fpart
;
230 LLVMValueRef t_fpart
;
/* Indexed [y][x][channel], as the fetches below show. */
233 LLVMValueRef neighbors
[2][2][4];
/* Shift both coordinates by half a texel before splitting them. */
236 half
= lp_build_const_scalar(bld
->coord_type
, 0.5);
237 s
= lp_build_sub(&bld
->coord_bld
, s
, half
);
238 t
= lp_build_sub(&bld
->coord_bld
, t
, half
);
/* Integer part by flooring (still in the coordinate type). */
240 s_ipart
= lp_build_floor(&bld
->coord_bld
, s
);
241 t_ipart
= lp_build_floor(&bld
->coord_bld
, t
);
/* Fractional part: the coordinate minus its floor. */
243 s_fpart
= lp_build_sub(&bld
->coord_bld
, s
, s_ipart
);
244 t_fpart
= lp_build_sub(&bld
->coord_bld
, t
, t_ipart
);
/* Convert the floored values to integer texel indices. */
246 x0
= lp_build_itrunc(&bld
->coord_bld
, s_ipart
);
247 y0
= lp_build_itrunc(&bld
->coord_bld
, t_ipart
);
/* Wrap the first sample position on each axis. */
249 x0
= lp_build_sample_wrap(bld
, x0
, width
, bld
->static_state
->pot_width
, bld
->static_state
->wrap_s
);
250 y0
= lp_build_sample_wrap(bld
, y0
, height
, bld
->static_state
->pot_height
, bld
->static_state
->wrap_t
);
/* The second sample position is one past the already wrapped first one... */
252 x1
= lp_build_add(&bld
->int_coord_bld
, x0
, bld
->int_coord_bld
.one
);
253 y1
= lp_build_add(&bld
->int_coord_bld
, y0
, bld
->int_coord_bld
.one
);
/* ...and is wrapped again in turn. */
255 x1
= lp_build_sample_wrap(bld
, x1
, width
, bld
->static_state
->pot_width
, bld
->static_state
->wrap_s
);
256 y1
= lp_build_sample_wrap(bld
, y1
, height
, bld
->static_state
->pot_height
, bld
->static_state
->wrap_t
);
/* Fetch the four neighbouring texels. */
258 lp_build_sample_texel_soa(bld
, x0
, y0
, stride
, data_ptr
, neighbors
[0][0]);
259 lp_build_sample_texel_soa(bld
, x1
, y0
, stride
, data_ptr
, neighbors
[0][1]);
260 lp_build_sample_texel_soa(bld
, x0
, y1
, stride
, data_ptr
, neighbors
[1][0]);
261 lp_build_sample_texel_soa(bld
, x1
, y1
, stride
, data_ptr
, neighbors
[1][1]);
263 /* TODO: Don't interpolate missing channels */
/* Interpolate every channel in the texel build context.
 * NOTE(review): the weight arguments of lp_build_lerp_2d are not visible
 * here; presumably s_fpart and t_fpart. */
264 for(chan
= 0; chan
< 4; ++chan
) {
265 texel
[chan
] = lp_build_lerp_2d(&bld
->texel_bld
,
267 neighbors
[0][0][chan
],
268 neighbors
[0][1][chan
],
269 neighbors
[1][0][chan
],
270 neighbors
[1][1][chan
]);
/* Decode four 8-bit channels from packed values: each channel is shifted
 * down, masked to 8 bits and converted with
 * lp_build_unsigned_norm_to_float.
 * NOTE(review): the remaining parameters, the per-channel input/output
 * handling and any conditions around the shift and mask are not visible in
 * this view. */
276 lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder
,
277 struct lp_type dst_type
,
/* Mask that keeps the low 8 bits of each element. */
281 LLVMValueRef mask
= lp_build_int_const_scalar(dst_type
, 0xff);
284 /* Decode the input vector components */
285 for (chan
= 0; chan
< 4; ++chan
) {
/* Channel chan occupies bits [start, stop) of the packed value. */
286 unsigned start
= chan
*8;
287 unsigned stop
= start
+ 8;
/* Move the channel down to bit 0. */
293 input
= LLVMBuildLShr(builder
, input
, lp_build_int_const_scalar(dst_type
, start
), "");
/* Drop the bits belonging to higher channels. */
296 input
= LLVMBuildAnd(builder
, input
, mask
, "");
/* Convert the 8-bit unsigned normalized value to float. */
298 input
= lp_build_unsigned_norm_to_float(builder
, 8, dst_type
, input
);
/* Bilinear 2D sampling done on packed 8-bit channels with fixed-point
 * weights. This first part sets up the helper build contexts, converts the
 * coordinates to fixed point with 8 fractional bits, splits them into
 * integer and fractional parts, and wraps the sample positions.
 * NOTE(review): most parameter lines and some declarations are not visible
 * in this view; the caller passes
 * (bld, s, t, width, height, stride, data_ptr, texel). */
306 lp_build_sample_2d_linear_aos(struct lp_build_sample_context
*bld
,
312 LLVMValueRef data_ptr
,
315 LLVMBuilderRef builder
= bld
->builder
;
316 struct lp_build_context i32
, h16
, u8n
;
317 LLVMTypeRef i32_vec_type
, h16_vec_type
, u8n_vec_type
;
318 LLVMValueRef i32_c8
, i32_c128
, i32_c255
;
319 LLVMValueRef s_ipart
, s_fpart
, s_fpart_lo
, s_fpart_hi
;
320 LLVMValueRef t_ipart
, t_fpart
, t_fpart_lo
, t_fpart_hi
;
323 LLVMValueRef neighbors
[2][2];
324 LLVMValueRef neighbors_lo
[2][2];
325 LLVMValueRef neighbors_hi
[2][2];
326 LLVMValueRef packed
, packed_lo
, packed_hi
;
327 LLVMValueRef unswizzled
[4];
/* Helper contexts: 32-bit integers, 16-bit unsigned fixed point, and
 * 8-bit unsigned normalized values. */
329 lp_build_context_init(&i32
, builder
, lp_type_int(32));
330 lp_build_context_init(&h16
, builder
, lp_type_ufixed(16));
331 lp_build_context_init(&u8n
, builder
, lp_type_unorm(8));
333 i32_vec_type
= lp_build_vec_type(i32
.type
);
334 h16_vec_type
= lp_build_vec_type(h16
.type
);
335 u8n_vec_type
= lp_build_vec_type(u8n
.type
);
/* Scale by 256 so the low 8 bits of the integer form hold the fraction. */
337 s
= lp_build_mul_imm(&bld
->coord_bld
, s
, 256);
338 t
= lp_build_mul_imm(&bld
->coord_bld
, t
, 256);
/* Convert the scaled coordinates to signed 32-bit integers. */
340 s
= LLVMBuildFPToSI(builder
, s
, i32_vec_type
, "");
341 t
= LLVMBuildFPToSI(builder
, t
, i32_vec_type
, "");
/* Despite its name, i32_c128 holds -128; adding it subtracts half a texel
 * (128/256) in this fixed-point scale. */
343 i32_c128
= lp_build_int_const_scalar(i32
.type
, -128);
344 s
= LLVMBuildAdd(builder
, s
, i32_c128
, "");
345 t
= LLVMBuildAdd(builder
, t
, i32_c128
, "");
/* Integer part: arithmetic shift right by the 8 fractional bits. */
347 i32_c8
= lp_build_int_const_scalar(i32
.type
, 8);
348 s_ipart
= LLVMBuildAShr(builder
, s
, i32_c8
, "");
349 t_ipart
= LLVMBuildAShr(builder
, t
, i32_c8
, "");
/* Fractional part: the low 8 bits. */
351 i32_c255
= lp_build_int_const_scalar(i32
.type
, 255);
352 s_fpart
= LLVMBuildAnd(builder
, s
, i32_c255
, "");
353 t_fpart
= LLVMBuildAnd(builder
, t
, i32_c255
, "");
/* Wrap the first sample position on each axis.
 * NOTE(review): the lines that initialise x0/y0 are not visible here;
 * presumably they are set from s_ipart/t_ipart. */
358 x0
= lp_build_sample_wrap(bld
, x0
, width
, bld
->static_state
->pot_width
, bld
->static_state
->wrap_s
);
359 y0
= lp_build_sample_wrap(bld
, y0
, height
, bld
->static_state
->pot_height
, bld
->static_state
->wrap_t
);
/* The second sample position is one past the wrapped first one, then
 * wrapped again. */
361 x1
= lp_build_add(&bld
->int_coord_bld
, x0
, bld
->int_coord_bld
.one
);
362 y1
= lp_build_add(&bld
->int_coord_bld
, y0
, bld
->int_coord_bld
.one
);
364 x1
= lp_build_sample_wrap(bld
, x1
, width
, bld
->static_state
->pot_width
, bld
->static_state
->wrap_s
);
365 y1
= lp_build_sample_wrap(bld
, y1
, height
, bld
->static_state
->pot_height
, bld
->static_state
->wrap_t
);
368 * Transform 4 x i32 in
370 * s_fpart = {s0, s1, s2, s3}
374 * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
378 * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
379 * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
381 * and likewise for t_fpart. There is no risk of losing precision here
382 * since the fractional parts only use the lower 8 bits.
/* Second part of the packed bilinear path: replicate the fractional
 * weights across 16-bit lanes, fetch the four packed neighbours, widen them
 * to 16-bit lanes, interpolate, pack back to 8 bits and convert to
 * per-channel floats. */
/* Reinterpret the 32-bit fractional parts as 16-bit lanes. */
385 s_fpart
= LLVMBuildBitCast(builder
, s_fpart
, h16_vec_type
, "");
386 t_fpart
= LLVMBuildBitCast(builder
, t_fpart
, h16_vec_type
, "");
389 LLVMTypeRef elem_type
= LLVMInt32Type();
390 LLVMValueRef shuffles_lo
[LP_MAX_VECTOR_LENGTH
];
391 LLVMValueRef shuffles_hi
[LP_MAX_VECTOR_LENGTH
];
392 LLVMValueRef shuffle_lo
;
393 LLVMValueRef shuffle_hi
;
/* Build the shuffle masks four lanes at a time: each group of four output
 * lanes repeats a single source lane. */
396 for(j
= 0; j
< h16
.type
.length
; j
+= 4) {
/* Selects which 16-bit half of each 32-bit element to read, depending on
 * endianness: the first half on little endian, the second otherwise. */
397 unsigned subindex
= util_cpu_caps
.little_endian
? 0 : 1;
/* Low mask: source lane j/2 + subindex, from the first half of the vector. */
400 index
= LLVMConstInt(elem_type
, j
/2 + subindex
, 0);
401 for(i
= 0; i
< 4; ++i
)
402 shuffles_lo
[j
+ i
] = index
;
/* High mask: the same pattern, offset into the second half of the vector. */
404 index
= LLVMConstInt(elem_type
, h16
.type
.length
/2 + j
/2 + subindex
, 0);
405 for(i
= 0; i
< 4; ++i
)
406 shuffles_hi
[j
+ i
] = index
;
409 shuffle_lo
= LLVMConstVector(shuffles_lo
, h16
.type
.length
);
410 shuffle_hi
= LLVMConstVector(shuffles_hi
, h16
.type
.length
);
/* Apply both masks to the s and t fractions; the second shuffle operand
 * is undef. */
412 s_fpart_lo
= LLVMBuildShuffleVector(builder
, s_fpart
, h16
.undef
, shuffle_lo
, "");
413 t_fpart_lo
= LLVMBuildShuffleVector(builder
, t_fpart
, h16
.undef
, shuffle_lo
, "");
414 s_fpart_hi
= LLVMBuildShuffleVector(builder
, s_fpart
, h16
.undef
, shuffle_hi
, "");
415 t_fpart_hi
= LLVMBuildShuffleVector(builder
, t_fpart
, h16
.undef
, shuffle_hi
, "");
419 * Fetch the pixels as 4 x 32bit (rgba order might differ):
421 * rgba0 rgba1 rgba2 rgba3
423 * bit cast them into 16 x u8
425 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
427 * unpack them into two 8 x i16:
429 * r0 g0 b0 a0 r1 g1 b1 a1
430 * r2 g2 b2 a2 r3 g3 b3 a3
432 * The higher 8 bits of the resulting elements will be zero.
/* Packed fetch of the 2x2 neighbourhood, indexed [y][x]. */
435 neighbors
[0][0] = lp_build_sample_packed(bld
, x0
, y0
, stride
, data_ptr
);
436 neighbors
[0][1] = lp_build_sample_packed(bld
, x1
, y0
, stride
, data_ptr
);
437 neighbors
[1][0] = lp_build_sample_packed(bld
, x0
, y1
, stride
, data_ptr
);
438 neighbors
[1][1] = lp_build_sample_packed(bld
, x1
, y1
, stride
, data_ptr
);
/* Reinterpret each fetched vector as 8-bit lanes. */
440 neighbors
[0][0] = LLVMBuildBitCast(builder
, neighbors
[0][0], u8n_vec_type
, "");
441 neighbors
[0][1] = LLVMBuildBitCast(builder
, neighbors
[0][1], u8n_vec_type
, "");
442 neighbors
[1][0] = LLVMBuildBitCast(builder
, neighbors
[1][0], u8n_vec_type
, "");
443 neighbors
[1][1] = LLVMBuildBitCast(builder
, neighbors
[1][1], u8n_vec_type
, "");
/* Widen every neighbour from 8-bit to 16-bit lanes, giving a low and a
 * high vector each. */
445 lp_build_unpack2(builder
, u8n
.type
, h16
.type
, neighbors
[0][0], &neighbors_lo
[0][0], &neighbors_hi
[0][0]);
446 lp_build_unpack2(builder
, u8n
.type
, h16
.type
, neighbors
[0][1], &neighbors_lo
[0][1], &neighbors_hi
[0][1]);
447 lp_build_unpack2(builder
, u8n
.type
, h16
.type
, neighbors
[1][0], &neighbors_lo
[1][0], &neighbors_hi
[1][0]);
448 lp_build_unpack2(builder
, u8n
.type
, h16
.type
, neighbors
[1][1], &neighbors_lo
[1][1], &neighbors_hi
[1][1]);
451 * Linear interpolate with 8.8 fixed point.
/* Interpolate the low and high halves separately in the 16-bit context.
 * NOTE(review): the neighbour arguments of both calls are not visible here;
 * presumably neighbors_lo and neighbors_hi respectively. */
454 packed_lo
= lp_build_lerp_2d(&h16
,
455 s_fpart_lo
, t_fpart_lo
,
461 packed_hi
= lp_build_lerp_2d(&h16
,
462 s_fpart_hi
, t_fpart_hi
,
/* Narrow both halves back into one vector of 8-bit lanes. */
468 packed
= lp_build_pack2(builder
, h16
.type
, u8n
.type
, packed_lo
, packed_hi
);
471 * Convert to SoA and swizzle.
/* Back to one 32-bit element per texel before decoding the channels. */
474 packed
= LLVMBuildBitCast(builder
, packed
, i32_vec_type
, "");
476 lp_build_rgba8_to_f32_soa(bld
->builder
,
/* Reorder the decoded channels according to the format description. */
480 lp_build_format_swizzle_soa(bld
->format_desc
,
481 bld
->texel_type
, unswizzled
,
/* Apply the sampler's comparison function to the fetched texel: each of
 * the four channels is compared against p, the 0/1 results are summed and
 * the sum is scaled by 0.25.
 * NOTE(review): the remaining parameters, the initialisation of res and
 * some statements are not visible in this view; the caller passes
 * (bld, p, texel). */
487 lp_build_sample_compare(struct lp_build_sample_context
*bld
,
491 struct lp_build_context
*texel_bld
= &bld
->texel_bld
;
/* NOTE(review): the statement guarded by this test is not visible here;
 * presumably an early return when comparison is disabled. */
495 if(bld
->static_state
->compare_mode
== PIPE_TEX_COMPARE_NONE
)
498 /* TODO: Compare before swizzling, to avoid redundant computations */
500 for(chan
= 0; chan
< 4; ++chan
) {
/* Compare p against this channel using the configured function. */
502 cmp
= lp_build_cmp(texel_bld
, bld
->static_state
->compare_func
, p
, texel
[chan
]);
/* Turn the comparison result into one or zero in the texel type. */
503 cmp
= lp_build_select(texel_bld
, cmp
, texel_bld
->one
, texel_bld
->zero
);
/* Accumulate the per-channel results. */
506 res
= lp_build_add(texel_bld
, res
, cmp
);
/* Average over the four channels. */
512 res
= lp_build_mul(texel_bld
, res
, lp_build_const_scalar(texel_bld
->type
, 0.25));
514 /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
/* NOTE(review): the body of this loop over the first three channels is not
 * visible here; presumably each is set to res. */
515 for(chan
= 0; chan
< 3; ++chan
)
/* The fourth channel is set to one. */
517 texel
[3] = texel_bld
->one
;
/* Entry point: fills in a sampling build context, reads the texture's
 * dynamic state, optionally scales normalized coordinates, dispatches on the
 * minification image filter and finally applies the comparison step.
 * NOTE(review): several parameters (such as type, unit and texel) and the
 * lines that derive s, t and p are not visible in this view. */
522 lp_build_sample_soa(LLVMBuilderRef builder
,
523 const struct lp_sampler_static_state
*static_state
,
524 struct lp_sampler_dynamic_state
*dynamic_state
,
528 const LLVMValueRef
*coords
,
529 LLVMValueRef lodbias
,
532 struct lp_build_sample_context bld
;
536 LLVMValueRef data_ptr
;
541 /* Setup our build context */
542 memset(&bld
, 0, sizeof bld
);
543 bld
.builder
= builder
;
544 bld
.static_state
= static_state
;
545 bld
.dynamic_state
= dynamic_state
;
546 bld
.format_desc
= util_format_description(static_state
->format
);
/* Coordinates and texels both use the caller's type; integer coordinates
 * use the type lp_int_type derives from it. */
547 bld
.coord_type
= type
;
548 bld
.int_coord_type
= lp_int_type(type
);
549 bld
.texel_type
= type
;
550 lp_build_context_init(&bld
.coord_bld
, builder
, bld
.coord_type
);
551 lp_build_context_init(&bld
.int_coord_bld
, builder
, bld
.int_coord_type
);
552 lp_build_context_init(&bld
.texel_bld
, builder
, bld
.texel_type
);
554 /* Get the dynamic state */
555 width
= dynamic_state
->width(dynamic_state
, builder
, unit
);
556 height
= dynamic_state
->height(dynamic_state
, builder
, unit
);
557 stride
= dynamic_state
->stride(dynamic_state
, builder
, unit
);
558 data_ptr
= dynamic_state
->data_ptr(dynamic_state
, builder
, unit
);
/* Broadcast the scalar width, height and stride to integer vectors. */
564 width
= lp_build_broadcast_scalar(&bld
.int_coord_bld
, width
);
565 height
= lp_build_broadcast_scalar(&bld
.int_coord_bld
, height
);
566 stride
= lp_build_broadcast_scalar(&bld
.int_coord_bld
, stride
);
/* 1D textures use a zero t coordinate. */
568 if(static_state
->target
== PIPE_TEXTURE_1D
)
569 t
= bld
.coord_bld
.zero
;
/* Normalized coordinates are scaled by the texture size to texel units. */
571 if(static_state
->normalized_coords
) {
572 LLVMTypeRef coord_vec_type
= lp_build_vec_type(bld
.coord_type
);
573 LLVMValueRef fp_width
= LLVMBuildSIToFP(builder
, width
, coord_vec_type
, "");
574 LLVMValueRef fp_height
= LLVMBuildSIToFP(builder
, height
, coord_vec_type
, "");
575 s
= lp_build_mul(&bld
.coord_bld
, s
, fp_width
);
576 t
= lp_build_mul(&bld
.coord_bld
, t
, fp_height
);
/* Only the minification image filter selects the sampling routine (see the
 * FIXME notes further down). */
579 switch (static_state
->min_img_filter
) {
580 case PIPE_TEX_FILTER_NEAREST
:
581 lp_build_sample_2d_nearest_soa(&bld
, s
, t
, width
, height
, stride
, data_ptr
, texel
);
583 case PIPE_TEX_FILTER_LINEAR
:
/* RGBA8 formats take the packed fixed-point path.
 * NOTE(review): the else joining the two calls is not visible here;
 * presumably other formats take the per-channel path. */
584 if(lp_format_is_rgba8(bld
.format_desc
))
585 lp_build_sample_2d_linear_aos(&bld
, s
, t
, width
, height
, stride
, data_ptr
, texel
);
587 lp_build_sample_2d_linear_soa(&bld
, s
, t
, width
, height
, stride
, data_ptr
, texel
);
593 /* FIXME: respect static_state->min_mip_filter */;
594 /* FIXME: respect static_state->mag_img_filter */;
/* Apply the comparison step to the sampled texel. */
596 lp_build_sample_compare(&bld
, p
, texel
);