gallivm: (trivial) fix linear aos sampling of 3d compressed formats
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_sample_aos.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * Texture sampling -- AoS.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 * @author Brian Paul <brianp@vmware.com>
34 */
35
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "util/u_debug.h"
39 #include "util/u_dump.h"
40 #include "util/u_memory.h"
41 #include "util/u_math.h"
42 #include "util/u_format.h"
43 #include "util/u_cpu_detect.h"
44 #include "lp_bld_debug.h"
45 #include "lp_bld_type.h"
46 #include "lp_bld_const.h"
47 #include "lp_bld_conv.h"
48 #include "lp_bld_arit.h"
49 #include "lp_bld_bitarit.h"
50 #include "lp_bld_logic.h"
51 #include "lp_bld_swizzle.h"
52 #include "lp_bld_pack.h"
53 #include "lp_bld_flow.h"
54 #include "lp_bld_gather.h"
55 #include "lp_bld_format.h"
56 #include "lp_bld_init.h"
57 #include "lp_bld_sample.h"
58 #include "lp_bld_sample_aos.h"
59 #include "lp_bld_quad.h"
60
61
/**
 * Build LLVM code for texture coord wrapping, for nearest filtering,
 * for scaled integer texcoords.
 *
 * Only PIPE_TEX_WRAP_REPEAT and PIPE_TEX_WRAP_CLAMP_TO_EDGE are handled;
 * every other wrap mode hits the assert below (presumably those modes are
 * routed to a different sampling path — TODO confirm against callers).
 *
 * \param block_length is the length of the pixel block along the
 *                     coordinate axis
 * \param coord the incoming texcoord (s,t or r) scaled to the texture size
 * \param coord_f the incoming texcoord (s,t or r) as float vec
 * \param length the texture size along one dimension
 * \param stride pixel stride along the coordinate axis (in bytes)
 * \param offset the texel offset along the coord axis
 * \param is_pot if TRUE, length is a power of two
 * \param wrap_mode one of PIPE_TEX_WRAP_x
 * \param out_offset byte offset for the wrapped coordinate
 * \param out_i resulting sub-block pixel coordinate for coord0
 */
static void
lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
                                 unsigned block_length,
                                 LLVMValueRef coord,
                                 LLVMValueRef coord_f,
                                 LLVMValueRef length,
                                 LLVMValueRef stride,
                                 LLVMValueRef offset,
                                 boolean is_pot,
                                 unsigned wrap_mode,
                                 LLVMValueRef *out_offset,
                                 LLVMValueRef *out_i)
{
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef length_minus_one;

   length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if(is_pot)
         /* POT size: coord mod length is just a bitwise AND with length-1 */
         coord = LLVMBuildAnd(builder, coord, length_minus_one, "");
      else {
         /* NPOT size: redo the wrap in float as fract(coord) * length,
          * truncated back to int.
          */
         struct lp_build_context *coord_bld = &bld->coord_bld;
         LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length);
         if (offset) {
            /* convert the texel offset to a normalized coord offset
             * so it can be folded in before the fract
             */
            offset = lp_build_int_to_float(coord_bld, offset);
            offset = lp_build_div(coord_bld, offset, length_f);
            coord_f = lp_build_add(coord_bld, coord_f, offset);
         }
         coord = lp_build_fract_safe(coord_bld, coord_f);
         coord = lp_build_mul(coord_bld, coord, length_f);
         coord = lp_build_itrunc(coord_bld, coord);
      }
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      /* clamp coord to [0, length - 1] */
      coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
      coord = lp_build_min(int_coord_bld, coord, length_minus_one);
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
   default:
      assert(0);
   }

   /* split the wrapped coord into a byte offset plus the pixel's
    * position within its (compressed) block
    */
   lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride,
                                  out_offset, out_i);
}
132
133
/**
 * Build LLVM code for texture coord wrapping, for nearest filtering,
 * for float texcoords.
 *
 * Only PIPE_TEX_WRAP_REPEAT and PIPE_TEX_WRAP_CLAMP_TO_EDGE are handled;
 * other wrap modes hit the assert below.
 *
 * \param coord the incoming texcoord (s,t or r)
 * \param length the texture size along one dimension
 * \param offset the texel offset along the coord axis
 * \param is_pot if TRUE, length is a power of two
 * \param wrap_mode one of PIPE_TEX_WRAP_x
 * \param icoord the texcoord after wrapping, as int
 */
static void
lp_build_sample_wrap_nearest_float(struct lp_build_sample_context *bld,
                                   LLVMValueRef coord,
                                   LLVMValueRef length,
                                   LLVMValueRef offset,
                                   boolean is_pot,
                                   unsigned wrap_mode,
                                   LLVMValueRef *icoord)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   LLVMValueRef length_minus_one;

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (offset) {
         /* this is definitely not ideal for POT case */
         /* fold the texel offset in as a normalized coord offset */
         offset = lp_build_int_to_float(coord_bld, offset);
         offset = lp_build_div(coord_bld, offset, length);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* take fraction, unnormalize */
      coord = lp_build_fract_safe(coord_bld, coord);
      coord = lp_build_mul(coord_bld, coord, length);
      *icoord = lp_build_itrunc(coord_bld, coord);
      break;
   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one);
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length);
      }
      /* texel offset is applied in unnormalized texel space here */
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* clamp to [0, length - 1], then truncate to int */
      coord = lp_build_clamp(coord_bld, coord, coord_bld->zero,
                             length_minus_one);
      *icoord = lp_build_itrunc(coord_bld, coord);
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
   default:
      assert(0);
   }
}
194
195
/**
 * Build LLVM code for texture coord wrapping, for linear filtering,
 * for scaled integer texcoords.
 *
 * Produces the two neighboring texel offsets (and, for multi-pixel blocks,
 * sub-block coords) needed for linear filtering along one axis.  For NPOT
 * REPEAT the 8-bit lerp weight is also recomputed and stored through
 * \p weight_i.  Only REPEAT and CLAMP_TO_EDGE are handled; other wrap
 * modes hit the asserts below.
 *
 * \param block_length is the length of the pixel block along the
 *                     coordinate axis
 * \param coord0 the incoming texcoord (s,t or r) scaled to the texture size
 * \param weight_i output: 8-bit fixed point lerp weight (NPOT REPEAT only)
 * \param coord_f the incoming texcoord (s,t or r) as float vec
 * \param length the texture size along one dimension
 * \param stride pixel stride along the coordinate axis (in bytes)
 * \param offset the texel offset along the coord axis
 * \param is_pot if TRUE, length is a power of two
 * \param wrap_mode one of PIPE_TEX_WRAP_x
 * \param offset0 resulting relative offset for coord0
 * \param offset1 resulting relative offset for coord0 + 1
 * \param i0 resulting sub-block pixel coordinate for coord0
 * \param i1 resulting sub-block pixel coordinate for coord0 + 1
 */
static void
lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
                                unsigned block_length,
                                LLVMValueRef coord0,
                                LLVMValueRef *weight_i,
                                LLVMValueRef coord_f,
                                LLVMValueRef length,
                                LLVMValueRef stride,
                                LLVMValueRef offset,
                                boolean is_pot,
                                unsigned wrap_mode,
                                LLVMValueRef *offset0,
                                LLVMValueRef *offset1,
                                LLVMValueRef *i0,
                                LLVMValueRef *i1)
{
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef length_minus_one;
   LLVMValueRef lmask, umask, mask;

   /*
    * If the pixel block covers more than one pixel then there is no easy
    * way to calculate offset1 relative to offset0. Instead, compute them
    * independently. Otherwise, try to compute offset0 and offset1 with
    * a single stride multiplication.
    */

   length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);

   if (block_length != 1) {
      /* multi-pixel blocks (e.g. compressed formats): wrap both coords,
       * then compute each offset with a full partial-offset calculation.
       */
      LLVMValueRef coord1;
      switch(wrap_mode) {
      case PIPE_TEX_WRAP_REPEAT:
         if (is_pot) {
            /* POT: wrap both neighbors with a bitwise AND */
            coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
            coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
            coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
         }
         else {
            LLVMValueRef mask;
            LLVMValueRef weight;
            LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
            if (offset) {
               offset = lp_build_int_to_float(&bld->coord_bld, offset);
               offset = lp_build_div(&bld->coord_bld, offset, length_f);
               coord_f = lp_build_add(&bld->coord_bld, coord_f, offset);
            }
            lp_build_coord_repeat_npot_linear(bld, coord_f,
                                              length, length_f,
                                              &coord0, &weight);
            /* mask is all-ones unless coord0 == length-1; ANDing coord0+1
             * with it wraps the neighbor back to 0 at the edge
             */
            mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
                                    PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
            coord1 = LLVMBuildAnd(builder,
                                  lp_build_add(int_coord_bld, coord0,
                                               int_coord_bld->one),
                                  mask, "");
            /* convert weight to 8-bit fixed point */
            weight = lp_build_mul_imm(&bld->coord_bld, weight, 256);
            *weight_i = lp_build_itrunc(&bld->coord_bld, weight);
         }
         break;

      case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         coord0 = lp_build_clamp(int_coord_bld, coord0, int_coord_bld->zero,
                                 length_minus_one);
         coord1 = lp_build_clamp(int_coord_bld, coord1, int_coord_bld->zero,
                                 length_minus_one);
         break;

      case PIPE_TEX_WRAP_CLAMP:
      case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
      case PIPE_TEX_WRAP_MIRROR_REPEAT:
      case PIPE_TEX_WRAP_MIRROR_CLAMP:
      case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
      default:
         assert(0);
         coord0 = int_coord_bld->zero;
         coord1 = int_coord_bld->zero;
         break;
      }
      lp_build_sample_partial_offset(int_coord_bld, block_length, coord0, stride,
                                     offset0, i0);
      lp_build_sample_partial_offset(int_coord_bld, block_length, coord1, stride,
                                     offset1, i1);
      return;
   }

   /* single-pixel blocks: sub-block coords are always 0 and offset1 can be
    * derived from offset0 with one add (masked at the wrap edge)
    */
   *i0 = int_coord_bld->zero;
   *i1 = int_coord_bld->zero;

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
      }
      else {
         LLVMValueRef weight;
         LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
         if (offset) {
            offset = lp_build_int_to_float(&bld->coord_bld, offset);
            offset = lp_build_div(&bld->coord_bld, offset, length_f);
            coord_f = lp_build_add(&bld->coord_bld, coord_f, offset);
         }
         lp_build_coord_repeat_npot_linear(bld, coord_f,
                                           length, length_f,
                                           &coord0, &weight);
         weight = lp_build_mul_imm(&bld->coord_bld, weight, 256);
         *weight_i = lp_build_itrunc(&bld->coord_bld, weight);
      }

      /* zero out offset1's delta when coord0 is the last texel,
       * wrapping the neighbor to offset 0
       */
      mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
                              PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);

      *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
      *offset1 = LLVMBuildAnd(builder,
                              lp_build_add(int_coord_bld, *offset0, stride),
                              mask, "");
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      /* XXX this might be slower than the separate path
       * on some newer cpus. With sse41 this is 8 instructions vs. 7
       * - at least on SNB this is almost certainly slower since
       * min/max are cheaper than selects, and the muls aren't bad.
       */
      lmask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                               PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
      umask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                               PIPE_FUNC_LESS, coord0, length_minus_one);

      /* clamp coord0 to [0, length-1] via selects */
      coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
      coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);

      /* stride delta only applies when coord0 was strictly inside range */
      mask = LLVMBuildAnd(builder, lmask, umask, "");

      *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
      *offset1 = lp_build_add(int_coord_bld,
                              *offset0,
                              LLVMBuildAnd(builder, stride, mask, ""));
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
   default:
      assert(0);
      *offset0 = int_coord_bld->zero;
      *offset1 = int_coord_bld->zero;
      break;
   }
}
369
370
/**
 * Build LLVM code for texture coord wrapping, for linear filtering,
 * for float texcoords.
 *
 * Only REPEAT and CLAMP_TO_EDGE are handled; other modes hit the assert.
 * The computed weight is converted to 8-bit fixed point (0-255) at the end.
 *
 * \param block_length is the length of the pixel block along the
 *                     coordinate axis (currently unused in this function)
 * \param coord the incoming texcoord (s,t or r)
 * \param length the texture size along one dimension
 * \param offset the texel offset along the coord axis
 * \param is_pot if TRUE, length is a power of two
 * \param wrap_mode one of PIPE_TEX_WRAP_x
 * \param coord0 the first texcoord after wrapping, as int
 * \param coord1 the second texcoord after wrapping, as int
 * \param weight the filter weight as int (0-255)
 * \param force_nearest if this coord actually uses nearest filtering
 *                      (skips the 0.5 texel-center adjustment)
 */
static void
lp_build_sample_wrap_linear_float(struct lp_build_sample_context *bld,
                                  unsigned block_length,
                                  LLVMValueRef coord,
                                  LLVMValueRef length,
                                  LLVMValueRef offset,
                                  boolean is_pot,
                                  unsigned wrap_mode,
                                  LLVMValueRef *coord0,
                                  LLVMValueRef *coord1,
                                  LLVMValueRef *weight,
                                  unsigned force_nearest)
{
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   struct lp_build_context *coord_bld = &bld->coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
   LLVMValueRef length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one);

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         /* mul by size and subtract 0.5 */
         coord = lp_build_mul(coord_bld, coord, length);
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         if (!force_nearest)
            coord = lp_build_sub(coord_bld, coord, half);
         *coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(coord_bld, coord, coord0, weight);
         *coord1 = lp_build_ifloor(coord_bld, *coord1);
         /* repeat wrap */
         length_minus_one = lp_build_itrunc(coord_bld, length_minus_one);
         *coord0 = LLVMBuildAnd(builder, *coord0, length_minus_one, "");
         *coord1 = LLVMBuildAnd(builder, *coord1, length_minus_one, "");
      }
      else {
         LLVMValueRef mask;
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            offset = lp_build_div(coord_bld, offset, length);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         /* wrap with normalized floats is just fract */
         coord = lp_build_fract(coord_bld, coord);
         /* unnormalize */
         coord = lp_build_mul(coord_bld, coord, length);
         /*
          * we avoided the 0.5/length division, have to fix up wrong
          * edge cases with selects
          */
         *coord1 = lp_build_add(coord_bld, coord, half);
         coord = lp_build_sub(coord_bld, coord, half);
         *weight = lp_build_fract(coord_bld, coord);
         /* coord0 < 0 wraps to the last texel */
         mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
                                 PIPE_FUNC_LESS, coord, coord_bld->zero);
         *coord0 = lp_build_select(coord_bld, mask, length_minus_one, coord);
         *coord0 = lp_build_itrunc(coord_bld, *coord0);
         /* coord1 >= length wraps back to texel 0 */
         mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
                                 PIPE_FUNC_LESS, *coord1, length);
         *coord1 = lp_build_select(coord_bld, mask, *coord1, coord_bld->zero);
         *coord1 = lp_build_itrunc(coord_bld, *coord1);
      }
      break;
   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      if (bld->static_sampler_state->normalized_coords) {
         /* mul by tex size */
         coord = lp_build_mul(coord_bld, coord, length);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* subtract 0.5 */
      if (!force_nearest) {
         coord = lp_build_sub(coord_bld, coord, half);
      }
      /* clamp to [0, length - 1] */
      coord = lp_build_min(coord_bld, coord, length_minus_one);
      coord = lp_build_max(coord_bld, coord, coord_bld->zero);
      *coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, coord0, weight);
      /* coord1 = min(coord1, length-1) */
      *coord1 = lp_build_min(coord_bld, *coord1, length_minus_one);
      *coord1 = lp_build_itrunc(coord_bld, *coord1);
      break;
   default:
      assert(0);
      *coord0 = int_coord_bld->zero;
      *coord1 = int_coord_bld->zero;
      *weight = coord_bld->zero;
      break;
   }
   /* scale weight to 8-bit fixed point (0-255) */
   *weight = lp_build_mul_imm(coord_bld, *weight, 256);
   *weight = lp_build_itrunc(coord_bld, *weight);
   return;
}
487
488
/**
 * Fetch texels for image with nearest sampling.
 * Return filtered color as two vectors of 16-bit fixed point values.
 *
 * \param data_ptr base pointer of the texture image data
 * \param offset per-pixel byte offsets of the texels to fetch
 * \param x_subcoord,y_subcoord sub-block pixel coords (used only for the
 *        generic non-rgba8 fetch path)
 * \param colors output: fetched texels as packed 8-bit unorm vector
 */
static void
lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld,
                                    LLVMValueRef data_ptr,
                                    LLVMValueRef offset,
                                    LLVMValueRef x_subcoord,
                                    LLVMValueRef y_subcoord,
                                    LLVMValueRef *colors)
{
   /*
    * Fetch the pixels as 4 x 32bit (rgba order might differ):
    *
    *   rgba0 rgba1 rgba2 rgba3
    *
    * bit cast them into 16 x u8
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
    *
    * unpack them into two 8 x i16:
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1
    *   r2 g2 b2 a2 r3 g3 b3 a3
    *
    * The higher 8 bits of the resulting elements will be zero.
    */
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef rgba8;
   struct lp_build_context u8n;
   LLVMTypeRef u8n_vec_type;

   lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
   u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);

   if (util_format_is_rgba8_variant(bld->format_desc)) {
      /*
       * Given the format is a rgba8, just read the pixels as is,
       * without any swizzling. Swizzling will be done later.
       */
      rgba8 = lp_build_gather(bld->gallivm,
                              bld->texel_type.length,
                              bld->format_desc->block.bits,
                              bld->texel_type.width,
                              data_ptr, offset, TRUE);

      rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
   }
   else {
      /* generic path: decode via the format description (handles
       * compressed/odd formats, needs the sub-block coords)
       */
      rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
                                      bld->format_desc,
                                      u8n.type,
                                      data_ptr, offset,
                                      x_subcoord,
                                      y_subcoord);
   }

   *colors = rgba8;
}
549
550
/**
 * Sample a single texture image with nearest sampling.
 * If sampling a cube texture, r = cube face in [0,5].
 * Return filtered color as two vectors of 16-bit fixed point values.
 *
 * Coordinates are converted to 24.8 fixed point ints, wrapped per-axis,
 * and summed into a single byte offset used for the gather.
 */
static void
lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
                              LLVMValueRef int_size,
                              LLVMValueRef row_stride_vec,
                              LLVMValueRef img_stride_vec,
                              LLVMValueRef data_ptr,
                              LLVMValueRef mipoffsets,
                              LLVMValueRef s,
                              LLVMValueRef t,
                              LLVMValueRef r,
                              const LLVMValueRef *offsets,
                              LLVMValueRef *colors)
{
   const unsigned dims = bld->dims;
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context i32;
   LLVMTypeRef i32_vec_type;
   LLVMValueRef i32_c8;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef s_ipart, t_ipart = NULL, r_ipart = NULL;
   LLVMValueRef s_float, t_float = NULL, r_float = NULL;
   LLVMValueRef x_stride;
   LLVMValueRef x_offset, offset;
   LLVMValueRef x_subcoord, y_subcoord, z_subcoord;

   lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));

   i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);

   lp_build_extract_image_sizes(bld,
                                &bld->int_size_bld,
                                bld->int_coord_type,
                                int_size,
                                &width_vec,
                                &height_vec,
                                &depth_vec);

   /* keep the original float coords around for the NPOT wrap paths */
   s_float = s; t_float = t; r_float = r;

   if (bld->static_sampler_state->normalized_coords) {
      LLVMValueRef scaled_size;
      LLVMValueRef flt_size;

      /* scale size by 256 (8 fractional bits) */
      scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);

      flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);

      lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
   }
   else {
      /* scale coords by 256 (8 fractional bits) */
      s = lp_build_mul_imm(&bld->coord_bld, s, 256);
      if (dims >= 2)
         t = lp_build_mul_imm(&bld->coord_bld, t, 256);
      if (dims >= 3)
         r = lp_build_mul_imm(&bld->coord_bld, r, 256);
   }

   /* convert float to int */
   s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
   if (dims >= 2)
      t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
   if (dims >= 3)
      r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");

   /* compute floor (shift right 8) */
   i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
   s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
   if (dims >= 2)
      t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
   if (dims >= 3)
      r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");

   /* add texel offsets */
   if (offsets[0]) {
      s_ipart = lp_build_add(&i32, s_ipart, offsets[0]);
      if (dims >= 2) {
         t_ipart = lp_build_add(&i32, t_ipart, offsets[1]);
         if (dims >= 3) {
            r_ipart = lp_build_add(&i32, r_ipart, offsets[2]);
         }
      }
   }

   /* get pixel, row, image strides */
   x_stride = lp_build_const_vec(bld->gallivm,
                                 bld->int_coord_bld.type,
                                 bld->format_desc->block.bits/8);

   /* Do texcoord wrapping, compute texel offset */
   lp_build_sample_wrap_nearest_int(bld,
                                    bld->format_desc->block.width,
                                    s_ipart, s_float,
                                    width_vec, x_stride, offsets[0],
                                    bld->static_texture_state->pot_width,
                                    bld->static_sampler_state->wrap_s,
                                    &x_offset, &x_subcoord);
   offset = x_offset;
   if (dims >= 2) {
      LLVMValueRef y_offset;
      lp_build_sample_wrap_nearest_int(bld,
                                       bld->format_desc->block.height,
                                       t_ipart, t_float,
                                       height_vec, row_stride_vec, offsets[1],
                                       bld->static_texture_state->pot_height,
                                       bld->static_sampler_state->wrap_t,
                                       &y_offset, &y_subcoord);
      offset = lp_build_add(&bld->int_coord_bld, offset, y_offset);
      if (dims >= 3) {
         LLVMValueRef z_offset;
         /* pixel blocks are 2D; depth always has block length 1 */
         lp_build_sample_wrap_nearest_int(bld,
                                          1, /* block length (depth) */
                                          r_ipart, r_float,
                                          depth_vec, img_stride_vec, offsets[2],
                                          bld->static_texture_state->pot_depth,
                                          bld->static_sampler_state->wrap_r,
                                          &z_offset, &z_subcoord);
         offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
      }
   }
   if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
       bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
       bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
      LLVMValueRef z_offset;
      /* The r coord is the cube face in [0,5] or array layer */
      z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
      offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
   }
   if (mipoffsets) {
      offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
   }

   lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
                                       x_subcoord, y_subcoord,
                                       colors);
}
693
694
/**
 * Sample a single texture image with nearest sampling.
 * If sampling a cube texture, r = cube face in [0,5].
 * Return filtered color as two vectors of 16-bit fixed point values.
 * Does address calcs (except offsets) with floats.
 * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
 */
static void
lp_build_sample_image_nearest_afloat(struct lp_build_sample_context *bld,
                                     LLVMValueRef int_size,
                                     LLVMValueRef row_stride_vec,
                                     LLVMValueRef img_stride_vec,
                                     LLVMValueRef data_ptr,
                                     LLVMValueRef mipoffsets,
                                     LLVMValueRef s,
                                     LLVMValueRef t,
                                     LLVMValueRef r,
                                     const LLVMValueRef *offsets,
                                     LLVMValueRef *colors)
{
   const unsigned dims = bld->dims;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef offset;
   LLVMValueRef x_subcoord, y_subcoord;
   LLVMValueRef x_icoord = NULL, y_icoord = NULL, z_icoord = NULL;
   LLVMValueRef flt_size;

   flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);

   lp_build_extract_image_sizes(bld,
                                &bld->float_size_bld,
                                bld->coord_type,
                                flt_size,
                                &width_vec,
                                &height_vec,
                                &depth_vec);

   /* Do texcoord wrapping */
   lp_build_sample_wrap_nearest_float(bld,
                                      s, width_vec, offsets[0],
                                      bld->static_texture_state->pot_width,
                                      bld->static_sampler_state->wrap_s,
                                      &x_icoord);

   if (dims >= 2) {
      lp_build_sample_wrap_nearest_float(bld,
                                         t, height_vec, offsets[1],
                                         bld->static_texture_state->pot_height,
                                         bld->static_sampler_state->wrap_t,
                                         &y_icoord);

      if (dims >= 3) {
         lp_build_sample_wrap_nearest_float(bld,
                                            r, depth_vec, offsets[2],
                                            bld->static_texture_state->pot_depth,
                                            bld->static_sampler_state->wrap_r,
                                            &z_icoord);
      }
   }
   if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
       bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
       bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
      /* r is the cube face / array layer index; no wrapping needed */
      z_icoord = r;
   }

   /*
    * From here on we deal with ints, and we should split up the 256bit
    * vectors manually for better generated code.
    */

   /*
    * compute texel offsets -
    * cannot do offset calc with floats, difficult for block-based formats,
    * and not enough precision anyway.
    */
   lp_build_sample_offset(&bld->int_coord_bld,
                          bld->format_desc,
                          x_icoord, y_icoord,
                          z_icoord,
                          row_stride_vec, img_stride_vec,
                          &offset,
                          &x_subcoord, &y_subcoord);
   if (mipoffsets) {
      offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
   }

   lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
                                       x_subcoord, y_subcoord,
                                       colors);
}
785
786
/**
 * Fetch texels for image with linear sampling.
 * Return filtered color as two vectors of 16-bit fixed point values.
 *
 * Fetches the 2/4/8 neighboring texels (depending on dims) and lerps them
 * with the given 8-bit fixed point fractional weights.
 *
 * \param offset per-pixel byte offsets of the neighbor texels, [z][y][x]
 * \param x_subcoord,y_subcoord sub-block pixel coords for coord and coord+1
 * \param s_fpart,t_fpart,r_fpart 8-bit fixed point lerp weights per axis
 * \param colors output: filtered texels as packed 8-bit unorm vector
 */
static void
lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
                                   LLVMValueRef data_ptr,
                                   LLVMValueRef offset[2][2][2],
                                   LLVMValueRef x_subcoord[2],
                                   LLVMValueRef y_subcoord[2],
                                   LLVMValueRef s_fpart,
                                   LLVMValueRef t_fpart,
                                   LLVMValueRef r_fpart,
                                   LLVMValueRef *colors)
{
   const unsigned dims = bld->dims;
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context u8n;
   LLVMTypeRef u8n_vec_type;
   LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
   LLVMValueRef shuffle;
   LLVMValueRef neighbors[2][2][2]; /* [z][y][x] */
   LLVMValueRef packed;
   unsigned i, j, k;
   unsigned numj, numk;

   lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
   u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);

   /*
    * Transform 4 x i32 in
    *
    *   s_fpart = {s0, s1, s2, s3}
    *
    * where each value is between 0 and 0xff,
    *
    * into one 16 x i8
    *
    *   s_fpart = {s0, s0, s0, s0, s1, s1, s1, s1, s2, s2, s2, s2, s3, s3, s3, s3}
    *
    * and likewise for t_fpart. There is no risk of losing precision here
    * since the fractional parts only use the lower 8bits.
    */
   s_fpart = LLVMBuildBitCast(builder, s_fpart, u8n_vec_type, "");
   if (dims >= 2)
      t_fpart = LLVMBuildBitCast(builder, t_fpart, u8n_vec_type, "");
   if (dims >= 3)
      r_fpart = LLVMBuildBitCast(builder, r_fpart, u8n_vec_type, "");

   /* build a shuffle that broadcasts the low byte of each i32 lane
    * to all 4 bytes of that lane (byte 0 on LE, byte 3 on BE)
    */
   for (j = 0; j < u8n.type.length; j += 4) {
#ifdef PIPE_ARCH_LITTLE_ENDIAN
      unsigned subindex = 0;
#else
      unsigned subindex = 3;
#endif
      LLVMValueRef index;

      index = LLVMConstInt(elem_type, j + subindex, 0);
      for (i = 0; i < 4; ++i)
         shuffles[j + i] = index;
   }

   shuffle = LLVMConstVector(shuffles, u8n.type.length);

   s_fpart = LLVMBuildShuffleVector(builder, s_fpart, u8n.undef,
                                    shuffle, "");
   if (dims >= 2) {
      t_fpart = LLVMBuildShuffleVector(builder, t_fpart, u8n.undef,
                                       shuffle, "");
   }
   if (dims >= 3) {
      r_fpart = LLVMBuildShuffleVector(builder, r_fpart, u8n.undef,
                                       shuffle, "");
   }

   /*
    * Fetch the pixels as 4 x 32bit (rgba order might differ):
    *
    *   rgba0 rgba1 rgba2 rgba3
    *
    * bit cast them into 16 x u8
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
    *
    * unpack them into two 8 x i16:
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1
    *   r2 g2 b2 a2 r3 g3 b3 a3
    *
    * The higher 8 bits of the resulting elements will be zero.
    */
   numj = 1 + (dims >= 2);
   numk = 1 + (dims >= 3);

   /* gather all 2 x numj x numk neighbor texels */
   for (k = 0; k < numk; k++) {
      for (j = 0; j < numj; j++) {
         for (i = 0; i < 2; i++) {
            LLVMValueRef rgba8;

            if (util_format_is_rgba8_variant(bld->format_desc)) {
               /*
                * Given the format is a rgba8, just read the pixels as is,
                * without any swizzling. Swizzling will be done later.
                */
               rgba8 = lp_build_gather(bld->gallivm,
                                       bld->texel_type.length,
                                       bld->format_desc->block.bits,
                                       bld->texel_type.width,
                                       data_ptr, offset[k][j][i], TRUE);

               rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
            }
            else {
               rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
                                               bld->format_desc,
                                               u8n.type,
                                               data_ptr, offset[k][j][i],
                                               x_subcoord[i],
                                               y_subcoord[j]);
            }

            neighbors[k][j][i] = rgba8;
         }
      }
   }

   /*
    * Linear interpolation with 8.8 fixed point.
    */
   if (bld->static_sampler_state->force_nearest_s) {
      /* special case 1-D lerp */
      packed = lp_build_lerp(&u8n,
                             t_fpart,
                             neighbors[0][0][0],
                             neighbors[0][0][1],
                             LP_BLD_LERP_PRESCALED_WEIGHTS);
   }
   else if (bld->static_sampler_state->force_nearest_t) {
      /* special case 1-D lerp */
      packed = lp_build_lerp(&u8n,
                             s_fpart,
                             neighbors[0][0][0],
                             neighbors[0][0][1],
                             LP_BLD_LERP_PRESCALED_WEIGHTS);
   }
   else {
      /* general 1/2/3-D lerping */
      if (dims == 1) {
         packed = lp_build_lerp(&u8n,
                                s_fpart,
                                neighbors[0][0][0],
                                neighbors[0][0][1],
                                LP_BLD_LERP_PRESCALED_WEIGHTS);
      } else if (dims == 2) {
         /* 2-D lerp */
         packed = lp_build_lerp_2d(&u8n,
                                   s_fpart, t_fpart,
                                   neighbors[0][0][0],
                                   neighbors[0][0][1],
                                   neighbors[0][1][0],
                                   neighbors[0][1][1],
                                   LP_BLD_LERP_PRESCALED_WEIGHTS);
      } else {
         /* 3-D lerp */
         assert(dims == 3);
         packed = lp_build_lerp_3d(&u8n,
                                   s_fpart, t_fpart, r_fpart,
                                   neighbors[0][0][0],
                                   neighbors[0][0][1],
                                   neighbors[0][1][0],
                                   neighbors[0][1][1],
                                   neighbors[1][0][0],
                                   neighbors[1][0][1],
                                   neighbors[1][1][0],
                                   neighbors[1][1][1],
                                   LP_BLD_LERP_PRESCALED_WEIGHTS);
      }
   }

   *colors = packed;
}
969
/**
 * Sample a single texture image with (bi-)(tri-)linear sampling.
 * Address math is done with integer vectors in 8.8 fixed point (the
 * coordinates are scaled by 256, so 8 fractional weight bits).
 * The filtered color is returned in *colors as produced by
 * lp_build_sample_fetch_image_linear; given the PRESCALED_WEIGHTS lerps
 * used there this is a packed 8-bit unorm vector (the old "two vectors of
 * 16-bit fixed point" wording predates that path — NOTE(review): confirm).
 *
 * \param int_size        integer vector with the level's width/height/depth
 * \param row_stride_vec  row stride in bytes (NULL for 1D)
 * \param img_stride_vec  image/slice stride in bytes (NULL for non-3D/array)
 * \param data_ptr        base pointer of the mip level (or of the whole
 *                        mip tree when mipoffsets is non-NULL)
 * \param mipoffsets      per-pixel byte offsets to the mip level, or NULL
 * \param offsets         optional texel offsets (TEXEL_OFFSET), offsets[0]
 *                        NULL means no offsets at all
 * \param colors          out: packed filtered texels
 */
static void
lp_build_sample_image_linear(struct lp_build_sample_context *bld,
                             LLVMValueRef int_size,
                             LLVMValueRef row_stride_vec,
                             LLVMValueRef img_stride_vec,
                             LLVMValueRef data_ptr,
                             LLVMValueRef mipoffsets,
                             LLVMValueRef s,
                             LLVMValueRef t,
                             LLVMValueRef r,
                             const LLVMValueRef *offsets,
                             LLVMValueRef *colors)
{
   const unsigned dims = bld->dims;
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context i32;
   LLVMTypeRef i32_vec_type;
   LLVMValueRef i32_c8, i32_c128, i32_c255;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef s_ipart, s_fpart, s_float;
   LLVMValueRef t_ipart = NULL, t_fpart = NULL, t_float = NULL;
   LLVMValueRef r_ipart = NULL, r_fpart = NULL, r_float = NULL;
   LLVMValueRef x_stride, y_stride, z_stride;
   LLVMValueRef x_offset0, x_offset1;
   LLVMValueRef y_offset0, y_offset1;
   LLVMValueRef z_offset0, z_offset1;
   LLVMValueRef offset[2][2][2]; /* [z][y][x] */
   LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2];
   unsigned x, y, z;

   lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));

   i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);

   lp_build_extract_image_sizes(bld,
                                &bld->int_size_bld,
                                bld->int_coord_type,
                                int_size,
                                &width_vec,
                                &height_vec,
                                &depth_vec);

   /* keep the unscaled float coords around, the wrap code also needs them */
   s_float = s; t_float = t; r_float = r;

   if (bld->static_sampler_state->normalized_coords) {
      LLVMValueRef scaled_size;
      LLVMValueRef flt_size;

      /* scale size by 256 (8 fractional bits) */
      scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);

      flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);

      /* this multiplies the [0,1] coords up to fixed-point texel space */
      lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
   }
   else {
      /* scale coords by 256 (8 fractional bits) */
      s = lp_build_mul_imm(&bld->coord_bld, s, 256);
      if (dims >= 2)
         t = lp_build_mul_imm(&bld->coord_bld, t, 256);
      if (dims >= 3)
         r = lp_build_mul_imm(&bld->coord_bld, r, 256);
   }

   /* convert float to int */
   s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
   if (dims >= 2)
      t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
   if (dims >= 3)
      r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");

   /* subtract 0.5 (add -128) -- linear filtering centers on texel centers,
    * skipped per-axis when that axis is forced to nearest */
   i32_c128 = lp_build_const_int_vec(bld->gallivm, i32.type, -128);
   if (!bld->static_sampler_state->force_nearest_s) {
      s = LLVMBuildAdd(builder, s, i32_c128, "");
   }
   if (dims >= 2 && !bld->static_sampler_state->force_nearest_t) {
      t = LLVMBuildAdd(builder, t, i32_c128, "");
   }
   if (dims >= 3) {
      r = LLVMBuildAdd(builder, r, i32_c128, "");
   }

   /* compute floor (shift right 8) -- arithmetic shift handles negatives */
   i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
   s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
   if (dims >= 2)
      t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
   if (dims >= 3)
      r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");

   /* add texel offsets (offsets[0] == NULL means no offsets were given) */
   if (offsets[0]) {
      s_ipart = lp_build_add(&i32, s_ipart, offsets[0]);
      if (dims >= 2) {
         t_ipart = lp_build_add(&i32, t_ipart, offsets[1]);
         if (dims >= 3) {
            r_ipart = lp_build_add(&i32, r_ipart, offsets[2]);
         }
      }
   }

   /* compute fractional part (AND with 0xff) -- the 8-bit lerp weights */
   i32_c255 = lp_build_const_int_vec(bld->gallivm, i32.type, 255);
   s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
   if (dims >= 2)
      t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
   if (dims >= 3)
      r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");

   /* get pixel, row and image strides */
   x_stride = lp_build_const_vec(bld->gallivm, bld->int_coord_bld.type,
                                 bld->format_desc->block.bits/8);
   y_stride = row_stride_vec;
   z_stride = img_stride_vec;

   /* do texcoord wrapping and compute texel offsets,
    * yielding byte offsets and sub-block coords for both neighbors per axis */
   lp_build_sample_wrap_linear_int(bld,
                                   bld->format_desc->block.width,
                                   s_ipart, &s_fpart, s_float,
                                   width_vec, x_stride, offsets[0],
                                   bld->static_texture_state->pot_width,
                                   bld->static_sampler_state->wrap_s,
                                   &x_offset0, &x_offset1,
                                   &x_subcoord[0], &x_subcoord[1]);

   /* add potential cube/array/mip offsets now as they are constant per pixel */
   if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
       bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
       bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
      LLVMValueRef z_offset;
      z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
      /* The r coord is the cube face in [0,5] or array layer */
      x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, z_offset);
      x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, z_offset);
   }
   if (mipoffsets) {
      x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, mipoffsets);
      x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, mipoffsets);
   }

   /* seed the 2x2x2 offset cube with the two x offsets */
   for (z = 0; z < 2; z++) {
      for (y = 0; y < 2; y++) {
         offset[z][y][0] = x_offset0;
         offset[z][y][1] = x_offset1;
      }
   }

   if (dims >= 2) {
      lp_build_sample_wrap_linear_int(bld,
                                      bld->format_desc->block.height,
                                      t_ipart, &t_fpart, t_float,
                                      height_vec, y_stride, offsets[1],
                                      bld->static_texture_state->pot_height,
                                      bld->static_sampler_state->wrap_t,
                                      &y_offset0, &y_offset1,
                                      &y_subcoord[0], &y_subcoord[1]);

      /* fold the two y offsets into the cube */
      for (z = 0; z < 2; z++) {
         for (x = 0; x < 2; x++) {
            offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][0][x], y_offset0);
            offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][1][x], y_offset1);
         }
      }
   }

   if (dims >= 3) {
      lp_build_sample_wrap_linear_int(bld,
                                      1, /* block length (depth) */
                                      r_ipart, &r_fpart, r_float,
                                      depth_vec, z_stride, offsets[2],
                                      bld->static_texture_state->pot_depth,
                                      bld->static_sampler_state->wrap_r,
                                      &z_offset0, &z_offset1,
                                      &z_subcoord[0], &z_subcoord[1]);
      /* fold the two z (slice) offsets into the cube */
      for (y = 0; y < 2; y++) {
         for (x = 0; x < 2; x++) {
            offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[0][y][x], z_offset0);
            offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[1][y][x], z_offset1);
         }
      }
   }

   /* fetch the 2/4/8 neighbor texels and lerp them with the 8.8 weights */
   lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
                                      x_subcoord, y_subcoord,
                                      s_fpart, t_fpart, r_fpart,
                                      colors);
}
1166
1167
/**
 * Sample a single texture image with (bi-)(tri-)linear sampling.
 * Like lp_build_sample_image_linear, but does the coordinate wrapping
 * with floats instead of ints (only the final offset calc is integer).
 * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
 * The filtered color is returned in *colors via
 * lp_build_sample_fetch_image_linear (see there for the packed format).
 *
 * \param int_size        integer vector with the level's width/height/depth
 * \param row_stride_vec  row stride in bytes (NULL for 1D)
 * \param img_stride_vec  image/slice stride in bytes (NULL for non-3D/array)
 * \param data_ptr        base pointer of the mip level (or of the whole
 *                        mip tree when mipoffsets is non-NULL)
 * \param mipoffsets      per-pixel byte offsets to the mip level, or NULL
 * \param offsets         optional texel offsets, offsets[0] NULL means none
 * \param colors          out: packed filtered texels
 */
static void
lp_build_sample_image_linear_afloat(struct lp_build_sample_context *bld,
                                    LLVMValueRef int_size,
                                    LLVMValueRef row_stride_vec,
                                    LLVMValueRef img_stride_vec,
                                    LLVMValueRef data_ptr,
                                    LLVMValueRef mipoffsets,
                                    LLVMValueRef s,
                                    LLVMValueRef t,
                                    LLVMValueRef r,
                                    const LLVMValueRef *offsets,
                                    LLVMValueRef *colors)
{
   const unsigned dims = bld->dims;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef s_fpart;
   LLVMValueRef t_fpart = NULL;
   LLVMValueRef r_fpart = NULL;
   LLVMValueRef x_stride, y_stride, z_stride;
   LLVMValueRef x_offset0, x_offset1;
   LLVMValueRef y_offset0, y_offset1;
   LLVMValueRef z_offset0, z_offset1;
   LLVMValueRef offset[2][2][2]; /* [z][y][x] */
   LLVMValueRef x_subcoord[2], y_subcoord[2];
   LLVMValueRef flt_size;
   LLVMValueRef x_icoord0, x_icoord1;
   LLVMValueRef y_icoord0, y_icoord1;
   LLVMValueRef z_icoord0, z_icoord1;
   unsigned x, y, z;

   flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);

   lp_build_extract_image_sizes(bld,
                                &bld->float_size_bld,
                                bld->coord_type,
                                flt_size,
                                &width_vec,
                                &height_vec,
                                &depth_vec);

   /* do texcoord wrapping and compute texel offsets -- all-float path,
    * produces integer neighbor coords plus the lerp weight per axis */
   lp_build_sample_wrap_linear_float(bld,
                                     bld->format_desc->block.width,
                                     s, width_vec, offsets[0],
                                     bld->static_texture_state->pot_width,
                                     bld->static_sampler_state->wrap_s,
                                     &x_icoord0, &x_icoord1,
                                     &s_fpart,
                                     bld->static_sampler_state->force_nearest_s);

   if (dims >= 2) {
      lp_build_sample_wrap_linear_float(bld,
                                        bld->format_desc->block.height,
                                        t, height_vec, offsets[1],
                                        bld->static_texture_state->pot_height,
                                        bld->static_sampler_state->wrap_t,
                                        &y_icoord0, &y_icoord1,
                                        &t_fpart,
                                        bld->static_sampler_state->force_nearest_t);

      if (dims >= 3) {
         lp_build_sample_wrap_linear_float(bld,
                                           1, /* block length (depth) */
                                           r, depth_vec, offsets[2],
                                           bld->static_texture_state->pot_depth,
                                           bld->static_sampler_state->wrap_r,
                                           &z_icoord0, &z_icoord1,
                                           &r_fpart, 0);
      }
   }

   /*
    * From here on we deal with ints, and we should split up the 256bit
    * vectors manually for better generated code.
    */

   /* get pixel, row and image strides */
   x_stride = lp_build_const_vec(bld->gallivm,
                                 bld->int_coord_bld.type,
                                 bld->format_desc->block.bits/8);
   y_stride = row_stride_vec;
   z_stride = img_stride_vec;

   /*
    * compute texel offset -
    * cannot do offset calc with floats, difficult for block-based formats,
    * and not enough precision anyway.
    */
   lp_build_sample_partial_offset(&bld->int_coord_bld,
                                  bld->format_desc->block.width,
                                  x_icoord0, x_stride,
                                  &x_offset0, &x_subcoord[0]);
   lp_build_sample_partial_offset(&bld->int_coord_bld,
                                  bld->format_desc->block.width,
                                  x_icoord1, x_stride,
                                  &x_offset1, &x_subcoord[1]);

   /* add potential cube/array/mip offsets now as they are constant per pixel */
   if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
       bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
       bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
      LLVMValueRef z_offset;
      z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
      /* The r coord is the cube face in [0,5] or array layer */
      x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, z_offset);
      x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, z_offset);
   }
   if (mipoffsets) {
      x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, mipoffsets);
      x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, mipoffsets);
   }

   /* seed the 2x2x2 offset cube with the two x offsets */
   for (z = 0; z < 2; z++) {
      for (y = 0; y < 2; y++) {
         offset[z][y][0] = x_offset0;
         offset[z][y][1] = x_offset1;
      }
   }

   if (dims >= 2) {
      lp_build_sample_partial_offset(&bld->int_coord_bld,
                                     bld->format_desc->block.height,
                                     y_icoord0, y_stride,
                                     &y_offset0, &y_subcoord[0]);
      lp_build_sample_partial_offset(&bld->int_coord_bld,
                                     bld->format_desc->block.height,
                                     y_icoord1, y_stride,
                                     &y_offset1, &y_subcoord[1]);
      /* fold the two y offsets into the cube */
      for (z = 0; z < 2; z++) {
         for (x = 0; x < 2; x++) {
            offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][0][x], y_offset0);
            offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][1][x], y_offset1);
         }
      }
   }

   if (dims >= 3) {
      /* depth is never block-compressed, so the sub-block coord is unused */
      LLVMValueRef z_subcoord[2];
      lp_build_sample_partial_offset(&bld->int_coord_bld,
                                     1,
                                     z_icoord0, z_stride,
                                     &z_offset0, &z_subcoord[0]);
      lp_build_sample_partial_offset(&bld->int_coord_bld,
                                     1,
                                     z_icoord1, z_stride,
                                     &z_offset1, &z_subcoord[1]);
      /* fold the two z (slice) offsets into the cube */
      for (y = 0; y < 2; y++) {
         for (x = 0; x < 2; x++) {
            offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[0][y][x], z_offset0);
            offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[1][y][x], z_offset1);
         }
      }
   }

   /* fetch the neighbor texels and lerp them with the per-axis weights */
   lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
                                      x_subcoord, y_subcoord,
                                      s_fpart, t_fpart, r_fpart,
                                      colors);
}
1337
1338
/**
 * Sample the texture/mipmap using given image filter and mip filter.
 * ilevel0 and ilevel1 indicate the two mipmap levels to sample
 * from (sizes and data pointers are looked up here); if we're using
 * nearest miplevel sampling ilevel1/lod_fpart will be null/unused.
 *
 * The result is written through colors_var (an alloca'd variable, so
 * the conditional second-level fetch can overwrite it inside the
 * generated if-block).
 *
 * \param img_filter  PIPE_TEX_FILTER_NEAREST or PIPE_TEX_FILTER_LINEAR
 * \param mip_filter  PIPE_TEX_MIPFILTER_x
 * \param lod_fpart   float fraction between the two levels (only used
 *                    for PIPE_TEX_MIPFILTER_LINEAR)
 */
static void
lp_build_sample_mipmap(struct lp_build_sample_context *bld,
                       unsigned img_filter,
                       unsigned mip_filter,
                       LLVMValueRef s,
                       LLVMValueRef t,
                       LLVMValueRef r,
                       const LLVMValueRef *offsets,
                       LLVMValueRef ilevel0,
                       LLVMValueRef ilevel1,
                       LLVMValueRef lod_fpart,
                       LLVMValueRef colors_var)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef size0;
   LLVMValueRef size1;
   LLVMValueRef row_stride0_vec = NULL;
   LLVMValueRef row_stride1_vec = NULL;
   LLVMValueRef img_stride0_vec = NULL;
   LLVMValueRef img_stride1_vec = NULL;
   LLVMValueRef data_ptr0;
   LLVMValueRef data_ptr1;
   LLVMValueRef mipoff0 = NULL;
   LLVMValueRef mipoff1 = NULL;
   LLVMValueRef colors0;
   LLVMValueRef colors1;

   /* sample the first mipmap level */
   lp_build_mipmap_level_sizes(bld, ilevel0,
                               &size0,
                               &row_stride0_vec, &img_stride0_vec);
   if (bld->num_lods == 1) {
      /* single lod: can use the level's base pointer directly */
      data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
   }
   else {
      /* This path should work for num_lods 1 too but slightly less efficient */
      data_ptr0 = bld->base_ptr;
      mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
   }

   /* pick the float-address AVX variants for wide vectors */
   if (util_cpu_caps.has_avx && bld->coord_type.length > 4) {
      if (img_filter == PIPE_TEX_FILTER_NEAREST) {
         lp_build_sample_image_nearest_afloat(bld,
                                              size0,
                                              row_stride0_vec, img_stride0_vec,
                                              data_ptr0, mipoff0, s, t, r, offsets,
                                              &colors0);
      }
      else {
         assert(img_filter == PIPE_TEX_FILTER_LINEAR);
         lp_build_sample_image_linear_afloat(bld,
                                             size0,
                                             row_stride0_vec, img_stride0_vec,
                                             data_ptr0, mipoff0, s, t, r, offsets,
                                             &colors0);
      }
   }
   else {
      if (img_filter == PIPE_TEX_FILTER_NEAREST) {
         lp_build_sample_image_nearest(bld,
                                       size0,
                                       row_stride0_vec, img_stride0_vec,
                                       data_ptr0, mipoff0, s, t, r, offsets,
                                       &colors0);
      }
      else {
         assert(img_filter == PIPE_TEX_FILTER_LINEAR);
         lp_build_sample_image_linear(bld,
                                      size0,
                                      row_stride0_vec, img_stride0_vec,
                                      data_ptr0, mipoff0, s, t, r, offsets,
                                      &colors0);
      }
   }

   /* Store the first level's colors in the output variables */
   LLVMBuildStore(builder, colors0, colors_var);

   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      /* convert lod_fpart to 8.8 fixed point so it can be used as a
       * prescaled lerp weight below */
      LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm,
                                                     bld->levelf_bld.type, 256.0);
      LLVMTypeRef i32vec_type = bld->leveli_bld.vec_type;
      struct lp_build_if_state if_ctx;
      LLVMValueRef need_lerp;
      unsigned num_quads = bld->coord_bld.type.length / 4;
      unsigned i;

      lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16vec_scale, "");
      lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32vec_type, "lod_fpart.fixed16");

      /* need_lerp = lod_fpart > 0 */
      if (bld->num_lods == 1) {
         need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
                                   lod_fpart, bld->leveli_bld.zero,
                                   "need_lerp");
      }
      else {
         /*
          * We'll do mip filtering if any of the quads need it.
          * It might be better to split the vectors here and only fetch/filter
          * quads which need it.
          */
         /*
          * We need to clamp lod_fpart here since we can get negative
          * values which would screw up filtering if not all
          * lod_fpart values have same sign.
          * We can however then skip the greater than comparison.
          */
         lod_fpart = lp_build_max(&bld->leveli_bld, lod_fpart,
                                  bld->leveli_bld.zero);
         need_lerp = lp_build_any_true_range(&bld->leveli_bld, bld->num_lods, lod_fpart);
      }

      /* only fetch/filter the second level when some lane needs it */
      lp_build_if(&if_ctx, bld->gallivm, need_lerp);
      {
         struct lp_build_context u8n_bld;

         lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width));

         /* sample the second mipmap level */
         lp_build_mipmap_level_sizes(bld, ilevel1,
                                     &size1,
                                     &row_stride1_vec, &img_stride1_vec);
         if (bld->num_lods == 1) {
            data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
         }
         else {
            data_ptr1 = bld->base_ptr;
            mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
         }

         if (util_cpu_caps.has_avx && bld->coord_type.length > 4) {
            if (img_filter == PIPE_TEX_FILTER_NEAREST) {
               lp_build_sample_image_nearest_afloat(bld,
                                                    size1,
                                                    row_stride1_vec, img_stride1_vec,
                                                    data_ptr1, mipoff1, s, t, r, offsets,
                                                    &colors1);
            }
            else {
               lp_build_sample_image_linear_afloat(bld,
                                                   size1,
                                                   row_stride1_vec, img_stride1_vec,
                                                   data_ptr1, mipoff1, s, t, r, offsets,
                                                   &colors1);
            }
         }
         else {
            if (img_filter == PIPE_TEX_FILTER_NEAREST) {
               lp_build_sample_image_nearest(bld,
                                             size1,
                                             row_stride1_vec, img_stride1_vec,
                                             data_ptr1, mipoff1, s, t, r, offsets,
                                             &colors1);
            }
            else {
               lp_build_sample_image_linear(bld,
                                            size1,
                                            row_stride1_vec, img_stride1_vec,
                                            data_ptr1, mipoff1, s, t, r, offsets,
                                            &colors1);
            }
         }

         /* interpolate samples from the two mipmap levels */

         if (num_quads == 1 && bld->num_lods == 1) {
            /* one weight for everything: truncate to 8 bits and splat */
            lod_fpart = LLVMBuildTrunc(builder, lod_fpart, u8n_bld.elem_type, "");
            lod_fpart = lp_build_broadcast_scalar(&u8n_bld, lod_fpart);

#if HAVE_LLVM == 0x208
            /* This was a work-around for a bug in LLVM 2.8.
             * Evidently, something goes wrong in the construction of the
             * lod_fpart short[8] vector. Adding this no-effect shuffle seems
             * to force the vector to be properly constructed.
             * Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f).
             */
#error Unsupported
#endif
         }
         else {
            unsigned num_chans_per_lod = 4 * bld->coord_type.length / bld->num_lods;
            LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type, bld->leveli_bld.type.length);
            LLVMValueRef shuffle[LP_MAX_VECTOR_LENGTH];

            /* Take the LSB of lod_fpart */
            lod_fpart = LLVMBuildTrunc(builder, lod_fpart, tmp_vec_type, "");

            /* Broadcast each lod weight into their respective channels */
            for (i = 0; i < u8n_bld.type.length; ++i) {
               shuffle[i] = lp_build_const_int32(bld->gallivm, i / num_chans_per_lod);
            }
            lod_fpart = LLVMBuildShuffleVector(builder, lod_fpart, LLVMGetUndef(tmp_vec_type),
                                               LLVMConstVector(shuffle, u8n_bld.type.length), "");
         }

         colors0 = lp_build_lerp(&u8n_bld, lod_fpart,
                                 colors0, colors1,
                                 LP_BLD_LERP_PRESCALED_WEIGHTS);

         LLVMBuildStore(builder, colors0, colors_var);
      }
      lp_build_endif(&if_ctx);
   }
}
1550
1551
1552
/**
 * Texture sampling in AoS format. Used when sampling common 32-bit/texel
 * formats. 1D/2D/3D/cube texture supported. All mipmap sampling modes
 * but only limited texture coord wrap modes.
 *
 * Emits the packed fetch/filter code (via lp_build_sample_mipmap) and
 * then unpacks/swizzles the packed rgba8 result into the four SoA
 * channel vectors written to texel_out[4].
 *
 * \param lod_ipart  integer part of the lod, used to pick min vs mag filter
 * \param lod_fpart  fractional lod for linear mip filtering
 * \param ilevel0/1  the two mip levels to sample from
 */
void
lp_build_sample_aos(struct lp_build_sample_context *bld,
                    unsigned sampler_unit,
                    LLVMValueRef s,
                    LLVMValueRef t,
                    LLVMValueRef r,
                    const LLVMValueRef *offsets,
                    LLVMValueRef lod_ipart,
                    LLVMValueRef lod_fpart,
                    LLVMValueRef ilevel0,
                    LLVMValueRef ilevel1,
                    LLVMValueRef texel_out[4])
{
   struct lp_build_context *int_bld = &bld->int_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
   const unsigned min_filter = bld->static_sampler_state->min_img_filter;
   const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
   const unsigned dims = bld->dims;
   LLVMValueRef packed_var, packed;
   LLVMValueRef unswizzled[4];
   struct lp_build_context u8n_bld;

   /* we only support the common/simple wrap modes at this time */
   assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_s));
   if (dims >= 2)
      assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_t));
   if (dims >= 3)
      assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_r));


   /* make 8-bit unorm builder context */
   lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width));

   /*
    * Get/interpolate texture colors.
    */

   /* variable (alloca) so both branches below can store into it */
   packed_var = lp_build_alloca(bld->gallivm, u8n_bld.vec_type, "packed_var");

   if (min_filter == mag_filter) {
      /* no need to distinguish between minification and magnification */
      lp_build_sample_mipmap(bld,
                             min_filter, mip_filter,
                             s, t, r, offsets,
                             ilevel0, ilevel1, lod_fpart,
                             packed_var);
   }
   else {
      /* Emit conditional to choose min image filter or mag image filter
       * depending on the lod being > 0 or <= 0, respectively.
       */
      struct lp_build_if_state if_ctx;
      LLVMValueRef minify;

      /*
       * XXX this should take all lods into account, if some are min
       * some max probably could hack up the coords/weights in the linear
       * path with selects to work for nearest.
       * If that's just two quads sitting next to each other it seems
       * quite ok to do the same filtering method on both though, at
       * least unless we have explicit lod (and who uses different
       * min/mag filter with that?)
       */
      if (bld->num_lods > 1)
         lod_ipart = LLVMBuildExtractElement(builder, lod_ipart,
                                             lp_build_const_int32(bld->gallivm, 0), "");

      /* minify = lod >= 0.0 */
      minify = LLVMBuildICmp(builder, LLVMIntSGE,
                             lod_ipart, int_bld->zero, "");

      lp_build_if(&if_ctx, bld->gallivm, minify);
      {
         /* Use the minification filter */
         lp_build_sample_mipmap(bld,
                                min_filter, mip_filter,
                                s, t, r, offsets,
                                ilevel0, ilevel1, lod_fpart,
                                packed_var);
      }
      lp_build_else(&if_ctx);
      {
         /* Use the magnification filter */
         lp_build_sample_mipmap(bld,
                                mag_filter, PIPE_TEX_MIPFILTER_NONE,
                                s, t, r, offsets,
                                ilevel0, NULL, NULL,
                                packed_var);
      }
      lp_build_endif(&if_ctx);
   }

   packed = LLVMBuildLoad(builder, packed_var, "");

   /*
    * Convert to SoA and swizzle.
    */
   lp_build_rgba8_to_fi32_soa(bld->gallivm,
                              bld->texel_type,
                              packed, unswizzled);

   if (util_format_is_rgba8_variant(bld->format_desc)) {
      /* apply the format's channel swizzle */
      lp_build_format_swizzle_soa(bld->format_desc,
                                  &bld->texel_bld,
                                  unswizzled, texel_out);
   }
   else {
      /* non-rgba8 formats were already swizzled at fetch time */
      texel_out[0] = unswizzled[0];
      texel_out[1] = unswizzled[1];
      texel_out[2] = unswizzled[2];
      texel_out[3] = unswizzled[3];
   }
}