1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 VMware, Inc.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 **************************************************************************/
/**
 * @file
 * Position and shader input interpolation.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 */
36 #include "pipe/p_shader_tokens.h"
37 #include "util/u_debug.h"
38 #include "util/u_memory.h"
39 #include "util/u_math.h"
40 #include "tgsi/tgsi_scan.h"
41 #include "gallivm/lp_bld_debug.h"
42 #include "gallivm/lp_bld_const.h"
43 #include "gallivm/lp_bld_arit.h"
44 #include "gallivm/lp_bld_swizzle.h"
45 #include "gallivm/lp_bld_flow.h"
46 #include "gallivm/lp_bld_logic.h"
47 #include "gallivm/lp_bld_struct.h"
48 #include "gallivm/lp_bld_gather.h"
49 #include "lp_bld_interp.h"
/*
 * The shader JIT function operates on blocks of quads.
 * Each block has 2x2 quads and each quad has 2x2 pixels.
 *
 * We iterate over the quads in order 0, 1, 2, 3.
 *
 * If we iterate over multiple quads at once, quads 01 and 23 are processed
 * together.
 *
 * Within each quad, we have four pixels which are represented in SOA
 * order.
 *
 * So the green channel (for example) of the four pixels is stored in
 * a single vector register: {g0, g1, g2, g3}.
 * The order stays the same even with multiple quads: the pixels of each
 * further quad simply follow those of the previous one in the vector.
 */
/**
 * Do one perspective divide per quad.
 *
 * For perspective interpolation, the final attribute value is given by
 *
 *  a' = a/w = a * oow
 *
 * where
 *
 *  a = a0 + dadx*x + dady*y
 *  w = w0 + dwdx*x + dwdy*y
 *  oow = 1/w = 1/(w0 + dwdx*x + dwdy*y)
 *
 * Instead of computing the division per pixel, with this macro we compute the
 * division on the upper left pixel of each quad, and use a linear
 * approximation in the remaining pixels, given by:
 *
 *  da'dx = (dadx - dwdx*a)*oow
 *  da'dy = (dady - dwdy*a)*oow
 *
 * Ironically, this actually makes things slower -- probably because the
 * divide hardware unit is rarely used, whereas the multiply unit is typically
 * already saturated.
 */
113 #define PERSPECTIVE_DIVIDE_PER_QUAD 0
/*
 * Per-pixel x and y offsets (in pixels) inside a block of 2x2 quads.
 *
 * Entries 4*q .. 4*q+3 belong to quad q (0..3); within a quad the four
 * pixels are listed as (0,0), (1,0), (0,1), (1,1) relative to the quad's
 * own origin, which is at (0,0), (2,0), (0,2) and (2,2) for quads 0..3.
 */
static const unsigned char quad_offset_x[16] = {
   0, 1, 0, 1,   2, 3, 2, 3,   0, 1, 0, 1,   2, 3, 2, 3
};
static const unsigned char quad_offset_y[16] = {
   0, 0, 1, 1,   0, 0, 1, 1,   2, 2, 3, 3,   2, 2, 3, 3
};
121 attrib_name(LLVMValueRef val
, unsigned attrib
, unsigned chan
, const char *suffix
)
124 lp_build_name(val
, "pos.%c%s", "xyzw"[chan
], suffix
);
126 lp_build_name(val
, "input%u.%c%s", attrib
- 1, "xyzw"[chan
], suffix
);
130 calc_offsets(struct lp_build_context
*coeff_bld
,
131 unsigned quad_start_index
,
132 LLVMValueRef
*pixoffx
,
133 LLVMValueRef
*pixoffy
)
136 unsigned num_pix
= coeff_bld
->type
.length
;
137 struct gallivm_state
*gallivm
= coeff_bld
->gallivm
;
138 LLVMBuilderRef builder
= coeff_bld
->gallivm
->builder
;
139 LLVMValueRef nr
, pixxf
, pixyf
;
141 *pixoffx
= coeff_bld
->undef
;
142 *pixoffy
= coeff_bld
->undef
;
144 for (i
= 0; i
< num_pix
; i
++) {
145 nr
= lp_build_const_int32(gallivm
, i
);
146 pixxf
= lp_build_const_float(gallivm
, quad_offset_x
[i
% num_pix
] +
147 (quad_start_index
& 1) * 2);
148 pixyf
= lp_build_const_float(gallivm
, quad_offset_y
[i
% num_pix
] +
149 (quad_start_index
& 2));
150 *pixoffx
= LLVMBuildInsertElement(builder
, *pixoffx
, pixxf
, nr
, "");
151 *pixoffy
= LLVMBuildInsertElement(builder
, *pixoffy
, pixyf
, nr
, "");
156 calc_centroid_offsets(struct lp_build_interp_soa_context
*bld
,
157 struct gallivm_state
*gallivm
,
158 LLVMValueRef loop_iter
,
159 LLVMValueRef mask_store
,
160 LLVMValueRef pix_center_offset
,
161 LLVMValueRef
*centroid_x
, LLVMValueRef
*centroid_y
)
163 struct lp_build_context
*coeff_bld
= &bld
->coeff_bld
;
164 LLVMBuilderRef builder
= gallivm
->builder
;
165 LLVMValueRef s_mask_and
= NULL
;
166 LLVMValueRef centroid_x_offset
= pix_center_offset
;
167 LLVMValueRef centroid_y_offset
= pix_center_offset
;
168 for (int s
= bld
->coverage_samples
- 1; s
>= 0; s
--) {
169 LLVMValueRef sample_cov
;
170 LLVMValueRef s_mask_idx
= LLVMBuildMul(builder
, bld
->num_loop
, lp_build_const_int32(gallivm
, s
), "");
172 s_mask_idx
= LLVMBuildAdd(builder
, s_mask_idx
, loop_iter
, "");
173 sample_cov
= lp_build_pointer_get(builder
, mask_store
, s_mask_idx
);
174 if (s
== bld
->coverage_samples
- 1)
175 s_mask_and
= sample_cov
;
177 s_mask_and
= LLVMBuildAnd(builder
, s_mask_and
, sample_cov
, "");
179 LLVMValueRef x_val_idx
= lp_build_const_int32(gallivm
, s
* 2);
180 LLVMValueRef y_val_idx
= lp_build_const_int32(gallivm
, s
* 2 + 1);
182 x_val_idx
= lp_build_array_get(gallivm
, bld
->sample_pos_array
, x_val_idx
);
183 y_val_idx
= lp_build_array_get(gallivm
, bld
->sample_pos_array
, y_val_idx
);
184 x_val_idx
= lp_build_broadcast_scalar(coeff_bld
, x_val_idx
);
185 y_val_idx
= lp_build_broadcast_scalar(coeff_bld
, y_val_idx
);
186 centroid_x_offset
= lp_build_select(coeff_bld
, sample_cov
, x_val_idx
, centroid_x_offset
);
187 centroid_y_offset
= lp_build_select(coeff_bld
, sample_cov
, y_val_idx
, centroid_y_offset
);
189 *centroid_x
= lp_build_select(coeff_bld
, s_mask_and
, pix_center_offset
, centroid_x_offset
);
190 *centroid_y
= lp_build_select(coeff_bld
, s_mask_and
, pix_center_offset
, centroid_y_offset
);
193 /* Much easier, and significantly less instructions in the per-stamp
194 * part (less than half) but overall more instructions so a loss if
195 * most quads are active. Might be a win though with larger vectors.
196 * No ability to do per-quad divide (doable but not implemented)
197 * Could be made to work with passed in pixel offsets (i.e. active quad merging).
200 coeffs_init_simple(struct lp_build_interp_soa_context
*bld
,
202 LLVMValueRef dadx_ptr
,
203 LLVMValueRef dady_ptr
)
205 struct lp_build_context
*coeff_bld
= &bld
->coeff_bld
;
206 struct lp_build_context
*setup_bld
= &bld
->setup_bld
;
207 struct gallivm_state
*gallivm
= coeff_bld
->gallivm
;
208 LLVMBuilderRef builder
= gallivm
->builder
;
211 for (attrib
= 0; attrib
< bld
->num_attribs
; ++attrib
) {
213 * always fetch all 4 values for performance/simplicity
214 * Note: we do that here because it seems to generate better
215 * code. It generates a lot of moves initially but less
216 * moves later. As far as I can tell this looks like a
217 * llvm issue, instead of simply reloading the values from
218 * the passed in pointers it if it runs out of registers
219 * it spills/reloads them. Maybe some optimization passes
221 * Might want to investigate this again later.
223 const unsigned interp
= bld
->interp
[attrib
];
224 LLVMValueRef index
= lp_build_const_int32(gallivm
,
225 attrib
* TGSI_NUM_CHANNELS
);
227 LLVMValueRef dadxaos
= setup_bld
->zero
;
228 LLVMValueRef dadyaos
= setup_bld
->zero
;
229 LLVMValueRef a0aos
= setup_bld
->zero
;
232 case LP_INTERP_PERSPECTIVE
:
235 case LP_INTERP_LINEAR
:
236 ptr
= LLVMBuildGEP(builder
, dadx_ptr
, &index
, 1, "");
237 ptr
= LLVMBuildBitCast(builder
, ptr
,
238 LLVMPointerType(setup_bld
->vec_type
, 0), "");
239 dadxaos
= LLVMBuildLoad(builder
, ptr
, "");
241 ptr
= LLVMBuildGEP(builder
, dady_ptr
, &index
, 1, "");
242 ptr
= LLVMBuildBitCast(builder
, ptr
,
243 LLVMPointerType(setup_bld
->vec_type
, 0), "");
244 dadyaos
= LLVMBuildLoad(builder
, ptr
, "");
246 attrib_name(dadxaos
, attrib
, 0, ".dadxaos");
247 attrib_name(dadyaos
, attrib
, 0, ".dadyaos");
250 case LP_INTERP_CONSTANT
:
251 case LP_INTERP_FACING
:
252 ptr
= LLVMBuildGEP(builder
, a0_ptr
, &index
, 1, "");
253 ptr
= LLVMBuildBitCast(builder
, ptr
,
254 LLVMPointerType(setup_bld
->vec_type
, 0), "");
255 a0aos
= LLVMBuildLoad(builder
, ptr
, "");
256 attrib_name(a0aos
, attrib
, 0, ".a0aos");
259 case LP_INTERP_POSITION
:
260 /* Nothing to do as the position coeffs are already setup in slot 0 */
267 bld
->a0aos
[attrib
] = a0aos
;
268 bld
->dadxaos
[attrib
] = dadxaos
;
269 bld
->dadyaos
[attrib
] = dadyaos
;
274 * Interpolate the shader input attribute values.
275 * This is called for each (group of) quad(s).
278 attribs_update_simple(struct lp_build_interp_soa_context
*bld
,
279 struct gallivm_state
*gallivm
,
280 LLVMValueRef loop_iter
,
281 LLVMValueRef mask_store
,
282 LLVMValueRef sample_id
,
286 LLVMBuilderRef builder
= gallivm
->builder
;
287 struct lp_build_context
*coeff_bld
= &bld
->coeff_bld
;
288 struct lp_build_context
*setup_bld
= &bld
->setup_bld
;
289 LLVMValueRef oow
= NULL
;
291 LLVMValueRef pixoffx
;
292 LLVMValueRef pixoffy
;
294 LLVMValueRef pix_center_offset
= lp_build_const_vec(gallivm
, coeff_bld
->type
, 0.5);
296 /* could do this with code-generated passed in pixel offsets too */
299 ptr
= LLVMBuildGEP(builder
, bld
->xoffset_store
, &loop_iter
, 1, "");
300 pixoffx
= LLVMBuildLoad(builder
, ptr
, "");
301 ptr
= LLVMBuildGEP(builder
, bld
->yoffset_store
, &loop_iter
, 1, "");
302 pixoffy
= LLVMBuildLoad(builder
, ptr
, "");
304 pixoffx
= LLVMBuildFAdd(builder
, pixoffx
,
305 lp_build_broadcast_scalar(coeff_bld
, bld
->x
), "");
306 pixoffy
= LLVMBuildFAdd(builder
, pixoffy
,
307 lp_build_broadcast_scalar(coeff_bld
, bld
->y
), "");
309 for (attrib
= start
; attrib
< end
; attrib
++) {
310 const unsigned mask
= bld
->mask
[attrib
];
311 const unsigned interp
= bld
->interp
[attrib
];
312 const unsigned loc
= bld
->interp_loc
[attrib
];
315 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
316 if (mask
& (1 << chan
)) {
318 LLVMValueRef dadx
= coeff_bld
->zero
;
319 LLVMValueRef dady
= coeff_bld
->zero
;
320 LLVMValueRef a
= coeff_bld
->zero
;
321 LLVMValueRef chan_pixoffx
= pixoffx
, chan_pixoffy
= pixoffy
;
323 index
= lp_build_const_int32(gallivm
, chan
);
325 case LP_INTERP_PERSPECTIVE
:
328 case LP_INTERP_LINEAR
:
329 if (attrib
== 0 && chan
== 0) {
330 dadx
= coeff_bld
->one
;
332 LLVMValueRef x_val_idx
= LLVMBuildMul(gallivm
->builder
, sample_id
, lp_build_const_int32(gallivm
, 2), "");
333 x_val_idx
= lp_build_array_get(gallivm
, bld
->sample_pos_array
, x_val_idx
);
334 a
= lp_build_broadcast_scalar(coeff_bld
, x_val_idx
);
336 a
= lp_build_const_vec(gallivm
, coeff_bld
->type
, bld
->pos_offset
);
339 else if (attrib
== 0 && chan
== 1) {
340 dady
= coeff_bld
->one
;
342 LLVMValueRef y_val_idx
= LLVMBuildMul(gallivm
->builder
, sample_id
, lp_build_const_int32(gallivm
, 2), "");
343 y_val_idx
= LLVMBuildAdd(gallivm
->builder
, y_val_idx
, lp_build_const_int32(gallivm
, 1), "");
344 y_val_idx
= lp_build_array_get(gallivm
, bld
->sample_pos_array
, y_val_idx
);
345 a
= lp_build_broadcast_scalar(coeff_bld
, y_val_idx
);
347 a
= lp_build_const_vec(gallivm
, coeff_bld
->type
, bld
->pos_offset
);
351 dadx
= lp_build_extract_broadcast(gallivm
, setup_bld
->type
,
352 coeff_bld
->type
, bld
->dadxaos
[attrib
],
354 dady
= lp_build_extract_broadcast(gallivm
, setup_bld
->type
,
355 coeff_bld
->type
, bld
->dadyaos
[attrib
],
357 a
= lp_build_extract_broadcast(gallivm
, setup_bld
->type
,
358 coeff_bld
->type
, bld
->a0aos
[attrib
],
361 if (bld
->coverage_samples
> 1) {
362 LLVMValueRef xoffset
= pix_center_offset
;
363 LLVMValueRef yoffset
= pix_center_offset
;
364 if (loc
== TGSI_INTERPOLATE_LOC_SAMPLE
|| (attrib
== 0 && chan
== 2 && sample_id
)) {
365 LLVMValueRef x_val_idx
= LLVMBuildMul(gallivm
->builder
, sample_id
, lp_build_const_int32(gallivm
, 2), "");
366 LLVMValueRef y_val_idx
= LLVMBuildAdd(gallivm
->builder
, x_val_idx
, lp_build_const_int32(gallivm
, 1), "");
368 x_val_idx
= lp_build_array_get(gallivm
, bld
->sample_pos_array
, x_val_idx
);
369 y_val_idx
= lp_build_array_get(gallivm
, bld
->sample_pos_array
, y_val_idx
);
370 xoffset
= lp_build_broadcast_scalar(coeff_bld
, x_val_idx
);
371 yoffset
= lp_build_broadcast_scalar(coeff_bld
, y_val_idx
);
372 } else if (loc
== TGSI_INTERPOLATE_LOC_CENTROID
) {
373 calc_centroid_offsets(bld
, gallivm
, loop_iter
, mask_store
,
374 pix_center_offset
, &xoffset
, &yoffset
);
376 chan_pixoffx
= lp_build_add(coeff_bld
, chan_pixoffx
, xoffset
);
377 chan_pixoffy
= lp_build_add(coeff_bld
, chan_pixoffy
, yoffset
);
382 * a = a0 + (x * dadx + y * dady)
384 a
= lp_build_fmuladd(builder
, dadx
, chan_pixoffx
, a
);
385 a
= lp_build_fmuladd(builder
, dady
, chan_pixoffy
, a
);
387 if (interp
== LP_INTERP_PERSPECTIVE
) {
389 LLVMValueRef w
= bld
->attribs
[0][3];
391 assert(bld
->mask
[0] & TGSI_WRITEMASK_W
);
392 oow
= lp_build_rcp(coeff_bld
, w
);
394 a
= lp_build_mul(coeff_bld
, a
, oow
);
398 case LP_INTERP_CONSTANT
:
399 case LP_INTERP_FACING
:
400 a
= lp_build_extract_broadcast(gallivm
, setup_bld
->type
,
401 coeff_bld
->type
, bld
->a0aos
[attrib
],
405 case LP_INTERP_POSITION
:
407 a
= bld
->attribs
[0][chan
];
415 if ((attrib
== 0) && (chan
== 2) && !bld
->depth_clamp
){
416 /* FIXME: Depth values can exceed 1.0, due to the fact that
417 * setup interpolation coefficients refer to (0,0) which causes
418 * precision loss. So we must clamp to 1.0 here to avoid artifacts.
419 * Note though values outside [0,1] are perfectly valid with
420 * depth clip disabled.
421 * XXX: If depth clip is disabled but we force depth clamp
422 * we may get values larger than 1.0 in the fs (but not in
423 * depth test). Not sure if that's an issue...
424 * Also, on a similar note, it is not obvious if the depth values
425 * appearing in fs (with depth clip disabled) should be clamped
426 * to [0,1], clamped to near/far or not be clamped at all...
428 a
= lp_build_min(coeff_bld
, a
, coeff_bld
->one
);
430 bld
->attribs
[attrib
][chan
] = a
;
437 lp_build_interp_soa_indirect(struct lp_build_interp_soa_context
*bld
,
438 struct gallivm_state
*gallivm
,
439 unsigned attrib
, unsigned chan
,
440 LLVMValueRef indir_index
,
441 LLVMValueRef pixoffx
,
442 LLVMValueRef pixoffy
)
444 LLVMBuilderRef builder
= gallivm
->builder
;
445 struct lp_build_context
*coeff_bld
= &bld
->coeff_bld
;
446 const unsigned interp
= bld
->interp
[attrib
];
447 LLVMValueRef dadx
= coeff_bld
->zero
;
448 LLVMValueRef dady
= coeff_bld
->zero
;
449 LLVMValueRef a
= coeff_bld
->zero
;
451 LLVMTypeRef u8ptr
= LLVMPointerType(LLVMInt8TypeInContext(gallivm
->context
), 0);
453 indir_index
= LLVMBuildAdd(builder
, indir_index
, lp_build_const_int_vec(gallivm
, coeff_bld
->type
, attrib
), "");
454 LLVMValueRef index
= LLVMBuildMul(builder
, indir_index
, lp_build_const_int_vec(gallivm
, coeff_bld
->type
, 4), "");
455 index
= LLVMBuildAdd(builder
, index
, lp_build_const_int_vec(gallivm
, coeff_bld
->type
, chan
), "");
457 /* size up to byte indices */
458 index
= LLVMBuildMul(builder
, index
, lp_build_const_int_vec(gallivm
, coeff_bld
->type
, 4), "");
460 struct lp_type dst_type
= coeff_bld
->type
;
463 case LP_INTERP_PERSPECTIVE
:
465 case LP_INTERP_LINEAR
:
467 dadx
= lp_build_gather(gallivm
, coeff_bld
->type
.length
,
468 coeff_bld
->type
.width
, dst_type
,
469 true, LLVMBuildBitCast(builder
, bld
->dadx_ptr
, u8ptr
, ""), index
, false);
471 dady
= lp_build_gather(gallivm
, coeff_bld
->type
.length
,
472 coeff_bld
->type
.width
, dst_type
,
473 true, LLVMBuildBitCast(builder
, bld
->dady_ptr
, u8ptr
, ""), index
, false);
475 a
= lp_build_gather(gallivm
, coeff_bld
->type
.length
,
476 coeff_bld
->type
.width
, dst_type
,
477 true, LLVMBuildBitCast(builder
, bld
->a0_ptr
, u8ptr
, ""), index
, false);
480 * a = a0 + (x * dadx + y * dady)
482 a
= lp_build_fmuladd(builder
, dadx
, pixoffx
, a
);
483 a
= lp_build_fmuladd(builder
, dady
, pixoffy
, a
);
485 if (interp
== LP_INTERP_PERSPECTIVE
) {
486 LLVMValueRef w
= bld
->attribs
[0][3];
488 assert(bld
->mask
[0] & TGSI_WRITEMASK_W
);
489 LLVMValueRef oow
= lp_build_rcp(coeff_bld
, w
);
490 a
= lp_build_mul(coeff_bld
, a
, oow
);
494 case LP_INTERP_CONSTANT
:
495 case LP_INTERP_FACING
:
496 a
= lp_build_gather(gallivm
, coeff_bld
->type
.length
,
497 coeff_bld
->type
.width
, dst_type
,
498 true, LLVMBuildBitCast(builder
, bld
->a0_ptr
, u8ptr
, ""), index
, false);
508 lp_build_interp_soa(struct lp_build_interp_soa_context
*bld
,
509 struct gallivm_state
*gallivm
,
510 LLVMValueRef loop_iter
,
511 LLVMValueRef mask_store
,
512 unsigned attrib
, unsigned chan
,
514 LLVMValueRef indir_index
,
515 LLVMValueRef offsets
[2])
517 LLVMBuilderRef builder
= gallivm
->builder
;
518 struct lp_build_context
*coeff_bld
= &bld
->coeff_bld
;
519 struct lp_build_context
*setup_bld
= &bld
->setup_bld
;
520 LLVMValueRef pixoffx
;
521 LLVMValueRef pixoffy
;
524 /* could do this with code-generated passed in pixel offsets too */
527 ptr
= LLVMBuildGEP(builder
, bld
->xoffset_store
, &loop_iter
, 1, "");
528 pixoffx
= LLVMBuildLoad(builder
, ptr
, "");
529 ptr
= LLVMBuildGEP(builder
, bld
->yoffset_store
, &loop_iter
, 1, "");
530 pixoffy
= LLVMBuildLoad(builder
, ptr
, "");
532 pixoffx
= LLVMBuildFAdd(builder
, pixoffx
,
533 lp_build_broadcast_scalar(coeff_bld
, bld
->x
), "");
534 pixoffy
= LLVMBuildFAdd(builder
, pixoffy
,
535 lp_build_broadcast_scalar(coeff_bld
, bld
->y
), "");
537 LLVMValueRef pix_center_offset
= lp_build_const_vec(gallivm
, coeff_bld
->type
, 0.5);
539 if (loc
== TGSI_INTERPOLATE_LOC_CENTER
) {
540 if (bld
->coverage_samples
> 1) {
541 pixoffx
= LLVMBuildFAdd(builder
, pixoffx
, pix_center_offset
, "");
542 pixoffy
= LLVMBuildFAdd(builder
, pixoffy
, pix_center_offset
, "");
546 pixoffx
= LLVMBuildFAdd(builder
, pixoffx
,
549 pixoffy
= LLVMBuildFAdd(builder
, pixoffy
,
551 } else if (loc
== TGSI_INTERPOLATE_LOC_SAMPLE
) {
552 LLVMValueRef x_val_idx
= LLVMBuildMul(gallivm
->builder
, offsets
[0], lp_build_const_int_vec(gallivm
, bld
->coeff_bld
.type
, 2 * 4), "");
553 LLVMValueRef y_val_idx
= LLVMBuildAdd(gallivm
->builder
, x_val_idx
, lp_build_const_int_vec(gallivm
, bld
->coeff_bld
.type
, 4), "");
555 LLVMValueRef base_ptr
= LLVMBuildBitCast(gallivm
->builder
, bld
->sample_pos_array
,
556 LLVMPointerType(LLVMInt8TypeInContext(gallivm
->context
), 0), "");
557 LLVMValueRef xoffset
= lp_build_gather(gallivm
,
558 bld
->coeff_bld
.type
.length
,
559 bld
->coeff_bld
.type
.width
,
560 lp_elem_type(bld
->coeff_bld
.type
),
564 LLVMValueRef yoffset
= lp_build_gather(gallivm
,
565 bld
->coeff_bld
.type
.length
,
566 bld
->coeff_bld
.type
.width
,
567 lp_elem_type(bld
->coeff_bld
.type
),
572 if (bld
->coverage_samples
> 1) {
573 pixoffx
= LLVMBuildFAdd(builder
, pixoffx
, xoffset
, "");
574 pixoffy
= LLVMBuildFAdd(builder
, pixoffy
, yoffset
, "");
576 } else if (loc
== TGSI_INTERPOLATE_LOC_CENTROID
) {
577 LLVMValueRef centroid_x_offset
, centroid_y_offset
;
579 /* for centroid find covered samples for this quad. */
580 /* if all samples are covered use pixel centers */
581 if (bld
->coverage_samples
> 1) {
582 calc_centroid_offsets(bld
, gallivm
, loop_iter
, mask_store
,
583 pix_center_offset
, ¢roid_x_offset
, ¢roid_y_offset
);
585 pixoffx
= LLVMBuildFAdd(builder
, pixoffx
, centroid_x_offset
, "");
586 pixoffy
= LLVMBuildFAdd(builder
, pixoffy
, centroid_y_offset
, "");
590 // remap attrib properly.
594 return lp_build_interp_soa_indirect(bld
, gallivm
, attrib
, chan
,
595 indir_index
, pixoffx
, pixoffy
);
598 const unsigned interp
= bld
->interp
[attrib
];
599 LLVMValueRef dadx
= coeff_bld
->zero
;
600 LLVMValueRef dady
= coeff_bld
->zero
;
601 LLVMValueRef a
= coeff_bld
->zero
;
603 LLVMValueRef index
= lp_build_const_int32(gallivm
, chan
);
606 case LP_INTERP_PERSPECTIVE
:
608 case LP_INTERP_LINEAR
:
609 dadx
= lp_build_extract_broadcast(gallivm
, setup_bld
->type
,
610 coeff_bld
->type
, bld
->dadxaos
[attrib
],
613 dady
= lp_build_extract_broadcast(gallivm
, setup_bld
->type
,
614 coeff_bld
->type
, bld
->dadyaos
[attrib
],
617 a
= lp_build_extract_broadcast(gallivm
, setup_bld
->type
,
618 coeff_bld
->type
, bld
->a0aos
[attrib
],
622 * a = a0 + (x * dadx + y * dady)
624 a
= lp_build_fmuladd(builder
, dadx
, pixoffx
, a
);
625 a
= lp_build_fmuladd(builder
, dady
, pixoffy
, a
);
627 if (interp
== LP_INTERP_PERSPECTIVE
) {
628 LLVMValueRef w
= bld
->attribs
[0][3];
630 assert(bld
->mask
[0] & TGSI_WRITEMASK_W
);
631 LLVMValueRef oow
= lp_build_rcp(coeff_bld
, w
);
632 a
= lp_build_mul(coeff_bld
, a
, oow
);
636 case LP_INTERP_CONSTANT
:
637 case LP_INTERP_FACING
:
638 a
= lp_build_extract_broadcast(gallivm
, setup_bld
->type
,
639 coeff_bld
->type
, bld
->a0aos
[attrib
],
650 * Generate the position vectors.
652 * Parameter x0, y0 are the integer values with upper left coordinates.
655 pos_init(struct lp_build_interp_soa_context
*bld
,
659 LLVMBuilderRef builder
= bld
->coeff_bld
.gallivm
->builder
;
660 struct lp_build_context
*coeff_bld
= &bld
->coeff_bld
;
662 bld
->x
= LLVMBuildSIToFP(builder
, x0
, coeff_bld
->elem_type
, "");
663 bld
->y
= LLVMBuildSIToFP(builder
, y0
, coeff_bld
->elem_type
, "");
668 * Initialize fragment shader input attribute info.
671 lp_build_interp_soa_init(struct lp_build_interp_soa_context
*bld
,
672 struct gallivm_state
*gallivm
,
674 const struct lp_shader_input
*inputs
,
675 boolean pixel_center_integer
,
676 unsigned coverage_samples
,
677 LLVMValueRef sample_pos_array
,
678 LLVMValueRef num_loop
,
680 LLVMBuilderRef builder
,
683 LLVMValueRef dadx_ptr
,
684 LLVMValueRef dady_ptr
,
688 struct lp_type coeff_type
;
689 struct lp_type setup_type
;
693 memset(bld
, 0, sizeof *bld
);
695 memset(&coeff_type
, 0, sizeof coeff_type
);
696 coeff_type
.floating
= TRUE
;
697 coeff_type
.sign
= TRUE
;
698 coeff_type
.width
= 32;
699 coeff_type
.length
= type
.length
;
701 memset(&setup_type
, 0, sizeof setup_type
);
702 setup_type
.floating
= TRUE
;
703 setup_type
.sign
= TRUE
;
704 setup_type
.width
= 32;
705 setup_type
.length
= TGSI_NUM_CHANNELS
;
708 /* XXX: we don't support interpolating into any other types */
709 assert(memcmp(&coeff_type
, &type
, sizeof coeff_type
) == 0);
711 lp_build_context_init(&bld
->coeff_bld
, gallivm
, coeff_type
);
712 lp_build_context_init(&bld
->setup_bld
, gallivm
, setup_type
);
714 /* For convenience */
715 bld
->pos
= bld
->attribs
[0];
716 bld
->inputs
= (const LLVMValueRef (*)[TGSI_NUM_CHANNELS
]) bld
->attribs
[1];
719 bld
->mask
[0] = TGSI_WRITEMASK_XYZW
;
720 bld
->interp
[0] = LP_INTERP_LINEAR
;
721 bld
->interp_loc
[0] = 0;
724 for (attrib
= 0; attrib
< num_inputs
; ++attrib
) {
725 bld
->mask
[1 + attrib
] = inputs
[attrib
].usage_mask
;
726 bld
->interp
[1 + attrib
] = inputs
[attrib
].interp
;
727 bld
->interp_loc
[1 + attrib
] = inputs
[attrib
].location
;
729 bld
->num_attribs
= 1 + num_inputs
;
731 /* needed for indirect */
732 bld
->a0_ptr
= a0_ptr
;
733 bld
->dadx_ptr
= dadx_ptr
;
734 bld
->dady_ptr
= dady_ptr
;
736 /* Ensure all masked out input channels have a valid value */
737 for (attrib
= 0; attrib
< bld
->num_attribs
; ++attrib
) {
738 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; ++chan
) {
739 bld
->attribs
[attrib
][chan
] = bld
->coeff_bld
.undef
;
743 if (pixel_center_integer
) {
744 bld
->pos_offset
= 0.0;
746 bld
->pos_offset
= 0.5;
748 bld
->depth_clamp
= depth_clamp
;
749 bld
->coverage_samples
= coverage_samples
;
750 bld
->num_loop
= num_loop
;
751 bld
->sample_pos_array
= sample_pos_array
;
753 pos_init(bld
, x0
, y0
);
756 * Simple method (single step interpolation) may be slower if vector length
757 * is just 4, but the results are different (generally less accurate) with
758 * the other method, so always use more accurate version.
761 /* XXX this should use a global static table */
763 unsigned num_loops
= 16 / type
.length
;
764 LLVMValueRef pixoffx
, pixoffy
, index
;
767 bld
->xoffset_store
= lp_build_array_alloca(gallivm
,
768 lp_build_vec_type(gallivm
, type
),
769 lp_build_const_int32(gallivm
, num_loops
),
771 bld
->yoffset_store
= lp_build_array_alloca(gallivm
,
772 lp_build_vec_type(gallivm
, type
),
773 lp_build_const_int32(gallivm
, num_loops
),
775 for (i
= 0; i
< num_loops
; i
++) {
776 index
= lp_build_const_int32(gallivm
, i
);
777 calc_offsets(&bld
->coeff_bld
, i
*type
.length
/4, &pixoffx
, &pixoffy
);
778 ptr
= LLVMBuildGEP(builder
, bld
->xoffset_store
, &index
, 1, "");
779 LLVMBuildStore(builder
, pixoffx
, ptr
);
780 ptr
= LLVMBuildGEP(builder
, bld
->yoffset_store
, &index
, 1, "");
781 LLVMBuildStore(builder
, pixoffy
, ptr
);
784 coeffs_init_simple(bld
, a0_ptr
, dadx_ptr
, dady_ptr
);
789 * Advance the position and inputs to the given quad within the block.
793 lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context
*bld
,
794 struct gallivm_state
*gallivm
,
795 LLVMValueRef quad_start_index
,
796 LLVMValueRef mask_store
,
797 LLVMValueRef sample_id
)
799 attribs_update_simple(bld
, gallivm
, quad_start_index
, mask_store
, sample_id
, 1, bld
->num_attribs
);
803 lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context
*bld
,
804 struct gallivm_state
*gallivm
,
805 LLVMValueRef quad_start_index
,
806 LLVMValueRef sample_id
)
808 attribs_update_simple(bld
, gallivm
, quad_start_index
, NULL
, sample_id
, 0, 1);