1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 VMware, Inc.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 **************************************************************************/
/**
 * @file
 * Position and shader input interpolation.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 */
36 #include "pipe/p_shader_tokens.h"
37 #include "util/u_debug.h"
38 #include "util/u_memory.h"
39 #include "util/u_math.h"
40 #include "tgsi/tgsi_scan.h"
41 #include "gallivm/lp_bld_debug.h"
42 #include "gallivm/lp_bld_const.h"
43 #include "gallivm/lp_bld_arit.h"
44 #include "gallivm/lp_bld_swizzle.h"
45 #include "gallivm/lp_bld_flow.h"
46 #include "lp_bld_interp.h"
/*
 * The shader JIT function operates on blocks of quads.
 * Each block has 2x2 quads and each quad has 2x2 pixels.
 *
 * We iterate over the quads in order 0, 1, 2, 3:
 *
 *    +---+---+
 *    | 0 | 1 |
 *    +---+---+
 *    | 2 | 3 |
 *    +---+---+
 *
 * If we iterate over multiple quads at once, quads 01 and 23 are processed
 * together.
 *
 * Within each quad, we have four pixels which are represented in SOA
 * order:
 *
 *    +---+---+
 *    | 0 | 1 |
 *    +---+---+
 *    | 2 | 3 |
 *    +---+---+
 *
 * So the green channel (for example) of the four pixels is stored in
 * a single vector register: {g0, g1, g2, g3}.
 * The order stays the same even with multiple quads:
 *
 *    0 1 4 5
 *    2 3 6 7
 *
 * is stored as g0..g7.
 */
/**
 * Do one perspective divide per quad.
 *
 * For perspective interpolation, the final attribute value is given by
 *
 *  a' = a/w = a * oow
 *
 * where
 *
 *  a = a0 + dadx*x + dady*y
 *  w = w0 + dwdx*x + dwdy*y
 *  oow = 1/w = 1/(w0 + dwdx*x + dwdy*y)
 *
 * Instead of computing the division per pixel, with this macro we compute the
 * division on the upper left pixel of each quad, and use a linear
 * approximation in the remaining pixels, given by:
 *
 *  da'dx = (dadx - dwdx*a)*oow
 *  da'dy = (dady - dwdy*a)*oow
 *
 * Ironically, this actually makes things slower -- probably because the
 * divide hardware unit is rarely used, whereas the multiply unit is typically
 * already saturated.
 */
/* Set to 1 to do the perspective divide once per quad instead of per pixel. */
#define PERSPECTIVE_DIVIDE_PER_QUAD 0


/*
 * x and y offset of each of the 16 pixels of a 4x4 block, indexed by pixel
 * number. Pixels are numbered quad by quad (four consecutive entries per
 * 2x2 quad).
 */
static const unsigned char quad_offset_x[16] = {
   0, 1, 0, 1,   2, 3, 2, 3,
   0, 1, 0, 1,   2, 3, 2, 3
};
static const unsigned char quad_offset_y[16] = {
   0, 0, 1, 1,   0, 0, 1, 1,
   2, 2, 3, 3,   2, 2, 3, 3
};
118 attrib_name(LLVMValueRef val
, unsigned attrib
, unsigned chan
, const char *suffix
)
121 lp_build_name(val
, "pos.%c%s", "xyzw"[chan
], suffix
);
123 lp_build_name(val
, "input%u.%c%s", attrib
- 1, "xyzw"[chan
], suffix
);
127 calc_offsets(struct lp_build_context
*coeff_bld
,
128 unsigned quad_start_index
,
129 LLVMValueRef
*pixoffx
,
130 LLVMValueRef
*pixoffy
)
133 unsigned num_pix
= coeff_bld
->type
.length
;
134 struct gallivm_state
*gallivm
= coeff_bld
->gallivm
;
135 LLVMBuilderRef builder
= coeff_bld
->gallivm
->builder
;
136 LLVMValueRef nr
, pixxf
, pixyf
;
138 *pixoffx
= coeff_bld
->undef
;
139 *pixoffy
= coeff_bld
->undef
;
141 for (i
= 0; i
< num_pix
; i
++) {
142 nr
= lp_build_const_int32(gallivm
, i
);
143 pixxf
= lp_build_const_float(gallivm
, quad_offset_x
[i
% num_pix
] +
144 (quad_start_index
& 1) * 2);
145 pixyf
= lp_build_const_float(gallivm
, quad_offset_y
[i
% num_pix
] +
146 (quad_start_index
& 2));
147 *pixoffx
= LLVMBuildInsertElement(builder
, *pixoffx
, pixxf
, nr
, "");
148 *pixoffy
= LLVMBuildInsertElement(builder
, *pixoffy
, pixyf
, nr
, "");
153 /* Much easier, and significantly less instructions in the per-stamp
154 * part (less than half) but overall more instructions so a loss if
155 * most quads are active. Might be a win though with larger vectors.
156 * No ability to do per-quad divide (doable but not implemented)
157 * Could be made to work with passed in pixel offsets (i.e. active quad merging).
160 coeffs_init_simple(struct lp_build_interp_soa_context
*bld
,
162 LLVMValueRef dadx_ptr
,
163 LLVMValueRef dady_ptr
)
165 struct lp_build_context
*coeff_bld
= &bld
->coeff_bld
;
166 struct lp_build_context
*setup_bld
= &bld
->setup_bld
;
167 struct gallivm_state
*gallivm
= coeff_bld
->gallivm
;
168 LLVMBuilderRef builder
= gallivm
->builder
;
171 for (attrib
= 0; attrib
< bld
->num_attribs
; ++attrib
) {
173 * always fetch all 4 values for performance/simplicity
174 * Note: we do that here because it seems to generate better
175 * code. It generates a lot of moves initially but less
176 * moves later. As far as I can tell this looks like a
177 * llvm issue, instead of simply reloading the values from
178 * the passed in pointers it if it runs out of registers
179 * it spills/reloads them. Maybe some optimization passes
181 * Might want to investigate this again later.
183 const unsigned interp
= bld
->interp
[attrib
];
184 LLVMValueRef index
= lp_build_const_int32(gallivm
,
185 attrib
* TGSI_NUM_CHANNELS
);
187 LLVMValueRef dadxaos
= setup_bld
->zero
;
188 LLVMValueRef dadyaos
= setup_bld
->zero
;
189 LLVMValueRef a0aos
= setup_bld
->zero
;
192 case LP_INTERP_PERSPECTIVE
:
195 case LP_INTERP_LINEAR
:
196 ptr
= LLVMBuildGEP(builder
, dadx_ptr
, &index
, 1, "");
197 ptr
= LLVMBuildBitCast(builder
, ptr
,
198 LLVMPointerType(setup_bld
->vec_type
, 0), "");
199 dadxaos
= LLVMBuildLoad(builder
, ptr
, "");
201 ptr
= LLVMBuildGEP(builder
, dady_ptr
, &index
, 1, "");
202 ptr
= LLVMBuildBitCast(builder
, ptr
,
203 LLVMPointerType(setup_bld
->vec_type
, 0), "");
204 dadyaos
= LLVMBuildLoad(builder
, ptr
, "");
206 attrib_name(dadxaos
, attrib
, 0, ".dadxaos");
207 attrib_name(dadyaos
, attrib
, 0, ".dadyaos");
210 case LP_INTERP_CONSTANT
:
211 case LP_INTERP_FACING
:
212 ptr
= LLVMBuildGEP(builder
, a0_ptr
, &index
, 1, "");
213 ptr
= LLVMBuildBitCast(builder
, ptr
,
214 LLVMPointerType(setup_bld
->vec_type
, 0), "");
215 a0aos
= LLVMBuildLoad(builder
, ptr
, "");
216 attrib_name(a0aos
, attrib
, 0, ".a0aos");
219 case LP_INTERP_POSITION
:
220 /* Nothing to do as the position coeffs are already setup in slot 0 */
227 bld
->a0aos
[attrib
] = a0aos
;
228 bld
->dadxaos
[attrib
] = dadxaos
;
229 bld
->dadyaos
[attrib
] = dadyaos
;
234 * Interpolate the shader input attribute values.
235 * This is called for each (group of) quad(s).
238 attribs_update_simple(struct lp_build_interp_soa_context
*bld
,
239 struct gallivm_state
*gallivm
,
240 LLVMValueRef loop_iter
,
244 LLVMBuilderRef builder
= gallivm
->builder
;
245 struct lp_build_context
*coeff_bld
= &bld
->coeff_bld
;
246 struct lp_build_context
*setup_bld
= &bld
->setup_bld
;
247 LLVMValueRef oow
= NULL
;
249 LLVMValueRef pixoffx
;
250 LLVMValueRef pixoffy
;
253 /* could do this with code-generated passed in pixel offsets too */
256 ptr
= LLVMBuildGEP(builder
, bld
->xoffset_store
, &loop_iter
, 1, "");
257 pixoffx
= LLVMBuildLoad(builder
, ptr
, "");
258 ptr
= LLVMBuildGEP(builder
, bld
->yoffset_store
, &loop_iter
, 1, "");
259 pixoffy
= LLVMBuildLoad(builder
, ptr
, "");
261 pixoffx
= LLVMBuildFAdd(builder
, pixoffx
,
262 lp_build_broadcast_scalar(coeff_bld
, bld
->x
), "");
263 pixoffy
= LLVMBuildFAdd(builder
, pixoffy
,
264 lp_build_broadcast_scalar(coeff_bld
, bld
->y
), "");
266 for (attrib
= start
; attrib
< end
; attrib
++) {
267 const unsigned mask
= bld
->mask
[attrib
];
268 const unsigned interp
= bld
->interp
[attrib
];
271 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; chan
++) {
272 if (mask
& (1 << chan
)) {
274 LLVMValueRef dadx
= coeff_bld
->zero
;
275 LLVMValueRef dady
= coeff_bld
->zero
;
276 LLVMValueRef a
= coeff_bld
->zero
;
278 index
= lp_build_const_int32(gallivm
, chan
);
280 case LP_INTERP_PERSPECTIVE
:
283 case LP_INTERP_LINEAR
:
284 if (attrib
== 0 && chan
== 0) {
285 dadx
= coeff_bld
->one
;
286 if (bld
->pos_offset
) {
287 a
= lp_build_const_vec(gallivm
, coeff_bld
->type
, bld
->pos_offset
);
290 else if (attrib
== 0 && chan
== 1) {
291 dady
= coeff_bld
->one
;
292 if (bld
->pos_offset
) {
293 a
= lp_build_const_vec(gallivm
, coeff_bld
->type
, bld
->pos_offset
);
297 dadx
= lp_build_extract_broadcast(gallivm
, setup_bld
->type
,
298 coeff_bld
->type
, bld
->dadxaos
[attrib
],
300 dady
= lp_build_extract_broadcast(gallivm
, setup_bld
->type
,
301 coeff_bld
->type
, bld
->dadyaos
[attrib
],
303 a
= lp_build_extract_broadcast(gallivm
, setup_bld
->type
,
304 coeff_bld
->type
, bld
->a0aos
[attrib
],
308 * a = a0 + (x * dadx + y * dady)
310 a
= lp_build_fmuladd(builder
, dadx
, pixoffx
, a
);
311 a
= lp_build_fmuladd(builder
, dady
, pixoffy
, a
);
313 if (interp
== LP_INTERP_PERSPECTIVE
) {
315 LLVMValueRef w
= bld
->attribs
[0][3];
317 assert(bld
->mask
[0] & TGSI_WRITEMASK_W
);
318 oow
= lp_build_rcp(coeff_bld
, w
);
320 a
= lp_build_mul(coeff_bld
, a
, oow
);
324 case LP_INTERP_CONSTANT
:
325 case LP_INTERP_FACING
:
326 a
= lp_build_extract_broadcast(gallivm
, setup_bld
->type
,
327 coeff_bld
->type
, bld
->a0aos
[attrib
],
331 case LP_INTERP_POSITION
:
333 a
= bld
->attribs
[0][chan
];
341 if ((attrib
== 0) && (chan
== 2) && !bld
->depth_clamp
){
342 /* FIXME: Depth values can exceed 1.0, due to the fact that
343 * setup interpolation coefficients refer to (0,0) which causes
344 * precision loss. So we must clamp to 1.0 here to avoid artifacts.
345 * Note though values outside [0,1] are perfectly valid with
346 * depth clip disabled.
347 * XXX: If depth clip is disabled but we force depth clamp
348 * we may get values larger than 1.0 in the fs (but not in
349 * depth test). Not sure if that's an issue...
350 * Also, on a similar note, it is not obvious if the depth values
351 * appearing in fs (with depth clip disabled) should be clamped
352 * to [0,1], clamped to near/far or not be clamped at all...
354 a
= lp_build_min(coeff_bld
, a
, coeff_bld
->one
);
356 bld
->attribs
[attrib
][chan
] = a
;
363 * Initialize the bld->a, dadq fields. This involves fetching
364 * those values from the arrays which are passed into the JIT function.
367 coeffs_init(struct lp_build_interp_soa_context
*bld
,
369 LLVMValueRef dadx_ptr
,
370 LLVMValueRef dady_ptr
)
372 struct lp_build_context
*coeff_bld
= &bld
->coeff_bld
;
373 struct lp_build_context
*setup_bld
= &bld
->setup_bld
;
374 struct gallivm_state
*gallivm
= coeff_bld
->gallivm
;
375 LLVMBuilderRef builder
= gallivm
->builder
;
376 LLVMValueRef pixoffx
, pixoffy
;
381 pixoffx
= coeff_bld
->undef
;
382 pixoffy
= coeff_bld
->undef
;
383 for (i
= 0; i
< coeff_bld
->type
.length
; i
++) {
384 LLVMValueRef nr
= lp_build_const_int32(gallivm
, i
);
385 LLVMValueRef pixxf
= lp_build_const_float(gallivm
, quad_offset_x
[i
]);
386 LLVMValueRef pixyf
= lp_build_const_float(gallivm
, quad_offset_y
[i
]);
387 pixoffx
= LLVMBuildInsertElement(builder
, pixoffx
, pixxf
, nr
, "");
388 pixoffy
= LLVMBuildInsertElement(builder
, pixoffy
, pixyf
, nr
, "");
392 for (attrib
= 0; attrib
< bld
->num_attribs
; ++attrib
) {
393 const unsigned mask
= bld
->mask
[attrib
];
394 const unsigned interp
= bld
->interp
[attrib
];
395 LLVMValueRef index
= lp_build_const_int32(gallivm
,
396 attrib
* TGSI_NUM_CHANNELS
);
398 LLVMValueRef dadxaos
= setup_bld
->zero
;
399 LLVMValueRef dadyaos
= setup_bld
->zero
;
400 LLVMValueRef a0aos
= setup_bld
->zero
;
402 /* always fetch all 4 values for performance/simplicity */
404 case LP_INTERP_PERSPECTIVE
:
407 case LP_INTERP_LINEAR
:
408 ptr
= LLVMBuildGEP(builder
, dadx_ptr
, &index
, 1, "");
409 ptr
= LLVMBuildBitCast(builder
, ptr
,
410 LLVMPointerType(setup_bld
->vec_type
, 0), "");
411 dadxaos
= LLVMBuildLoad(builder
, ptr
, "");
413 ptr
= LLVMBuildGEP(builder
, dady_ptr
, &index
, 1, "");
414 ptr
= LLVMBuildBitCast(builder
, ptr
,
415 LLVMPointerType(setup_bld
->vec_type
, 0), "");
416 dadyaos
= LLVMBuildLoad(builder
, ptr
, "");
418 attrib_name(dadxaos
, attrib
, 0, ".dadxaos");
419 attrib_name(dadyaos
, attrib
, 0, ".dadyaos");
422 case LP_INTERP_CONSTANT
:
423 case LP_INTERP_FACING
:
424 ptr
= LLVMBuildGEP(builder
, a0_ptr
, &index
, 1, "");
425 ptr
= LLVMBuildBitCast(builder
, ptr
,
426 LLVMPointerType(setup_bld
->vec_type
, 0), "");
427 a0aos
= LLVMBuildLoad(builder
, ptr
, "");
428 attrib_name(a0aos
, attrib
, 0, ".a0aos");
431 case LP_INTERP_POSITION
:
432 /* Nothing to do as the position coeffs are already setup in slot 0 */
441 * a = a0 + (x * dadx + y * dady)
442 * a0aos is the attrib value at top left corner of stamp
444 if (interp
!= LP_INTERP_CONSTANT
&&
445 interp
!= LP_INTERP_FACING
) {
446 LLVMValueRef x
= lp_build_broadcast_scalar(setup_bld
, bld
->x
);
447 LLVMValueRef y
= lp_build_broadcast_scalar(setup_bld
, bld
->y
);
448 a0aos
= lp_build_fmuladd(builder
, x
, dadxaos
, a0aos
);
449 a0aos
= lp_build_fmuladd(builder
, y
, dadyaos
, a0aos
);
453 * dadq = {0, dadx, dady, dadx + dady}
454 * for two quads (side by side) this is:
455 * {0, dadx, dady, dadx+dady, 2*dadx, 2*dadx+dady, 3*dadx+dady}
457 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; ++chan
) {
458 /* this generates a CRAPLOAD of shuffles... */
459 if (mask
& (1 << chan
)) {
460 LLVMValueRef dadx
, dady
;
461 LLVMValueRef dadq
, dadq2
;
463 LLVMValueRef chan_index
= lp_build_const_int32(gallivm
, chan
);
465 if (attrib
== 0 && chan
== 0) {
467 if (bld
->pos_offset
) {
468 a
= LLVMBuildFAdd(builder
, a
, lp_build_const_float(gallivm
, bld
->pos_offset
), "");
470 a
= lp_build_broadcast_scalar(coeff_bld
, a
);
471 dadx
= coeff_bld
->one
;
472 dady
= coeff_bld
->zero
;
474 else if (attrib
== 0 && chan
== 1) {
476 if (bld
->pos_offset
) {
477 a
= LLVMBuildFAdd(builder
, a
, lp_build_const_float(gallivm
, bld
->pos_offset
), "");
479 a
= lp_build_broadcast_scalar(coeff_bld
, a
);
480 dady
= coeff_bld
->one
;
481 dadx
= coeff_bld
->zero
;
484 dadx
= lp_build_extract_broadcast(gallivm
, setup_bld
->type
,
485 coeff_bld
->type
, dadxaos
, chan_index
);
486 dady
= lp_build_extract_broadcast(gallivm
, setup_bld
->type
,
487 coeff_bld
->type
, dadyaos
, chan_index
);
492 a
= lp_build_extract_broadcast(gallivm
, setup_bld
->type
,
493 coeff_bld
->type
, a0aos
, chan_index
);
496 dadx
= LLVMBuildFMul(builder
, dadx
, pixoffx
, "");
497 dady
= LLVMBuildFMul(builder
, dady
, pixoffy
, "");
498 dadq
= LLVMBuildFAdd(builder
, dadx
, dady
, "");
501 * Compute the attrib values on the upper-left corner of each
503 * Note that if we process 2 quads at once this doesn't
504 * really exactly to what we want.
505 * We need to access elem 0 and 2 respectively later if we process
509 if (interp
!= LP_INTERP_CONSTANT
&&
510 interp
!= LP_INTERP_FACING
) {
511 dadq2
= LLVMBuildFAdd(builder
, dadq
, dadq
, "");
512 a
= LLVMBuildFAdd(builder
, a
, dadq2
, "");
515 #if PERSPECTIVE_DIVIDE_PER_QUAD
521 * XXX since we're only going to access elements 0,2 out of 8
522 * if we have 8-wide vectors we should do the division only 4-wide.
523 * a is really a 2-elements in a 4-wide vector disguised as 8-wide
526 if (interp
== LP_INTERP_PERSPECTIVE
) {
527 LLVMValueRef w
= bld
->a
[0][3];
529 assert(bld
->mask
[0] & TGSI_WRITEMASK_W
);
531 bld
->oow
= lp_build_rcp(coeff_bld
, w
);
532 lp_build_name(bld
->oow
, "oow");
534 a
= lp_build_mul(coeff_bld
, a
, bld
->oow
);
538 attrib_name(a
, attrib
, chan
, ".a");
539 attrib_name(dadq
, attrib
, chan
, ".dadq");
541 bld
->a
[attrib
][chan
] = lp_build_alloca(gallivm
,
543 LLVMBuildStore(builder
, a
, bld
->a
[attrib
][chan
]);
544 bld
->dadq
[attrib
][chan
] = dadq
;
552 * Increment the shader input attribute values.
553 * This is called when we move from one quad to the next.
556 attribs_update(struct lp_build_interp_soa_context
*bld
,
557 struct gallivm_state
*gallivm
,
558 LLVMValueRef loop_iter
,
562 LLVMBuilderRef builder
= gallivm
->builder
;
563 struct lp_build_context
*coeff_bld
= &bld
->coeff_bld
;
564 LLVMValueRef oow
= NULL
;
568 for(attrib
= start
; attrib
< end
; ++attrib
) {
569 const unsigned mask
= bld
->mask
[attrib
];
570 const unsigned interp
= bld
->interp
[attrib
];
571 for(chan
= 0; chan
< TGSI_NUM_CHANNELS
; ++chan
) {
572 if(mask
& (1 << chan
)) {
574 if (interp
== LP_INTERP_CONSTANT
||
575 interp
== LP_INTERP_FACING
) {
576 a
= LLVMBuildLoad(builder
, bld
->a
[attrib
][chan
], "");
578 else if (interp
== LP_INTERP_POSITION
) {
580 a
= bld
->attribs
[0][chan
];
585 a
= bld
->a
[attrib
][chan
];
588 * Broadcast the attribute value for this quad into all elements
592 /* stored as vector load as float */
593 LLVMTypeRef ptr_type
= LLVMPointerType(LLVMFloatTypeInContext(
594 gallivm
->context
), 0);
596 a
= LLVMBuildBitCast(builder
, a
, ptr_type
, "");
597 ptr
= LLVMBuildGEP(builder
, a
, &loop_iter
, 1, "");
598 a
= LLVMBuildLoad(builder
, ptr
, "");
599 a
= lp_build_broadcast_scalar(&bld
->coeff_bld
, a
);
603 * Get the derivatives.
606 dadq
= bld
->dadq
[attrib
][chan
];
608 #if PERSPECTIVE_DIVIDE_PER_QUAD
609 if (interp
== LP_INTERP_PERSPECTIVE
) {
610 LLVMValueRef dwdq
= bld
->dadq
[0][3];
614 oow
= LLVMBuildShuffleVector(coeff_bld
->builder
,
615 bld
->oow
, coeff_bld
->undef
,
619 dadq
= lp_build_sub(coeff_bld
,
621 lp_build_mul(coeff_bld
, a
, dwdq
));
622 dadq
= lp_build_mul(coeff_bld
, dadq
, oow
);
627 * Add the derivatives
630 a
= lp_build_add(coeff_bld
, a
, dadq
);
632 #if !PERSPECTIVE_DIVIDE_PER_QUAD
633 if (interp
== LP_INTERP_PERSPECTIVE
) {
635 LLVMValueRef w
= bld
->attribs
[0][3];
637 assert(bld
->mask
[0] & TGSI_WRITEMASK_W
);
638 oow
= lp_build_rcp(coeff_bld
, w
);
640 a
= lp_build_mul(coeff_bld
, a
, oow
);
644 if (attrib
== 0 && chan
== 2 && !bld
->depth_clamp
) {
645 /* FIXME: Depth values can exceed 1.0, due to the fact that
646 * setup interpolation coefficients refer to (0,0) which causes
647 * precision loss. So we must clamp to 1.0 here to avoid artifacts.
648 * Note though values outside [0,1] are perfectly valid with
649 * depth clip disabled..
650 * XXX: If depth clip is disabled but we force depth clamp
651 * we may get values larger than 1.0 in the fs (but not in
652 * depth test). Not sure if that's an issue...
653 * Also, on a similar note, it is not obvious if the depth values
654 * appearing in fs (with depth clip disabled) should be clamped
655 * to [0,1], clamped to near/far or not be clamped at all...
657 a
= lp_build_min(coeff_bld
, a
, coeff_bld
->one
);
660 attrib_name(a
, attrib
, chan
, "");
662 bld
->attribs
[attrib
][chan
] = a
;
670 * Generate the position vectors.
672 * Parameter x0, y0 are the integer values with upper left coordinates.
675 pos_init(struct lp_build_interp_soa_context
*bld
,
679 LLVMBuilderRef builder
= bld
->coeff_bld
.gallivm
->builder
;
680 struct lp_build_context
*coeff_bld
= &bld
->coeff_bld
;
682 bld
->x
= LLVMBuildSIToFP(builder
, x0
, coeff_bld
->elem_type
, "");
683 bld
->y
= LLVMBuildSIToFP(builder
, y0
, coeff_bld
->elem_type
, "");
688 * Initialize fragment shader input attribute info.
691 lp_build_interp_soa_init(struct lp_build_interp_soa_context
*bld
,
692 struct gallivm_state
*gallivm
,
694 const struct lp_shader_input
*inputs
,
695 boolean pixel_center_integer
,
697 LLVMBuilderRef builder
,
700 LLVMValueRef dadx_ptr
,
701 LLVMValueRef dady_ptr
,
705 struct lp_type coeff_type
;
706 struct lp_type setup_type
;
710 memset(bld
, 0, sizeof *bld
);
712 memset(&coeff_type
, 0, sizeof coeff_type
);
713 coeff_type
.floating
= TRUE
;
714 coeff_type
.sign
= TRUE
;
715 coeff_type
.width
= 32;
716 coeff_type
.length
= type
.length
;
718 memset(&setup_type
, 0, sizeof setup_type
);
719 setup_type
.floating
= TRUE
;
720 setup_type
.sign
= TRUE
;
721 setup_type
.width
= 32;
722 setup_type
.length
= TGSI_NUM_CHANNELS
;
725 /* XXX: we don't support interpolating into any other types */
726 assert(memcmp(&coeff_type
, &type
, sizeof coeff_type
) == 0);
728 lp_build_context_init(&bld
->coeff_bld
, gallivm
, coeff_type
);
729 lp_build_context_init(&bld
->setup_bld
, gallivm
, setup_type
);
731 /* For convenience */
732 bld
->pos
= bld
->attribs
[0];
733 bld
->inputs
= (const LLVMValueRef (*)[TGSI_NUM_CHANNELS
]) bld
->attribs
[1];
736 bld
->mask
[0] = TGSI_WRITEMASK_XYZW
;
737 bld
->interp
[0] = LP_INTERP_LINEAR
;
740 for (attrib
= 0; attrib
< num_inputs
; ++attrib
) {
741 bld
->mask
[1 + attrib
] = inputs
[attrib
].usage_mask
;
742 bld
->interp
[1 + attrib
] = inputs
[attrib
].interp
;
744 bld
->num_attribs
= 1 + num_inputs
;
746 /* Ensure all masked out input channels have a valid value */
747 for (attrib
= 0; attrib
< bld
->num_attribs
; ++attrib
) {
748 for (chan
= 0; chan
< TGSI_NUM_CHANNELS
; ++chan
) {
749 bld
->attribs
[attrib
][chan
] = bld
->coeff_bld
.undef
;
753 if (pixel_center_integer
) {
754 bld
->pos_offset
= 0.0;
756 bld
->pos_offset
= 0.5;
758 bld
->depth_clamp
= depth_clamp
;
760 pos_init(bld
, x0
, y0
);
763 * Simple method (single step interpolation) may be slower if vector length
764 * is just 4, but the results are different (generally less accurate) with
765 * the other method, so always use more accurate version.
768 bld
->simple_interp
= TRUE
;
770 /* XXX this should use a global static table */
772 unsigned num_loops
= 16 / type
.length
;
773 LLVMValueRef pixoffx
, pixoffy
, index
;
776 bld
->xoffset_store
= lp_build_array_alloca(gallivm
,
777 lp_build_vec_type(gallivm
, type
),
778 lp_build_const_int32(gallivm
, num_loops
),
780 bld
->yoffset_store
= lp_build_array_alloca(gallivm
,
781 lp_build_vec_type(gallivm
, type
),
782 lp_build_const_int32(gallivm
, num_loops
),
784 for (i
= 0; i
< num_loops
; i
++) {
785 index
= lp_build_const_int32(gallivm
, i
);
786 calc_offsets(&bld
->coeff_bld
, i
*type
.length
/4, &pixoffx
, &pixoffy
);
787 ptr
= LLVMBuildGEP(builder
, bld
->xoffset_store
, &index
, 1, "");
788 LLVMBuildStore(builder
, pixoffx
, ptr
);
789 ptr
= LLVMBuildGEP(builder
, bld
->yoffset_store
, &index
, 1, "");
790 LLVMBuildStore(builder
, pixoffy
, ptr
);
793 coeffs_init_simple(bld
, a0_ptr
, dadx_ptr
, dady_ptr
);
796 bld
->simple_interp
= FALSE
;
797 coeffs_init(bld
, a0_ptr
, dadx_ptr
, dady_ptr
);
804 * Advance the position and inputs to the given quad within the block.
808 lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context
*bld
,
809 struct gallivm_state
*gallivm
,
810 LLVMValueRef quad_start_index
)
812 if (bld
->simple_interp
) {
813 attribs_update_simple(bld
, gallivm
, quad_start_index
, 1, bld
->num_attribs
);
816 attribs_update(bld
, gallivm
, quad_start_index
, 1, bld
->num_attribs
);
821 lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context
*bld
,
822 struct gallivm_state
*gallivm
,
823 LLVMValueRef quad_start_index
)
825 if (bld
->simple_interp
) {
826 attribs_update_simple(bld
, gallivm
, quad_start_index
, 0, 1);
829 attribs_update(bld
, gallivm
, quad_start_index
, 0, 1);