1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 **************************************************************************/
31 * Position and shader input interpolation.
33 * @author Jose Fonseca <jfonseca@vmware.com>
36 #include "pipe/p_shader_tokens.h"
37 #include "util/u_debug.h"
38 #include "util/u_memory.h"
39 #include "util/u_math.h"
40 #include "tgsi/tgsi_scan.h"
41 #include "gallivm/lp_bld_debug.h"
42 #include "gallivm/lp_bld_const.h"
43 #include "gallivm/lp_bld_arit.h"
44 #include "gallivm/lp_bld_swizzle.h"
45 #include "lp_bld_interp.h"
49 * The shader JIT function operates on blocks of quads.
50 * Each block has 2x2 quads and each quad has 2x2 pixels.
52 * We iterate over the quads in order 0, 1, 2, 3:
64 * Within each quad, we have four pixels which are represented in SOA
73 * So the green channel (for example) of the four pixels is stored in
74 * a single vector register: {g0, g1, g2, g3}.
79 * Do one perspective divide per quad.
81 * For perspective interpolation, the final attribute value is given
87 * a = a0 + dadx*x + dady*y
88 * w = w0 + dwdx*x + dwdy*y
89 * oow = 1/w = 1/(w0 + dwdx*x + dwdy*y)
91 * Instead of computing the division per pixel, with this macro we compute the
92 * division on the upper left pixel of each quad, and use a linear
93 * approximation in the remaining pixels, given by:
95 * da'dx = (dadx - dwdx*a)*oow
96 * da'dy = (dady - dwdy*a)*oow
98 * Ironically, this actually makes things slower -- probably because the
99 * divide hardware unit is rarely used, whereas the multiply unit is typically
/*
 * Compile-time switch tested by the `#if PERSPECTIVE_DIVIDE_PER_QUAD` /
 * `#if !PERSPECTIVE_DIVIDE_PER_QUAD` regions in coeffs_init() and
 * attribs_update().  With the value 0 the `#if !...` region of
 * attribs_update() is the one compiled: it takes the reciprocal of w
 * (lp_build_rcp) and multiplies the attribute by it on every update.
 */
102 #define PERSPECTIVE_DIVIDE_PER_QUAD 0
/*
 * Four-entry lookup tables: x offsets {0, 1, 0, 1} and y offsets
 * {0, 0, 1, 1} for indices 0..3.
 * NOTE(review): presumably the position of quad 0..3 inside the 2x2 block
 * of quads; neither table is referenced in the visible part of this file,
 * so confirm against the callers.
 */
105 static const unsigned char quad_offset_x
[4] = {0, 1, 0, 1};
106 static const unsigned char quad_offset_y
[4] = {0, 0, 1, 1};
/*
 * Attach a debug name to an LLVM value, built from the attribute slot,
 * the channel and a caller-supplied suffix.
 *
 * val     value to be named (passed straight to lp_build_name)
 * attrib  attribute slot number
 * chan    channel number; used as an index into the literal "xyzw", so
 *         0..3 select the channel letter
 * suffix  text appended after the channel letter
 *
 * Two name formats are produced: "pos.<c><suffix>" and
 * "input<attrib - 1>.<c><suffix>".
 *
 * NOTE(review): the return type, the braces and the condition that picks
 * between the two lp_build_name() calls are missing from this excerpt.
 * Presumably slot 0 is the position and gets the "pos" form -- confirm
 * against the complete file.
 */
110 attrib_name(LLVMValueRef val
, unsigned attrib
, unsigned chan
, const char *suffix
)
113 lp_build_name(val
, "pos.%c%s", "xyzw"[chan
], suffix
);
/* Inputs are numbered from zero in the name, hence attrib - 1. */
115 lp_build_name(val
, "input%u.%c%s", attrib
- 1, "xyzw"[chan
], suffix
);
120 * Initialize the bld->a0, dadx, dady fields. This involves fetching
121 * those values from the arrays which are passed into the JIT function.
/*
 * For every enabled channel of every attribute, emit the LLVM IR that
 * produces the starting value and the delta vector, and store them in
 * bld->a[attrib][chan] and bld->dadq[attrib][chan].
 *
 * bld       interpolation context; this function reads num_attribs,
 *           mask[], interp[], x, y and coeff_bld, and writes a[][] and
 *           dadq[][]
 * dadx_ptr  array of x derivatives, indexed by attrib*NUM_CHANNELS + chan
 * dady_ptr  array of y derivatives, indexed the same way
 *
 * The body also loads from `a0_ptr` with the same index.
 *
 * NOTE(review): this excerpt is incomplete.  The declaration of a0_ptr,
 * the declarations of attrib/chan/dadq/dadq2/a, the `switch` header, the
 * bodies of several `if` branches, the `#endif` and the closing braces are
 * not visible, so the exact fall-through between the `case` labels cannot
 * be confirmed from here.
 */
124 coeffs_init(struct lp_build_interp_soa_context
*bld
,
126 LLVMValueRef dadx_ptr
,
127 LLVMValueRef dady_ptr
)
129 struct lp_build_context
*coeff_bld
= &bld
->coeff_bld
;
130 LLVMBuilderRef builder
= coeff_bld
->builder
;
/* Scalar constants 0.0 and 1.0 of the coefficient element type. */
131 LLVMValueRef zero
= LLVMConstNull(coeff_bld
->elem_type
);
132 LLVMValueRef one
= LLVMConstReal(coeff_bld
->elem_type
, 1.0);
/* 32-bit integer constants 0..3, used below as vector lane indices. */
133 LLVMValueRef i0
= LLVMConstInt(LLVMInt32Type(), 0, 0);
134 LLVMValueRef i1
= LLVMConstInt(LLVMInt32Type(), 1, 0);
135 LLVMValueRef i2
= LLVMConstInt(LLVMInt32Type(), 2, 0);
136 LLVMValueRef i3
= LLVMConstInt(LLVMInt32Type(), 3, 0);
140 /* TODO: Use more vector operations */
142 for (attrib
= 0; attrib
< bld
->num_attribs
; ++attrib
) {
143 const unsigned mask
= bld
->mask
[attrib
];
144 const unsigned interp
= bld
->interp
[attrib
];
145 for (chan
= 0; chan
< NUM_CHANNELS
; ++chan
) {
/* Only channels enabled in this attribute's mask are processed. */
146 if (mask
& (1 << chan
)) {
/* Flat element index shared by the a0/dadx/dady coefficient arrays. */
147 LLVMValueRef index
= LLVMConstInt(LLVMInt32Type(), attrib
*NUM_CHANNELS
+ chan
, 0);
/* All coefficients start out as zero and are overwritten per mode. */
148 LLVMValueRef a0
= zero
;
149 LLVMValueRef dadx
= zero
;
150 LLVMValueRef dady
= zero
;
151 LLVMValueRef dadxy
= zero
;
/*
 * Case labels on the interpolation mode.
 * NOTE(review): the `switch` line and any break/continue statements are
 * not visible in this excerpt.
 */
157 case LP_INTERP_PERSPECTIVE
:
160 case LP_INTERP_LINEAR
:
/*
 * Slot 0 channels 0 and 1 are singled out here; the bodies of these two
 * branches are missing from the excerpt.
 */
161 if (attrib
== 0 && chan
== 0) {
164 else if (attrib
== 0 && chan
== 1) {
/* Load both derivatives and pre-compute their sum for lane 3 of dadq. */
168 dadx
= LLVMBuildLoad(builder
, LLVMBuildGEP(builder
, dadx_ptr
, &index
, 1, ""), "");
169 dady
= LLVMBuildLoad(builder
, LLVMBuildGEP(builder
, dady_ptr
, &index
, 1, ""), "");
170 dadxy
= LLVMBuildFAdd(builder
, dadx
, dady
, "");
171 attrib_name(dadx
, attrib
, chan
, ".dadx");
172 attrib_name(dady
, attrib
, chan
, ".dady");
173 attrib_name(dadxy
, attrib
, chan
, ".dadxy");
177 case LP_INTERP_CONSTANT
:
178 case LP_INTERP_FACING
:
/* Load the constant term for this attribute/channel. */
179 a0
= LLVMBuildLoad(builder
, LLVMBuildGEP(builder
, a0_ptr
, &index
, 1, ""), "");
180 attrib_name(a0
, attrib
, chan
, ".a0");
183 case LP_INTERP_POSITION
:
184 /* Nothing to do as the position coeffs are already setup in slot 0 */
193 * dadq = {0, dadx, dady, dadx + dady}
/* Assemble the delta vector one lane at a time, starting from undef. */
196 dadq
= coeff_bld
->undef
;
197 dadq
= LLVMBuildInsertElement(builder
, dadq
, zero
, i0
, "");
198 dadq
= LLVMBuildInsertElement(builder
, dadq
, dadx
, i1
, "");
199 dadq
= LLVMBuildInsertElement(builder
, dadq
, dady
, i2
, "");
200 dadq
= LLVMBuildInsertElement(builder
, dadq
, dadxy
, i3
, "");
/*
 * dadq2 is dadq added to itself, i.e. every delta doubled.
 * NOTE(review): presumably the step between neighbouring quads, which are
 * two pixels apart -- confirm.
 */
206 dadq2
= LLVMBuildFAdd(builder
, dadq
, dadq
, "");
209 * a = a0 + (x * dadx + y * dady)
/* Same slot 0 / channel 0 and 1 special cases; bodies not visible. */
212 if (attrib
== 0 && chan
== 0) {
215 else if (attrib
== 0 && chan
== 1) {
/* Constant and facing attributes get no x/y contribution. */
220 if (interp
!= LP_INTERP_CONSTANT
&&
221 interp
!= LP_INTERP_FACING
) {
222 LLVMValueRef ax
, ay
, axy
;
223 ax
= LLVMBuildFMul(builder
, bld
->x
, dadx
, "");
224 ay
= LLVMBuildFMul(builder
, bld
->y
, dady
, "");
225 axy
= LLVMBuildFAdd(builder
, ax
, ay
, "");
226 a
= LLVMBuildFAdd(builder
, a
, axy
, "");
/* Replicate the scalar into every lane of the coefficient vector type. */
234 a
= lp_build_broadcast(builder
, coeff_bld
->vec_type
, a
);
237 * Compute the attrib values on the upper-left corner of each quad.
240 a
= LLVMBuildFAdd(builder
, a
, dadq2
, "");
/*
 * Only compiled when PERSPECTIVE_DIVIDE_PER_QUAD is non-zero: take the
 * reciprocal of slot 0 channel 3 once, keep it in bld->oow and scale the
 * attribute by it.  The matching #endif is not visible in this excerpt.
 */
242 #if PERSPECTIVE_DIVIDE_PER_QUAD
247 if (interp
== LP_INTERP_PERSPECTIVE
) {
248 LLVMValueRef w
= bld
->a
[0][3];
250 assert(bld
->mask
[0] & TGSI_WRITEMASK_W
);
252 bld
->oow
= lp_build_rcp(coeff_bld
, w
);
253 lp_build_name(bld
->oow
, "oow");
255 a
= lp_build_mul(coeff_bld
, a
, bld
->oow
);
259 attrib_name(a
, attrib
, chan
, ".a");
260 attrib_name(dadq
, attrib
, chan
, ".dadq");
/* Publish the results for attribs_update(). */
262 bld
->a
[attrib
][chan
] = a
;
263 bld
->dadq
[attrib
][chan
] = dadq
;
271 * Increment the shader input attribute values.
272 * This is called when we move from one quad to the next.
/*
 * Emit the IR that recomputes bld->attribs[attrib][chan] for the
 * attribute slots from `start` up to (not including) `end`, for the quad
 * selected by `quad_index` (asserted to be < 4).
 *
 * Operations visible for each enabled channel, in source order:
 *   - pick the source vector: bld->a[attrib][chan], or
 *     bld->attribs[0][chan] for LP_INTERP_POSITION;
 *   - shuffle it with a constant index vector built from quad_index;
 *   - add the delta vector bld->dadq[attrib][chan];
 *   - for LP_INTERP_PERSPECTIVE, multiply by the reciprocal of
 *     bld->attribs[0][3] (when PERSPECTIVE_DIVIDE_PER_QUAD is 0);
 *   - for slot 0 channel 2, clamp from above with coeff_bld->one;
 *   - name the value and store it in bld->attribs[attrib][chan].
 *
 * NOTE(review): the declarations of quad_index, start, end, attrib, chan,
 * a and dadq, the #endif lines and all closing braces are missing from
 * this excerpt.  Which of the steps above belong only to the final `else`
 * branch therefore cannot be established from here.
 */
275 attribs_update(struct lp_build_interp_soa_context
*bld
,
280 struct lp_build_context
*coeff_bld
= &bld
->coeff_bld
;
/* Constant integer vector built from quad_index; used as the shuffle mask. */
281 LLVMValueRef shuffle
= lp_build_const_int_vec(coeff_bld
->type
, quad_index
);
282 LLVMValueRef oow
= NULL
;
286 assert(quad_index
< 4);
288 for(attrib
= start
; attrib
< end
; ++attrib
) {
289 const unsigned mask
= bld
->mask
[attrib
];
290 const unsigned interp
= bld
->interp
[attrib
];
291 for(chan
= 0; chan
< NUM_CHANNELS
; ++chan
) {
/* Skip channels that are not enabled in this attribute's mask. */
292 if(mask
& (1 << chan
)) {
/* Constant and facing attributes take the value computed by coeffs_init(). */
294 if (interp
== LP_INTERP_CONSTANT
||
295 interp
== LP_INTERP_FACING
) {
296 a
= bld
->a
[attrib
][chan
];
/* Position inputs reuse the already updated slot 0 value. */
298 else if (interp
== LP_INTERP_POSITION
) {
300 a
= bld
->attribs
[0][chan
];
305 a
= bld
->a
[attrib
][chan
];
308 * Broadcast the attribute value for this quad into all elements
311 a
= LLVMBuildShuffleVector(coeff_bld
->builder
,
312 a
, coeff_bld
->undef
, shuffle
, "");
315 * Get the derivatives.
318 dadq
= bld
->dadq
[attrib
][chan
];
/*
 * Only compiled when PERSPECTIVE_DIVIDE_PER_QUAD is non-zero.
 * NOTE(review): several argument lines of the calls below are missing
 * from this excerpt.
 */
320 #if PERSPECTIVE_DIVIDE_PER_QUAD
321 if (interp
== LP_INTERP_PERSPECTIVE
) {
322 LLVMValueRef dwdq
= bld
->dadq
[0][3];
326 oow
= LLVMBuildShuffleVector(coeff_bld
->builder
,
327 bld
->oow
, coeff_bld
->undef
,
331 dadq
= lp_build_sub(coeff_bld
,
333 lp_build_mul(coeff_bld
, a
, dwdq
));
334 dadq
= lp_build_mul(coeff_bld
, dadq
, oow
);
339 * Add the derivatives
342 a
= lp_build_add(coeff_bld
, a
, dadq
);
/* Default build (macro is 0): reciprocal of w followed by a multiply. */
344 #if !PERSPECTIVE_DIVIDE_PER_QUAD
345 if (interp
== LP_INTERP_PERSPECTIVE
) {
347 LLVMValueRef w
= bld
->attribs
[0][3];
349 assert(bld
->mask
[0] & TGSI_WRITEMASK_W
);
350 oow
= lp_build_rcp(coeff_bld
, w
);
352 a
= lp_build_mul(coeff_bld
, a
, oow
);
/* Slot 0 channel 2 is limited from above by coeff_bld->one. */
356 if (attrib
== 0 && chan
== 2) {
357 /* FIXME: Depth values can exceed 1.0, due to the fact that
358 * setup interpolation coefficients refer to (0,0) which causes
359 * precision loss. So we must clamp to 1.0 here to avoid artifacts
361 a
= lp_build_min(coeff_bld
, a
, coeff_bld
->one
);
364 attrib_name(a
, attrib
, chan
, "");
366 bld
->attribs
[attrib
][chan
] = a
;
374 * Generate the position vectors.
376 * Parameter x0, y0 are the integer values with upper left coordinates.
/*
 * Convert x0 and y0 to the coefficient element type with a
 * signed-integer-to-float conversion (LLVMBuildSIToFP) and store the
 * results in bld->x and bld->y.
 *
 * NOTE(review): the declarations of the x0 and y0 parameters, the return
 * type and the braces are missing from this excerpt.
 */
379 pos_init(struct lp_build_interp_soa_context
*bld
,
383 struct lp_build_context
*coeff_bld
= &bld
->coeff_bld
;
385 bld
->x
= LLVMBuildSIToFP(coeff_bld
->builder
, x0
, coeff_bld
->elem_type
, "");
386 bld
->y
= LLVMBuildSIToFP(coeff_bld
->builder
, y0
, coeff_bld
->elem_type
, "");
391 * Initialize fragment shader input attribute info.
/*
 * Set up an interpolation context from scratch.
 *
 * Steps visible in the body:
 *   1. zero the whole *bld structure;
 *   2. describe the coefficient type (floating, signed, 32 bits wide,
 *      QUAD_SIZE elements) and assert that the caller's `type` is
 *      bytewise identical to it;
 *   3. initialise bld->coeff_bld with `builder` and that type;
 *   4. point bld->pos at attribute slot 0 and bld->inputs at slot 1;
 *   5. configure slot 0 with the full XYZW mask and linear interpolation;
 *   6. copy usage_mask and interp of each entry of `inputs` into slots
 *      1 .. num_inputs;
 *   7. fill every bld->attribs[][] entry with the undef value;
 *   8. call pos_init() and then coeffs_init().
 *
 * bld       context to initialise (fully overwritten)
 * inputs    per-input description; usage_mask and interp are read
 * builder   LLVM IR builder handed to lp_build_context_init()
 * dadx_ptr  forwarded to coeffs_init()
 * dady_ptr  forwarded to coeffs_init()
 *
 * NOTE(review): the declarations of num_inputs, type, a0_ptr, x0, y0 and
 * the local attrib/chan, the return type and the braces are missing from
 * this excerpt.
 */
394 lp_build_interp_soa_init(struct lp_build_interp_soa_context
*bld
,
396 const struct lp_shader_input
*inputs
,
397 LLVMBuilderRef builder
,
400 LLVMValueRef dadx_ptr
,
401 LLVMValueRef dady_ptr
,
405 struct lp_type coeff_type
;
409 memset(bld
, 0, sizeof *bld
);
/* Zero first so that the memcmp() in the assert below sees no stray bytes. */
411 memset(&coeff_type
, 0, sizeof coeff_type
);
412 coeff_type
.floating
= TRUE
;
413 coeff_type
.sign
= TRUE
;
414 coeff_type
.width
= 32;
415 coeff_type
.length
= QUAD_SIZE
;
417 /* XXX: we don't support interpolating into any other types */
418 assert(memcmp(&coeff_type
, &type
, sizeof coeff_type
) == 0);
420 lp_build_context_init(&bld
->coeff_bld
, builder
, coeff_type
);
422 /* For convenience */
423 bld
->pos
= bld
->attribs
[0];
424 bld
->inputs
= (const LLVMValueRef (*)[NUM_CHANNELS
]) bld
->attribs
[1];
/* Slot 0: all four channels enabled, linear interpolation. */
427 bld
->num_attribs
= 1;
428 bld
->mask
[0] = TGSI_WRITEMASK_XYZW
;
429 bld
->interp
[0] = LP_INTERP_LINEAR
;
/* Shader inputs occupy the slots after slot 0, hence the 1 + attrib. */
432 for (attrib
= 0; attrib
< num_inputs
; ++attrib
) {
433 bld
->mask
[1 + attrib
] = inputs
[attrib
].usage_mask
;
434 bld
->interp
[1 + attrib
] = inputs
[attrib
].interp
;
436 bld
->num_attribs
= 1 + num_inputs
;
438 /* Ensure all masked out input channels have a valid value */
439 for (attrib
= 0; attrib
< bld
->num_attribs
; ++attrib
) {
440 for (chan
= 0; chan
< NUM_CHANNELS
; ++chan
) {
441 bld
->attribs
[attrib
][chan
] = bld
->coeff_bld
.undef
;
/* pos_init() sets bld->x and bld->y, which coeffs_init() reads. */
445 pos_init(bld
, x0
, y0
);
447 coeffs_init(bld
, a0_ptr
, dadx_ptr
, dady_ptr
);
452 * Advance the position and inputs to the given quad within the block.
/*
 * Update the shader input slots (1 up to, not including,
 * bld->num_attribs) for the given quad by delegating to attribs_update().
 * Slot 0 is left untouched by this call.
 *
 * quad_index is asserted to be below 4.
 * NOTE(review): the declaration of quad_index, the return type and the
 * braces are missing from this excerpt.
 */
455 lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context
*bld
,
458 assert(quad_index
< 4);
460 attribs_update(bld
, quad_index
, 1, bld
->num_attribs
);
/*
 * Update attribute slot 0 only (range start 0, end 1) for the given quad
 * by delegating to attribs_update().
 *
 * quad_index is asserted to be below 4.
 * NOTE(review): the declaration of quad_index, the return type and the
 * braces are missing from this excerpt.
 */
464 lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context
*bld
,
467 assert(quad_index
< 4);
469 attribs_update(bld
, quad_index
, 0, 1);