1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 **************************************************************************/
/**
 * @file
 * Position and shader input interpolation.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 */
36 #include "pipe/p_shader_tokens.h"
37 #include "util/u_debug.h"
38 #include "util/u_memory.h"
39 #include "util/u_math.h"
40 #include "tgsi/tgsi_scan.h"
41 #include "gallivm/lp_bld_debug.h"
42 #include "gallivm/lp_bld_const.h"
43 #include "gallivm/lp_bld_arit.h"
44 #include "gallivm/lp_bld_swizzle.h"
45 #include "lp_bld_interp.h"
/*
 * The shader JIT function operates on blocks of quads.
 * Each block has 2x2 quads and each quad has 2x2 pixels.
 *
 * We iterate over the quads in order 0, 1, 2, 3.
 *
 * Within each quad, we have four pixels which are represented in SOA
 * order.
 *
 * So the green channel (for example) of the four pixels is stored in
 * a single vector register: {g0, g1, g2, g3}.
 */
/*
 * Per-pixel offsets inside one quad, relative to the quad's upper-left
 * pixel.  Entry i gives the (x, y) offset of vector element i; pos_init()
 * adds these to the broadcast upper-left coordinates.
 */
static const unsigned char quad_offset_x[4] = { 0, 1, 0, 1 };
static const unsigned char quad_offset_y[4] = { 0, 0, 1, 1 };
83 attrib_name(LLVMValueRef val
, unsigned attrib
, unsigned chan
, const char *suffix
)
86 lp_build_name(val
, "pos.%c%s", "xyzw"[chan
], suffix
);
88 lp_build_name(val
, "input%u.%c%s", attrib
- 1, "xyzw"[chan
], suffix
);
93 * Initialize the bld->a0, dadx, dady fields. This involves fetching
94 * those values from the arrays which are passed into the JIT function.
97 coeffs_init(struct lp_build_interp_soa_context
*bld
,
99 LLVMValueRef dadx_ptr
,
100 LLVMValueRef dady_ptr
)
102 struct lp_build_context
*coeff_bld
= &bld
->coeff_bld
;
103 LLVMBuilderRef builder
= coeff_bld
->builder
;
107 for(attrib
= 0; attrib
< bld
->num_attribs
; ++attrib
) {
108 const unsigned mask
= bld
->mask
[attrib
];
109 const unsigned interp
= bld
->interp
[attrib
];
110 for(chan
= 0; chan
< NUM_CHANNELS
; ++chan
) {
111 if(mask
& (1 << chan
)) {
112 LLVMValueRef index
= LLVMConstInt(LLVMInt32Type(), attrib
*NUM_CHANNELS
+ chan
, 0);
113 LLVMValueRef a0
= coeff_bld
->undef
;
114 LLVMValueRef dadx
= coeff_bld
->undef
;
115 LLVMValueRef dady
= coeff_bld
->undef
;
118 case LP_INTERP_PERSPECTIVE
:
121 case LP_INTERP_LINEAR
:
122 dadx
= LLVMBuildLoad(builder
, LLVMBuildGEP(builder
, dadx_ptr
, &index
, 1, ""), "");
123 dady
= LLVMBuildLoad(builder
, LLVMBuildGEP(builder
, dady_ptr
, &index
, 1, ""), "");
124 dadx
= lp_build_broadcast_scalar(coeff_bld
, dadx
);
125 dady
= lp_build_broadcast_scalar(coeff_bld
, dady
);
126 attrib_name(dadx
, attrib
, chan
, ".dadx");
127 attrib_name(dady
, attrib
, chan
, ".dady");
130 case LP_INTERP_CONSTANT
:
131 case LP_INTERP_FACING
:
132 a0
= LLVMBuildLoad(builder
, LLVMBuildGEP(builder
, a0_ptr
, &index
, 1, ""), "");
133 a0
= lp_build_broadcast_scalar(coeff_bld
, a0
);
134 attrib_name(a0
, attrib
, chan
, ".a0");
137 case LP_INTERP_POSITION
:
138 /* Nothing to do as the position coeffs are already setup in slot 0 */
146 bld
->a0
[attrib
][chan
] = a0
;
147 bld
->dadx
[attrib
][chan
] = dadx
;
148 bld
->dady
[attrib
][chan
] = dady
;
156 * Emit LLVM code to compute the fragment shader input attribute values.
157 * For example, for a color input, we'll compute red, green, blue and alpha
158 * values for the four pixels in a quad.
159 * Recall that we're operating on 4-element vectors so each arithmetic
160 * operation is operating on the four pixels in a quad.
163 attribs_init(struct lp_build_interp_soa_context
*bld
)
165 struct lp_build_context
*coeff_bld
= &bld
->coeff_bld
;
166 LLVMValueRef x
= bld
->pos
[0];
167 LLVMValueRef y
= bld
->pos
[1];
168 LLVMValueRef oow
= NULL
;
172 for(attrib
= 0; attrib
< bld
->num_attribs
; ++attrib
) {
173 const unsigned mask
= bld
->mask
[attrib
];
174 const unsigned interp
= bld
->interp
[attrib
];
175 for(chan
= 0; chan
< NUM_CHANNELS
; ++chan
) {
176 if(mask
& (1 << chan
)) {
177 if (interp
== LP_INTERP_POSITION
) {
179 bld
->attribs
[attrib
][chan
] = bld
->attribs
[0][chan
];
182 LLVMValueRef a0
= bld
->a0
[attrib
][chan
];
183 LLVMValueRef dadx
= bld
->dadx
[attrib
][chan
];
184 LLVMValueRef dady
= bld
->dady
[attrib
][chan
];
189 if (interp
!= LP_INTERP_CONSTANT
&&
190 interp
!= LP_INTERP_FACING
) {
191 /* res = res + x * dadx */
192 res
= lp_build_add(coeff_bld
, res
, lp_build_mul(coeff_bld
, x
, dadx
));
193 /* res = res + y * dady */
194 res
= lp_build_add(coeff_bld
, res
, lp_build_mul(coeff_bld
, y
, dady
));
197 /* Keep the value of the attribute before perspective divide
198 * for faster updates.
200 bld
->attribs_pre
[attrib
][chan
] = res
;
202 if (interp
== LP_INTERP_PERSPECTIVE
) {
203 LLVMValueRef w
= bld
->pos
[3];
205 assert(bld
->mask
[0] & TGSI_WRITEMASK_W
);
207 oow
= lp_build_rcp(coeff_bld
, w
);
208 res
= lp_build_mul(coeff_bld
, res
, oow
);
211 attrib_name(res
, attrib
, chan
, "");
213 bld
->attribs
[attrib
][chan
] = res
;
222 * Increment the shader input attribute values.
223 * This is called when we move from one quad to the next.
226 attribs_update(struct lp_build_interp_soa_context
*bld
, int quad_index
)
228 struct lp_build_context
*coeff_bld
= &bld
->coeff_bld
;
229 LLVMValueRef oow
= NULL
;
233 assert(quad_index
< 4);
235 for(attrib
= 0; attrib
< bld
->num_attribs
; ++attrib
) {
236 const unsigned mask
= bld
->mask
[attrib
];
237 const unsigned interp
= bld
->interp
[attrib
];
239 if (interp
!= LP_INTERP_CONSTANT
&&
240 interp
!= LP_INTERP_FACING
) {
241 for(chan
= 0; chan
< NUM_CHANNELS
; ++chan
) {
242 if(mask
& (1 << chan
)) {
243 if (interp
== LP_INTERP_POSITION
) {
245 bld
->attribs
[attrib
][chan
] = bld
->attribs
[0][chan
];
248 LLVMValueRef dadx
= bld
->dadx
[attrib
][chan
];
249 LLVMValueRef dady
= bld
->dady
[attrib
][chan
];
252 res
= bld
->attribs_pre
[attrib
][chan
];
254 if (quad_index
== 1 || quad_index
== 3) {
255 /* top-right or bottom-right quad */
256 /* build res = res + dadx + dadx */
257 res
= lp_build_add(coeff_bld
, res
, dadx
);
258 res
= lp_build_add(coeff_bld
, res
, dadx
);
261 if (quad_index
== 2 || quad_index
== 3) {
262 /* bottom-left or bottom-right quad */
263 /* build res = res + dady + dady */
264 res
= lp_build_add(coeff_bld
, res
, dady
);
265 res
= lp_build_add(coeff_bld
, res
, dady
);
268 if (interp
== LP_INTERP_PERSPECTIVE
) {
269 LLVMValueRef w
= bld
->pos
[3];
271 assert(bld
->mask
[0] & TGSI_WRITEMASK_W
);
273 oow
= lp_build_rcp(coeff_bld
, w
);
274 res
= lp_build_mul(coeff_bld
, res
, oow
);
277 attrib_name(res
, attrib
, chan
, "");
279 bld
->attribs
[attrib
][chan
] = res
;
289 * Generate the position vectors.
291 * Parameter x0, y0 are the integer values with upper left coordinates.
294 pos_init(struct lp_build_interp_soa_context
*bld
,
298 struct lp_build_context
*coeff_bld
= &bld
->coeff_bld
;
299 LLVMValueRef x_offsets
[QUAD_SIZE
];
300 LLVMValueRef y_offsets
[QUAD_SIZE
];
304 * Derive from the quad's upper left scalar coordinates the coordinates for
305 * all other quad pixels
308 x0
= lp_build_broadcast(coeff_bld
->builder
, coeff_bld
->int_vec_type
, x0
);
309 y0
= lp_build_broadcast(coeff_bld
->builder
, coeff_bld
->int_vec_type
, y0
);
311 for(i
= 0; i
< QUAD_SIZE
; ++i
) {
312 x_offsets
[i
] = LLVMConstInt(coeff_bld
->int_elem_type
, quad_offset_x
[i
], 0);
313 y_offsets
[i
] = LLVMConstInt(coeff_bld
->int_elem_type
, quad_offset_y
[i
], 0);
316 x0
= LLVMBuildAdd(coeff_bld
->builder
, x0
, LLVMConstVector(x_offsets
, QUAD_SIZE
), "");
317 y0
= LLVMBuildAdd(coeff_bld
->builder
, y0
, LLVMConstVector(y_offsets
, QUAD_SIZE
), "");
319 x0
= LLVMBuildSIToFP(coeff_bld
->builder
, x0
, coeff_bld
->vec_type
, "");
320 y0
= LLVMBuildSIToFP(coeff_bld
->builder
, y0
, coeff_bld
->vec_type
, "");
322 lp_build_name(x0
, "pos.x");
323 lp_build_name(y0
, "pos.y");
325 bld
->attribs
[0][0] = x0
;
326 bld
->attribs
[0][1] = y0
;
331 * Update quad position values when moving to the next quad.
334 pos_update(struct lp_build_interp_soa_context
*bld
, int quad_index
)
336 struct lp_build_context
*coeff_bld
= &bld
->coeff_bld
;
337 LLVMValueRef x
= bld
->attribs
[0][0];
338 LLVMValueRef y
= bld
->attribs
[0][1];
339 const int xstep
= 2, ystep
= 2;
341 if (quad_index
== 1 || quad_index
== 3) {
342 /* top-right or bottom-right quad in block */
343 /* build x += xstep */
344 x
= lp_build_add(coeff_bld
, x
,
345 lp_build_const_vec(coeff_bld
->type
, xstep
));
348 if (quad_index
== 2) {
349 /* bottom-left quad in block */
350 /* build y += ystep */
351 y
= lp_build_add(coeff_bld
, y
,
352 lp_build_const_vec(coeff_bld
->type
, ystep
));
353 /* build x -= xstep */
354 x
= lp_build_sub(coeff_bld
, x
,
355 lp_build_const_vec(coeff_bld
->type
, xstep
));
358 lp_build_name(x
, "pos.x");
359 lp_build_name(y
, "pos.y");
361 bld
->attribs
[0][0] = x
;
362 bld
->attribs
[0][1] = y
;
367 * Initialize fragment shader input attribute info.
370 lp_build_interp_soa_init(struct lp_build_interp_soa_context
*bld
,
372 const struct lp_shader_input
*inputs
,
373 LLVMBuilderRef builder
,
376 LLVMValueRef dadx_ptr
,
377 LLVMValueRef dady_ptr
,
381 struct lp_type coeff_type
;
385 memset(bld
, 0, sizeof *bld
);
387 memset(&coeff_type
, 0, sizeof coeff_type
);
388 coeff_type
.floating
= TRUE
;
389 coeff_type
.sign
= TRUE
;
390 coeff_type
.width
= 32;
391 coeff_type
.length
= QUAD_SIZE
;
393 /* XXX: we don't support interpolating into any other types */
394 assert(memcmp(&coeff_type
, &type
, sizeof &coeff_type
) == 0);
396 lp_build_context_init(&bld
->coeff_bld
, builder
, coeff_type
);
398 /* For convenience */
399 bld
->pos
= bld
->attribs
[0];
400 bld
->inputs
= (const LLVMValueRef (*)[NUM_CHANNELS
]) bld
->attribs
[1];
403 bld
->num_attribs
= 1;
404 bld
->mask
[0] = TGSI_WRITEMASK_ZW
;
405 bld
->interp
[0] = LP_INTERP_LINEAR
;
408 for (attrib
= 0; attrib
< num_inputs
; ++attrib
) {
409 bld
->mask
[1 + attrib
] = inputs
[attrib
].usage_mask
;
410 bld
->interp
[1 + attrib
] = inputs
[attrib
].interp
;
412 bld
->num_attribs
= 1 + num_inputs
;
414 /* Ensure all masked out input channels have a valid value */
415 for (attrib
= 0; attrib
< bld
->num_attribs
; ++attrib
) {
416 for (chan
= 0; chan
< NUM_CHANNELS
; ++chan
) {
417 bld
->attribs
[attrib
][chan
] = bld
->coeff_bld
.undef
;
421 coeffs_init(bld
, a0_ptr
, dadx_ptr
, dady_ptr
);
423 pos_init(bld
, x0
, y0
);
/**
 * Advance the position and inputs to the given quad within the block.
 *
 * @param bld         interpolation context set up by
 *                    lp_build_interp_soa_init()
 * @param quad_index  index of the quad to move to, expected in [0, 3]
 */
void
lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld,
                           int quad_index)
{
   assert(quad_index < 4);

   /* Position first: attribs_update() reads the values stored in slot 0. */
   pos_update(bld, quad_index);
   attribs_update(bld, quad_index);
}