52ba7dbdd2a252caaa11d903e6c22725a6c4d659
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_interp.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 VMware, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * Position and shader input interpolation.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 */
35
36 #include "pipe/p_shader_tokens.h"
37 #include "util/u_debug.h"
38 #include "util/u_memory.h"
39 #include "util/u_math.h"
40 #include "tgsi/tgsi_scan.h"
41 #include "gallivm/lp_bld_debug.h"
42 #include "gallivm/lp_bld_const.h"
43 #include "gallivm/lp_bld_arit.h"
44 #include "gallivm/lp_bld_swizzle.h"
45 #include "gallivm/lp_bld_flow.h"
46 #include "gallivm/lp_bld_logic.h"
47 #include "gallivm/lp_bld_struct.h"
48 #include "gallivm/lp_bld_gather.h"
49 #include "lp_bld_interp.h"
50
51
/*
 * The shader JIT function operates on blocks of quads.
 * Each block has 2x2 quads and each quad has 2x2 pixels.
 *
 * We iterate over the quads in order 0, 1, 2, 3:
 *
 * #################
 * #   |   #   |   #
 * #---0---#---1---#
 * #   |   #   |   #
 * #################
 * #   |   #   |   #
 * #---2---#---3---#
 * #   |   #   |   #
 * #################
 *
 * If we iterate over multiple quads at once, quads 01 and 23 are processed
 * together.
 *
 * Within each quad, we have four pixels which are represented in SOA
 * order:
 *
 * #########
 * # 0 | 1 #
 * #---+---#
 * # 2 | 3 #
 * #########
 *
 * So the green channel (for example) of the four pixels is stored in
 * a single vector register: {g0, g1, g2, g3}.
 * The order stays the same even with multiple quads:
 *
 *    0 1 4 5
 *    2 3 6 7
 *
 * is stored as g0..g7
 */
87
88
/**
 * Do one perspective divide per quad.
 *
 * For perspective interpolation, the final attribute value is given by
 *
 *    a' = a/w = a * oow
 *
 * where
 *
 *    a   = a0 + dadx*x + dady*y
 *    w   = w0 + dwdx*x + dwdy*y
 *    oow = 1/w = 1/(w0 + dwdx*x + dwdy*y)
 *
 * Instead of computing the division per pixel, with this macro we compute the
 * division on the upper left pixel of each quad, and use a linear
 * approximation in the remaining pixels, given by:
 *
 *    da'dx = (dadx - dwdx*a)*oow
 *    da'dy = (dady - dwdy*a)*oow
 *
 * Ironically, this actually makes things slower -- probably because the
 * divide hardware unit is rarely used, whereas the multiply unit is typically
 * already saturated.
 */
#define PERSPECTIVE_DIVIDE_PER_QUAD 0


/*
 * Pixel x/y offsets inside a 4x4 pixel area, indexed by pixel number.
 * Each row below lists the four pixels of one 2x2 quad.
 */
static const unsigned char quad_offset_x[16] = {
   0, 1, 0, 1,
   2, 3, 2, 3,
   0, 1, 0, 1,
   2, 3, 2, 3
};
static const unsigned char quad_offset_y[16] = {
   0, 0, 1, 1,
   0, 0, 1, 1,
   2, 2, 3, 3,
   2, 2, 3, 3
};
118
119
120 static void
121 attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
122 {
123 if(attrib == 0)
124 lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix);
125 else
126 lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
127 }
128
129 static void
130 calc_offsets(struct lp_build_context *coeff_bld,
131 unsigned quad_start_index,
132 LLVMValueRef *pixoffx,
133 LLVMValueRef *pixoffy)
134 {
135 unsigned i;
136 unsigned num_pix = coeff_bld->type.length;
137 struct gallivm_state *gallivm = coeff_bld->gallivm;
138 LLVMBuilderRef builder = coeff_bld->gallivm->builder;
139 LLVMValueRef nr, pixxf, pixyf;
140
141 *pixoffx = coeff_bld->undef;
142 *pixoffy = coeff_bld->undef;
143
144 for (i = 0; i < num_pix; i++) {
145 nr = lp_build_const_int32(gallivm, i);
146 pixxf = lp_build_const_float(gallivm, quad_offset_x[i % num_pix] +
147 (quad_start_index & 1) * 2);
148 pixyf = lp_build_const_float(gallivm, quad_offset_y[i % num_pix] +
149 (quad_start_index & 2));
150 *pixoffx = LLVMBuildInsertElement(builder, *pixoffx, pixxf, nr, "");
151 *pixoffy = LLVMBuildInsertElement(builder, *pixoffy, pixyf, nr, "");
152 }
153 }
154
155 static void
156 calc_centroid_offsets(struct lp_build_interp_soa_context *bld,
157 struct gallivm_state *gallivm,
158 LLVMValueRef loop_iter,
159 LLVMValueRef mask_store,
160 LLVMValueRef pix_center_offset,
161 LLVMValueRef *centroid_x, LLVMValueRef *centroid_y)
162 {
163 struct lp_build_context *coeff_bld = &bld->coeff_bld;
164 LLVMBuilderRef builder = gallivm->builder;
165 LLVMValueRef s_mask_and = NULL;
166 LLVMValueRef centroid_x_offset = pix_center_offset;
167 LLVMValueRef centroid_y_offset = pix_center_offset;
168 for (int s = bld->coverage_samples - 1; s >= 0; s--) {
169 LLVMValueRef sample_cov;
170 LLVMValueRef s_mask_idx = LLVMBuildMul(builder, bld->num_loop, lp_build_const_int32(gallivm, s), "");
171
172 s_mask_idx = LLVMBuildAdd(builder, s_mask_idx, loop_iter, "");
173 sample_cov = lp_build_pointer_get(builder, mask_store, s_mask_idx);
174 if (s == bld->coverage_samples - 1)
175 s_mask_and = sample_cov;
176 else
177 s_mask_and = LLVMBuildAnd(builder, s_mask_and, sample_cov, "");
178
179 LLVMValueRef x_val_idx = lp_build_const_int32(gallivm, s * 2);
180 LLVMValueRef y_val_idx = lp_build_const_int32(gallivm, s * 2 + 1);
181
182 x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx);
183 y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx);
184 x_val_idx = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
185 y_val_idx = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
186 centroid_x_offset = lp_build_select(coeff_bld, sample_cov, x_val_idx, centroid_x_offset);
187 centroid_y_offset = lp_build_select(coeff_bld, sample_cov, y_val_idx, centroid_y_offset);
188 }
189 *centroid_x = lp_build_select(coeff_bld, s_mask_and, pix_center_offset, centroid_x_offset);
190 *centroid_y = lp_build_select(coeff_bld, s_mask_and, pix_center_offset, centroid_y_offset);
191 }
192
193 /* Much easier, and significantly less instructions in the per-stamp
194 * part (less than half) but overall more instructions so a loss if
195 * most quads are active. Might be a win though with larger vectors.
196 * No ability to do per-quad divide (doable but not implemented)
197 * Could be made to work with passed in pixel offsets (i.e. active quad merging).
198 */
199 static void
200 coeffs_init_simple(struct lp_build_interp_soa_context *bld,
201 LLVMValueRef a0_ptr,
202 LLVMValueRef dadx_ptr,
203 LLVMValueRef dady_ptr)
204 {
205 struct lp_build_context *coeff_bld = &bld->coeff_bld;
206 struct lp_build_context *setup_bld = &bld->setup_bld;
207 struct gallivm_state *gallivm = coeff_bld->gallivm;
208 LLVMBuilderRef builder = gallivm->builder;
209 unsigned attrib;
210
211 for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
212 /*
213 * always fetch all 4 values for performance/simplicity
214 * Note: we do that here because it seems to generate better
215 * code. It generates a lot of moves initially but less
216 * moves later. As far as I can tell this looks like a
217 * llvm issue, instead of simply reloading the values from
218 * the passed in pointers it if it runs out of registers
219 * it spills/reloads them. Maybe some optimization passes
220 * would help.
221 * Might want to investigate this again later.
222 */
223 const unsigned interp = bld->interp[attrib];
224 LLVMValueRef index = lp_build_const_int32(gallivm,
225 attrib * TGSI_NUM_CHANNELS);
226 LLVMValueRef ptr;
227 LLVMValueRef dadxaos = setup_bld->zero;
228 LLVMValueRef dadyaos = setup_bld->zero;
229 LLVMValueRef a0aos = setup_bld->zero;
230
231 switch (interp) {
232 case LP_INTERP_PERSPECTIVE:
233 /* fall-through */
234
235 case LP_INTERP_LINEAR:
236 ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, "");
237 ptr = LLVMBuildBitCast(builder, ptr,
238 LLVMPointerType(setup_bld->vec_type, 0), "");
239 dadxaos = LLVMBuildLoad(builder, ptr, "");
240
241 ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, "");
242 ptr = LLVMBuildBitCast(builder, ptr,
243 LLVMPointerType(setup_bld->vec_type, 0), "");
244 dadyaos = LLVMBuildLoad(builder, ptr, "");
245
246 attrib_name(dadxaos, attrib, 0, ".dadxaos");
247 attrib_name(dadyaos, attrib, 0, ".dadyaos");
248 /* fall-through */
249
250 case LP_INTERP_CONSTANT:
251 case LP_INTERP_FACING:
252 ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, "");
253 ptr = LLVMBuildBitCast(builder, ptr,
254 LLVMPointerType(setup_bld->vec_type, 0), "");
255 a0aos = LLVMBuildLoad(builder, ptr, "");
256 attrib_name(a0aos, attrib, 0, ".a0aos");
257 break;
258
259 case LP_INTERP_POSITION:
260 /* Nothing to do as the position coeffs are already setup in slot 0 */
261 continue;
262
263 default:
264 assert(0);
265 break;
266 }
267 bld->a0aos[attrib] = a0aos;
268 bld->dadxaos[attrib] = dadxaos;
269 bld->dadyaos[attrib] = dadyaos;
270 }
271 }
272
273 /**
274 * Interpolate the shader input attribute values.
275 * This is called for each (group of) quad(s).
276 */
277 static void
278 attribs_update_simple(struct lp_build_interp_soa_context *bld,
279 struct gallivm_state *gallivm,
280 LLVMValueRef loop_iter,
281 LLVMValueRef mask_store,
282 LLVMValueRef sample_id,
283 int start,
284 int end)
285 {
286 LLVMBuilderRef builder = gallivm->builder;
287 struct lp_build_context *coeff_bld = &bld->coeff_bld;
288 struct lp_build_context *setup_bld = &bld->setup_bld;
289 LLVMValueRef oow = NULL;
290 unsigned attrib;
291 LLVMValueRef pixoffx;
292 LLVMValueRef pixoffy;
293 LLVMValueRef ptr;
294 LLVMValueRef pix_center_offset = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset);
295
296 /* could do this with code-generated passed in pixel offsets too */
297
298 assert(loop_iter);
299 ptr = LLVMBuildGEP(builder, bld->xoffset_store, &loop_iter, 1, "");
300 pixoffx = LLVMBuildLoad(builder, ptr, "");
301 ptr = LLVMBuildGEP(builder, bld->yoffset_store, &loop_iter, 1, "");
302 pixoffy = LLVMBuildLoad(builder, ptr, "");
303
304 pixoffx = LLVMBuildFAdd(builder, pixoffx,
305 lp_build_broadcast_scalar(coeff_bld, bld->x), "");
306 pixoffy = LLVMBuildFAdd(builder, pixoffy,
307 lp_build_broadcast_scalar(coeff_bld, bld->y), "");
308
309 for (attrib = start; attrib < end; attrib++) {
310 const unsigned mask = bld->mask[attrib];
311 const unsigned interp = bld->interp[attrib];
312 const unsigned loc = bld->interp_loc[attrib];
313 unsigned chan;
314
315 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
316 if (mask & (1 << chan)) {
317 LLVMValueRef index;
318 LLVMValueRef dadx = coeff_bld->zero;
319 LLVMValueRef dady = coeff_bld->zero;
320 LLVMValueRef a = coeff_bld->zero;
321 LLVMValueRef chan_pixoffx = pixoffx, chan_pixoffy = pixoffy;
322
323 index = lp_build_const_int32(gallivm, chan);
324 switch (interp) {
325 case LP_INTERP_PERSPECTIVE:
326 /* fall-through */
327
328 case LP_INTERP_LINEAR:
329 if (attrib == 0 && chan == 0) {
330 dadx = coeff_bld->one;
331 if (sample_id) {
332 LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
333 x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx);
334 a = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
335 } else {
336 a = pix_center_offset;
337 }
338 }
339 else if (attrib == 0 && chan == 1) {
340 dady = coeff_bld->one;
341 if (sample_id) {
342 LLVMValueRef y_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
343 y_val_idx = LLVMBuildAdd(gallivm->builder, y_val_idx, lp_build_const_int32(gallivm, 1), "");
344 y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx);
345 a = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
346 } else {
347 a = pix_center_offset;
348 }
349 }
350 else {
351 dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
352 coeff_bld->type, bld->dadxaos[attrib],
353 index);
354 dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
355 coeff_bld->type, bld->dadyaos[attrib],
356 index);
357 a = lp_build_extract_broadcast(gallivm, setup_bld->type,
358 coeff_bld->type, bld->a0aos[attrib],
359 index);
360
361 if (bld->coverage_samples > 1) {
362 LLVMValueRef xoffset = pix_center_offset;
363 LLVMValueRef yoffset = pix_center_offset;
364 if (loc == TGSI_INTERPOLATE_LOC_SAMPLE || (attrib == 0 && chan == 2 && sample_id)) {
365 LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
366 LLVMValueRef y_val_idx = LLVMBuildAdd(gallivm->builder, x_val_idx, lp_build_const_int32(gallivm, 1), "");
367
368 x_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, x_val_idx);
369 y_val_idx = lp_build_array_get(gallivm, bld->sample_pos_array, y_val_idx);
370 xoffset = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
371 yoffset = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
372 } else if (loc == TGSI_INTERPOLATE_LOC_CENTROID) {
373 calc_centroid_offsets(bld, gallivm, loop_iter, mask_store,
374 pix_center_offset, &xoffset, &yoffset);
375 }
376 chan_pixoffx = lp_build_add(coeff_bld, chan_pixoffx, xoffset);
377 chan_pixoffy = lp_build_add(coeff_bld, chan_pixoffy, yoffset);
378 }
379 }
380
381 /*
382 * a = a0 + (x * dadx + y * dady)
383 */
384 a = lp_build_fmuladd(builder, dadx, chan_pixoffx, a);
385 a = lp_build_fmuladd(builder, dady, chan_pixoffy, a);
386
387 if (interp == LP_INTERP_PERSPECTIVE) {
388 if (oow == NULL) {
389 LLVMValueRef w = bld->attribs[0][3];
390 assert(attrib != 0);
391 assert(bld->mask[0] & TGSI_WRITEMASK_W);
392 oow = lp_build_rcp(coeff_bld, w);
393 }
394 a = lp_build_mul(coeff_bld, a, oow);
395 }
396 break;
397
398 case LP_INTERP_CONSTANT:
399 case LP_INTERP_FACING:
400 a = lp_build_extract_broadcast(gallivm, setup_bld->type,
401 coeff_bld->type, bld->a0aos[attrib],
402 index);
403 break;
404
405 case LP_INTERP_POSITION:
406 assert(attrib > 0);
407 a = bld->attribs[0][chan];
408 break;
409
410 default:
411 assert(0);
412 break;
413 }
414
415 if ((attrib == 0) && (chan == 2) && !bld->depth_clamp){
416 /* FIXME: Depth values can exceed 1.0, due to the fact that
417 * setup interpolation coefficients refer to (0,0) which causes
418 * precision loss. So we must clamp to 1.0 here to avoid artifacts.
419 * Note though values outside [0,1] are perfectly valid with
420 * depth clip disabled.
421 * XXX: If depth clip is disabled but we force depth clamp
422 * we may get values larger than 1.0 in the fs (but not in
423 * depth test). Not sure if that's an issue...
424 * Also, on a similar note, it is not obvious if the depth values
425 * appearing in fs (with depth clip disabled) should be clamped
426 * to [0,1], clamped to near/far or not be clamped at all...
427 */
428 a = lp_build_min(coeff_bld, a, coeff_bld->one);
429 }
430 bld->attribs[attrib][chan] = a;
431 }
432 }
433 }
434 }
435
436 static LLVMValueRef
437 lp_build_interp_soa_indirect(struct lp_build_interp_soa_context *bld,
438 struct gallivm_state *gallivm,
439 unsigned attrib, unsigned chan,
440 LLVMValueRef indir_index,
441 LLVMValueRef pixoffx,
442 LLVMValueRef pixoffy)
443 {
444 LLVMBuilderRef builder = gallivm->builder;
445 struct lp_build_context *coeff_bld = &bld->coeff_bld;
446 const unsigned interp = bld->interp[attrib];
447 LLVMValueRef dadx = coeff_bld->zero;
448 LLVMValueRef dady = coeff_bld->zero;
449 LLVMValueRef a = coeff_bld->zero;
450
451 LLVMTypeRef u8ptr = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
452
453 indir_index = LLVMBuildAdd(builder, indir_index, lp_build_const_int_vec(gallivm, coeff_bld->type, attrib), "");
454 LLVMValueRef index = LLVMBuildMul(builder, indir_index, lp_build_const_int_vec(gallivm, coeff_bld->type, 4), "");
455 index = LLVMBuildAdd(builder, index, lp_build_const_int_vec(gallivm, coeff_bld->type, chan), "");
456
457 /* size up to byte indices */
458 index = LLVMBuildMul(builder, index, lp_build_const_int_vec(gallivm, coeff_bld->type, 4), "");
459
460 struct lp_type dst_type = coeff_bld->type;
461 dst_type.length = 1;
462 switch (interp) {
463 case LP_INTERP_PERSPECTIVE:
464 /* fall-through */
465 case LP_INTERP_LINEAR:
466
467 dadx = lp_build_gather(gallivm, coeff_bld->type.length,
468 coeff_bld->type.width, dst_type,
469 true, LLVMBuildBitCast(builder, bld->dadx_ptr, u8ptr, ""), index, false);
470
471 dady = lp_build_gather(gallivm, coeff_bld->type.length,
472 coeff_bld->type.width, dst_type,
473 true, LLVMBuildBitCast(builder, bld->dady_ptr, u8ptr, ""), index, false);
474
475 a = lp_build_gather(gallivm, coeff_bld->type.length,
476 coeff_bld->type.width, dst_type,
477 true, LLVMBuildBitCast(builder, bld->a0_ptr, u8ptr, ""), index, false);
478
479 /*
480 * a = a0 + (x * dadx + y * dady)
481 */
482 a = lp_build_fmuladd(builder, dadx, pixoffx, a);
483 a = lp_build_fmuladd(builder, dady, pixoffy, a);
484
485 if (interp == LP_INTERP_PERSPECTIVE) {
486 LLVMValueRef w = bld->attribs[0][3];
487 assert(attrib != 0);
488 assert(bld->mask[0] & TGSI_WRITEMASK_W);
489 LLVMValueRef oow = lp_build_rcp(coeff_bld, w);
490 a = lp_build_mul(coeff_bld, a, oow);
491 }
492
493 break;
494 case LP_INTERP_CONSTANT:
495 case LP_INTERP_FACING:
496 a = lp_build_gather(gallivm, coeff_bld->type.length,
497 coeff_bld->type.width, dst_type,
498 true, LLVMBuildBitCast(builder, bld->a0_ptr, u8ptr, ""), index, false);
499 break;
500 default:
501 assert(0);
502 break;
503 }
504 return a;
505 }
506
507 LLVMValueRef
508 lp_build_interp_soa(struct lp_build_interp_soa_context *bld,
509 struct gallivm_state *gallivm,
510 LLVMValueRef loop_iter,
511 LLVMValueRef mask_store,
512 unsigned attrib, unsigned chan,
513 unsigned loc,
514 LLVMValueRef indir_index,
515 LLVMValueRef offsets[2])
516 {
517 LLVMBuilderRef builder = gallivm->builder;
518 struct lp_build_context *coeff_bld = &bld->coeff_bld;
519 struct lp_build_context *setup_bld = &bld->setup_bld;
520 LLVMValueRef pixoffx;
521 LLVMValueRef pixoffy;
522 LLVMValueRef ptr;
523
524 /* could do this with code-generated passed in pixel offsets too */
525
526 assert(loop_iter);
527 ptr = LLVMBuildGEP(builder, bld->xoffset_store, &loop_iter, 1, "");
528 pixoffx = LLVMBuildLoad(builder, ptr, "");
529 ptr = LLVMBuildGEP(builder, bld->yoffset_store, &loop_iter, 1, "");
530 pixoffy = LLVMBuildLoad(builder, ptr, "");
531
532 pixoffx = LLVMBuildFAdd(builder, pixoffx,
533 lp_build_broadcast_scalar(coeff_bld, bld->x), "");
534 pixoffy = LLVMBuildFAdd(builder, pixoffy,
535 lp_build_broadcast_scalar(coeff_bld, bld->y), "");
536
537 LLVMValueRef pix_center_offset = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset);
538
539 if (loc == TGSI_INTERPOLATE_LOC_CENTER) {
540 if (bld->coverage_samples > 1) {
541 pixoffx = LLVMBuildFAdd(builder, pixoffx, pix_center_offset, "");
542 pixoffy = LLVMBuildFAdd(builder, pixoffy, pix_center_offset, "");
543 }
544
545 if (offsets[0])
546 pixoffx = LLVMBuildFAdd(builder, pixoffx,
547 offsets[0], "");
548 if (offsets[1])
549 pixoffy = LLVMBuildFAdd(builder, pixoffy,
550 offsets[1], "");
551 } else if (loc == TGSI_INTERPOLATE_LOC_SAMPLE) {
552 LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, offsets[0], lp_build_const_int_vec(gallivm, bld->coeff_bld.type, 2 * 4), "");
553 LLVMValueRef y_val_idx = LLVMBuildAdd(gallivm->builder, x_val_idx, lp_build_const_int_vec(gallivm, bld->coeff_bld.type, 4), "");
554
555 LLVMValueRef base_ptr = LLVMBuildBitCast(gallivm->builder, bld->sample_pos_array,
556 LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
557 LLVMValueRef xoffset = lp_build_gather(gallivm,
558 bld->coeff_bld.type.length,
559 bld->coeff_bld.type.width,
560 lp_elem_type(bld->coeff_bld.type),
561 false,
562 base_ptr,
563 x_val_idx, true);
564 LLVMValueRef yoffset = lp_build_gather(gallivm,
565 bld->coeff_bld.type.length,
566 bld->coeff_bld.type.width,
567 lp_elem_type(bld->coeff_bld.type),
568 false,
569 base_ptr,
570 y_val_idx, true);
571
572 if (bld->coverage_samples > 1) {
573 pixoffx = LLVMBuildFAdd(builder, pixoffx, xoffset, "");
574 pixoffy = LLVMBuildFAdd(builder, pixoffy, yoffset, "");
575 }
576 } else if (loc == TGSI_INTERPOLATE_LOC_CENTROID) {
577 LLVMValueRef centroid_x_offset, centroid_y_offset;
578
579 /* for centroid find covered samples for this quad. */
580 /* if all samples are covered use pixel centers */
581 if (bld->coverage_samples > 1) {
582 calc_centroid_offsets(bld, gallivm, loop_iter, mask_store,
583 pix_center_offset, &centroid_x_offset, &centroid_y_offset);
584
585 pixoffx = LLVMBuildFAdd(builder, pixoffx, centroid_x_offset, "");
586 pixoffy = LLVMBuildFAdd(builder, pixoffy, centroid_y_offset, "");
587 }
588 }
589
590 // remap attrib properly.
591 attrib++;
592
593 if (indir_index)
594 return lp_build_interp_soa_indirect(bld, gallivm, attrib, chan,
595 indir_index, pixoffx, pixoffy);
596
597
598 const unsigned interp = bld->interp[attrib];
599 LLVMValueRef dadx = coeff_bld->zero;
600 LLVMValueRef dady = coeff_bld->zero;
601 LLVMValueRef a = coeff_bld->zero;
602
603 LLVMValueRef index = lp_build_const_int32(gallivm, chan);
604
605 switch (interp) {
606 case LP_INTERP_PERSPECTIVE:
607 /* fall-through */
608 case LP_INTERP_LINEAR:
609 dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
610 coeff_bld->type, bld->dadxaos[attrib],
611 index);
612
613 dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
614 coeff_bld->type, bld->dadyaos[attrib],
615 index);
616
617 a = lp_build_extract_broadcast(gallivm, setup_bld->type,
618 coeff_bld->type, bld->a0aos[attrib],
619 index);
620
621 /*
622 * a = a0 + (x * dadx + y * dady)
623 */
624 a = lp_build_fmuladd(builder, dadx, pixoffx, a);
625 a = lp_build_fmuladd(builder, dady, pixoffy, a);
626
627 if (interp == LP_INTERP_PERSPECTIVE) {
628 LLVMValueRef w = bld->attribs[0][3];
629 assert(attrib != 0);
630 assert(bld->mask[0] & TGSI_WRITEMASK_W);
631 LLVMValueRef oow = lp_build_rcp(coeff_bld, w);
632 a = lp_build_mul(coeff_bld, a, oow);
633 }
634
635 break;
636 case LP_INTERP_CONSTANT:
637 case LP_INTERP_FACING:
638 a = lp_build_extract_broadcast(gallivm, setup_bld->type,
639 coeff_bld->type, bld->a0aos[attrib],
640 index);
641 break;
642 default:
643 assert(0);
644 break;
645 }
646 return a;
647 }
648
649 /**
650 * Generate the position vectors.
651 *
652 * Parameter x0, y0 are the integer values with upper left coordinates.
653 */
654 static void
655 pos_init(struct lp_build_interp_soa_context *bld,
656 LLVMValueRef x0,
657 LLVMValueRef y0)
658 {
659 LLVMBuilderRef builder = bld->coeff_bld.gallivm->builder;
660 struct lp_build_context *coeff_bld = &bld->coeff_bld;
661
662 bld->x = LLVMBuildSIToFP(builder, x0, coeff_bld->elem_type, "");
663 bld->y = LLVMBuildSIToFP(builder, y0, coeff_bld->elem_type, "");
664 }
665
666
667 /**
668 * Initialize fragment shader input attribute info.
669 */
670 void
671 lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
672 struct gallivm_state *gallivm,
673 unsigned num_inputs,
674 const struct lp_shader_input *inputs,
675 boolean pixel_center_integer,
676 unsigned coverage_samples,
677 LLVMValueRef sample_pos_array,
678 LLVMValueRef num_loop,
679 boolean depth_clamp,
680 LLVMBuilderRef builder,
681 struct lp_type type,
682 LLVMValueRef a0_ptr,
683 LLVMValueRef dadx_ptr,
684 LLVMValueRef dady_ptr,
685 LLVMValueRef x0,
686 LLVMValueRef y0)
687 {
688 struct lp_type coeff_type;
689 struct lp_type setup_type;
690 unsigned attrib;
691 unsigned chan;
692
693 memset(bld, 0, sizeof *bld);
694
695 memset(&coeff_type, 0, sizeof coeff_type);
696 coeff_type.floating = TRUE;
697 coeff_type.sign = TRUE;
698 coeff_type.width = 32;
699 coeff_type.length = type.length;
700
701 memset(&setup_type, 0, sizeof setup_type);
702 setup_type.floating = TRUE;
703 setup_type.sign = TRUE;
704 setup_type.width = 32;
705 setup_type.length = TGSI_NUM_CHANNELS;
706
707
708 /* XXX: we don't support interpolating into any other types */
709 assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0);
710
711 lp_build_context_init(&bld->coeff_bld, gallivm, coeff_type);
712 lp_build_context_init(&bld->setup_bld, gallivm, setup_type);
713
714 /* For convenience */
715 bld->pos = bld->attribs[0];
716 bld->inputs = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) bld->attribs[1];
717
718 /* Position */
719 bld->mask[0] = TGSI_WRITEMASK_XYZW;
720 bld->interp[0] = LP_INTERP_LINEAR;
721 bld->interp_loc[0] = 0;
722
723 /* Inputs */
724 for (attrib = 0; attrib < num_inputs; ++attrib) {
725 bld->mask[1 + attrib] = inputs[attrib].usage_mask;
726 bld->interp[1 + attrib] = inputs[attrib].interp;
727 bld->interp_loc[1 + attrib] = inputs[attrib].location;
728 }
729 bld->num_attribs = 1 + num_inputs;
730
731 /* needed for indirect */
732 bld->a0_ptr = a0_ptr;
733 bld->dadx_ptr = dadx_ptr;
734 bld->dady_ptr = dady_ptr;
735
736 /* Ensure all masked out input channels have a valid value */
737 for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
738 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
739 bld->attribs[attrib][chan] = bld->coeff_bld.undef;
740 }
741 }
742
743 if (pixel_center_integer) {
744 bld->pos_offset = 0.0;
745 } else {
746 bld->pos_offset = 0.5;
747 }
748 bld->depth_clamp = depth_clamp;
749 bld->coverage_samples = coverage_samples;
750 bld->num_loop = num_loop;
751 bld->sample_pos_array = sample_pos_array;
752
753 pos_init(bld, x0, y0);
754
755 /*
756 * Simple method (single step interpolation) may be slower if vector length
757 * is just 4, but the results are different (generally less accurate) with
758 * the other method, so always use more accurate version.
759 */
760 {
761 /* XXX this should use a global static table */
762 unsigned i;
763 unsigned num_loops = 16 / type.length;
764 LLVMValueRef pixoffx, pixoffy, index;
765 LLVMValueRef ptr;
766
767 bld->xoffset_store = lp_build_array_alloca(gallivm,
768 lp_build_vec_type(gallivm, type),
769 lp_build_const_int32(gallivm, num_loops),
770 "");
771 bld->yoffset_store = lp_build_array_alloca(gallivm,
772 lp_build_vec_type(gallivm, type),
773 lp_build_const_int32(gallivm, num_loops),
774 "");
775 for (i = 0; i < num_loops; i++) {
776 index = lp_build_const_int32(gallivm, i);
777 calc_offsets(&bld->coeff_bld, i*type.length/4, &pixoffx, &pixoffy);
778 ptr = LLVMBuildGEP(builder, bld->xoffset_store, &index, 1, "");
779 LLVMBuildStore(builder, pixoffx, ptr);
780 ptr = LLVMBuildGEP(builder, bld->yoffset_store, &index, 1, "");
781 LLVMBuildStore(builder, pixoffy, ptr);
782 }
783 }
784 coeffs_init_simple(bld, a0_ptr, dadx_ptr, dady_ptr);
785 }
786
787
788 /*
789 * Advance the position and inputs to the given quad within the block.
790 */
791
792 void
793 lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context *bld,
794 struct gallivm_state *gallivm,
795 LLVMValueRef quad_start_index,
796 LLVMValueRef mask_store,
797 LLVMValueRef sample_id)
798 {
799 attribs_update_simple(bld, gallivm, quad_start_index, mask_store, sample_id, 1, bld->num_attribs);
800 }
801
802 void
803 lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context *bld,
804 struct gallivm_state *gallivm,
805 LLVMValueRef quad_start_index,
806 LLVMValueRef sample_id)
807 {
808 attribs_update_simple(bld, gallivm, quad_start_index, NULL, sample_id, 0, 1);
809 }
810