llvmpipe: fix incorrect array index in image dump code
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_interp.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * Position and shader input interpolation.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 */
35
36 #include "pipe/p_shader_tokens.h"
37 #include "util/u_debug.h"
38 #include "util/u_memory.h"
39 #include "util/u_math.h"
40 #include "tgsi/tgsi_scan.h"
41 #include "gallivm/lp_bld_debug.h"
42 #include "gallivm/lp_bld_const.h"
43 #include "gallivm/lp_bld_arit.h"
44 #include "gallivm/lp_bld_swizzle.h"
45 #include "lp_bld_interp.h"
46
47
48 /*
49 * The shader JIT function operates on blocks of quads.
50 * Each block has 2x2 quads and each quad has 2x2 pixels.
51 *
52 * We iterate over the quads in order 0, 1, 2, 3:
53 *
54 * #################
55 * #   |   #   |   #
56 * #---0---#---1---#
57 * #   |   #   |   #
58 * #################
59 * #   |   #   |   #
60 * #---2---#---3---#
61 * #   |   #   |   #
62 * #################
63 *
64 * Within each quad, we have four pixels which are represented in SOA
65 * order:
66 *
67 * #########
68 * # 0 | 1 #
69 * #---+---#
70 * # 2 | 3 #
71 * #########
72 *
73 * So the green channel (for example) of the four pixels is stored in
74 * a single vector register: {g0, g1, g2, g3}.
75 */
76
77
78 /**
79 * Do one perspective divide per quad.
80 *
81 * For perspective interpolation, the final attribute value is given
82 *
83 * a' = a/w = a * oow
84 *
85 * where
86 *
87 * a = a0 + dadx*x + dady*y
88 * w = w0 + dwdx*x + dwdy*y
89 * oow = 1/w = 1/(w0 + dwdx*x + dwdy*y)
90 *
91 * Instead of computing the division per pixel, with this macro we compute the
92 * division on the upper left pixel of each quad, and use a linear
93 * approximation in the remaining pixels, given by:
94 *
95 * da'dx = (dadx - dwdx*a)*oow
96 * da'dy = (dady - dwdy*a)*oow
97 *
98 * Ironically, this actually makes things slower -- probably because the
99 * divide hardware unit is rarely used, whereas the multiply unit is typically
100 * already saturated.
101 */
#define PERSPECTIVE_DIVIDE_PER_QUAD 0


/*
 * X and Y offsets of elements 0..3 of a 2x2 arrangement laid out as
 *
 *    0 1
 *    2 3
 *
 * i.e. element i sits at (i & 1, i >> 1).
 */
static const unsigned char quad_offset_x[4] = { 0, 1, 0, 1 };
static const unsigned char quad_offset_y[4] = { 0, 0, 1, 1 };
107
108
109 static void
110 attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
111 {
112 if(attrib == 0)
113 lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix);
114 else
115 lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
116 }
117
118
119 /**
120 * Initialize the bld->a0, dadx, dady fields. This involves fetching
121 * those values from the arrays which are passed into the JIT function.
122 */
123 static void
124 coeffs_init(struct lp_build_interp_soa_context *bld,
125 LLVMValueRef a0_ptr,
126 LLVMValueRef dadx_ptr,
127 LLVMValueRef dady_ptr)
128 {
129 struct lp_build_context *coeff_bld = &bld->coeff_bld;
130 struct gallivm_state *gallivm = coeff_bld->gallivm;
131 LLVMBuilderRef builder = gallivm->builder;
132 LLVMValueRef zero = LLVMConstNull(coeff_bld->elem_type);
133 LLVMValueRef one = LLVMConstReal(coeff_bld->elem_type, 1.0);
134 LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
135 LLVMValueRef i1 = lp_build_const_int32(gallivm, 1);
136 LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
137 LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
138 unsigned attrib;
139 unsigned chan;
140
141 /* TODO: Use more vector operations */
142
143 for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
144 const unsigned mask = bld->mask[attrib];
145 const unsigned interp = bld->interp[attrib];
146 for (chan = 0; chan < NUM_CHANNELS; ++chan) {
147 if (mask & (1 << chan)) {
148 LLVMValueRef index = lp_build_const_int32(gallivm,
149 attrib * NUM_CHANNELS + chan);
150 LLVMValueRef a0 = zero;
151 LLVMValueRef dadx = zero;
152 LLVMValueRef dady = zero;
153 LLVMValueRef dadxy = zero;
154 LLVMValueRef dadq;
155 LLVMValueRef dadq2;
156 LLVMValueRef a;
157
158 switch (interp) {
159 case LP_INTERP_PERSPECTIVE:
160 /* fall-through */
161
162 case LP_INTERP_LINEAR:
163 if (attrib == 0 && chan == 0) {
164 dadxy = dadx = one;
165 }
166 else if (attrib == 0 && chan == 1) {
167 dadxy = dady = one;
168 }
169 else {
170 dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), "");
171 dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), "");
172 dadxy = LLVMBuildFAdd(builder, dadx, dady, "");
173 attrib_name(dadx, attrib, chan, ".dadx");
174 attrib_name(dady, attrib, chan, ".dady");
175 attrib_name(dadxy, attrib, chan, ".dadxy");
176 }
177 /* fall-through */
178
179 case LP_INTERP_CONSTANT:
180 case LP_INTERP_FACING:
181 a0 = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr, &index, 1, ""), "");
182 attrib_name(a0, attrib, chan, ".a0");
183 break;
184
185 case LP_INTERP_POSITION:
186 /* Nothing to do as the position coeffs are already setup in slot 0 */
187 continue;
188
189 default:
190 assert(0);
191 break;
192 }
193
194 /*
195 * dadq = {0, dadx, dady, dadx + dady}
196 */
197
198 dadq = coeff_bld->undef;
199 dadq = LLVMBuildInsertElement(builder, dadq, zero, i0, "");
200 dadq = LLVMBuildInsertElement(builder, dadq, dadx, i1, "");
201 dadq = LLVMBuildInsertElement(builder, dadq, dady, i2, "");
202 dadq = LLVMBuildInsertElement(builder, dadq, dadxy, i3, "");
203
204 /*
205 * dadq2 = 2 * dq
206 */
207
208 dadq2 = LLVMBuildFAdd(builder, dadq, dadq, "");
209
210 /*
211 * a = a0 + (x * dadx + y * dady)
212 */
213
214 if (attrib == 0 && chan == 0) {
215 a = bld->x;
216 }
217 else if (attrib == 0 && chan == 1) {
218 a = bld->y;
219 }
220 else {
221 a = a0;
222 if (interp != LP_INTERP_CONSTANT &&
223 interp != LP_INTERP_FACING) {
224 LLVMValueRef ax, ay, axy;
225 ax = LLVMBuildFMul(builder, bld->x, dadx, "");
226 ay = LLVMBuildFMul(builder, bld->y, dady, "");
227 axy = LLVMBuildFAdd(builder, ax, ay, "");
228 a = LLVMBuildFAdd(builder, a, axy, "");
229 }
230 }
231
232 /*
233 * a = {a, a, a, a}
234 */
235
236 a = lp_build_broadcast(gallivm, coeff_bld->vec_type, a);
237
238 /*
239 * Compute the attrib values on the upper-left corner of each quad.
240 */
241
242 a = LLVMBuildFAdd(builder, a, dadq2, "");
243
244 #if PERSPECTIVE_DIVIDE_PER_QUAD
245 /*
246 * a *= 1 / w
247 */
248
249 if (interp == LP_INTERP_PERSPECTIVE) {
250 LLVMValueRef w = bld->a[0][3];
251 assert(attrib != 0);
252 assert(bld->mask[0] & TGSI_WRITEMASK_W);
253 if (!bld->oow) {
254 bld->oow = lp_build_rcp(coeff_bld, w);
255 lp_build_name(bld->oow, "oow");
256 }
257 a = lp_build_mul(coeff_bld, a, bld->oow);
258 }
259 #endif
260
261 attrib_name(a, attrib, chan, ".a");
262 attrib_name(dadq, attrib, chan, ".dadq");
263
264 bld->a [attrib][chan] = a;
265 bld->dadq[attrib][chan] = dadq;
266 }
267 }
268 }
269 }
270
271
272 /**
273 * Increment the shader input attribute values.
274 * This is called when we move from one quad to the next.
275 */
276 static void
277 attribs_update(struct lp_build_interp_soa_context *bld,
278 struct gallivm_state *gallivm,
279 int quad_index,
280 int start,
281 int end)
282 {
283 LLVMBuilderRef builder = gallivm->builder;
284 struct lp_build_context *coeff_bld = &bld->coeff_bld;
285 LLVMValueRef shuffle = lp_build_const_int_vec(gallivm, coeff_bld->type, quad_index);
286 LLVMValueRef oow = NULL;
287 unsigned attrib;
288 unsigned chan;
289
290 assert(quad_index < 4);
291
292 for(attrib = start; attrib < end; ++attrib) {
293 const unsigned mask = bld->mask[attrib];
294 const unsigned interp = bld->interp[attrib];
295 for(chan = 0; chan < NUM_CHANNELS; ++chan) {
296 if(mask & (1 << chan)) {
297 LLVMValueRef a;
298 if (interp == LP_INTERP_CONSTANT ||
299 interp == LP_INTERP_FACING) {
300 a = bld->a[attrib][chan];
301 }
302 else if (interp == LP_INTERP_POSITION) {
303 assert(attrib > 0);
304 a = bld->attribs[0][chan];
305 }
306 else {
307 LLVMValueRef dadq;
308
309 a = bld->a[attrib][chan];
310
311 /*
312 * Broadcast the attribute value for this quad into all elements
313 */
314
315 a = LLVMBuildShuffleVector(builder,
316 a, coeff_bld->undef, shuffle, "");
317
318 /*
319 * Get the derivatives.
320 */
321
322 dadq = bld->dadq[attrib][chan];
323
324 #if PERSPECTIVE_DIVIDE_PER_QUAD
325 if (interp == LP_INTERP_PERSPECTIVE) {
326 LLVMValueRef dwdq = bld->dadq[0][3];
327
328 if (oow == NULL) {
329 assert(bld->oow);
330 oow = LLVMBuildShuffleVector(coeff_bld->builder,
331 bld->oow, coeff_bld->undef,
332 shuffle, "");
333 }
334
335 dadq = lp_build_sub(coeff_bld,
336 dadq,
337 lp_build_mul(coeff_bld, a, dwdq));
338 dadq = lp_build_mul(coeff_bld, dadq, oow);
339 }
340 #endif
341
342 /*
343 * Add the derivatives
344 */
345
346 a = lp_build_add(coeff_bld, a, dadq);
347
348 #if !PERSPECTIVE_DIVIDE_PER_QUAD
349 if (interp == LP_INTERP_PERSPECTIVE) {
350 if (oow == NULL) {
351 LLVMValueRef w = bld->attribs[0][3];
352 assert(attrib != 0);
353 assert(bld->mask[0] & TGSI_WRITEMASK_W);
354 oow = lp_build_rcp(coeff_bld, w);
355 }
356 a = lp_build_mul(coeff_bld, a, oow);
357 }
358 #endif
359
360 if (attrib == 0 && chan == 2) {
361 /* FIXME: Depth values can exceed 1.0, due to the fact that
362 * setup interpolation coefficients refer to (0,0) which causes
363 * precision loss. So we must clamp to 1.0 here to avoid artifacts
364 */
365 a = lp_build_min(coeff_bld, a, coeff_bld->one);
366 }
367
368 attrib_name(a, attrib, chan, "");
369 }
370 bld->attribs[attrib][chan] = a;
371 }
372 }
373 }
374 }
375
376
377 /**
378 * Generate the position vectors.
379 *
380 * Parameter x0, y0 are the integer values with upper left coordinates.
381 */
382 static void
383 pos_init(struct lp_build_interp_soa_context *bld,
384 LLVMValueRef x0,
385 LLVMValueRef y0)
386 {
387 LLVMBuilderRef builder = bld->coeff_bld.gallivm->builder;
388 struct lp_build_context *coeff_bld = &bld->coeff_bld;
389
390 bld->x = LLVMBuildSIToFP(builder, x0, coeff_bld->elem_type, "");
391 bld->y = LLVMBuildSIToFP(builder, y0, coeff_bld->elem_type, "");
392 }
393
394
395 /**
396 * Initialize fragment shader input attribute info.
397 */
398 void
399 lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
400 struct gallivm_state *gallivm,
401 unsigned num_inputs,
402 const struct lp_shader_input *inputs,
403 LLVMBuilderRef builder,
404 struct lp_type type,
405 LLVMValueRef a0_ptr,
406 LLVMValueRef dadx_ptr,
407 LLVMValueRef dady_ptr,
408 LLVMValueRef x0,
409 LLVMValueRef y0)
410 {
411 struct lp_type coeff_type;
412 unsigned attrib;
413 unsigned chan;
414
415 memset(bld, 0, sizeof *bld);
416
417 memset(&coeff_type, 0, sizeof coeff_type);
418 coeff_type.floating = TRUE;
419 coeff_type.sign = TRUE;
420 coeff_type.width = 32;
421 coeff_type.length = QUAD_SIZE;
422
423 /* XXX: we don't support interpolating into any other types */
424 assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0);
425
426 lp_build_context_init(&bld->coeff_bld, gallivm, coeff_type);
427
428 /* For convenience */
429 bld->pos = bld->attribs[0];
430 bld->inputs = (const LLVMValueRef (*)[NUM_CHANNELS]) bld->attribs[1];
431
432 /* Position */
433 bld->num_attribs = 1;
434 bld->mask[0] = TGSI_WRITEMASK_XYZW;
435 bld->interp[0] = LP_INTERP_LINEAR;
436
437 /* Inputs */
438 for (attrib = 0; attrib < num_inputs; ++attrib) {
439 bld->mask[1 + attrib] = inputs[attrib].usage_mask;
440 bld->interp[1 + attrib] = inputs[attrib].interp;
441 }
442 bld->num_attribs = 1 + num_inputs;
443
444 /* Ensure all masked out input channels have a valid value */
445 for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
446 for (chan = 0; chan < NUM_CHANNELS; ++chan) {
447 bld->attribs[attrib][chan] = bld->coeff_bld.undef;
448 }
449 }
450
451 pos_init(bld, x0, y0);
452
453 coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);
454 }
455
456
457 /**
458 * Advance the position and inputs to the given quad within the block.
459 */
460 void
461 lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld,
462 struct gallivm_state *gallivm,
463 int quad_index)
464 {
465 assert(quad_index < 4);
466
467 attribs_update(bld, gallivm, quad_index, 1, bld->num_attribs);
468 }
469
/**
 * Advance the position to the given quad within the block.
 *
 * Updates attribute slot 0 only.
 */
void
lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld,
                               struct gallivm_state *gallivm,
                               int quad_index)
{
   const int pos_slot = 0;

   assert(quad_index < 4);

   attribs_update(bld, gallivm, quad_index, pos_slot, pos_slot + 1);
}
479