Merge branch 'llvm-cliptest-viewport'
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_interp.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * Position and shader input interpolation.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 */
35
36 #include "pipe/p_shader_tokens.h"
37 #include "util/u_debug.h"
38 #include "util/u_memory.h"
39 #include "util/u_math.h"
40 #include "tgsi/tgsi_scan.h"
41 #include "gallivm/lp_bld_debug.h"
42 #include "gallivm/lp_bld_const.h"
43 #include "gallivm/lp_bld_arit.h"
44 #include "gallivm/lp_bld_swizzle.h"
45 #include "lp_bld_interp.h"
46
47
/*
 * The shader JIT function operates on blocks of quads.
 * Each block has 2x2 quads and each quad has 2x2 pixels.
 *
 * We iterate over the quads in order 0, 1, 2, 3:
 *
 * #################
 * #   |   #   |   #
 * #---0---#---1---#
 * #   |   #   |   #
 * #################
 * #   |   #   |   #
 * #---2---#---3---#
 * #   |   #   |   #
 * #################
 *
 * Within each quad, we have four pixels which are represented in SOA
 * order:
 *
 * #########
 * # 0 | 1 #
 * #---+---#
 * # 2 | 3 #
 * #########
 *
 * So the green channel (for example) of the four pixels is stored in
 * a single vector register: {g0, g1, g2, g3}.
 */
76
77
/**
 * Do one perspective divide per quad.
 *
 * For perspective interpolation, the final attribute value is given by
 *
 *    a' = a/w = a * oow
 *
 * where
 *
 *    a   = a0 + dadx*x + dady*y
 *    w   = w0 + dwdx*x + dwdy*y
 *    oow = 1/w = 1/(w0 + dwdx*x + dwdy*y)
 *
 * Instead of computing the division per pixel, when this macro is non-zero
 * we compute the division on the upper left pixel of each quad, and use a
 * linear approximation in the remaining pixels, given by:
 *
 *    da'dx = (dadx - dwdx*a)*oow
 *    da'dy = (dady - dwdy*a)*oow
 *
 * Ironically, this actually makes things slower -- probably because the
 * divide hardware unit is rarely used, whereas the multiply unit is typically
 * already saturated.
 */
#define PERSPECTIVE_DIVIDE_PER_QUAD 0
103
104
/*
 * X and Y offsets for the four entries of a 2x2 arrangement numbered
 *
 *    0 1
 *    2 3
 *
 * NOTE(review): nothing in the visible part of this file reads these
 * tables -- confirm whether they are still needed.
 */
static const unsigned char quad_offset_x[4] = {0, 1, 0, 1};
static const unsigned char quad_offset_y[4] = {0, 0, 1, 1};
107
108
109 static void
110 attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
111 {
112 if(attrib == 0)
113 lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix);
114 else
115 lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
116 }
117
118
119 /**
120 * Initialize the bld->a0, dadx, dady fields. This involves fetching
121 * those values from the arrays which are passed into the JIT function.
122 */
123 static void
124 coeffs_init(struct lp_build_interp_soa_context *bld,
125 LLVMValueRef a0_ptr,
126 LLVMValueRef dadx_ptr,
127 LLVMValueRef dady_ptr)
128 {
129 struct lp_build_context *coeff_bld = &bld->coeff_bld;
130 LLVMBuilderRef builder = coeff_bld->builder;
131 LLVMValueRef zero = LLVMConstNull(coeff_bld->elem_type);
132 LLVMValueRef one = LLVMConstReal(coeff_bld->elem_type, 1.0);
133 LLVMValueRef i0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
134 LLVMValueRef i1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
135 LLVMValueRef i2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
136 LLVMValueRef i3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
137 unsigned attrib;
138 unsigned chan;
139
140 /* TODO: Use more vector operations */
141
142 for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
143 const unsigned mask = bld->mask[attrib];
144 const unsigned interp = bld->interp[attrib];
145 for (chan = 0; chan < NUM_CHANNELS; ++chan) {
146 if (mask & (1 << chan)) {
147 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), attrib*NUM_CHANNELS + chan, 0);
148 LLVMValueRef a0 = zero;
149 LLVMValueRef dadx = zero;
150 LLVMValueRef dady = zero;
151 LLVMValueRef dadxy = zero;
152 LLVMValueRef dadq;
153 LLVMValueRef dadq2;
154 LLVMValueRef a;
155
156 switch (interp) {
157 case LP_INTERP_PERSPECTIVE:
158 /* fall-through */
159
160 case LP_INTERP_LINEAR:
161 if (attrib == 0 && chan == 0) {
162 dadxy = dadx = one;
163 }
164 else if (attrib == 0 && chan == 1) {
165 dadxy = dady = one;
166 }
167 else {
168 dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), "");
169 dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), "");
170 dadxy = LLVMBuildFAdd(builder, dadx, dady, "");
171 attrib_name(dadx, attrib, chan, ".dadx");
172 attrib_name(dady, attrib, chan, ".dady");
173 attrib_name(dadxy, attrib, chan, ".dadxy");
174 }
175 /* fall-through */
176
177 case LP_INTERP_CONSTANT:
178 case LP_INTERP_FACING:
179 a0 = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr, &index, 1, ""), "");
180 attrib_name(a0, attrib, chan, ".a0");
181 break;
182
183 case LP_INTERP_POSITION:
184 /* Nothing to do as the position coeffs are already setup in slot 0 */
185 continue;
186
187 default:
188 assert(0);
189 break;
190 }
191
192 /*
193 * dadq = {0, dadx, dady, dadx + dady}
194 */
195
196 dadq = coeff_bld->undef;
197 dadq = LLVMBuildInsertElement(builder, dadq, zero, i0, "");
198 dadq = LLVMBuildInsertElement(builder, dadq, dadx, i1, "");
199 dadq = LLVMBuildInsertElement(builder, dadq, dady, i2, "");
200 dadq = LLVMBuildInsertElement(builder, dadq, dadxy, i3, "");
201
202 /*
203 * dadq2 = 2 * dq
204 */
205
206 dadq2 = LLVMBuildFAdd(builder, dadq, dadq, "");
207
208 /*
209 * a = a0 + (x * dadx + y * dady)
210 */
211
212 if (attrib == 0 && chan == 0) {
213 a = bld->x;
214 }
215 else if (attrib == 0 && chan == 1) {
216 a = bld->y;
217 }
218 else {
219 a = a0;
220 if (interp != LP_INTERP_CONSTANT &&
221 interp != LP_INTERP_FACING) {
222 LLVMValueRef ax, ay, axy;
223 ax = LLVMBuildFMul(builder, bld->x, dadx, "");
224 ay = LLVMBuildFMul(builder, bld->y, dady, "");
225 axy = LLVMBuildFAdd(builder, ax, ay, "");
226 a = LLVMBuildFAdd(builder, a, axy, "");
227 }
228 }
229
230 /*
231 * a = {a, a, a, a}
232 */
233
234 a = lp_build_broadcast(builder, coeff_bld->vec_type, a);
235
236 /*
237 * Compute the attrib values on the upper-left corner of each quad.
238 */
239
240 a = LLVMBuildFAdd(builder, a, dadq2, "");
241
242 #if PERSPECTIVE_DIVIDE_PER_QUAD
243 /*
244 * a *= 1 / w
245 */
246
247 if (interp == LP_INTERP_PERSPECTIVE) {
248 LLVMValueRef w = bld->a[0][3];
249 assert(attrib != 0);
250 assert(bld->mask[0] & TGSI_WRITEMASK_W);
251 if (!bld->oow) {
252 bld->oow = lp_build_rcp(coeff_bld, w);
253 lp_build_name(bld->oow, "oow");
254 }
255 a = lp_build_mul(coeff_bld, a, bld->oow);
256 }
257 #endif
258
259 attrib_name(a, attrib, chan, ".a");
260 attrib_name(dadq, attrib, chan, ".dadq");
261
262 bld->a [attrib][chan] = a;
263 bld->dadq[attrib][chan] = dadq;
264 }
265 }
266 }
267 }
268
269
270 /**
271 * Increment the shader input attribute values.
272 * This is called when we move from one quad to the next.
273 */
274 static void
275 attribs_update(struct lp_build_interp_soa_context *bld,
276 int quad_index,
277 int start,
278 int end)
279 {
280 struct lp_build_context *coeff_bld = &bld->coeff_bld;
281 LLVMValueRef shuffle = lp_build_const_int_vec(coeff_bld->type, quad_index);
282 LLVMValueRef oow = NULL;
283 unsigned attrib;
284 unsigned chan;
285
286 assert(quad_index < 4);
287
288 for(attrib = start; attrib < end; ++attrib) {
289 const unsigned mask = bld->mask[attrib];
290 const unsigned interp = bld->interp[attrib];
291 for(chan = 0; chan < NUM_CHANNELS; ++chan) {
292 if(mask & (1 << chan)) {
293 LLVMValueRef a;
294 if (interp == LP_INTERP_CONSTANT ||
295 interp == LP_INTERP_FACING) {
296 a = bld->a[attrib][chan];
297 }
298 else if (interp == LP_INTERP_POSITION) {
299 assert(attrib > 0);
300 a = bld->attribs[0][chan];
301 }
302 else {
303 LLVMValueRef dadq;
304
305 a = bld->a[attrib][chan];
306
307 /*
308 * Broadcast the attribute value for this quad into all elements
309 */
310
311 a = LLVMBuildShuffleVector(coeff_bld->builder,
312 a, coeff_bld->undef, shuffle, "");
313
314 /*
315 * Get the derivatives.
316 */
317
318 dadq = bld->dadq[attrib][chan];
319
320 #if PERSPECTIVE_DIVIDE_PER_QUAD
321 if (interp == LP_INTERP_PERSPECTIVE) {
322 LLVMValueRef dwdq = bld->dadq[0][3];
323
324 if (oow == NULL) {
325 assert(bld->oow);
326 oow = LLVMBuildShuffleVector(coeff_bld->builder,
327 bld->oow, coeff_bld->undef,
328 shuffle, "");
329 }
330
331 dadq = lp_build_sub(coeff_bld,
332 dadq,
333 lp_build_mul(coeff_bld, a, dwdq));
334 dadq = lp_build_mul(coeff_bld, dadq, oow);
335 }
336 #endif
337
338 /*
339 * Add the derivatives
340 */
341
342 a = lp_build_add(coeff_bld, a, dadq);
343
344 #if !PERSPECTIVE_DIVIDE_PER_QUAD
345 if (interp == LP_INTERP_PERSPECTIVE) {
346 if (oow == NULL) {
347 LLVMValueRef w = bld->attribs[0][3];
348 assert(attrib != 0);
349 assert(bld->mask[0] & TGSI_WRITEMASK_W);
350 oow = lp_build_rcp(coeff_bld, w);
351 }
352 a = lp_build_mul(coeff_bld, a, oow);
353 }
354 #endif
355
356 if (attrib == 0 && chan == 2) {
357 /* FIXME: Depth values can exceed 1.0, due to the fact that
358 * setup interpolation coefficients refer to (0,0) which causes
359 * precision loss. So we must clamp to 1.0 here to avoid artifacts
360 */
361 a = lp_build_min(coeff_bld, a, coeff_bld->one);
362 }
363
364 attrib_name(a, attrib, chan, "");
365 }
366 bld->attribs[attrib][chan] = a;
367 }
368 }
369 }
370 }
371
372
373 /**
374 * Generate the position vectors.
375 *
376 * Parameter x0, y0 are the integer values with upper left coordinates.
377 */
378 static void
379 pos_init(struct lp_build_interp_soa_context *bld,
380 LLVMValueRef x0,
381 LLVMValueRef y0)
382 {
383 struct lp_build_context *coeff_bld = &bld->coeff_bld;
384
385 bld->x = LLVMBuildSIToFP(coeff_bld->builder, x0, coeff_bld->elem_type, "");
386 bld->y = LLVMBuildSIToFP(coeff_bld->builder, y0, coeff_bld->elem_type, "");
387 }
388
389
390 /**
391 * Initialize fragment shader input attribute info.
392 */
393 void
394 lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
395 unsigned num_inputs,
396 const struct lp_shader_input *inputs,
397 LLVMBuilderRef builder,
398 struct lp_type type,
399 LLVMValueRef a0_ptr,
400 LLVMValueRef dadx_ptr,
401 LLVMValueRef dady_ptr,
402 LLVMValueRef x0,
403 LLVMValueRef y0)
404 {
405 struct lp_type coeff_type;
406 unsigned attrib;
407 unsigned chan;
408
409 memset(bld, 0, sizeof *bld);
410
411 memset(&coeff_type, 0, sizeof coeff_type);
412 coeff_type.floating = TRUE;
413 coeff_type.sign = TRUE;
414 coeff_type.width = 32;
415 coeff_type.length = QUAD_SIZE;
416
417 /* XXX: we don't support interpolating into any other types */
418 assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0);
419
420 lp_build_context_init(&bld->coeff_bld, builder, coeff_type);
421
422 /* For convenience */
423 bld->pos = bld->attribs[0];
424 bld->inputs = (const LLVMValueRef (*)[NUM_CHANNELS]) bld->attribs[1];
425
426 /* Position */
427 bld->num_attribs = 1;
428 bld->mask[0] = TGSI_WRITEMASK_XYZW;
429 bld->interp[0] = LP_INTERP_LINEAR;
430
431 /* Inputs */
432 for (attrib = 0; attrib < num_inputs; ++attrib) {
433 bld->mask[1 + attrib] = inputs[attrib].usage_mask;
434 bld->interp[1 + attrib] = inputs[attrib].interp;
435 }
436 bld->num_attribs = 1 + num_inputs;
437
438 /* Ensure all masked out input channels have a valid value */
439 for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
440 for (chan = 0; chan < NUM_CHANNELS; ++chan) {
441 bld->attribs[attrib][chan] = bld->coeff_bld.undef;
442 }
443 }
444
445 pos_init(bld, x0, y0);
446
447 coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);
448 }
449
450
451 /**
452 * Advance the position and inputs to the given quad within the block.
453 */
454 void
455 lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld,
456 int quad_index)
457 {
458 assert(quad_index < 4);
459
460 attribs_update(bld, quad_index, 1, bld->num_attribs);
461 }
462
/**
 * Advance the position (attribute slot 0) to the given quad within the
 * block.
 *
 * @param bld         interpolation context
 * @param quad_index  quad within the block, in the range [0, 3]
 */
void
lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld,
                               int quad_index)
{
   /* quad_index is signed, so reject negative values as well: it ends up
    * as the element index of a vector shuffle.
    */
   assert(quad_index >= 0 && quad_index < 4);

   attribs_update(bld, quad_index, 0, 1);
}
471