2a374f8c3909014f4d1a8e717cfce48c3d9362ed
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_interp.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * Position and shader input interpolation.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 */
35
36 #include "pipe/p_shader_tokens.h"
37 #include "util/u_debug.h"
38 #include "util/u_memory.h"
39 #include "util/u_math.h"
40 #include "tgsi/tgsi_scan.h"
41 #include "gallivm/lp_bld_debug.h"
42 #include "gallivm/lp_bld_const.h"
43 #include "gallivm/lp_bld_arit.h"
44 #include "gallivm/lp_bld_swizzle.h"
45 #include "lp_bld_interp.h"
46
47
/*
 * The shader JIT function operates on blocks of quads.
 * Each block has 2x2 quads and each quad has 2x2 pixels.
 *
 * We iterate over the quads in order 0, 1, 2, 3:
 *
 * #################
 * #   |   #   |   #
 * #---0---#---1---#
 * #   |   #   |   #
 * #################
 * #   |   #   |   #
 * #---2---#---3---#
 * #   |   #   |   #
 * #################
 *
 * Within each quad, we have four pixels which are represented in SOA
 * order:
 *
 * #########
 * # 0 | 1 #
 * #---+---#
 * # 2 | 3 #
 * #########
 *
 * So the green channel (for example) of the four pixels is stored in
 * a single vector register: {g0, g1, g2, g3}.
 */
76
77
78 /**
79 * Do one perspective divide per quad.
80 *
81 * For perspective interpolation, the final attribute value is given
82 *
83 * a' = a/w = a * oow
84 *
85 * where
86 *
87 * a = a0 + dadx*x + dady*y
88 * w = w0 + dwdx*x + dwdy*y
89 * oow = 1/w = 1/(w0 + dwdx*x + dwdy*y)
90 *
91 * Instead of computing the division per pixel, with this macro we compute the
92 * division on the upper left pixel of each quad, and use a linear
93 * approximation in the remaining pixels, given by:
94 *
95 * da'dx = (dadx - dwdx*a)*oow
96 * da'dy = (dady - dwdy*a)*oow
97 *
98 * Ironically, this actually makes things slower -- probably because the
99 * divide hardware unit is rarely used, whereas the multiply unit is typically
100 * already saturated.
101 */
102 #define PERSPECTIVE_DIVIDE_PER_QUAD 0
103
104
/*
 * (x, y) offsets for indices 0..3 of a 2x2 arrangement in row-major order:
 * 0 -> (0,0), 1 -> (1,0), 2 -> (0,1), 3 -> (1,1).
 */
static const unsigned char quad_offset_x[4] = {
   0, 1,
   0, 1
};
static const unsigned char quad_offset_y[4] = {
   0, 0,
   1, 1
};
107
108
109 static void
110 attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
111 {
112 if(attrib == 0)
113 lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix);
114 else
115 lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
116 }
117
118
119 /**
120 * Initialize the bld->a0, dadx, dady fields. This involves fetching
121 * those values from the arrays which are passed into the JIT function.
122 */
123 static void
124 coeffs_init(struct lp_build_interp_soa_context *bld,
125 LLVMValueRef a0_ptr,
126 LLVMValueRef dadx_ptr,
127 LLVMValueRef dady_ptr)
128 {
129 struct lp_build_context *coeff_bld = &bld->coeff_bld;
130 LLVMBuilderRef builder = coeff_bld->builder;
131 LLVMValueRef zero = LLVMConstNull(coeff_bld->elem_type);
132 LLVMValueRef one = LLVMConstReal(coeff_bld->elem_type, 1.0);
133 LLVMValueRef i0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
134 LLVMValueRef i1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
135 LLVMValueRef i2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
136 LLVMValueRef i3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
137 unsigned attrib;
138 unsigned chan;
139
140 /* TODO: Use more vector operations */
141
142 for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
143 const unsigned mask = bld->mask[attrib];
144 const unsigned interp = bld->interp[attrib];
145 for (chan = 0; chan < NUM_CHANNELS; ++chan) {
146 if (mask & (1 << chan)) {
147 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), attrib*NUM_CHANNELS + chan, 0);
148 LLVMValueRef a0 = zero;
149 LLVMValueRef dadx = zero;
150 LLVMValueRef dady = zero;
151 LLVMValueRef dadxy = zero;
152 LLVMValueRef dadq;
153 LLVMValueRef dadq2;
154 LLVMValueRef a;
155
156 switch (interp) {
157 case LP_INTERP_PERSPECTIVE:
158 /* fall-through */
159
160 case LP_INTERP_LINEAR:
161 if (attrib == 0 && chan == 0) {
162 dadxy = dadx = one;
163 }
164 else if (attrib == 0 && chan == 1) {
165 dadxy = dady = one;
166 }
167 else {
168 dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), "");
169 dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), "");
170 dadxy = LLVMBuildFAdd(builder, dadx, dady, "");
171 attrib_name(dadx, attrib, chan, ".dadx");
172 attrib_name(dady, attrib, chan, ".dady");
173 attrib_name(dadxy, attrib, chan, ".dadxy");
174 }
175 /* fall-through */
176
177 case LP_INTERP_CONSTANT:
178 case LP_INTERP_FACING:
179 a0 = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr, &index, 1, ""), "");
180 attrib_name(a0, attrib, chan, ".a0");
181 break;
182
183 case LP_INTERP_POSITION:
184 /* Nothing to do as the position coeffs are already setup in slot 0 */
185 continue;
186
187 default:
188 assert(0);
189 break;
190 }
191
192 /*
193 * dadq = {0, dadx, dady, dadx + dady}
194 */
195
196 dadq = coeff_bld->undef;
197 dadq = LLVMBuildInsertElement(builder, dadq, zero, i0, "");
198 dadq = LLVMBuildInsertElement(builder, dadq, dadx, i1, "");
199 dadq = LLVMBuildInsertElement(builder, dadq, dady, i2, "");
200 dadq = LLVMBuildInsertElement(builder, dadq, dadxy, i3, "");
201
202 /*
203 * dadq2 = 2 * dq
204 */
205
206 dadq2 = LLVMBuildFAdd(builder, dadq, dadq, "");
207
208 /*
209 * a = a0 + x * dadx + y * dady
210 */
211
212 if (attrib == 0 && chan == 0) {
213 a = bld->x;
214 }
215 else if (attrib == 0 && chan == 1) {
216 a = bld->y;
217 }
218 else {
219 a = a0;
220 if (interp != LP_INTERP_CONSTANT &&
221 interp != LP_INTERP_FACING) {
222 LLVMValueRef tmp;
223 tmp = LLVMBuildFMul(builder, bld->x, dadx, "");
224 a = LLVMBuildFAdd(builder, a, tmp, "");
225 tmp = LLVMBuildFMul(builder, bld->y, dady, "");
226 a = LLVMBuildFAdd(builder, a, tmp, "");
227 }
228 }
229
230 /*
231 * a = {a, a, a, a}
232 */
233
234 a = lp_build_broadcast(builder, coeff_bld->vec_type, a);
235
236 /*
237 * Compute the attrib values on the upper-left corner of each quad.
238 */
239
240 a = LLVMBuildFAdd(builder, a, dadq2, "");
241
242 #if PERSPECTIVE_DIVIDE_PER_QUAD
243 /*
244 * a *= 1 / w
245 */
246
247 if (interp == LP_INTERP_PERSPECTIVE) {
248 LLVMValueRef w = bld->a[0][3];
249 assert(attrib != 0);
250 assert(bld->mask[0] & TGSI_WRITEMASK_W);
251 if (!bld->oow) {
252 bld->oow = lp_build_rcp(coeff_bld, w);
253 lp_build_name(bld->oow, "oow");
254 }
255 a = lp_build_mul(coeff_bld, a, bld->oow);
256 }
257 #endif
258
259 attrib_name(a, attrib, chan, ".a");
260 attrib_name(dadq, attrib, chan, ".dadq");
261
262 bld->a [attrib][chan] = a;
263 bld->dadq[attrib][chan] = dadq;
264 }
265 }
266 }
267 }
268
269
270 /**
271 * Increment the shader input attribute values.
272 * This is called when we move from one quad to the next.
273 */
274 static void
275 attribs_update(struct lp_build_interp_soa_context *bld, int quad_index)
276 {
277 struct lp_build_context *coeff_bld = &bld->coeff_bld;
278 LLVMValueRef shuffle = lp_build_const_int_vec(coeff_bld->type, quad_index);
279 LLVMValueRef oow = NULL;
280 unsigned attrib;
281 unsigned chan;
282
283 assert(quad_index < 4);
284
285 for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
286 const unsigned mask = bld->mask[attrib];
287 const unsigned interp = bld->interp[attrib];
288 for(chan = 0; chan < NUM_CHANNELS; ++chan) {
289 if(mask & (1 << chan)) {
290 LLVMValueRef a;
291 if (interp == LP_INTERP_CONSTANT ||
292 interp == LP_INTERP_FACING) {
293 a = bld->a[attrib][chan];
294 }
295 else if (interp == LP_INTERP_POSITION) {
296 assert(attrib > 0);
297 a = bld->attribs[0][chan];
298 }
299 else {
300 LLVMValueRef dadq;
301
302 a = bld->a[attrib][chan];
303
304 /*
305 * Broadcast the attribute value for this quad into all elements
306 */
307
308 a = LLVMBuildShuffleVector(coeff_bld->builder,
309 a, coeff_bld->undef, shuffle, "");
310
311 /*
312 * Get the derivatives.
313 */
314
315 dadq = bld->dadq[attrib][chan];
316
317 #if PERSPECTIVE_DIVIDE_PER_QUAD
318 if (interp == LP_INTERP_PERSPECTIVE) {
319 LLVMValueRef dwdq = bld->dadq[0][3];
320
321 if (oow == NULL) {
322 assert(bld->oow);
323 oow = LLVMBuildShuffleVector(coeff_bld->builder,
324 bld->oow, coeff_bld->undef,
325 shuffle, "");
326 }
327
328 dadq = lp_build_sub(coeff_bld,
329 dadq,
330 lp_build_mul(coeff_bld, a, dwdq));
331 dadq = lp_build_mul(coeff_bld, dadq, oow);
332 }
333 #endif
334
335 /*
336 * Add the derivatives
337 */
338
339 a = lp_build_add(coeff_bld, a, dadq);
340
341 #if !PERSPECTIVE_DIVIDE_PER_QUAD
342 if (interp == LP_INTERP_PERSPECTIVE) {
343 if (oow == NULL) {
344 LLVMValueRef w = bld->attribs[0][3];
345 assert(attrib != 0);
346 assert(bld->mask[0] & TGSI_WRITEMASK_W);
347 oow = lp_build_rcp(coeff_bld, w);
348 }
349 a = lp_build_mul(coeff_bld, a, oow);
350 }
351 #endif
352
353 attrib_name(a, attrib, chan, "");
354 }
355 bld->attribs[attrib][chan] = a;
356 }
357 }
358 }
359 }
360
361
362 /**
363 * Generate the position vectors.
364 *
365 * Parameter x0, y0 are the integer values with upper left coordinates.
366 */
367 static void
368 pos_init(struct lp_build_interp_soa_context *bld,
369 LLVMValueRef x0,
370 LLVMValueRef y0)
371 {
372 struct lp_build_context *coeff_bld = &bld->coeff_bld;
373
374 bld->x = LLVMBuildSIToFP(coeff_bld->builder, x0, coeff_bld->elem_type, "");
375 bld->y = LLVMBuildSIToFP(coeff_bld->builder, y0, coeff_bld->elem_type, "");
376 }
377
378
379 /**
380 * Initialize fragment shader input attribute info.
381 */
382 void
383 lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
384 unsigned num_inputs,
385 const struct lp_shader_input *inputs,
386 LLVMBuilderRef builder,
387 struct lp_type type,
388 LLVMValueRef a0_ptr,
389 LLVMValueRef dadx_ptr,
390 LLVMValueRef dady_ptr,
391 LLVMValueRef x0,
392 LLVMValueRef y0)
393 {
394 struct lp_type coeff_type;
395 unsigned attrib;
396 unsigned chan;
397
398 memset(bld, 0, sizeof *bld);
399
400 memset(&coeff_type, 0, sizeof coeff_type);
401 coeff_type.floating = TRUE;
402 coeff_type.sign = TRUE;
403 coeff_type.width = 32;
404 coeff_type.length = QUAD_SIZE;
405
406 /* XXX: we don't support interpolating into any other types */
407 assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0);
408
409 lp_build_context_init(&bld->coeff_bld, builder, coeff_type);
410
411 /* For convenience */
412 bld->pos = bld->attribs[0];
413 bld->inputs = (const LLVMValueRef (*)[NUM_CHANNELS]) bld->attribs[1];
414
415 /* Position */
416 bld->num_attribs = 1;
417 bld->mask[0] = TGSI_WRITEMASK_XYZW;
418 bld->interp[0] = LP_INTERP_LINEAR;
419
420 /* Inputs */
421 for (attrib = 0; attrib < num_inputs; ++attrib) {
422 bld->mask[1 + attrib] = inputs[attrib].usage_mask;
423 bld->interp[1 + attrib] = inputs[attrib].interp;
424 }
425 bld->num_attribs = 1 + num_inputs;
426
427 /* Ensure all masked out input channels have a valid value */
428 for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
429 for (chan = 0; chan < NUM_CHANNELS; ++chan) {
430 bld->attribs[attrib][chan] = bld->coeff_bld.undef;
431 }
432 }
433
434 pos_init(bld, x0, y0);
435
436 coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);
437
438 attribs_update(bld, 0);
439 }
440
441
/**
 * Advance the position and inputs to the given quad within the block.
 *
 * @param bld         context previously set up by lp_build_interp_soa_init()
 * @param quad_index  index of the quad within the block; must be in [0, 3]
 */
void
lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld,
                           int quad_index)
{
   /* quad_index is signed: reject negative values as well as values >= 4 */
   assert(quad_index >= 0 && quad_index < 4);

   attribs_update(bld, quad_index);
}