f0a0a0b6cfc6b7c39be208e8858d6f3e4cf1a412
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_interp.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * Position and shader input interpolation.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 */
35
36 #include "pipe/p_shader_tokens.h"
37 #include "util/u_debug.h"
38 #include "util/u_memory.h"
39 #include "util/u_math.h"
40 #include "tgsi/tgsi_scan.h"
41 #include "gallivm/lp_bld_debug.h"
42 #include "gallivm/lp_bld_const.h"
43 #include "gallivm/lp_bld_arit.h"
44 #include "gallivm/lp_bld_swizzle.h"
45 #include "lp_bld_interp.h"
46
47
/*
 * The shader JIT function operates on blocks of quads.
 * Each block has 2x2 quads and each quad has 2x2 pixels.
 *
 * We iterate over the quads in order 0, 1, 2, 3:
 *
 * #################
 * #   |   #   |   #
 * #---0---#---1---#
 * #   |   #   |   #
 * #################
 * #   |   #   |   #
 * #---2---#---3---#
 * #   |   #   |   #
 * #################
 *
 * Within each quad, we have four pixels which are represented in SOA
 * order:
 *
 * #########
 * # 0 | 1 #
 * #---+---#
 * # 2 | 3 #
 * #########
 *
 * So the green channel (for example) of the four pixels is stored in
 * a single vector register: {g0, g1, g2, g3}.
 */
76
77
/*
 * Per-pixel x/y offsets that pos_init() adds to a quad's upper-left
 * coordinate, indexed by the pixel's position (0..3) inside the quad.
 */
static const unsigned char quad_offset_x[4] = { 0, 1, 0, 1 };
static const unsigned char quad_offset_y[4] = { 0, 0, 1, 1 };
80
81
82 static void
83 attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
84 {
85 if(attrib == 0)
86 lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix);
87 else
88 lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
89 }
90
91
92 /**
93 * Initialize the bld->a0, dadx, dady fields. This involves fetching
94 * those values from the arrays which are passed into the JIT function.
95 */
96 static void
97 coeffs_init(struct lp_build_interp_soa_context *bld,
98 LLVMValueRef a0_ptr,
99 LLVMValueRef dadx_ptr,
100 LLVMValueRef dady_ptr)
101 {
102 struct lp_build_context *coeff_bld = &bld->coeff_bld;
103 LLVMBuilderRef builder = coeff_bld->builder;
104 unsigned attrib;
105 unsigned chan;
106
107 for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
108 const unsigned mask = bld->mask[attrib];
109 const unsigned interp = bld->interp[attrib];
110 for(chan = 0; chan < NUM_CHANNELS; ++chan) {
111 if(mask & (1 << chan)) {
112 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), attrib*NUM_CHANNELS + chan, 0);
113 LLVMValueRef a0 = coeff_bld->undef;
114 LLVMValueRef dadx = coeff_bld->undef;
115 LLVMValueRef dady = coeff_bld->undef;
116
117 switch( interp ) {
118 case LP_INTERP_PERSPECTIVE:
119 /* fall-through */
120
121 case LP_INTERP_LINEAR:
122 dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), "");
123 dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), "");
124 dadx = lp_build_broadcast_scalar(coeff_bld, dadx);
125 dady = lp_build_broadcast_scalar(coeff_bld, dady);
126 attrib_name(dadx, attrib, chan, ".dadx");
127 attrib_name(dady, attrib, chan, ".dady");
128 /* fall-through */
129
130 case LP_INTERP_CONSTANT:
131 case LP_INTERP_FACING:
132 a0 = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr, &index, 1, ""), "");
133 a0 = lp_build_broadcast_scalar(coeff_bld, a0);
134 attrib_name(a0, attrib, chan, ".a0");
135 break;
136
137 case LP_INTERP_POSITION:
138 /* Nothing to do as the position coeffs are already setup in slot 0 */
139 break;
140
141 default:
142 assert(0);
143 break;
144 }
145
146 bld->a0 [attrib][chan] = a0;
147 bld->dadx[attrib][chan] = dadx;
148 bld->dady[attrib][chan] = dady;
149 }
150 }
151 }
152 }
153
154
155 /**
156 * Emit LLVM code to compute the fragment shader input attribute values.
157 * For example, for a color input, we'll compute red, green, blue and alpha
158 * values for the four pixels in a quad.
159 * Recall that we're operating on 4-element vectors so each arithmetic
160 * operation is operating on the four pixels in a quad.
161 */
162 static void
163 attribs_init(struct lp_build_interp_soa_context *bld)
164 {
165 struct lp_build_context *coeff_bld = &bld->coeff_bld;
166 LLVMValueRef x = bld->pos[0];
167 LLVMValueRef y = bld->pos[1];
168 LLVMValueRef oow = NULL;
169 unsigned attrib;
170 unsigned chan;
171
172 for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
173 const unsigned mask = bld->mask[attrib];
174 const unsigned interp = bld->interp[attrib];
175 for(chan = 0; chan < NUM_CHANNELS; ++chan) {
176 if(mask & (1 << chan)) {
177 if (interp == LP_INTERP_POSITION) {
178 assert(attrib > 0);
179 bld->attribs[attrib][chan] = bld->attribs[0][chan];
180 }
181 else {
182 LLVMValueRef a0 = bld->a0 [attrib][chan];
183 LLVMValueRef dadx = bld->dadx[attrib][chan];
184 LLVMValueRef dady = bld->dady[attrib][chan];
185 LLVMValueRef res;
186
187 res = a0;
188
189 if (interp != LP_INTERP_CONSTANT &&
190 interp != LP_INTERP_FACING) {
191 /* res = res + x * dadx */
192 res = lp_build_add(coeff_bld, res, lp_build_mul(coeff_bld, x, dadx));
193 /* res = res + y * dady */
194 res = lp_build_add(coeff_bld, res, lp_build_mul(coeff_bld, y, dady));
195 }
196
197 /* Keep the value of the attribute before perspective divide
198 * for faster updates.
199 */
200 bld->attribs_pre[attrib][chan] = res;
201
202 if (interp == LP_INTERP_PERSPECTIVE) {
203 LLVMValueRef w = bld->pos[3];
204 assert(attrib != 0);
205 assert(bld->mask[0] & TGSI_WRITEMASK_W);
206 if(!oow)
207 oow = lp_build_rcp(coeff_bld, w);
208 res = lp_build_mul(coeff_bld, res, oow);
209 }
210
211 attrib_name(res, attrib, chan, "");
212
213 bld->attribs[attrib][chan] = res;
214 }
215 }
216 }
217 }
218 }
219
220
221 /**
222 * Increment the shader input attribute values.
223 * This is called when we move from one quad to the next.
224 */
225 static void
226 attribs_update(struct lp_build_interp_soa_context *bld, int quad_index)
227 {
228 struct lp_build_context *coeff_bld = &bld->coeff_bld;
229 LLVMValueRef oow = NULL;
230 unsigned attrib;
231 unsigned chan;
232
233 assert(quad_index < 4);
234
235 for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
236 const unsigned mask = bld->mask[attrib];
237 const unsigned interp = bld->interp[attrib];
238
239 if (interp != LP_INTERP_CONSTANT &&
240 interp != LP_INTERP_FACING) {
241 for(chan = 0; chan < NUM_CHANNELS; ++chan) {
242 if(mask & (1 << chan)) {
243 if (interp == LP_INTERP_POSITION) {
244 assert(attrib > 0);
245 bld->attribs[attrib][chan] = bld->attribs[0][chan];
246 }
247 else {
248 LLVMValueRef dadx = bld->dadx[attrib][chan];
249 LLVMValueRef dady = bld->dady[attrib][chan];
250 LLVMValueRef res;
251
252 res = bld->attribs_pre[attrib][chan];
253
254 if (quad_index == 1 || quad_index == 3) {
255 /* top-right or bottom-right quad */
256 /* build res = res + dadx + dadx */
257 res = lp_build_add(coeff_bld, res, dadx);
258 res = lp_build_add(coeff_bld, res, dadx);
259 }
260
261 if (quad_index == 2 || quad_index == 3) {
262 /* bottom-left or bottom-right quad */
263 /* build res = res + dady + dady */
264 res = lp_build_add(coeff_bld, res, dady);
265 res = lp_build_add(coeff_bld, res, dady);
266 }
267
268 if (interp == LP_INTERP_PERSPECTIVE) {
269 LLVMValueRef w = bld->pos[3];
270 assert(attrib != 0);
271 assert(bld->mask[0] & TGSI_WRITEMASK_W);
272 if(!oow)
273 oow = lp_build_rcp(coeff_bld, w);
274 res = lp_build_mul(coeff_bld, res, oow);
275 }
276
277 attrib_name(res, attrib, chan, "");
278
279 bld->attribs[attrib][chan] = res;
280 }
281 }
282 }
283 }
284 }
285 }
286
287
288 /**
289 * Generate the position vectors.
290 *
291 * Parameter x0, y0 are the integer values with upper left coordinates.
292 */
293 static void
294 pos_init(struct lp_build_interp_soa_context *bld,
295 LLVMValueRef x0,
296 LLVMValueRef y0)
297 {
298 struct lp_build_context *coeff_bld = &bld->coeff_bld;
299 LLVMValueRef x_offsets[QUAD_SIZE];
300 LLVMValueRef y_offsets[QUAD_SIZE];
301 unsigned i;
302
303 /*
304 * Derive from the quad's upper left scalar coordinates the coordinates for
305 * all other quad pixels
306 */
307
308 x0 = lp_build_broadcast(coeff_bld->builder, coeff_bld->int_vec_type, x0);
309 y0 = lp_build_broadcast(coeff_bld->builder, coeff_bld->int_vec_type, y0);
310
311 for(i = 0; i < QUAD_SIZE; ++i) {
312 x_offsets[i] = LLVMConstInt(coeff_bld->int_elem_type, quad_offset_x[i], 0);
313 y_offsets[i] = LLVMConstInt(coeff_bld->int_elem_type, quad_offset_y[i], 0);
314 }
315
316 x0 = LLVMBuildAdd(coeff_bld->builder, x0, LLVMConstVector(x_offsets, QUAD_SIZE), "");
317 y0 = LLVMBuildAdd(coeff_bld->builder, y0, LLVMConstVector(y_offsets, QUAD_SIZE), "");
318
319 x0 = LLVMBuildSIToFP(coeff_bld->builder, x0, coeff_bld->vec_type, "");
320 y0 = LLVMBuildSIToFP(coeff_bld->builder, y0, coeff_bld->vec_type, "");
321
322 lp_build_name(x0, "pos.x");
323 lp_build_name(y0, "pos.y");
324
325 bld->attribs[0][0] = x0;
326 bld->attribs[0][1] = y0;
327 }
328
329
330 /**
331 * Update quad position values when moving to the next quad.
332 */
333 static void
334 pos_update(struct lp_build_interp_soa_context *bld, int quad_index)
335 {
336 struct lp_build_context *coeff_bld = &bld->coeff_bld;
337 LLVMValueRef x = bld->attribs[0][0];
338 LLVMValueRef y = bld->attribs[0][1];
339 const int xstep = 2, ystep = 2;
340
341 if (quad_index == 1 || quad_index == 3) {
342 /* top-right or bottom-right quad in block */
343 /* build x += xstep */
344 x = lp_build_add(coeff_bld, x,
345 lp_build_const_vec(coeff_bld->type, xstep));
346 }
347
348 if (quad_index == 2) {
349 /* bottom-left quad in block */
350 /* build y += ystep */
351 y = lp_build_add(coeff_bld, y,
352 lp_build_const_vec(coeff_bld->type, ystep));
353 /* build x -= xstep */
354 x = lp_build_sub(coeff_bld, x,
355 lp_build_const_vec(coeff_bld->type, xstep));
356 }
357
358 lp_build_name(x, "pos.x");
359 lp_build_name(y, "pos.y");
360
361 bld->attribs[0][0] = x;
362 bld->attribs[0][1] = y;
363 }
364
365
366 /**
367 * Initialize fragment shader input attribute info.
368 */
369 void
370 lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
371 unsigned num_inputs,
372 const struct lp_shader_input *inputs,
373 LLVMBuilderRef builder,
374 struct lp_type type,
375 LLVMValueRef a0_ptr,
376 LLVMValueRef dadx_ptr,
377 LLVMValueRef dady_ptr,
378 LLVMValueRef x0,
379 LLVMValueRef y0)
380 {
381 struct lp_type coeff_type;
382 unsigned attrib;
383 unsigned chan;
384
385 memset(bld, 0, sizeof *bld);
386
387 memset(&coeff_type, 0, sizeof coeff_type);
388 coeff_type.floating = TRUE;
389 coeff_type.sign = TRUE;
390 coeff_type.width = 32;
391 coeff_type.length = QUAD_SIZE;
392
393 /* XXX: we don't support interpolating into any other types */
394 assert(memcmp(&coeff_type, &type, sizeof &coeff_type) == 0);
395
396 lp_build_context_init(&bld->coeff_bld, builder, coeff_type);
397
398 /* For convenience */
399 bld->pos = bld->attribs[0];
400 bld->inputs = (const LLVMValueRef (*)[NUM_CHANNELS]) bld->attribs[1];
401
402 /* Position */
403 bld->num_attribs = 1;
404 bld->mask[0] = TGSI_WRITEMASK_ZW;
405 bld->interp[0] = LP_INTERP_LINEAR;
406
407 /* Inputs */
408 for (attrib = 0; attrib < num_inputs; ++attrib) {
409 bld->mask[1 + attrib] = inputs[attrib].usage_mask;
410 bld->interp[1 + attrib] = inputs[attrib].interp;
411 }
412 bld->num_attribs = 1 + num_inputs;
413
414 /* Ensure all masked out input channels have a valid value */
415 for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
416 for (chan = 0; chan < NUM_CHANNELS; ++chan) {
417 bld->attribs[attrib][chan] = bld->coeff_bld.undef;
418 }
419 }
420
421 coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);
422
423 pos_init(bld, x0, y0);
424
425 attribs_init(bld);
426 }
427
428
/**
 * Advance the position and inputs to the given quad within the block.
 *
 * @param bld         interpolation context set up by the init function
 * @param quad_index  quad to move to; must be less than 4
 */
void
lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld,
                           int quad_index)
{
   assert(quad_index < 4);

   /* Position goes first: attributes interpolated as position copy their
    * values from slot 0 during attribs_update().
    */
   pos_update(bld, quad_index);
   attribs_update(bld, quad_index);
}