llvmpipe: Always use floating-point operators for floating-point types
[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_interp.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * Position and shader input interpolation.
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 */
35
36 #include "pipe/p_shader_tokens.h"
37 #include "util/u_debug.h"
38 #include "util/u_memory.h"
39 #include "util/u_math.h"
40 #include "tgsi/tgsi_scan.h"
41 #include "gallivm/lp_bld_debug.h"
42 #include "gallivm/lp_bld_const.h"
43 #include "gallivm/lp_bld_arit.h"
44 #include "gallivm/lp_bld_swizzle.h"
45 #include "lp_bld_interp.h"
46
47
/*
 * The shader JIT function operates on blocks of quads.
 * Each block has 2x2 quads and each quad has 2x2 pixels.
 *
 * We iterate over the quads in order 0, 1, 2, 3:
 *
 *   #################
 *   #   |   #   |   #
 *   #---0---#---1---#
 *   #   |   #   |   #
 *   #################
 *   #   |   #   |   #
 *   #---2---#---3---#
 *   #   |   #   |   #
 *   #################
 *
 * Within each quad, we have four pixels which are represented in SOA
 * order:
 *
 *   #########
 *   # 0 | 1 #
 *   #---+---#
 *   # 2 | 3 #
 *   #########
 *
 * So the green channel (for example) of the four pixels is stored in
 * a single vector register: {g0, g1, g2, g3}.
 */
76
77
/*
 * Column (x) and row (y) index of each quad inside the 2x2 block of quads,
 * indexed by quad number: quads 0 and 1 form the top row, quads 2 and 3 the
 * bottom row.
 */
static const unsigned char quad_offset_x[4] = {
   0, 1,
   0, 1
};
static const unsigned char quad_offset_y[4] = {
   0, 0,
   1, 1
};
80
81
82 static void
83 attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
84 {
85 if(attrib == 0)
86 lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix);
87 else
88 lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
89 }
90
91
92 /**
93 * Initialize the bld->a0, dadx, dady fields. This involves fetching
94 * those values from the arrays which are passed into the JIT function.
95 */
96 static void
97 coeffs_init(struct lp_build_interp_soa_context *bld,
98 LLVMValueRef a0_ptr,
99 LLVMValueRef dadx_ptr,
100 LLVMValueRef dady_ptr)
101 {
102 struct lp_build_context *coeff_bld = &bld->coeff_bld;
103 LLVMBuilderRef builder = coeff_bld->builder;
104 LLVMValueRef zero = LLVMConstNull(coeff_bld->elem_type);
105 LLVMValueRef one = LLVMConstReal(coeff_bld->elem_type, 1.0);
106 LLVMValueRef i0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
107 LLVMValueRef i1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
108 LLVMValueRef i2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
109 LLVMValueRef i3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
110 LLVMValueRef oow = NULL;
111 unsigned attrib;
112 unsigned chan;
113
114 /* TODO: Use more vector operations */
115
116 for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
117 const unsigned mask = bld->mask[attrib];
118 const unsigned interp = bld->interp[attrib];
119 for (chan = 0; chan < NUM_CHANNELS; ++chan) {
120 if (mask & (1 << chan)) {
121 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), attrib*NUM_CHANNELS + chan, 0);
122 LLVMValueRef a0 = zero;
123 LLVMValueRef dadx = zero;
124 LLVMValueRef dady = zero;
125 LLVMValueRef dadxy = zero;
126 LLVMValueRef dadq;
127 LLVMValueRef dadq2;
128 LLVMValueRef a;
129
130 switch (interp) {
131 case LP_INTERP_PERSPECTIVE:
132 /* fall-through */
133
134 case LP_INTERP_LINEAR:
135 if (attrib == 0 && chan == 0) {
136 dadxy = dadx = one;
137 }
138 else if (attrib == 0 && chan == 1) {
139 dadxy = dady = one;
140 }
141 else {
142 dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), "");
143 dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), "");
144 dadxy = LLVMBuildFAdd(builder, dadx, dady, "");
145 attrib_name(dadx, attrib, chan, ".dadx");
146 attrib_name(dady, attrib, chan, ".dady");
147 attrib_name(dadxy, attrib, chan, ".dadxy");
148 }
149 /* fall-through */
150
151 case LP_INTERP_CONSTANT:
152 case LP_INTERP_FACING:
153 a0 = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr, &index, 1, ""), "");
154 attrib_name(a0, attrib, chan, ".a0");
155 break;
156
157 case LP_INTERP_POSITION:
158 /* Nothing to do as the position coeffs are already setup in slot 0 */
159 continue;
160
161 default:
162 assert(0);
163 break;
164 }
165
166 /*
167 * dadq = {0, dadx, dady, dadx + dady}
168 */
169
170 dadq = coeff_bld->undef;
171 dadq = LLVMBuildInsertElement(builder, dadq, zero, i0, "");
172 dadq = LLVMBuildInsertElement(builder, dadq, dadx, i1, "");
173 dadq = LLVMBuildInsertElement(builder, dadq, dady, i2, "");
174 dadq = LLVMBuildInsertElement(builder, dadq, dadxy, i3, "");
175
176 /*
177 * dadq2 = 2 * dq
178 */
179
180 dadq2 = LLVMBuildFAdd(builder, dadq, dadq, "");
181
182 /*
183 * a = a0 + x * dadx + y * dady
184 */
185
186 if (attrib == 0 && chan == 0) {
187 a = bld->x;
188 }
189 else if (attrib == 0 && chan == 1) {
190 a = bld->y;
191 }
192 else {
193 a = a0;
194 if (interp != LP_INTERP_CONSTANT &&
195 interp != LP_INTERP_FACING) {
196 LLVMValueRef tmp;
197 tmp = LLVMBuildFMul(builder, bld->x, dadx, "");
198 a = LLVMBuildFAdd(builder, a, tmp, "");
199 tmp = LLVMBuildFMul(builder, bld->y, dady, "");
200 a = LLVMBuildFAdd(builder, a, tmp, "");
201 }
202 }
203
204 /*
205 * a = {a, a, a, a}
206 */
207
208 a = lp_build_broadcast(builder, coeff_bld->vec_type, a);
209
210 /*
211 * Compute the attrib values on the upper-left corner of each quad.
212 */
213
214 a = LLVMBuildFAdd(builder, a, dadq2, "");
215
216 /*
217 * a *= 1 / w
218 * dadq *= 1 / w
219 */
220
221 if (interp == LP_INTERP_PERSPECTIVE) {
222 LLVMValueRef w = bld->a[0][3];
223 assert(attrib != 0);
224 assert(bld->mask[0] & TGSI_WRITEMASK_W);
225 if (!oow) {
226 oow = lp_build_rcp(coeff_bld, w);
227 lp_build_name(oow, "oow");
228 }
229 a = lp_build_mul(coeff_bld, a, oow);
230 dadq = lp_build_mul(coeff_bld, dadq, oow);
231 }
232
233 attrib_name(a, attrib, chan, ".a");
234 attrib_name(dadq, attrib, chan, ".dadq");
235
236 bld->a [attrib][chan] = a;
237 bld->dadq[attrib][chan] = dadq;
238 }
239 }
240 }
241 }
242
243
244 /**
245 * Increment the shader input attribute values.
246 * This is called when we move from one quad to the next.
247 */
248 static void
249 attribs_update(struct lp_build_interp_soa_context *bld, int quad_index)
250 {
251 struct lp_build_context *coeff_bld = &bld->coeff_bld;
252 LLVMValueRef shuffle = lp_build_const_int_vec(coeff_bld->type, quad_index);
253 unsigned attrib;
254 unsigned chan;
255
256 assert(quad_index < 4);
257
258 for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
259 const unsigned mask = bld->mask[attrib];
260 const unsigned interp = bld->interp[attrib];
261 for(chan = 0; chan < NUM_CHANNELS; ++chan) {
262 if(mask & (1 << chan)) {
263 LLVMValueRef a;
264 if (interp == LP_INTERP_CONSTANT ||
265 interp == LP_INTERP_FACING) {
266 a = bld->a[attrib][chan];
267 }
268 else if (interp == LP_INTERP_POSITION) {
269 assert(attrib > 0);
270 a = bld->attribs[0][chan];
271 }
272 else {
273 a = bld->a[attrib][chan];
274
275 /*
276 * Broadcast the attribute value for this quad into all elements
277 */
278
279 a = LLVMBuildShuffleVector(coeff_bld->builder,
280 a, coeff_bld->undef, shuffle, "");
281
282 /*
283 * Add the derivatives
284 */
285
286 a = lp_build_add(coeff_bld, a, bld->dadq[attrib][chan]);
287
288 attrib_name(a, attrib, chan, "");
289 }
290 bld->attribs[attrib][chan] = a;
291 }
292 }
293 }
294 }
295
296
297 /**
298 * Generate the position vectors.
299 *
300 * Parameter x0, y0 are the integer values with upper left coordinates.
301 */
302 static void
303 pos_init(struct lp_build_interp_soa_context *bld,
304 LLVMValueRef x0,
305 LLVMValueRef y0)
306 {
307 struct lp_build_context *coeff_bld = &bld->coeff_bld;
308
309 bld->x = LLVMBuildSIToFP(coeff_bld->builder, x0, coeff_bld->elem_type, "");
310 bld->y = LLVMBuildSIToFP(coeff_bld->builder, y0, coeff_bld->elem_type, "");
311 }
312
313
314 /**
315 * Initialize fragment shader input attribute info.
316 */
317 void
318 lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
319 unsigned num_inputs,
320 const struct lp_shader_input *inputs,
321 LLVMBuilderRef builder,
322 struct lp_type type,
323 LLVMValueRef a0_ptr,
324 LLVMValueRef dadx_ptr,
325 LLVMValueRef dady_ptr,
326 LLVMValueRef x0,
327 LLVMValueRef y0)
328 {
329 struct lp_type coeff_type;
330 unsigned attrib;
331 unsigned chan;
332
333 memset(bld, 0, sizeof *bld);
334
335 memset(&coeff_type, 0, sizeof coeff_type);
336 coeff_type.floating = TRUE;
337 coeff_type.sign = TRUE;
338 coeff_type.width = 32;
339 coeff_type.length = QUAD_SIZE;
340
341 /* XXX: we don't support interpolating into any other types */
342 assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0);
343
344 lp_build_context_init(&bld->coeff_bld, builder, coeff_type);
345
346 /* For convenience */
347 bld->pos = bld->attribs[0];
348 bld->inputs = (const LLVMValueRef (*)[NUM_CHANNELS]) bld->attribs[1];
349
350 /* Position */
351 bld->num_attribs = 1;
352 bld->mask[0] = TGSI_WRITEMASK_XYZW;
353 bld->interp[0] = LP_INTERP_LINEAR;
354
355 /* Inputs */
356 for (attrib = 0; attrib < num_inputs; ++attrib) {
357 bld->mask[1 + attrib] = inputs[attrib].usage_mask;
358 bld->interp[1 + attrib] = inputs[attrib].interp;
359 }
360 bld->num_attribs = 1 + num_inputs;
361
362 /* Ensure all masked out input channels have a valid value */
363 for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
364 for (chan = 0; chan < NUM_CHANNELS; ++chan) {
365 bld->attribs[attrib][chan] = bld->coeff_bld.undef;
366 }
367 }
368
369 pos_init(bld, x0, y0);
370
371 coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);
372
373 attribs_update(bld, 0);
374 }
375
376
/**
 * Move the position and inputs to the given quad of the block.
 *
 * @param bld         interpolation context set up by
 *                    lp_build_interp_soa_init()
 * @param quad_index  quad within the block; asserted to be below 4
 */
void
lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld,
                           int quad_index)
{
   assert(quad_index < 4);

   /* All the work happens in the file-local helper. */
   attribs_update(bld, quad_index);
}