Merge remote branch 'origin/nv50-compiler'
[mesa.git] / src / gallium / drivers / llvmpipe / lp_setup_coef.c
1 /**************************************************************************
2 *
3 * Copyright 2010, VMware.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
/*
 * Triangle attribute interpolation coefficient (a0, dadx, dady) setup
 */
31
32 #include "util/u_math.h"
33 #include "util/u_memory.h"
34 #include "lp_perf.h"
35 #include "lp_setup_context.h"
36 #include "lp_setup_coef.h"
37 #include "lp_rast.h"
38 #include "lp_state_fs.h"
39
40 #if !defined(PIPE_ARCH_SSE)
41
42 /**
43 * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
44 */
45 static void constant_coef( struct lp_rast_shader_inputs *inputs,
46 unsigned slot,
47 const float value,
48 unsigned i )
49 {
50 inputs->a0[slot][i] = value;
51 inputs->dadx[slot][i] = 0.0f;
52 inputs->dady[slot][i] = 0.0f;
53 }
54
55
56
57 static void linear_coef( struct lp_rast_shader_inputs *inputs,
58 const struct lp_tri_info *info,
59 unsigned slot,
60 unsigned vert_attr,
61 unsigned i)
62 {
63 float a0 = info->v0[vert_attr][i];
64 float a1 = info->v1[vert_attr][i];
65 float a2 = info->v2[vert_attr][i];
66
67 float da01 = a0 - a1;
68 float da20 = a2 - a0;
69 float dadx = (da01 * info->dy20_ooa - info->dy01_ooa * da20);
70 float dady = (da20 * info->dx01_ooa - info->dx20_ooa * da01);
71
72 inputs->dadx[slot][i] = dadx;
73 inputs->dady[slot][i] = dady;
74
75 /* calculate a0 as the value which would be sampled for the
76 * fragment at (0,0), taking into account that we want to sample at
77 * pixel centers, in other words (0.5, 0.5).
78 *
79 * this is neat but unfortunately not a good way to do things for
80 * triangles with very large values of dadx or dady as it will
81 * result in the subtraction and re-addition from a0 of a very
82 * large number, which means we'll end up loosing a lot of the
83 * fractional bits and precision from a0. the way to fix this is
84 * to define a0 as the sample at a pixel center somewhere near vmin
85 * instead - i'll switch to this later.
86 */
87 inputs->a0[slot][i] = a0 - (dadx * info->x0_center +
88 dady * info->y0_center);
89 }
90
91
92 /**
93 * Compute a0, dadx and dady for a perspective-corrected interpolant,
94 * for a triangle.
95 * We basically multiply the vertex value by 1/w before computing
96 * the plane coefficients (a0, dadx, dady).
97 * Later, when we compute the value at a particular fragment position we'll
98 * divide the interpolated value by the interpolated W at that fragment.
99 */
100 static void perspective_coef( struct lp_rast_shader_inputs *inputs,
101 const struct lp_tri_info *info,
102 unsigned slot,
103 unsigned vert_attr,
104 unsigned i)
105 {
106 /* premultiply by 1/w (v[0][3] is always 1/w):
107 */
108 float a0 = info->v0[vert_attr][i] * info->v0[0][3];
109 float a1 = info->v1[vert_attr][i] * info->v1[0][3];
110 float a2 = info->v2[vert_attr][i] * info->v2[0][3];
111 float da01 = a0 - a1;
112 float da20 = a2 - a0;
113 float dadx = da01 * info->dy20_ooa - info->dy01_ooa * da20;
114 float dady = da20 * info->dx01_ooa - info->dx20_ooa * da01;
115
116 inputs->dadx[slot][i] = dadx;
117 inputs->dady[slot][i] = dady;
118 inputs->a0[slot][i] = a0 - (dadx * info->x0_center +
119 dady * info->y0_center);
120 }
121
122
123 /**
124 * Special coefficient setup for gl_FragCoord.
125 * X and Y are trivial
126 * Z and W are copied from position_coef which should have already been computed.
127 * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
128 */
129 static void
130 setup_fragcoord_coef(struct lp_rast_shader_inputs *inputs,
131 const struct lp_tri_info *info,
132 unsigned slot,
133 unsigned usage_mask)
134 {
135 /*X*/
136 if (usage_mask & TGSI_WRITEMASK_X) {
137 inputs->a0[slot][0] = 0.0;
138 inputs->dadx[slot][0] = 1.0;
139 inputs->dady[slot][0] = 0.0;
140 }
141
142 /*Y*/
143 if (usage_mask & TGSI_WRITEMASK_Y) {
144 inputs->a0[slot][1] = 0.0;
145 inputs->dadx[slot][1] = 0.0;
146 inputs->dady[slot][1] = 1.0;
147 }
148
149 /*Z*/
150 if (usage_mask & TGSI_WRITEMASK_Z) {
151 linear_coef(inputs, info, slot, 0, 2);
152 }
153
154 /*W*/
155 if (usage_mask & TGSI_WRITEMASK_W) {
156 linear_coef(inputs, info, slot, 0, 3);
157 }
158 }
159
160
161 /**
162 * Setup the fragment input attribute with the front-facing value.
163 * \param frontface is the triangle front facing?
164 */
165 static void setup_facing_coef( struct lp_rast_shader_inputs *inputs,
166 unsigned slot,
167 boolean frontface,
168 unsigned usage_mask)
169 {
170 /* convert TRUE to 1.0 and FALSE to -1.0 */
171 if (usage_mask & TGSI_WRITEMASK_X)
172 constant_coef( inputs, slot, 2.0f * frontface - 1.0f, 0 );
173
174 if (usage_mask & TGSI_WRITEMASK_Y)
175 constant_coef( inputs, slot, 0.0f, 1 ); /* wasted */
176
177 if (usage_mask & TGSI_WRITEMASK_Z)
178 constant_coef( inputs, slot, 0.0f, 2 ); /* wasted */
179
180 if (usage_mask & TGSI_WRITEMASK_W)
181 constant_coef( inputs, slot, 0.0f, 3 ); /* wasted */
182 }
183
184
185 /**
186 * Compute the tri->coef[] array dadx, dady, a0 values.
187 */
188 void lp_setup_tri_coef( struct lp_setup_context *setup,
189 struct lp_rast_shader_inputs *inputs,
190 const float (*v0)[4],
191 const float (*v1)[4],
192 const float (*v2)[4],
193 boolean frontfacing)
194 {
195 unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
196 unsigned slot;
197 unsigned i;
198 struct lp_tri_info info;
199 float dx01 = v0[0][0] - v1[0][0];
200 float dy01 = v0[0][1] - v1[0][1];
201 float dx20 = v2[0][0] - v0[0][0];
202 float dy20 = v2[0][1] - v0[0][1];
203 float oneoverarea = 1.0f / (dx01 * dy20 - dx20 * dy01);
204
205 info.v0 = v0;
206 info.v1 = v1;
207 info.v2 = v2;
208 info.frontfacing = frontfacing;
209 info.x0_center = v0[0][0] - setup->pixel_offset;
210 info.y0_center = v0[0][1] - setup->pixel_offset;
211 info.dx01_ooa = dx01 * oneoverarea;
212 info.dx20_ooa = dx20 * oneoverarea;
213 info.dy01_ooa = dy01 * oneoverarea;
214 info.dy20_ooa = dy20 * oneoverarea;
215
216
217 /* setup interpolation for all the remaining attributes:
218 */
219 for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
220 unsigned vert_attr = setup->fs.input[slot].src_index;
221 unsigned usage_mask = setup->fs.input[slot].usage_mask;
222
223 switch (setup->fs.input[slot].interp) {
224 case LP_INTERP_CONSTANT:
225 if (setup->flatshade_first) {
226 for (i = 0; i < NUM_CHANNELS; i++)
227 if (usage_mask & (1 << i))
228 constant_coef(inputs, slot+1, info.v0[vert_attr][i], i);
229 }
230 else {
231 for (i = 0; i < NUM_CHANNELS; i++)
232 if (usage_mask & (1 << i))
233 constant_coef(inputs, slot+1, info.v2[vert_attr][i], i);
234 }
235 break;
236
237 case LP_INTERP_LINEAR:
238 for (i = 0; i < NUM_CHANNELS; i++)
239 if (usage_mask & (1 << i))
240 linear_coef(inputs, &info, slot+1, vert_attr, i);
241 break;
242
243 case LP_INTERP_PERSPECTIVE:
244 for (i = 0; i < NUM_CHANNELS; i++)
245 if (usage_mask & (1 << i))
246 perspective_coef(inputs, &info, slot+1, vert_attr, i);
247 fragcoord_usage_mask |= TGSI_WRITEMASK_W;
248 break;
249
250 case LP_INTERP_POSITION:
251 /*
252 * The generated pixel interpolators will pick up the coeffs from
253 * slot 0, so all need to ensure that the usage mask is covers all
254 * usages.
255 */
256 fragcoord_usage_mask |= usage_mask;
257 break;
258
259 case LP_INTERP_FACING:
260 setup_facing_coef(inputs, slot+1, info.frontfacing, usage_mask);
261 break;
262
263 default:
264 assert(0);
265 }
266 }
267
268 /* The internal position input is in slot zero:
269 */
270 setup_fragcoord_coef(inputs, &info, 0, fragcoord_usage_mask);
271 }
272
273 #else
/*
 * Empty placeholder function, compiled only when PIPE_ARCH_SSE is defined
 * and the coefficient setup code in this file is compiled out.
 */
extern void lp_setup_coef_dummy(void);

void
lp_setup_coef_dummy(void)
{
   /* intentionally empty */
}
278
279 #endif