Merge remote branch 'origin/master' into nv50-compiler
[mesa.git] / src / gallium / drivers / llvmpipe / lp_setup_coef.c
1 /**************************************************************************
2 *
3 * Copyright 2010, VMware.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
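/*
 * Triangle interpolation coefficient setup (a0, dadx, dady) for fragment
 * shader inputs.  Plain C path, compiled only when PIPE_ARCH_SSE is not
 * defined.
 */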
31
32 #include "util/u_math.h"
33 #include "util/u_memory.h"
34 #include "lp_perf.h"
35 #include "lp_setup_context.h"
36 #include "lp_setup_coef.h"
37 #include "lp_rast.h"
38 #include "lp_state_fs.h"
39
40 #if !defined(PIPE_ARCH_SSE)
41
42 /**
43 * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
44 */
45 static void constant_coef( struct lp_rast_shader_inputs *inputs,
46 unsigned slot,
47 const float value,
48 unsigned i )
49 {
50 inputs->a0[slot][i] = value;
51 inputs->dadx[slot][i] = 0.0f;
52 inputs->dady[slot][i] = 0.0f;
53 }
54
55
56
57 static void linear_coef( struct lp_rast_shader_inputs *inputs,
58 const struct lp_tri_info *info,
59 unsigned slot,
60 unsigned vert_attr,
61 unsigned i)
62 {
63 float a0 = info->v0[vert_attr][i];
64 float a1 = info->v1[vert_attr][i];
65 float a2 = info->v2[vert_attr][i];
66
67 float da01 = a0 - a1;
68 float da20 = a2 - a0;
69 float dadx = (da01 * info->dy20_ooa - info->dy01_ooa * da20);
70 float dady = (da20 * info->dx01_ooa - info->dx20_ooa * da01);
71
72 inputs->dadx[slot][i] = dadx;
73 inputs->dady[slot][i] = dady;
74
75 /* calculate a0 as the value which would be sampled for the
76 * fragment at (0,0), taking into account that we want to sample at
77 * pixel centers, in other words (0.5, 0.5).
78 *
79 * this is neat but unfortunately not a good way to do things for
80 * triangles with very large values of dadx or dady as it will
81 * result in the subtraction and re-addition from a0 of a very
82 * large number, which means we'll end up loosing a lot of the
83 * fractional bits and precision from a0. the way to fix this is
84 * to define a0 as the sample at a pixel center somewhere near vmin
85 * instead - i'll switch to this later.
86 */
87 inputs->a0[slot][i] = a0 - (dadx * info->x0_center +
88 dady * info->y0_center);
89 }
90
91
92 /**
93 * Compute a0, dadx and dady for a perspective-corrected interpolant,
94 * for a triangle.
95 * We basically multiply the vertex value by 1/w before computing
96 * the plane coefficients (a0, dadx, dady).
97 * Later, when we compute the value at a particular fragment position we'll
98 * divide the interpolated value by the interpolated W at that fragment.
99 */
100 static void perspective_coef( struct lp_rast_shader_inputs *inputs,
101 const struct lp_tri_info *info,
102 unsigned slot,
103 unsigned vert_attr,
104 unsigned i)
105 {
106 /* premultiply by 1/w (v[0][3] is always 1/w):
107 */
108 float a0 = info->v0[vert_attr][i] * info->v0[0][3];
109 float a1 = info->v1[vert_attr][i] * info->v1[0][3];
110 float a2 = info->v2[vert_attr][i] * info->v2[0][3];
111 float da01 = a0 - a1;
112 float da20 = a2 - a0;
113 float dadx = da01 * info->dy20_ooa - info->dy01_ooa * da20;
114 float dady = da20 * info->dx01_ooa - info->dx20_ooa * da01;
115
116 inputs->dadx[slot][i] = dadx;
117 inputs->dady[slot][i] = dady;
118 inputs->a0[slot][i] = a0 - (dadx * info->x0_center +
119 dady * info->y0_center);
120 }
121
122
123 /**
124 * Special coefficient setup for gl_FragCoord.
125 * X and Y are trivial
126 * Z and W are copied from position_coef which should have already been computed.
127 * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
128 */
129 static void
130 setup_fragcoord_coef(struct lp_rast_shader_inputs *inputs,
131 const struct lp_tri_info *info,
132 unsigned slot,
133 unsigned usage_mask)
134 {
135 /*X*/
136 if (usage_mask & TGSI_WRITEMASK_X) {
137 inputs->a0[slot][0] = 0.0;
138 inputs->dadx[slot][0] = 1.0;
139 inputs->dady[slot][0] = 0.0;
140 }
141
142 /*Y*/
143 if (usage_mask & TGSI_WRITEMASK_Y) {
144 inputs->a0[slot][1] = 0.0;
145 inputs->dadx[slot][1] = 0.0;
146 inputs->dady[slot][1] = 1.0;
147 }
148
149 /*Z*/
150 if (usage_mask & TGSI_WRITEMASK_Z) {
151 linear_coef(inputs, info, slot, 0, 2);
152 }
153
154 /*W*/
155 if (usage_mask & TGSI_WRITEMASK_W) {
156 linear_coef(inputs, info, slot, 0, 3);
157 }
158 }
159
160
161 /**
162 * Setup the fragment input attribute with the front-facing value.
163 * \param frontface is the triangle front facing?
164 */
165 static void setup_facing_coef( struct lp_rast_shader_inputs *inputs,
166 unsigned slot,
167 boolean frontface,
168 unsigned usage_mask)
169 {
170 /* convert TRUE to 1.0 and FALSE to -1.0 */
171 if (usage_mask & TGSI_WRITEMASK_X)
172 constant_coef( inputs, slot, 2.0f * frontface - 1.0f, 0 );
173
174 if (usage_mask & TGSI_WRITEMASK_Y)
175 constant_coef( inputs, slot, 0.0f, 1 ); /* wasted */
176
177 if (usage_mask & TGSI_WRITEMASK_Z)
178 constant_coef( inputs, slot, 0.0f, 2 ); /* wasted */
179
180 if (usage_mask & TGSI_WRITEMASK_W)
181 constant_coef( inputs, slot, 0.0f, 3 ); /* wasted */
182 }
183
184
185 /**
186 * Compute the tri->coef[] array dadx, dady, a0 values.
187 */
188 void lp_setup_tri_coef( struct lp_setup_context *setup,
189 struct lp_rast_shader_inputs *inputs,
190 const struct lp_tri_info *info)
191 {
192 unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
193 unsigned slot;
194 unsigned i;
195
196 /* setup interpolation for all the remaining attributes:
197 */
198 for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
199 unsigned vert_attr = setup->fs.input[slot].src_index;
200 unsigned usage_mask = setup->fs.input[slot].usage_mask;
201
202 switch (setup->fs.input[slot].interp) {
203 case LP_INTERP_CONSTANT:
204 if (setup->flatshade_first) {
205 for (i = 0; i < NUM_CHANNELS; i++)
206 if (usage_mask & (1 << i))
207 constant_coef(inputs, slot+1, info->v0[vert_attr][i], i);
208 }
209 else {
210 for (i = 0; i < NUM_CHANNELS; i++)
211 if (usage_mask & (1 << i))
212 constant_coef(inputs, slot+1, info->v2[vert_attr][i], i);
213 }
214 break;
215
216 case LP_INTERP_LINEAR:
217 for (i = 0; i < NUM_CHANNELS; i++)
218 if (usage_mask & (1 << i))
219 linear_coef(inputs, info, slot+1, vert_attr, i);
220 break;
221
222 case LP_INTERP_PERSPECTIVE:
223 for (i = 0; i < NUM_CHANNELS; i++)
224 if (usage_mask & (1 << i))
225 perspective_coef(inputs, info, slot+1, vert_attr, i);
226 fragcoord_usage_mask |= TGSI_WRITEMASK_W;
227 break;
228
229 case LP_INTERP_POSITION:
230 /*
231 * The generated pixel interpolators will pick up the coeffs from
232 * slot 0, so all need to ensure that the usage mask is covers all
233 * usages.
234 */
235 fragcoord_usage_mask |= usage_mask;
236 break;
237
238 case LP_INTERP_FACING:
239 setup_facing_coef(inputs, slot+1, info->frontfacing, usage_mask);
240 break;
241
242 default:
243 assert(0);
244 }
245 }
246
247 /* The internal position input is in slot zero:
248 */
249 setup_fragcoord_coef(inputs, info, 0, fragcoord_usage_mask);
250 }
251
252 #else
/*
 * Placeholder so this translation unit still defines a symbol when
 * PIPE_ARCH_SSE is defined and the C coefficient setup above is
 * compiled out.
 */
void lp_setup_coef_dummy(void);

void
lp_setup_coef_dummy(void)
{
   /* intentionally empty */
}
257
258 #endif