llvmpipe: Fix sprite coord perspective interpolation of Q.
[mesa.git] / src / gallium / drivers / llvmpipe / lp_setup_coef_intrin.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
/*
 * Triangle attribute interpolation coefficient (a0, dadx, dady) setup,
 * implemented with SSE intrinsics.
 */
31
32 #include "util/u_math.h"
33 #include "util/u_memory.h"
34 #include "lp_perf.h"
35 #include "lp_setup_context.h"
36 #include "lp_setup_coef.h"
37 #include "lp_rast.h"
38
39 #if defined(PIPE_ARCH_SSE)
40 #include <emmintrin.h>
41
42
43 static void constant_coef4( struct lp_rast_shader_inputs *inputs,
44 const struct lp_tri_info *info,
45 unsigned slot,
46 const float *attr)
47 {
48 *(__m128 *)inputs->a0[slot] = *(__m128 *)attr;
49 *(__m128 *)inputs->dadx[slot] = _mm_set1_ps(0.0);
50 *(__m128 *)inputs->dady[slot] = _mm_set1_ps(0.0);
51 }
52
53
54
55 /**
56 * Setup the fragment input attribute with the front-facing value.
57 * \param frontface is the triangle front facing?
58 */
59 static void setup_facing_coef( struct lp_rast_shader_inputs *inputs,
60 const struct lp_tri_info *info,
61 unsigned slot )
62 {
63 /* XXX: just pass frontface directly to the shader, don't bother
64 * treating it as an input.
65 */
66 __m128 a0 = _mm_setr_ps(info->frontfacing ? 1.0 : -1.0,
67 0, 0, 0);
68
69 *(__m128 *)inputs->a0[slot] = a0;
70 *(__m128 *)inputs->dadx[slot] = _mm_set1_ps(0.0);
71 *(__m128 *)inputs->dady[slot] = _mm_set1_ps(0.0);
72 }
73
74
75
76 static void calc_coef4( struct lp_rast_shader_inputs *inputs,
77 const struct lp_tri_info *info,
78 unsigned slot,
79 __m128 a0,
80 __m128 a1,
81 __m128 a2)
82 {
83 __m128 da01 = _mm_sub_ps(a0, a1);
84 __m128 da20 = _mm_sub_ps(a2, a0);
85
86 __m128 da01_dy20_ooa = _mm_mul_ps(da01, _mm_set1_ps(info->dy20_ooa));
87 __m128 da20_dy01_ooa = _mm_mul_ps(da20, _mm_set1_ps(info->dy01_ooa));
88 __m128 dadx = _mm_sub_ps(da01_dy20_ooa, da20_dy01_ooa);
89
90 __m128 da01_dx20_ooa = _mm_mul_ps(da01, _mm_set1_ps(info->dx20_ooa));
91 __m128 da20_dx01_ooa = _mm_mul_ps(da20, _mm_set1_ps(info->dx01_ooa));
92 __m128 dady = _mm_sub_ps(da20_dx01_ooa, da01_dx20_ooa);
93
94 __m128 dadx_x0 = _mm_mul_ps(dadx, _mm_set1_ps(info->x0_center));
95 __m128 dady_y0 = _mm_mul_ps(dady, _mm_set1_ps(info->y0_center));
96 __m128 attr_v0 = _mm_add_ps(dadx_x0, dady_y0);
97 __m128 attr_0 = _mm_sub_ps(a0, attr_v0);
98
99 *(__m128 *)inputs->a0[slot] = attr_0;
100 *(__m128 *)inputs->dadx[slot] = dadx;
101 *(__m128 *)inputs->dady[slot] = dady;
102 }
103
104
105 static void linear_coef( struct lp_rast_shader_inputs *inputs,
106 const struct lp_tri_info *info,
107 unsigned slot,
108 unsigned vert_attr)
109 {
110 __m128 a0 = *(const __m128 *)info->v0[vert_attr];
111 __m128 a1 = *(const __m128 *)info->v1[vert_attr];
112 __m128 a2 = *(const __m128 *)info->v2[vert_attr];
113
114 calc_coef4(inputs, info, slot, a0, a1, a2);
115 }
116
117
118
119 /**
120 * Compute a0, dadx and dady for a perspective-corrected interpolant,
121 * for a triangle.
122 * We basically multiply the vertex value by 1/w before computing
123 * the plane coefficients (a0, dadx, dady).
124 * Later, when we compute the value at a particular fragment position we'll
125 * divide the interpolated value by the interpolated W at that fragment.
126 */
127 static void perspective_coef( struct lp_rast_shader_inputs *inputs,
128 const struct lp_tri_info *info,
129 unsigned slot,
130 unsigned vert_attr)
131 {
132 /* premultiply by 1/w (v[0][3] is always 1/w):
133 */
134 __m128 a0 = *(const __m128 *)info->v0[vert_attr];
135 __m128 a1 = *(const __m128 *)info->v1[vert_attr];
136 __m128 a2 = *(const __m128 *)info->v2[vert_attr];
137
138 __m128 a0_oow = _mm_mul_ps(a0, _mm_set1_ps(info->v0[0][3]));
139 __m128 a1_oow = _mm_mul_ps(a1, _mm_set1_ps(info->v1[0][3]));
140 __m128 a2_oow = _mm_mul_ps(a2, _mm_set1_ps(info->v2[0][3]));
141
142 calc_coef4(inputs, info, slot, a0_oow, a1_oow, a2_oow);
143 }
144
145
146
147
148
149 /**
150 * Compute the inputs-> dadx, dady, a0 values.
151 */
152 void lp_setup_tri_coef( struct lp_setup_context *setup,
153 struct lp_rast_shader_inputs *inputs,
154 const float (*v0)[4],
155 const float (*v1)[4],
156 const float (*v2)[4],
157 boolean frontfacing)
158 {
159 unsigned slot;
160 struct lp_tri_info info;
161 float dx01 = v0[0][0] - v1[0][0];
162 float dy01 = v0[0][1] - v1[0][1];
163 float dx20 = v2[0][0] - v0[0][0];
164 float dy20 = v2[0][1] - v0[0][1];
165 float oneoverarea = 1.0f / (dx01 * dy20 - dx20 * dy01);
166
167 info.v0 = v0;
168 info.v1 = v1;
169 info.v2 = v2;
170 info.frontfacing = frontfacing;
171 info.x0_center = v0[0][0] - setup->pixel_offset;
172 info.y0_center = v0[0][1] - setup->pixel_offset;
173 info.dx01_ooa = dx01 * oneoverarea;
174 info.dx20_ooa = dx20 * oneoverarea;
175 info.dy01_ooa = dy01 * oneoverarea;
176 info.dy20_ooa = dy20 * oneoverarea;
177
178
179 /* The internal position input is in slot zero:
180 */
181 linear_coef(inputs, &info, 0, 0);
182
183 /* setup interpolation for all the remaining attributes:
184 */
185 for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
186 unsigned vert_attr = setup->fs.input[slot].src_index;
187
188 switch (setup->fs.input[slot].interp) {
189 case LP_INTERP_CONSTANT:
190 if (setup->flatshade_first) {
191 constant_coef4(inputs, &info, slot+1, info.v0[vert_attr]);
192 }
193 else {
194 constant_coef4(inputs, &info, slot+1, info.v2[vert_attr]);
195 }
196 break;
197
198 case LP_INTERP_LINEAR:
199 linear_coef(inputs, &info, slot+1, vert_attr);
200 break;
201
202 case LP_INTERP_PERSPECTIVE:
203 perspective_coef(inputs, &info, slot+1, vert_attr);
204 break;
205
206 case LP_INTERP_POSITION:
207 /*
208 * The generated pixel interpolators will pick up the coeffs from
209 * slot 0.
210 */
211 break;
212
213 case LP_INTERP_FACING:
214 setup_facing_coef(inputs, &info, slot+1);
215 break;
216
217 default:
218 assert(0);
219 }
220 }
221 }
222
223 #else
/*
 * Empty function defined in the branch taken when PIPE_ARCH_SSE is not
 * defined, in place of the SSE implementation above.
 */
extern void lp_setup_coef_dummy(void);

void
lp_setup_coef_dummy(void)
{
   /* intentionally empty */
}
228 #endif