Merge remote branch 'origin/master' into nv50-compiler
[mesa.git] / src / gallium / drivers / llvmpipe / lp_setup_coef_intrin.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
/*
 * Triangle setup: compute the interpolation coefficients (a0, dadx, dady)
 * of the fragment shader inputs, using SSE intrinsics.
 */
31
32 #include "util/u_math.h"
33 #include "util/u_memory.h"
34 #include "lp_perf.h"
35 #include "lp_setup_context.h"
36 #include "lp_setup_coef.h"
37 #include "lp_rast.h"
38
39 #if defined(PIPE_ARCH_SSE)
40 #include <emmintrin.h>
41
42
43 static void constant_coef4( struct lp_rast_shader_inputs *inputs,
44 const struct lp_tri_info *info,
45 unsigned slot,
46 const float *attr)
47 {
48 *(__m128 *)inputs->a0[slot] = *(__m128 *)attr;
49 *(__m128 *)inputs->dadx[slot] = _mm_set1_ps(0.0);
50 *(__m128 *)inputs->dady[slot] = _mm_set1_ps(0.0);
51 }
52
53
54
55 /**
56 * Setup the fragment input attribute with the front-facing value.
57 * \param frontface is the triangle front facing?
58 */
59 static void setup_facing_coef( struct lp_rast_shader_inputs *inputs,
60 const struct lp_tri_info *info,
61 unsigned slot )
62 {
63 /* XXX: just pass frontface directly to the shader, don't bother
64 * treating it as an input.
65 */
66 __m128 a0 = _mm_setr_ps(info->frontfacing ? 1.0 : -1.0,
67 0, 0, 0);
68
69 *(__m128 *)inputs->a0[slot] = a0;
70 *(__m128 *)inputs->dadx[slot] = _mm_set1_ps(0.0);
71 *(__m128 *)inputs->dady[slot] = _mm_set1_ps(0.0);
72 }
73
74
75
76 static void calc_coef4( struct lp_rast_shader_inputs *inputs,
77 const struct lp_tri_info *info,
78 unsigned slot,
79 __m128 a0,
80 __m128 a1,
81 __m128 a2)
82 {
83 __m128 da01 = _mm_sub_ps(a0, a1);
84 __m128 da20 = _mm_sub_ps(a2, a0);
85
86 __m128 da01_dy20_ooa = _mm_mul_ps(da01, _mm_set1_ps(info->dy20_ooa));
87 __m128 da20_dy01_ooa = _mm_mul_ps(da20, _mm_set1_ps(info->dy01_ooa));
88 __m128 dadx = _mm_sub_ps(da01_dy20_ooa, da20_dy01_ooa);
89
90 __m128 da01_dx20_ooa = _mm_mul_ps(da01, _mm_set1_ps(info->dx20_ooa));
91 __m128 da20_dx01_ooa = _mm_mul_ps(da20, _mm_set1_ps(info->dx01_ooa));
92 __m128 dady = _mm_sub_ps(da20_dx01_ooa, da01_dx20_ooa);
93
94 __m128 dadx_x0 = _mm_mul_ps(dadx, _mm_set1_ps(info->x0_center));
95 __m128 dady_y0 = _mm_mul_ps(dady, _mm_set1_ps(info->y0_center));
96 __m128 attr_v0 = _mm_add_ps(dadx_x0, dady_y0);
97 __m128 attr_0 = _mm_sub_ps(a0, attr_v0);
98
99 *(__m128 *)inputs->a0[slot] = attr_0;
100 *(__m128 *)inputs->dadx[slot] = dadx;
101 *(__m128 *)inputs->dady[slot] = dady;
102 }
103
104
105 static void linear_coef( struct lp_rast_shader_inputs *inputs,
106 const struct lp_tri_info *info,
107 unsigned slot,
108 unsigned vert_attr)
109 {
110 __m128 a0 = *(const __m128 *)info->v0[vert_attr];
111 __m128 a1 = *(const __m128 *)info->v1[vert_attr];
112 __m128 a2 = *(const __m128 *)info->v2[vert_attr];
113
114 calc_coef4(inputs, info, slot, a0, a1, a2);
115 }
116
117
118
119 /**
120 * Compute a0, dadx and dady for a perspective-corrected interpolant,
121 * for a triangle.
122 * We basically multiply the vertex value by 1/w before computing
123 * the plane coefficients (a0, dadx, dady).
124 * Later, when we compute the value at a particular fragment position we'll
125 * divide the interpolated value by the interpolated W at that fragment.
126 */
127 static void perspective_coef( struct lp_rast_shader_inputs *inputs,
128 const struct lp_tri_info *info,
129 unsigned slot,
130 unsigned vert_attr)
131 {
132 /* premultiply by 1/w (v[0][3] is always 1/w):
133 */
134 __m128 a0 = *(const __m128 *)info->v0[vert_attr];
135 __m128 a1 = *(const __m128 *)info->v1[vert_attr];
136 __m128 a2 = *(const __m128 *)info->v2[vert_attr];
137
138 __m128 a0_oow = _mm_mul_ps(a0, _mm_set1_ps(info->v0[0][3]));
139 __m128 a1_oow = _mm_mul_ps(a1, _mm_set1_ps(info->v1[0][3]));
140 __m128 a2_oow = _mm_mul_ps(a2, _mm_set1_ps(info->v2[0][3]));
141
142 calc_coef4(inputs, info, slot, a0_oow, a1_oow, a2_oow);
143 }
144
145
146
147
148
149 /**
150 * Compute the inputs-> dadx, dady, a0 values.
151 */
152 void lp_setup_tri_coef( struct lp_setup_context *setup,
153 struct lp_rast_shader_inputs *inputs,
154 const struct lp_tri_info *info)
155 {
156 unsigned slot;
157
158 /* The internal position input is in slot zero:
159 */
160 linear_coef(inputs, info, 0, 0);
161
162 /* setup interpolation for all the remaining attributes:
163 */
164 for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
165 unsigned vert_attr = setup->fs.input[slot].src_index;
166
167 switch (setup->fs.input[slot].interp) {
168 case LP_INTERP_CONSTANT:
169 if (setup->flatshade_first) {
170 constant_coef4(inputs, info, slot+1, info->v0[vert_attr]);
171 }
172 else {
173 constant_coef4(inputs, info, slot+1, info->v2[vert_attr]);
174 }
175 break;
176
177 case LP_INTERP_LINEAR:
178 linear_coef(inputs, info, slot+1, vert_attr);
179 break;
180
181 case LP_INTERP_PERSPECTIVE:
182 perspective_coef(inputs, info, slot+1, vert_attr);
183 break;
184
185 case LP_INTERP_POSITION:
186 /*
187 * The generated pixel interpolators will pick up the coeffs from
188 * slot 0.
189 */
190 break;
191
192 case LP_INTERP_FACING:
193 setup_facing_coef(inputs, info, slot+1);
194 break;
195
196 default:
197 assert(0);
198 }
199 }
200 }
201
202 #else
/*
 * Empty function built only when PIPE_ARCH_SSE is not defined (the #else
 * branch of this file).  Presumably it exists so the translation unit is
 * not empty in that configuration -- TODO confirm.
 */
void lp_setup_coef_dummy(void);

void lp_setup_coef_dummy(void)
{
   return;
}
207 #endif