1 /**************************************************************************
3 * Copyright 2010 VMware.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
/*
 * Triangle attribute interpolation coefficient (a0, dadx, dady) setup,
 * implemented with SSE intrinsics.
 */
32 #include "util/u_math.h"
33 #include "util/u_memory.h"
35 #include "lp_setup_context.h"
36 #include "lp_setup_coef.h"
39 #if defined(PIPE_ARCH_SSE)
40 #include <emmintrin.h>
43 static void constant_coef4( struct lp_rast_shader_inputs
*inputs
,
44 const struct lp_tri_info
*info
,
48 *(__m128
*)inputs
->a0
[slot
] = *(__m128
*)attr
;
49 *(__m128
*)inputs
->dadx
[slot
] = _mm_set1_ps(0.0);
50 *(__m128
*)inputs
->dady
[slot
] = _mm_set1_ps(0.0);
56 * Setup the fragment input attribute with the front-facing value.
57 * \param frontface is the triangle front facing?
59 static void setup_facing_coef( struct lp_rast_shader_inputs
*inputs
,
60 const struct lp_tri_info
*info
,
63 /* XXX: just pass frontface directly to the shader, don't bother
64 * treating it as an input.
66 __m128 a0
= _mm_setr_ps(info
->frontfacing
? 1.0 : -1.0,
69 *(__m128
*)inputs
->a0
[slot
] = a0
;
70 *(__m128
*)inputs
->dadx
[slot
] = _mm_set1_ps(0.0);
71 *(__m128
*)inputs
->dady
[slot
] = _mm_set1_ps(0.0);
76 static void calc_coef4( struct lp_rast_shader_inputs
*inputs
,
77 const struct lp_tri_info
*info
,
83 __m128 da01
= _mm_sub_ps(a0
, a1
);
84 __m128 da20
= _mm_sub_ps(a2
, a0
);
86 __m128 da01_dy20_ooa
= _mm_mul_ps(da01
, _mm_set1_ps(info
->dy20_ooa
));
87 __m128 da20_dy01_ooa
= _mm_mul_ps(da20
, _mm_set1_ps(info
->dy01_ooa
));
88 __m128 dadx
= _mm_sub_ps(da01_dy20_ooa
, da20_dy01_ooa
);
90 __m128 da01_dx20_ooa
= _mm_mul_ps(da01
, _mm_set1_ps(info
->dx20_ooa
));
91 __m128 da20_dx01_ooa
= _mm_mul_ps(da20
, _mm_set1_ps(info
->dx01_ooa
));
92 __m128 dady
= _mm_sub_ps(da20_dx01_ooa
, da01_dx20_ooa
);
94 __m128 dadx_x0
= _mm_mul_ps(dadx
, _mm_set1_ps(info
->x0_center
));
95 __m128 dady_y0
= _mm_mul_ps(dady
, _mm_set1_ps(info
->y0_center
));
96 __m128 attr_v0
= _mm_add_ps(dadx_x0
, dady_y0
);
97 __m128 attr_0
= _mm_sub_ps(a0
, attr_v0
);
99 *(__m128
*)inputs
->a0
[slot
] = attr_0
;
100 *(__m128
*)inputs
->dadx
[slot
] = dadx
;
101 *(__m128
*)inputs
->dady
[slot
] = dady
;
105 static void linear_coef( struct lp_rast_shader_inputs
*inputs
,
106 const struct lp_tri_info
*info
,
110 __m128 a0
= *(const __m128
*)info
->v0
[vert_attr
];
111 __m128 a1
= *(const __m128
*)info
->v1
[vert_attr
];
112 __m128 a2
= *(const __m128
*)info
->v2
[vert_attr
];
114 calc_coef4(inputs
, info
, slot
, a0
, a1
, a2
);
120 * Compute a0, dadx and dady for a perspective-corrected interpolant,
122 * We basically multiply the vertex value by 1/w before computing
123 * the plane coefficients (a0, dadx, dady).
124 * Later, when we compute the value at a particular fragment position we'll
125 * divide the interpolated value by the interpolated W at that fragment.
127 static void perspective_coef( struct lp_rast_shader_inputs
*inputs
,
128 const struct lp_tri_info
*info
,
132 /* premultiply by 1/w (v[0][3] is always 1/w):
134 __m128 a0
= *(const __m128
*)info
->v0
[vert_attr
];
135 __m128 a1
= *(const __m128
*)info
->v1
[vert_attr
];
136 __m128 a2
= *(const __m128
*)info
->v2
[vert_attr
];
138 __m128 a0_oow
= _mm_mul_ps(a0
, _mm_set1_ps(info
->v0
[0][3]));
139 __m128 a1_oow
= _mm_mul_ps(a1
, _mm_set1_ps(info
->v1
[0][3]));
140 __m128 a2_oow
= _mm_mul_ps(a2
, _mm_set1_ps(info
->v2
[0][3]));
142 calc_coef4(inputs
, info
, slot
, a0_oow
, a1_oow
, a2_oow
);
150 * Compute the inputs-> dadx, dady, a0 values.
152 void lp_setup_tri_coef( struct lp_setup_context
*setup
,
153 struct lp_rast_shader_inputs
*inputs
,
154 const float (*v0
)[4],
155 const float (*v1
)[4],
156 const float (*v2
)[4],
160 struct lp_tri_info info
;
161 float dx01
= v0
[0][0] - v1
[0][0];
162 float dy01
= v0
[0][1] - v1
[0][1];
163 float dx20
= v2
[0][0] - v0
[0][0];
164 float dy20
= v2
[0][1] - v0
[0][1];
165 float oneoverarea
= 1.0f
/ (dx01
* dy20
- dx20
* dy01
);
170 info
.frontfacing
= frontfacing
;
171 info
.x0_center
= v0
[0][0] - setup
->pixel_offset
;
172 info
.y0_center
= v0
[0][1] - setup
->pixel_offset
;
173 info
.dx01_ooa
= dx01
* oneoverarea
;
174 info
.dx20_ooa
= dx20
* oneoverarea
;
175 info
.dy01_ooa
= dy01
* oneoverarea
;
176 info
.dy20_ooa
= dy20
* oneoverarea
;
179 /* The internal position input is in slot zero:
181 linear_coef(inputs
, &info
, 0, 0);
183 /* setup interpolation for all the remaining attributes:
185 for (slot
= 0; slot
< setup
->fs
.nr_inputs
; slot
++) {
186 unsigned vert_attr
= setup
->fs
.input
[slot
].src_index
;
188 switch (setup
->fs
.input
[slot
].interp
) {
189 case LP_INTERP_CONSTANT
:
190 if (setup
->flatshade_first
) {
191 constant_coef4(inputs
, &info
, slot
+1, info
.v0
[vert_attr
]);
194 constant_coef4(inputs
, &info
, slot
+1, info
.v2
[vert_attr
]);
198 case LP_INTERP_LINEAR
:
199 linear_coef(inputs
, &info
, slot
+1, vert_attr
);
202 case LP_INTERP_PERSPECTIVE
:
203 perspective_coef(inputs
, &info
, slot
+1, vert_attr
);
206 case LP_INTERP_POSITION
:
208 * The generated pixel interpolators will pick up the coeffs from
213 case LP_INTERP_FACING
:
214 setup_facing_coef(inputs
, &info
, slot
+1);
224 extern void lp_setup_coef_dummy(void);
225 void lp_setup_coef_dummy(void)