1 /**************************************************************************
3 * Copyright 2010 VMware.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 * Binning code for triangles
32 #include "util/u_math.h"
33 #include "util/u_memory.h"
35 #include "lp_setup_context.h"
36 #include "lp_setup_coef.h"
39 #if defined(PIPE_ARCH_SSE)
40 #include <emmintrin.h>
/*
 * constant_coef4: set up a constant (non-interpolated) attribute.
 *
 * Copies one 128-bit value (four packed floats) read through `attr` into
 * inputs->a0[slot], and stores all-zero vectors into inputs->dadx[slot] and
 * inputs->dady[slot], so the attribute has no gradient in x or y.
 *
 * `info` is not referenced by the statements visible here.
 *
 * NOTE(review): the declarations of `slot` and `attr` and the function's
 * braces are not visible in this listing -- confirm their types against the
 * complete file. The __m128 casts presumably rely on 16-byte-aligned
 * storage on both sides -- TODO confirm.
 */
43 static void constant_coef4( struct lp_rast_shader_inputs
*inputs
,
44 const struct lp_tri_info
*info
,
48 *(__m128
*)inputs
->a0
[slot
] = *(__m128
*)attr
;
49 *(__m128
*)inputs
->dadx
[slot
] = _mm_set1_ps(0.0);
50 *(__m128
*)inputs
->dady
[slot
] = _mm_set1_ps(0.0);
56 * Setup the fragment input attribute with the front-facing value.
57 * \param info supplies info->frontfacing: is the triangle front facing?
/*
 * setup_facing_coef: write the triangle-facing attribute into `slot`.
 *
 * Builds a vector with _mm_setr_ps whose first argument is 1.0 when
 * info->frontfacing is non-zero and -1.0 otherwise, and stores it in
 * inputs->a0[slot]. inputs->dadx[slot] and inputs->dady[slot] are set to
 * all zeros, so the value is constant across the triangle.
 *
 * NOTE(review): the remaining _mm_setr_ps arguments, the declaration of
 * `slot` and the function's braces are not visible in this listing --
 * confirm against the complete file.
 */
59 static void setup_facing_coef( struct lp_rast_shader_inputs
*inputs
,
60 const struct lp_tri_info
*info
,
63 /* XXX: just pass frontface directly to the shader, don't bother
64 * treating it as an input.
66 __m128 a0
= _mm_setr_ps(info
->frontfacing
? 1.0 : -1.0,
69 *(__m128
*)inputs
->a0
[slot
] = a0
;
70 *(__m128
*)inputs
->dadx
[slot
] = _mm_set1_ps(0.0);
71 *(__m128
*)inputs
->dady
[slot
] = _mm_set1_ps(0.0);
/*
 * calc_coef4: compute the a0/dadx/dady coefficients for one attribute from
 * its three per-vertex values a0, a1, a2, operating on all four lanes of
 * each __m128 at once.
 *
 * With da01 = a0 - a1 and da20 = a2 - a0, the visible code computes:
 *
 *    dadx = da01 * info->dy20_ooa - da20 * info->dy01_ooa
 *    dady = da20 * info->dx01_ooa - da01 * info->dx20_ooa
 *    attr_0 = a0 - (dadx * info->x0_center + dady * info->y0_center)
 *
 * and stores attr_0, dadx and dady into inputs->a0[slot],
 * inputs->dadx[slot] and inputs->dady[slot] respectively. Each scalar field
 * of `info` is broadcast to every lane with _mm_set1_ps before use.
 *
 * NOTE(review): "_ooa" presumably means the edge delta pre-multiplied by
 * one over the triangle area, and x0_center / y0_center presumably are the
 * sample position of vertex 0 -- verify against the lp_tri_info
 * definition. The declarations of `slot`, `a0`, `a1`, `a2` and the
 * function's braces are not visible in this listing.
 */
76 static void calc_coef4( struct lp_rast_shader_inputs
*inputs
,
77 const struct lp_tri_info
*info
,
83 __m128 da01
= _mm_sub_ps(a0
, a1
);
84 __m128 da20
= _mm_sub_ps(a2
, a0
);
86 __m128 da01_dy20_ooa
= _mm_mul_ps(da01
, _mm_set1_ps(info
->dy20_ooa
));
87 __m128 da20_dy01_ooa
= _mm_mul_ps(da20
, _mm_set1_ps(info
->dy01_ooa
));
88 __m128 dadx
= _mm_sub_ps(da01_dy20_ooa
, da20_dy01_ooa
);
90 __m128 da01_dx20_ooa
= _mm_mul_ps(da01
, _mm_set1_ps(info
->dx20_ooa
));
91 __m128 da20_dx01_ooa
= _mm_mul_ps(da20
, _mm_set1_ps(info
->dx01_ooa
));
92 __m128 dady
= _mm_sub_ps(da20_dx01_ooa
, da01_dx20_ooa
);
94 __m128 dadx_x0
= _mm_mul_ps(dadx
, _mm_set1_ps(info
->x0_center
));
95 __m128 dady_y0
= _mm_mul_ps(dady
, _mm_set1_ps(info
->y0_center
));
96 __m128 attr_v0
= _mm_add_ps(dadx_x0
, dady_y0
);
97 __m128 attr_0
= _mm_sub_ps(a0
, attr_v0
);
99 *(__m128
*)inputs
->a0
[slot
] = attr_0
;
100 *(__m128
*)inputs
->dadx
[slot
] = dadx
;
101 *(__m128
*)inputs
->dady
[slot
] = dady
;
/*
 * linear_coef: set up a linearly interpolated attribute.
 *
 * Loads the attribute at index `vert_attr` from each of info->v0, info->v1
 * and info->v2 as one __m128 and passes the three values unmodified to
 * calc_coef4(), which writes the resulting coefficients into `slot` of
 * `inputs`.
 *
 * NOTE(review): the declarations of `slot` and `vert_attr` and the
 * function's braces are not visible in this listing. The __m128 loads
 * presumably require each vertex attribute to be 16-byte aligned -- TODO
 * confirm.
 */
105 static void linear_coef( struct lp_rast_shader_inputs
*inputs
,
106 const struct lp_tri_info
*info
,
110 __m128 a0
= *(const __m128
*)info
->v0
[vert_attr
];
111 __m128 a1
= *(const __m128
*)info
->v1
[vert_attr
];
112 __m128 a2
= *(const __m128
*)info
->v2
[vert_attr
];
114 calc_coef4(inputs
, info
, slot
, a0
, a1
, a2
);
120 * Compute a0, dadx and dady for a perspective-corrected interpolant.
122 * We basically multiply the vertex value by 1/w before computing
123 * the plane coefficients (a0, dadx, dady).
124 * Later, when we compute the value at a particular fragment position we'll
125 * divide the interpolated value by the interpolated W at that fragment.
/*
 * perspective_coef: set up a perspective-corrected attribute.
 *
 * Loads the attribute at index `vert_attr` from info->v0, info->v1 and
 * info->v2, multiplies each by that same vertex's [0][3] component
 * (broadcast to all four lanes; the in-body comment identifies it as 1/w),
 * and passes the three pre-multiplied values to calc_coef4(), which writes
 * the coefficients into `slot` of `inputs`.
 *
 * NOTE(review): the declarations of `slot` and `vert_attr` and the
 * function's braces are not visible in this listing.
 */
127 static void perspective_coef( struct lp_rast_shader_inputs
*inputs
,
128 const struct lp_tri_info
*info
,
132 /* premultiply by 1/w (v[0][3] is always 1/w):
134 __m128 a0
= *(const __m128
*)info
->v0
[vert_attr
];
135 __m128 a1
= *(const __m128
*)info
->v1
[vert_attr
];
136 __m128 a2
= *(const __m128
*)info
->v2
[vert_attr
];
138 __m128 a0_oow
= _mm_mul_ps(a0
, _mm_set1_ps(info
->v0
[0][3]));
139 __m128 a1_oow
= _mm_mul_ps(a1
, _mm_set1_ps(info
->v1
[0][3]));
140 __m128 a2_oow
= _mm_mul_ps(a2
, _mm_set1_ps(info
->v2
[0][3]));
142 calc_coef4(inputs
, info
, slot
, a0_oow
, a1_oow
, a2_oow
);
150 * Compute the inputs-> dadx, dady, a0 values.
/*
 * lp_setup_tri_coef: fill `inputs` with the a0/dadx/dady coefficients for
 * every fragment-shader input of one triangle.
 *
 * Slot 0 of `inputs` is set up first with linear_coef() from vertex
 * attribute 0 (the position). The loop then visits shader inputs
 * 0 .. setup->fs.nr_inputs - 1; input `slot` reads vertex attribute
 * setup->fs.input[slot].src_index and is written to output slot `slot + 1`,
 * because slot 0 is taken by the position. Dispatch on the input's `interp`:
 *
 *   LP_INTERP_CONSTANT     constant_coef4() with the attribute of v0 when
 *                          setup->flatshade_first is set; a second call
 *                          uses v2
 *   LP_INTERP_LINEAR       linear_coef()
 *   LP_INTERP_PERSPECTIVE  perspective_coef()
 *   LP_INTERP_POSITION     no call is visible for this case
 *   LP_INTERP_FACING       setup_facing_coef()
 *
 * NOTE(review): the declaration of `slot`, the braces, the `break`
 * statements, the `else` that presumably guards the v2 call, and any
 * `default` case are not visible in this listing -- confirm against the
 * complete file.
 */
152 void lp_setup_tri_coef( struct lp_setup_context
*setup
,
153 struct lp_rast_shader_inputs
*inputs
,
154 const struct lp_tri_info
*info
)
158 /* The internal position input is in slot zero:
160 linear_coef(inputs
, info
, 0, 0);
162 /* setup interpolation for all the remaining attributes:
164 for (slot
= 0; slot
< setup
->fs
.nr_inputs
; slot
++) {
165 unsigned vert_attr
= setup
->fs
.input
[slot
].src_index
;
167 switch (setup
->fs
.input
[slot
].interp
) {
168 case LP_INTERP_CONSTANT
:
169 if (setup
->flatshade_first
) {
170 constant_coef4(inputs
, info
, slot
+1, info
->v0
[vert_attr
]);
173 constant_coef4(inputs
, info
, slot
+1, info
->v2
[vert_attr
]);
177 case LP_INTERP_LINEAR
:
178 linear_coef(inputs
, info
, slot
+1, vert_attr
);
181 case LP_INTERP_PERSPECTIVE
:
182 perspective_coef(inputs
, info
, slot
+1, vert_attr
);
185 case LP_INTERP_POSITION
:
187 * The generated pixel interpolators will pick up the coeffs from
192 case LP_INTERP_FACING
:
193 setup_facing_coef(inputs
, info
, slot
+1);
203 extern void lp_setup_coef_dummy(void);
204 void lp_setup_coef_dummy(void)