fix stores to vertex state program registers
[mesa.git] / src / mesa / math / m_clip_tmp.h
1
2 /*
3 * Mesa 3-D graphics library
4 * Version: 5.1
5 *
6 * Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included
16 * in all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26 /*
27 * New (3.1) transformation code written by Keith Whitwell.
28 */
29
30
31 /* KW: a clever asm implementation would nestle integer versions
32 * of the outcode calculation underneath the division. Gcc won't
33 * do this, strangely enough, so I only do the divide in
34 * the case where the cliptest passes. This isn't essential,
35 * and an asm implementation needn't replicate that behaviour.
36 *
37 * \param clip_vec vector of incoming clip-space coords
38 * \param proj_vec vector of resultant NDC-space projected coords
39 * \param clipMask resulting array of clip flags
40 * \param orMask bitwise-OR of clipMask values
41 * \param andMask bitwise-AND of clipMask values
42 * \return proj_vec pointer
43 */
44 static GLvector4f * _XFORMAPI TAG(cliptest_points4)( GLvector4f *clip_vec,
45 GLvector4f *proj_vec,
46 GLubyte clipMask[],
47 GLubyte *orMask,
48 GLubyte *andMask )
49 {
50 const GLuint stride = clip_vec->stride;
51 const GLfloat *from = (GLfloat *)clip_vec->start;
52 const GLuint count = clip_vec->count;
53 GLuint c = 0;
54 GLfloat (*vProj)[4] = (GLfloat (*)[4])proj_vec->start;
55 GLubyte tmpAndMask = *andMask;
56 GLubyte tmpOrMask = *orMask;
57 GLuint i;
58 STRIDE_LOOP {
59 const GLfloat cx = from[0];
60 const GLfloat cy = from[1];
61 const GLfloat cz = from[2];
62 const GLfloat cw = from[3];
63 #if defined(macintosh) || defined(__powerpc__)
64 /* on powerpc cliptest is 17% faster in this way. */
65 GLuint mask;
66 mask = (((cw < cx) << CLIP_RIGHT_SHIFT));
67 mask |= (((cw < -cx) << CLIP_LEFT_SHIFT));
68 mask |= (((cw < cy) << CLIP_TOP_SHIFT));
69 mask |= (((cw < -cy) << CLIP_BOTTOM_SHIFT));
70 mask |= (((cw < cz) << CLIP_FAR_SHIFT));
71 mask |= (((cw < -cz) << CLIP_NEAR_SHIFT));
72 #else /* !defined(macintosh)) */
73 GLubyte mask = 0;
74 if (-cx + cw < 0) mask |= CLIP_RIGHT_BIT;
75 if ( cx + cw < 0) mask |= CLIP_LEFT_BIT;
76 if (-cy + cw < 0) mask |= CLIP_TOP_BIT;
77 if ( cy + cw < 0) mask |= CLIP_BOTTOM_BIT;
78 if (-cz + cw < 0) mask |= CLIP_FAR_BIT;
79 if ( cz + cw < 0) mask |= CLIP_NEAR_BIT;
80 #endif /* defined(macintosh) */
81
82 clipMask[i] = mask;
83 if (mask) {
84 c++;
85 tmpAndMask &= mask;
86 tmpOrMask |= mask;
87 vProj[i][0] = 0;
88 vProj[i][1] = 0;
89 vProj[i][2] = 0;
90 vProj[i][3] = 1;
91 } else {
92 GLfloat oow = 1.0F / cw;
93 vProj[i][0] = cx * oow;
94 vProj[i][1] = cy * oow;
95 vProj[i][2] = cz * oow;
96 vProj[i][3] = oow;
97 }
98 }
99
100 *orMask = tmpOrMask;
101 *andMask = (GLubyte) (c < count ? 0 : tmpAndMask);
102
103 proj_vec->flags |= VEC_SIZE_4;
104 proj_vec->size = 4;
105 proj_vec->count = clip_vec->count;
106 return proj_vec;
107 }
108
109
110
111 /*
112 * \param clip_vec vector of incoming clip-space coords
113 * \param proj_vec vector of resultant NDC-space projected coords
114 * \param clipMask resulting array of clip flags
115 * \param orMask bitwise-OR of clipMask values
116 * \param andMask bitwise-AND of clipMask values
117 * \return clip_vec pointer
118 */
119 static GLvector4f * _XFORMAPI TAG(cliptest_np_points4)( GLvector4f *clip_vec,
120 GLvector4f *proj_vec,
121 GLubyte clipMask[],
122 GLubyte *orMask,
123 GLubyte *andMask )
124 {
125 const GLuint stride = clip_vec->stride;
126 const GLuint count = clip_vec->count;
127 const GLfloat *from = (GLfloat *)clip_vec->start;
128 GLuint c = 0;
129 GLubyte tmpAndMask = *andMask;
130 GLubyte tmpOrMask = *orMask;
131 GLuint i;
132 STRIDE_LOOP {
133 const GLfloat cx = from[0];
134 const GLfloat cy = from[1];
135 const GLfloat cz = from[2];
136 const GLfloat cw = from[3];
137 #if defined(macintosh) || defined(__powerpc__)
138 /* on powerpc cliptest is 17% faster in this way. */
139 GLuint mask;
140 mask = (((cw < cx) << CLIP_RIGHT_SHIFT));
141 mask |= (((cw < -cx) << CLIP_LEFT_SHIFT));
142 mask |= (((cw < cy) << CLIP_TOP_SHIFT));
143 mask |= (((cw < -cy) << CLIP_BOTTOM_SHIFT));
144 mask |= (((cw < cz) << CLIP_FAR_SHIFT));
145 mask |= (((cw < -cz) << CLIP_NEAR_SHIFT));
146 #else /* !defined(macintosh)) */
147 GLubyte mask = 0;
148 if (-cx + cw < 0) mask |= CLIP_RIGHT_BIT;
149 if ( cx + cw < 0) mask |= CLIP_LEFT_BIT;
150 if (-cy + cw < 0) mask |= CLIP_TOP_BIT;
151 if ( cy + cw < 0) mask |= CLIP_BOTTOM_BIT;
152 if (-cz + cw < 0) mask |= CLIP_FAR_BIT;
153 if ( cz + cw < 0) mask |= CLIP_NEAR_BIT;
154 #endif /* defined(macintosh) */
155
156 clipMask[i] = mask;
157 if (mask) {
158 c++;
159 tmpAndMask &= mask;
160 tmpOrMask |= mask;
161 }
162 }
163
164 *orMask = tmpOrMask;
165 *andMask = (GLubyte) (c < count ? 0 : tmpAndMask);
166 return clip_vec;
167 }
168
169
170 static GLvector4f * _XFORMAPI TAG(cliptest_points3)( GLvector4f *clip_vec,
171 GLvector4f *proj_vec,
172 GLubyte clipMask[],
173 GLubyte *orMask,
174 GLubyte *andMask )
175 {
176 const GLuint stride = clip_vec->stride;
177 const GLuint count = clip_vec->count;
178 const GLfloat *from = (GLfloat *)clip_vec->start;
179
180 GLubyte tmpOrMask = *orMask;
181 GLubyte tmpAndMask = *andMask;
182 GLuint i;
183 STRIDE_LOOP {
184 const GLfloat cx = from[0], cy = from[1], cz = from[2];
185 GLubyte mask = 0;
186 if (cx > 1.0) mask |= CLIP_RIGHT_BIT;
187 else if (cx < -1.0) mask |= CLIP_LEFT_BIT;
188 if (cy > 1.0) mask |= CLIP_TOP_BIT;
189 else if (cy < -1.0) mask |= CLIP_BOTTOM_BIT;
190 if (cz > 1.0) mask |= CLIP_FAR_BIT;
191 else if (cz < -1.0) mask |= CLIP_NEAR_BIT;
192 clipMask[i] = mask;
193 tmpOrMask |= mask;
194 tmpAndMask &= mask;
195 }
196
197 *orMask = tmpOrMask;
198 *andMask = tmpAndMask;
199 return clip_vec;
200 }
201
202
203 static GLvector4f * _XFORMAPI TAG(cliptest_points2)( GLvector4f *clip_vec,
204 GLvector4f *proj_vec,
205 GLubyte clipMask[],
206 GLubyte *orMask,
207 GLubyte *andMask )
208 {
209 const GLuint stride = clip_vec->stride;
210 const GLuint count = clip_vec->count;
211 const GLfloat *from = (GLfloat *)clip_vec->start;
212
213 GLubyte tmpOrMask = *orMask;
214 GLubyte tmpAndMask = *andMask;
215 GLuint i;
216 STRIDE_LOOP {
217 const GLfloat cx = from[0], cy = from[1];
218 GLubyte mask = 0;
219 if (cx > 1.0) mask |= CLIP_RIGHT_BIT;
220 else if (cx < -1.0) mask |= CLIP_LEFT_BIT;
221 if (cy > 1.0) mask |= CLIP_TOP_BIT;
222 else if (cy < -1.0) mask |= CLIP_BOTTOM_BIT;
223 clipMask[i] = mask;
224 tmpOrMask |= mask;
225 tmpAndMask &= mask;
226 }
227
228 *orMask = tmpOrMask;
229 *andMask = tmpAndMask;
230 return clip_vec;
231 }
232
233
234 static void TAG(init_c_cliptest)( void )
235 {
236 _mesa_clip_tab[4] = TAG(cliptest_points4);
237 _mesa_clip_tab[3] = TAG(cliptest_points3);
238 _mesa_clip_tab[2] = TAG(cliptest_points2);
239
240 _mesa_clip_np_tab[4] = TAG(cliptest_np_points4);
241 _mesa_clip_np_tab[3] = TAG(cliptest_points3);
242 _mesa_clip_np_tab[2] = TAG(cliptest_points2);
243 }