1 /**************************************************************************
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
/*
 * This file is compiled with clang into the LLVM bitcode.
 *
 * Zack Rusin zack@tungstengraphics.com
 */
/*
 * float4: a four-lane vector of float.
 *
 * Declared with clang's ext_vector_type attribute, which is what allows the
 * rest of this file to read and write individual lanes as .x/.y/.z/.w and to
 * apply arithmetic operators (*, +, /) to whole vectors lane by lane.
 */
typedef __attribute__((ext_vector_type(4))) float float4;

/* Single-precision absolute value, provided by the C math library. */
extern float fabsf(float val);
/*
 * absvec: takes one float4 and returns a float4.
 *
 * NOTE(review): only the signature is present in this view; no function body
 * is visible between it and maxvec. Presumably each of the four lanes is
 * passed through fabsf (declared just before this function) -- TODO confirm
 * against the complete file.
 */
41 float4
absvec(float4 vec
)
52 float4
maxvec(float4 a
, float4 b
)
54 return (float4
){(a
.x
> b
.x
) ? a
.x
: b
.x
,
55 (a
.y
> b
.y
) ? a
.y
: b
.y
,
56 (a
.z
> b
.z
) ? a
.z
: b
.z
,
57 (a
.w
> b
.w
) ? a
.w
: b
.w
};
60 float4
minvec(float4 a
, float4 b
)
62 return (float4
){(a
.x
< b
.x
) ? a
.x
: b
.x
,
63 (a
.y
< b
.y
) ? a
.y
: b
.y
,
64 (a
.z
< b
.z
) ? a
.z
: b
.z
,
65 (a
.w
< b
.w
) ? a
.w
: b
.w
};
/* Single-precision power and square root, provided by the C math library. */
extern float powf(float num, float p);
extern float sqrtf(float x);
71 float4
powvec(float4 vec
, float4 q
)
74 p
.x
= powf(vec
.x
, q
.x
);
75 p
.y
= powf(vec
.y
, q
.y
);
76 p
.z
= powf(vec
.z
, q
.z
);
77 p
.w
= powf(vec
.w
, q
.w
);
/*
 * sqrtvec: takes one float4 and returns a float4.
 *
 * NOTE(review): only the signature is present in this view; no function body
 * is visible between it and sltvec. Presumably each of the four lanes is
 * passed through sqrtf (declared a few lines earlier) -- TODO confirm against
 * the complete file.
 */
81 float4
sqrtvec(float4 vec
)
91 float4
sltvec(float4 v1
, float4 v2
)
94 p
.x
= (v1
.x
< v2
.x
) ? 1.0 : 0.0;
95 p
.y
= (v1
.y
< v2
.y
) ? 1.0 : 0.0;
96 p
.z
= (v1
.z
< v2
.z
) ? 1.0 : 0.0;
97 p
.w
= (v1
.w
< v2
.w
) ? 1.0 : 0.0;
104 void abs(float4
*res
,
105 float4 tmp0x
, float4 tmp0y
, float4 tmp0z
, float4 tmp0w
)
107 res
[0] = absvec(tmp0x
);
108 res
[1] = absvec(tmp0y
);
109 res
[2] = absvec(tmp0z
);
110 res
[3] = absvec(tmp0w
);
/*
 * dp3: takes an output pointer res and two groups of four float4 inputs
 * (tmp0x..tmp0w and tmp1x..tmp1w).
 *
 * The visible part starts a float4 named dot as the lane-wise sum
 * (tmp0x * tmp1x) + (tmp0y * tmp1y) + ...
 *
 * NOTE(review): the initializer breaks off after the second product in this
 * view. Any further terms and the stores into res are not visible here, so
 * they are not described -- confirm against the complete file.
 */
113 void dp3(float4
*res
,
114 float4 tmp0x
, float4 tmp0y
, float4 tmp0z
, float4 tmp0w
,
115 float4 tmp1x
, float4 tmp1y
, float4 tmp1z
, float4 tmp1w
)
117 float4 dot
= (tmp0x
* tmp1x
) + (tmp0y
* tmp1y
) +
/*
 * dp4: takes an output pointer res and two groups of four float4 inputs
 * (tmp0x..tmp0w and tmp1x..tmp1w).
 *
 * The visible statement builds a float4 named dot as the lane-wise sum of
 * the four products tmp0x*tmp1x + tmp0y*tmp1y + tmp0z*tmp1z + tmp0w*tmp1w.
 *
 * NOTE(review): nothing after the declaration of dot is visible in this
 * view, so how dot is written to res cannot be stated from here --
 * presumably it is stored to the elements of res; confirm against the
 * complete file.
 */
126 void dp4(float4
*res
,
127 float4 tmp0x
, float4 tmp0y
, float4 tmp0z
, float4 tmp0w
,
128 float4 tmp1x
, float4 tmp1y
, float4 tmp1z
, float4 tmp1w
)
130 float4 dot
= (tmp0x
* tmp1x
) + (tmp0y
* tmp1y
) +
131 (tmp0z
* tmp1z
) + (tmp0w
* tmp1w
);
/*
 * lit: takes an output pointer res and four float4 inputs
 * (tmp0x, tmp0y, tmp0z, tmp0w).
 *
 * In the lines visible here:
 *   res[0] and res[3] are set to all ones;
 *   res[2] is powvec(tmpy, tmpw), where tmpy is tmp0y with each lane raised
 *   to at least 0 and tmpw is tmp0w with each lane limited to [-128, 128].
 *
 * NOTE(review): several source lines between these statements are missing
 * from this view. No store to res[1] and no use of tmp0x or tmp0z is visible,
 * and any conditional around the statements below cannot be seen -- confirm
 * against the complete file before relying on this description.
 */
139 void lit(float4
*res
,
140 float4 tmp0x
, float4 tmp0y
, float4 tmp0z
, float4 tmp0w
)
/* Constant vectors: all zeros, all -128, all +128. */
142 const float4 zerovec
= (float4
) {0.0, 0.0, 0.0, 0.0};
143 const float4 min128
= (float4
) {-128.f
, -128.f
, -128.f
, -128.f
};
144 const float4 plus128
= (float4
) {128.f
, 128.f
, 128.f
, 128.f
};
/* First output element is all ones. */
146 res
[0] = (float4
){1.0, 1.0, 1.0, 1.0};
/* tmpy: lane-wise maximum of tmp0y and zero. */
148 float4 tmpy
= maxvec(tmp0y
, zerovec
);
/* tmpw: tmp0w capped at +128 per lane, then floored at -128 per lane. */
149 float4 tmpw
= minvec(tmp0w
, plus128
);
150 tmpw
= maxvec(tmpw
, min128
);
/* Third output element: tmpy raised to the power tmpw, lane by lane. */
152 res
[2] = powvec(tmpy
, tmpw
);
/* Fourth output element is all ones. */
157 res
[3] = (float4
){1.0, 1.0, 1.0, 1.0};
160 void min(float4
*res
,
161 float4 tmp0x
, float4 tmp0y
, float4 tmp0z
, float4 tmp0w
,
162 float4 tmp1x
, float4 tmp1y
, float4 tmp1z
, float4 tmp1w
)
164 res
[0] = minvec(tmp0x
, tmp1x
);
165 res
[1] = minvec(tmp0y
, tmp1y
);
166 res
[2] = minvec(tmp0z
, tmp1z
);
167 res
[3] = minvec(tmp0w
, tmp1w
);
171 void max(float4
*res
,
172 float4 tmp0x
, float4 tmp0y
, float4 tmp0z
, float4 tmp0w
,
173 float4 tmp1x
, float4 tmp1y
, float4 tmp1z
, float4 tmp1w
)
175 res
[0] = maxvec(tmp0x
, tmp1x
);
176 res
[1] = maxvec(tmp0y
, tmp1y
);
177 res
[2] = maxvec(tmp0z
, tmp1z
);
178 res
[3] = maxvec(tmp0w
, tmp1w
);
/*
 * pow: takes an output pointer res and two groups of four float4 inputs
 * (tmp0x..tmp0w and tmp1x..tmp1w).
 *
 * The one visible statement stores powvec(tmp0x, tmp1x) into res[0].
 *
 * NOTE(review): the source lines following that store are missing from this
 * view, so whether and how res[1]..res[3] are written cannot be stated from
 * here -- confirm against the complete file.
 * NOTE(review): this shares its name with the C math library's
 * double pow(double, double).
 */
181 void pow(float4
*res
,
182 float4 tmp0x
, float4 tmp0y
, float4 tmp0z
, float4 tmp0w
,
183 float4 tmp1x
, float4 tmp1y
, float4 tmp1z
, float4 tmp1w
)
185 res
[0] = powvec(tmp0x
, tmp1x
);
191 void rsq(float4
*res
,
192 float4 tmp0x
, float4 tmp0y
, float4 tmp0z
, float4 tmp0w
)
194 const float4 onevec
= (float4
) {1., 1., 1., 1.};
195 res
[0] = onevec
/sqrtvec(absvec(tmp0x
));
196 res
[1] = onevec
/sqrtvec(absvec(tmp0y
));
197 res
[2] = onevec
/sqrtvec(absvec(tmp0z
));
198 res
[3] = onevec
/sqrtvec(absvec(tmp0w
));
201 void slt(float4
*res
,
202 float4 tmp0x
, float4 tmp0y
, float4 tmp0z
, float4 tmp0w
,
203 float4 tmp1x
, float4 tmp1y
, float4 tmp1z
, float4 tmp1w
)
205 res
[0] = sltvec(tmp0x
, tmp1x
);
206 res
[1] = sltvec(tmp0y
, tmp1y
);
207 res
[2] = sltvec(tmp0z
, tmp1z
);
208 res
[3] = sltvec(tmp0w
, tmp1w
);