Merge remote branch 'upstream/gallium-0.1' into nouveau-gallium-0.1
[mesa.git] / src / gallium / auxiliary / gallivm / soabuiltins.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /*
29 * This file is compiled with clang into the LLVM bitcode
30 *
31 * Authors:
32 * Zack Rusin zack@tungstengraphics.com
33 */
/* Four-lane float vector declared with the ext_vector_type attribute; the
 * lanes are addressed as .x, .y, .z and .w throughout this file. */
typedef float float4 __attribute__((ext_vector_type(4)));
35
36
37 extern float fabsf(float val);
38
39 float4 absvec(float4 vec)
40 {
41 float4 res;
42 res.x = fabsf(vec.x);
43 res.y = fabsf(vec.y);
44 res.z = fabsf(vec.z);
45 res.w = fabsf(vec.w);
46
47 return res;
48 }
49
50 void abs(float4 *res,
51 float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w)
52 {
53 res[0] = absvec(tmp0x);
54 res[1] = absvec(tmp0y);
55 res[2] = absvec(tmp0z);
56 res[3] = absvec(tmp0w);
57 }
58
59 void dp3(float4 *res,
60 float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
61 float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
62 {
63 float4 dot = (tmp0x * tmp1x) + (tmp0y * tmp1y) +
64 (tmp0z * tmp1z);
65
66 res[0] = dot;
67 res[1] = dot;
68 res[2] = dot;
69 res[3] = dot;
70 }
71
72
73 void dp4(float4 *res,
74 float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
75 float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
76 {
77 float4 dot = (tmp0x * tmp1x) + (tmp0y * tmp1y) +
78 (tmp0z * tmp1z) + (tmp0w * tmp1w);
79
80 res[0] = dot;
81 res[1] = dot;
82 res[2] = dot;
83 res[3] = dot;
84 }
85
86 extern float powf(float num, float p);
87 extern float sqrtf(float x);
88
89 float4 powvec(float4 vec, float4 q)
90 {
91 float4 p;
92 p.x = powf(vec.x, q.x);
93 p.y = powf(vec.y, q.y);
94 p.z = powf(vec.z, q.z);
95 p.w = powf(vec.w, q.w);
96 return p;
97 }
98
99 void pow(float4 *res,
100 float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
101 float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
102 {
103 res[0] = powvec(tmp0x, tmp1x);
104 res[1] = res[0];
105 res[2] = res[0];
106 res[3] = res[0];
107 }
108
109 float4 minvec(float4 a, float4 b)
110 {
111 return (float4){(a.x < b.x) ? a.x : b.x,
112 (a.y < b.y) ? a.y : b.y,
113 (a.z < b.z) ? a.z : b.z,
114 (a.w < b.w) ? a.w : b.w};
115 }
116
117 void min(float4 *res,
118 float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
119 float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
120 {
121 res[0] = minvec(tmp0x, tmp1x);
122 res[1] = minvec(tmp0y, tmp1y);
123 res[2] = minvec(tmp0z, tmp1z);
124 res[3] = minvec(tmp0w, tmp1w);
125 }
126
127
128 float4 maxvec(float4 a, float4 b)
129 {
130 return (float4){(a.x > b.x) ? a.x : b.x,
131 (a.y > b.y) ? a.y : b.y,
132 (a.z > b.z) ? a.z : b.z,
133 (a.w > b.w) ? a.w : b.w};
134 }
135
136 void max(float4 *res,
137 float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
138 float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
139 {
140 res[0] = maxvec(tmp0x, tmp1x);
141 res[1] = maxvec(tmp0y, tmp1y);
142 res[2] = maxvec(tmp0z, tmp1z);
143 res[3] = maxvec(tmp0w, tmp1w);
144 }
145
146
147 void lit(float4 *res,
148 float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w)
149 {
150 const float4 zerovec = (float4) {0.0, 0.0, 0.0, 0.0};
151 const float4 min128 = (float4) {-128.f, -128.f, -128.f, -128.f};
152 const float4 plus128 = (float4) {128.f, 128.f, 128.f, 128.f};
153
154 res[0] = (float4){1.0, 1.0, 1.0, 1.0};
155 if (tmp0x.x > 0) {
156 float4 tmpy = maxvec(tmp0y, zerovec);
157 float4 tmpw = minvec(tmp0w, plus128);
158 tmpw = maxvec(tmpw, min128);
159 res[1] = tmp0x;
160 res[2] = powvec(tmpy, tmpw);
161 } else {
162 res[1] = zerovec;
163 res[2] = zerovec;
164 }
165 res[3] = (float4){1.0, 1.0, 1.0, 1.0};
166 }
167
168
169 float4 sqrtvec(float4 vec)
170 {
171 float4 p;
172 p.x = sqrtf(vec.x);
173 p.y = sqrtf(vec.y);
174 p.z = sqrtf(vec.z);
175 p.w = sqrtf(vec.w);
176 return p;
177 }
178
179 void rsq(float4 *res,
180 float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w)
181 {
182 const float4 onevec = (float4) {1., 1., 1., 1.};
183 res[0] = onevec/sqrtvec(absvec(tmp0x));
184 res[1] = onevec/sqrtvec(absvec(tmp0y));
185 res[2] = onevec/sqrtvec(absvec(tmp0z));
186 res[3] = onevec/sqrtvec(absvec(tmp0w));
187 }