e153389e6a9bd0b7041fc0b74a9fbb80b564b908
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_intr.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 /**
30 * @file
31 * Helpers for emitting intrinsic calls.
32 *
33 * LLVM vanilla IR doesn't represent all basic arithmetic operations we care
34 * about, and it is often necessary to resort to target-specific intrinsics for
35 * performance or convenience.
36 *
37 * Ideally we would like to stay away from target specific intrinsics and
38 * move all the instruction selection logic into upstream LLVM where it belongs.
39 *
40 * These functions are also used for calling C functions provided by us from
41 * generated LLVM code.
42 *
43 * @author Jose Fonseca <jfonseca@vmware.com>
44 */
45
46
47 #include "util/u_debug.h"
48 #include "util/u_string.h"
49
50 #include "lp_bld_const.h"
51 #include "lp_bld_intr.h"
52 #include "lp_bld_type.h"
53 #include "lp_bld_pack.h"
54
55
56 void
57 lp_format_intrinsic(char *name,
58 size_t size,
59 const char *name_root,
60 LLVMTypeRef type)
61 {
62 unsigned length = 0;
63 unsigned width;
64 char c;
65
66 LLVMTypeKind kind = LLVMGetTypeKind(type);
67 if (kind == LLVMVectorTypeKind) {
68 length = LLVMGetVectorSize(type);
69 type = LLVMGetElementType(type);
70 kind = LLVMGetTypeKind(type);
71 }
72
73 switch (kind) {
74 case LLVMIntegerTypeKind:
75 c = 'i';
76 width = LLVMGetIntTypeWidth(type);
77 break;
78 case LLVMFloatTypeKind:
79 c = 'f';
80 width = 32;
81 break;
82 case LLVMDoubleTypeKind:
83 c = 'f';
84 width = 64;
85 break;
86 default:
87 assert(0);
88 }
89
90 if (length) {
91 util_snprintf(name, size, "%s.v%u%c%u", name_root, length, c, width);
92 } else {
93 util_snprintf(name, size, "%s.%c%u", name_root, c, width);
94 }
95 }
96
97
98 LLVMValueRef
99 lp_declare_intrinsic(LLVMModuleRef module,
100 const char *name,
101 LLVMTypeRef ret_type,
102 LLVMTypeRef *arg_types,
103 unsigned num_args)
104 {
105 LLVMTypeRef function_type;
106 LLVMValueRef function;
107
108 assert(!LLVMGetNamedFunction(module, name));
109
110 function_type = LLVMFunctionType(ret_type, arg_types, num_args, 0);
111 function = LLVMAddFunction(module, name, function_type);
112
113 LLVMSetFunctionCallConv(function, LLVMCCallConv);
114 LLVMSetLinkage(function, LLVMExternalLinkage);
115
116 assert(LLVMIsDeclaration(function));
117
118 return function;
119 }
120
121
122 LLVMValueRef
123 lp_build_intrinsic(LLVMBuilderRef builder,
124 const char *name,
125 LLVMTypeRef ret_type,
126 LLVMValueRef *args,
127 unsigned num_args,
128 LLVMAttribute attr)
129 {
130 LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
131 LLVMValueRef function;
132
133 function = LLVMGetNamedFunction(module, name);
134 if(!function) {
135 LLVMTypeRef arg_types[LP_MAX_FUNC_ARGS];
136 unsigned i;
137
138 assert(num_args <= LP_MAX_FUNC_ARGS);
139
140 for(i = 0; i < num_args; ++i) {
141 assert(args[i]);
142 arg_types[i] = LLVMTypeOf(args[i]);
143 }
144
145 function = lp_declare_intrinsic(module, name, ret_type, arg_types, num_args);
146
147 if (attr)
148 LLVMAddFunctionAttr(function, attr);
149 }
150
151 return LLVMBuildCall(builder, function, args, num_args, "");
152 }
153
154
155 LLVMValueRef
156 lp_build_intrinsic_unary(LLVMBuilderRef builder,
157 const char *name,
158 LLVMTypeRef ret_type,
159 LLVMValueRef a)
160 {
161 return lp_build_intrinsic(builder, name, ret_type, &a, 1, 0);
162 }
163
164
165 LLVMValueRef
166 lp_build_intrinsic_binary(LLVMBuilderRef builder,
167 const char *name,
168 LLVMTypeRef ret_type,
169 LLVMValueRef a,
170 LLVMValueRef b)
171 {
172 LLVMValueRef args[2];
173
174 args[0] = a;
175 args[1] = b;
176
177 return lp_build_intrinsic(builder, name, ret_type, args, 2, 0);
178 }
179
180
181 /**
182 * Call intrinsic with arguments adapted to intrinsic vector length.
183 *
184 * Split vectors which are too large for the hw, or expand them if they
185 * are too small, so a caller calling a function which might use intrinsics
186 * doesn't need to do splitting/expansion on its own.
187 * This only supports intrinsics where src and dst types match.
188 */
189 LLVMValueRef
190 lp_build_intrinsic_binary_anylength(struct gallivm_state *gallivm,
191 const char *name,
192 struct lp_type src_type,
193 unsigned intr_size,
194 LLVMValueRef a,
195 LLVMValueRef b)
196 {
197 unsigned i;
198 struct lp_type intrin_type = src_type;
199 LLVMBuilderRef builder = gallivm->builder;
200 LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
201 LLVMValueRef anative, bnative;
202 unsigned intrin_length = intr_size / src_type.width;
203
204 intrin_type.length = intrin_length;
205
206 if (intrin_length > src_type.length) {
207 LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
208 LLVMValueRef constvec, tmp;
209
210 for (i = 0; i < src_type.length; i++) {
211 elems[i] = lp_build_const_int32(gallivm, i);
212 }
213 for (; i < intrin_length; i++) {
214 elems[i] = i32undef;
215 }
216 if (src_type.length == 1) {
217 LLVMTypeRef elem_type = lp_build_elem_type(gallivm, intrin_type);
218 a = LLVMBuildBitCast(builder, a, LLVMVectorType(elem_type, 1), "");
219 b = LLVMBuildBitCast(builder, b, LLVMVectorType(elem_type, 1), "");
220 }
221 constvec = LLVMConstVector(elems, intrin_length);
222 anative = LLVMBuildShuffleVector(builder, a, a, constvec, "");
223 bnative = LLVMBuildShuffleVector(builder, b, b, constvec, "");
224 tmp = lp_build_intrinsic_binary(builder, name,
225 lp_build_vec_type(gallivm, intrin_type),
226 anative, bnative);
227 if (src_type.length > 1) {
228 constvec = LLVMConstVector(elems, src_type.length);
229 return LLVMBuildShuffleVector(builder, tmp, tmp, constvec, "");
230 }
231 else {
232 return LLVMBuildExtractElement(builder, tmp, elems[0], "");
233 }
234 }
235 else if (intrin_length < src_type.length) {
236 unsigned num_vec = src_type.length / intrin_length;
237 LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
238
239 /* don't support arbitrary size here as this is so yuck */
240 if (src_type.length % intrin_length) {
241 /* FIXME: This is something which should be supported
242 * but there doesn't seem to be any need for it currently
243 * so crash and burn.
244 */
245 debug_printf("%s: should handle arbitrary vector size\n",
246 __FUNCTION__);
247 assert(0);
248 return NULL;
249 }
250
251 for (i = 0; i < num_vec; i++) {
252 anative = lp_build_extract_range(gallivm, a, i*intrin_length,
253 intrin_length);
254 bnative = lp_build_extract_range(gallivm, b, i*intrin_length,
255 intrin_length);
256 tmp[i] = lp_build_intrinsic_binary(builder, name,
257 lp_build_vec_type(gallivm, intrin_type),
258 anative, bnative);
259 }
260 return lp_build_concat(gallivm, tmp, intrin_type, num_vec);
261 }
262 else {
263 return lp_build_intrinsic_binary(builder, name,
264 lp_build_vec_type(gallivm, src_type),
265 a, b);
266 }
267 }
268
269
270 LLVMValueRef
271 lp_build_intrinsic_map(struct gallivm_state *gallivm,
272 const char *name,
273 LLVMTypeRef ret_type,
274 LLVMValueRef *args,
275 unsigned num_args)
276 {
277 LLVMBuilderRef builder = gallivm->builder;
278 LLVMTypeRef ret_elem_type = LLVMGetElementType(ret_type);
279 unsigned n = LLVMGetVectorSize(ret_type);
280 unsigned i, j;
281 LLVMValueRef res;
282
283 assert(num_args <= LP_MAX_FUNC_ARGS);
284
285 res = LLVMGetUndef(ret_type);
286 for(i = 0; i < n; ++i) {
287 LLVMValueRef index = lp_build_const_int32(gallivm, i);
288 LLVMValueRef arg_elems[LP_MAX_FUNC_ARGS];
289 LLVMValueRef res_elem;
290 for(j = 0; j < num_args; ++j)
291 arg_elems[j] = LLVMBuildExtractElement(builder, args[j], index, "");
292 res_elem = lp_build_intrinsic(builder, name, ret_elem_type, arg_elems, num_args, 0);
293 res = LLVMBuildInsertElement(builder, res, res_elem, index, "");
294 }
295
296 return res;
297 }
298
299
300 LLVMValueRef
301 lp_build_intrinsic_map_unary(struct gallivm_state *gallivm,
302 const char *name,
303 LLVMTypeRef ret_type,
304 LLVMValueRef a)
305 {
306 return lp_build_intrinsic_map(gallivm, name, ret_type, &a, 1);
307 }
308
309
310 LLVMValueRef
311 lp_build_intrinsic_map_binary(struct gallivm_state *gallivm,
312 const char *name,
313 LLVMTypeRef ret_type,
314 LLVMValueRef a,
315 LLVMValueRef b)
316 {
317 LLVMValueRef args[2];
318
319 args[0] = a;
320 args[1] = b;
321
322 return lp_build_intrinsic_map(gallivm, name, ret_type, args, 2);
323 }
324
325