radeonsi: keep using v_rcp_f32 for division in future LLVM (v2)
[mesa.git] / src / gallium / drivers / radeon / radeon_llvm_util.c
1 /*
2 * Copyright 2012, 2013 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors: Tom Stellard <thomas.stellard@amd.com>
24 *
25 */
26
27 #include "radeon_llvm_util.h"
28 #include "util/u_memory.h"
29
30 #include <llvm-c/BitReader.h>
31 #include <llvm-c/Core.h>
32 #include <llvm-c/Target.h>
33 #include <llvm-c/Transforms/IPO.h>
34 #include <llvm-c/Transforms/PassManagerBuilder.h>
35
36 LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx,
37 const char * bitcode, unsigned bitcode_len)
38 {
39 LLVMMemoryBufferRef buf;
40 LLVMModuleRef module;
41
42 buf = LLVMCreateMemoryBufferWithMemoryRangeCopy((const char*)bitcode,
43 bitcode_len, "radeon");
44 LLVMParseBitcodeInContext(ctx, buf, &module, NULL);
45 LLVMDisposeMemoryBuffer(buf);
46 return module;
47 }
48
49 unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx,
50 const char *bitcode, unsigned bitcode_len)
51 {
52 LLVMModuleRef mod = radeon_llvm_parse_bitcode(ctx, bitcode, bitcode_len);
53 return LLVMGetNamedMetadataNumOperands(mod, "opencl.kernels");
54 }
55
56 static void radeon_llvm_optimize(LLVMModuleRef mod)
57 {
58 #if HAVE_LLVM < 0x0309
59 const char *data_layout = LLVMGetDataLayout(mod);
60 LLVMTargetDataRef TD = LLVMCreateTargetData(data_layout);
61 #endif
62 LLVMPassManagerBuilderRef builder = LLVMPassManagerBuilderCreate();
63 LLVMPassManagerRef pass_manager = LLVMCreatePassManager();
64
65 /* Functions calls are not supported yet, so we need to inline
66 * everything. The most efficient way to do this is to add
67 * the always_inline attribute to all non-kernel functions
68 * and then run the Always Inline pass. The Always Inline
69 * pass will automaically inline functions with this attribute
70 * and does not perform the expensive cost analysis that the normal
71 * inliner does.
72 */
73
74 LLVMValueRef fn;
75 for (fn = LLVMGetFirstFunction(mod); fn; fn = LLVMGetNextFunction(fn)) {
76 /* All the non-kernel functions have internal linkage */
77 if (LLVMGetLinkage(fn) == LLVMInternalLinkage) {
78 LLVMAddFunctionAttr(fn, LLVMAlwaysInlineAttribute);
79 }
80 }
81
82 #if HAVE_LLVM < 0x0309
83 LLVMAddTargetData(TD, pass_manager);
84 #endif
85 LLVMAddAlwaysInlinerPass(pass_manager);
86 LLVMPassManagerBuilderPopulateModulePassManager(builder, pass_manager);
87
88 LLVMRunPassManager(pass_manager, mod);
89 LLVMPassManagerBuilderDispose(builder);
90 LLVMDisposePassManager(pass_manager);
91 #if HAVE_LLVM < 0x0309
92 LLVMDisposeTargetData(TD);
93 #endif
94 }
95
96 LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
97 const char *bitcode, unsigned bitcode_len)
98 {
99 LLVMModuleRef mod;
100 unsigned num_kernels;
101 LLVMValueRef *kernel_metadata;
102 unsigned i;
103
104 mod = radeon_llvm_parse_bitcode(ctx, bitcode, bitcode_len);
105 num_kernels = LLVMGetNamedMetadataNumOperands(mod, "opencl.kernels");
106 kernel_metadata = MALLOC(num_kernels * sizeof(LLVMValueRef));
107 LLVMGetNamedMetadataOperands(mod, "opencl.kernels", kernel_metadata);
108 for (i = 0; i < num_kernels; i++) {
109 LLVMValueRef kernel_signature, *kernel_function;
110 unsigned num_kernel_md_operands;
111 if (i == index) {
112 continue;
113 }
114 kernel_signature = kernel_metadata[i];
115 num_kernel_md_operands = LLVMGetMDNodeNumOperands(kernel_signature);
116 kernel_function = MALLOC(num_kernel_md_operands * sizeof (LLVMValueRef));
117 LLVMGetMDNodeOperands(kernel_signature, kernel_function);
118 LLVMDeleteFunction(*kernel_function);
119 FREE(kernel_function);
120 }
121 FREE(kernel_metadata);
122 radeon_llvm_optimize(mod);
123 return mod;
124 }