swr: [rasterizer jitter] fix assert in AVX implementation of MASKLOADD
[mesa.git] / src / gallium / drivers / swr / rasterizer / jitter / JitManager.cpp
1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file JitManager.cpp
24 *
25 * @brief Implementation of the Jit Manager.
26 *
27 * Notes:
28 *
29 ******************************************************************************/
30 #if defined(_WIN32)
31 #pragma warning(disable: 4800 4146 4244 4267 4355 4996)
32 #endif
33
34 #include "jit_api.h"
35 #include "JitManager.h"
36 #include "fetch_jit.h"
37
38 #if defined(_WIN32)
39 #include "llvm/ADT/Triple.h"
40 #endif
41 #include "llvm/IR/Function.h"
42 #include "llvm/Support/DynamicLibrary.h"
43
44 #include "llvm/Support/MemoryBuffer.h"
45 #include "llvm/Support/SourceMgr.h"
46
47 #include "llvm/Analysis/CFGPrinter.h"
48 #include "llvm/IRReader/IRReader.h"
49 #include "llvm/Target/TargetMachine.h"
50 #include "llvm/Support/FormattedStream.h"
51
52 #if LLVM_USE_INTEL_JITEVENTS
53 #include "llvm/ExecutionEngine/JITEventListener.h"
54 #endif
55
56 #include "core/state.h"
57 #include "common/containers.hpp"
58
59 #include "state_llvm.h"
60
#include <sstream>
#include <string>
62 #if defined(_WIN32)
63 #include <psapi.h>
64 #include <cstring>
65
66 #define INTEL_OUTPUT_DIR "c:\\Intel"
67 #define SWR_OUTPUT_DIR INTEL_OUTPUT_DIR "\\SWR"
68 #define JITTER_OUTPUT_DIR SWR_OUTPUT_DIR "\\Jitter"
69 #endif
70
71 using namespace llvm;
72
73 //////////////////////////////////////////////////////////////////////////
74 /// @brief Contructor for JitManager.
75 /// @param simdWidth - SIMD width to be used in generated program.
76 JitManager::JitManager(uint32_t simdWidth, const char *arch)
77 : mContext(), mBuilder(mContext), mIsModuleFinalized(true), mJitNumber(0), mVWidth(simdWidth), mArch(arch)
78 {
79 InitializeNativeTarget();
80 InitializeNativeTargetAsmPrinter();
81 InitializeNativeTargetDisassembler();
82
83 TargetOptions tOpts;
84 tOpts.AllowFPOpFusion = FPOpFusion::Fast;
85 tOpts.NoInfsFPMath = false;
86 tOpts.NoNaNsFPMath = false;
87 tOpts.UnsafeFPMath = true;
88 #if defined(_DEBUG)
89 #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 7
90 tOpts.NoFramePointerElim = true;
91 #endif
92 #endif
93
94 //tOpts.PrintMachineCode = true;
95
96 std::stringstream fnName("JitModule", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
97 fnName << mJitNumber++;
98 std::unique_ptr<Module> newModule(new Module(fnName.str(), mContext));
99 mpCurrentModule = newModule.get();
100
101 auto &&EB = EngineBuilder(std::move(newModule));
102 EB.setTargetOptions(tOpts);
103 EB.setOptLevel(CodeGenOpt::Aggressive);
104
105 StringRef hostCPUName;
106
107 // force JIT to use the same CPU arch as the rest of swr
108 if(mArch.AVX512F())
109 {
110 assert(0 && "Implement AVX512 jitter");
111 hostCPUName = sys::getHostCPUName();
112 if (mVWidth == 0)
113 {
114 mVWidth = 16;
115 }
116 }
117 else if(mArch.AVX2())
118 {
119 hostCPUName = StringRef("core-avx2");
120 if (mVWidth == 0)
121 {
122 mVWidth = 8;
123 }
124 }
125 else if(mArch.AVX())
126 {
127 if (mArch.F16C())
128 {
129 hostCPUName = StringRef("core-avx-i");
130 }
131 else
132 {
133 hostCPUName = StringRef("corei7-avx");
134 }
135 if (mVWidth == 0)
136 {
137 mVWidth = 8;
138 }
139 }
140 else
141 {
142 hostCPUName = sys::getHostCPUName();
143 if (mVWidth == 0)
144 {
145 mVWidth = 8; // 4?
146 }
147 }
148
149 EB.setMCPU(hostCPUName);
150
151 #if defined(_WIN32)
152 // Needed for MCJIT on windows
153 Triple hostTriple(sys::getProcessTriple());
154 hostTriple.setObjectFormat(Triple::ELF);
155 mpCurrentModule->setTargetTriple(hostTriple.getTriple());
156 #endif // _WIN32
157
158 mpExec = EB.create();
159
160 #if LLVM_USE_INTEL_JITEVENTS
161 JITEventListener *vTune = JITEventListener::createIntelJITEventListener();
162 mpExec->RegisterJITEventListener(vTune);
163 #endif
164
165 mFP32Ty = Type::getFloatTy(mContext); // float type
166 mInt8Ty = Type::getInt8Ty(mContext);
167 mInt32Ty = Type::getInt32Ty(mContext); // int type
168 mInt64Ty = Type::getInt64Ty(mContext); // int type
169 mV4FP32Ty = StructType::get(mContext, std::vector<Type*>(4, mFP32Ty), false); // vector4 float type (represented as structure)
170 mV4Int32Ty = StructType::get(mContext, std::vector<Type*>(4, mInt32Ty), false); // vector4 int type
171
172 // fetch function signature
173 // typedef void(__cdecl *PFN_FETCH_FUNC)(SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
174 std::vector<Type*> fsArgs;
175 fsArgs.push_back(PointerType::get(Gen_SWR_FETCH_CONTEXT(this), 0));
176 fsArgs.push_back(PointerType::get(Gen_simdvertex(this), 0));
177
178 mFetchShaderTy = FunctionType::get(Type::getVoidTy(mContext), fsArgs, false);
179
180 mSimtFP32Ty = VectorType::get(mFP32Ty, mVWidth);
181 mSimtInt32Ty = VectorType::get(mInt32Ty, mVWidth);
182
183 mSimdVectorTy = StructType::get(mContext, std::vector<Type*>(4, mSimtFP32Ty), false);
184 mSimdVectorInt32Ty = StructType::get(mContext, std::vector<Type*>(4, mSimtInt32Ty), false);
185
186 #if defined(_WIN32)
187 // explicitly instantiate used symbols from potentially staticly linked libs
188 sys::DynamicLibrary::AddSymbol("exp2f", &exp2f);
189 sys::DynamicLibrary::AddSymbol("log2f", &log2f);
190 sys::DynamicLibrary::AddSymbol("sinf", &sinf);
191 sys::DynamicLibrary::AddSymbol("cosf", &cosf);
192 sys::DynamicLibrary::AddSymbol("powf", &powf);
193 #endif
194
195 #if defined(_WIN32)
196 if (KNOB_DUMP_SHADER_IR)
197 {
198 CreateDirectory(INTEL_OUTPUT_DIR, NULL);
199 CreateDirectory(SWR_OUTPUT_DIR, NULL);
200 CreateDirectory(JITTER_OUTPUT_DIR, NULL);
201 }
202 #endif
203 }
204
205 //////////////////////////////////////////////////////////////////////////
206 /// @brief Create new LLVM module.
207 void JitManager::SetupNewModule()
208 {
209 SWR_ASSERT(mIsModuleFinalized == true && "Current module is not finalized!");
210
211 std::stringstream fnName("JitModule", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
212 fnName << mJitNumber++;
213 std::unique_ptr<Module> newModule(new Module(fnName.str(), mContext));
214 mpCurrentModule = newModule.get();
215 #if defined(_WIN32)
216 // Needed for MCJIT on windows
217 Triple hostTriple(sys::getProcessTriple());
218 hostTriple.setObjectFormat(Triple::ELF);
219 newModule->setTargetTriple(hostTriple.getTriple());
220 #endif // _WIN32
221
222 mpExec->addModule(std::move(newModule));
223 mIsModuleFinalized = false;
224 }
225
226 //////////////////////////////////////////////////////////////////////////
227 /// @brief Create new LLVM module from IR.
228 bool JitManager::SetupModuleFromIR(const uint8_t *pIR)
229 {
230 std::unique_ptr<MemoryBuffer> pMem = MemoryBuffer::getMemBuffer(StringRef((const char*)pIR), "");
231
232 SMDiagnostic Err;
233 std::unique_ptr<Module> newModule = parseIR(pMem.get()->getMemBufferRef(), Err, mContext);
234
235 if (newModule == nullptr)
236 {
237 SWR_ASSERT(0, "Parse failed! Check Err for details.");
238 return false;
239 }
240
241 mpCurrentModule = newModule.get();
242 #if defined(_WIN32)
243 // Needed for MCJIT on windows
244 Triple hostTriple(sys::getProcessTriple());
245 hostTriple.setObjectFormat(Triple::ELF);
246 newModule->setTargetTriple(hostTriple.getTriple());
247 #endif // _WIN32
248
249 mpExec->addModule(std::move(newModule));
250 mIsModuleFinalized = false;
251
252 return true;
253 }
254
255
256 //////////////////////////////////////////////////////////////////////////
257 /// @brief Dump function x86 assembly to file.
258 /// @note This should only be called after the module has been jitted to x86 and the
259 /// module will not be further accessed.
260 void JitManager::DumpAsm(Function* pFunction, const char* fileName)
261 {
262 if (KNOB_DUMP_SHADER_IR)
263 {
264
265 #if defined(_WIN32)
266 DWORD pid = GetCurrentProcessId();
267 TCHAR procname[MAX_PATH];
268 GetModuleFileName(NULL, procname, MAX_PATH);
269 const char* pBaseName = strrchr(procname, '\\');
270 std::stringstream outDir;
271 outDir << JITTER_OUTPUT_DIR << pBaseName << "_" << pid << std::ends;
272 CreateDirectory(outDir.str().c_str(), NULL);
273 #endif
274
275 std::error_code EC;
276 Module* pModule = pFunction->getParent();
277 const char *funcName = pFunction->getName().data();
278 char fName[256];
279 #if defined(_WIN32)
280 sprintf(fName, "%s\\%s.%s.asm", outDir.str().c_str(), funcName, fileName);
281 #else
282 sprintf(fName, "%s.%s.asm", funcName, fileName);
283 #endif
284
285 #if HAVE_LLVM == 0x306
286 raw_fd_ostream fd(fName, EC, llvm::sys::fs::F_None);
287 formatted_raw_ostream filestream(fd);
288 #else
289 raw_fd_ostream filestream(fName, EC, llvm::sys::fs::F_None);
290 #endif
291
292 legacy::PassManager* pMPasses = new legacy::PassManager();
293 auto* pTarget = mpExec->getTargetMachine();
294 pTarget->Options.MCOptions.AsmVerbose = true;
295 pTarget->addPassesToEmitFile(*pMPasses, filestream, TargetMachine::CGFT_AssemblyFile);
296 pMPasses->run(*pModule);
297 delete pMPasses;
298 pTarget->Options.MCOptions.AsmVerbose = false;
299 }
300 }
301
302 //////////////////////////////////////////////////////////////////////////
303 /// @brief Dump function to file.
304 void JitManager::DumpToFile(Function *f, const char *fileName)
305 {
306 if (KNOB_DUMP_SHADER_IR)
307 {
308 #if defined(_WIN32)
309 DWORD pid = GetCurrentProcessId();
310 TCHAR procname[MAX_PATH];
311 GetModuleFileName(NULL, procname, MAX_PATH);
312 const char* pBaseName = strrchr(procname, '\\');
313 std::stringstream outDir;
314 outDir << JITTER_OUTPUT_DIR << pBaseName << "_" << pid << std::ends;
315 CreateDirectory(outDir.str().c_str(), NULL);
316 #endif
317
318 std::error_code EC;
319 const char *funcName = f->getName().data();
320 char fName[256];
321 #if defined(_WIN32)
322 sprintf(fName, "%s\\%s.%s.ll", outDir.str().c_str(), funcName, fileName);
323 #else
324 sprintf(fName, "%s.%s.ll", funcName, fileName);
325 #endif
326 raw_fd_ostream fd(fName, EC, llvm::sys::fs::F_None);
327 Module* pModule = f->getParent();
328 pModule->print(fd, nullptr);
329
330 #if defined(_WIN32)
331 sprintf(fName, "%s\\cfg.%s.%s.dot", outDir.str().c_str(), funcName, fileName);
332 #else
333 sprintf(fName, "cfg.%s.%s.dot", funcName, fileName);
334 #endif
335 fd.flush();
336
337 raw_fd_ostream fd_cfg(fName, EC, llvm::sys::fs::F_Text);
338 WriteGraph(fd_cfg, (const Function*)f);
339
340 fd_cfg.flush();
341 }
342 }
343
344 extern "C"
345 {
346 //////////////////////////////////////////////////////////////////////////
347 /// @brief Create JIT context.
348 /// @param simdWidth - SIMD width to be used in generated program.
349 HANDLE JITCALL JitCreateContext(uint32_t targetSimdWidth, const char* arch)
350 {
351 return new JitManager(targetSimdWidth, arch);
352 }
353
354 //////////////////////////////////////////////////////////////////////////
355 /// @brief Destroy JIT context.
356 void JITCALL JitDestroyContext(HANDLE hJitContext)
357 {
358 delete reinterpret_cast<JitManager*>(hJitContext);
359 }
360 }