gallivm: optimize lp_build_minify for sse
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_debug.cpp
1 /**************************************************************************
2 *
3 * Copyright 2009-2011 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include <stddef.h>
29
30 #include <llvm-c/Core.h>
31 #include <llvm/Target/TargetMachine.h>
32 #include <llvm/Target/TargetInstrInfo.h>
33 #include <llvm/Support/raw_ostream.h>
34 #include <llvm/Support/Format.h>
35 #include <llvm/Support/MemoryObject.h>
36
37 #if HAVE_LLVM >= 0x0300
38 #include <llvm/Support/TargetRegistry.h>
39 #else /* HAVE_LLVM < 0x0300 */
40 #include <llvm/Target/TargetRegistry.h>
41 #endif /* HAVE_LLVM < 0x0300 */
42
43 #if HAVE_LLVM >= 0x0209
44 #include <llvm/Support/Host.h>
45 #else /* HAVE_LLVM < 0x0209 */
46 #include <llvm/System/Host.h>
47 #endif /* HAVE_LLVM < 0x0209 */
48
49 #if HAVE_LLVM >= 0x0207
50 #include <llvm/MC/MCDisassembler.h>
51 #include <llvm/MC/MCAsmInfo.h>
52 #include <llvm/MC/MCInst.h>
53 #include <llvm/MC/MCInstPrinter.h>
54 #endif /* HAVE_LLVM >= 0x0207 */
55 #if HAVE_LLVM >= 0x0301
56 #include <llvm/MC/MCRegisterInfo.h>
57 #endif /* HAVE_LLVM >= 0x0301 */
58
59 #if HAVE_LLVM >= 0x0303
60 #include <llvm/ADT/OwningPtr.h>
61 #endif
62
63 #include "util/u_math.h"
64 #include "util/u_debug.h"
65
66 #include "lp_bld_debug.h"
67
68 #ifdef __linux__
69 #include <sys/stat.h>
70 #include <fcntl.h>
71 #endif
72
73
74
75 /**
76 * Check alignment.
77 *
78 * It is important that this check is not implemented as a macro or inlined
79 * function, as the compiler assumptions in respect to alignment of global
80 * and stack variables would often make the check a no op, defeating the
81 * whole purpose of the exercise.
82 */
83 extern "C" boolean
84 lp_check_alignment(const void *ptr, unsigned alignment)
85 {
86 assert(util_is_power_of_two(alignment));
87 return ((uintptr_t)ptr & (alignment - 1)) == 0;
88 }
89
90
91 class raw_debug_ostream :
92 public llvm::raw_ostream
93 {
94 private:
95 uint64_t pos;
96
97 public:
98 raw_debug_ostream() : pos(0) { }
99
100 void write_impl(const char *Ptr, size_t Size);
101
102 #if HAVE_LLVM >= 0x207
103 uint64_t current_pos() const { return pos; }
104 size_t preferred_buffer_size() const { return 512; }
105 #else
106 uint64_t current_pos() { return pos; }
107 size_t preferred_buffer_size() { return 512; }
108 #endif
109 };
110
111
112 void
113 raw_debug_ostream::write_impl(const char *Ptr, size_t Size)
114 {
115 if (Size > 0) {
116 char *lastPtr = (char *)&Ptr[Size];
117 char last = *lastPtr;
118 *lastPtr = 0;
119 _debug_printf("%*s", Size, Ptr);
120 *lastPtr = last;
121 pos += Size;
122 }
123 }
124
125
126 /**
127 * Same as LLVMDumpValue, but through our debugging channels.
128 */
129 extern "C" void
130 lp_debug_dump_value(LLVMValueRef value)
131 {
132 #if (defined(PIPE_OS_WINDOWS) && !defined(PIPE_CC_MSVC)) || defined(PIPE_OS_EMBDDED)
133 raw_debug_ostream os;
134 llvm::unwrap(value)->print(os);
135 os.flush();
136 #else
137 LLVMDumpValue(value);
138 #endif
139 }
140
141
142 #if HAVE_LLVM >= 0x0207
143 /*
144 * MemoryObject wrapper around a buffer of memory, to be used by MC
145 * disassembler.
146 */
147 class BufferMemoryObject:
148 public llvm::MemoryObject
149 {
150 private:
151 const uint8_t *Bytes;
152 uint64_t Length;
153 public:
154 BufferMemoryObject(const uint8_t *bytes, uint64_t length) :
155 Bytes(bytes), Length(length)
156 {
157 }
158
159 uint64_t getBase() const
160 {
161 return 0;
162 }
163
164 uint64_t getExtent() const
165 {
166 return Length;
167 }
168
169 int readByte(uint64_t addr, uint8_t *byte) const
170 {
171 if (addr > getExtent())
172 return -1;
173 *byte = Bytes[addr];
174 return 0;
175 }
176 };
177 #endif /* HAVE_LLVM >= 0x0207 */
178
179
180 /*
181 * Disassemble a function, using the LLVM MC disassembler.
182 *
183 * See also:
184 * - http://blog.llvm.org/2010/01/x86-disassembler.html
185 * - http://blog.llvm.org/2010/04/intro-to-llvm-mc-project.html
186 */
187 static size_t
188 disassemble(const void* func, llvm::raw_ostream & Out)
189 {
190 #if HAVE_LLVM >= 0x0207
191 using namespace llvm;
192
193 const uint8_t *bytes = (const uint8_t *)func;
194
195 /*
196 * Limit disassembly to this extent
197 */
198 const uint64_t extent = 96 * 1024;
199
200 uint64_t max_pc = 0;
201
202 /*
203 * Initialize all used objects.
204 */
205
206 #if HAVE_LLVM >= 0x0301
207 std::string Triple = sys::getDefaultTargetTriple();
208 #else
209 std::string Triple = sys::getHostTriple();
210 #endif
211
212 std::string Error;
213 const Target *T = TargetRegistry::lookupTarget(Triple, Error);
214
215 #if HAVE_LLVM >= 0x0304
216 OwningPtr<const MCAsmInfo> AsmInfo(T->createMCAsmInfo(*T->createMCRegInfo(Triple), Triple));
217 #elif HAVE_LLVM >= 0x0300
218 OwningPtr<const MCAsmInfo> AsmInfo(T->createMCAsmInfo(Triple));
219 #else
220 OwningPtr<const MCAsmInfo> AsmInfo(T->createAsmInfo(Triple));
221 #endif
222
223 if (!AsmInfo) {
224 Out << "error: no assembly info for target " << Triple << "\n";
225 return 0;
226 }
227
228 #if HAVE_LLVM >= 0x0300
229 const MCSubtargetInfo *STI = T->createMCSubtargetInfo(Triple, sys::getHostCPUName(), "");
230 OwningPtr<const MCDisassembler> DisAsm(T->createMCDisassembler(*STI));
231 #else
232 OwningPtr<const MCDisassembler> DisAsm(T->createMCDisassembler());
233 #endif
234 if (!DisAsm) {
235 Out << "error: no disassembler for target " << Triple << "\n";
236 return 0;
237 }
238
239 #if HAVE_LLVM >= 0x0300
240 unsigned int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
241 #else
242 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
243 #endif
244
245 #if HAVE_LLVM >= 0x0301
246 OwningPtr<const MCRegisterInfo> MRI(T->createMCRegInfo(Triple));
247 if (!MRI) {
248 Out << "error: no register info for target " << Triple.c_str() << "\n";
249 return 0;
250 }
251
252 OwningPtr<const MCInstrInfo> MII(T->createMCInstrInfo());
253 if (!MII) {
254 Out << "error: no instruction info for target " << Triple.c_str() << "\n";
255 return 0;
256 }
257 #endif
258
259 #if HAVE_LLVM >= 0x0301
260 OwningPtr<MCInstPrinter> Printer(
261 T->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
262 #elif HAVE_LLVM == 0x0300
263 OwningPtr<MCInstPrinter> Printer(
264 T->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *STI));
265 #elif HAVE_LLVM >= 0x0208
266 OwningPtr<MCInstPrinter> Printer(
267 T->createMCInstPrinter(AsmPrinterVariant, *AsmInfo));
268 #else
269 OwningPtr<MCInstPrinter> Printer(
270 T->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, Out));
271 #endif
272 if (!Printer) {
273 Out << "error: no instruction printer for target " << Triple.c_str() << "\n";
274 return 0;
275 }
276
277 #if HAVE_LLVM >= 0x0301
278 TargetOptions options;
279 #if defined(DEBUG)
280 options.JITEmitDebugInfo = true;
281 #endif
282 #if defined(PIPE_ARCH_X86)
283 options.StackAlignmentOverride = 4;
284 #endif
285 #if defined(DEBUG) || defined(PROFILE)
286 options.NoFramePointerElim = true;
287 #endif
288 TargetMachine *TM = T->createTargetMachine(Triple, sys::getHostCPUName(), "", options);
289 #elif HAVE_LLVM == 0x0300
290 TargetMachine *TM = T->createTargetMachine(Triple, sys::getHostCPUName(), "");
291 #else
292 TargetMachine *TM = T->createTargetMachine(Triple, "");
293 #endif
294
295 const TargetInstrInfo *TII = TM->getInstrInfo();
296
297 /*
298 * Wrap the data in a MemoryObject
299 */
300 BufferMemoryObject memoryObject((const uint8_t *)bytes, extent);
301
302 uint64_t pc;
303 pc = 0;
304 while (true) {
305 MCInst Inst;
306 uint64_t Size;
307
308 /*
309 * Print address. We use addresses relative to the start of the function,
310 * so that between runs.
311 */
312
313 Out << llvm::format("%6lu:\t", (unsigned long)pc);
314
315 if (!DisAsm->getInstruction(Inst, Size, memoryObject,
316 pc,
317 #if HAVE_LLVM >= 0x0300
318 nulls(), nulls())) {
319 #else
320 nulls())) {
321 #endif
322 Out << "invalid";
323 pc += 1;
324 }
325
326 /*
327 * Output the bytes in hexidecimal format.
328 */
329
330 if (0) {
331 unsigned i;
332 for (i = 0; i < Size; ++i) {
333 Out << llvm::format("%02x ", ((const uint8_t*)bytes)[pc + i]);
334 }
335 for (; i < 16; ++i) {
336 Out << " ";
337 }
338 }
339
340 /*
341 * Print the instruction.
342 */
343 #if HAVE_LLVM >= 0x0300
344 Printer->printInst(&Inst, Out, "");
345 #elif HAVE_LLVM >= 0x208
346 Printer->printInst(&Inst, Out);
347 #else
348 Printer->printInst(&Inst);
349 #endif
350
351 /*
352 * Advance.
353 */
354
355 pc += Size;
356
357 #if HAVE_LLVM >= 0x0300
358 const MCInstrDesc &TID = TII->get(Inst.getOpcode());
359 #else
360 const TargetInstrDesc &TID = TII->get(Inst.getOpcode());
361 #endif
362
363 /*
364 * Keep track of forward jumps to a nearby address.
365 */
366
367 if (TID.isBranch()) {
368 for (unsigned i = 0; i < Inst.getNumOperands(); ++i) {
369 const MCOperand &operand = Inst.getOperand(i);
370 if (operand.isImm()) {
371 uint64_t jump;
372
373 /*
374 * FIXME: Handle both relative and absolute addresses correctly.
375 * EDInstInfo actually has this info, but operandTypes and
376 * operandFlags enums are not exposed in the public interface.
377 */
378
379 if (1) {
380 /*
381 * PC relative addr.
382 */
383
384 jump = pc + operand.getImm();
385 } else {
386 /*
387 * Absolute addr.
388 */
389
390 jump = (uint64_t)operand.getImm();
391 }
392
393 /*
394 * Output the address relative to the function start, given
395 * that MC will print the addresses relative the current pc.
396 */
397 Out << "\t\t; " << jump;
398
399 /*
400 * Ignore far jumps given it could be actually a tail return to
401 * a random address.
402 */
403
404 if (jump > max_pc &&
405 jump < extent) {
406 max_pc = jump;
407 }
408 }
409 }
410 }
411
412 Out << "\n";
413
414 /*
415 * Stop disassembling on return statements, if there is no record of a
416 * jump to a successive address.
417 */
418
419 if (TID.isReturn()) {
420 if (pc > max_pc) {
421 break;
422 }
423 }
424 }
425
426 /*
427 * Print GDB command, useful to verify output.
428 */
429
430 if (0) {
431 _debug_printf("disassemble %p %p\n", bytes, bytes + pc);
432 }
433
434 Out << "\n";
435 Out.flush();
436
437 return pc;
438 #else /* HAVE_LLVM < 0x0207 */
439 (void)func;
440 return 0;
441 #endif /* HAVE_LLVM < 0x0207 */
442 }
443
444
445 extern "C" void
446 lp_disassemble(LLVMValueRef func, const void *code) {
447 raw_debug_ostream Out;
448 disassemble(code, Out);
449 }
450
451
452 /*
453 * Linux perf profiler integration.
454 *
455 * See also:
456 * - http://penberg.blogspot.co.uk/2009/06/jato-has-profiler.html
457 * - https://github.com/penberg/jato/commit/73ad86847329d99d51b386f5aba692580d1f8fdc
458 * - http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=commitdiff;h=80d496be89ed7dede5abee5c057634e80a31c82d
459 */
460 extern "C" void
461 lp_profile(LLVMValueRef func, const void *code)
462 {
463 #if defined(__linux__) && (defined(DEBUG) || defined(PROFILE))
464 static boolean first_time = TRUE;
465 static FILE *perf_map_file = NULL;
466 static int perf_asm_fd = -1;
467 if (first_time) {
468 /*
469 * We rely on the disassembler for determining a function's size, but
470 * the disassembly is a leaky and slow operation, so avoid running
471 * this except when running inside linux perf, which can be inferred
472 * by the PERF_BUILDID_DIR environment variable.
473 */
474 if (getenv("PERF_BUILDID_DIR")) {
475 pid_t pid = getpid();
476 char filename[256];
477 util_snprintf(filename, sizeof filename, "/tmp/perf-%llu.map", (unsigned long long)pid);
478 perf_map_file = fopen(filename, "wt");
479 util_snprintf(filename, sizeof filename, "/tmp/perf-%llu.map.asm", (unsigned long long)pid);
480 mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
481 perf_asm_fd = open(filename, O_WRONLY | O_CREAT, mode);
482 }
483 first_time = FALSE;
484 }
485 if (perf_map_file) {
486 const char *symbol = LLVMGetValueName(func);
487 unsigned long addr = (uintptr_t)code;
488 llvm::raw_fd_ostream Out(perf_asm_fd, false);
489 Out << symbol << ":\n";
490 unsigned long size = disassemble(code, Out);
491 fprintf(perf_map_file, "%lx %lx %s\n", addr, size, symbol);
492 fflush(perf_map_file);
493 }
494 #else
495 (void)func;
496 (void)code;
497 #endif
498 }
499
500