From b8f6858fcb762b47ca2ad30efd286bd203042f17 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jos=C3=A9=20Fonseca?= Date: Wed, 17 Apr 2013 13:32:15 +0100 Subject: [PATCH] gallivm: JIT symbol resolution with linux perf. Details on docs/llvmpipe.html Reviewed-by: Brian Paul Reviewed-by: Roland Scheidegger --- bin/perf-annotate-jit | 251 ++++++++++++++++++ configure.ac | 7 - docs/llvmpipe.html | 40 +-- .../auxiliary/gallivm/lp_bld_debug.cpp | 117 ++++++-- src/gallium/auxiliary/gallivm/lp_bld_debug.h | 6 +- src/gallium/auxiliary/gallivm/lp_bld_init.c | 11 +- src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 23 -- src/gallium/auxiliary/gallivm/lp_bld_misc.h | 3 - 8 files changed, 372 insertions(+), 86 deletions(-) create mode 100755 bin/perf-annotate-jit diff --git a/bin/perf-annotate-jit b/bin/perf-annotate-jit new file mode 100755 index 00000000000..746434008fd --- /dev/null +++ b/bin/perf-annotate-jit @@ -0,0 +1,251 @@ +#!/usr/bin/env python +# +# Copyright 2012 VMware Inc +# Copyright 2008-2009 Jose Fonseca +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# + +"""Perf annotate for JIT code. + +Linux `perf annotate` does not work with JIT code. This script takes the data +produced by `perf script` command, plus the diassemblies outputed by gallivm +into /tmp/perf-XXXXX.map.asm and produces output similar to `perf annotate`. + +See docs/llvmpipe.html for usage instructions. + +The `perf script` output parser was derived from the gprof2dot.py script. +""" + + +import sys +import os.path +import re +import optparse +import subprocess + + +class Parser: + """Parser interface.""" + + def __init__(self): + pass + + def parse(self): + raise NotImplementedError + + +class LineParser(Parser): + """Base class for parsers that read line-based formats.""" + + def __init__(self, file): + Parser.__init__(self) + self._file = file + self.__line = None + self.__eof = False + self.line_no = 0 + + def readline(self): + line = self._file.readline() + if not line: + self.__line = '' + self.__eof = True + else: + self.line_no += 1 + self.__line = line.rstrip('\r\n') + + def lookahead(self): + assert self.__line is not None + return self.__line + + def consume(self): + assert self.__line is not None + line = self.__line + self.readline() + return line + + def eof(self): + assert self.__line is not None + return self.__eof + + +mapFile = None + +def lookupMap(filename, matchSymbol): + global mapFile + mapFile = filename + stream = open(filename, 'rt') + for line in stream: + start, length, symbol = line.split() + + start = int(start, 16) + length = int(length,16) + + if symbol == matchSymbol: + return start + + return None + +def lookupAsm(filename, desiredFunction): + stream = open(filename + '.asm', 'rt') + while stream.readline() != desiredFunction + ':\n': + pass + + 
asm = [] + line = stream.readline().strip() + while line: + addr, instr = line.split(':', 1) + addr = int(addr) + asm.append((addr, instr)) + line = stream.readline().strip() + + return asm + + + +samples = {} + + +class PerfParser(LineParser): + """Parser for linux perf callgraph output. + + It expects output generated with + + perf record -g + perf script + """ + + def __init__(self, infile, symbol): + LineParser.__init__(self, infile) + self.symbol = symbol + + def readline(self): + # Override LineParser.readline to ignore comment lines + while True: + LineParser.readline(self) + if self.eof() or not self.lookahead().startswith('#'): + break + + def parse(self): + # read lookahead + self.readline() + + while not self.eof(): + self.parse_event() + + asm = lookupAsm(mapFile, self.symbol) + + addresses = samples.keys() + addresses.sort() + total_samples = 0 + + sys.stdout.write('%s:\n' % self.symbol) + for address, instr in asm: + try: + sample = samples.pop(address) + except KeyError: + sys.stdout.write(6*' ') + else: + sys.stdout.write('%6u' % (sample)) + total_samples += sample + sys.stdout.write('%6u: %s\n' % (address, instr)) + print 'total:', total_samples + assert len(samples) == 0 + + sys.exit(0) + + def parse_event(self): + if self.eof(): + return + + line = self.consume() + assert line + + callchain = self.parse_callchain() + if not callchain: + return + + def parse_callchain(self): + callchain = [] + while self.lookahead(): + function = self.parse_call(len(callchain) == 0) + if function is None: + break + callchain.append(function) + if self.lookahead() == '': + self.consume() + return callchain + + call_re = re.compile(r'^\s+(?P
[0-9a-fA-F]+)\s+(?P.*)\s+\((?P[^)]*)\)$') + + def parse_call(self, first): + line = self.consume() + mo = self.call_re.match(line) + assert mo + if not mo: + return None + + if not first: + return None + + function_name = mo.group('symbol') + if not function_name: + function_name = mo.group('address') + + module = mo.group('module') + + function_id = function_name + ':' + module + + address = mo.group('address') + address = int(address, 16) + + if function_name != self.symbol: + return None + + start_address = lookupMap(module, function_name) + address -= start_address + + #print function_name, module, address + + samples[address] = samples.get(address, 0) + 1 + + return True + + +def main(): + """Main program.""" + + optparser = optparse.OptionParser( + usage="\n\t%prog [options] symbol_name") + (options, args) = optparser.parse_args(sys.argv[1:]) + if len(args) != 1: + optparser.error('wrong number of arguments') + + symbol = args[0] + + p = subprocess.Popen(['perf', 'script'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + parser = PerfParser(p.stdout, symbol) + parser.parse() + + +if __name__ == '__main__': + main() + + +# vim: set sw=4 et: diff --git a/configure.ac b/configure.ac index fb1f324d66f..ba922580e84 100644 --- a/configure.ac +++ b/configure.ac @@ -1601,13 +1601,6 @@ if test "x$enable_gallium_llvm" = xyes; then LLVM_COMPONENTS="${LLVM_COMPONENTS} mcjit" fi - if test "x$enable_debug" = xyes; then - # Debug builds require OProfileJIT if LLVM was built with support for it - if $LLVM_CONFIG --components | grep -q '\'; then - LLVM_COMPONENTS="${LLVM_COMPONENTS} oprofilejit" - fi - fi - if test "x$enable_opencl" = xyes; then LLVM_COMPONENTS="${LLVM_COMPONENTS} ipo linker instrumentation" # LLVM 3.3 >= 177971 requires IRReader diff --git a/docs/llvmpipe.html b/docs/llvmpipe.html index be0308321a7..80f8a017665 100644 --- a/docs/llvmpipe.html +++ b/docs/llvmpipe.html @@ -130,38 +130,38 @@ need to ask, don't even try it.

Profiling

-To profile llvmpipe you should pass the options - +

+To profile llvmpipe you should build it as follows: +

   scons build=profile <same-as-before>
 
+

This will ensure that frame pointers are used both in C and JIT functions, and that no tail call optimizations are done by gcc. +

-To better profile JIT code you'll need to build LLVM with oprofile integration. - -
-  ./configure \
-      --prefix=$install_dir \
-      --enable-optimized \
-      --disable-profiling \
-      --enable-targets=host-only \
-      --with-oprofile
-
-  make -C "$build_dir"
-  make -C "$build_dir" install
-
-  find "$install_dir/lib" -iname '*.a' -print0 | xargs -0 strip --strip-debug
-
+

Linux perf integration

-The you should define +

+On Linux, it is possible to have symbol resolution of JIT code with Linux perf: +

-  export LLVM=/path/to/llvm-2.6-profile
+	perf record -g /my/application
+	perf report
 
-and rebuild. +

+When run inside Linux perf, llvmpipe will create a /tmp/perf-XXXXX.map file with a +symbol address table. It also dumps assembly code to /tmp/perf-XXXXX.map.asm, +which can be used by the bin/perf-annotate-jit script to produce a disassembly of +the generated code annotated with the samples. +

+ +

You can obtain a call graph via +Gprof2Dot.

Unit testing

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp index af50fcc1425..ac8e10bbd5c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp @@ -31,6 +31,7 @@ #include #include #include +#include #include #if HAVE_LLVM >= 0x0300 @@ -60,6 +61,11 @@ #include "lp_bld_debug.h" +#ifdef __linux__ +#include +#include +#endif + /** @@ -174,8 +180,8 @@ public: * - http://blog.llvm.org/2010/01/x86-disassembler.html * - http://blog.llvm.org/2010/04/intro-to-llvm-mc-project.html */ -extern "C" void -lp_disassemble(const void* func) +static size_t +disassemble(const void* func, llvm::raw_ostream & Out) { #if HAVE_LLVM >= 0x0207 using namespace llvm; @@ -209,8 +215,8 @@ lp_disassemble(const void* func) #endif if (!AsmInfo) { - debug_printf("error: no assembly info for target %s\n", Triple.c_str()); - return; + Out << "error: no assembly info for target " << Triple << "\n"; + return 0; } #if HAVE_LLVM >= 0x0300 @@ -220,12 +226,10 @@ lp_disassemble(const void* func) OwningPtr DisAsm(T->createMCDisassembler()); #endif if (!DisAsm) { - debug_printf("error: no disassembler for target %s\n", Triple.c_str()); - return; + Out << "error: no disassembler for target " << Triple << "\n"; + return 0; } - raw_debug_ostream Out; - #if HAVE_LLVM >= 0x0300 unsigned int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); #else @@ -235,14 +239,14 @@ lp_disassemble(const void* func) #if HAVE_LLVM >= 0x0301 OwningPtr MRI(T->createMCRegInfo(Triple)); if (!MRI) { - debug_printf("error: no register info for target %s\n", Triple.c_str()); - return; + Out << "error: no register info for target " << Triple.c_str() << "\n"; + return 0; } OwningPtr MII(T->createMCInstrInfo()); if (!MII) { - debug_printf("error: no instruction info for target %s\n", Triple.c_str()); - return; + Out << "error: no instruction info for target " << Triple.c_str() << "\n"; + return 0; } #endif @@ -260,8 +264,8 @@ 
lp_disassemble(const void* func) T->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, Out)); #endif if (!Printer) { - debug_printf("error: no instruction printer for target %s\n", Triple.c_str()); - return; + Out << "error: no instruction printer for target " << Triple.c_str() << "\n"; + return 0; } #if HAVE_LLVM >= 0x0301 @@ -300,7 +304,7 @@ lp_disassemble(const void* func) * so that between runs. */ - debug_printf("%6lu:\t", (unsigned long)pc); + Out << llvm::format("%6lu:\t", (unsigned long)pc); if (!DisAsm->getInstruction(Inst, Size, memoryObject, pc, @@ -309,7 +313,7 @@ lp_disassemble(const void* func) #else nulls())) { #endif - debug_printf("invalid\n"); + Out << "invalid"; pc += 1; } @@ -320,25 +324,23 @@ lp_disassemble(const void* func) if (0) { unsigned i; for (i = 0; i < Size; ++i) { - debug_printf("%02x ", ((const uint8_t*)bytes)[pc + i]); + Out << llvm::format("%02x ", ((const uint8_t*)bytes)[pc + i]); } for (; i < 16; ++i) { - debug_printf(" "); + Out << " "; } } /* * Print the instruction. */ - #if HAVE_LLVM >= 0x0300 - Printer->printInst(&Inst, Out, ""); + Printer->printInst(&Inst, Out, ""); #elif HAVE_LLVM >= 0x208 - Printer->printInst(&Inst, Out); + Printer->printInst(&Inst, Out); #else - Printer->printInst(&Inst); + Printer->printInst(&Inst); #endif - Out.flush(); /* * Advance. @@ -386,7 +388,7 @@ lp_disassemble(const void* func) * Output the address relative to the function start, given * that MC will print the addresses relative the current pc. 
*/ - debug_printf("\t\t; %lu", (unsigned long)jump); + Out << "\t\t; " << jump; /* * Ignore far jumps given it could be actually a tail return to @@ -401,7 +403,7 @@ lp_disassemble(const void* func) } } - debug_printf("\n"); + Out << "\n"; /* * Stop disassembling on return statements, if there is no record of a @@ -420,12 +422,73 @@ lp_disassemble(const void* func) */ if (0) { - debug_printf("disassemble %p %p\n", bytes, bytes + pc); + _debug_printf("disassemble %p %p\n", bytes, bytes + pc); } - debug_printf("\n"); + Out << "\n"; + Out.flush(); + + return pc; #else /* HAVE_LLVM < 0x0207 */ (void)func; + return 0; #endif /* HAVE_LLVM < 0x0207 */ } + +extern "C" void +lp_disassemble(LLVMValueRef func, const void *code) { + raw_debug_ostream Out; + disassemble(code, Out); +} + + +/* + * Linux perf profiler integration. + * + * See also: + * - http://penberg.blogspot.co.uk/2009/06/jato-has-profiler.html + * - https://github.com/penberg/jato/commit/73ad86847329d99d51b386f5aba692580d1f8fdc + * - http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=commitdiff;h=80d496be89ed7dede5abee5c057634e80a31c82d + */ +extern "C" void +lp_profile(LLVMValueRef func, const void *code) +{ +#if defined(__linux__) && (defined(DEBUG) || defined(PROFILE)) + static boolean first_time = TRUE; + static FILE *perf_map_file = NULL; + static int perf_asm_fd = -1; + if (first_time) { + /* + * We rely on the disassembler for determining a function's size, but + * the disassembly is a leaky and slow operation, so avoid running + * this except when running inside linux perf, which can be inferred + * by the PERF_BUILDID_DIR environment variable. 
+ */ + if (getenv("PERF_BUILDID_DIR")) { + pid_t pid = getpid(); + char filename[256]; + util_snprintf(filename, sizeof filename, "/tmp/perf-%llu.map", (unsigned long long)pid); + perf_map_file = fopen(filename, "wt"); + util_snprintf(filename, sizeof filename, "/tmp/perf-%llu.map.asm", (unsigned long long)pid); + mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH; + perf_asm_fd = open(filename, O_WRONLY | O_CREAT, mode); + } + first_time = FALSE; + } + if (perf_map_file) { + const char *symbol = LLVMGetValueName(func); + unsigned long addr = (uintptr_t)code; + llvm::raw_fd_ostream Out(perf_asm_fd, false); + Out << symbol << ":\n"; + unsigned long size = disassemble(code, Out); + fprintf(perf_map_file, "%lx %lx %s\n", addr, size, symbol); + fflush(perf_map_file); + } +#else + (void)func; + (void)code; +#endif +} + + diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.h b/src/gallium/auxiliary/gallivm/lp_bld_debug.h index da873f30b2d..ab83d98feed 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.h @@ -83,7 +83,11 @@ lp_check_alignment(const void *ptr, unsigned alignment); void -lp_disassemble(const void* func); +lp_disassemble(LLVMValueRef func, const void *code); + + +void +lp_profile(LLVMValueRef func, const void *code); #ifdef __cplusplus diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c index 4fa5887e878..1153411dd52 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c @@ -273,10 +273,6 @@ init_gallivm_engine(struct gallivm_state *gallivm) LLVMDisposeMessage(error); goto fail; } - -#if defined(DEBUG) || defined(PROFILE) - lp_register_oprofile_jit_event_listener(gallivm->engine); -#endif } LLVMAddModuleProvider(gallivm->engine, gallivm->provider);//new @@ -635,6 +631,7 @@ gallivm_compile_module(struct gallivm_state *gallivm) } + func_pointer gallivm_jit_function(struct gallivm_state *gallivm, 
LLVMValueRef func) @@ -650,9 +647,13 @@ gallivm_jit_function(struct gallivm_state *gallivm, jit_func = pointer_to_func(code); if (gallivm_debug & GALLIVM_DEBUG_ASM) { - lp_disassemble(code); + lp_disassemble(func, code); } +#if defined(PROFILE) + lp_profile(func, code); +#endif + /* Free the function body to save memory */ lp_func_delete_body(func); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp index 46cdbad2683..c51279556b6 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp @@ -54,7 +54,6 @@ #include #include #include -#include #if HAVE_LLVM >= 0x0301 #include #include @@ -75,28 +74,6 @@ #include "lp_bld_misc.h" -/** - * Register the engine with oprofile. - * - * This allows to see the LLVM IR function names in oprofile output. - * - * To actually work LLVM needs to be built with the --with-oprofile configure - * option. - * - * Also a oprofile:oprofile user:group is necessary. Which is not created by - * default on some distributions. - */ -extern "C" void -lp_register_oprofile_jit_event_listener(LLVMExecutionEngineRef EE) -{ -#if HAVE_LLVM >= 0x0301 - llvm::unwrap(EE)->RegisterJITEventListener(llvm::JITEventListener::createOProfileJITEventListener()); -#else - llvm::unwrap(EE)->RegisterJITEventListener(llvm::createOProfileJITEventListener()); -#endif -} - - extern "C" void lp_set_target_options(void) { diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.h b/src/gallium/auxiliary/gallivm/lp_bld_misc.h index 9ed7c348bb4..1f735fbcde6 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.h @@ -40,9 +40,6 @@ extern "C" { -extern void -lp_register_oprofile_jit_event_listener(LLVMExecutionEngineRef EE); - extern void lp_set_target_options(void); -- 2.30.2