mapi: Inline call x86_current_tls.
author: Lepton Wu <lepton@chromium.org>
Tue, 22 Oct 2019 03:22:18 +0000 (20:22 -0700)
committer: Matt Turner <mattst88@gmail.com>
Wed, 30 Oct 2019 00:18:06 +0000 (17:18 -0700)
This saves one return. A simple benchmark that calls glGetString
repeatedly on my desktop shows that it improves calls per second from
123M to 141M.

Closes: https://gitlab.freedesktop.org/mesa/mesa/issues/1997
Reviewed-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Lepton Wu <lepton@chromium.org>
src/mapi/entry_x86_tls.h

index b938679979236589128080e849054460c9272173..58a27cef18ed539d41e2790af45531c93aafa2d9 100644 (file)
@@ -33,7 +33,7 @@
 #define HIDDEN
 #endif
 
-#define X86_ENTRY_SIZE 16
+#define X86_ENTRY_SIZE 32
 
 __asm__(".text");
 
@@ -58,9 +58,13 @@ __asm__(".balign 16\n"
    ".balign 16\n"                \
    func ":"
 
-#define STUB_ASM_CODE(slot)      \
-   "call x86_current_tls\n\t"    \
-   "movl %gs:(%eax), %eax\n\t"   \
+#define STUB_ASM_CODE(slot)                                 \
+   "call 1f\n"                                              \
+   "1:\n\t"                                                 \
+   "popl %eax\n\t"                                          \
+   "addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %eax\n\t"           \
+   "movl " ENTRY_CURRENT_TABLE "@GOTNTPOFF(%eax), %eax\n\t" \
+   "movl %gs:(%eax), %eax\n\t"                              \
    "jmp *(4 * " slot ")(%eax)"
 
 #define MAPI_TMP_STUB_ASM_GCC