mapi: Inline call x86_current_tls.
author: Lepton Wu <lepton@chromium.org>
Tue, 22 Oct 2019 03:22:18 +0000 (20:22 -0700)
committer: Lepton Wu <lepton@chromium.org>
Thu, 24 Oct 2019 23:37:18 +0000 (23:37 +0000)
This saves one return. A simple benchmark that calls glGetString
repeatedly on my desktop shows that it improves calls per second from
118M to 128M.

Signed-off-by: Lepton Wu <lepton@chromium.org>
Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
src/mapi/entry_x86_tls.h

index 545b5a3c786d90e6b4e0d3376f21ee8956068b4e..4b3d6bd02adfa3bb3fd61ba23200a07d02a0eaf8 100644 (file)
@@ -56,9 +56,13 @@ __asm__(".balign 16\n"
    ".balign 16\n"                \
    func ":"
 
-#define STUB_ASM_CODE(slot)      \
-   "call x86_current_tls\n\t"    \
-   "movl %gs:(%eax), %eax\n\t"   \
+#define STUB_ASM_CODE(slot)                                 \
+   "call 1f\n"                                              \
+   "1:\n\t"                                                 \
+   "popl %eax\n\t"                                          \
+   "addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %eax\n\t"           \
+   "movl " ENTRY_CURRENT_TABLE "@GOTNTPOFF(%eax), %eax\n\t" \
+   "movl %gs:(%eax), %eax\n\t"                              \
    "jmp *(4 * " slot ")(%eax)"
 
 #define MAPI_TMP_STUB_ASM_GCC