Inline the GOT-based TLS offset lookup into each stub instead of
calling x86_current_tls. This saves one return, and a simple benchmark
that calls glGetString repeatedly on my desktop shows it improves
calls per second from 118M to 128M.
Signed-off-by: Lepton Wu <lepton@chromium.org>
Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
".balign 16\n" \
func ":"
-#define STUB_ASM_CODE(slot) \
- "call x86_current_tls\n\t" \
- "movl %gs:(%eax), %eax\n\t" \
+#define STUB_ASM_CODE(slot) \
+ "call 1f\n" \
+ "1:\n\t" \
+ "popl %eax\n\t" \
+ "addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %eax\n\t" \
+ "movl " ENTRY_CURRENT_TABLE "@GOTNTPOFF(%eax), %eax\n\t" \
+ "movl %gs:(%eax), %eax\n\t" \
"jmp *(4 * " slot ")(%eax)"
#define MAPI_TMP_STUB_ASM_GCC