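This avoids saving, loading and restoring %ebx in the common case where the
current table pointer is already non-zero; %ebx is now only touched on the
path that calls through the PLT. With this change, glGetString performance
increased from 114M/s to 120M/s on my desktop.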
Reviewed-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Lepton Wu <lepton@chromium.org>
func ":"
/*
 * STUB_ASM_CODE(slot): assembly body of a position-independent dispatch stub
 * (i386, AT&T syntax), shown here in diff form: lines starting with '-' are
 * the previous instruction sequence, lines starting with '+' are the new one,
 * unmarked lines are common to both.
 *
 * New sequence, step by step:
 *   1. "call 1f" / "1:" / "popl %ecx" leaves the address of the local label
 *      in %ecx; the following addl of $_GLOBAL_OFFSET_TABLE_+[.-1b] converts
 *      it into the GOT address.
 *   2. The address of ENTRY_CURRENT_TABLE is read from its GOT slot and then
 *      dereferenced, leaving the current table pointer in %eax.
 *   3. If %eax is non-zero, the jne skips straight to the final jump.
 *      Otherwise ENTRY_CURRENT_TABLE_GET is called through the PLT. Only on
 *      this path is %ebx saved, loaded with the GOT address from %ecx
 *      (i386 PIC PLT entries address the GOT through %ebx), and restored
 *      after the call. The code after the second "1:" label uses %eax, so
 *      the value the call leaves in %eax is what gets indexed.
 *   4. The stub leaves via an indirect jump through the 4-byte entry at
 *      byte offset 4 * slot from %eax.
 *
 * The previous sequence used %ebx as the GOT register throughout and therefore
 * pushed and popped it on every invocation, including the non-zero path.
 *
 * Registers written by the stub itself: %eax, %ecx and the flags. %ebx is
 * preserved on both paths.
 *
 * NOTE(review): exactly one 4-byte push precedes the PLT call, in both the
 * old and the new sequence; confirm whether the callee relies on 16-byte
 * stack alignment at the call site.
 */
#define STUB_ASM_CODE(slot) \
- "push %ebx\n\t" \
"call 1f\n\t" \
"1:\n\t" \
- "popl %ebx\n\t" \
- "addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx\n\t" \
- "movl " ENTRY_CURRENT_TABLE "@GOT(%ebx), %eax\n\t" \
+ "popl %ecx\n\t" \
+ "addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %ecx\n\t" \
+ "movl " ENTRY_CURRENT_TABLE "@GOT(%ecx), %eax\n\t" \
"mov (%eax), %eax\n\t" \
"testl %eax, %eax\n\t" \
"jne 1f\n\t" \
+ "push %ebx\n\t" \
+ "movl %ecx, %ebx\n\t" \
"call " ENTRY_CURRENT_TABLE_GET "@PLT\n\t" \
+ "popl %ebx\n\t" \
"1:\n\t" \
- "pop %ebx\n\t" \
"jmp *(4 * " slot ")(%eax)"
#define MAPI_TMP_STUB_ASM_GCC