mapi: Enable assembly language API acceleration for PPC64LE (V2)
[mesa.git] / src / mapi / entry_ppc64le_tsd.h
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2017 Red Hat
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Ben Crocker <bcrocker@redhat.com>
26 */
27
/*
 * HIDDEN gives a declaration hidden symbol visibility when the build reports
 * that the compiler supports the attribute; otherwise it expands to nothing.
 */
#if defined(HAVE_FUNC_ATTRIBUTE_VISIBILITY)
#  define HIDDEN __attribute__((visibility("hidden")))
#else
#  define HIDDEN
#endif
33
/*
 * PPC64LE_ENTRY_SIZE is the byte spacing of the public stubs: it is the
 * .balign value for every stub and the stride used by entry_get_public().
 * PPC64LE_PAGE_ALIGN is not referenced in this part of the file.
 *
 * Both values must be powers of two; the preprocessor checks below reject
 * any value that has more than one bit set.
 */
#define PPC64LE_ENTRY_SIZE 256
#define PPC64LE_PAGE_ALIGN 65536

#if (PPC64LE_ENTRY_SIZE & (PPC64LE_ENTRY_SIZE - 1))
#error PPC64LE_ENTRY_SIZE must be a power of two!
#endif

#if (PPC64LE_PAGE_ALIGN & (PPC64LE_PAGE_ALIGN - 1))
#error PPC64LE_PAGE_ALIGN must be a power of two!
#endif
43
44 __asm__(".text\n"
45 ".balign " U_STRINGIFY(PPC64LE_ENTRY_SIZE) "\n"
46 "ppc64le_entry_start:");
47
/*
 * Assembly prologue of one public stub.
 *
 * name is a string literal holding the stub's symbol name.  The symbol is
 * exported, typed as a function and aligned to PPC64LE_ENTRY_SIZE.  The two
 * instructions after the label compute r2 as r12 + (.TOC. - name); this
 * presumably relies on r12 holding the stub's own address on entry (confirm
 * against the ABI).  The .localentry directive then marks the local entry
 * point directly after that TOC setup.
 */
#define STUB_ASM_ENTRY(name)                                   \
   /* exported function symbol on a stub-sized boundary */     \
   ".globl " name "\n"                                         \
   ".type " name ", @function\n"                               \
   ".balign " U_STRINGIFY(PPC64LE_ENTRY_SIZE) "\n"             \
   name ":\n\t"                                                \
   /* global entry: r2 = r12 + (.TOC. - name) */               \
   " addis 2, 12, .TOC.-" name "@ha\n\t"                       \
   " addi 2, 2, .TOC.-" name "@l\n\t"                          \
   /* local entry point begins after the TOC setup */          \
   " .localentry " name ", .-" name "\n\t"
56
/*
 * Assembly body of one public stub.
 *
 * slot is a string literal holding the dispatch slot number.
 *
 * Fast path: load the pointer stored at ENTRY_CURRENT_TABLE (its address is
 * fetched through the GOT); if it is non-NULL, load entry <slot> from the
 * table into r12 and jump to it through CTR, leaving LR untouched.
 *
 * Slow path (label 2000, pointer was NULL): save LR in the caller's LR save
 * doubleword, open a 144-byte frame, save r2, r3-r10 and r12 in it, call
 * ENTRY_CURRENT_TABLE_GET, move its return value (r3) into r11 as the table
 * pointer, restore everything, drop the frame and rejoin the fast path at
 * label 1050.
 *
 * r2 is saved at 24(r1) of the stub's own frame, i.e. after the stdu.  That
 * is the TOC save doubleword of the ELFv2 frame header.  Saving it at 40(r1)
 * before the stdu would write into the caller's frame; 40 is the ELFv1 TOC
 * slot, and under ELFv2 the caller need not have reserved that location.
 */
#define STUB_ASM_CODE(slot)                                          \
   /* r11 = pointer stored at ENTRY_CURRENT_TABLE */                 \
   " addis 11, 2, " ENTRY_CURRENT_TABLE "@got@ha\n\t"                \
   " ld 11, " ENTRY_CURRENT_TABLE "@got@l(11)\n\t"                   \
   " ld 11, 0(11)\n\t"                                               \
   " cmpldi 11, 0\n\t"                                               \
   " beq 2000f\n"                                                    \
   /* tail-jump to table[slot]; r12 carries the target address */    \
   "1050:\n\t"                                                       \
   " ld 12, " slot "*8(11)\n\t"                                      \
   " mtctr 12\n\t"                                                   \
   " bctr\n"                                                         \
   /* slow path: no table pointer yet, ask the getter for one */     \
   "2000:\n\t"                                                       \
   " mflr 0\n\t"                                                     \
   " std 0, 16(1)\n\t"                                               \
   " stdu 1, -144(1)\n\t"                                            \
   /* TOC goes into this frame's own TOC save doubleword */          \
   " std 2, 24(1)\n\t"                                               \
   /* preserve the argument registers across the call */             \
   " std 3, 56(1)\n\t"                                               \
   " std 4, 64(1)\n\t"                                               \
   " std 5, 72(1)\n\t"                                               \
   " std 6, 80(1)\n\t"                                               \
   " std 7, 88(1)\n\t"                                               \
   " std 8, 96(1)\n\t"                                               \
   " std 9, 104(1)\n\t"                                              \
   " std 10, 112(1)\n\t"                                             \
   " std 12, 128(1)\n\t"                                             \
   /* call ENTRY_CURRENT_TABLE_GET through CTR */                    \
   " addis 12, 2, " ENTRY_CURRENT_TABLE_GET "@got@ha\n\t"            \
   " ld 12, " ENTRY_CURRENT_TABLE_GET "@got@l(12)\n\t"               \
   " mtctr 12\n\t"                                                   \
   " bctrl\n\t"                                                      \
   " ld 2, 24(1)\n\t"                                                \
   /* returned table pointer becomes r11 for the fast path */        \
   " mr 11, 3\n\t"                                                   \
   " ld 3, 56(1)\n\t"                                                \
   " ld 4, 64(1)\n\t"                                                \
   " ld 5, 72(1)\n\t"                                                \
   " ld 6, 80(1)\n\t"                                                \
   " ld 7, 88(1)\n\t"                                                \
   " ld 8, 96(1)\n\t"                                                \
   " ld 9, 104(1)\n\t"                                               \
   " ld 10, 112(1)\n\t"                                              \
   " ld 12, 128(1)\n\t"                                              \
   /* drop the frame, restore LR and dispatch */                     \
   " addi 1, 1, 144\n\t"                                             \
   " ld 0, 16(1)\n\t"                                                \
   " mtlr 0\n\t"                                                     \
   " b 1050b\n"
100
101 #define MAPI_TMP_STUB_ASM_GCC
102 #include "mapi_tmp.h"
103
104 #ifndef MAPI_MODE_BRIDGE
105
106 #include <string.h>
107 #include "u_execmem.h"
108
/**
 * Hook for patching the public stubs.
 *
 * The body is empty in this implementation: the assembled public stubs are
 * left exactly as they were emitted.
 */
void
entry_patch_public(void)
{
}
113
114 extern char
115 ppc64le_entry_start[] HIDDEN;
116
117 mapi_func
118 entry_get_public(int slot)
119 {
120 return (mapi_func) (ppc64le_entry_start + slot * PPC64LE_ENTRY_SIZE);
121 }
122
/*
 * Machine-code template for a dynamically generated dispatch stub.
 *
 * Layout: 38 instruction words (<ENTRY+000> .. <ENTRY+148>) followed by three
 * 64-bit literals at label 9000 that entry_patch() fills in.  The code reads
 * the literals relative to r12, so it expects r12 to hold the address of the
 * stub itself when it is entered.
 */
static const uint32_t code_templ[] = {
   // This should be functionally the same code as would be generated from
   // the STUB_ASM_CODE macro, but defined as a buffer.
   // This is used to generate new dispatch stubs. Mesa will copy this
   // data to the dispatch stub, and then it will patch the slot number and
   // any addresses that it needs to.
   // NOTE!!!  NOTE!!!  NOTE!!!
   // This representation is correct for both little- and big-endian systems.
   // However, more work needs to be done for big-endian Linux because it
   // adheres to an older, AIX-compatible ABI that uses function descriptors.
   // 1000:
   0x7C0802A6,    // <ENTRY+000>:    mflr   0
   0xF8010010,    // <ENTRY+004>:    std    0, 16(1)
   0xE96C0098,    // <ENTRY+008>:    ld     11, 9000f-1000b+0(12)
   0xE96B0000,    // <ENTRY+012>:    ld     11, 0(11)
   0x282B0000,    // <ENTRY+016>:    cmpldi 11, 0
   0x41820014,    // <ENTRY+020>:    beq    2000f
   // 1050:
   0xE80C00A8,    // <ENTRY+024>:    ld     0, 9000f-1000b+16(12)
   0x7D8B002A,    // <ENTRY+028>:    ldx    12, 11, 0
   0x7D8903A6,    // <ENTRY+032>:    mtctr  12
   0x4E800420,    // <ENTRY+036>:    bctr
   // 2000:
   // The frame is opened first so that r2 is stored in this stub's own frame
   // (TOC save doubleword at 24(r1)) rather than at 40(r1) of the caller's
   // frame, which the caller is not required to have reserved.
   0xF821FF71,    // <ENTRY+040>:    stdu   1, -144(1)
   0xF8410018,    // <ENTRY+044>:    std    2, 24(1)
   0xF8610038,    // <ENTRY+048>:    std    3, 56(1)
   0xF8810040,    // <ENTRY+052>:    std    4, 64(1)
   0xF8A10048,    // <ENTRY+056>:    std    5, 72(1)
   0xF8C10050,    // <ENTRY+060>:    std    6, 80(1)
   0xF8E10058,    // <ENTRY+064>:    std    7, 88(1)
   0xF9010060,    // <ENTRY+068>:    std    8, 96(1)
   0xF9210068,    // <ENTRY+072>:    std    9, 104(1)
   0xF9410070,    // <ENTRY+076>:    std    10, 112(1)
   0xF9810080,    // <ENTRY+080>:    std    12, 128(1)
   0xE98C00A0,    // <ENTRY+084>:    ld     12, 9000f-1000b+8(12)
   0x7D8903A6,    // <ENTRY+088>:    mtctr  12
   0x4E800421,    // <ENTRY+092>:    bctrl
   0x7C6B1B78,    // <ENTRY+096>:    mr     11, 3
   0xE8610038,    // <ENTRY+100>:    ld     3, 56(1)
   0xE8810040,    // <ENTRY+104>:    ld     4, 64(1)
   0xE8A10048,    // <ENTRY+108>:    ld     5, 72(1)
   0xE8C10050,    // <ENTRY+112>:    ld     6, 80(1)
   0xE8E10058,    // <ENTRY+116>:    ld     7, 88(1)
   0xE9010060,    // <ENTRY+120>:    ld     8, 96(1)
   0xE9210068,    // <ENTRY+124>:    ld     9, 104(1)
   0xE9410070,    // <ENTRY+128>:    ld     10, 112(1)
   0xE9810080,    // <ENTRY+132>:    ld     12, 128(1)
   0x38210090,    // <ENTRY+136>:    addi   1, 1, 144
   0xE8010010,    // <ENTRY+140>:    ld     0, 16(1)
   0x7C0803A6,    // <ENTRY+144>:    mtlr   0
   0x4BFFFF84,    // <ENTRY+148>:    b      1050b
   // 9000:
   0, 0,          // <ENTRY+152>:    .quad ENTRY_CURRENT_TABLE
   0, 0,          // <ENTRY+160>:    .quad ENTRY_CURRENT_TABLE_GET
   0, 0           // <ENTRY+168>:    .quad <slot>*8
};

/* Byte offsets of the three trailing 64-bit literals within the template. */
static const uint64_t TEMPLATE_OFFSET_CURRENT_TABLE = sizeof(code_templ) - 3*8;
static const uint64_t TEMPLATE_OFFSET_CURRENT_TABLE_GET = sizeof(code_templ) - 2*8;
static const uint64_t TEMPLATE_OFFSET_SLOT = sizeof(code_templ) - 1*8;
182
183 void
184 entry_patch(mapi_func entry, int slot)
185 {
186 char *code = (char *) entry;
187 *((uint64_t *) (code + TEMPLATE_OFFSET_CURRENT_TABLE)) = (uint64_t) ENTRY_CURRENT_TABLE;
188 *((uint64_t *) (code + TEMPLATE_OFFSET_CURRENT_TABLE_GET)) = (uint64_t) ENTRY_CURRENT_TABLE_GET;
189 *((uint64_t *) (code + TEMPLATE_OFFSET_SLOT)) = slot * sizeof(mapi_func);
190 }
191
192 mapi_func
193 entry_generate(int slot)
194 {
195 char *code;
196 mapi_func entry;
197
198 code = u_execmem_alloc(sizeof(code_templ));
199 if (!code)
200 return NULL;
201
202 memcpy(code, code_templ, sizeof(code_templ));
203
204 entry = (mapi_func) code;
205 entry_patch(entry, slot);
206
207 return entry;
208 }
209
210 #endif /* MAPI_MODE_BRIDGE */