defd7744cce09b7d1bb59ee5019c5fa96fb72203
[gcc.git] / libffi / src / x86 / ffi64.c
1 /* -----------------------------------------------------------------------
2 ffi64.c - Copyright (c) 2011 Anthony Green
3 Copyright (c) 2008, 2010 Red Hat, Inc.
4 Copyright (c) 2002, 2007 Bo Thorsen <bo@suse.de>
5
6 x86-64 Foreign Function Interface
7
8 Permission is hereby granted, free of charge, to any person obtaining
9 a copy of this software and associated documentation files (the
10 ``Software''), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
15
16 The above copyright notice and this permission notice shall be included
17 in all copies or substantial portions of the Software.
18
19 THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
20 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
23 HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
24 WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 DEALINGS IN THE SOFTWARE.
27 ----------------------------------------------------------------------- */
28
29 #include <ffi.h>
30 #include <ffi_common.h>
31
32 #include <stdlib.h>
33 #include <stdarg.h>
34
35 #ifdef __x86_64__
36
/* Number of general-purpose register slots kept in struct register_args;
   an argument that would push the running count past this limit is
   passed on the stack instead.  */
#define MAX_GPR_REGS 6
/* Number of SSE register slots kept in struct register_args; same
   overflow rule as for the general-purpose slots.  */
#define MAX_SSE_REGS 8

/* Image of the argument registers.  ffi_call fills one at the start of
   its argument area before calling ffi_call_unix64, and
   ffi_closure_unix64_inner reads incoming arguments back out of one.  */
struct register_args
{
  /* Registers for argument passing.  */
  UINT64 gpr[MAX_GPR_REGS];       /* One slot per integer-class eightbyte.  */
  __int128_t sse[MAX_SSE_REGS];   /* One 16-byte slot per SSE-class eightbyte;
                                     ffi_call stores a zero-extended 32- or
                                     64-bit value in each slot it uses.  */
};
46
/* Low-level call entry point, defined outside this file.  ffi_call
   invokes it with: ARGS, the argument area (a struct register_args
   immediately followed by the stack-passed arguments); BYTES, the size
   of that area (cif->bytes plus sizeof (struct register_args)); FLAGS,
   cif->flags as computed by ffi_prep_cif_machdep; RADDR, the address
   for the return value; FNADDR, the function to call; and SSECOUNT,
   the number of SSE register slots that were filled.  */
extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
                             void *raddr, void (*fnaddr)(void), unsigned ssecount);
49
50 /* All reference to register classes here is identical to the code in
51 gcc/config/i386/i386.c. Do *not* change one without the other. */
52
/* Register class used for passing a given 64-bit part of the argument.
   These represent the classes documented by the psABI, with the
   exception of the SSESF and SSEDF classes, which are basically the SSE
   class; gcc just uses an SFmode or DFmode move instead of DImode to
   avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode
   moves whenever possible (the upper half then contains padding).

   The enumerator order matters: SSE_CLASS_P below relies on the four
   SSE-family classes being contiguous.  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

/* Maximum number of eightbyte classes produced for a single argument.  */
#define MAX_CLASSES 4

/* Nonzero iff X is one of the SSE-family classes (SSE, SSESF, SSEDF or
   SSEUP).  Both uses of X are parenthesized so that any expression may
   be passed; note that X is still evaluated twice, so it must not have
   side effects.  */
#define SSE_CLASS_P(X) \
  ((X) >= X86_64_SSE_CLASS && (X) <= X86_64_SSEUP_CLASS)
79
80 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
81 of this code is to classify each 8bytes of incoming argument by the register
82 class and assign registers accordingly. */
83
84 /* Return the union class of CLASS1 and CLASS2.
85 See the x86-64 PS ABI for details. */
86
87 static enum x86_64_reg_class
88 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
89 {
90 /* Rule #1: If both classes are equal, this is the resulting class. */
91 if (class1 == class2)
92 return class1;
93
94 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
95 the other class. */
96 if (class1 == X86_64_NO_CLASS)
97 return class2;
98 if (class2 == X86_64_NO_CLASS)
99 return class1;
100
101 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
102 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
103 return X86_64_MEMORY_CLASS;
104
105 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
106 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
107 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
108 return X86_64_INTEGERSI_CLASS;
109 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
110 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
111 return X86_64_INTEGER_CLASS;
112
113 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
114 MEMORY is used. */
115 if (class1 == X86_64_X87_CLASS
116 || class1 == X86_64_X87UP_CLASS
117 || class1 == X86_64_COMPLEX_X87_CLASS
118 || class2 == X86_64_X87_CLASS
119 || class2 == X86_64_X87UP_CLASS
120 || class2 == X86_64_COMPLEX_X87_CLASS)
121 return X86_64_MEMORY_CLASS;
122
123 /* Rule #6: Otherwise class SSE is used. */
124 return X86_64_SSE_CLASS;
125 }
126
127 /* Classify the argument of type TYPE and mode MODE.
128 CLASSES will be filled by the register class used to pass each word
129 of the operand. The number of words is returned. In case the parameter
130 should be passed in memory, 0 is returned. As a special case for zero
131 sized containers, classes[0] will be NO_CLASS and 1 is returned.
132
133 See the x86-64 PS ABI for details.
134 */
135 static int
136 classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
137 size_t byte_offset)
138 {
139 switch (type->type)
140 {
141 case FFI_TYPE_UINT8:
142 case FFI_TYPE_SINT8:
143 case FFI_TYPE_UINT16:
144 case FFI_TYPE_SINT16:
145 case FFI_TYPE_UINT32:
146 case FFI_TYPE_SINT32:
147 case FFI_TYPE_UINT64:
148 case FFI_TYPE_SINT64:
149 case FFI_TYPE_POINTER:
150 {
151 int size = byte_offset + type->size;
152
153 if (size <= 4)
154 {
155 classes[0] = X86_64_INTEGERSI_CLASS;
156 return 1;
157 }
158 else if (size <= 8)
159 {
160 classes[0] = X86_64_INTEGER_CLASS;
161 return 1;
162 }
163 else if (size <= 12)
164 {
165 classes[0] = X86_64_INTEGER_CLASS;
166 classes[1] = X86_64_INTEGERSI_CLASS;
167 return 2;
168 }
169 else if (size <= 16)
170 {
171 classes[0] = classes[1] = X86_64_INTEGERSI_CLASS;
172 return 2;
173 }
174 else
175 FFI_ASSERT (0);
176 }
177 case FFI_TYPE_FLOAT:
178 if (!(byte_offset % 8))
179 classes[0] = X86_64_SSESF_CLASS;
180 else
181 classes[0] = X86_64_SSE_CLASS;
182 return 1;
183 case FFI_TYPE_DOUBLE:
184 classes[0] = X86_64_SSEDF_CLASS;
185 return 1;
186 case FFI_TYPE_LONGDOUBLE:
187 classes[0] = X86_64_X87_CLASS;
188 classes[1] = X86_64_X87UP_CLASS;
189 return 2;
190 case FFI_TYPE_STRUCT:
191 {
192 const int UNITS_PER_WORD = 8;
193 int words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
194 ffi_type **ptr;
195 int i;
196 enum x86_64_reg_class subclasses[MAX_CLASSES];
197
198 /* If the struct is larger than 32 bytes, pass it on the stack. */
199 if (type->size > 32)
200 return 0;
201
202 for (i = 0; i < words; i++)
203 classes[i] = X86_64_NO_CLASS;
204
205 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
206 signalize memory class, so handle it as special case. */
207 if (!words)
208 {
209 classes[0] = X86_64_NO_CLASS;
210 return 1;
211 }
212
213 /* Merge the fields of structure. */
214 for (ptr = type->elements; *ptr != NULL; ptr++)
215 {
216 int num;
217
218 byte_offset = ALIGN (byte_offset, (*ptr)->alignment);
219
220 num = classify_argument (*ptr, subclasses, byte_offset % 8);
221 if (num == 0)
222 return 0;
223 for (i = 0; i < num; i++)
224 {
225 int pos = byte_offset / 8;
226 classes[i + pos] =
227 merge_classes (subclasses[i], classes[i + pos]);
228 }
229
230 byte_offset += (*ptr)->size;
231 }
232
233 if (words > 2)
234 {
235 /* When size > 16 bytes, if the first one isn't
236 X86_64_SSE_CLASS or any other ones aren't
237 X86_64_SSEUP_CLASS, everything should be passed in
238 memory. */
239 if (classes[0] != X86_64_SSE_CLASS)
240 return 0;
241
242 for (i = 1; i < words; i++)
243 if (classes[i] != X86_64_SSEUP_CLASS)
244 return 0;
245 }
246
247 /* Final merger cleanup. */
248 for (i = 0; i < words; i++)
249 {
250 /* If one class is MEMORY, everything should be passed in
251 memory. */
252 if (classes[i] == X86_64_MEMORY_CLASS)
253 return 0;
254
255 /* The X86_64_SSEUP_CLASS should be always preceded by
256 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
257 if (classes[i] == X86_64_SSEUP_CLASS
258 && classes[i - 1] != X86_64_SSE_CLASS
259 && classes[i - 1] != X86_64_SSEUP_CLASS)
260 {
261 /* The first one should never be X86_64_SSEUP_CLASS. */
262 FFI_ASSERT (i != 0);
263 classes[i] = X86_64_SSE_CLASS;
264 }
265
266 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
267 everything should be passed in memory. */
268 if (classes[i] == X86_64_X87UP_CLASS
269 && (classes[i - 1] != X86_64_X87_CLASS))
270 {
271 /* The first one should never be X86_64_X87UP_CLASS. */
272 FFI_ASSERT (i != 0);
273 return 0;
274 }
275 }
276 return words;
277 }
278
279 default:
280 FFI_ASSERT(0);
281 }
282 return 0; /* Never reached. */
283 }
284
285 /* Examine the argument and return set number of register required in each
286 class. Return zero iff parameter should be passed in memory, otherwise
287 the number of registers. */
288
289 static int
290 examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
291 _Bool in_return, int *pngpr, int *pnsse)
292 {
293 int i, n, ngpr, nsse;
294
295 n = classify_argument (type, classes, 0);
296 if (n == 0)
297 return 0;
298
299 ngpr = nsse = 0;
300 for (i = 0; i < n; ++i)
301 switch (classes[i])
302 {
303 case X86_64_INTEGER_CLASS:
304 case X86_64_INTEGERSI_CLASS:
305 ngpr++;
306 break;
307 case X86_64_SSE_CLASS:
308 case X86_64_SSESF_CLASS:
309 case X86_64_SSEDF_CLASS:
310 nsse++;
311 break;
312 case X86_64_NO_CLASS:
313 case X86_64_SSEUP_CLASS:
314 break;
315 case X86_64_X87_CLASS:
316 case X86_64_X87UP_CLASS:
317 case X86_64_COMPLEX_X87_CLASS:
318 return in_return != 0;
319 default:
320 abort ();
321 }
322
323 *pngpr = ngpr;
324 *pnsse = nsse;
325
326 return n;
327 }
328
329 /* Perform machine dependent cif processing. */
330
331 ffi_status
332 ffi_prep_cif_machdep (ffi_cif *cif)
333 {
334 int gprcount, ssecount, i, avn, n, ngpr, nsse, flags;
335 enum x86_64_reg_class classes[MAX_CLASSES];
336 size_t bytes;
337
338 gprcount = ssecount = 0;
339
340 flags = cif->rtype->type;
341 if (flags != FFI_TYPE_VOID)
342 {
343 n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
344 if (n == 0)
345 {
346 /* The return value is passed in memory. A pointer to that
347 memory is the first argument. Allocate a register for it. */
348 gprcount++;
349 /* We don't have to do anything in asm for the return. */
350 flags = FFI_TYPE_VOID;
351 }
352 else if (flags == FFI_TYPE_STRUCT)
353 {
354 /* Mark which registers the result appears in. */
355 _Bool sse0 = SSE_CLASS_P (classes[0]);
356 _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
357 if (sse0 && !sse1)
358 flags |= 1 << 8;
359 else if (!sse0 && sse1)
360 flags |= 1 << 9;
361 else if (sse0 && sse1)
362 flags |= 1 << 10;
363 /* Mark the true size of the structure. */
364 flags |= cif->rtype->size << 12;
365 }
366 }
367
368 /* Go over all arguments and determine the way they should be passed.
369 If it's in a register and there is space for it, let that be so. If
370 not, add it's size to the stack byte count. */
371 for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++)
372 {
373 if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0
374 || gprcount + ngpr > MAX_GPR_REGS
375 || ssecount + nsse > MAX_SSE_REGS)
376 {
377 long align = cif->arg_types[i]->alignment;
378
379 if (align < 8)
380 align = 8;
381
382 bytes = ALIGN (bytes, align);
383 bytes += cif->arg_types[i]->size;
384 }
385 else
386 {
387 gprcount += ngpr;
388 ssecount += nsse;
389 }
390 }
391 if (ssecount)
392 flags |= 1 << 11;
393 cif->flags = flags;
394 cif->bytes = ALIGN (bytes, 8);
395
396 return FFI_OK;
397 }
398
399 void
400 ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
401 {
402 enum x86_64_reg_class classes[MAX_CLASSES];
403 char *stack, *argp;
404 ffi_type **arg_types;
405 int gprcount, ssecount, ngpr, nsse, i, avn;
406 _Bool ret_in_memory;
407 struct register_args *reg_args;
408
409 /* Can't call 32-bit mode from 64-bit mode. */
410 FFI_ASSERT (cif->abi == FFI_UNIX64);
411
412 /* If the return value is a struct and we don't have a return value
413 address then we need to make one. Note the setting of flags to
414 VOID above in ffi_prep_cif_machdep. */
415 ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
416 && (cif->flags & 0xff) == FFI_TYPE_VOID);
417 if (rvalue == NULL && ret_in_memory)
418 rvalue = alloca (cif->rtype->size);
419
420 /* Allocate the space for the arguments, plus 4 words of temp space. */
421 stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
422 reg_args = (struct register_args *) stack;
423 argp = stack + sizeof (struct register_args);
424
425 gprcount = ssecount = 0;
426
427 /* If the return value is passed in memory, add the pointer as the
428 first integer argument. */
429 if (ret_in_memory)
430 reg_args->gpr[gprcount++] = (unsigned long) rvalue;
431
432 avn = cif->nargs;
433 arg_types = cif->arg_types;
434
435 for (i = 0; i < avn; ++i)
436 {
437 size_t size = arg_types[i]->size;
438 int n;
439
440 n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
441 if (n == 0
442 || gprcount + ngpr > MAX_GPR_REGS
443 || ssecount + nsse > MAX_SSE_REGS)
444 {
445 long align = arg_types[i]->alignment;
446
447 /* Stack arguments are *always* at least 8 byte aligned. */
448 if (align < 8)
449 align = 8;
450
451 /* Pass this argument in memory. */
452 argp = (void *) ALIGN (argp, align);
453 memcpy (argp, avalue[i], size);
454 argp += size;
455 }
456 else
457 {
458 /* The argument is passed entirely in registers. */
459 char *a = (char *) avalue[i];
460 int j;
461
462 for (j = 0; j < n; j++, a += 8, size -= 8)
463 {
464 switch (classes[j])
465 {
466 case X86_64_INTEGER_CLASS:
467 case X86_64_INTEGERSI_CLASS:
468 reg_args->gpr[gprcount] = 0;
469 memcpy (&reg_args->gpr[gprcount], a, size < 8 ? size : 8);
470 gprcount++;
471 break;
472 case X86_64_SSE_CLASS:
473 case X86_64_SSEDF_CLASS:
474 reg_args->sse[ssecount++] = *(UINT64 *) a;
475 break;
476 case X86_64_SSESF_CLASS:
477 reg_args->sse[ssecount++] = *(UINT32 *) a;
478 break;
479 default:
480 abort();
481 }
482 }
483 }
484 }
485
486 ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
487 cif->flags, rvalue, fn, ssecount);
488 }
489
490
491 extern void ffi_closure_unix64(void);
492
493 ffi_status
494 ffi_prep_closure_loc (ffi_closure* closure,
495 ffi_cif* cif,
496 void (*fun)(ffi_cif*, void*, void**, void*),
497 void *user_data,
498 void *codeloc)
499 {
500 volatile unsigned short *tramp;
501
502 /* Sanity check on the cif ABI. */
503 {
504 int abi = cif->abi;
505 if (UNLIKELY (! (abi > FFI_FIRST_ABI && abi < FFI_LAST_ABI)))
506 return FFI_BAD_ABI;
507 }
508
509 tramp = (volatile unsigned short *) &closure->tramp[0];
510
511 tramp[0] = 0xbb49; /* mov <code>, %r11 */
512 *((unsigned long long * volatile) &tramp[1])
513 = (unsigned long) ffi_closure_unix64;
514 tramp[5] = 0xba49; /* mov <data>, %r10 */
515 *((unsigned long long * volatile) &tramp[6])
516 = (unsigned long) codeloc;
517
518 /* Set the carry bit iff the function uses any sse registers.
519 This is clc or stc, together with the first byte of the jmp. */
520 tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8;
521
522 tramp[11] = 0xe3ff; /* jmp *%r11 */
523
524 closure->cif = cif;
525 closure->fun = fun;
526 closure->user_data = user_data;
527
528 return FFI_OK;
529 }
530
531 int
532 ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue,
533 struct register_args *reg_args, char *argp)
534 {
535 ffi_cif *cif;
536 void **avalue;
537 ffi_type **arg_types;
538 long i, avn;
539 int gprcount, ssecount, ngpr, nsse;
540 int ret;
541
542 cif = closure->cif;
543 avalue = alloca(cif->nargs * sizeof(void *));
544 gprcount = ssecount = 0;
545
546 ret = cif->rtype->type;
547 if (ret != FFI_TYPE_VOID)
548 {
549 enum x86_64_reg_class classes[MAX_CLASSES];
550 int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
551 if (n == 0)
552 {
553 /* The return value goes in memory. Arrange for the closure
554 return value to go directly back to the original caller. */
555 rvalue = (void *) (unsigned long) reg_args->gpr[gprcount++];
556 /* We don't have to do anything in asm for the return. */
557 ret = FFI_TYPE_VOID;
558 }
559 else if (ret == FFI_TYPE_STRUCT && n == 2)
560 {
561 /* Mark which register the second word of the structure goes in. */
562 _Bool sse0 = SSE_CLASS_P (classes[0]);
563 _Bool sse1 = SSE_CLASS_P (classes[1]);
564 if (!sse0 && sse1)
565 ret |= 1 << 8;
566 else if (sse0 && !sse1)
567 ret |= 1 << 9;
568 }
569 }
570
571 avn = cif->nargs;
572 arg_types = cif->arg_types;
573
574 for (i = 0; i < avn; ++i)
575 {
576 enum x86_64_reg_class classes[MAX_CLASSES];
577 int n;
578
579 n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
580 if (n == 0
581 || gprcount + ngpr > MAX_GPR_REGS
582 || ssecount + nsse > MAX_SSE_REGS)
583 {
584 long align = arg_types[i]->alignment;
585
586 /* Stack arguments are *always* at least 8 byte aligned. */
587 if (align < 8)
588 align = 8;
589
590 /* Pass this argument in memory. */
591 argp = (void *) ALIGN (argp, align);
592 avalue[i] = argp;
593 argp += arg_types[i]->size;
594 }
595 /* If the argument is in a single register, or two consecutive
596 integer registers, then we can use that address directly. */
597 else if (n == 1
598 || (n == 2 && !(SSE_CLASS_P (classes[0])
599 || SSE_CLASS_P (classes[1]))))
600 {
601 /* The argument is in a single register. */
602 if (SSE_CLASS_P (classes[0]))
603 {
604 avalue[i] = &reg_args->sse[ssecount];
605 ssecount += n;
606 }
607 else
608 {
609 avalue[i] = &reg_args->gpr[gprcount];
610 gprcount += n;
611 }
612 }
613 /* Otherwise, allocate space to make them consecutive. */
614 else
615 {
616 char *a = alloca (16);
617 int j;
618
619 avalue[i] = a;
620 for (j = 0; j < n; j++, a += 8)
621 {
622 if (SSE_CLASS_P (classes[j]))
623 memcpy (a, &reg_args->sse[ssecount++], 8);
624 else
625 memcpy (a, &reg_args->gpr[gprcount++], 8);
626 }
627 }
628 }
629
630 /* Invoke the closure. */
631 closure->fun (cif, rvalue, avalue, closure->user_data);
632
633 /* Tell assembly how to perform return type promotions. */
634 return ret;
635 }
636
637 #endif /* __x86_64__ */