#include <stdlib.h>
-/* ffi_prep_args is called by the assembly routine once stack space
- has been allocated for the function's arguments */
+/* Forward declares. */
+static int vfp_type_p (ffi_type *);
+static void layout_vfp_args (ffi_cif *);
-void ffi_prep_args(char *stack, extended_cif *ecif)
+/* ffi_prep_args is called by the assembly routine once stack space
+ has been allocated for the function's arguments
+
+ The vfp_space parameter is the load area for VFP regs, the return
+ value is cif->vfp_used (word bitset of VFP regs used for passing
+ arguments). These are only used for the VFP hard-float ABI.
+*/
+int ffi_prep_args(char *stack, extended_cif *ecif, float *vfp_space)
{
- register unsigned int i;
+ register unsigned int i, vi = 0;
register void **p_argv;
register char *argp;
register ffi_type **p_arg;
{
size_t z;
+ /* Allocated in VFP registers. */
+ if (ecif->cif->abi == FFI_VFP
+ && vi < ecif->cif->vfp_nargs && vfp_type_p (*p_arg))
+ {
+ float* vfp_slot = vfp_space + ecif->cif->vfp_args[vi++];
+ if ((*p_arg)->type == FFI_TYPE_FLOAT)
+ *((float*)vfp_slot) = *((float*)*p_argv);
+ else if ((*p_arg)->type == FFI_TYPE_DOUBLE)
+ *((double*)vfp_slot) = *((double*)*p_argv);
+ else
+ memcpy(vfp_slot, *p_argv, (*p_arg)->size);
+ p_argv++;
+ continue;
+ }
+
/* Align if necessary */
if (((*p_arg)->alignment - 1) & (unsigned) argp) {
argp = (char *) ALIGN(argp, (*p_arg)->alignment);
p_argv++;
argp += z;
}
-
- return;
+
+ /* Indicate the VFP registers used. */
+ return ecif->cif->vfp_used;
}
/* Perform machine dependent cif processing */
ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
{
+ int type_code;
/* Round the stack up to a multiple of 8 bytes. This isn't needed
everywhere, but it is on some platforms, and it doesn't harm anything
when it isn't needed. */
break;
case FFI_TYPE_STRUCT:
- if (cif->rtype->size <= 4)
+ if (cif->abi == FFI_VFP
+ && (type_code = vfp_type_p (cif->rtype)) != 0)
+ {
+ /* A Composite Type passed in VFP registers, either
+ FFI_TYPE_STRUCT_VFP_FLOAT or FFI_TYPE_STRUCT_VFP_DOUBLE. */
+ cif->flags = (unsigned) type_code;
+ }
+ else if (cif->rtype->size <= 4)
/* A Composite Type not larger than 4 bytes is returned in r0. */
cif->flags = (unsigned)FFI_TYPE_INT;
else
break;
}
+ /* Map out the register placements of VFP register args.
+ The VFP hard-float calling conventions are slightly more sophisticated than
+ the base calling conventions, so we do it here instead of in ffi_prep_args(). */
+ if (cif->abi == FFI_VFP)
+ layout_vfp_args (cif);
+
return FFI_OK;
}
-extern void ffi_call_SYSV(void (*)(char *, extended_cif *), extended_cif *,
- unsigned, unsigned, unsigned *, void (*fn)(void));
+/* Prototypes for assembly functions, in sysv.S */
+extern void ffi_call_SYSV (void (*fn)(void), extended_cif *, unsigned, unsigned, unsigned *);
+extern void ffi_call_VFP (void (*fn)(void), extended_cif *, unsigned, unsigned, unsigned *);
void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
{
int small_struct = (cif->flags == FFI_TYPE_INT
&& cif->rtype->type == FFI_TYPE_STRUCT);
+ int vfp_struct = (cif->flags == FFI_TYPE_STRUCT_VFP_FLOAT
+ || cif->flags == FFI_TYPE_STRUCT_VFP_DOUBLE);
ecif.cif = cif;
ecif.avalue = avalue;
}
else if (small_struct)
ecif.rvalue = &temp;
+ else if (vfp_struct)
+ {
+ /* Largest case is double x 4. */
+ ecif.rvalue = alloca(32);
+ }
else
ecif.rvalue = rvalue;
switch (cif->abi)
{
case FFI_SYSV:
- ffi_call_SYSV(ffi_prep_args, &ecif, cif->bytes, cif->flags, ecif.rvalue,
- fn);
+ ffi_call_SYSV (fn, &ecif, cif->bytes, cif->flags, ecif.rvalue);
+ break;
+ case FFI_VFP:
+ ffi_call_VFP (fn, &ecif, cif->bytes, cif->flags, ecif.rvalue);
break;
+
default:
FFI_ASSERT(0);
break;
}
if (small_struct)
memcpy (rvalue, &temp, cif->rtype->size);
+ else if (vfp_struct)
+ memcpy (rvalue, ecif.rvalue, cif->rtype->size);
}
/** private members **/
static void ffi_prep_incoming_args_SYSV (char *stack, void **ret,
- void** args, ffi_cif* cif);
+ void** args, ffi_cif* cif, float *vfp_stack);
void ffi_closure_SYSV (ffi_closure *);
+void ffi_closure_VFP (ffi_closure *);
+
/* This function is jumped to by the trampoline */
unsigned int
-ffi_closure_SYSV_inner (closure, respp, args)
+ffi_closure_SYSV_inner (closure, respp, args, vfp_args)
ffi_closure *closure;
void **respp;
void *args;
+ void *vfp_args;
{
// our various things...
ffi_cif *cif;
* a structure, it will re-set RESP to point to the
* structure return address. */
- ffi_prep_incoming_args_SYSV(args, respp, arg_area, cif);
+ ffi_prep_incoming_args_SYSV(args, respp, arg_area, cif, vfp_args);
(closure->fun) (cif, *respp, arg_area, closure->user_data);
/*@-exportheader@*/
static void
ffi_prep_incoming_args_SYSV(char *stack, void **rvalue,
- void **avalue, ffi_cif *cif)
+ void **avalue, ffi_cif *cif,
+ /* Used only under VFP hard-float ABI. */
+ float *vfp_stack)
/*@=exportheader@*/
{
- register unsigned int i;
+ register unsigned int i, vi = 0;
register void **p_argv;
register char *argp;
register ffi_type **p_arg;
for (i = cif->nargs, p_arg = cif->arg_types; (i != 0); i--, p_arg++)
{
size_t z;
-
- size_t alignment = (*p_arg)->alignment;
+ size_t alignment;
+
+ if (cif->abi == FFI_VFP
+ && vi < cif->vfp_nargs && vfp_type_p (*p_arg))
+ {
+ *p_argv++ = (void*)(vfp_stack + cif->vfp_args[vi++]);
+ continue;
+ }
+
+ alignment = (*p_arg)->alignment;
if (alignment < 4)
alignment = 4;
/* Align if necessary */
void *user_data,
void *codeloc)
{
- FFI_ASSERT (cif->abi == FFI_SYSV);
+ void (*closure_func)(ffi_closure*) = NULL;
+ if (cif->abi == FFI_SYSV)
+ closure_func = &ffi_closure_SYSV;
+ else if (cif->abi == FFI_VFP)
+ closure_func = &ffi_closure_VFP;
+ else
+ FFI_ASSERT (0);
+
FFI_INIT_TRAMPOLINE (&closure->tramp[0], \
- &ffi_closure_SYSV, \
+ closure_func, \
codeloc);
closure->cif = cif;
return FFI_OK;
}
+
+/* Below are routines for VFP hard-float support. */
+
+static int rec_vfp_type_p (ffi_type *t, int *elt, int *elnum)
+{
+ switch (t->type)
+ {
+ case FFI_TYPE_FLOAT:
+ case FFI_TYPE_DOUBLE:
+ *elt = (int) t->type;
+ *elnum = 1;
+ return 1;
+
+ case FFI_TYPE_STRUCT_VFP_FLOAT:
+ *elt = FFI_TYPE_FLOAT;
+ *elnum = t->size / sizeof (float);
+ return 1;
+
+ case FFI_TYPE_STRUCT_VFP_DOUBLE:
+ *elt = FFI_TYPE_DOUBLE;
+ *elnum = t->size / sizeof (double);
+ return 1;
+
+ case FFI_TYPE_STRUCT:;
+ {
+ int base_elt = 0, total_elnum = 0;
+ ffi_type **el = t->elements;
+ while (*el)
+ {
+ int el_elt = 0, el_elnum = 0;
+ if (! rec_vfp_type_p (*el, &el_elt, &el_elnum)
+ || (base_elt && base_elt != el_elt)
+ || total_elnum + el_elnum > 4)
+ return 0;
+ base_elt = el_elt;
+ total_elnum += el_elnum;
+ el++;
+ }
+ *elnum = total_elnum;
+ *elt = base_elt;
+ return 1;
+ }
+ default: ;
+ }
+ return 0;
+}
+
+static int vfp_type_p (ffi_type *t)
+{
+ int elt, elnum;
+ if (rec_vfp_type_p (t, &elt, &elnum))
+ {
+ if (t->type == FFI_TYPE_STRUCT)
+ {
+ if (elnum == 1)
+ t->type = elt;
+ else
+ t->type = (elt == FFI_TYPE_FLOAT
+ ? FFI_TYPE_STRUCT_VFP_FLOAT
+ : FFI_TYPE_STRUCT_VFP_DOUBLE);
+ }
+ return (int) t->type;
+ }
+ return 0;
+}
+
+static void place_vfp_arg (ffi_cif *cif, ffi_type *t)
+{
+ int reg = cif->vfp_reg_free;
+ int nregs = t->size / sizeof (float);
+ int align = ((t->type == FFI_TYPE_STRUCT_VFP_FLOAT
+ || t->type == FFI_TYPE_FLOAT) ? 1 : 2);
+ /* Align register number. */
+ if ((reg & 1) && align == 2)
+ reg++;
+ while (reg + nregs <= 16)
+ {
+ int s, new_used = 0;
+ for (s = reg; s < reg + nregs; s++)
+ {
+ new_used |= (1 << s);
+ if (cif->vfp_used & (1 << s))
+ {
+ reg += align;
+ goto next_reg;
+ }
+ }
+ /* Found regs to allocate. */
+ cif->vfp_used |= new_used;
+ cif->vfp_args[cif->vfp_nargs++] = reg;
+
+ /* Update vfp_reg_free. */
+ if (cif->vfp_used & (1 << cif->vfp_reg_free))
+ {
+ reg += nregs;
+ while (cif->vfp_used & (1 << reg))
+ reg += 1;
+ cif->vfp_reg_free = reg;
+ }
+ return;
+ next_reg: ;
+ }
+}
+
+static void layout_vfp_args (ffi_cif *cif)
+{
+ int i;
+ /* Init VFP fields */
+ cif->vfp_used = 0;
+ cif->vfp_nargs = 0;
+ cif->vfp_reg_free = 0;
+ memset (cif->vfp_args, -1, 16); /* Init to -1. */
+
+ for (i = 0; i < cif->nargs; i++)
+ {
+ ffi_type *t = cif->arg_types[i];
+ if (vfp_type_p (t))
+ place_vfp_arg (cif, t);
+ }
+}
.endm
- @ r0: ffi_prep_args
+ @ r0: fn
@ r1: &ecif
@ r2: cif->bytes
@ r3: fig->flags
@ sp+0: ecif.rvalue
- @ sp+4: fn
@ This assumes we are using gas.
ARM_FUNC_START ffi_call_SYSV
sub sp, fp, r2
@ Place all of the ffi_prep_args in position
- mov ip, r0
mov r0, sp
@ r1 already set
@ Call ffi_prep_args(stack, &ecif)
- call_reg(ip)
+ bl ffi_prep_args
@ move first 4 parameters in registers
ldmia sp, {r0-r3}
@ and adjust stack
- ldr ip, [fp, #8]
- cmp ip, #16
- movhs ip, #16
- add sp, sp, ip
+ sub lr, fp, sp @ cif->bytes == fp - sp
+ ldr ip, [fp] @ load fn() in advance
+ cmp lr, #16
+ movhs lr, #16
+ add sp, sp, lr
@ call (fn) (...)
- ldr ip, [fp, #28]
call_reg(ip)
@ Remove the space we pushed for the args
UNWIND .fnend
.size CNAME(ffi_call_SYSV),.ffi_call_SYSV_end-CNAME(ffi_call_SYSV)
+
+ @ r0: fn
+ @ r1: &ecif
+ @ r2: cif->bytes
+ @ r3: fig->flags
+ @ sp+0: ecif.rvalue
+
+ARM_FUNC_START ffi_call_VFP
+ @ Save registers
+ stmfd sp!, {r0-r3, fp, lr}
+ UNWIND .save {r0-r3, fp, lr}
+ mov fp, sp
+ UNWIND .setfp fp, sp
+
+ @ Make room for all of the new args.
+ sub sp, sp, r2
+
+ @ Make room for loading VFP args
+ sub sp, sp, #64
+
+ @ Place all of the ffi_prep_args in position
+ mov r0, sp
+ @ r1 already set
+ sub r2, fp, #64 @ VFP scratch space
+
+ @ Call ffi_prep_args(stack, &ecif, vfp_space)
+ bl ffi_prep_args
+
+ @ Load VFP register args if needed
+ cmp r0, #0
+ beq LSYM(Lbase_args)
+
+ @ Load only d0 if possible
+ cmp r0, #3
+ sub ip, fp, #64
+ flddle d0, [ip]
+ fldmiadgt ip, {d0-d7}
+
+LSYM(Lbase_args):
+ @ move first 4 parameters in registers
+ ldmia sp, {r0-r3}
+
+ @ and adjust stack
+ sub lr, ip, sp @ cif->bytes == (fp - 64) - sp
+ ldr ip, [fp] @ load fn() in advance
+ cmp lr, #16
+ movhs lr, #16
+ add sp, sp, lr
+
+ @ call (fn) (...)
+ call_reg(ip)
+
+ @ Remove the space we pushed for the args
+ mov sp, fp
+
+ @ Load r2 with the pointer to storage for
+ @ the return value
+ ldr r2, [sp, #24]
+
+ @ Load r3 with the return type code
+ ldr r3, [sp, #12]
+
+ @ If the return value pointer is NULL,
+ @ assume no return value.
+ cmp r2, #0
+ beq LSYM(Lepilogue_vfp)
+
+ cmp r3, #FFI_TYPE_INT
+ streq r0, [r2]
+ beq LSYM(Lepilogue_vfp)
+
+ cmp r3, #FFI_TYPE_SINT64
+ stmeqia r2, {r0, r1}
+ beq LSYM(Lepilogue_vfp)
+
+ cmp r3, #FFI_TYPE_FLOAT
+ fstseq s0, [r2]
+ beq LSYM(Lepilogue_vfp)
+
+ cmp r3, #FFI_TYPE_DOUBLE
+ fstdeq d0, [r2]
+ beq LSYM(Lepilogue_vfp)
+
+ cmp r3, #FFI_TYPE_STRUCT_VFP_FLOAT
+ cmpne r3, #FFI_TYPE_STRUCT_VFP_DOUBLE
+ fstmiadeq r2, {d0-d3}
+
+LSYM(Lepilogue_vfp):
+ RETLDM "r0-r3,fp"
+
+.ffi_call_VFP_end:
+ UNWIND .fnend
+ .size CNAME(ffi_call_VFP),.ffi_call_VFP_end-CNAME(ffi_call_VFP)
+
+
/*
unsigned int FFI_HIDDEN
ffi_closure_SYSV_inner (closure, respp, args)
UNWIND .fnend
.size CNAME(ffi_closure_SYSV),.ffi_closure_SYSV_end-CNAME(ffi_closure_SYSV)
+
+ARM_FUNC_START ffi_closure_VFP
+ fstmfdd sp!, {d0-d7}
+ @ r0-r3, then d0-d7
+ UNWIND .pad #80
+ add ip, sp, #80
+ stmfd sp!, {ip, lr}
+ UNWIND .save {r0, lr}
+ add r2, sp, #72
+ add r3, sp, #8
+ .pad #72
+ sub sp, sp, #72
+ str sp, [sp, #64]
+ add r1, sp, #64
+ bl ffi_closure_SYSV_inner
+
+ cmp r0, #FFI_TYPE_INT
+ beq .Lretint_vfp
+
+ cmp r0, #FFI_TYPE_FLOAT
+ beq .Lretfloat_vfp
+
+ cmp r0, #FFI_TYPE_DOUBLE
+ cmpne r0, #FFI_TYPE_LONGDOUBLE
+ beq .Lretdouble_vfp
+
+ cmp r0, #FFI_TYPE_SINT64
+ beq .Lretlonglong_vfp
+
+ cmp r0, #FFI_TYPE_STRUCT_VFP_FLOAT
+ beq .Lretfloat_struct_vfp
+
+ cmp r0, #FFI_TYPE_STRUCT_VFP_DOUBLE
+ beq .Lretdouble_struct_vfp
+
+.Lclosure_epilogue_vfp:
+ add sp, sp, #72
+ ldmfd sp, {sp, pc}
+
+.Lretfloat_vfp:
+ flds s0, [sp]
+ b .Lclosure_epilogue_vfp
+.Lretdouble_vfp:
+ fldd d0, [sp]
+ b .Lclosure_epilogue_vfp
+.Lretint_vfp:
+ ldr r0, [sp]
+ b .Lclosure_epilogue_vfp
+.Lretlonglong_vfp:
+ ldmia sp, {r0, r1}
+ b .Lclosure_epilogue_vfp
+.Lretfloat_struct_vfp:
+ fldmiad sp, {d0-d1}
+ b .Lclosure_epilogue_vfp
+.Lretdouble_struct_vfp:
+ fldmiad sp, {d0-d3}
+ b .Lclosure_epilogue_vfp
+
+.ffi_closure_VFP_end:
+ UNWIND .fnend
+ .size CNAME(ffi_closure_VFP),.ffi_closure_VFP_end-CNAME(ffi_closure_VFP)
+
#if defined __ELF__ && defined __linux__
.section .note.GNU-stack,"",%progbits
#endif