lib1funcs.S: Add new wrapper.
Author: Hale Wang <hale.wang@arm.com>
Mon, 11 Jul 2016 17:11:31 +0000 (17:11 +0000)
Committer: Andre Vieira <avieira@gcc.gnu.org>
Mon, 11 Jul 2016 17:11:31 +0000 (17:11 +0000)
2016-07-11  Hale Wang  <hale.wang@arm.com>
    Andre Vieira  <andre.simoesdiasvieira@arm.com>

* config/arm/lib1funcs.S: Add new wrapper.

Co-Authored-By: Andre Vieira <andre.simoesdiasvieira@arm.com>
From-SVN: r238215

libgcc/ChangeLog
libgcc/config/arm/lib1funcs.S

index b6f830fee82372bb13e1b5f5437927ad1441d2b1..974ac5e5a4c2b26dd7a6dc7ce78a2cf26fb45e06 100644 (file)
@@ -1,3 +1,8 @@
+2016-07-11  Hale Wang  <hale.wang@arm.com>
+           Andre Vieira  <andre.simoesdiasvieira@arm.com>
+
+       * config/arm/lib1funcs.S: Add new wrapper.
+
 2016-07-07  Thomas Preud'homme  <thomas.preudhomme@arm.com>
 
        * config/arm/lib1funcs.S (__ARM_ARCH__): Define to 8 for ARMv8-M.
index 96e206ee542126c5d68091087446afe9f01aa51f..ba52e7b762f5573445349a574a3878859a992f13 100644 (file)
@@ -311,34 +311,13 @@ LSYM(Lend_fde):
 #ifdef __ARM_EABI__
 .macro THUMB_LDIV0 name signed
 #ifdef NOT_ISA_TARGET_32BIT
-       .ifc \signed, unsigned
-       cmp     r0, #0
-       beq     1f
-       mov     r0, #0
-       mvn     r0, r0          @ 0xffffffff
-1:
-       .else
-       cmp     r0, #0
-       beq     2f
-       blt     3f
+
+       push    {r0, lr}
        mov     r0, #0
-       mvn     r0, r0
-       lsr     r0, r0, #1      @ 0x7fffffff
-       b       2f
-3:     mov     r0, #0x80
-       lsl     r0, r0, #24     @ 0x80000000
-2:
-       .endif
-       push    {r0, r1, r2}
-       ldr     r0, 4f
-       adr     r1, 4f
-       add     r0, r1
-       str     r0, [sp, #8]
+       bl      SYM(__aeabi_idiv0)
        @ We know we are not on armv4t, so pop pc is safe.
-       pop     {r0, r1, pc}
-       .align  2
-4:
-       .word   __aeabi_idiv0 - 4b
+       pop     {r1, pc}
+
 #elif defined(__thumb2__)
        .syntax unified
        .ifc \signed, unsigned
@@ -950,7 +929,170 @@ LSYM(Lover7):
        add     dividend, work
   .endif
 LSYM(Lgot_result):
-.endm  
+.endm
+
+/* If performance is preferred, the following functions are provided.  */
+#if defined(__prefer_thumb__) && !defined(__OPTIMIZE_SIZE__)
+
+/* Branch to div(n), and jump to label if curbit is lower than the divisor.  */
+.macro BranchToDiv n, label
+       lsr     curbit, dividend, \n
+       cmp     curbit, divisor
+       blo     \label
+.endm
+
+/* Body of div(n).  Shift the divisor left by n bits and compare it
+   with the dividend.  Update the dividend with the subtraction result.  */
+.macro DoDiv n
+       lsr     curbit, dividend, \n
+       cmp     curbit, divisor
+       bcc     1f
+       lsl     curbit, divisor, \n
+       sub     dividend, dividend, curbit
+
+1:     adc     result, result
+.endm
+
+/* The body of division with positive divisor.  Unless the divisor is very
+   big, shift it up in multiples of four bits, since this is the amount of
+   unwinding in the main division loop.  Continue shifting until the divisor
+   is larger than the dividend.  */
+.macro THUMB1_Div_Positive
+       mov     result, #0
+       BranchToDiv #1, LSYM(Lthumb1_div1)
+       BranchToDiv #4, LSYM(Lthumb1_div4)
+       BranchToDiv #8, LSYM(Lthumb1_div8)
+       BranchToDiv #12, LSYM(Lthumb1_div12)
+       BranchToDiv #16, LSYM(Lthumb1_div16)
+LSYM(Lthumb1_div_large_positive):
+       mov     result, #0xff
+       lsl     divisor, divisor, #8
+       rev     result, result
+       lsr     curbit, dividend, #16
+       cmp     curbit, divisor
+       blo     1f
+       asr     result, #8
+       lsl     divisor, divisor, #8
+       beq     LSYM(Ldivbyzero_waypoint)
+
+1:     lsr     curbit, dividend, #12
+       cmp     curbit, divisor
+       blo     LSYM(Lthumb1_div12)
+       b       LSYM(Lthumb1_div16)
+LSYM(Lthumb1_div_loop):
+       lsr     divisor, divisor, #8
+LSYM(Lthumb1_div16):
+       Dodiv   #15
+       Dodiv   #14
+       Dodiv   #13
+       Dodiv   #12
+LSYM(Lthumb1_div12):
+       Dodiv   #11
+       Dodiv   #10
+       Dodiv   #9
+       Dodiv   #8
+       bcs     LSYM(Lthumb1_div_loop)
+LSYM(Lthumb1_div8):
+       Dodiv   #7
+       Dodiv   #6
+       Dodiv   #5
+LSYM(Lthumb1_div5):
+       Dodiv   #4
+LSYM(Lthumb1_div4):
+       Dodiv   #3
+LSYM(Lthumb1_div3):
+       Dodiv   #2
+LSYM(Lthumb1_div2):
+       Dodiv   #1
+LSYM(Lthumb1_div1):
+       sub     divisor, dividend, divisor
+       bcs     1f
+       cpy     divisor, dividend
+
+1:     adc     result, result
+       cpy     dividend, result
+       RET
+
+LSYM(Ldivbyzero_waypoint):
+       b       LSYM(Ldiv0)
+.endm
+
+/* The body of division with negative divisor.  Similar to
+   THUMB1_Div_Positive except that the shift steps are in multiples
+   of six bits.  */
+.macro THUMB1_Div_Negative
+       lsr     result, divisor, #31
+       beq     1f
+       neg     divisor, divisor
+
+1:     asr     curbit, dividend, #32
+       bcc     2f
+       neg     dividend, dividend
+
+2:     eor     curbit, result
+       mov     result, #0
+       cpy     ip, curbit
+       BranchToDiv #4, LSYM(Lthumb1_div_negative4)
+       BranchToDiv #8, LSYM(Lthumb1_div_negative8)
+LSYM(Lthumb1_div_large):
+       mov     result, #0xfc
+       lsl     divisor, divisor, #6
+       rev     result, result
+       lsr     curbit, dividend, #8
+       cmp     curbit, divisor
+       blo     LSYM(Lthumb1_div_negative8)
+
+       lsl     divisor, divisor, #6
+       asr     result, result, #6
+       cmp     curbit, divisor
+       blo     LSYM(Lthumb1_div_negative8)
+
+       lsl     divisor, divisor, #6
+       asr     result, result, #6
+       cmp     curbit, divisor
+       blo     LSYM(Lthumb1_div_negative8)
+
+       lsl     divisor, divisor, #6
+       beq     LSYM(Ldivbyzero_negative)
+       asr     result, result, #6
+       b       LSYM(Lthumb1_div_negative8)
+LSYM(Lthumb1_div_negative_loop):
+       lsr     divisor, divisor, #6
+LSYM(Lthumb1_div_negative8):
+       DoDiv   #7
+       DoDiv   #6
+       DoDiv   #5
+       DoDiv   #4
+LSYM(Lthumb1_div_negative4):
+       DoDiv   #3
+       DoDiv   #2
+       bcs     LSYM(Lthumb1_div_negative_loop)
+       DoDiv   #1
+       sub     divisor, dividend, divisor
+       bcs     1f
+       cpy     divisor, dividend
+
+1:     cpy     curbit, ip
+       adc     result, result
+       asr     curbit, curbit, #1
+       cpy     dividend, result
+       bcc     2f
+       neg     dividend, dividend
+       cmp     curbit, #0
+
+2:     bpl     3f
+       neg     divisor, divisor
+
+3:     RET
+
+LSYM(Ldivbyzero_negative):
+       cpy     curbit, ip
+       asr     curbit, curbit, #1
+       bcc     LSYM(Ldiv0)
+       neg     dividend, dividend
+.endm
+#endif /* ARM Thumb version.  */
+
 /* ------------------------------------------------------------------------ */
 /*             Start of the Real Functions                                 */
 /* ------------------------------------------------------------------------ */
@@ -960,6 +1102,7 @@ LSYM(Lgot_result):
 
        FUNC_START udivsi3
        FUNC_ALIAS aeabi_uidiv udivsi3
+#if defined(__OPTIMIZE_SIZE__)
 
        cmp     divisor, #0
        beq     LSYM(Ldiv0)
@@ -977,6 +1120,14 @@ LSYM(udivsi3_skip_div0_test):
        pop     { work }
        RET
 
+/* Implementation of aeabi_uidiv for ARMv6m.  This version is only
+   used in ARMv6-M when we need an efficient implementation.  */
+#else
+LSYM(udivsi3_skip_div0_test):
+       THUMB1_Div_Positive
+
+#endif /* __OPTIMIZE_SIZE__ */
+
 #elif defined(__ARM_ARCH_EXT_IDIV__)
 
        ARM_FUNC_START udivsi3
@@ -1028,12 +1179,21 @@ LSYM(udivsi3_skip_div0_test):
 FUNC_START aeabi_uidivmod
        cmp     r1, #0
        beq     LSYM(Ldiv0)
+# if defined(__OPTIMIZE_SIZE__)
        push    {r0, r1, lr}
        bl      LSYM(udivsi3_skip_div0_test)
        POP     {r1, r2, r3}
        mul     r2, r0
        sub     r1, r1, r2
        bx      r3
+# else
+       /* Both the quotient and remainder are calculated simultaneously
+          in THUMB1_Div_Positive.  There is no need to calculate the
+          remainder again here.  */
+       b       LSYM(udivsi3_skip_div0_test)
+       RET
+# endif /* __OPTIMIZE_SIZE__ */
+
 #elif defined(__ARM_ARCH_EXT_IDIV__)
 ARM_FUNC_START aeabi_uidivmod
        cmp     r1, #0
@@ -1089,7 +1249,7 @@ LSYM(Lover10):
        RET
        
 #else  /* ARM version.  */
-       
+
        FUNC_START umodsi3
 
        subs    r2, r1, #1                      @ compare divisor with 1
@@ -1114,8 +1274,9 @@ LSYM(Lover10):
 
 #if defined(__prefer_thumb__)
 
-       FUNC_START divsi3       
+       FUNC_START divsi3
        FUNC_ALIAS aeabi_idiv divsi3
+#if defined(__OPTIMIZE_SIZE__)
 
        cmp     divisor, #0
        beq     LSYM(Ldiv0)
@@ -1138,7 +1299,7 @@ LSYM(Lover11):
        blo     LSYM(Lgot_result)
 
        THUMB_DIV_MOD_BODY 0
-       
+
        mov     r0, result
        mov     work, ip
        cmp     work, #0
@@ -1148,6 +1309,22 @@ LSYM(Lover12):
        pop     { work }
        RET
 
+/* Implementation of aeabi_idiv for ARMv6m.  This version is only
+   used in ARMv6-M when we need an efficient implementation.  */
+#else
+LSYM(divsi3_skip_div0_test):
+       cpy     curbit, dividend
+       orr     curbit, divisor
+       bmi     LSYM(Lthumb1_div_negative)
+
+LSYM(Lthumb1_div_positive):
+       THUMB1_Div_Positive
+
+LSYM(Lthumb1_div_negative):
+       THUMB1_Div_Negative
+
+#endif /* __OPTIMIZE_SIZE__ */
+
 #elif defined(__ARM_ARCH_EXT_IDIV__)
 
        ARM_FUNC_START divsi3
@@ -1159,8 +1336,8 @@ LSYM(Lover12):
        RET
 
 #else /* ARM/Thumb-2 version.  */
-       
-       ARM_FUNC_START divsi3   
+
+       ARM_FUNC_START divsi3
        ARM_FUNC_ALIAS aeabi_idiv divsi3
 
        cmp     r1, #0
@@ -1214,12 +1391,21 @@ LSYM(divsi3_skip_div0_test):
 FUNC_START aeabi_idivmod
        cmp     r1, #0
        beq     LSYM(Ldiv0)
+# if defined(__OPTIMIZE_SIZE__)
        push    {r0, r1, lr}
        bl      LSYM(divsi3_skip_div0_test)
        POP     {r1, r2, r3}
        mul     r2, r0
        sub     r1, r1, r2
        bx      r3
+# else
+       /* Both the quotient and remainder are calculated simultaneously
+          in THUMB1_Div_Positive and THUMB1_Div_Negative.  There is no
+          need to calculate the remainder again here.  */
+       b       LSYM(divsi3_skip_div0_test)
+       RET
+# endif /* __OPTIMIZE_SIZE__ */
+
 #elif defined(__ARM_ARCH_EXT_IDIV__)
 ARM_FUNC_START aeabi_idivmod
        cmp     r1, #0