[ARC] Rework ARC600 64-bit multiplication patterns.
[gcc.git] / libgcc / config / arc / gmon / profil.S
1 /* This file contains code to do profiling.
2
3 Copyright (C) 2007-2016 Free Software Foundation, Inc.
4 Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
5 on behalf of Synopsys Inc.
6
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
14
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
19
20 Under Section 7 of GPL version 3, you are granted additional
21 permissions described in the GCC Runtime Library Exception, version
22 3.1, as published by the Free Software Foundation.
23
24 You should have received a copy of the GNU General Public License and
25 a copy of the GCC Runtime Library Exception along with this program;
26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
27 <http://www.gnu.org/licenses/>. */
28
29 #include "../asm.h"
30 #include "auxreg.h"
31 /* This file contains code to do profiling. */
; __profile_timer_cycles: absolute symbol with value 200.  __profil writes it
; to LIMIT0 when it programs timer0.  The symbol is weak, so a strong
; definition supplied elsewhere takes precedence at link time.
32 .weak __profile_timer_cycles
33 .global __profile_timer_cycles
34 .set __profile_timer_cycles, 200
35
; __profil_offset: one zero-initialised 4-byte object in .bss.  __profil
; stores buf - (offset >> 1) here; __profil_irq loads it as the base address
; for indexing the 16-bit counters.
36 .section .bss
37 .global __profil_offset
38 .align 4
39 .type __profil_offset, @object
40 .size __profil_offset, 4
41 __profil_offset:
42 .zero 4
43
; __profil -- start or stop timer0-driven pc sampling.
;
; In (see the register note inside the body):
;   r0 = buf     counter buffer; 0 requests that profiling be stopped
;   r1 = bufsiz  buffer size; the flush loops below treat it as a byte count
;   r2 = offset  halved and subtracted from buf to form __profil_offset
;   r3 = scale   must be 0x8000, otherwise the core is halted via flag.ne
; Out:
;   r0 = 0 once profiling has been set up (the stop path also returns with
;        r0 still 0); r0 = -1 in the __EM__ / __HS__ build, which has no
;        implementation.
; Calls __dcache_linesz, which is defined outside this file.
; NOTE(review): r1, r3 and r8 are read after that call, so the callee is
; assumed to preserve them -- confirm against its definition.
44 .text
45 .global __dcache_linesz
46 .global __profil
47 FUNC(__profil)
48 #if !defined (__EM__) && !defined (__HS__)
; Stop path.  Only reached from the breq_s below, so r0 is 0 here and the
; sr writes 0 to CONTROL0.  It is placed ahead of the entry point so that
; the short branch can reach it.
49 .Lstop_profiling:
50 sr r0,[CONTROL0]
51 j_s [blink]
52 .balign 4
; Entry point.  .Lprofil is 4-byte aligned by the .balign above; the
; pcl-relative loads further down rely on that alignment.
53 __profil:
54 .Lprofil:
55 breq_s r0,0,.Lstop_profiling
56 ; r0: buf r1: bufsiz r2: offset r3: scale
; Toggling bit 15 leaves r3 == 0 exactly when scale was 0x8000; the flags
; set here are consumed by flag.ne a few instructions later.
57 bxor.f r3,r3,15; scale must be 0x8000, i.e. 1/2; generate 0.
58 push_s blink
59 lsr_s r2,r2,1
; Keep buf in r8 across the call to __dcache_linesz.
60 mov_s r8,r0
61 flag.ne 1 ; halt if wrong scale
; __profil_offset = buf - (offset >> 1)
62 sub_s r0,r0,r2
63 st r0,[__profil_offset]
64 bl __dcache_linesz
65 pop_s blink
; Bit 0 of the value returned in r0 selects the no-flush path (presumably
; it signals that there is no data cache -- confirm in __dcache_linesz).
; The .d form executes the following mov_s in the delay slot, so r0 = buf
; again on both paths.
66 bbit1.d r0,0,nocache
67 mov_s r0,r8
; Flush the counter buffer from the data cache, one DC_FLDL write per line.
; NOTE(review): both variants issue ceil(bufsiz / line size) writes and so
; appear to assume that buf is aligned to a cache line -- confirm.
68 #ifdef __ARC700__
; Fixed 32-byte step: lp_count = (bufsiz + 31) >> 5.  lpne skips the
; zero-overhead loop entirely when that count is zero.
69 add_s r1,r1,31
70 lsr.f lp_count,r1,5
71 lpne 2f
72 sr r0,[DC_FLDL]
73 add_s r0,r0,32
74 #else /* !__ARC700__ */
75 # FIX ME: set up loop according to cache line size
; r12 = bits 16..19 of D_CACHE_BUILD, used as a shift count n.
; step (r1) = 16 << n; lp_count = ((bufsiz - 1) >> (4 + n)) + 1.
; r0 starts at buf - 16 and is advanced by step before each DC_FLDL write.
76 lr r12,[D_CACHE_BUILD]
77 sub_s r0,r0,16
78 sub_s r1,r1,1
79 lsr_s r12,r12,16
80 asr_s r1,r1,4
81 bmsk_s r12,r12,3
82 asr_s r1,r1,r12
83 add.f lp_count,r1,1
84 mov_s r1,16
85 asl_s r1,r1,r12
86 lpne 2f
87 add r0,r0,r1
88 sr r0,[DC_FLDL]
89 #endif /* __ARC700__ */
; Label 2 is the loop end for both variants; the branch simply continues
; with the code that follows.
90 2: b_s .Lcounters_cleared
91 nocache:
92 .Lcounters_cleared:
93 lr r1,[INT_VECTOR_BASE] ; r1 = base address of the interrupt vector table
; Disable timer0 interrupts and reset the count.  r3 is 0 here for a valid
; scale (result of the bxor.f above).
94 sr r3,[CONTROL0]
95 sr r3,[COUNT0]
; Copy the two words found at local label 1 (the j __profil_irq instruction
; at the end of this function) into the timer0 vector slot at r1 + 24.
; The loads are pcl-relative; (0b-.Lprofil) & 2 is 2 exactly when the ld_s
; sits at a 2-mod-4 address, which presumably compensates for pcl being the
; pc rounded down to a 4-byte boundary.
96 0: ld_s r0,[pcl,1f-0b+((0b-.Lprofil) & 2)] ; 1f@GOTOFF
97 0: ld_s r12,[pcl,1f+4-0b+((0b-.Lprofil) & 2)] ; 1f@GOTOFF + 4
98 st_s r0,[r1,24]; timer0 uses vector3
99 st_s r12,[r1,24+4]; timer0 uses vector3
100 ;sr 10000,[LIMIT0]
; Timer limit comes from the weak, overridable __profile_timer_cycles symbol.
101 sr __profile_timer_cycles,[LIMIT0]
102 mov_s r12,3 ; enable timer interrupts; count only when not halted.
103 sr r12,[CONTROL0]
104 lr r12,[STATUS32]
105 bset_s r12,r12,1 ; allow level 1 interrupts
106 flag r12
; Return 0 to the caller.
107 mov_s r0,0
108 j_s [blink]
109 .balign 4
; Template instruction that is copied into the vector table above; it is
; never reached by falling through, because the function returns first.
110 1: j __profil_irq
111 #else
; __EM__ / __HS__ build: no implementation, always return -1.
; NOTE(review): the label precedes the .balign, so it would name any
; padding the directive emits -- confirm that none is emitted here.
112 __profil:
113 .balign 4
114 mov_s r0,-1
115 j_s [blink]
116 #endif /* !__EM__ && !__HS__ */
117 ENDFUNC(__profil)
118
; __profil_irq -- timer0 interrupt handler installed by __profil.
;
; Increments the 16-bit counter at [__profil_offset] + 2 * (ilink1 >> 2),
; where ilink1 holds the address the handler returns to.  An increment that
; would set bit 16 is not stored, so a counter stays at 0xffff once it gets
; there.  The counter is accessed with the .di forms of ldw / stw.
; r0, r1 and r2 are saved on the stack and restored before returning.
; In the __EM__ / __HS__ build the handler consists of rtie only.
119 FUNC(__profil_irq)
120 .balign 4 ; make final jump unaligned to avoid delay penalty
121 .balign 32,0,12 ; make sure the code spans no more than two cache lines
; The 2-byte nop_s places the entry point at a 2-mod-4 address.
122 nop_s
123 __profil_irq:
124 #if !defined (__EM__) && !defined (__HS__)
; Stack after the three pushes: [sp] = r2, [sp,4] = r1, [sp,8] = r0.
125 push_s r0
126 ld r0,[__profil_offset]
127 push_s r1
; r1 = counter index = return address >> 2.
128 lsr r1,ilink1,2
129 push_s r2
; r2 = current counter value; the .as form scales the index for the 16-bit
; access, and add1 computes the same address (r0 + 2 * r1) for the store.
130 ldw.as.di r2,[r0,r1]
131 add1 r0,r0,r1
; r1 is no longer needed: reload the saved value now; its stack slot is
; skipped later by the ld.ab.
132 ld_s r1,[sp,4]
133 add_s r2,r2,1
; Skip the store when the increment carried into bit 16.
134 bbit1 r2,16,nostore
135 stw.di r2,[r0]
; Restore r2 and advance sp by 8, past the r2 and r1 slots; then pop r0.
136 nostore:ld.ab r2,[sp,8]
137 pop_s r0
; Return to the interrupted code through ilink1 (.f form of the jump).
138 j.f [ilink1]
139 #else
140 rtie
141 #endif /* !__EM__ && !__HS__ */
142 ENDFUNC(__profil_irq)
143
144 ; could save one cycle if the counters were allocated at link time and
145 ; the contents of __profil_offset were pre-computed at link time, like this:
; Disabled sketch (#if 0): it is never assembled.  Here __profil_offset is
; used directly as an address constant in the add1 instead of being loaded
; from memory, and only r0 and r1 are saved and restored.
146 #if 0
147 ; __profil_offset needs to be PROVIDEd as __profile_base-text/4
148 .global __profil_offset
149 .balign 4
150 __profil_irq:
151 push_s r0
; r0 = counter address = __profil_offset + 2 * (ilink1 >> 2).
152 lsr r0,ilink1,2
153 add1 r0,__profil_offset,r0
154 push_s r1
155 ldw.di r1,[r0]
156
157
; Same saturating update as the live handler: skip the store when the
; increment carried into bit 16.
158 add_s r1,r1,1
159 bbit1 r1,16,nostore
160 stw.di r1,[r0]
161 nostore:pop_s r1
162 pop_s r0
; NOTE(review): the live handler returns with j.f [ilink1]; this sketch uses
; plain j -- confirm which form is required before enabling this code.
163 j [ilink1]
164 #endif /* 0 */