Convert ChangeLog files to UTF-8.
[gcc.git] / gcc / java / mangle_name.c
1 /* Shared functions related to mangling names for the GNU compiler
2 for the Java(TM) language.
3 Copyright (C) 2001, 2002, 2003, 2007 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>.
20
21 Java and all Java-based marks are trademarks or registered trademarks
22 of Sun Microsystems, Inc. in the United States and other countries.
23 The Free Software Foundation is independent of Sun Microsystems, Inc. */
24
25 /* Written by Alexandre Petit-Bianco <apbianco@cygnus.com> */
26
27 #include "config.h"
28 #include "system.h"
29 #include "coretypes.h"
30 #include "tm.h"
31 #include "jcf.h"
32 #include "tree.h"
33 #include "java-tree.h"
34 #include "obstack.h"
35 #include "toplev.h"
36
/* Both helpers below are defined only when the assembler lacks UTF8
   support, so their prototypes are guarded by the same condition.
   Declaring a static function that is never defined would otherwise
   draw a warning in the HAVE_AS_UTF8 configuration.  */
#ifndef HAVE_AS_UTF8
static void append_unicode_mangled_name (const char *, int);
static int unicode_mangling_length (const char *, int);
#endif

/* Obstack the mangling routines in this file append to.  It is
   defined elsewhere.  */
extern struct obstack *mangle_obstack;
43
44 /* If the assembler doesn't support UTF8 in symbol names, some
45 characters might need to be escaped. */
46
47 #ifndef HAVE_AS_UTF8
48
49 /* Assuming (NAME, LEN) is a Utf8-encoding string, emit the string
50 appropriately mangled (with Unicode escapes if needed) to
51 MANGLE_OBSTACK. Note that `java', `lang' and `Object' are used so
52 frequently that they could be cached. */
53
54 void
55 append_gpp_mangled_name (const char *name, int len)
56 {
57 int encoded_len = unicode_mangling_length (name, len);
58 int needs_escapes = encoded_len > 0;
59 char buf[6];
60
61 sprintf (buf, "%d", (needs_escapes ? encoded_len : len));
62 obstack_grow (mangle_obstack, buf, strlen (buf));
63
64 if (needs_escapes)
65 append_unicode_mangled_name (name, len);
66 else
67 obstack_grow (mangle_obstack, name, len);
68 }
69
70 /* Assuming (NAME, LEN) is a Utf8-encoded string, emit the string
71 appropriately mangled (with Unicode escapes) to MANGLE_OBSTACK.
72 Characters needing an escape are encoded `__UNN_' to `__UNNNN_', in
73 which case `__U' will be mangled `__U_'. */
74
75 static void
76 append_unicode_mangled_name (const char *name, int len)
77 {
78 const unsigned char *ptr;
79 const unsigned char *limit = (const unsigned char *)name + len;
80 int uuU = 0;
81 for (ptr = (const unsigned char *) name; ptr < limit; )
82 {
83 int ch = UTF8_GET(ptr, limit);
84
85 if ((ISALNUM (ch) && ch != 'U') || ch == '$')
86 obstack_1grow (mangle_obstack, ch);
87 /* Everything else needs encoding */
88 else
89 {
90 char buf [9];
91 if (ch == '_' || ch == 'U')
92 {
93 /* Prepare to recognize __U */
94 if (ch == '_' && (uuU < 3))
95 {
96 uuU++;
97 obstack_1grow (mangle_obstack, ch);
98 }
99 /* We recognize __U that we wish to encode
100 __U_. Finish the encoding. */
101 else if (ch == 'U' && (uuU == 2))
102 {
103 uuU = 0;
104 obstack_grow (mangle_obstack, "U_", 2);
105 }
106 /* Otherwise, just reset uuU and emit the character we
107 have. */
108 else
109 {
110 uuU = 0;
111 obstack_1grow (mangle_obstack, ch);
112 }
113 continue;
114 }
115 sprintf (buf, "__U%x_", ch);
116 obstack_grow (mangle_obstack, buf, strlen (buf));
117 uuU = 0;
118 }
119 }
120 }
121
/* Walk the Utf8 string (NAME, LEN) and compute how many characters its
   mangled form (a la g++, Unicode escapes included) occupies.  The
   result is that count when at least one escape is required, and 0
   when the name can be emitted unchanged.  An error is reported for
   each character the decoder rejects.  */

static int
unicode_mangling_length (const char *name, int len)
{
  const unsigned char *cursor = (const unsigned char *) name;
  const unsigned char *end = cursor + len;
  int escaped = 0;	/* Set once any escape is required.  */
  int total = 0;	/* Characters in the mangled name so far.  */
  int underscores = 0;	/* Counter used to spot `__U'.  */

  while (cursor < end)
    {
      /* UTF8_GET advances CURSOR past the character it decodes.  */
      int c = UTF8_GET (cursor, end);

      if (c < 0)
	error ("internal error - invalid Utf8 name");

      /* `$' and alphanumerics other than `U' are copied verbatim and
	 do not disturb the underscore counter.  */
      if (c == '$' || (c != 'U' && ISALNUM (c)))
	{
	  total++;
	  continue;
	}

      if (c == '_')
	{
	  /* One output character; count it towards `__U' until the
	     counter saturates, then start over.  */
	  total++;
	  if (underscores < 3)
	    underscores++;
	  else
	    underscores = 0;
	  continue;
	}

      if (c == 'U')
	{
	  /* `U' after exactly two counted underscores is emitted as
	     `U_', which costs one extra character and is an escape.  */
	  total++;
	  if (underscores == 2)
	    {
	      total++;
	      escaped = 1;
	    }
	  underscores = 0;
	  continue;
	}

      /* Anything else is escaped: four fixed characters plus two,
	 three or four hexadecimal digits depending on the value.  */
      total += 4 + 2 + (c > 0xff ? 1 : 0) + (c > 0xfff ? 1 : 0);
      escaped = 1;
      underscores = 0;
    }

  return escaped ? total : 0;
}
186
187 #else
188
189 /* The assembler supports UTF8, we don't use escapes. Mangling is
190 simply <N>NAME. <N> is the number of UTF8 encoded characters that
191 are found in NAME. Note that `java', `lang' and `Object' are used
192 so frequently that they could be cached. */
193
194 void
195 append_gpp_mangled_name (const char *name, int len)
196 {
197 const unsigned char *ptr;
198 const unsigned char *limit = (const unsigned char *)name + len;
199 int encoded_len;
200 char buf [6];
201
202 /* Compute the length of the string we wish to mangle. */
203 for (encoded_len = 0, ptr = (const unsigned char *) name;
204 ptr < limit; encoded_len++)
205 {
206 int ch = UTF8_GET(ptr, limit);
207
208 if (ch < 0)
209 error ("internal error - invalid Utf8 name");
210 }
211
212 sprintf (buf, "%d", encoded_len);
213 obstack_grow (mangle_obstack, buf, strlen (buf));
214 obstack_grow (mangle_obstack, name, len);
215 }
216
217 #endif /* HAVE_AS_UTF8 */