Mercurial > hg > truffle
annotate src/share/vm/utilities/utf8.cpp @ 6972:bd7a7ce2e264
6830717: replay of compilations would help with debugging
Summary: When a Java process crashes in a compiler thread, replaying the compilation helps to find the root cause. This is done by using the SA to dump the application class data and the replay data from the core dump, and then using a debug version of the JVM to recompile the problematic Java method.
Reviewed-by: kvn, twisti, sspitsyn
Contributed-by: yumin.qi@oracle.com
author | minqi |
---|---|
date | Mon, 12 Nov 2012 14:03:53 -0800 |
parents | 1d1603768966 |
children | 8c03fc47511d |
rev | line source |
---|---|
0 | 1 /* |
6972
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
2 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. |
0 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
0
diff
changeset
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
0
diff
changeset
|
20 * or visit www.oracle.com if you need additional information or have any |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
0
diff
changeset
|
21 * questions. |
0 | 22 * |
23 */ | |
24 | |
1972 | 25 #include "precompiled.hpp" |
26 #include "utilities/utf8.hpp" | |
0 | 27 |
28 // Assume the utf8 string is in legal form and has been | |
29 // checked in the class file parser/format checker. | |
30 char* UTF8::next(const char* str, jchar* value) { | |
31 unsigned const char *ptr = (const unsigned char *)str; | |
32 unsigned char ch, ch2, ch3; | |
33 int length = -1; /* bad length */ | |
34 jchar result; | |
35 switch ((ch = ptr[0]) >> 4) { | |
36 default: | |
37 result = ch; | |
38 length = 1; | |
39 break; | |
40 | |
41 case 0x8: case 0x9: case 0xA: case 0xB: case 0xF: | |
42 /* Shouldn't happen. */ | |
43 break; | |
44 | |
45 case 0xC: case 0xD: | |
46 /* 110xxxxx 10xxxxxx */ | |
47 if (((ch2 = ptr[1]) & 0xC0) == 0x80) { | |
48 unsigned char high_five = ch & 0x1F; | |
49 unsigned char low_six = ch2 & 0x3F; | |
50 result = (high_five << 6) + low_six; | |
51 length = 2; | |
52 break; | |
53 } | |
54 break; | |
55 | |
56 case 0xE: | |
57 /* 1110xxxx 10xxxxxx 10xxxxxx */ | |
58 if (((ch2 = ptr[1]) & 0xC0) == 0x80) { | |
59 if (((ch3 = ptr[2]) & 0xC0) == 0x80) { | |
60 unsigned char high_four = ch & 0x0f; | |
61 unsigned char mid_six = ch2 & 0x3f; | |
62 unsigned char low_six = ch3 & 0x3f; | |
63 result = (((high_four << 6) + mid_six) << 6) + low_six; | |
64 length = 3; | |
65 } | |
66 } | |
67 break; | |
68 } /* end of switch */ | |
69 | |
70 if (length <= 0) { | |
71 *value = ptr[0]; /* default bad result; */ | |
72 return (char*)(ptr + 1); // make progress somehow | |
73 } | |
74 | |
75 *value = result; | |
76 | |
77 // The assert is correct but the .class file is wrong | |
78 // assert(UNICODE::utf8_size(result) == length, "checking reverse computation"); | |
79 return (char *)(ptr + length); | |
80 } | |
81 | |
82 char* UTF8::next_character(const char* str, jint* value) { | |
83 unsigned const char *ptr = (const unsigned char *)str; | |
84 /* See if it's legal supplementary character: | |
85 11101101 1010xxxx 10xxxxxx 11101101 1011xxxx 10xxxxxx */ | |
86 if (is_supplementary_character(ptr)) { | |
87 *value = get_supplementary_character(ptr); | |
88 return (char *)(ptr + 6); | |
89 } | |
90 jchar result; | |
91 char* next_ch = next(str, &result); | |
92 *value = result; | |
93 return next_ch; | |
94 } | |
95 | |
96 // Count bytes of the form 10xxxxxx and deduct this count | |
97 // from the total byte count. The utf8 string must be in | |
98 // legal form which has been verified in the format checker. | |
99 int UTF8::unicode_length(const char* str, int len) { | |
100 int num_chars = len; | |
101 for (int i = 0; i < len; i++) { | |
102 if ((str[i] & 0xC0) == 0x80) { | |
103 --num_chars; | |
104 } | |
105 } | |
106 return num_chars; | |
107 } | |
108 | |
109 // Count bytes of the utf8 string except those in form | |
110 // 10xxxxxx which only appear in multibyte characters. | |
111 // The utf8 string must be in legal form and has been | |
112 // verified in the format checker. | |
113 int UTF8::unicode_length(const char* str) { | |
114 int num_chars = 0; | |
115 for (const char* p = str; *p; p++) { | |
116 if (((*p) & 0xC0) != 0x80) { | |
117 num_chars++; | |
118 } | |
119 } | |
120 return num_chars; | |
121 } | |
122 | |
123 // Writes a jchar a utf8 and returns the end | |
124 static u_char* utf8_write(u_char* base, jchar ch) { | |
125 if ((ch != 0) && (ch <=0x7f)) { | |
126 base[0] = (u_char) ch; | |
127 return base + 1; | |
128 } | |
129 | |
130 if (ch <= 0x7FF) { | |
131 /* 11 bits or less. */ | |
132 unsigned char high_five = ch >> 6; | |
133 unsigned char low_six = ch & 0x3F; | |
134 base[0] = high_five | 0xC0; /* 110xxxxx */ | |
135 base[1] = low_six | 0x80; /* 10xxxxxx */ | |
136 return base + 2; | |
137 } | |
138 /* possibly full 16 bits. */ | |
139 char high_four = ch >> 12; | |
140 char mid_six = (ch >> 6) & 0x3F; | |
141 char low_six = ch & 0x3f; | |
142 base[0] = high_four | 0xE0; /* 1110xxxx */ | |
143 base[1] = mid_six | 0x80; /* 10xxxxxx */ | |
144 base[2] = low_six | 0x80; /* 10xxxxxx */ | |
145 return base + 3; | |
146 } | |
147 | |
148 void UTF8::convert_to_unicode(const char* utf8_str, jchar* unicode_str, int unicode_length) { | |
149 unsigned char ch; | |
6972
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
150 const char *ptr = utf8_str; |
0 | 151 int index = 0; |
152 | |
153 /* ASCII case loop optimization */ | |
154 for (; index < unicode_length; index++) { | |
155 if((ch = ptr[0]) > 0x7F) { break; } | |
156 unicode_str[index] = ch; | |
157 ptr = (const char *)(ptr + 1); | |
158 } | |
159 | |
160 for (; index < unicode_length; index++) { | |
161 ptr = UTF8::next(ptr, &unicode_str[index]); | |
162 } | |
163 } | |
164 | |
6972
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
165 // returns the quoted ascii length of a 0-terminated utf8 string |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
166 int UTF8::quoted_ascii_length(const char* utf8_str, int utf8_length) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
167 const char *ptr = utf8_str; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
168 const char* end = ptr + utf8_length; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
169 int result = 0; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
170 while (ptr < end) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
171 jchar c; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
172 ptr = UTF8::next(ptr, &c); |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
173 if (c >= 32 && c < 127) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
174 result++; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
175 } else { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
176 result += 6; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
177 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
178 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
179 return result; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
180 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
181 |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
182 // converts a utf8 string to quoted ascii |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
183 void UTF8::as_quoted_ascii(const char* utf8_str, char* buf, int buflen) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
184 const char *ptr = utf8_str; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
185 char* p = buf; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
186 char* end = buf + buflen; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
187 while (*ptr != '\0') { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
188 jchar c; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
189 ptr = UTF8::next(ptr, &c); |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
190 if (c >= 32 && c < 127) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
191 if (p + 1 >= end) break; // string is truncated |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
192 *p++ = (char)c; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
193 } else { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
194 if (p + 6 >= end) break; // string is truncated |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
195 sprintf(p, "\\u%04x", c); |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
196 p += 6; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
197 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
198 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
199 *p = '\0'; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
200 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
201 |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
202 |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
203 const char* UTF8::from_quoted_ascii(const char* quoted_ascii_str) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
204 const char *ptr = quoted_ascii_str; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
205 char* result = NULL; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
206 while (*ptr != '\0') { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
207 char c = *ptr; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
208 if (c < 32 || c >= 127) break; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
209 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
210 if (*ptr == '\0') { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
211 // nothing to do so return original string |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
212 return quoted_ascii_str; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
213 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
214 // everything up to this point was ok. |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
215 int length = ptr - quoted_ascii_str; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
216 char* buffer = NULL; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
217 for (int round = 0; round < 2; round++) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
218 while (*ptr != '\0') { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
219 if (*ptr != '\\') { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
220 if (buffer != NULL) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
221 buffer[length] = *ptr; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
222 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
223 length++; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
224 } else { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
225 switch (ptr[1]) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
226 case 'u': { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
227 ptr += 2; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
228 jchar value=0; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
229 for (int i=0; i<4; i++) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
230 char c = *ptr++; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
231 switch (c) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
232 case '0': case '1': case '2': case '3': case '4': |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
233 case '5': case '6': case '7': case '8': case '9': |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
234 value = (value << 4) + c - '0'; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
235 break; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
236 case 'a': case 'b': case 'c': |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
237 case 'd': case 'e': case 'f': |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
238 value = (value << 4) + 10 + c - 'a'; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
239 break; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
240 case 'A': case 'B': case 'C': |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
241 case 'D': case 'E': case 'F': |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
242 value = (value << 4) + 10 + c - 'A'; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
243 break; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
244 default: |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
245 ShouldNotReachHere(); |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
246 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
247 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
248 if (buffer == NULL) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
249 char utf8_buffer[4]; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
250 char* next = (char*)utf8_write((u_char*)utf8_buffer, value); |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
251 length += next - utf8_buffer; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
252 } else { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
253 char* next = (char*)utf8_write((u_char*)&buffer[length], value); |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
254 length += next - &buffer[length]; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
255 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
256 break; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
257 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
258 case 't': if (buffer != NULL) buffer[length] = '\t'; ptr += 2; length++; break; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
259 case 'n': if (buffer != NULL) buffer[length] = '\n'; ptr += 2; length++; break; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
260 case 'r': if (buffer != NULL) buffer[length] = '\r'; ptr += 2; length++; break; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
261 case 'f': if (buffer != NULL) buffer[length] = '\f'; ptr += 2; length++; break; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
262 default: |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
263 ShouldNotReachHere(); |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
264 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
265 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
266 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
267 if (round == 0) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
268 buffer = NEW_RESOURCE_ARRAY(char, length + 1); |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
269 ptr = quoted_ascii_str; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
270 } else { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
271 buffer[length] = '\0'; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
272 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
273 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
274 return buffer; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
275 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
276 |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
277 |
0 | 278 // Returns NULL if 'c' it not found. This only works as long |
279 // as 'c' is an ASCII character | |
2177
3582bf76420e
6990754: Use native memory and reference counting to implement SymbolTable
coleenp
parents:
1972
diff
changeset
|
280 const jbyte* UTF8::strrchr(const jbyte* base, int length, jbyte c) { |
0 | 281 assert(length >= 0, "sanity check"); |
282 assert(c >= 0, "does not work for non-ASCII characters"); | |
283 // Skip backwards in string until 'c' is found or end is reached | |
284 while(--length >= 0 && base[length] != c); | |
285 return (length < 0) ? NULL : &base[length]; | |
286 } | |
287 | |
2177
3582bf76420e
6990754: Use native memory and reference counting to implement SymbolTable
coleenp
parents:
1972
diff
changeset
|
288 bool UTF8::equal(const jbyte* base1, int length1, const jbyte* base2, int length2) { |
0 | 289 // Length must be the same |
290 if (length1 != length2) return false; | |
291 for (int i = 0; i < length1; i++) { | |
292 if (base1[i] != base2[i]) return false; | |
293 } | |
294 return true; | |
295 } | |
296 | |
297 bool UTF8::is_supplementary_character(const unsigned char* str) { | |
298 return ((str[0] & 0xFF) == 0xED) && ((str[1] & 0xF0) == 0xA0) && ((str[2] & 0xC0) == 0x80) | |
299 && ((str[3] & 0xFF) == 0xED) && ((str[4] & 0xF0) == 0xB0) && ((str[5] & 0xC0) == 0x80); | |
300 } | |
301 | |
302 jint UTF8::get_supplementary_character(const unsigned char* str) { | |
303 return 0x10000 + ((str[1] & 0x0f) << 16) + ((str[2] & 0x3f) << 10) | |
304 + ((str[4] & 0x0f) << 6) + (str[5] & 0x3f); | |
305 } | |
306 | |
307 | |
308 //------------------------------------------------------------------------------------- | |
309 | |
310 | |
311 int UNICODE::utf8_size(jchar c) { | |
312 if ((0x0001 <= c) && (c <= 0x007F)) return 1; | |
313 if (c <= 0x07FF) return 2; | |
314 return 3; | |
315 } | |
316 | |
317 int UNICODE::utf8_length(jchar* base, int length) { | |
318 int result = 0; | |
319 for (int index = 0; index < length; index++) { | |
320 jchar c = base[index]; | |
321 if ((0x0001 <= c) && (c <= 0x007F)) result += 1; | |
322 else if (c <= 0x07FF) result += 2; | |
323 else result += 3; | |
324 } | |
325 return result; | |
326 } | |
327 | |
328 char* UNICODE::as_utf8(jchar* base, int length) { | |
329 int utf8_len = utf8_length(base, length); | |
330 u_char* result = NEW_RESOURCE_ARRAY(u_char, utf8_len + 1); | |
331 u_char* p = result; | |
332 for (int index = 0; index < length; index++) { | |
333 p = utf8_write(p, base[index]); | |
334 } | |
335 *p = '\0'; | |
336 assert(p == &result[utf8_len], "length prediction must be correct"); | |
337 return (char*) result; | |
338 } | |
339 | |
340 char* UNICODE::as_utf8(jchar* base, int length, char* buf, int buflen) { | |
341 u_char* p = (u_char*)buf; | |
342 u_char* end = (u_char*)buf + buflen; | |
343 for (int index = 0; index < length; index++) { | |
344 jchar c = base[index]; | |
345 if (p + utf8_size(c) >= end) break; // string is truncated | |
346 p = utf8_write(p, base[index]); | |
347 } | |
348 *p = '\0'; | |
349 return buf; | |
350 } | |
351 | |
352 void UNICODE::convert_to_utf8(const jchar* base, int length, char* utf8_buffer) { | |
353 for(int index = 0; index < length; index++) { | |
354 utf8_buffer = (char*)utf8_write((u_char*)utf8_buffer, base[index]); | |
355 } | |
356 *utf8_buffer = '\0'; | |
357 } | |
6972
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
358 |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
359 // returns the quoted ascii length of a unicode string |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
360 int UNICODE::quoted_ascii_length(jchar* base, int length) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
361 int result = 0; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
362 for (int i = 0; i < length; i++) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
363 jchar c = base[i]; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
364 if (c >= 32 && c < 127) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
365 result++; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
366 } else { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
367 result += 6; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
368 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
369 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
370 return result; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
371 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
372 |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
373 // converts a utf8 string to quoted ascii |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
374 void UNICODE::as_quoted_ascii(const jchar* base, int length, char* buf, int buflen) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
375 char* p = buf; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
376 char* end = buf + buflen; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
377 for (int index = 0; index < length; index++) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
378 jchar c = base[index]; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
379 if (c >= 32 && c < 127) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
380 if (p + 1 >= end) break; // string is truncated |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
381 *p++ = (char)c; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
382 } else { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
383 if (p + 6 >= end) break; // string is truncated |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
384 sprintf(p, "\\u%04x", c); |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
385 p += 6; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
386 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
387 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
388 *p = '\0'; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
389 } |