Mercurial > hg > truffle
annotate src/share/vm/utilities/utf8.cpp @ 20543:e7d0505c8a30
8059758: Footprint regressions with JDK-8038423
Summary: Changes in JDK-8038423 always initialize (zero out) virtual memory used for auxiliary data structures. This causes a footprint regression for G1 in startup benchmarks. This is because they do not touch that memory at all, so the operating system does not actually commit these pages. The fix is to, if the initialization value of the data structures matches the default value of just committed memory (=0), do not do anything.
Reviewed-by: jwilhelm, brutisso
author | tschatzl |
---|---|
date | Fri, 10 Oct 2014 15:51:58 +0200 |
parents | 8c03fc47511d |
children |
rev | line source |
---|---|
0 | 1 /* |
8851
8c03fc47511d
8011048: Possible reading from unmapped memory in UTF8::as_quoted_ascii()
iklam
parents:
6972
diff
changeset
|
2 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. |
0 | 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
1552
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
0
diff
changeset
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
0
diff
changeset
|
20 * or visit www.oracle.com if you need additional information or have any |
c18cbe5936b8
6941466: Oracle rebranding changes for Hotspot repositories
trims
parents:
0
diff
changeset
|
21 * questions. |
0 | 22 * |
23 */ | |
24 | |
1972 | 25 #include "precompiled.hpp" |
26 #include "utilities/utf8.hpp" | |
0 | 27 |
28 // Assume the utf8 string is in legal form and has been | |
29 // checked in the class file parser/format checker. | |
30 char* UTF8::next(const char* str, jchar* value) { | |
31 unsigned const char *ptr = (const unsigned char *)str; | |
32 unsigned char ch, ch2, ch3; | |
33 int length = -1; /* bad length */ | |
34 jchar result; | |
35 switch ((ch = ptr[0]) >> 4) { | |
36 default: | |
37 result = ch; | |
38 length = 1; | |
39 break; | |
40 | |
41 case 0x8: case 0x9: case 0xA: case 0xB: case 0xF: | |
42 /* Shouldn't happen. */ | |
43 break; | |
44 | |
45 case 0xC: case 0xD: | |
46 /* 110xxxxx 10xxxxxx */ | |
47 if (((ch2 = ptr[1]) & 0xC0) == 0x80) { | |
48 unsigned char high_five = ch & 0x1F; | |
49 unsigned char low_six = ch2 & 0x3F; | |
50 result = (high_five << 6) + low_six; | |
51 length = 2; | |
52 break; | |
53 } | |
54 break; | |
55 | |
56 case 0xE: | |
57 /* 1110xxxx 10xxxxxx 10xxxxxx */ | |
58 if (((ch2 = ptr[1]) & 0xC0) == 0x80) { | |
59 if (((ch3 = ptr[2]) & 0xC0) == 0x80) { | |
60 unsigned char high_four = ch & 0x0f; | |
61 unsigned char mid_six = ch2 & 0x3f; | |
62 unsigned char low_six = ch3 & 0x3f; | |
63 result = (((high_four << 6) + mid_six) << 6) + low_six; | |
64 length = 3; | |
65 } | |
66 } | |
67 break; | |
68 } /* end of switch */ | |
69 | |
70 if (length <= 0) { | |
71 *value = ptr[0]; /* default bad result; */ | |
72 return (char*)(ptr + 1); // make progress somehow | |
73 } | |
74 | |
75 *value = result; | |
76 | |
77 // The assert is correct but the .class file is wrong | |
78 // assert(UNICODE::utf8_size(result) == length, "checking reverse computation"); | |
79 return (char *)(ptr + length); | |
80 } | |
81 | |
82 char* UTF8::next_character(const char* str, jint* value) { | |
83 unsigned const char *ptr = (const unsigned char *)str; | |
84 /* See if it's legal supplementary character: | |
85 11101101 1010xxxx 10xxxxxx 11101101 1011xxxx 10xxxxxx */ | |
86 if (is_supplementary_character(ptr)) { | |
87 *value = get_supplementary_character(ptr); | |
88 return (char *)(ptr + 6); | |
89 } | |
90 jchar result; | |
91 char* next_ch = next(str, &result); | |
92 *value = result; | |
93 return next_ch; | |
94 } | |
95 | |
96 // Count bytes of the form 10xxxxxx and deduct this count | |
97 // from the total byte count. The utf8 string must be in | |
98 // legal form which has been verified in the format checker. | |
99 int UTF8::unicode_length(const char* str, int len) { | |
100 int num_chars = len; | |
101 for (int i = 0; i < len; i++) { | |
102 if ((str[i] & 0xC0) == 0x80) { | |
103 --num_chars; | |
104 } | |
105 } | |
106 return num_chars; | |
107 } | |
108 | |
109 // Count bytes of the utf8 string except those in form | |
110 // 10xxxxxx which only appear in multibyte characters. | |
111 // The utf8 string must be in legal form and has been | |
112 // verified in the format checker. | |
113 int UTF8::unicode_length(const char* str) { | |
114 int num_chars = 0; | |
115 for (const char* p = str; *p; p++) { | |
116 if (((*p) & 0xC0) != 0x80) { | |
117 num_chars++; | |
118 } | |
119 } | |
120 return num_chars; | |
121 } | |
122 | |
123 // Writes a jchar a utf8 and returns the end | |
124 static u_char* utf8_write(u_char* base, jchar ch) { | |
125 if ((ch != 0) && (ch <=0x7f)) { | |
126 base[0] = (u_char) ch; | |
127 return base + 1; | |
128 } | |
129 | |
130 if (ch <= 0x7FF) { | |
131 /* 11 bits or less. */ | |
132 unsigned char high_five = ch >> 6; | |
133 unsigned char low_six = ch & 0x3F; | |
134 base[0] = high_five | 0xC0; /* 110xxxxx */ | |
135 base[1] = low_six | 0x80; /* 10xxxxxx */ | |
136 return base + 2; | |
137 } | |
138 /* possibly full 16 bits. */ | |
139 char high_four = ch >> 12; | |
140 char mid_six = (ch >> 6) & 0x3F; | |
141 char low_six = ch & 0x3f; | |
142 base[0] = high_four | 0xE0; /* 1110xxxx */ | |
143 base[1] = mid_six | 0x80; /* 10xxxxxx */ | |
144 base[2] = low_six | 0x80; /* 10xxxxxx */ | |
145 return base + 3; | |
146 } | |
147 | |
148 void UTF8::convert_to_unicode(const char* utf8_str, jchar* unicode_str, int unicode_length) { | |
149 unsigned char ch; | |
6972
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
150 const char *ptr = utf8_str; |
0 | 151 int index = 0; |
152 | |
153 /* ASCII case loop optimization */ | |
154 for (; index < unicode_length; index++) { | |
155 if((ch = ptr[0]) > 0x7F) { break; } | |
156 unicode_str[index] = ch; | |
157 ptr = (const char *)(ptr + 1); | |
158 } | |
159 | |
160 for (; index < unicode_length; index++) { | |
161 ptr = UTF8::next(ptr, &unicode_str[index]); | |
162 } | |
163 } | |
164 | |
6972
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
165 // returns the quoted ascii length of a 0-terminated utf8 string |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
166 int UTF8::quoted_ascii_length(const char* utf8_str, int utf8_length) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
167 const char *ptr = utf8_str; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
168 const char* end = ptr + utf8_length; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
169 int result = 0; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
170 while (ptr < end) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
171 jchar c; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
172 ptr = UTF8::next(ptr, &c); |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
173 if (c >= 32 && c < 127) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
174 result++; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
175 } else { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
176 result += 6; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
177 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
178 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
179 return result; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
180 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
181 |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
182 // converts a utf8 string to quoted ascii |
8851
8c03fc47511d
8011048: Possible reading from unmapped memory in UTF8::as_quoted_ascii()
iklam
parents:
6972
diff
changeset
|
183 void UTF8::as_quoted_ascii(const char* utf8_str, int utf8_length, char* buf, int buflen) { |
6972
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
184 const char *ptr = utf8_str; |
8851
8c03fc47511d
8011048: Possible reading from unmapped memory in UTF8::as_quoted_ascii()
iklam
parents:
6972
diff
changeset
|
185 const char *utf8_end = ptr + utf8_length; |
6972
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
186 char* p = buf; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
187 char* end = buf + buflen; |
8851
8c03fc47511d
8011048: Possible reading from unmapped memory in UTF8::as_quoted_ascii()
iklam
parents:
6972
diff
changeset
|
188 while (ptr < utf8_end) { |
6972
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
189 jchar c; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
190 ptr = UTF8::next(ptr, &c); |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
191 if (c >= 32 && c < 127) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
192 if (p + 1 >= end) break; // string is truncated |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
193 *p++ = (char)c; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
194 } else { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
195 if (p + 6 >= end) break; // string is truncated |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
196 sprintf(p, "\\u%04x", c); |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
197 p += 6; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
198 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
199 } |
8851
8c03fc47511d
8011048: Possible reading from unmapped memory in UTF8::as_quoted_ascii()
iklam
parents:
6972
diff
changeset
|
200 assert(p < end, "sanity"); |
6972
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
201 *p = '\0'; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
202 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
203 |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
204 |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
205 const char* UTF8::from_quoted_ascii(const char* quoted_ascii_str) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
206 const char *ptr = quoted_ascii_str; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
207 char* result = NULL; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
208 while (*ptr != '\0') { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
209 char c = *ptr; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
210 if (c < 32 || c >= 127) break; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
211 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
212 if (*ptr == '\0') { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
213 // nothing to do so return original string |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
214 return quoted_ascii_str; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
215 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
216 // everything up to this point was ok. |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
217 int length = ptr - quoted_ascii_str; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
218 char* buffer = NULL; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
219 for (int round = 0; round < 2; round++) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
220 while (*ptr != '\0') { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
221 if (*ptr != '\\') { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
222 if (buffer != NULL) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
223 buffer[length] = *ptr; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
224 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
225 length++; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
226 } else { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
227 switch (ptr[1]) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
228 case 'u': { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
229 ptr += 2; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
230 jchar value=0; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
231 for (int i=0; i<4; i++) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
232 char c = *ptr++; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
233 switch (c) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
234 case '0': case '1': case '2': case '3': case '4': |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
235 case '5': case '6': case '7': case '8': case '9': |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
236 value = (value << 4) + c - '0'; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
237 break; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
238 case 'a': case 'b': case 'c': |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
239 case 'd': case 'e': case 'f': |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
240 value = (value << 4) + 10 + c - 'a'; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
241 break; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
242 case 'A': case 'B': case 'C': |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
243 case 'D': case 'E': case 'F': |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
244 value = (value << 4) + 10 + c - 'A'; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
245 break; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
246 default: |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
247 ShouldNotReachHere(); |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
248 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
249 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
250 if (buffer == NULL) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
251 char utf8_buffer[4]; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
252 char* next = (char*)utf8_write((u_char*)utf8_buffer, value); |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
253 length += next - utf8_buffer; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
254 } else { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
255 char* next = (char*)utf8_write((u_char*)&buffer[length], value); |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
256 length += next - &buffer[length]; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
257 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
258 break; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
259 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
260 case 't': if (buffer != NULL) buffer[length] = '\t'; ptr += 2; length++; break; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
261 case 'n': if (buffer != NULL) buffer[length] = '\n'; ptr += 2; length++; break; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
262 case 'r': if (buffer != NULL) buffer[length] = '\r'; ptr += 2; length++; break; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
263 case 'f': if (buffer != NULL) buffer[length] = '\f'; ptr += 2; length++; break; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
264 default: |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
265 ShouldNotReachHere(); |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
266 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
267 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
268 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
269 if (round == 0) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
270 buffer = NEW_RESOURCE_ARRAY(char, length + 1); |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
271 ptr = quoted_ascii_str; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
272 } else { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
273 buffer[length] = '\0'; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
274 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
275 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
276 return buffer; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
277 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
278 |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
279 |
0 | 280 // Returns NULL if 'c' it not found. This only works as long |
281 // as 'c' is an ASCII character | |
2177
3582bf76420e
6990754: Use native memory and reference counting to implement SymbolTable
coleenp
parents:
1972
diff
changeset
|
282 const jbyte* UTF8::strrchr(const jbyte* base, int length, jbyte c) { |
0 | 283 assert(length >= 0, "sanity check"); |
284 assert(c >= 0, "does not work for non-ASCII characters"); | |
285 // Skip backwards in string until 'c' is found or end is reached | |
286 while(--length >= 0 && base[length] != c); | |
287 return (length < 0) ? NULL : &base[length]; | |
288 } | |
289 | |
2177
3582bf76420e
6990754: Use native memory and reference counting to implement SymbolTable
coleenp
parents:
1972
diff
changeset
|
290 bool UTF8::equal(const jbyte* base1, int length1, const jbyte* base2, int length2) { |
0 | 291 // Length must be the same |
292 if (length1 != length2) return false; | |
293 for (int i = 0; i < length1; i++) { | |
294 if (base1[i] != base2[i]) return false; | |
295 } | |
296 return true; | |
297 } | |
298 | |
299 bool UTF8::is_supplementary_character(const unsigned char* str) { | |
300 return ((str[0] & 0xFF) == 0xED) && ((str[1] & 0xF0) == 0xA0) && ((str[2] & 0xC0) == 0x80) | |
301 && ((str[3] & 0xFF) == 0xED) && ((str[4] & 0xF0) == 0xB0) && ((str[5] & 0xC0) == 0x80); | |
302 } | |
303 | |
304 jint UTF8::get_supplementary_character(const unsigned char* str) { | |
305 return 0x10000 + ((str[1] & 0x0f) << 16) + ((str[2] & 0x3f) << 10) | |
306 + ((str[4] & 0x0f) << 6) + (str[5] & 0x3f); | |
307 } | |
308 | |
309 | |
310 //------------------------------------------------------------------------------------- | |
311 | |
312 | |
313 int UNICODE::utf8_size(jchar c) { | |
314 if ((0x0001 <= c) && (c <= 0x007F)) return 1; | |
315 if (c <= 0x07FF) return 2; | |
316 return 3; | |
317 } | |
318 | |
319 int UNICODE::utf8_length(jchar* base, int length) { | |
320 int result = 0; | |
321 for (int index = 0; index < length; index++) { | |
322 jchar c = base[index]; | |
323 if ((0x0001 <= c) && (c <= 0x007F)) result += 1; | |
324 else if (c <= 0x07FF) result += 2; | |
325 else result += 3; | |
326 } | |
327 return result; | |
328 } | |
329 | |
330 char* UNICODE::as_utf8(jchar* base, int length) { | |
331 int utf8_len = utf8_length(base, length); | |
332 u_char* result = NEW_RESOURCE_ARRAY(u_char, utf8_len + 1); | |
333 u_char* p = result; | |
334 for (int index = 0; index < length; index++) { | |
335 p = utf8_write(p, base[index]); | |
336 } | |
337 *p = '\0'; | |
338 assert(p == &result[utf8_len], "length prediction must be correct"); | |
339 return (char*) result; | |
340 } | |
341 | |
342 char* UNICODE::as_utf8(jchar* base, int length, char* buf, int buflen) { | |
343 u_char* p = (u_char*)buf; | |
344 u_char* end = (u_char*)buf + buflen; | |
345 for (int index = 0; index < length; index++) { | |
346 jchar c = base[index]; | |
347 if (p + utf8_size(c) >= end) break; // string is truncated | |
348 p = utf8_write(p, base[index]); | |
349 } | |
350 *p = '\0'; | |
351 return buf; | |
352 } | |
353 | |
354 void UNICODE::convert_to_utf8(const jchar* base, int length, char* utf8_buffer) { | |
355 for(int index = 0; index < length; index++) { | |
356 utf8_buffer = (char*)utf8_write((u_char*)utf8_buffer, base[index]); | |
357 } | |
358 *utf8_buffer = '\0'; | |
359 } | |
6972
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
360 |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
361 // returns the quoted ascii length of a unicode string |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
362 int UNICODE::quoted_ascii_length(jchar* base, int length) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
363 int result = 0; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
364 for (int i = 0; i < length; i++) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
365 jchar c = base[i]; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
366 if (c >= 32 && c < 127) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
367 result++; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
368 } else { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
369 result += 6; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
370 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
371 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
372 return result; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
373 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
374 |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
375 // converts a utf8 string to quoted ascii |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
376 void UNICODE::as_quoted_ascii(const jchar* base, int length, char* buf, int buflen) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
377 char* p = buf; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
378 char* end = buf + buflen; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
379 for (int index = 0; index < length; index++) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
380 jchar c = base[index]; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
381 if (c >= 32 && c < 127) { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
382 if (p + 1 >= end) break; // string is truncated |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
383 *p++ = (char)c; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
384 } else { |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
385 if (p + 6 >= end) break; // string is truncated |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
386 sprintf(p, "\\u%04x", c); |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
387 p += 6; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
388 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
389 } |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
390 *p = '\0'; |
bd7a7ce2e264
6830717: replay of compilations would help with debugging
minqi
parents:
2426
diff
changeset
|
391 } |