annotate src/share/vm/utilities/utf8.cpp @ 6972:bd7a7ce2e264

6830717: replay of compilations would help with debugging Summary: When java process crashed in compiler thread, repeat the compilation process will help finding root cause. This is done with using SA dump application class data and replay data from core dump, then use debug version of jvm to recompile the problematic java method. Reviewed-by: kvn, twisti, sspitsyn Contributed-by: yumin.qi@oracle.com
author minqi
date Mon, 12 Nov 2012 14:03:53 -0800
parents 1d1603768966
children 8c03fc47511d
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a61af66fc99e Initial load
duke
parents:
diff changeset
1 /*
6972
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
2 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
0
a61af66fc99e Initial load
duke
parents:
diff changeset
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
a61af66fc99e Initial load
duke
parents:
diff changeset
4 *
a61af66fc99e Initial load
duke
parents:
diff changeset
5 * This code is free software; you can redistribute it and/or modify it
a61af66fc99e Initial load
duke
parents:
diff changeset
6 * under the terms of the GNU General Public License version 2 only, as
a61af66fc99e Initial load
duke
parents:
diff changeset
7 * published by the Free Software Foundation.
a61af66fc99e Initial load
duke
parents:
diff changeset
8 *
a61af66fc99e Initial load
duke
parents:
diff changeset
9 * This code is distributed in the hope that it will be useful, but WITHOUT
a61af66fc99e Initial load
duke
parents:
diff changeset
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
a61af66fc99e Initial load
duke
parents:
diff changeset
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
a61af66fc99e Initial load
duke
parents:
diff changeset
12 * version 2 for more details (a copy is included in the LICENSE file that
a61af66fc99e Initial load
duke
parents:
diff changeset
13 * accompanied this code).
a61af66fc99e Initial load
duke
parents:
diff changeset
14 *
a61af66fc99e Initial load
duke
parents:
diff changeset
15 * You should have received a copy of the GNU General Public License version
a61af66fc99e Initial load
duke
parents:
diff changeset
16 * 2 along with this work; if not, write to the Free Software Foundation,
a61af66fc99e Initial load
duke
parents:
diff changeset
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
a61af66fc99e Initial load
duke
parents:
diff changeset
18 *
1552
c18cbe5936b8 6941466: Oracle rebranding changes for Hotspot repositories
trims
parents: 0
diff changeset
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
c18cbe5936b8 6941466: Oracle rebranding changes for Hotspot repositories
trims
parents: 0
diff changeset
20 * or visit www.oracle.com if you need additional information or have any
c18cbe5936b8 6941466: Oracle rebranding changes for Hotspot repositories
trims
parents: 0
diff changeset
21 * questions.
0
a61af66fc99e Initial load
duke
parents:
diff changeset
22 *
a61af66fc99e Initial load
duke
parents:
diff changeset
23 */
a61af66fc99e Initial load
duke
parents:
diff changeset
24
1972
f95d63e2154a 6989984: Use standard include model for Hospot
stefank
parents: 1552
diff changeset
25 #include "precompiled.hpp"
f95d63e2154a 6989984: Use standard include model for Hospot
stefank
parents: 1552
diff changeset
26 #include "utilities/utf8.hpp"
0
a61af66fc99e Initial load
duke
parents:
diff changeset
27
a61af66fc99e Initial load
duke
parents:
diff changeset
28 // Assume the utf8 string is in legal form and has been
a61af66fc99e Initial load
duke
parents:
diff changeset
29 // checked in the class file parser/format checker.
a61af66fc99e Initial load
duke
parents:
diff changeset
30 char* UTF8::next(const char* str, jchar* value) {
a61af66fc99e Initial load
duke
parents:
diff changeset
31 unsigned const char *ptr = (const unsigned char *)str;
a61af66fc99e Initial load
duke
parents:
diff changeset
32 unsigned char ch, ch2, ch3;
a61af66fc99e Initial load
duke
parents:
diff changeset
33 int length = -1; /* bad length */
a61af66fc99e Initial load
duke
parents:
diff changeset
34 jchar result;
a61af66fc99e Initial load
duke
parents:
diff changeset
35 switch ((ch = ptr[0]) >> 4) {
a61af66fc99e Initial load
duke
parents:
diff changeset
36 default:
a61af66fc99e Initial load
duke
parents:
diff changeset
37 result = ch;
a61af66fc99e Initial load
duke
parents:
diff changeset
38 length = 1;
a61af66fc99e Initial load
duke
parents:
diff changeset
39 break;
a61af66fc99e Initial load
duke
parents:
diff changeset
40
a61af66fc99e Initial load
duke
parents:
diff changeset
41 case 0x8: case 0x9: case 0xA: case 0xB: case 0xF:
a61af66fc99e Initial load
duke
parents:
diff changeset
42 /* Shouldn't happen. */
a61af66fc99e Initial load
duke
parents:
diff changeset
43 break;
a61af66fc99e Initial load
duke
parents:
diff changeset
44
a61af66fc99e Initial load
duke
parents:
diff changeset
45 case 0xC: case 0xD:
a61af66fc99e Initial load
duke
parents:
diff changeset
46 /* 110xxxxx 10xxxxxx */
a61af66fc99e Initial load
duke
parents:
diff changeset
47 if (((ch2 = ptr[1]) & 0xC0) == 0x80) {
a61af66fc99e Initial load
duke
parents:
diff changeset
48 unsigned char high_five = ch & 0x1F;
a61af66fc99e Initial load
duke
parents:
diff changeset
49 unsigned char low_six = ch2 & 0x3F;
a61af66fc99e Initial load
duke
parents:
diff changeset
50 result = (high_five << 6) + low_six;
a61af66fc99e Initial load
duke
parents:
diff changeset
51 length = 2;
a61af66fc99e Initial load
duke
parents:
diff changeset
52 break;
a61af66fc99e Initial load
duke
parents:
diff changeset
53 }
a61af66fc99e Initial load
duke
parents:
diff changeset
54 break;
a61af66fc99e Initial load
duke
parents:
diff changeset
55
a61af66fc99e Initial load
duke
parents:
diff changeset
56 case 0xE:
a61af66fc99e Initial load
duke
parents:
diff changeset
57 /* 1110xxxx 10xxxxxx 10xxxxxx */
a61af66fc99e Initial load
duke
parents:
diff changeset
58 if (((ch2 = ptr[1]) & 0xC0) == 0x80) {
a61af66fc99e Initial load
duke
parents:
diff changeset
59 if (((ch3 = ptr[2]) & 0xC0) == 0x80) {
a61af66fc99e Initial load
duke
parents:
diff changeset
60 unsigned char high_four = ch & 0x0f;
a61af66fc99e Initial load
duke
parents:
diff changeset
61 unsigned char mid_six = ch2 & 0x3f;
a61af66fc99e Initial load
duke
parents:
diff changeset
62 unsigned char low_six = ch3 & 0x3f;
a61af66fc99e Initial load
duke
parents:
diff changeset
63 result = (((high_four << 6) + mid_six) << 6) + low_six;
a61af66fc99e Initial load
duke
parents:
diff changeset
64 length = 3;
a61af66fc99e Initial load
duke
parents:
diff changeset
65 }
a61af66fc99e Initial load
duke
parents:
diff changeset
66 }
a61af66fc99e Initial load
duke
parents:
diff changeset
67 break;
a61af66fc99e Initial load
duke
parents:
diff changeset
68 } /* end of switch */
a61af66fc99e Initial load
duke
parents:
diff changeset
69
a61af66fc99e Initial load
duke
parents:
diff changeset
70 if (length <= 0) {
a61af66fc99e Initial load
duke
parents:
diff changeset
71 *value = ptr[0]; /* default bad result; */
a61af66fc99e Initial load
duke
parents:
diff changeset
72 return (char*)(ptr + 1); // make progress somehow
a61af66fc99e Initial load
duke
parents:
diff changeset
73 }
a61af66fc99e Initial load
duke
parents:
diff changeset
74
a61af66fc99e Initial load
duke
parents:
diff changeset
75 *value = result;
a61af66fc99e Initial load
duke
parents:
diff changeset
76
a61af66fc99e Initial load
duke
parents:
diff changeset
77 // The assert is correct but the .class file is wrong
a61af66fc99e Initial load
duke
parents:
diff changeset
78 // assert(UNICODE::utf8_size(result) == length, "checking reverse computation");
a61af66fc99e Initial load
duke
parents:
diff changeset
79 return (char *)(ptr + length);
a61af66fc99e Initial load
duke
parents:
diff changeset
80 }
a61af66fc99e Initial load
duke
parents:
diff changeset
81
a61af66fc99e Initial load
duke
parents:
diff changeset
82 char* UTF8::next_character(const char* str, jint* value) {
a61af66fc99e Initial load
duke
parents:
diff changeset
83 unsigned const char *ptr = (const unsigned char *)str;
a61af66fc99e Initial load
duke
parents:
diff changeset
84 /* See if it's legal supplementary character:
a61af66fc99e Initial load
duke
parents:
diff changeset
85 11101101 1010xxxx 10xxxxxx 11101101 1011xxxx 10xxxxxx */
a61af66fc99e Initial load
duke
parents:
diff changeset
86 if (is_supplementary_character(ptr)) {
a61af66fc99e Initial load
duke
parents:
diff changeset
87 *value = get_supplementary_character(ptr);
a61af66fc99e Initial load
duke
parents:
diff changeset
88 return (char *)(ptr + 6);
a61af66fc99e Initial load
duke
parents:
diff changeset
89 }
a61af66fc99e Initial load
duke
parents:
diff changeset
90 jchar result;
a61af66fc99e Initial load
duke
parents:
diff changeset
91 char* next_ch = next(str, &result);
a61af66fc99e Initial load
duke
parents:
diff changeset
92 *value = result;
a61af66fc99e Initial load
duke
parents:
diff changeset
93 return next_ch;
a61af66fc99e Initial load
duke
parents:
diff changeset
94 }
a61af66fc99e Initial load
duke
parents:
diff changeset
95
a61af66fc99e Initial load
duke
parents:
diff changeset
96 // Count bytes of the form 10xxxxxx and deduct this count
a61af66fc99e Initial load
duke
parents:
diff changeset
97 // from the total byte count. The utf8 string must be in
a61af66fc99e Initial load
duke
parents:
diff changeset
98 // legal form which has been verified in the format checker.
a61af66fc99e Initial load
duke
parents:
diff changeset
99 int UTF8::unicode_length(const char* str, int len) {
a61af66fc99e Initial load
duke
parents:
diff changeset
100 int num_chars = len;
a61af66fc99e Initial load
duke
parents:
diff changeset
101 for (int i = 0; i < len; i++) {
a61af66fc99e Initial load
duke
parents:
diff changeset
102 if ((str[i] & 0xC0) == 0x80) {
a61af66fc99e Initial load
duke
parents:
diff changeset
103 --num_chars;
a61af66fc99e Initial load
duke
parents:
diff changeset
104 }
a61af66fc99e Initial load
duke
parents:
diff changeset
105 }
a61af66fc99e Initial load
duke
parents:
diff changeset
106 return num_chars;
a61af66fc99e Initial load
duke
parents:
diff changeset
107 }
a61af66fc99e Initial load
duke
parents:
diff changeset
108
a61af66fc99e Initial load
duke
parents:
diff changeset
109 // Count bytes of the utf8 string except those in form
a61af66fc99e Initial load
duke
parents:
diff changeset
110 // 10xxxxxx which only appear in multibyte characters.
a61af66fc99e Initial load
duke
parents:
diff changeset
111 // The utf8 string must be in legal form and has been
a61af66fc99e Initial load
duke
parents:
diff changeset
112 // verified in the format checker.
a61af66fc99e Initial load
duke
parents:
diff changeset
113 int UTF8::unicode_length(const char* str) {
a61af66fc99e Initial load
duke
parents:
diff changeset
114 int num_chars = 0;
a61af66fc99e Initial load
duke
parents:
diff changeset
115 for (const char* p = str; *p; p++) {
a61af66fc99e Initial load
duke
parents:
diff changeset
116 if (((*p) & 0xC0) != 0x80) {
a61af66fc99e Initial load
duke
parents:
diff changeset
117 num_chars++;
a61af66fc99e Initial load
duke
parents:
diff changeset
118 }
a61af66fc99e Initial load
duke
parents:
diff changeset
119 }
a61af66fc99e Initial load
duke
parents:
diff changeset
120 return num_chars;
a61af66fc99e Initial load
duke
parents:
diff changeset
121 }
a61af66fc99e Initial load
duke
parents:
diff changeset
122
a61af66fc99e Initial load
duke
parents:
diff changeset
123 // Writes a jchar a utf8 and returns the end
a61af66fc99e Initial load
duke
parents:
diff changeset
124 static u_char* utf8_write(u_char* base, jchar ch) {
a61af66fc99e Initial load
duke
parents:
diff changeset
125 if ((ch != 0) && (ch <=0x7f)) {
a61af66fc99e Initial load
duke
parents:
diff changeset
126 base[0] = (u_char) ch;
a61af66fc99e Initial load
duke
parents:
diff changeset
127 return base + 1;
a61af66fc99e Initial load
duke
parents:
diff changeset
128 }
a61af66fc99e Initial load
duke
parents:
diff changeset
129
a61af66fc99e Initial load
duke
parents:
diff changeset
130 if (ch <= 0x7FF) {
a61af66fc99e Initial load
duke
parents:
diff changeset
131 /* 11 bits or less. */
a61af66fc99e Initial load
duke
parents:
diff changeset
132 unsigned char high_five = ch >> 6;
a61af66fc99e Initial load
duke
parents:
diff changeset
133 unsigned char low_six = ch & 0x3F;
a61af66fc99e Initial load
duke
parents:
diff changeset
134 base[0] = high_five | 0xC0; /* 110xxxxx */
a61af66fc99e Initial load
duke
parents:
diff changeset
135 base[1] = low_six | 0x80; /* 10xxxxxx */
a61af66fc99e Initial load
duke
parents:
diff changeset
136 return base + 2;
a61af66fc99e Initial load
duke
parents:
diff changeset
137 }
a61af66fc99e Initial load
duke
parents:
diff changeset
138 /* possibly full 16 bits. */
a61af66fc99e Initial load
duke
parents:
diff changeset
139 char high_four = ch >> 12;
a61af66fc99e Initial load
duke
parents:
diff changeset
140 char mid_six = (ch >> 6) & 0x3F;
a61af66fc99e Initial load
duke
parents:
diff changeset
141 char low_six = ch & 0x3f;
a61af66fc99e Initial load
duke
parents:
diff changeset
142 base[0] = high_four | 0xE0; /* 1110xxxx */
a61af66fc99e Initial load
duke
parents:
diff changeset
143 base[1] = mid_six | 0x80; /* 10xxxxxx */
a61af66fc99e Initial load
duke
parents:
diff changeset
144 base[2] = low_six | 0x80; /* 10xxxxxx */
a61af66fc99e Initial load
duke
parents:
diff changeset
145 return base + 3;
a61af66fc99e Initial load
duke
parents:
diff changeset
146 }
a61af66fc99e Initial load
duke
parents:
diff changeset
147
a61af66fc99e Initial load
duke
parents:
diff changeset
148 void UTF8::convert_to_unicode(const char* utf8_str, jchar* unicode_str, int unicode_length) {
a61af66fc99e Initial load
duke
parents:
diff changeset
149 unsigned char ch;
6972
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
150 const char *ptr = utf8_str;
0
a61af66fc99e Initial load
duke
parents:
diff changeset
151 int index = 0;
a61af66fc99e Initial load
duke
parents:
diff changeset
152
a61af66fc99e Initial load
duke
parents:
diff changeset
153 /* ASCII case loop optimization */
a61af66fc99e Initial load
duke
parents:
diff changeset
154 for (; index < unicode_length; index++) {
a61af66fc99e Initial load
duke
parents:
diff changeset
155 if((ch = ptr[0]) > 0x7F) { break; }
a61af66fc99e Initial load
duke
parents:
diff changeset
156 unicode_str[index] = ch;
a61af66fc99e Initial load
duke
parents:
diff changeset
157 ptr = (const char *)(ptr + 1);
a61af66fc99e Initial load
duke
parents:
diff changeset
158 }
a61af66fc99e Initial load
duke
parents:
diff changeset
159
a61af66fc99e Initial load
duke
parents:
diff changeset
160 for (; index < unicode_length; index++) {
a61af66fc99e Initial load
duke
parents:
diff changeset
161 ptr = UTF8::next(ptr, &unicode_str[index]);
a61af66fc99e Initial load
duke
parents:
diff changeset
162 }
a61af66fc99e Initial load
duke
parents:
diff changeset
163 }
a61af66fc99e Initial load
duke
parents:
diff changeset
164
6972
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
165 // returns the quoted ascii length of a 0-terminated utf8 string
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
166 int UTF8::quoted_ascii_length(const char* utf8_str, int utf8_length) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
167 const char *ptr = utf8_str;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
168 const char* end = ptr + utf8_length;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
169 int result = 0;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
170 while (ptr < end) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
171 jchar c;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
172 ptr = UTF8::next(ptr, &c);
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
173 if (c >= 32 && c < 127) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
174 result++;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
175 } else {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
176 result += 6;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
177 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
178 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
179 return result;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
180 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
181
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
182 // converts a utf8 string to quoted ascii
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
183 void UTF8::as_quoted_ascii(const char* utf8_str, char* buf, int buflen) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
184 const char *ptr = utf8_str;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
185 char* p = buf;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
186 char* end = buf + buflen;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
187 while (*ptr != '\0') {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
188 jchar c;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
189 ptr = UTF8::next(ptr, &c);
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
190 if (c >= 32 && c < 127) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
191 if (p + 1 >= end) break; // string is truncated
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
192 *p++ = (char)c;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
193 } else {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
194 if (p + 6 >= end) break; // string is truncated
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
195 sprintf(p, "\\u%04x", c);
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
196 p += 6;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
197 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
198 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
199 *p = '\0';
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
200 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
201
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
202
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
203 const char* UTF8::from_quoted_ascii(const char* quoted_ascii_str) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
204 const char *ptr = quoted_ascii_str;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
205 char* result = NULL;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
206 while (*ptr != '\0') {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
207 char c = *ptr;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
208 if (c < 32 || c >= 127) break;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
209 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
210 if (*ptr == '\0') {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
211 // nothing to do so return original string
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
212 return quoted_ascii_str;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
213 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
214 // everything up to this point was ok.
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
215 int length = ptr - quoted_ascii_str;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
216 char* buffer = NULL;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
217 for (int round = 0; round < 2; round++) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
218 while (*ptr != '\0') {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
219 if (*ptr != '\\') {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
220 if (buffer != NULL) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
221 buffer[length] = *ptr;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
222 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
223 length++;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
224 } else {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
225 switch (ptr[1]) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
226 case 'u': {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
227 ptr += 2;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
228 jchar value=0;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
229 for (int i=0; i<4; i++) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
230 char c = *ptr++;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
231 switch (c) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
232 case '0': case '1': case '2': case '3': case '4':
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
233 case '5': case '6': case '7': case '8': case '9':
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
234 value = (value << 4) + c - '0';
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
235 break;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
236 case 'a': case 'b': case 'c':
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
237 case 'd': case 'e': case 'f':
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
238 value = (value << 4) + 10 + c - 'a';
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
239 break;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
240 case 'A': case 'B': case 'C':
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
241 case 'D': case 'E': case 'F':
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
242 value = (value << 4) + 10 + c - 'A';
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
243 break;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
244 default:
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
245 ShouldNotReachHere();
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
246 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
247 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
248 if (buffer == NULL) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
249 char utf8_buffer[4];
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
250 char* next = (char*)utf8_write((u_char*)utf8_buffer, value);
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
251 length += next - utf8_buffer;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
252 } else {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
253 char* next = (char*)utf8_write((u_char*)&buffer[length], value);
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
254 length += next - &buffer[length];
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
255 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
256 break;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
257 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
258 case 't': if (buffer != NULL) buffer[length] = '\t'; ptr += 2; length++; break;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
259 case 'n': if (buffer != NULL) buffer[length] = '\n'; ptr += 2; length++; break;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
260 case 'r': if (buffer != NULL) buffer[length] = '\r'; ptr += 2; length++; break;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
261 case 'f': if (buffer != NULL) buffer[length] = '\f'; ptr += 2; length++; break;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
262 default:
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
263 ShouldNotReachHere();
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
264 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
265 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
266 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
267 if (round == 0) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
268 buffer = NEW_RESOURCE_ARRAY(char, length + 1);
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
269 ptr = quoted_ascii_str;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
270 } else {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
271 buffer[length] = '\0';
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
272 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
273 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
274 return buffer;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
275 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
276
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
277
0
a61af66fc99e Initial load
duke
parents:
diff changeset
278 // Returns NULL if 'c' it not found. This only works as long
a61af66fc99e Initial load
duke
parents:
diff changeset
279 // as 'c' is an ASCII character
2177
3582bf76420e 6990754: Use native memory and reference counting to implement SymbolTable
coleenp
parents: 1972
diff changeset
280 const jbyte* UTF8::strrchr(const jbyte* base, int length, jbyte c) {
0
a61af66fc99e Initial load
duke
parents:
diff changeset
281 assert(length >= 0, "sanity check");
a61af66fc99e Initial load
duke
parents:
diff changeset
282 assert(c >= 0, "does not work for non-ASCII characters");
a61af66fc99e Initial load
duke
parents:
diff changeset
283 // Skip backwards in string until 'c' is found or end is reached
a61af66fc99e Initial load
duke
parents:
diff changeset
284 while(--length >= 0 && base[length] != c);
a61af66fc99e Initial load
duke
parents:
diff changeset
285 return (length < 0) ? NULL : &base[length];
a61af66fc99e Initial load
duke
parents:
diff changeset
286 }
a61af66fc99e Initial load
duke
parents:
diff changeset
287
2177
3582bf76420e 6990754: Use native memory and reference counting to implement SymbolTable
coleenp
parents: 1972
diff changeset
288 bool UTF8::equal(const jbyte* base1, int length1, const jbyte* base2, int length2) {
0
a61af66fc99e Initial load
duke
parents:
diff changeset
289 // Length must be the same
a61af66fc99e Initial load
duke
parents:
diff changeset
290 if (length1 != length2) return false;
a61af66fc99e Initial load
duke
parents:
diff changeset
291 for (int i = 0; i < length1; i++) {
a61af66fc99e Initial load
duke
parents:
diff changeset
292 if (base1[i] != base2[i]) return false;
a61af66fc99e Initial load
duke
parents:
diff changeset
293 }
a61af66fc99e Initial load
duke
parents:
diff changeset
294 return true;
a61af66fc99e Initial load
duke
parents:
diff changeset
295 }
a61af66fc99e Initial load
duke
parents:
diff changeset
296
a61af66fc99e Initial load
duke
parents:
diff changeset
297 bool UTF8::is_supplementary_character(const unsigned char* str) {
a61af66fc99e Initial load
duke
parents:
diff changeset
298 return ((str[0] & 0xFF) == 0xED) && ((str[1] & 0xF0) == 0xA0) && ((str[2] & 0xC0) == 0x80)
a61af66fc99e Initial load
duke
parents:
diff changeset
299 && ((str[3] & 0xFF) == 0xED) && ((str[4] & 0xF0) == 0xB0) && ((str[5] & 0xC0) == 0x80);
a61af66fc99e Initial load
duke
parents:
diff changeset
300 }
a61af66fc99e Initial load
duke
parents:
diff changeset
301
a61af66fc99e Initial load
duke
parents:
diff changeset
302 jint UTF8::get_supplementary_character(const unsigned char* str) {
a61af66fc99e Initial load
duke
parents:
diff changeset
303 return 0x10000 + ((str[1] & 0x0f) << 16) + ((str[2] & 0x3f) << 10)
a61af66fc99e Initial load
duke
parents:
diff changeset
304 + ((str[4] & 0x0f) << 6) + (str[5] & 0x3f);
a61af66fc99e Initial load
duke
parents:
diff changeset
305 }
a61af66fc99e Initial load
duke
parents:
diff changeset
306
a61af66fc99e Initial load
duke
parents:
diff changeset
307
a61af66fc99e Initial load
duke
parents:
diff changeset
308 //-------------------------------------------------------------------------------------
a61af66fc99e Initial load
duke
parents:
diff changeset
309
a61af66fc99e Initial load
duke
parents:
diff changeset
310
a61af66fc99e Initial load
duke
parents:
diff changeset
311 int UNICODE::utf8_size(jchar c) {
a61af66fc99e Initial load
duke
parents:
diff changeset
312 if ((0x0001 <= c) && (c <= 0x007F)) return 1;
a61af66fc99e Initial load
duke
parents:
diff changeset
313 if (c <= 0x07FF) return 2;
a61af66fc99e Initial load
duke
parents:
diff changeset
314 return 3;
a61af66fc99e Initial load
duke
parents:
diff changeset
315 }
a61af66fc99e Initial load
duke
parents:
diff changeset
316
a61af66fc99e Initial load
duke
parents:
diff changeset
317 int UNICODE::utf8_length(jchar* base, int length) {
a61af66fc99e Initial load
duke
parents:
diff changeset
318 int result = 0;
a61af66fc99e Initial load
duke
parents:
diff changeset
319 for (int index = 0; index < length; index++) {
a61af66fc99e Initial load
duke
parents:
diff changeset
320 jchar c = base[index];
a61af66fc99e Initial load
duke
parents:
diff changeset
321 if ((0x0001 <= c) && (c <= 0x007F)) result += 1;
a61af66fc99e Initial load
duke
parents:
diff changeset
322 else if (c <= 0x07FF) result += 2;
a61af66fc99e Initial load
duke
parents:
diff changeset
323 else result += 3;
a61af66fc99e Initial load
duke
parents:
diff changeset
324 }
a61af66fc99e Initial load
duke
parents:
diff changeset
325 return result;
a61af66fc99e Initial load
duke
parents:
diff changeset
326 }
a61af66fc99e Initial load
duke
parents:
diff changeset
327
a61af66fc99e Initial load
duke
parents:
diff changeset
328 char* UNICODE::as_utf8(jchar* base, int length) {
a61af66fc99e Initial load
duke
parents:
diff changeset
329 int utf8_len = utf8_length(base, length);
a61af66fc99e Initial load
duke
parents:
diff changeset
330 u_char* result = NEW_RESOURCE_ARRAY(u_char, utf8_len + 1);
a61af66fc99e Initial load
duke
parents:
diff changeset
331 u_char* p = result;
a61af66fc99e Initial load
duke
parents:
diff changeset
332 for (int index = 0; index < length; index++) {
a61af66fc99e Initial load
duke
parents:
diff changeset
333 p = utf8_write(p, base[index]);
a61af66fc99e Initial load
duke
parents:
diff changeset
334 }
a61af66fc99e Initial load
duke
parents:
diff changeset
335 *p = '\0';
a61af66fc99e Initial load
duke
parents:
diff changeset
336 assert(p == &result[utf8_len], "length prediction must be correct");
a61af66fc99e Initial load
duke
parents:
diff changeset
337 return (char*) result;
a61af66fc99e Initial load
duke
parents:
diff changeset
338 }
a61af66fc99e Initial load
duke
parents:
diff changeset
339
a61af66fc99e Initial load
duke
parents:
diff changeset
340 char* UNICODE::as_utf8(jchar* base, int length, char* buf, int buflen) {
a61af66fc99e Initial load
duke
parents:
diff changeset
341 u_char* p = (u_char*)buf;
a61af66fc99e Initial load
duke
parents:
diff changeset
342 u_char* end = (u_char*)buf + buflen;
a61af66fc99e Initial load
duke
parents:
diff changeset
343 for (int index = 0; index < length; index++) {
a61af66fc99e Initial load
duke
parents:
diff changeset
344 jchar c = base[index];
a61af66fc99e Initial load
duke
parents:
diff changeset
345 if (p + utf8_size(c) >= end) break; // string is truncated
a61af66fc99e Initial load
duke
parents:
diff changeset
346 p = utf8_write(p, base[index]);
a61af66fc99e Initial load
duke
parents:
diff changeset
347 }
a61af66fc99e Initial load
duke
parents:
diff changeset
348 *p = '\0';
a61af66fc99e Initial load
duke
parents:
diff changeset
349 return buf;
a61af66fc99e Initial load
duke
parents:
diff changeset
350 }
a61af66fc99e Initial load
duke
parents:
diff changeset
351
a61af66fc99e Initial load
duke
parents:
diff changeset
352 void UNICODE::convert_to_utf8(const jchar* base, int length, char* utf8_buffer) {
a61af66fc99e Initial load
duke
parents:
diff changeset
353 for(int index = 0; index < length; index++) {
a61af66fc99e Initial load
duke
parents:
diff changeset
354 utf8_buffer = (char*)utf8_write((u_char*)utf8_buffer, base[index]);
a61af66fc99e Initial load
duke
parents:
diff changeset
355 }
a61af66fc99e Initial load
duke
parents:
diff changeset
356 *utf8_buffer = '\0';
a61af66fc99e Initial load
duke
parents:
diff changeset
357 }
6972
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
358
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
359 // returns the quoted ascii length of a unicode string
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
360 int UNICODE::quoted_ascii_length(jchar* base, int length) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
361 int result = 0;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
362 for (int i = 0; i < length; i++) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
363 jchar c = base[i];
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
364 if (c >= 32 && c < 127) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
365 result++;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
366 } else {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
367 result += 6;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
368 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
369 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
370 return result;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
371 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
372
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
373 // converts a utf8 string to quoted ascii
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
374 void UNICODE::as_quoted_ascii(const jchar* base, int length, char* buf, int buflen) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
375 char* p = buf;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
376 char* end = buf + buflen;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
377 for (int index = 0; index < length; index++) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
378 jchar c = base[index];
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
379 if (c >= 32 && c < 127) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
380 if (p + 1 >= end) break; // string is truncated
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
381 *p++ = (char)c;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
382 } else {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
383 if (p + 6 >= end) break; // string is truncated
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
384 sprintf(p, "\\u%04x", c);
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
385 p += 6;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
386 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
387 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
388 *p = '\0';
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
389 }