annotate src/share/vm/utilities/utf8.cpp @ 20543:e7d0505c8a30

8059758: Footprint regressions with JDK-8038423 Summary: Changes in JDK-8038423 always initialize (zero out) virtual memory used for auxiliary data structures. This causes a footprint regression for G1 in startup benchmarks. This is because they do not touch that memory at all, so the operating system does not actually commit these pages. The fix is to, if the initialization value of the data structures matches the default value of just committed memory (=0), do not do anything. Reviewed-by: jwilhelm, brutisso
author tschatzl
date Fri, 10 Oct 2014 15:51:58 +0200
parents 8c03fc47511d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a61af66fc99e Initial load
duke
parents:
diff changeset
1 /*
8851
8c03fc47511d 8011048: Possible reading from unmapped memory in UTF8::as_quoted_ascii()
iklam
parents: 6972
diff changeset
2 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
0
a61af66fc99e Initial load
duke
parents:
diff changeset
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
a61af66fc99e Initial load
duke
parents:
diff changeset
4 *
a61af66fc99e Initial load
duke
parents:
diff changeset
5 * This code is free software; you can redistribute it and/or modify it
a61af66fc99e Initial load
duke
parents:
diff changeset
6 * under the terms of the GNU General Public License version 2 only, as
a61af66fc99e Initial load
duke
parents:
diff changeset
7 * published by the Free Software Foundation.
a61af66fc99e Initial load
duke
parents:
diff changeset
8 *
a61af66fc99e Initial load
duke
parents:
diff changeset
9 * This code is distributed in the hope that it will be useful, but WITHOUT
a61af66fc99e Initial load
duke
parents:
diff changeset
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
a61af66fc99e Initial load
duke
parents:
diff changeset
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
a61af66fc99e Initial load
duke
parents:
diff changeset
12 * version 2 for more details (a copy is included in the LICENSE file that
a61af66fc99e Initial load
duke
parents:
diff changeset
13 * accompanied this code).
a61af66fc99e Initial load
duke
parents:
diff changeset
14 *
a61af66fc99e Initial load
duke
parents:
diff changeset
15 * You should have received a copy of the GNU General Public License version
a61af66fc99e Initial load
duke
parents:
diff changeset
16 * 2 along with this work; if not, write to the Free Software Foundation,
a61af66fc99e Initial load
duke
parents:
diff changeset
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
a61af66fc99e Initial load
duke
parents:
diff changeset
18 *
1552
c18cbe5936b8 6941466: Oracle rebranding changes for Hotspot repositories
trims
parents: 0
diff changeset
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
c18cbe5936b8 6941466: Oracle rebranding changes for Hotspot repositories
trims
parents: 0
diff changeset
20 * or visit www.oracle.com if you need additional information or have any
c18cbe5936b8 6941466: Oracle rebranding changes for Hotspot repositories
trims
parents: 0
diff changeset
21 * questions.
0
a61af66fc99e Initial load
duke
parents:
diff changeset
22 *
a61af66fc99e Initial load
duke
parents:
diff changeset
23 */
a61af66fc99e Initial load
duke
parents:
diff changeset
24
1972
f95d63e2154a 6989984: Use standard include model for Hospot
stefank
parents: 1552
diff changeset
25 #include "precompiled.hpp"
f95d63e2154a 6989984: Use standard include model for Hospot
stefank
parents: 1552
diff changeset
26 #include "utilities/utf8.hpp"
0
a61af66fc99e Initial load
duke
parents:
diff changeset
27
a61af66fc99e Initial load
duke
parents:
diff changeset
28 // Assume the utf8 string is in legal form and has been
a61af66fc99e Initial load
duke
parents:
diff changeset
29 // checked in the class file parser/format checker.
a61af66fc99e Initial load
duke
parents:
diff changeset
30 char* UTF8::next(const char* str, jchar* value) {
a61af66fc99e Initial load
duke
parents:
diff changeset
31 unsigned const char *ptr = (const unsigned char *)str;
a61af66fc99e Initial load
duke
parents:
diff changeset
32 unsigned char ch, ch2, ch3;
a61af66fc99e Initial load
duke
parents:
diff changeset
33 int length = -1; /* bad length */
a61af66fc99e Initial load
duke
parents:
diff changeset
34 jchar result;
a61af66fc99e Initial load
duke
parents:
diff changeset
35 switch ((ch = ptr[0]) >> 4) {
a61af66fc99e Initial load
duke
parents:
diff changeset
36 default:
a61af66fc99e Initial load
duke
parents:
diff changeset
37 result = ch;
a61af66fc99e Initial load
duke
parents:
diff changeset
38 length = 1;
a61af66fc99e Initial load
duke
parents:
diff changeset
39 break;
a61af66fc99e Initial load
duke
parents:
diff changeset
40
a61af66fc99e Initial load
duke
parents:
diff changeset
41 case 0x8: case 0x9: case 0xA: case 0xB: case 0xF:
a61af66fc99e Initial load
duke
parents:
diff changeset
42 /* Shouldn't happen. */
a61af66fc99e Initial load
duke
parents:
diff changeset
43 break;
a61af66fc99e Initial load
duke
parents:
diff changeset
44
a61af66fc99e Initial load
duke
parents:
diff changeset
45 case 0xC: case 0xD:
a61af66fc99e Initial load
duke
parents:
diff changeset
46 /* 110xxxxx 10xxxxxx */
a61af66fc99e Initial load
duke
parents:
diff changeset
47 if (((ch2 = ptr[1]) & 0xC0) == 0x80) {
a61af66fc99e Initial load
duke
parents:
diff changeset
48 unsigned char high_five = ch & 0x1F;
a61af66fc99e Initial load
duke
parents:
diff changeset
49 unsigned char low_six = ch2 & 0x3F;
a61af66fc99e Initial load
duke
parents:
diff changeset
50 result = (high_five << 6) + low_six;
a61af66fc99e Initial load
duke
parents:
diff changeset
51 length = 2;
a61af66fc99e Initial load
duke
parents:
diff changeset
52 break;
a61af66fc99e Initial load
duke
parents:
diff changeset
53 }
a61af66fc99e Initial load
duke
parents:
diff changeset
54 break;
a61af66fc99e Initial load
duke
parents:
diff changeset
55
a61af66fc99e Initial load
duke
parents:
diff changeset
56 case 0xE:
a61af66fc99e Initial load
duke
parents:
diff changeset
57 /* 1110xxxx 10xxxxxx 10xxxxxx */
a61af66fc99e Initial load
duke
parents:
diff changeset
58 if (((ch2 = ptr[1]) & 0xC0) == 0x80) {
a61af66fc99e Initial load
duke
parents:
diff changeset
59 if (((ch3 = ptr[2]) & 0xC0) == 0x80) {
a61af66fc99e Initial load
duke
parents:
diff changeset
60 unsigned char high_four = ch & 0x0f;
a61af66fc99e Initial load
duke
parents:
diff changeset
61 unsigned char mid_six = ch2 & 0x3f;
a61af66fc99e Initial load
duke
parents:
diff changeset
62 unsigned char low_six = ch3 & 0x3f;
a61af66fc99e Initial load
duke
parents:
diff changeset
63 result = (((high_four << 6) + mid_six) << 6) + low_six;
a61af66fc99e Initial load
duke
parents:
diff changeset
64 length = 3;
a61af66fc99e Initial load
duke
parents:
diff changeset
65 }
a61af66fc99e Initial load
duke
parents:
diff changeset
66 }
a61af66fc99e Initial load
duke
parents:
diff changeset
67 break;
a61af66fc99e Initial load
duke
parents:
diff changeset
68 } /* end of switch */
a61af66fc99e Initial load
duke
parents:
diff changeset
69
a61af66fc99e Initial load
duke
parents:
diff changeset
70 if (length <= 0) {
a61af66fc99e Initial load
duke
parents:
diff changeset
71 *value = ptr[0]; /* default bad result; */
a61af66fc99e Initial load
duke
parents:
diff changeset
72 return (char*)(ptr + 1); // make progress somehow
a61af66fc99e Initial load
duke
parents:
diff changeset
73 }
a61af66fc99e Initial load
duke
parents:
diff changeset
74
a61af66fc99e Initial load
duke
parents:
diff changeset
75 *value = result;
a61af66fc99e Initial load
duke
parents:
diff changeset
76
a61af66fc99e Initial load
duke
parents:
diff changeset
77 // The assert is correct but the .class file is wrong
a61af66fc99e Initial load
duke
parents:
diff changeset
78 // assert(UNICODE::utf8_size(result) == length, "checking reverse computation");
a61af66fc99e Initial load
duke
parents:
diff changeset
79 return (char *)(ptr + length);
a61af66fc99e Initial load
duke
parents:
diff changeset
80 }
a61af66fc99e Initial load
duke
parents:
diff changeset
81
a61af66fc99e Initial load
duke
parents:
diff changeset
82 char* UTF8::next_character(const char* str, jint* value) {
a61af66fc99e Initial load
duke
parents:
diff changeset
83 unsigned const char *ptr = (const unsigned char *)str;
a61af66fc99e Initial load
duke
parents:
diff changeset
84 /* See if it's legal supplementary character:
a61af66fc99e Initial load
duke
parents:
diff changeset
85 11101101 1010xxxx 10xxxxxx 11101101 1011xxxx 10xxxxxx */
a61af66fc99e Initial load
duke
parents:
diff changeset
86 if (is_supplementary_character(ptr)) {
a61af66fc99e Initial load
duke
parents:
diff changeset
87 *value = get_supplementary_character(ptr);
a61af66fc99e Initial load
duke
parents:
diff changeset
88 return (char *)(ptr + 6);
a61af66fc99e Initial load
duke
parents:
diff changeset
89 }
a61af66fc99e Initial load
duke
parents:
diff changeset
90 jchar result;
a61af66fc99e Initial load
duke
parents:
diff changeset
91 char* next_ch = next(str, &result);
a61af66fc99e Initial load
duke
parents:
diff changeset
92 *value = result;
a61af66fc99e Initial load
duke
parents:
diff changeset
93 return next_ch;
a61af66fc99e Initial load
duke
parents:
diff changeset
94 }
a61af66fc99e Initial load
duke
parents:
diff changeset
95
a61af66fc99e Initial load
duke
parents:
diff changeset
96 // Count bytes of the form 10xxxxxx and deduct this count
a61af66fc99e Initial load
duke
parents:
diff changeset
97 // from the total byte count. The utf8 string must be in
a61af66fc99e Initial load
duke
parents:
diff changeset
98 // legal form which has been verified in the format checker.
a61af66fc99e Initial load
duke
parents:
diff changeset
99 int UTF8::unicode_length(const char* str, int len) {
a61af66fc99e Initial load
duke
parents:
diff changeset
100 int num_chars = len;
a61af66fc99e Initial load
duke
parents:
diff changeset
101 for (int i = 0; i < len; i++) {
a61af66fc99e Initial load
duke
parents:
diff changeset
102 if ((str[i] & 0xC0) == 0x80) {
a61af66fc99e Initial load
duke
parents:
diff changeset
103 --num_chars;
a61af66fc99e Initial load
duke
parents:
diff changeset
104 }
a61af66fc99e Initial load
duke
parents:
diff changeset
105 }
a61af66fc99e Initial load
duke
parents:
diff changeset
106 return num_chars;
a61af66fc99e Initial load
duke
parents:
diff changeset
107 }
a61af66fc99e Initial load
duke
parents:
diff changeset
108
a61af66fc99e Initial load
duke
parents:
diff changeset
109 // Count bytes of the utf8 string except those in form
a61af66fc99e Initial load
duke
parents:
diff changeset
110 // 10xxxxxx which only appear in multibyte characters.
a61af66fc99e Initial load
duke
parents:
diff changeset
111 // The utf8 string must be in legal form and has been
a61af66fc99e Initial load
duke
parents:
diff changeset
112 // verified in the format checker.
a61af66fc99e Initial load
duke
parents:
diff changeset
113 int UTF8::unicode_length(const char* str) {
a61af66fc99e Initial load
duke
parents:
diff changeset
114 int num_chars = 0;
a61af66fc99e Initial load
duke
parents:
diff changeset
115 for (const char* p = str; *p; p++) {
a61af66fc99e Initial load
duke
parents:
diff changeset
116 if (((*p) & 0xC0) != 0x80) {
a61af66fc99e Initial load
duke
parents:
diff changeset
117 num_chars++;
a61af66fc99e Initial load
duke
parents:
diff changeset
118 }
a61af66fc99e Initial load
duke
parents:
diff changeset
119 }
a61af66fc99e Initial load
duke
parents:
diff changeset
120 return num_chars;
a61af66fc99e Initial load
duke
parents:
diff changeset
121 }
a61af66fc99e Initial load
duke
parents:
diff changeset
122
a61af66fc99e Initial load
duke
parents:
diff changeset
123 // Writes a jchar a utf8 and returns the end
a61af66fc99e Initial load
duke
parents:
diff changeset
124 static u_char* utf8_write(u_char* base, jchar ch) {
a61af66fc99e Initial load
duke
parents:
diff changeset
125 if ((ch != 0) && (ch <=0x7f)) {
a61af66fc99e Initial load
duke
parents:
diff changeset
126 base[0] = (u_char) ch;
a61af66fc99e Initial load
duke
parents:
diff changeset
127 return base + 1;
a61af66fc99e Initial load
duke
parents:
diff changeset
128 }
a61af66fc99e Initial load
duke
parents:
diff changeset
129
a61af66fc99e Initial load
duke
parents:
diff changeset
130 if (ch <= 0x7FF) {
a61af66fc99e Initial load
duke
parents:
diff changeset
131 /* 11 bits or less. */
a61af66fc99e Initial load
duke
parents:
diff changeset
132 unsigned char high_five = ch >> 6;
a61af66fc99e Initial load
duke
parents:
diff changeset
133 unsigned char low_six = ch & 0x3F;
a61af66fc99e Initial load
duke
parents:
diff changeset
134 base[0] = high_five | 0xC0; /* 110xxxxx */
a61af66fc99e Initial load
duke
parents:
diff changeset
135 base[1] = low_six | 0x80; /* 10xxxxxx */
a61af66fc99e Initial load
duke
parents:
diff changeset
136 return base + 2;
a61af66fc99e Initial load
duke
parents:
diff changeset
137 }
a61af66fc99e Initial load
duke
parents:
diff changeset
138 /* possibly full 16 bits. */
a61af66fc99e Initial load
duke
parents:
diff changeset
139 char high_four = ch >> 12;
a61af66fc99e Initial load
duke
parents:
diff changeset
140 char mid_six = (ch >> 6) & 0x3F;
a61af66fc99e Initial load
duke
parents:
diff changeset
141 char low_six = ch & 0x3f;
a61af66fc99e Initial load
duke
parents:
diff changeset
142 base[0] = high_four | 0xE0; /* 1110xxxx */
a61af66fc99e Initial load
duke
parents:
diff changeset
143 base[1] = mid_six | 0x80; /* 10xxxxxx */
a61af66fc99e Initial load
duke
parents:
diff changeset
144 base[2] = low_six | 0x80; /* 10xxxxxx */
a61af66fc99e Initial load
duke
parents:
diff changeset
145 return base + 3;
a61af66fc99e Initial load
duke
parents:
diff changeset
146 }
a61af66fc99e Initial load
duke
parents:
diff changeset
147
a61af66fc99e Initial load
duke
parents:
diff changeset
148 void UTF8::convert_to_unicode(const char* utf8_str, jchar* unicode_str, int unicode_length) {
a61af66fc99e Initial load
duke
parents:
diff changeset
149 unsigned char ch;
6972
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
150 const char *ptr = utf8_str;
0
a61af66fc99e Initial load
duke
parents:
diff changeset
151 int index = 0;
a61af66fc99e Initial load
duke
parents:
diff changeset
152
a61af66fc99e Initial load
duke
parents:
diff changeset
153 /* ASCII case loop optimization */
a61af66fc99e Initial load
duke
parents:
diff changeset
154 for (; index < unicode_length; index++) {
a61af66fc99e Initial load
duke
parents:
diff changeset
155 if((ch = ptr[0]) > 0x7F) { break; }
a61af66fc99e Initial load
duke
parents:
diff changeset
156 unicode_str[index] = ch;
a61af66fc99e Initial load
duke
parents:
diff changeset
157 ptr = (const char *)(ptr + 1);
a61af66fc99e Initial load
duke
parents:
diff changeset
158 }
a61af66fc99e Initial load
duke
parents:
diff changeset
159
a61af66fc99e Initial load
duke
parents:
diff changeset
160 for (; index < unicode_length; index++) {
a61af66fc99e Initial load
duke
parents:
diff changeset
161 ptr = UTF8::next(ptr, &unicode_str[index]);
a61af66fc99e Initial load
duke
parents:
diff changeset
162 }
a61af66fc99e Initial load
duke
parents:
diff changeset
163 }
a61af66fc99e Initial load
duke
parents:
diff changeset
164
6972
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
165 // returns the quoted ascii length of a 0-terminated utf8 string
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
166 int UTF8::quoted_ascii_length(const char* utf8_str, int utf8_length) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
167 const char *ptr = utf8_str;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
168 const char* end = ptr + utf8_length;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
169 int result = 0;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
170 while (ptr < end) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
171 jchar c;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
172 ptr = UTF8::next(ptr, &c);
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
173 if (c >= 32 && c < 127) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
174 result++;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
175 } else {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
176 result += 6;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
177 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
178 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
179 return result;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
180 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
181
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
182 // converts a utf8 string to quoted ascii
8851
8c03fc47511d 8011048: Possible reading from unmapped memory in UTF8::as_quoted_ascii()
iklam
parents: 6972
diff changeset
183 void UTF8::as_quoted_ascii(const char* utf8_str, int utf8_length, char* buf, int buflen) {
6972
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
184 const char *ptr = utf8_str;
8851
8c03fc47511d 8011048: Possible reading from unmapped memory in UTF8::as_quoted_ascii()
iklam
parents: 6972
diff changeset
185 const char *utf8_end = ptr + utf8_length;
6972
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
186 char* p = buf;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
187 char* end = buf + buflen;
8851
8c03fc47511d 8011048: Possible reading from unmapped memory in UTF8::as_quoted_ascii()
iklam
parents: 6972
diff changeset
188 while (ptr < utf8_end) {
6972
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
189 jchar c;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
190 ptr = UTF8::next(ptr, &c);
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
191 if (c >= 32 && c < 127) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
192 if (p + 1 >= end) break; // string is truncated
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
193 *p++ = (char)c;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
194 } else {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
195 if (p + 6 >= end) break; // string is truncated
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
196 sprintf(p, "\\u%04x", c);
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
197 p += 6;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
198 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
199 }
8851
8c03fc47511d 8011048: Possible reading from unmapped memory in UTF8::as_quoted_ascii()
iklam
parents: 6972
diff changeset
200 assert(p < end, "sanity");
6972
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
201 *p = '\0';
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
202 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
203
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
204
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
205 const char* UTF8::from_quoted_ascii(const char* quoted_ascii_str) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
206 const char *ptr = quoted_ascii_str;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
207 char* result = NULL;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
208 while (*ptr != '\0') {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
209 char c = *ptr;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
210 if (c < 32 || c >= 127) break;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
211 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
212 if (*ptr == '\0') {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
213 // nothing to do so return original string
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
214 return quoted_ascii_str;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
215 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
216 // everything up to this point was ok.
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
217 int length = ptr - quoted_ascii_str;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
218 char* buffer = NULL;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
219 for (int round = 0; round < 2; round++) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
220 while (*ptr != '\0') {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
221 if (*ptr != '\\') {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
222 if (buffer != NULL) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
223 buffer[length] = *ptr;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
224 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
225 length++;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
226 } else {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
227 switch (ptr[1]) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
228 case 'u': {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
229 ptr += 2;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
230 jchar value=0;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
231 for (int i=0; i<4; i++) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
232 char c = *ptr++;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
233 switch (c) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
234 case '0': case '1': case '2': case '3': case '4':
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
235 case '5': case '6': case '7': case '8': case '9':
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
236 value = (value << 4) + c - '0';
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
237 break;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
238 case 'a': case 'b': case 'c':
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
239 case 'd': case 'e': case 'f':
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
240 value = (value << 4) + 10 + c - 'a';
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
241 break;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
242 case 'A': case 'B': case 'C':
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
243 case 'D': case 'E': case 'F':
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
244 value = (value << 4) + 10 + c - 'A';
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
245 break;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
246 default:
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
247 ShouldNotReachHere();
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
248 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
249 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
250 if (buffer == NULL) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
251 char utf8_buffer[4];
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
252 char* next = (char*)utf8_write((u_char*)utf8_buffer, value);
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
253 length += next - utf8_buffer;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
254 } else {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
255 char* next = (char*)utf8_write((u_char*)&buffer[length], value);
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
256 length += next - &buffer[length];
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
257 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
258 break;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
259 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
260 case 't': if (buffer != NULL) buffer[length] = '\t'; ptr += 2; length++; break;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
261 case 'n': if (buffer != NULL) buffer[length] = '\n'; ptr += 2; length++; break;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
262 case 'r': if (buffer != NULL) buffer[length] = '\r'; ptr += 2; length++; break;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
263 case 'f': if (buffer != NULL) buffer[length] = '\f'; ptr += 2; length++; break;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
264 default:
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
265 ShouldNotReachHere();
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
266 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
267 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
268 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
269 if (round == 0) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
270 buffer = NEW_RESOURCE_ARRAY(char, length + 1);
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
271 ptr = quoted_ascii_str;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
272 } else {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
273 buffer[length] = '\0';
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
274 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
275 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
276 return buffer;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
277 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
278
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
279
0
a61af66fc99e Initial load
duke
parents:
diff changeset
280 // Returns NULL if 'c' it not found. This only works as long
a61af66fc99e Initial load
duke
parents:
diff changeset
281 // as 'c' is an ASCII character
2177
3582bf76420e 6990754: Use native memory and reference counting to implement SymbolTable
coleenp
parents: 1972
diff changeset
282 const jbyte* UTF8::strrchr(const jbyte* base, int length, jbyte c) {
0
a61af66fc99e Initial load
duke
parents:
diff changeset
283 assert(length >= 0, "sanity check");
a61af66fc99e Initial load
duke
parents:
diff changeset
284 assert(c >= 0, "does not work for non-ASCII characters");
a61af66fc99e Initial load
duke
parents:
diff changeset
285 // Skip backwards in string until 'c' is found or end is reached
a61af66fc99e Initial load
duke
parents:
diff changeset
286 while(--length >= 0 && base[length] != c);
a61af66fc99e Initial load
duke
parents:
diff changeset
287 return (length < 0) ? NULL : &base[length];
a61af66fc99e Initial load
duke
parents:
diff changeset
288 }
a61af66fc99e Initial load
duke
parents:
diff changeset
289
2177
3582bf76420e 6990754: Use native memory and reference counting to implement SymbolTable
coleenp
parents: 1972
diff changeset
290 bool UTF8::equal(const jbyte* base1, int length1, const jbyte* base2, int length2) {
0
a61af66fc99e Initial load
duke
parents:
diff changeset
291 // Length must be the same
a61af66fc99e Initial load
duke
parents:
diff changeset
292 if (length1 != length2) return false;
a61af66fc99e Initial load
duke
parents:
diff changeset
293 for (int i = 0; i < length1; i++) {
a61af66fc99e Initial load
duke
parents:
diff changeset
294 if (base1[i] != base2[i]) return false;
a61af66fc99e Initial load
duke
parents:
diff changeset
295 }
a61af66fc99e Initial load
duke
parents:
diff changeset
296 return true;
a61af66fc99e Initial load
duke
parents:
diff changeset
297 }
a61af66fc99e Initial load
duke
parents:
diff changeset
298
a61af66fc99e Initial load
duke
parents:
diff changeset
299 bool UTF8::is_supplementary_character(const unsigned char* str) {
a61af66fc99e Initial load
duke
parents:
diff changeset
300 return ((str[0] & 0xFF) == 0xED) && ((str[1] & 0xF0) == 0xA0) && ((str[2] & 0xC0) == 0x80)
a61af66fc99e Initial load
duke
parents:
diff changeset
301 && ((str[3] & 0xFF) == 0xED) && ((str[4] & 0xF0) == 0xB0) && ((str[5] & 0xC0) == 0x80);
a61af66fc99e Initial load
duke
parents:
diff changeset
302 }
a61af66fc99e Initial load
duke
parents:
diff changeset
303
a61af66fc99e Initial load
duke
parents:
diff changeset
304 jint UTF8::get_supplementary_character(const unsigned char* str) {
a61af66fc99e Initial load
duke
parents:
diff changeset
305 return 0x10000 + ((str[1] & 0x0f) << 16) + ((str[2] & 0x3f) << 10)
a61af66fc99e Initial load
duke
parents:
diff changeset
306 + ((str[4] & 0x0f) << 6) + (str[5] & 0x3f);
a61af66fc99e Initial load
duke
parents:
diff changeset
307 }
a61af66fc99e Initial load
duke
parents:
diff changeset
308
a61af66fc99e Initial load
duke
parents:
diff changeset
309
a61af66fc99e Initial load
duke
parents:
diff changeset
310 //-------------------------------------------------------------------------------------
a61af66fc99e Initial load
duke
parents:
diff changeset
311
a61af66fc99e Initial load
duke
parents:
diff changeset
312
a61af66fc99e Initial load
duke
parents:
diff changeset
313 int UNICODE::utf8_size(jchar c) {
a61af66fc99e Initial load
duke
parents:
diff changeset
314 if ((0x0001 <= c) && (c <= 0x007F)) return 1;
a61af66fc99e Initial load
duke
parents:
diff changeset
315 if (c <= 0x07FF) return 2;
a61af66fc99e Initial load
duke
parents:
diff changeset
316 return 3;
a61af66fc99e Initial load
duke
parents:
diff changeset
317 }
a61af66fc99e Initial load
duke
parents:
diff changeset
318
a61af66fc99e Initial load
duke
parents:
diff changeset
319 int UNICODE::utf8_length(jchar* base, int length) {
a61af66fc99e Initial load
duke
parents:
diff changeset
320 int result = 0;
a61af66fc99e Initial load
duke
parents:
diff changeset
321 for (int index = 0; index < length; index++) {
a61af66fc99e Initial load
duke
parents:
diff changeset
322 jchar c = base[index];
a61af66fc99e Initial load
duke
parents:
diff changeset
323 if ((0x0001 <= c) && (c <= 0x007F)) result += 1;
a61af66fc99e Initial load
duke
parents:
diff changeset
324 else if (c <= 0x07FF) result += 2;
a61af66fc99e Initial load
duke
parents:
diff changeset
325 else result += 3;
a61af66fc99e Initial load
duke
parents:
diff changeset
326 }
a61af66fc99e Initial load
duke
parents:
diff changeset
327 return result;
a61af66fc99e Initial load
duke
parents:
diff changeset
328 }
a61af66fc99e Initial load
duke
parents:
diff changeset
329
a61af66fc99e Initial load
duke
parents:
diff changeset
330 char* UNICODE::as_utf8(jchar* base, int length) {
a61af66fc99e Initial load
duke
parents:
diff changeset
331 int utf8_len = utf8_length(base, length);
a61af66fc99e Initial load
duke
parents:
diff changeset
332 u_char* result = NEW_RESOURCE_ARRAY(u_char, utf8_len + 1);
a61af66fc99e Initial load
duke
parents:
diff changeset
333 u_char* p = result;
a61af66fc99e Initial load
duke
parents:
diff changeset
334 for (int index = 0; index < length; index++) {
a61af66fc99e Initial load
duke
parents:
diff changeset
335 p = utf8_write(p, base[index]);
a61af66fc99e Initial load
duke
parents:
diff changeset
336 }
a61af66fc99e Initial load
duke
parents:
diff changeset
337 *p = '\0';
a61af66fc99e Initial load
duke
parents:
diff changeset
338 assert(p == &result[utf8_len], "length prediction must be correct");
a61af66fc99e Initial load
duke
parents:
diff changeset
339 return (char*) result;
a61af66fc99e Initial load
duke
parents:
diff changeset
340 }
a61af66fc99e Initial load
duke
parents:
diff changeset
341
a61af66fc99e Initial load
duke
parents:
diff changeset
342 char* UNICODE::as_utf8(jchar* base, int length, char* buf, int buflen) {
a61af66fc99e Initial load
duke
parents:
diff changeset
343 u_char* p = (u_char*)buf;
a61af66fc99e Initial load
duke
parents:
diff changeset
344 u_char* end = (u_char*)buf + buflen;
a61af66fc99e Initial load
duke
parents:
diff changeset
345 for (int index = 0; index < length; index++) {
a61af66fc99e Initial load
duke
parents:
diff changeset
346 jchar c = base[index];
a61af66fc99e Initial load
duke
parents:
diff changeset
347 if (p + utf8_size(c) >= end) break; // string is truncated
a61af66fc99e Initial load
duke
parents:
diff changeset
348 p = utf8_write(p, base[index]);
a61af66fc99e Initial load
duke
parents:
diff changeset
349 }
a61af66fc99e Initial load
duke
parents:
diff changeset
350 *p = '\0';
a61af66fc99e Initial load
duke
parents:
diff changeset
351 return buf;
a61af66fc99e Initial load
duke
parents:
diff changeset
352 }
a61af66fc99e Initial load
duke
parents:
diff changeset
353
a61af66fc99e Initial load
duke
parents:
diff changeset
354 void UNICODE::convert_to_utf8(const jchar* base, int length, char* utf8_buffer) {
a61af66fc99e Initial load
duke
parents:
diff changeset
355 for(int index = 0; index < length; index++) {
a61af66fc99e Initial load
duke
parents:
diff changeset
356 utf8_buffer = (char*)utf8_write((u_char*)utf8_buffer, base[index]);
a61af66fc99e Initial load
duke
parents:
diff changeset
357 }
a61af66fc99e Initial load
duke
parents:
diff changeset
358 *utf8_buffer = '\0';
a61af66fc99e Initial load
duke
parents:
diff changeset
359 }
6972
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
360
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
361 // returns the quoted ascii length of a unicode string
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
362 int UNICODE::quoted_ascii_length(jchar* base, int length) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
363 int result = 0;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
364 for (int i = 0; i < length; i++) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
365 jchar c = base[i];
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
366 if (c >= 32 && c < 127) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
367 result++;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
368 } else {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
369 result += 6;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
370 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
371 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
372 return result;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
373 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
374
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
375 // converts a utf8 string to quoted ascii
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
376 void UNICODE::as_quoted_ascii(const jchar* base, int length, char* buf, int buflen) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
377 char* p = buf;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
378 char* end = buf + buflen;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
379 for (int index = 0; index < length; index++) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
380 jchar c = base[index];
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
381 if (c >= 32 && c < 127) {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
382 if (p + 1 >= end) break; // string is truncated
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
383 *p++ = (char)c;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
384 } else {
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
385 if (p + 6 >= end) break; // string is truncated
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
386 sprintf(p, "\\u%04x", c);
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
387 p += 6;
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
388 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
389 }
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
390 *p = '\0';
bd7a7ce2e264 6830717: replay of compilations would help with debugging
minqi
parents: 2426
diff changeset
391 }