comparison src/share/vm/utilities/utf8.cpp @ 6972:bd7a7ce2e264

6830717: replay of compilations would help with debugging Summary: When a Java process crashes in a compiler thread, repeating the compilation helps find the root cause. This is done by using SA to dump the application class data and replay data from the core dump, then using a debug version of the JVM to recompile the problematic Java method. Reviewed-by: kvn, twisti, sspitsyn Contributed-by: yumin.qi@oracle.com
author minqi
date Mon, 12 Nov 2012 14:03:53 -0800
parents 1d1603768966
children 8c03fc47511d
comparison
equal deleted inserted replaced
6965:3be318ecfae5 6972:bd7a7ce2e264
1 /* 1 /*
2 * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. 2 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 * 4 *
5 * This code is free software; you can redistribute it and/or modify it 5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as 6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
145 return base + 3; 145 return base + 3;
146 } 146 }
147 147
148 void UTF8::convert_to_unicode(const char* utf8_str, jchar* unicode_str, int unicode_length) { 148 void UTF8::convert_to_unicode(const char* utf8_str, jchar* unicode_str, int unicode_length) {
149 unsigned char ch; 149 unsigned char ch;
150 const char *ptr = (const char *)utf8_str; 150 const char *ptr = utf8_str;
151 int index = 0; 151 int index = 0;
152 152
153 /* ASCII case loop optimization */ 153 /* ASCII case loop optimization */
154 for (; index < unicode_length; index++) { 154 for (; index < unicode_length; index++) {
155 if((ch = ptr[0]) > 0x7F) { break; } 155 if((ch = ptr[0]) > 0x7F) { break; }
159 159
160 for (; index < unicode_length; index++) { 160 for (; index < unicode_length; index++) {
161 ptr = UTF8::next(ptr, &unicode_str[index]); 161 ptr = UTF8::next(ptr, &unicode_str[index]);
162 } 162 }
163 } 163 }
164
165 // returns the quoted ascii length of a 0-terminated utf8 string
166 int UTF8::quoted_ascii_length(const char* utf8_str, int utf8_length) {
167 const char *ptr = utf8_str;
168 const char* end = ptr + utf8_length;
169 int result = 0;
170 while (ptr < end) {
171 jchar c;
172 ptr = UTF8::next(ptr, &c);
173 if (c >= 32 && c < 127) {
174 result++;
175 } else {
176 result += 6;
177 }
178 }
179 return result;
180 }
181
182 // converts a utf8 string to quoted ascii
183 void UTF8::as_quoted_ascii(const char* utf8_str, char* buf, int buflen) {
184 const char *ptr = utf8_str;
185 char* p = buf;
186 char* end = buf + buflen;
187 while (*ptr != '\0') {
188 jchar c;
189 ptr = UTF8::next(ptr, &c);
190 if (c >= 32 && c < 127) {
191 if (p + 1 >= end) break; // string is truncated
192 *p++ = (char)c;
193 } else {
194 if (p + 6 >= end) break; // string is truncated
195 sprintf(p, "\\u%04x", c);
196 p += 6;
197 }
198 }
199 *p = '\0';
200 }
201
202
203 const char* UTF8::from_quoted_ascii(const char* quoted_ascii_str) {
204 const char *ptr = quoted_ascii_str;
205 char* result = NULL;
206 while (*ptr != '\0') {
207 char c = *ptr;
208 if (c < 32 || c >= 127) break;
209 }
210 if (*ptr == '\0') {
211 // nothing to do so return original string
212 return quoted_ascii_str;
213 }
214 // everything up to this point was ok.
215 int length = ptr - quoted_ascii_str;
216 char* buffer = NULL;
217 for (int round = 0; round < 2; round++) {
218 while (*ptr != '\0') {
219 if (*ptr != '\\') {
220 if (buffer != NULL) {
221 buffer[length] = *ptr;
222 }
223 length++;
224 } else {
225 switch (ptr[1]) {
226 case 'u': {
227 ptr += 2;
228 jchar value=0;
229 for (int i=0; i<4; i++) {
230 char c = *ptr++;
231 switch (c) {
232 case '0': case '1': case '2': case '3': case '4':
233 case '5': case '6': case '7': case '8': case '9':
234 value = (value << 4) + c - '0';
235 break;
236 case 'a': case 'b': case 'c':
237 case 'd': case 'e': case 'f':
238 value = (value << 4) + 10 + c - 'a';
239 break;
240 case 'A': case 'B': case 'C':
241 case 'D': case 'E': case 'F':
242 value = (value << 4) + 10 + c - 'A';
243 break;
244 default:
245 ShouldNotReachHere();
246 }
247 }
248 if (buffer == NULL) {
249 char utf8_buffer[4];
250 char* next = (char*)utf8_write((u_char*)utf8_buffer, value);
251 length += next - utf8_buffer;
252 } else {
253 char* next = (char*)utf8_write((u_char*)&buffer[length], value);
254 length += next - &buffer[length];
255 }
256 break;
257 }
258 case 't': if (buffer != NULL) buffer[length] = '\t'; ptr += 2; length++; break;
259 case 'n': if (buffer != NULL) buffer[length] = '\n'; ptr += 2; length++; break;
260 case 'r': if (buffer != NULL) buffer[length] = '\r'; ptr += 2; length++; break;
261 case 'f': if (buffer != NULL) buffer[length] = '\f'; ptr += 2; length++; break;
262 default:
263 ShouldNotReachHere();
264 }
265 }
266 }
267 if (round == 0) {
268 buffer = NEW_RESOURCE_ARRAY(char, length + 1);
269 ptr = quoted_ascii_str;
270 } else {
271 buffer[length] = '\0';
272 }
273 }
274 return buffer;
275 }
276
164 277
165 // Returns NULL if 'c' it not found. This only works as long 278 // Returns NULL if 'c' it not found. This only works as long
166 // as 'c' is an ASCII character 279 // as 'c' is an ASCII character
167 const jbyte* UTF8::strrchr(const jbyte* base, int length, jbyte c) { 280 const jbyte* UTF8::strrchr(const jbyte* base, int length, jbyte c) {
168 assert(length >= 0, "sanity check"); 281 assert(length >= 0, "sanity check");
240 for(int index = 0; index < length; index++) { 353 for(int index = 0; index < length; index++) {
241 utf8_buffer = (char*)utf8_write((u_char*)utf8_buffer, base[index]); 354 utf8_buffer = (char*)utf8_write((u_char*)utf8_buffer, base[index]);
242 } 355 }
243 *utf8_buffer = '\0'; 356 *utf8_buffer = '\0';
244 } 357 }
358
359 // returns the quoted ascii length of a unicode string
360 int UNICODE::quoted_ascii_length(jchar* base, int length) {
361 int result = 0;
362 for (int i = 0; i < length; i++) {
363 jchar c = base[i];
364 if (c >= 32 && c < 127) {
365 result++;
366 } else {
367 result += 6;
368 }
369 }
370 return result;
371 }
372
373 // converts a utf8 string to quoted ascii
374 void UNICODE::as_quoted_ascii(const jchar* base, int length, char* buf, int buflen) {
375 char* p = buf;
376 char* end = buf + buflen;
377 for (int index = 0; index < length; index++) {
378 jchar c = base[index];
379 if (c >= 32 && c < 127) {
380 if (p + 1 >= end) break; // string is truncated
381 *p++ = (char)c;
382 } else {
383 if (p + 6 >= end) break; // string is truncated
384 sprintf(p, "\\u%04x", c);
385 p += 6;
386 }
387 }
388 *p = '\0';
389 }