Mercurial > hg > truffle
view graal/com.oracle.max.base/src/com/sun/max/util/Utf8.java @ 4231:d713eaf4d288
Renaming
author | Christian Wimmer <Christian.Wimmer@Oracle.com> |
---|---|
date | Thu, 05 Jan 2012 16:09:47 -0800 |
parents | e233f5660da4 |
children |
line wrap: on
line source
/* * Copyright (c) 2007, 2011, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. 
*/
package com.sun.max.util;

import java.io.*;

/**
 * Static helpers for converting between {@link String}s and a UTF-8 style byte encoding.
 * <p>
 * Every UTF-16 {@code char} is encoded on its own:
 * <ul>
 * <li>{@code U+0001..U+007F} as one byte,</li>
 * <li>{@code U+0000} and {@code U+0080..U+07FF} as two bytes,</li>
 * <li>everything else (including each half of a surrogate pair) as three bytes.</li>
 * </ul>
 * This is the scheme that {@link DataInput} documents as "modified UTF-8". The decoder accepts
 * one-, two- and three-byte sequences only; four-byte sequences are rejected.
 */
public final class Utf8 {

    private Utf8() {
    }

    /**
     * Computes how many bytes {@link #stringToUtf8(String)} produces for a string.
     *
     * @param string the string to measure
     * @return the length in bytes of the UTF8 representation of the string
     */
    public static int utf8Length(String string) {
        final int charCount = string.length();
        int result = 0;
        for (int i = 0; i < charCount; i++) {
            final char ch = string.charAt(i);
            if (ch >= 0x0001 && ch <= 0x007F) {
                result += 1;
            } else if (ch > 0x07FF) {
                result += 3;
            } else {
                // U+0000 and U+0080..U+07FF
                result += 2;
            }
        }
        return result;
    }

    /**
     * Encodes a string into bytes, one {@code char} at a time.
     * <p>
     * The char {@code U+0000} is written as the two bytes {@code 0xC0 0x80}, so the result never
     * contains a zero byte.
     *
     * @param string the string to encode
     * @return a new array of exactly {@link #utf8Length(String)} bytes holding the encoded string
     */
    public static byte[] stringToUtf8(String string) {
        final byte[] result = new byte[utf8Length(string)];
        final int charCount = string.length();
        int index = 0;
        for (int i = 0; i < charCount; i++) {
            final char ch = string.charAt(i);
            if (ch >= 0x0001 && ch <= 0x007F) {
                result[index++] = (byte) ch;
            } else if (ch > 0x07FF) {
                result[index++] = (byte) (0xE0 | (ch >> 12));
                result[index++] = (byte) (0x80 | ((ch >> 6) & 0x3F));
                result[index++] = (byte) (0x80 | (ch & 0x3F));
            } else {
                result[index++] = (byte) (0xC0 | (ch >> 6));
                result[index++] = (byte) (0x80 | (ch & 0x3F));
            }
        }
        return result;
    }

    /**
     * Reads a UTF-8 encoded String from {@code in}.
     *
     * @param in a data input source
     * @param zeroIsEncodedIn2Bytes if true, then 0 is decoded from two bytes as opposed to one
     * @param length the number of bytes to be decoded
     * @return the decoded string; the empty string if {@code length} is 0
     * @throws IOException if reading from {@code in} fails, including reaching the end of the input
     *             before {@code length} bytes were read
     * @throws Utf8Exception if the bytes read are rejected by {@link #utf8ToString(boolean, byte[])}
     */
    public static String readUtf8(DataInput in, boolean zeroIsEncodedIn2Bytes, int length) throws IOException, Utf8Exception {
        if (length == 0) {
            return "";
        }
        final byte[] utf8Data = new byte[length];
        in.readFully(utf8Data);

        // Fast path: when every byte stands for exactly one char, the full decoder can be skipped.
        boolean sevenBit = true;
        for (int i = 0; i < length; i++) {
            final byte b = utf8Data[i];
            if (b < 0 || (zeroIsEncodedIn2Bytes && b == 0)) {
                sevenBit = false;
                break;
            }
        }
        if (!sevenBit) {
            return utf8ToString(zeroIsEncodedIn2Bytes, utf8Data);
        }

        final char[] charData = new char[length];
        for (int i = 0; i < length; i++) {
            // All bytes are in 0..0x7F here, so a plain widening is exact.
            charData[i] = (char) utf8Data[i];
        }
        return new String(charData);
    }

    /**
     * Converts an array of UTF-8 data to a String.
     *
     * @param zeroIsEncodedIn2Bytes if true, then 0 is decoded from two bytes as opposed to one
     * @param utf8Data the data
     * @return the decoded string
     * @throws Utf8Exception if {@code zeroIsEncodedIn2Bytes} is true and a zero byte is found, if a
     *             sequence starts with a byte of the form {@code 10xxxxxx} or {@code 1111xxxx}, if a
     *             multi-byte sequence is cut off by the end of the array, or if a continuation byte
     *             is not of the form {@code 10xxxxxx}
     */
    public static String utf8ToString(boolean zeroIsEncodedIn2Bytes, byte[] utf8Data) throws Utf8Exception {
        final int length = utf8Data.length;
        final StringBuilder sb = new StringBuilder(length);
        int count = 0;
        while (count < length) {
            final int c = utf8Data[count] & 0xff;
            if (zeroIsEncodedIn2Bytes && c == 0) {
                throw new Utf8Exception();
            }
            // The high nibble of the lead byte determines the length of the sequence.
            switch (c >> 4) {
                case 0:
                case 1:
                case 2:
                case 3:
                case 4:
                case 5:
                case 6:
                case 7: {
                    /* 0xxxxxxx */
                    count++;
                    sb.append((char) c);
                    break;
                }
                case 12:
                case 13: {
                    /* 110x xxxx 10xx xxxx */
                    count += 2;
                    if (count > length) {
                        throw new Utf8Exception();
                    }
                    final int char2 = utf8Data[count - 1];
                    if ((char2 & 0xC0) != 0x80) {
                        throw new Utf8Exception();
                    }
                    sb.append((char) (((c & 0x1F) << 6) | (char2 & 0x3F)));
                    break;
                }
                case 14: {
                    /* 1110 xxxx 10xx xxxx 10xx xxxx */
                    count += 3;
                    if (count > length) {
                        throw new Utf8Exception();
                    }
                    final int char2 = utf8Data[count - 2];
                    final int char3 = utf8Data[count - 1];
                    if ((char2 & 0xC0) != 0x80 || (char3 & 0xC0) != 0x80) {
                        throw new Utf8Exception();
                    }
                    sb.append((char) (((c & 0x0F) << 12) | ((char2 & 0x3F) << 6) | (char3 & 0x3F)));
                    break;
                }
                default: {
                    /* 10xx xxxx, 1111 xxxx */
                    throw new Utf8Exception();
                }
            }
        }
        /* The number of chars produced may be less than the number of bytes consumed. */
        return sb.toString();
    }

    /**
     * Collects bytes from a stream up to, but not including, the first zero byte.
     *
     * @param inputStream the stream to read from
     * @return the bytes read before the terminating zero byte
     * @throws IOException if reading fails; an {@link EOFException} if the stream ends before a
     *             zero byte is seen
     */
    private static byte[] readZeroTerminatedBytes(InputStream inputStream) throws IOException {
        final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        while (true) {
            final int ch = inputStream.read();
            if (ch < 0) {
                throw new EOFException("end of stream reached before terminating 0 byte (after " + buffer.size() + " bytes)");
            }
            if (ch == 0) {
                return buffer.toByteArray();
            }
            buffer.write(ch);
        }
    }

    /**
     * Reads a 0-terminated UTF8 encoded string from a given stream.
     *
     * @param inputStream the stream to read from
     * @return the String constructed from the UTF8 encoded chars read from {@code inputStream}, omitting the terminating 0
     * @throws IOException if reading fails or the stream ends before the terminating 0
     * @throws Utf8Exception if the bytes read are rejected by {@link #utf8ToString(boolean, byte[])}
     */
    public static String readString(InputStream inputStream) throws IOException, Utf8Exception {
        final byte[] utf8Data = readZeroTerminatedBytes(inputStream);
        return utf8ToString(false, utf8Data);
    }

    /**
     * Writes a 0-terminated UTF8 encoded string to a given stream.
     * <p>
     * The bytes come from {@link #stringToUtf8(String)}, which never emits a zero byte, so the
     * terminator written here is the only zero byte in the output.
     *
     * @param outputStream the stream to write to
     * @param string the String to be written
     * @throws IOException if writing to {@code outputStream} fails
     */
    public static void writeString(OutputStream outputStream, String string) throws IOException {
        outputStream.write(stringToUtf8(string));
        outputStream.write(0);
    }
}