Mercurial > hg > truffle
changeset 17066:0bcefb0f8488
Truffle: byte[] sources.
author | Chris Seaton <chris.seaton@oracle.com> |
---|---|
date | Mon, 08 Sep 2014 22:21:21 +0100 |
parents | a6277ae87f0e |
children | 2bc092f3d574 |
files | graal/com.oracle.truffle.api.test/src/com/oracle/truffle/api/test/source/BytesSourceSectionTest.java graal/com.oracle.truffle.api/src/com/oracle/truffle/api/source/BytesDecoder.java graal/com.oracle.truffle.api/src/com/oracle/truffle/api/source/Source.java |
diffstat | 3 files changed, 302 insertions(+), 14 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.truffle.api.test/src/com/oracle/truffle/api/test/source/BytesSourceSectionTest.java Mon Sep 08 22:21:21 2014 +0100
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.truffle.api.test.source;
+
+import static org.junit.Assert.*;
+
+import java.nio.charset.*;
+
+import org.junit.*;
+
+import com.oracle.truffle.api.source.*;
+
+/**
+ * Checks that sections of a byte-indexed {@link Source} return the expected text, both when they
+ * are requested by line number and when they are requested by byte offset and length.
+ */
+public class BytesSourceSectionTest {
+
+    /** Length in bytes of every section requested by offset in these tests. */
+    private static final int SECTION_LENGTH = 3;
+
+    @Test
+    public void testSectionsFromLineNumberASCII() {
+        assertLines(utf8Source("foo\nbar\nbaz\n"), "foo", "bar", "baz");
+    }
+
+    @Test
+    public void testSectionsFromOffsetsASCII() {
+        final Source ascii = utf8Source("foo\nbar\nbaz\n");
+        assertSectionAt(ascii, 0, "foo");
+        assertSectionAt(ascii, 4, "bar");
+        assertSectionAt(ascii, 8, "baz");
+    }
+
+    @Test
+    public void testSectionsFromLineNumberUTF8() {
+        // ☃ is three bytes in UTF8
+        assertLines(utf8Source("foo\n☃\nbaz\n"), "foo", "☃", "baz");
+    }
+
+    @Test
+    public void testSectionsFromOffsetsUTF8() {
+        // ☃ is three bytes in UTF8
+        final Source snowman = utf8Source("foo\n☃\nbaz\n");
+        assertSectionAt(snowman, 0, "foo");
+        assertSectionAt(snowman, 4, "☃");
+        assertSectionAt(snowman, 8, "baz");
+    }
+
+    @Test
+    public void testOffset() {
+        // Only the bytes between the leading and trailing "xxx" belong to the source.
+        final byte[] padded = "xxxfoo\nbar\nbaz\nxxx".getBytes(StandardCharsets.UTF_8);
+        final Source inner = Source.fromBytes(padded, 3, padded.length - 6, "description", new BytesDecoder.UTF8BytesDecoder());
+        assertSectionAt(inner, 0, "foo");
+        assertSectionAt(inner, 4, "bar");
+        assertSectionAt(inner, 8, "baz");
+    }
+
+    /** Wraps the UTF-8 encoding of {@code text} in a source indexed by byte. */
+    private static Source utf8Source(String text) {
+        final byte[] encoded = text.getBytes(StandardCharsets.UTF_8);
+        return Source.fromBytes(encoded, "description", new BytesDecoder.UTF8BytesDecoder());
+    }
+
+    /** Asserts the code of the sections created for lines 1, 2, ... in that order. */
+    private static void assertLines(Source source, String... expectedLines) {
+        int lineNumber = 1;
+        for (String expected : expectedLines) {
+            assertEquals(expected, source.createSection("identifier", lineNumber).getCode());
+            lineNumber++;
+        }
+    }
+
+    /** Asserts the code of the {@link #SECTION_LENGTH}-byte section starting at {@code offset}. */
+    private static void assertSectionAt(Source source, int offset, String expected) {
+        assertEquals(expected, source.createSection("identifier", offset, SECTION_LENGTH).getCode());
+    }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.truffle.api/src/com/oracle/truffle/api/source/BytesDecoder.java Mon Sep 08 22:21:21 2014 +0100
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.truffle.api.source;
+
+import java.nio.charset.*;
+import java.util.*;
+
+/**
+ * For a language where strings do not map into Java strings, provides utilities to find line
+ * endings and to decode raw bytes into an approximate representation for tools to display.
+ * <p>
+ * See {@link Source#fromBytes}.
+ */
+public interface BytesDecoder {
+
+    /**
+     * Decodes a range of raw bytes into a Java string.
+     *
+     * @param bytes the array holding the raw bytes
+     * @param byteIndex index in {@code bytes} of the first byte to decode
+     * @param length number of bytes to decode
+     * @return the decoded text
+     */
+    String decode(byte[] bytes, int byteIndex, int length);
+
+    /**
+     * Scans a range of raw bytes for line endings and reports where each following line starts.
+     * Reported indices are relative to {@code byteIndex}, not to the start of {@code bytes}, so
+     * that they agree with the byte offsets used by a source created with
+     * {@link Source#fromBytes}.
+     *
+     * @param bytes the array holding the raw bytes
+     * @param byteIndex index in {@code bytes} of the first byte to scan
+     * @param length number of bytes to scan
+     * @param lineMarker receives the start of each line that follows a line ending
+     */
+    void decodeLines(byte[] bytes, int byteIndex, int length, LineMarker lineMarker);
+
+    /**
+     * Callback through which {@link BytesDecoder#decodeLines} reports line starts.
+     */
+    public interface LineMarker {
+
+        /**
+         * Records that a line starts at the given offset.
+         *
+         * @param index offset of the first byte of the line, relative to the start of the
+         *            scanned range
+         */
+        void markLine(int index);
+
+    }
+
+    /**
+     * A decoder for UTF-8 encoded bytes in which lines end with {@code '\n'}.
+     */
+    public static class UTF8BytesDecoder implements BytesDecoder {
+
+        @Override
+        public String decode(byte[] bytes, int byteIndex, int length) {
+            return new String(Arrays.copyOfRange(bytes, byteIndex, byteIndex + length), StandardCharsets.UTF_8);
+        }
+
+        @Override
+        public void decodeLines(byte[] bytes, int byteIndex, int length, LineMarker lineMarker) {
+            final int end = byteIndex + length;
+            for (int n = byteIndex; n < end; n++) {
+                if (bytes[n] == '\n') {
+                    // Report the start of the next line relative to the decoded range: the
+                    // source is indexed from byteIndex, so absolute array positions would
+                    // skew every line offset whenever byteIndex is non-zero.
+                    lineMarker.markLine(n + 1 - byteIndex);
+                }
+            }
+        }
+
+    }
+
+}
--- a/graal/com.oracle.truffle.api/src/com/oracle/truffle/api/source/Source.java Mon Sep 08 13:49:40 2014 +0200 +++ b/graal/com.oracle.truffle.api/src/com/oracle/truffle/api/source/Source.java Mon Sep 08 22:21:21 2014 +0100 @@ -158,6 +158,37 @@ } /** + * Creates a source from raw bytes. This can be used if the encoding of strings in your language + * is not compatible with Java strings, or if your parser returns byte indices instead of + * character indices. The returned source is then indexed by byte, not by character. + * + * @param bytes the raw bytes of the source + * @param description a note about the origin, possibly useful for debugging + * @param decoder how to decode the bytes into Java strings + * @return a newly created, non-indexed source representation + */ + public static Source fromBytes(byte[] bytes, String description, BytesDecoder decoder) { + return fromBytes(bytes, 0, bytes.length, description, decoder); + } + + /** + * Creates a source from raw bytes. This can be used if the encoding of strings in your language + * is not compatible with Java strings, or if your parser returns byte indices instead of + * character indices. The returned source is then indexed by byte, not by character. Offsets are + * relative to byteIndex. + * + * @param bytes the raw bytes of the source + * @param byteIndex where the string starts in the byte array + * @param length the length of the string in the byte array + * @param description a note about the origin, possibly useful for debugging + * @param decoder how to decode the bytes into Java strings + * @return a newly created, non-indexed source representation + */ + public static Source fromBytes(byte[] bytes, int byteIndex, int length, String description, BytesDecoder decoder) { + return new BytesSource(description, bytes, byteIndex, length, decoder); + } + + /** * Creates a source from literal text, but which acts as a file and can be retrieved by name * (unlike other literal sources); intended for testing. 
* @@ -246,6 +277,10 @@ */ public abstract String getCode(); + public String getCode(int charIndex, int charLength) { + return getCode().substring(charIndex, charIndex + charLength); + } + /** * Gets the text (not including a possible terminating newline) in a (1-based) numbered line. */ @@ -368,10 +403,7 @@ * @throws IllegalStateException if the source is one of the "null" instances */ public final SourceSection createSection(String identifier, int charIndex, int length) throws IllegalArgumentException { - final int codeLength = getCode().length(); - if (!(charIndex >= 0 && length >= 0 && charIndex + length <= codeLength)) { - throw new IllegalArgumentException("text positions out of range"); - } + checkRange(charIndex, length); checkTextMap(); final int startLine = getLineNumber(charIndex); final int startColumn = charIndex - getLineStartOffset(startLine) + 1; @@ -379,6 +411,12 @@ return new DefaultSourceSection(this, identifier, startLine, startColumn, charIndex, length); } + protected void checkRange(int charIndex, int length) { + if (!(charIndex >= 0 && length >= 0 && charIndex + length <= getCode().length())) { + throw new IllegalArgumentException("text positions out of range"); + } + } + /** * Creates a representation of a line of text in the source identified only by line number, from * which the character information will be computed. 
@@ -409,15 +447,19 @@ private TextMap checkTextMap() { if (textMap == null) { - final String code = getCode(); - if (code == null) { - throw new RuntimeException("can't read file " + getName()); - } - textMap = new TextMap(code); + textMap = createTextMap(); } return textMap; } + protected TextMap createTextMap() { + final String code = getCode(); + if (code == null) { + throw new RuntimeException("can't read file " + getName()); + } + return TextMap.fromString(code); + } + private static final class LiteralSource extends Source { private final String name; // Name used originally to describe the source @@ -621,6 +663,74 @@ } + private static final class BytesSource extends Source { + + private final String name; + private final byte[] bytes; + private final int byteIndex; + private final int length; + private final BytesDecoder decoder; + + public BytesSource(String name, byte[] bytes, int byteIndex, int length, BytesDecoder decoder) { + this.name = name; + this.bytes = bytes; + this.byteIndex = byteIndex; + this.length = length; + this.decoder = decoder; + } + + @Override + protected void reset() { + } + + @Override + public String getName() { + return name; + } + + @Override + public String getShortName() { + return name; + } + + @Override + public String getPath() { + return name; + } + + @Override + public URL getURL() { + return null; + } + + @Override + public Reader getReader() { + return null; + } + + @Override + public String getCode() { + return decoder.decode(bytes, byteIndex, length); + } + + @Override + public String getCode(int byteOffset, int codeLength) { + return decoder.decode(bytes, byteIndex + byteOffset, codeLength); + } + + @Override + protected void checkRange(int charIndex, int rangeLength) { + if (!(charIndex >= 0 && rangeLength >= 0 && charIndex + rangeLength <= length)) { + throw new IllegalArgumentException("text positions out of range"); + } + } + + @Override + protected TextMap createTextMap() { + return TextMap.fromBytes(bytes, 
byteIndex, length, decoder); + } + } + private static final class DefaultSourceSection implements SourceSection { private final Source source; @@ -704,7 +814,7 @@ @Override public final String getCode() { - return getSource().getCode().substring(charIndex, charIndex + charLength); + return getSource().getCode(charIndex, charLength); } @Override @@ -866,12 +976,18 @@ // Is the final text character a newline? final boolean finalNL; + public TextMap(int[] nlOffsets, int textLength, boolean finalNL) { + this.nlOffsets = nlOffsets; + this.textLength = textLength; + this.finalNL = finalNL; + } + /** * Constructs map permitting translation between 0-based character offsets and 1-based * lines/columns. */ - public TextMap(String text) { - this.textLength = text.length(); + public static TextMap fromString(String text) { + final int textLength = text.length(); final ArrayList<Integer> lines = new ArrayList<>(); lines.add(0); int offset = 0; @@ -887,12 +1003,37 @@ } lines.add(Integer.MAX_VALUE); - nlOffsets = new int[lines.size()]; + final int[] nlOffsets = new int[lines.size()]; for (int line = 0; line < lines.size(); line++) { nlOffsets[line] = lines.get(line); } - finalNL = textLength > 0 && (textLength == nlOffsets[nlOffsets.length - 2]); + final boolean finalNL = textLength > 0 && (textLength == nlOffsets[nlOffsets.length - 2]); + + return new TextMap(nlOffsets, textLength, finalNL); + } + + public static TextMap fromBytes(byte[] bytes, int byteIndex, int length, BytesDecoder bytesDecoder) { + final ArrayList<Integer> lines = new ArrayList<>(); + lines.add(0); + + bytesDecoder.decodeLines(bytes, byteIndex, length, new BytesDecoder.LineMarker() { + + public void markLine(int index) { + lines.add(index); + } + }); + + lines.add(Integer.MAX_VALUE); + + final int[] nlOffsets = new int[lines.size()]; + for (int line = 0; line < lines.size(); line++) { + nlOffsets[line] = lines.get(line); + } + + final boolean finalNL = length > 0 && (length == nlOffsets[nlOffsets.length - 
2]); + + return new TextMap(nlOffsets, length, finalNL); } /**